{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 47151, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0, "eval_loss": 0.632683277130127, "eval_runtime": 163.951, "eval_samples_per_second": 146.379, "eval_steps_per_second": 18.298, "step": 0 }, { "epoch": 2.120845793302369e-05, "grad_norm": 1.3760263919830322, "learning_rate": 0.0, "loss": 0.8088, "step": 1 }, { "epoch": 4.241691586604738e-05, "grad_norm": 1.833258032798767, "learning_rate": 2.0000000000000002e-07, "loss": 0.6605, "step": 2 }, { "epoch": 6.362537379907107e-05, "grad_norm": 1.6081106662750244, "learning_rate": 4.0000000000000003e-07, "loss": 0.8593, "step": 3 }, { "epoch": 8.483383173209476e-05, "grad_norm": 1.8282793760299683, "learning_rate": 6.000000000000001e-07, "loss": 0.8605, "step": 4 }, { "epoch": 0.00010604228966511846, "grad_norm": 2.0627074241638184, "learning_rate": 8.000000000000001e-07, "loss": 0.7355, "step": 5 }, { "epoch": 0.00012725074759814214, "grad_norm": 1.6110520362854004, "learning_rate": 1.0000000000000002e-06, "loss": 0.7585, "step": 6 }, { "epoch": 0.00014845920553116583, "grad_norm": 1.1363874673843384, "learning_rate": 1.2000000000000002e-06, "loss": 0.7535, "step": 7 }, { "epoch": 0.0001696676634641895, "grad_norm": 1.343659520149231, "learning_rate": 1.4000000000000001e-06, "loss": 0.7784, "step": 8 }, { "epoch": 0.0001908761213972132, "grad_norm": 1.4442864656448364, "learning_rate": 1.6000000000000001e-06, "loss": 0.8257, "step": 9 }, { "epoch": 0.0002120845793302369, "grad_norm": 1.9411156177520752, "learning_rate": 1.8000000000000001e-06, "loss": 0.8125, "step": 10 }, { "epoch": 0.0002332930372632606, "grad_norm": 1.2167284488677979, "learning_rate": 2.0000000000000003e-06, "loss": 0.6943, "step": 11 }, { "epoch": 0.0002545014951962843, "grad_norm": 1.0914123058319092, "learning_rate": 2.2e-06, "loss": 0.8102, "step": 12 }, { "epoch": 0.00027570995312930797, "grad_norm": 0.7351624369621277, "learning_rate": 2.4000000000000003e-06, "loss": 0.6369, "step": 13 }, { "epoch": 0.00029691841106233165, "grad_norm": 0.9040666222572327, "learning_rate": 2.6e-06, "loss": 0.7592, "step": 14 }, { "epoch": 0.00031812686899535534, "grad_norm": 0.9266452193260193, "learning_rate": 2.8000000000000003e-06, "loss": 0.7637, "step": 15 }, { "epoch": 0.000339335326928379, "grad_norm": 0.7653235197067261, "learning_rate": 3e-06, "loss": 0.7522, "step": 16 }, { "epoch": 0.0003605437848614027, "grad_norm": 1.0360254049301147, "learning_rate": 3.2000000000000003e-06, "loss": 0.6847, "step": 17 }, { "epoch": 0.0003817522427944264, "grad_norm": 0.6114339232444763, "learning_rate": 3.4000000000000005e-06, "loss": 0.6542, "step": 18 }, { "epoch": 0.0004029607007274501, "grad_norm": 0.7896522283554077, "learning_rate": 3.6000000000000003e-06, "loss": 0.6468, "step": 19 }, { "epoch": 0.0004241691586604738, "grad_norm": 0.5901504755020142, "learning_rate": 3.8000000000000005e-06, "loss": 0.6664, "step": 20 }, { "epoch": 0.0004453776165934975, "grad_norm": 0.948708713054657, "learning_rate": 4.000000000000001e-06, "loss": 0.6946, "step": 21 }, { "epoch": 0.0004665860745265212, "grad_norm": 0.7450428605079651, "learning_rate": 4.2000000000000004e-06, "loss": 0.5681, "step": 22 }, { "epoch": 0.0004877945324595449, "grad_norm": 0.6283453106880188, "learning_rate": 4.4e-06, "loss": 0.6517, "step": 23 }, { "epoch": 0.0005090029903925686, "grad_norm": 0.7397447824478149, "learning_rate": 4.600000000000001e-06, "loss": 0.5551, "step": 24 }, { "epoch": 0.0005302114483255922, "grad_norm": 0.5640890598297119, "learning_rate": 4.800000000000001e-06, "loss": 0.7288, "step": 25 }, { "epoch": 0.0005514199062586159, "grad_norm": 0.6332978010177612, "learning_rate": 5e-06, "loss": 0.5458, "step": 26 }, { "epoch": 0.0005726283641916397, "grad_norm": 0.5485689640045166, "learning_rate": 5.2e-06, "loss": 0.6115, "step": 27 }, { "epoch": 0.0005938368221246633, "grad_norm": 0.4971560835838318, "learning_rate": 5.400000000000001e-06, "loss": 0.7137, "step": 28 }, { "epoch": 0.000615045280057687, "grad_norm": 0.6547123789787292, "learning_rate": 5.600000000000001e-06, "loss": 0.7004, "step": 29 }, { "epoch": 0.0006362537379907107, "grad_norm": 0.545305073261261, "learning_rate": 5.8e-06, "loss": 0.6838, "step": 30 }, { "epoch": 0.0006574621959237344, "grad_norm": 0.43064361810684204, "learning_rate": 6e-06, "loss": 0.5912, "step": 31 }, { "epoch": 0.000678670653856758, "grad_norm": 0.45902061462402344, "learning_rate": 6.200000000000001e-06, "loss": 0.5949, "step": 32 }, { "epoch": 0.0006998791117897818, "grad_norm": 0.4450659155845642, "learning_rate": 6.4000000000000006e-06, "loss": 0.7063, "step": 33 }, { "epoch": 0.0007210875697228054, "grad_norm": 0.43253010511398315, "learning_rate": 6.600000000000001e-06, "loss": 0.6271, "step": 34 }, { "epoch": 0.0007422960276558292, "grad_norm": 0.44395703077316284, "learning_rate": 6.800000000000001e-06, "loss": 0.6995, "step": 35 }, { "epoch": 0.0007635044855888528, "grad_norm": 0.4254119098186493, "learning_rate": 7e-06, "loss": 0.7148, "step": 36 }, { "epoch": 0.0007847129435218765, "grad_norm": 0.5723405480384827, "learning_rate": 7.2000000000000005e-06, "loss": 0.7165, "step": 37 }, { "epoch": 0.0008059214014549002, "grad_norm": 0.48454171419143677, "learning_rate": 7.4e-06, "loss": 0.6498, "step": 38 }, { "epoch": 0.0008271298593879239, "grad_norm": 0.5259478092193604, "learning_rate": 7.600000000000001e-06, "loss": 0.6368, "step": 39 }, { "epoch": 0.0008483383173209476, "grad_norm": 0.4853329658508301, "learning_rate": 7.800000000000002e-06, "loss": 0.6099, "step": 40 }, { "epoch": 0.0008695467752539713, "grad_norm": 0.4156191051006317, "learning_rate": 8.000000000000001e-06, "loss": 0.5888, "step": 41 }, { "epoch": 0.000890755233186995, "grad_norm": 0.49781298637390137, "learning_rate": 8.2e-06, "loss": 0.6135, "step": 42 }, { "epoch": 0.0009119636911200186, "grad_norm": 0.41333436965942383, "learning_rate": 8.400000000000001e-06, "loss": 0.5718, "step": 43 }, { "epoch": 0.0009331721490530424, "grad_norm": 0.4478658139705658, "learning_rate": 8.6e-06, "loss": 0.5988, "step": 44 }, { "epoch": 0.000954380606986066, "grad_norm": 0.4393644332885742, "learning_rate": 8.8e-06, "loss": 0.6393, "step": 45 }, { "epoch": 0.0009755890649190898, "grad_norm": 1.2314794063568115, "learning_rate": 9e-06, "loss": 0.6364, "step": 46 }, { "epoch": 0.0009967975228521134, "grad_norm": 0.3816107213497162, "learning_rate": 9.200000000000002e-06, "loss": 0.7658, "step": 47 }, { "epoch": 0.0010180059807851371, "grad_norm": 0.3879571557044983, "learning_rate": 9.4e-06, "loss": 0.6092, "step": 48 }, { "epoch": 0.0010392144387181609, "grad_norm": 0.4650268256664276, "learning_rate": 9.600000000000001e-06, "loss": 0.684, "step": 49 }, { "epoch": 0.0010604228966511844, "grad_norm": 0.5362702012062073, "learning_rate": 9.800000000000001e-06, "loss": 0.5866, "step": 50 }, { "epoch": 0.0010816313545842081, "grad_norm": 0.49320805072784424, "learning_rate": 1e-05, "loss": 0.686, "step": 51 }, { "epoch": 0.0011028398125172319, "grad_norm": 0.4627537727355957, "learning_rate": 1.02e-05, "loss": 0.6521, "step": 52 }, { "epoch": 0.0011240482704502556, "grad_norm": 0.39710190892219543, "learning_rate": 1.04e-05, "loss": 0.6824, "step": 53 }, { "epoch": 0.0011452567283832794, "grad_norm": 0.40657612681388855, "learning_rate": 1.0600000000000002e-05, "loss": 0.662, "step": 54 }, { "epoch": 0.0011664651863163029, "grad_norm": 0.43612140417099, "learning_rate": 1.0800000000000002e-05, "loss": 0.6041, "step": 55 }, { "epoch": 0.0011876736442493266, "grad_norm": 0.3748439848423004, "learning_rate": 1.1000000000000001e-05, "loss": 0.5692, "step": 56 }, { "epoch": 0.0012088821021823504, "grad_norm": 0.4497641623020172, "learning_rate": 1.1200000000000001e-05, "loss": 0.6304, "step": 57 }, { "epoch": 0.001230090560115374, "grad_norm": 0.3763386011123657, "learning_rate": 1.14e-05, "loss": 0.5917, "step": 58 }, { "epoch": 0.0012512990180483976, "grad_norm": 0.402468204498291, "learning_rate": 1.16e-05, "loss": 0.5683, "step": 59 }, { "epoch": 0.0012725074759814214, "grad_norm": 0.4141341745853424, "learning_rate": 1.18e-05, "loss": 0.5687, "step": 60 }, { "epoch": 0.001293715933914445, "grad_norm": 0.4489157497882843, "learning_rate": 1.2e-05, "loss": 0.6047, "step": 61 }, { "epoch": 0.0013149243918474688, "grad_norm": 0.4011724591255188, "learning_rate": 1.22e-05, "loss": 0.6054, "step": 62 }, { "epoch": 0.0013361328497804924, "grad_norm": 0.3807128369808197, "learning_rate": 1.2400000000000002e-05, "loss": 0.5674, "step": 63 }, { "epoch": 0.001357341307713516, "grad_norm": 0.5187853574752808, "learning_rate": 1.2600000000000001e-05, "loss": 0.6323, "step": 64 }, { "epoch": 0.0013785497656465398, "grad_norm": 0.44311046600341797, "learning_rate": 1.2800000000000001e-05, "loss": 0.5974, "step": 65 }, { "epoch": 0.0013997582235795636, "grad_norm": 0.40255266427993774, "learning_rate": 1.3000000000000001e-05, "loss": 0.6451, "step": 66 }, { "epoch": 0.0014209666815125873, "grad_norm": 0.4574567675590515, "learning_rate": 1.3200000000000002e-05, "loss": 0.4934, "step": 67 }, { "epoch": 0.0014421751394456108, "grad_norm": 0.39655518531799316, "learning_rate": 1.3400000000000002e-05, "loss": 0.5791, "step": 68 }, { "epoch": 0.0014633835973786346, "grad_norm": 0.42089584469795227, "learning_rate": 1.3600000000000002e-05, "loss": 0.6191, "step": 69 }, { "epoch": 0.0014845920553116583, "grad_norm": 0.4004960358142853, "learning_rate": 1.38e-05, "loss": 0.6333, "step": 70 }, { "epoch": 0.001505800513244682, "grad_norm": 0.4793379008769989, "learning_rate": 1.4e-05, "loss": 0.6056, "step": 71 }, { "epoch": 0.0015270089711777056, "grad_norm": 0.43698325753211975, "learning_rate": 1.4200000000000001e-05, "loss": 0.6988, "step": 72 }, { "epoch": 0.0015482174291107293, "grad_norm": 0.4223780632019043, "learning_rate": 1.4400000000000001e-05, "loss": 0.6032, "step": 73 }, { "epoch": 0.001569425887043753, "grad_norm": 0.4635881185531616, "learning_rate": 1.46e-05, "loss": 0.5323, "step": 74 }, { "epoch": 0.0015906343449767768, "grad_norm": 0.3837103843688965, "learning_rate": 1.48e-05, "loss": 0.609, "step": 75 }, { "epoch": 0.0016118428029098003, "grad_norm": 0.3908292353153229, "learning_rate": 1.5000000000000002e-05, "loss": 0.6329, "step": 76 }, { "epoch": 0.001633051260842824, "grad_norm": 0.5340428352355957, "learning_rate": 1.5200000000000002e-05, "loss": 0.6765, "step": 77 }, { "epoch": 0.0016542597187758478, "grad_norm": 0.415363073348999, "learning_rate": 1.54e-05, "loss": 0.6, "step": 78 }, { "epoch": 0.0016754681767088715, "grad_norm": 0.4306538701057434, "learning_rate": 1.5600000000000003e-05, "loss": 0.6764, "step": 79 }, { "epoch": 0.0016966766346418953, "grad_norm": 0.4451252222061157, "learning_rate": 1.58e-05, "loss": 0.6125, "step": 80 }, { "epoch": 0.0017178850925749188, "grad_norm": 0.4680396020412445, "learning_rate": 1.6000000000000003e-05, "loss": 0.6258, "step": 81 }, { "epoch": 0.0017390935505079425, "grad_norm": 0.4483177065849304, "learning_rate": 1.62e-05, "loss": 0.6235, "step": 82 }, { "epoch": 0.0017603020084409663, "grad_norm": 0.4606911242008209, "learning_rate": 1.64e-05, "loss": 0.6085, "step": 83 }, { "epoch": 0.00178151046637399, "grad_norm": 0.4552142024040222, "learning_rate": 1.66e-05, "loss": 0.5648, "step": 84 }, { "epoch": 0.0018027189243070136, "grad_norm": 0.3898567855358124, "learning_rate": 1.6800000000000002e-05, "loss": 0.5511, "step": 85 }, { "epoch": 0.0018239273822400373, "grad_norm": 0.3857911229133606, "learning_rate": 1.7e-05, "loss": 0.6069, "step": 86 }, { "epoch": 0.001845135840173061, "grad_norm": 0.43374159932136536, "learning_rate": 1.72e-05, "loss": 0.6005, "step": 87 }, { "epoch": 0.0018663442981060848, "grad_norm": 0.4212774932384491, "learning_rate": 1.7400000000000003e-05, "loss": 0.6345, "step": 88 }, { "epoch": 0.0018875527560391083, "grad_norm": 0.41434594988822937, "learning_rate": 1.76e-05, "loss": 0.6277, "step": 89 }, { "epoch": 0.001908761213972132, "grad_norm": 0.431996613740921, "learning_rate": 1.7800000000000002e-05, "loss": 0.5471, "step": 90 }, { "epoch": 0.0019299696719051558, "grad_norm": 0.4048554003238678, "learning_rate": 1.8e-05, "loss": 0.6529, "step": 91 }, { "epoch": 0.0019511781298381795, "grad_norm": 0.38750192523002625, "learning_rate": 1.8200000000000002e-05, "loss": 0.6431, "step": 92 }, { "epoch": 0.0019723865877712033, "grad_norm": 0.4268713891506195, "learning_rate": 1.8400000000000003e-05, "loss": 0.6147, "step": 93 }, { "epoch": 0.0019935950457042268, "grad_norm": 0.44848859310150146, "learning_rate": 1.86e-05, "loss": 0.6338, "step": 94 }, { "epoch": 0.0020148035036372507, "grad_norm": 0.4437248706817627, "learning_rate": 1.88e-05, "loss": 0.6378, "step": 95 }, { "epoch": 0.0020360119615702743, "grad_norm": 0.4087796211242676, "learning_rate": 1.9e-05, "loss": 0.6324, "step": 96 }, { "epoch": 0.0020572204195032978, "grad_norm": 0.4110492467880249, "learning_rate": 1.9200000000000003e-05, "loss": 0.5362, "step": 97 }, { "epoch": 0.0020784288774363217, "grad_norm": 0.47200316190719604, "learning_rate": 1.94e-05, "loss": 0.59, "step": 98 }, { "epoch": 0.0020996373353693453, "grad_norm": 0.4618438184261322, "learning_rate": 1.9600000000000002e-05, "loss": 0.559, "step": 99 }, { "epoch": 0.0021208457933023688, "grad_norm": 0.6454976797103882, "learning_rate": 1.98e-05, "loss": 0.5617, "step": 100 }, { "epoch": 0.0021420542512353927, "grad_norm": 0.4212400019168854, "learning_rate": 2e-05, "loss": 0.6348, "step": 101 }, { "epoch": 0.0021632627091684163, "grad_norm": 0.4276679754257202, "learning_rate": 1.9999999994439044e-05, "loss": 0.6562, "step": 102 }, { "epoch": 0.0021844711671014402, "grad_norm": 0.46369728446006775, "learning_rate": 1.9999999977756174e-05, "loss": 0.6235, "step": 103 }, { "epoch": 0.0022056796250344637, "grad_norm": 0.6635697484016418, "learning_rate": 1.9999999949951393e-05, "loss": 0.6016, "step": 104 }, { "epoch": 0.0022268880829674873, "grad_norm": 0.5531042814254761, "learning_rate": 1.9999999911024696e-05, "loss": 0.5956, "step": 105 }, { "epoch": 0.0022480965409005112, "grad_norm": 0.4171169698238373, "learning_rate": 1.9999999860976088e-05, "loss": 0.5992, "step": 106 }, { "epoch": 0.0022693049988335347, "grad_norm": 0.4182076156139374, "learning_rate": 1.9999999799805567e-05, "loss": 0.5652, "step": 107 }, { "epoch": 0.0022905134567665587, "grad_norm": 0.5248743295669556, "learning_rate": 1.999999972751313e-05, "loss": 0.5877, "step": 108 }, { "epoch": 0.0023117219146995822, "grad_norm": 0.9655448198318481, "learning_rate": 1.9999999644098783e-05, "loss": 0.5327, "step": 109 }, { "epoch": 0.0023329303726326057, "grad_norm": 0.4122493267059326, "learning_rate": 1.9999999549562524e-05, "loss": 0.5876, "step": 110 }, { "epoch": 0.0023541388305656297, "grad_norm": 0.38544130325317383, "learning_rate": 1.999999944390435e-05, "loss": 0.6615, "step": 111 }, { "epoch": 0.0023753472884986532, "grad_norm": 0.39034026861190796, "learning_rate": 1.9999999327124265e-05, "loss": 0.6424, "step": 112 }, { "epoch": 0.0023965557464316768, "grad_norm": 0.4842027723789215, "learning_rate": 1.9999999199222267e-05, "loss": 0.6268, "step": 113 }, { "epoch": 0.0024177642043647007, "grad_norm": 0.43719595670700073, "learning_rate": 1.9999999060198356e-05, "loss": 0.5987, "step": 114 }, { "epoch": 0.0024389726622977242, "grad_norm": 0.42929607629776, "learning_rate": 1.9999998910052537e-05, "loss": 0.5391, "step": 115 }, { "epoch": 0.002460181120230748, "grad_norm": 0.39123278856277466, "learning_rate": 1.99999987487848e-05, "loss": 0.6122, "step": 116 }, { "epoch": 0.0024813895781637717, "grad_norm": 0.3510797917842865, "learning_rate": 1.9999998576395156e-05, "loss": 0.5438, "step": 117 }, { "epoch": 0.0025025980360967952, "grad_norm": 0.41061922907829285, "learning_rate": 1.9999998392883598e-05, "loss": 0.5623, "step": 118 }, { "epoch": 0.002523806494029819, "grad_norm": 0.41225937008857727, "learning_rate": 1.9999998198250128e-05, "loss": 0.5479, "step": 119 }, { "epoch": 0.0025450149519628427, "grad_norm": 0.4055570662021637, "learning_rate": 1.999999799249475e-05, "loss": 0.6211, "step": 120 }, { "epoch": 0.0025662234098958667, "grad_norm": 0.40054935216903687, "learning_rate": 1.9999997775617458e-05, "loss": 0.6405, "step": 121 }, { "epoch": 0.00258743186782889, "grad_norm": 0.4405639171600342, "learning_rate": 1.9999997547618256e-05, "loss": 0.593, "step": 122 }, { "epoch": 0.0026086403257619137, "grad_norm": 0.4158501923084259, "learning_rate": 1.9999997308497145e-05, "loss": 0.6079, "step": 123 }, { "epoch": 0.0026298487836949377, "grad_norm": 0.37060117721557617, "learning_rate": 1.9999997058254125e-05, "loss": 0.5628, "step": 124 }, { "epoch": 0.002651057241627961, "grad_norm": 0.4783400297164917, "learning_rate": 1.999999679688919e-05, "loss": 0.636, "step": 125 }, { "epoch": 0.0026722656995609847, "grad_norm": 0.4240272045135498, "learning_rate": 1.999999652440235e-05, "loss": 0.6246, "step": 126 }, { "epoch": 0.0026934741574940087, "grad_norm": 0.5086298584938049, "learning_rate": 1.99999962407936e-05, "loss": 0.6526, "step": 127 }, { "epoch": 0.002714682615427032, "grad_norm": 0.38824155926704407, "learning_rate": 1.999999594606294e-05, "loss": 0.6783, "step": 128 }, { "epoch": 0.002735891073360056, "grad_norm": 0.38264429569244385, "learning_rate": 1.999999564021037e-05, "loss": 0.6613, "step": 129 }, { "epoch": 0.0027570995312930797, "grad_norm": 0.4291940927505493, "learning_rate": 1.9999995323235895e-05, "loss": 0.561, "step": 130 }, { "epoch": 0.002778307989226103, "grad_norm": 0.37679922580718994, "learning_rate": 1.9999994995139513e-05, "loss": 0.5226, "step": 131 }, { "epoch": 0.002799516447159127, "grad_norm": 0.4182818531990051, "learning_rate": 1.999999465592122e-05, "loss": 0.6119, "step": 132 }, { "epoch": 0.0028207249050921507, "grad_norm": 0.3845260739326477, "learning_rate": 1.9999994305581022e-05, "loss": 0.5953, "step": 133 }, { "epoch": 0.0028419333630251746, "grad_norm": 0.3960956037044525, "learning_rate": 1.9999993944118914e-05, "loss": 0.5899, "step": 134 }, { "epoch": 0.002863141820958198, "grad_norm": 0.39025843143463135, "learning_rate": 1.99999935715349e-05, "loss": 0.5789, "step": 135 }, { "epoch": 0.0028843502788912217, "grad_norm": 0.4543626010417938, "learning_rate": 1.9999993187828983e-05, "loss": 0.6013, "step": 136 }, { "epoch": 0.0029055587368242456, "grad_norm": 0.4006597697734833, "learning_rate": 1.999999279300116e-05, "loss": 0.6337, "step": 137 }, { "epoch": 0.002926767194757269, "grad_norm": 0.3916458189487457, "learning_rate": 1.999999238705143e-05, "loss": 0.5203, "step": 138 }, { "epoch": 0.0029479756526902927, "grad_norm": 0.3862897753715515, "learning_rate": 1.9999991969979797e-05, "loss": 0.5035, "step": 139 }, { "epoch": 0.0029691841106233166, "grad_norm": 0.43404844403266907, "learning_rate": 1.9999991541786258e-05, "loss": 0.646, "step": 140 }, { "epoch": 0.00299039256855634, "grad_norm": 0.370470255613327, "learning_rate": 1.9999991102470817e-05, "loss": 0.6257, "step": 141 }, { "epoch": 0.003011601026489364, "grad_norm": 0.5360281467437744, "learning_rate": 1.999999065203347e-05, "loss": 0.6041, "step": 142 }, { "epoch": 0.0030328094844223876, "grad_norm": 0.44215795397758484, "learning_rate": 1.9999990190474224e-05, "loss": 0.5969, "step": 143 }, { "epoch": 0.003054017942355411, "grad_norm": 0.4186144769191742, "learning_rate": 1.9999989717793075e-05, "loss": 0.5708, "step": 144 }, { "epoch": 0.003075226400288435, "grad_norm": 0.3905005156993866, "learning_rate": 1.9999989233990024e-05, "loss": 0.5707, "step": 145 }, { "epoch": 0.0030964348582214586, "grad_norm": 0.40083858370780945, "learning_rate": 1.999998873906507e-05, "loss": 0.6163, "step": 146 }, { "epoch": 0.0031176433161544826, "grad_norm": 0.4340246617794037, "learning_rate": 1.9999988233018217e-05, "loss": 0.653, "step": 147 }, { "epoch": 0.003138851774087506, "grad_norm": 0.3513493239879608, "learning_rate": 1.9999987715849464e-05, "loss": 0.5101, "step": 148 }, { "epoch": 0.0031600602320205297, "grad_norm": 0.3896138370037079, "learning_rate": 1.999998718755881e-05, "loss": 0.6458, "step": 149 }, { "epoch": 0.0031812686899535536, "grad_norm": 0.35832804441452026, "learning_rate": 1.999998664814626e-05, "loss": 0.5885, "step": 150 }, { "epoch": 0.003202477147886577, "grad_norm": 0.3904860317707062, "learning_rate": 1.999998609761181e-05, "loss": 0.5202, "step": 151 }, { "epoch": 0.0032236856058196007, "grad_norm": 0.5017992854118347, "learning_rate": 1.9999985535955463e-05, "loss": 0.5976, "step": 152 }, { "epoch": 0.0032448940637526246, "grad_norm": 0.5869758725166321, "learning_rate": 1.999998496317722e-05, "loss": 0.6996, "step": 153 }, { "epoch": 0.003266102521685648, "grad_norm": 0.3584135174751282, "learning_rate": 1.999998437927708e-05, "loss": 0.5488, "step": 154 }, { "epoch": 0.003287310979618672, "grad_norm": 0.4342014789581299, "learning_rate": 1.999998378425504e-05, "loss": 0.5978, "step": 155 }, { "epoch": 0.0033085194375516956, "grad_norm": 0.3850497305393219, "learning_rate": 1.999998317811111e-05, "loss": 0.5502, "step": 156 }, { "epoch": 0.003329727895484719, "grad_norm": 0.45744091272354126, "learning_rate": 1.999998256084528e-05, "loss": 0.5807, "step": 157 }, { "epoch": 0.003350936353417743, "grad_norm": 0.3639981150627136, "learning_rate": 1.999998193245756e-05, "loss": 0.6178, "step": 158 }, { "epoch": 0.0033721448113507666, "grad_norm": 0.4426318407058716, "learning_rate": 1.9999981292947947e-05, "loss": 0.5291, "step": 159 }, { "epoch": 0.0033933532692837906, "grad_norm": 0.3978288471698761, "learning_rate": 1.9999980642316445e-05, "loss": 0.6265, "step": 160 }, { "epoch": 0.003414561727216814, "grad_norm": 0.39684316515922546, "learning_rate": 1.9999979980563047e-05, "loss": 0.6053, "step": 161 }, { "epoch": 0.0034357701851498376, "grad_norm": 0.41464167833328247, "learning_rate": 1.999997930768776e-05, "loss": 0.6106, "step": 162 }, { "epoch": 0.0034569786430828616, "grad_norm": 0.38096708059310913, "learning_rate": 1.999997862369058e-05, "loss": 0.586, "step": 163 }, { "epoch": 0.003478187101015885, "grad_norm": 0.4575459361076355, "learning_rate": 1.9999977928571512e-05, "loss": 0.5924, "step": 164 }, { "epoch": 0.0034993955589489086, "grad_norm": 0.38033226132392883, "learning_rate": 1.999997722233056e-05, "loss": 0.5661, "step": 165 }, { "epoch": 0.0035206040168819326, "grad_norm": 0.3581930994987488, "learning_rate": 1.9999976504967713e-05, "loss": 0.5501, "step": 166 }, { "epoch": 0.003541812474814956, "grad_norm": 0.37770596146583557, "learning_rate": 1.9999975776482983e-05, "loss": 0.4965, "step": 167 }, { "epoch": 0.00356302093274798, "grad_norm": 0.3918549716472626, "learning_rate": 1.9999975036876365e-05, "loss": 0.5253, "step": 168 }, { "epoch": 0.0035842293906810036, "grad_norm": 2.15242075920105, "learning_rate": 1.9999974286147866e-05, "loss": 0.6214, "step": 169 }, { "epoch": 0.003605437848614027, "grad_norm": 0.4124709367752075, "learning_rate": 1.9999973524297478e-05, "loss": 0.5926, "step": 170 }, { "epoch": 0.003626646306547051, "grad_norm": 0.381511926651001, "learning_rate": 1.999997275132521e-05, "loss": 0.5498, "step": 171 }, { "epoch": 0.0036478547644800746, "grad_norm": 0.4204103350639343, "learning_rate": 1.9999971967231055e-05, "loss": 0.6418, "step": 172 }, { "epoch": 0.0036690632224130985, "grad_norm": 0.35070425271987915, "learning_rate": 1.999997117201502e-05, "loss": 0.4718, "step": 173 }, { "epoch": 0.003690271680346122, "grad_norm": 0.37011754512786865, "learning_rate": 1.9999970365677103e-05, "loss": 0.5224, "step": 174 }, { "epoch": 0.0037114801382791456, "grad_norm": 0.4167177677154541, "learning_rate": 1.9999969548217305e-05, "loss": 0.5859, "step": 175 }, { "epoch": 0.0037326885962121695, "grad_norm": 0.4269498586654663, "learning_rate": 1.9999968719635632e-05, "loss": 0.6435, "step": 176 }, { "epoch": 0.003753897054145193, "grad_norm": 0.3678402602672577, "learning_rate": 1.9999967879932078e-05, "loss": 0.6474, "step": 177 }, { "epoch": 0.0037751055120782166, "grad_norm": 0.3712545335292816, "learning_rate": 1.9999967029106645e-05, "loss": 0.5612, "step": 178 }, { "epoch": 0.0037963139700112405, "grad_norm": 0.43316859006881714, "learning_rate": 1.999996616715934e-05, "loss": 0.5367, "step": 179 }, { "epoch": 0.003817522427944264, "grad_norm": 0.5710495114326477, "learning_rate": 1.9999965294090154e-05, "loss": 0.5641, "step": 180 }, { "epoch": 0.003838730885877288, "grad_norm": 0.4399798512458801, "learning_rate": 1.9999964409899094e-05, "loss": 0.6208, "step": 181 }, { "epoch": 0.0038599393438103115, "grad_norm": 0.3591434359550476, "learning_rate": 1.9999963514586163e-05, "loss": 0.6239, "step": 182 }, { "epoch": 0.003881147801743335, "grad_norm": 0.4465659260749817, "learning_rate": 1.999996260815136e-05, "loss": 0.6478, "step": 183 }, { "epoch": 0.003902356259676359, "grad_norm": 0.41934916377067566, "learning_rate": 1.9999961690594685e-05, "loss": 0.5463, "step": 184 }, { "epoch": 0.0039235647176093826, "grad_norm": 0.44979608058929443, "learning_rate": 1.999996076191614e-05, "loss": 0.6191, "step": 185 }, { "epoch": 0.0039447731755424065, "grad_norm": 0.4016929864883423, "learning_rate": 1.9999959822115724e-05, "loss": 0.6134, "step": 186 }, { "epoch": 0.00396598163347543, "grad_norm": 0.37906941771507263, "learning_rate": 1.9999958871193437e-05, "loss": 0.6052, "step": 187 }, { "epoch": 0.0039871900914084536, "grad_norm": 0.4045429229736328, "learning_rate": 1.999995790914929e-05, "loss": 0.6009, "step": 188 }, { "epoch": 0.0040083985493414775, "grad_norm": 0.38720864057540894, "learning_rate": 1.999995693598327e-05, "loss": 0.6663, "step": 189 }, { "epoch": 0.0040296070072745015, "grad_norm": 0.41273197531700134, "learning_rate": 1.9999955951695386e-05, "loss": 0.5446, "step": 190 }, { "epoch": 0.0040508154652075246, "grad_norm": 0.4549441635608673, "learning_rate": 1.9999954956285638e-05, "loss": 0.4508, "step": 191 }, { "epoch": 0.0040720239231405485, "grad_norm": 0.36949002742767334, "learning_rate": 1.999995394975403e-05, "loss": 0.6237, "step": 192 }, { "epoch": 0.0040932323810735725, "grad_norm": 0.5072430968284607, "learning_rate": 1.9999952932100558e-05, "loss": 0.596, "step": 193 }, { "epoch": 0.0041144408390065956, "grad_norm": 0.3697093725204468, "learning_rate": 1.9999951903325226e-05, "loss": 0.5249, "step": 194 }, { "epoch": 0.0041356492969396195, "grad_norm": 0.370307058095932, "learning_rate": 1.9999950863428034e-05, "loss": 0.5722, "step": 195 }, { "epoch": 0.0041568577548726435, "grad_norm": 0.34231486916542053, "learning_rate": 1.999994981240898e-05, "loss": 0.5181, "step": 196 }, { "epoch": 0.0041780662128056666, "grad_norm": 0.36852169036865234, "learning_rate": 1.9999948750268072e-05, "loss": 0.5923, "step": 197 }, { "epoch": 0.0041992746707386905, "grad_norm": 0.43313729763031006, "learning_rate": 1.999994767700531e-05, "loss": 0.724, "step": 198 }, { "epoch": 0.0042204831286717145, "grad_norm": 0.4057539701461792, "learning_rate": 1.999994659262069e-05, "loss": 0.5823, "step": 199 }, { "epoch": 0.0042416915866047376, "grad_norm": 0.3417372703552246, "learning_rate": 1.9999945497114216e-05, "loss": 0.6184, "step": 200 }, { "epoch": 0.0042629000445377615, "grad_norm": 0.3517211377620697, "learning_rate": 1.999994439048589e-05, "loss": 0.6153, "step": 201 }, { "epoch": 0.0042841085024707855, "grad_norm": 0.36391741037368774, "learning_rate": 1.9999943272735714e-05, "loss": 0.5758, "step": 202 }, { "epoch": 0.0043053169604038094, "grad_norm": 0.3756033778190613, "learning_rate": 1.9999942143863687e-05, "loss": 0.4683, "step": 203 }, { "epoch": 0.0043265254183368325, "grad_norm": 0.39180466532707214, "learning_rate": 1.9999941003869813e-05, "loss": 0.5668, "step": 204 }, { "epoch": 0.0043477338762698565, "grad_norm": 0.35968515276908875, "learning_rate": 1.999993985275409e-05, "loss": 0.6329, "step": 205 }, { "epoch": 0.0043689423342028804, "grad_norm": 0.38225463032722473, "learning_rate": 1.9999938690516517e-05, "loss": 0.5326, "step": 206 }, { "epoch": 0.0043901507921359035, "grad_norm": 0.44125065207481384, "learning_rate": 1.9999937517157104e-05, "loss": 0.6666, "step": 207 }, { "epoch": 0.0044113592500689275, "grad_norm": 0.4433561861515045, "learning_rate": 1.9999936332675846e-05, "loss": 0.5453, "step": 208 }, { "epoch": 0.0044325677080019514, "grad_norm": 0.3610874116420746, "learning_rate": 1.9999935137072747e-05, "loss": 0.5347, "step": 209 }, { "epoch": 0.0044537761659349745, "grad_norm": 0.43825602531433105, "learning_rate": 1.9999933930347804e-05, "loss": 0.5808, "step": 210 }, { "epoch": 0.0044749846238679985, "grad_norm": 0.40232449769973755, "learning_rate": 1.9999932712501024e-05, "loss": 0.5977, "step": 211 }, { "epoch": 0.0044961930818010224, "grad_norm": 0.37264934182167053, "learning_rate": 1.9999931483532403e-05, "loss": 0.584, "step": 212 }, { "epoch": 0.0045174015397340455, "grad_norm": 0.4740273356437683, "learning_rate": 1.9999930243441948e-05, "loss": 0.5108, "step": 213 }, { "epoch": 0.0045386099976670695, "grad_norm": 0.3722621500492096, "learning_rate": 1.999992899222966e-05, "loss": 0.6388, "step": 214 }, { "epoch": 0.0045598184556000934, "grad_norm": 0.4873678684234619, "learning_rate": 1.9999927729895533e-05, "loss": 0.6435, "step": 215 }, { "epoch": 0.004581026913533117, "grad_norm": 0.41888928413391113, "learning_rate": 1.9999926456439573e-05, "loss": 0.6177, "step": 216 }, { "epoch": 0.0046022353714661405, "grad_norm": 0.38451725244522095, "learning_rate": 1.9999925171861782e-05, "loss": 0.5959, "step": 217 }, { "epoch": 0.0046234438293991644, "grad_norm": 0.4246475398540497, "learning_rate": 1.999992387616216e-05, "loss": 0.5095, "step": 218 }, { "epoch": 0.004644652287332188, "grad_norm": 0.336139440536499, "learning_rate": 1.9999922569340712e-05, "loss": 0.5497, "step": 219 }, { "epoch": 0.0046658607452652115, "grad_norm": 0.4348197281360626, "learning_rate": 1.999992125139744e-05, "loss": 0.6072, "step": 220 }, { "epoch": 0.0046870692031982355, "grad_norm": 0.403519868850708, "learning_rate": 1.9999919922332337e-05, "loss": 0.6053, "step": 221 }, { "epoch": 0.004708277661131259, "grad_norm": 0.360597163438797, "learning_rate": 1.9999918582145413e-05, "loss": 0.5624, "step": 222 }, { "epoch": 0.0047294861190642825, "grad_norm": 0.435360312461853, "learning_rate": 1.9999917230836666e-05, "loss": 0.6311, "step": 223 }, { "epoch": 0.0047506945769973065, "grad_norm": 0.421346515417099, "learning_rate": 1.9999915868406098e-05, "loss": 0.4905, "step": 224 }, { "epoch": 0.00477190303493033, "grad_norm": 0.3708999454975128, "learning_rate": 1.999991449485371e-05, "loss": 0.617, "step": 225 }, { "epoch": 0.0047931114928633535, "grad_norm": 0.34154218435287476, "learning_rate": 1.99999131101795e-05, "loss": 0.5619, "step": 226 }, { "epoch": 0.0048143199507963775, "grad_norm": 0.3486165702342987, "learning_rate": 1.999991171438348e-05, "loss": 0.5298, "step": 227 }, { "epoch": 0.004835528408729401, "grad_norm": 0.4219681918621063, "learning_rate": 1.9999910307465644e-05, "loss": 0.6776, "step": 228 }, { "epoch": 0.004856736866662425, "grad_norm": 0.36917710304260254, "learning_rate": 1.999990888942599e-05, "loss": 0.6464, "step": 229 }, { "epoch": 0.0048779453245954485, "grad_norm": 0.3632355034351349, "learning_rate": 1.9999907460264528e-05, "loss": 0.5371, "step": 230 }, { "epoch": 0.004899153782528472, "grad_norm": 0.4465823471546173, "learning_rate": 1.9999906019981258e-05, "loss": 0.6081, "step": 231 }, { "epoch": 0.004920362240461496, "grad_norm": 0.34610915184020996, "learning_rate": 1.9999904568576174e-05, "loss": 0.5279, "step": 232 }, { "epoch": 0.0049415706983945195, "grad_norm": 0.5178088545799255, "learning_rate": 1.9999903106049287e-05, "loss": 0.5469, "step": 233 }, { "epoch": 0.004962779156327543, "grad_norm": 0.425580233335495, "learning_rate": 1.999990163240059e-05, "loss": 0.6052, "step": 234 }, { "epoch": 0.004983987614260567, "grad_norm": 0.3419185280799866, "learning_rate": 1.9999900147630092e-05, "loss": 0.5206, "step": 235 }, { "epoch": 0.0050051960721935905, "grad_norm": 0.3500162959098816, "learning_rate": 1.999989865173779e-05, "loss": 0.5424, "step": 236 }, { "epoch": 0.005026404530126614, "grad_norm": 0.43876421451568604, "learning_rate": 1.9999897144723693e-05, "loss": 0.681, "step": 237 }, { "epoch": 0.005047612988059638, "grad_norm": 0.4386579692363739, "learning_rate": 1.9999895626587796e-05, "loss": 0.4968, "step": 238 }, { "epoch": 0.0050688214459926615, "grad_norm": 0.45434918999671936, "learning_rate": 1.9999894097330098e-05, "loss": 0.5242, "step": 239 }, { "epoch": 0.005090029903925685, "grad_norm": 0.40281087160110474, "learning_rate": 1.9999892556950607e-05, "loss": 0.6039, "step": 240 }, { "epoch": 0.005111238361858709, "grad_norm": 0.39452987909317017, "learning_rate": 1.9999891005449322e-05, "loss": 0.5781, "step": 241 }, { "epoch": 0.005132446819791733, "grad_norm": 0.3485470116138458, "learning_rate": 1.9999889442826244e-05, "loss": 0.6109, "step": 242 }, { "epoch": 0.005153655277724756, "grad_norm": 0.3384239971637726, "learning_rate": 1.9999887869081377e-05, "loss": 0.6216, "step": 243 }, { "epoch": 0.00517486373565778, "grad_norm": 0.35351112484931946, "learning_rate": 1.9999886284214722e-05, "loss": 0.5533, "step": 244 }, { "epoch": 0.005196072193590804, "grad_norm": 0.36649996042251587, "learning_rate": 1.999988468822628e-05, "loss": 0.5728, "step": 245 }, { "epoch": 0.005217280651523827, "grad_norm": 0.35832351446151733, "learning_rate": 1.9999883081116054e-05, "loss": 0.4977, "step": 246 }, { "epoch": 0.005238489109456851, "grad_norm": 0.36467573046684265, "learning_rate": 1.9999881462884044e-05, "loss": 0.6308, "step": 247 }, { "epoch": 0.005259697567389875, "grad_norm": 0.3384789228439331, "learning_rate": 1.9999879833530253e-05, "loss": 0.5621, "step": 248 }, { "epoch": 0.0052809060253228984, "grad_norm": 0.358761191368103, "learning_rate": 1.999987819305468e-05, "loss": 0.5862, "step": 249 }, { "epoch": 0.005302114483255922, "grad_norm": 0.3740037679672241, "learning_rate": 1.9999876541457333e-05, "loss": 0.5789, "step": 250 }, { "epoch": 0.005323322941188946, "grad_norm": 0.37072908878326416, "learning_rate": 1.999987487873821e-05, "loss": 0.6244, "step": 251 }, { "epoch": 0.0053445313991219694, "grad_norm": 0.3884809613227844, "learning_rate": 1.999987320489731e-05, "loss": 0.6475, "step": 252 }, { "epoch": 0.005365739857054993, "grad_norm": 0.39136451482772827, "learning_rate": 1.9999871519934643e-05, "loss": 0.5811, "step": 253 }, { "epoch": 0.005386948314988017, "grad_norm": 0.3363403379917145, "learning_rate": 1.99998698238502e-05, "loss": 0.5808, "step": 254 }, { "epoch": 0.005408156772921041, "grad_norm": 0.365694522857666, "learning_rate": 1.999986811664399e-05, "loss": 0.5755, "step": 255 }, { "epoch": 0.005429365230854064, "grad_norm": 0.3385950028896332, "learning_rate": 1.9999866398316015e-05, "loss": 0.5026, "step": 256 }, { "epoch": 0.005450573688787088, "grad_norm": 0.4109686613082886, "learning_rate": 1.9999864668866275e-05, "loss": 0.5122, "step": 257 }, { "epoch": 0.005471782146720112, "grad_norm": 0.38498592376708984, "learning_rate": 1.999986292829477e-05, "loss": 0.5589, "step": 258 }, { "epoch": 0.005492990604653135, "grad_norm": 0.36216017603874207, "learning_rate": 1.999986117660151e-05, "loss": 0.6077, "step": 259 }, { "epoch": 0.005514199062586159, "grad_norm": 0.37129852175712585, "learning_rate": 1.9999859413786487e-05, "loss": 0.5616, "step": 260 }, { "epoch": 0.005535407520519183, "grad_norm": 0.40693211555480957, "learning_rate": 1.999985763984971e-05, "loss": 0.681, "step": 261 }, { "epoch": 0.005556615978452206, "grad_norm": 0.34610283374786377, "learning_rate": 1.9999855854791174e-05, "loss": 0.563, "step": 262 }, { "epoch": 0.00557782443638523, "grad_norm": 0.3836759626865387, "learning_rate": 1.999985405861089e-05, "loss": 0.5779, "step": 263 }, { "epoch": 0.005599032894318254, "grad_norm": 0.37091487646102905, "learning_rate": 1.999985225130885e-05, "loss": 0.631, "step": 264 }, { "epoch": 0.005620241352251277, "grad_norm": 0.3726845681667328, "learning_rate": 1.9999850432885063e-05, "loss": 0.5812, "step": 265 }, { "epoch": 0.005641449810184301, "grad_norm": 0.5140572190284729, "learning_rate": 1.999984860333953e-05, "loss": 0.5471, "step": 266 }, { "epoch": 0.005662658268117325, "grad_norm": 0.37064528465270996, "learning_rate": 1.9999846762672254e-05, "loss": 0.5179, "step": 267 }, { "epoch": 0.005683866726050349, "grad_norm": 0.3579655587673187, "learning_rate": 1.9999844910883234e-05, "loss": 0.5456, "step": 268 }, { "epoch": 0.005705075183983372, "grad_norm": 0.36915522813796997, "learning_rate": 1.9999843047972475e-05, "loss": 0.6323, "step": 269 }, { "epoch": 0.005726283641916396, "grad_norm": 0.36988428235054016, "learning_rate": 1.9999841173939974e-05, "loss": 0.6204, "step": 270 }, { "epoch": 0.00574749209984942, "grad_norm": 0.46595266461372375, "learning_rate": 1.9999839288785737e-05, "loss": 0.6972, "step": 271 }, { "epoch": 0.005768700557782443, "grad_norm": 0.39517438411712646, "learning_rate": 1.9999837392509768e-05, "loss": 0.5666, "step": 272 }, { "epoch": 0.005789909015715467, "grad_norm": 0.40325435996055603, "learning_rate": 1.9999835485112066e-05, "loss": 0.6702, "step": 273 }, { "epoch": 0.005811117473648491, "grad_norm": 0.36250609159469604, "learning_rate": 1.9999833566592635e-05, "loss": 0.5996, "step": 274 }, { "epoch": 0.005832325931581514, "grad_norm": 0.3746134042739868, "learning_rate": 1.9999831636951472e-05, "loss": 0.5692, "step": 275 }, { "epoch": 0.005853534389514538, "grad_norm": 0.4114323556423187, "learning_rate": 1.9999829696188587e-05, "loss": 0.607, "step": 276 }, { "epoch": 0.005874742847447562, "grad_norm": 0.34512653946876526, "learning_rate": 1.9999827744303973e-05, "loss": 0.5535, "step": 277 }, { "epoch": 0.005895951305380585, "grad_norm": 0.40095970034599304, "learning_rate": 1.9999825781297644e-05, "loss": 0.5439, "step": 278 }, { "epoch": 0.005917159763313609, "grad_norm": 0.38390910625457764, "learning_rate": 1.9999823807169592e-05, "loss": 0.5271, "step": 279 }, { "epoch": 0.005938368221246633, "grad_norm": 0.3875666856765747, "learning_rate": 1.999982182191982e-05, "loss": 0.5751, "step": 280 }, { "epoch": 0.005959576679179657, "grad_norm": 0.4039556682109833, "learning_rate": 1.9999819825548343e-05, "loss": 0.6107, "step": 281 }, { "epoch": 0.00598078513711268, "grad_norm": 0.369358092546463, "learning_rate": 1.9999817818055145e-05, "loss": 0.5436, "step": 282 }, { "epoch": 0.006001993595045704, "grad_norm": 0.33347269892692566, "learning_rate": 1.9999815799440235e-05, "loss": 0.529, "step": 283 }, { "epoch": 0.006023202052978728, "grad_norm": 0.44534584879875183, "learning_rate": 1.9999813769703623e-05, "loss": 0.5984, "step": 284 }, { "epoch": 0.006044410510911751, "grad_norm": 0.45138463377952576, "learning_rate": 1.9999811728845303e-05, "loss": 0.57, "step": 285 }, { "epoch": 0.006065618968844775, "grad_norm": 0.34743520617485046, "learning_rate": 1.9999809676865277e-05, "loss": 0.5036, "step": 286 }, { "epoch": 0.006086827426777799, "grad_norm": 0.58802330493927, "learning_rate": 1.999980761376355e-05, "loss": 0.608, "step": 287 }, { "epoch": 0.006108035884710822, "grad_norm": 0.373843789100647, "learning_rate": 1.9999805539540124e-05, "loss": 0.524, "step": 288 }, { "epoch": 0.006129244342643846, "grad_norm": 0.36434629559516907, "learning_rate": 1.9999803454195003e-05, "loss": 0.5561, "step": 289 }, { "epoch": 0.00615045280057687, "grad_norm": 0.3620583713054657, "learning_rate": 1.9999801357728188e-05, "loss": 0.5745, "step": 290 }, { "epoch": 0.006171661258509893, "grad_norm": 0.3556967079639435, "learning_rate": 1.999979925013968e-05, "loss": 0.5603, "step": 291 }, { "epoch": 0.006192869716442917, "grad_norm": 0.3384582996368408, "learning_rate": 1.999979713142948e-05, "loss": 0.545, "step": 292 }, { "epoch": 0.006214078174375941, "grad_norm": 0.31545376777648926, "learning_rate": 1.9999795001597593e-05, "loss": 0.515, "step": 293 }, { "epoch": 0.006235286632308965, "grad_norm": 0.5555678009986877, "learning_rate": 1.9999792860644024e-05, "loss": 0.6364, "step": 294 }, { "epoch": 0.006256495090241988, "grad_norm": 0.3661026954650879, "learning_rate": 1.9999790708568767e-05, "loss": 0.5632, "step": 295 }, { "epoch": 0.006277703548175012, "grad_norm": 0.36963093280792236, "learning_rate": 1.9999788545371835e-05, "loss": 0.6941, "step": 296 }, { "epoch": 0.006298912006108036, "grad_norm": 0.3533518612384796, "learning_rate": 1.999978637105322e-05, "loss": 0.4933, "step": 297 }, { "epoch": 0.006320120464041059, "grad_norm": 0.39026013016700745, "learning_rate": 1.9999784185612933e-05, "loss": 0.7325, "step": 298 }, { "epoch": 0.006341328921974083, "grad_norm": 0.3361560106277466, "learning_rate": 1.9999781989050973e-05, "loss": 0.5995, "step": 299 }, { "epoch": 0.006362537379907107, "grad_norm": 0.4177126884460449, "learning_rate": 1.9999779781367342e-05, "loss": 0.5848, "step": 300 }, { "epoch": 0.00638374583784013, "grad_norm": 0.363626092672348, "learning_rate": 1.999977756256204e-05, "loss": 0.5725, "step": 301 }, { "epoch": 0.006404954295773154, "grad_norm": 0.3136793076992035, "learning_rate": 1.9999775332635076e-05, "loss": 0.5724, "step": 302 }, { "epoch": 0.006426162753706178, "grad_norm": 0.3960459530353546, "learning_rate": 1.9999773091586444e-05, "loss": 0.6519, "step": 303 }, { "epoch": 0.006447371211639201, "grad_norm": 0.38129809498786926, "learning_rate": 1.999977083941616e-05, "loss": 0.6709, "step": 304 }, { "epoch": 0.006468579669572225, "grad_norm": 0.3790348172187805, "learning_rate": 1.999976857612421e-05, "loss": 0.5966, "step": 305 }, { "epoch": 0.006489788127505249, "grad_norm": 0.36675944924354553, "learning_rate": 1.9999766301710606e-05, "loss": 0.6369, "step": 306 }, { "epoch": 0.006510996585438273, "grad_norm": 0.35909128189086914, "learning_rate": 1.9999764016175347e-05, "loss": 0.6194, "step": 307 }, { "epoch": 0.006532205043371296, "grad_norm": 0.36890408396720886, "learning_rate": 1.9999761719518444e-05, "loss": 0.4803, "step": 308 }, { "epoch": 0.00655341350130432, "grad_norm": 0.34834960103034973, "learning_rate": 1.9999759411739886e-05, "loss": 0.5757, "step": 309 }, { "epoch": 0.006574621959237344, "grad_norm": 0.36303991079330444, "learning_rate": 1.9999757092839683e-05, "loss": 0.5147, "step": 310 }, { "epoch": 0.006595830417170367, "grad_norm": 0.4016430079936981, "learning_rate": 1.999975476281784e-05, "loss": 0.5919, "step": 311 }, { "epoch": 0.006617038875103391, "grad_norm": 0.35201895236968994, "learning_rate": 1.9999752421674356e-05, "loss": 0.5442, "step": 312 }, { "epoch": 0.006638247333036415, "grad_norm": 0.4396451711654663, "learning_rate": 1.999975006940923e-05, "loss": 0.6203, "step": 313 }, { "epoch": 0.006659455790969438, "grad_norm": 0.6032423973083496, "learning_rate": 1.9999747706022473e-05, "loss": 0.501, "step": 314 }, { "epoch": 0.006680664248902462, "grad_norm": 0.34556183218955994, "learning_rate": 1.9999745331514083e-05, "loss": 0.5771, "step": 315 }, { "epoch": 0.006701872706835486, "grad_norm": 0.3421584665775299, "learning_rate": 1.9999742945884066e-05, "loss": 0.5628, "step": 316 }, { "epoch": 0.006723081164768509, "grad_norm": 0.32758626341819763, "learning_rate": 1.999974054913242e-05, "loss": 0.5222, "step": 317 }, { "epoch": 0.006744289622701533, "grad_norm": 0.35171034932136536, "learning_rate": 1.9999738141259148e-05, "loss": 0.6056, "step": 318 }, { "epoch": 0.006765498080634557, "grad_norm": 0.3457407057285309, "learning_rate": 1.9999735722264252e-05, "loss": 0.6693, "step": 319 }, { "epoch": 0.006786706538567581, "grad_norm": 0.36685001850128174, "learning_rate": 1.999973329214774e-05, "loss": 0.637, "step": 320 }, { "epoch": 0.006807914996500604, "grad_norm": 0.3494229018688202, "learning_rate": 1.999973085090961e-05, "loss": 0.5059, "step": 321 }, { "epoch": 0.006829123454433628, "grad_norm": 0.46579819917678833, "learning_rate": 1.9999728398549867e-05, "loss": 0.5872, "step": 322 }, { "epoch": 0.006850331912366652, "grad_norm": 0.3804194927215576, "learning_rate": 1.9999725935068516e-05, "loss": 0.6053, "step": 323 }, { "epoch": 0.006871540370299675, "grad_norm": 0.3833121657371521, "learning_rate": 1.9999723460465552e-05, "loss": 0.6246, "step": 324 }, { "epoch": 0.006892748828232699, "grad_norm": 0.33845314383506775, "learning_rate": 1.9999720974740985e-05, "loss": 0.5495, "step": 325 }, { "epoch": 0.006913957286165723, "grad_norm": 0.34165501594543457, "learning_rate": 1.9999718477894813e-05, "loss": 0.6056, "step": 326 }, { "epoch": 0.006935165744098746, "grad_norm": 0.4044373333454132, "learning_rate": 1.9999715969927042e-05, "loss": 0.6169, "step": 327 }, { "epoch": 0.00695637420203177, "grad_norm": 0.3640027940273285, "learning_rate": 1.9999713450837674e-05, "loss": 0.5596, "step": 328 }, { "epoch": 0.006977582659964794, "grad_norm": 0.37424954771995544, "learning_rate": 1.9999710920626712e-05, "loss": 0.5912, "step": 329 }, { "epoch": 0.006998791117897817, "grad_norm": 0.5320624113082886, "learning_rate": 1.9999708379294157e-05, "loss": 0.5084, "step": 330 }, { "epoch": 0.007019999575830841, "grad_norm": 0.3432006537914276, "learning_rate": 1.9999705826840015e-05, "loss": 0.5932, "step": 331 }, { "epoch": 0.007041208033763865, "grad_norm": 0.35778117179870605, "learning_rate": 1.9999703263264287e-05, "loss": 0.4765, "step": 332 }, { "epoch": 0.007062416491696889, "grad_norm": 0.4017725884914398, "learning_rate": 1.9999700688566972e-05, "loss": 0.5953, "step": 333 }, { "epoch": 0.007083624949629912, "grad_norm": 0.6153385639190674, "learning_rate": 1.999969810274808e-05, "loss": 0.4903, "step": 334 }, { "epoch": 0.007104833407562936, "grad_norm": 0.34993231296539307, "learning_rate": 1.9999695505807612e-05, "loss": 0.5973, "step": 335 }, { "epoch": 0.00712604186549596, "grad_norm": 0.36413174867630005, "learning_rate": 1.9999692897745568e-05, "loss": 0.5642, "step": 336 }, { "epoch": 0.007147250323428983, "grad_norm": 0.3915337920188904, "learning_rate": 1.999969027856195e-05, "loss": 0.5903, "step": 337 }, { "epoch": 0.007168458781362007, "grad_norm": 0.38530901074409485, "learning_rate": 1.9999687648256766e-05, "loss": 0.6914, "step": 338 }, { "epoch": 0.007189667239295031, "grad_norm": 0.35699528455734253, "learning_rate": 1.999968500683002e-05, "loss": 0.583, "step": 339 }, { "epoch": 0.007210875697228054, "grad_norm": 0.36984890699386597, "learning_rate": 1.9999682354281703e-05, "loss": 0.6797, "step": 340 }, { "epoch": 0.007232084155161078, "grad_norm": 0.38596245646476746, "learning_rate": 1.9999679690611834e-05, "loss": 0.6579, "step": 341 }, { "epoch": 0.007253292613094102, "grad_norm": 0.3807539939880371, "learning_rate": 1.99996770158204e-05, "loss": 0.6095, "step": 342 }, { "epoch": 0.007274501071027125, "grad_norm": 0.4179855287075043, "learning_rate": 1.999967432990742e-05, "loss": 0.5355, "step": 343 }, { "epoch": 0.007295709528960149, "grad_norm": 0.35009029507637024, "learning_rate": 1.9999671632872886e-05, "loss": 0.5415, "step": 344 }, { "epoch": 0.007316917986893173, "grad_norm": 0.37012872099876404, "learning_rate": 1.9999668924716806e-05, "loss": 0.5754, "step": 345 }, { "epoch": 0.007338126444826197, "grad_norm": 0.31254449486732483, "learning_rate": 1.9999666205439176e-05, "loss": 0.5974, "step": 346 }, { "epoch": 0.00735933490275922, "grad_norm": 0.3877851963043213, "learning_rate": 1.9999663475040007e-05, "loss": 0.6018, "step": 347 }, { "epoch": 0.007380543360692244, "grad_norm": 0.3584613800048828, "learning_rate": 1.99996607335193e-05, "loss": 0.5849, "step": 348 }, { "epoch": 0.007401751818625268, "grad_norm": 0.4091740846633911, "learning_rate": 1.9999657980877055e-05, "loss": 0.604, "step": 349 }, { "epoch": 0.007422960276558291, "grad_norm": 0.3568224310874939, "learning_rate": 1.999965521711328e-05, "loss": 0.566, "step": 350 }, { "epoch": 0.007444168734491315, "grad_norm": 0.34188970923423767, "learning_rate": 1.9999652442227973e-05, "loss": 0.5089, "step": 351 }, { "epoch": 0.007465377192424339, "grad_norm": 0.3327527940273285, "learning_rate": 1.999964965622114e-05, "loss": 0.5491, "step": 352 }, { "epoch": 0.007486585650357362, "grad_norm": 0.38803115487098694, "learning_rate": 1.9999646859092782e-05, "loss": 0.5929, "step": 353 }, { "epoch": 0.007507794108290386, "grad_norm": 0.39950886368751526, "learning_rate": 1.9999644050842906e-05, "loss": 0.5069, "step": 354 }, { "epoch": 0.00752900256622341, "grad_norm": 0.3817100524902344, "learning_rate": 1.9999641231471512e-05, "loss": 0.5497, "step": 355 }, { "epoch": 0.007550211024156433, "grad_norm": 2.091578483581543, "learning_rate": 1.9999638400978605e-05, "loss": 0.7013, "step": 356 }, { "epoch": 0.007571419482089457, "grad_norm": 0.4122573733329773, "learning_rate": 1.9999635559364187e-05, "loss": 0.6028, "step": 357 }, { "epoch": 0.007592627940022481, "grad_norm": 0.58858323097229, "learning_rate": 1.999963270662826e-05, "loss": 0.6381, "step": 358 }, { "epoch": 0.007613836397955505, "grad_norm": 0.38271525502204895, "learning_rate": 1.9999629842770827e-05, "loss": 0.6152, "step": 359 }, { "epoch": 0.007635044855888528, "grad_norm": 0.3736836016178131, "learning_rate": 1.9999626967791895e-05, "loss": 0.5791, "step": 360 }, { "epoch": 0.007656253313821552, "grad_norm": 0.4060533046722412, "learning_rate": 1.9999624081691466e-05, "loss": 0.5722, "step": 361 }, { "epoch": 0.007677461771754576, "grad_norm": 0.3362186849117279, "learning_rate": 1.9999621184469537e-05, "loss": 0.5847, "step": 362 }, { "epoch": 0.007698670229687599, "grad_norm": 0.37396904826164246, "learning_rate": 1.999961827612612e-05, "loss": 0.5557, "step": 363 }, { "epoch": 0.007719878687620623, "grad_norm": 0.3498044013977051, "learning_rate": 1.9999615356661212e-05, "loss": 0.5077, "step": 364 }, { "epoch": 0.007741087145553647, "grad_norm": 0.43882429599761963, "learning_rate": 1.999961242607482e-05, "loss": 0.5227, "step": 365 }, { "epoch": 0.00776229560348667, "grad_norm": 0.36424005031585693, "learning_rate": 1.999960948436695e-05, "loss": 0.6425, "step": 366 }, { "epoch": 0.007783504061419694, "grad_norm": 0.6374667882919312, "learning_rate": 1.9999606531537593e-05, "loss": 0.5869, "step": 367 }, { "epoch": 0.007804712519352718, "grad_norm": 0.34580910205841064, "learning_rate": 1.999960356758677e-05, "loss": 0.5564, "step": 368 }, { "epoch": 0.007825920977285742, "grad_norm": 0.3815109431743622, "learning_rate": 1.999960059251447e-05, "loss": 0.6382, "step": 369 }, { "epoch": 0.007847129435218765, "grad_norm": 0.4314599335193634, "learning_rate": 1.9999597606320698e-05, "loss": 0.6013, "step": 370 }, { "epoch": 0.007868337893151788, "grad_norm": 0.3629453480243683, "learning_rate": 1.9999594609005465e-05, "loss": 0.5539, "step": 371 }, { "epoch": 0.007889546351084813, "grad_norm": 0.3525996506214142, "learning_rate": 1.9999591600568765e-05, "loss": 0.562, "step": 372 }, { "epoch": 0.007910754809017836, "grad_norm": 0.3635641634464264, "learning_rate": 1.999958858101061e-05, "loss": 0.5528, "step": 373 }, { "epoch": 0.00793196326695086, "grad_norm": 0.3775603175163269, "learning_rate": 1.9999585550330997e-05, "loss": 0.6014, "step": 374 }, { "epoch": 0.007953171724883884, "grad_norm": 0.36140960454940796, "learning_rate": 1.9999582508529936e-05, "loss": 0.5825, "step": 375 }, { "epoch": 0.007974380182816907, "grad_norm": 0.36164113879203796, "learning_rate": 1.999957945560742e-05, "loss": 0.6496, "step": 376 }, { "epoch": 0.007995588640749932, "grad_norm": 0.4543960988521576, "learning_rate": 1.9999576391563465e-05, "loss": 0.5711, "step": 377 }, { "epoch": 0.008016797098682955, "grad_norm": 0.3922000825405121, "learning_rate": 1.9999573316398065e-05, "loss": 0.5104, "step": 378 }, { "epoch": 0.008038005556615978, "grad_norm": 0.3175519108772278, "learning_rate": 1.9999570230111224e-05, "loss": 0.5753, "step": 379 }, { "epoch": 0.008059214014549003, "grad_norm": 0.33051201701164246, "learning_rate": 1.999956713270295e-05, "loss": 0.5829, "step": 380 }, { "epoch": 0.008080422472482026, "grad_norm": 0.33989495038986206, "learning_rate": 1.9999564024173245e-05, "loss": 0.5267, "step": 381 }, { "epoch": 0.008101630930415049, "grad_norm": 0.3296050727367401, "learning_rate": 1.999956090452211e-05, "loss": 0.5974, "step": 382 }, { "epoch": 0.008122839388348074, "grad_norm": 0.3552073538303375, "learning_rate": 1.9999557773749554e-05, "loss": 0.6684, "step": 383 }, { "epoch": 0.008144047846281097, "grad_norm": 0.3713781237602234, "learning_rate": 1.9999554631855575e-05, "loss": 0.6075, "step": 384 }, { "epoch": 0.00816525630421412, "grad_norm": 0.35817235708236694, "learning_rate": 1.9999551478840177e-05, "loss": 0.509, "step": 385 }, { "epoch": 0.008186464762147145, "grad_norm": 0.32835203409194946, "learning_rate": 1.9999548314703364e-05, "loss": 0.6316, "step": 386 }, { "epoch": 0.008207673220080168, "grad_norm": 0.369302898645401, "learning_rate": 1.9999545139445144e-05, "loss": 0.5114, "step": 387 }, { "epoch": 0.008228881678013191, "grad_norm": 0.4257858097553253, "learning_rate": 1.9999541953065513e-05, "loss": 0.5973, "step": 388 }, { "epoch": 0.008250090135946216, "grad_norm": 0.3995320200920105, "learning_rate": 1.999953875556448e-05, "loss": 0.4878, "step": 389 }, { "epoch": 0.008271298593879239, "grad_norm": 0.35539016127586365, "learning_rate": 1.9999535546942045e-05, "loss": 0.5437, "step": 390 }, { "epoch": 0.008292507051812262, "grad_norm": 0.34764471650123596, "learning_rate": 1.9999532327198214e-05, "loss": 0.5552, "step": 391 }, { "epoch": 0.008313715509745287, "grad_norm": 0.39997172355651855, "learning_rate": 1.9999529096332993e-05, "loss": 0.628, "step": 392 }, { "epoch": 0.00833492396767831, "grad_norm": 0.3348863422870636, "learning_rate": 1.999952585434638e-05, "loss": 0.5431, "step": 393 }, { "epoch": 0.008356132425611333, "grad_norm": 0.3589748442173004, "learning_rate": 1.9999522601238383e-05, "loss": 0.6102, "step": 394 }, { "epoch": 0.008377340883544358, "grad_norm": 0.513405442237854, "learning_rate": 1.9999519337009e-05, "loss": 0.6309, "step": 395 }, { "epoch": 0.008398549341477381, "grad_norm": 0.38893017172813416, "learning_rate": 1.9999516061658243e-05, "loss": 0.5977, "step": 396 }, { "epoch": 0.008419757799410404, "grad_norm": 0.3724134862422943, "learning_rate": 1.999951277518611e-05, "loss": 0.618, "step": 397 }, { "epoch": 0.008440966257343429, "grad_norm": 0.40982016921043396, "learning_rate": 1.9999509477592604e-05, "loss": 0.6426, "step": 398 }, { "epoch": 0.008462174715276452, "grad_norm": 1.2528408765792847, "learning_rate": 1.9999506168877733e-05, "loss": 0.6171, "step": 399 }, { "epoch": 0.008483383173209475, "grad_norm": 0.45700526237487793, "learning_rate": 1.9999502849041495e-05, "loss": 0.6819, "step": 400 }, { "epoch": 0.0085045916311425, "grad_norm": 0.45574039220809937, "learning_rate": 1.9999499518083895e-05, "loss": 0.5499, "step": 401 }, { "epoch": 0.008525800089075523, "grad_norm": 0.31446778774261475, "learning_rate": 1.9999496176004945e-05, "loss": 0.5054, "step": 402 }, { "epoch": 0.008547008547008548, "grad_norm": 0.3695220649242401, "learning_rate": 1.9999492822804636e-05, "loss": 0.4951, "step": 403 }, { "epoch": 0.008568217004941571, "grad_norm": 0.3591582179069519, "learning_rate": 1.999948945848298e-05, "loss": 0.6043, "step": 404 }, { "epoch": 0.008589425462874594, "grad_norm": 0.3924151360988617, "learning_rate": 1.9999486083039982e-05, "loss": 0.6838, "step": 405 }, { "epoch": 0.008610633920807619, "grad_norm": 0.31856220960617065, "learning_rate": 1.9999482696475637e-05, "loss": 0.557, "step": 406 }, { "epoch": 0.008631842378740642, "grad_norm": 0.3251158893108368, "learning_rate": 1.999947929878996e-05, "loss": 0.4975, "step": 407 }, { "epoch": 0.008653050836673665, "grad_norm": 0.3268466889858246, "learning_rate": 1.9999475889982944e-05, "loss": 0.538, "step": 408 }, { "epoch": 0.00867425929460669, "grad_norm": 0.3470994234085083, "learning_rate": 1.9999472470054602e-05, "loss": 0.5876, "step": 409 }, { "epoch": 0.008695467752539713, "grad_norm": 0.34029072523117065, "learning_rate": 1.999946903900493e-05, "loss": 0.5854, "step": 410 }, { "epoch": 0.008716676210472736, "grad_norm": 0.7140501737594604, "learning_rate": 1.9999465596833938e-05, "loss": 0.5263, "step": 411 }, { "epoch": 0.008737884668405761, "grad_norm": 0.3638480007648468, "learning_rate": 1.9999462143541625e-05, "loss": 0.7262, "step": 412 }, { "epoch": 0.008759093126338784, "grad_norm": 0.34296998381614685, "learning_rate": 1.9999458679127997e-05, "loss": 0.5477, "step": 413 }, { "epoch": 0.008780301584271807, "grad_norm": 0.34041011333465576, "learning_rate": 1.999945520359306e-05, "loss": 0.5667, "step": 414 }, { "epoch": 0.008801510042204832, "grad_norm": 0.38359254598617554, "learning_rate": 1.9999451716936815e-05, "loss": 0.5697, "step": 415 }, { "epoch": 0.008822718500137855, "grad_norm": 0.3342674970626831, "learning_rate": 1.9999448219159267e-05, "loss": 0.5959, "step": 416 }, { "epoch": 0.008843926958070878, "grad_norm": 0.3421357572078705, "learning_rate": 1.999944471026042e-05, "loss": 0.6475, "step": 417 }, { "epoch": 0.008865135416003903, "grad_norm": 0.34852898120880127, "learning_rate": 1.9999441190240275e-05, "loss": 0.6073, "step": 418 }, { "epoch": 0.008886343873936926, "grad_norm": 0.3438909351825714, "learning_rate": 1.9999437659098842e-05, "loss": 0.5756, "step": 419 }, { "epoch": 0.008907552331869949, "grad_norm": 0.4047551453113556, "learning_rate": 1.999943411683612e-05, "loss": 0.5977, "step": 420 }, { "epoch": 0.008928760789802974, "grad_norm": 0.35440000891685486, "learning_rate": 1.9999430563452113e-05, "loss": 0.6918, "step": 421 }, { "epoch": 0.008949969247735997, "grad_norm": 0.3896392583847046, "learning_rate": 1.9999426998946827e-05, "loss": 0.5569, "step": 422 }, { "epoch": 0.00897117770566902, "grad_norm": 0.3381519019603729, "learning_rate": 1.9999423423320263e-05, "loss": 0.6094, "step": 423 }, { "epoch": 0.008992386163602045, "grad_norm": 0.36632657051086426, "learning_rate": 1.999941983657243e-05, "loss": 0.5894, "step": 424 }, { "epoch": 0.009013594621535068, "grad_norm": 0.956028938293457, "learning_rate": 1.9999416238703328e-05, "loss": 0.5663, "step": 425 }, { "epoch": 0.009034803079468091, "grad_norm": 0.3639381229877472, "learning_rate": 1.9999412629712963e-05, "loss": 0.4702, "step": 426 }, { "epoch": 0.009056011537401116, "grad_norm": 0.3457621932029724, "learning_rate": 1.9999409009601337e-05, "loss": 0.6286, "step": 427 }, { "epoch": 0.009077219995334139, "grad_norm": 0.42247989773750305, "learning_rate": 1.9999405378368456e-05, "loss": 0.5645, "step": 428 }, { "epoch": 0.009098428453267164, "grad_norm": 0.3667770326137543, "learning_rate": 1.999940173601432e-05, "loss": 0.6142, "step": 429 }, { "epoch": 0.009119636911200187, "grad_norm": 0.35754260420799255, "learning_rate": 1.999939808253894e-05, "loss": 0.5432, "step": 430 }, { "epoch": 0.00914084536913321, "grad_norm": 0.3716523051261902, "learning_rate": 1.9999394417942316e-05, "loss": 0.5563, "step": 431 }, { "epoch": 0.009162053827066235, "grad_norm": 0.35152721405029297, "learning_rate": 1.999939074222445e-05, "loss": 0.5481, "step": 432 }, { "epoch": 0.009183262284999258, "grad_norm": 0.3486500680446625, "learning_rate": 1.999938705538535e-05, "loss": 0.5421, "step": 433 }, { "epoch": 0.009204470742932281, "grad_norm": 0.5393831133842468, "learning_rate": 1.999938335742502e-05, "loss": 0.5573, "step": 434 }, { "epoch": 0.009225679200865306, "grad_norm": 0.3936908543109894, "learning_rate": 1.9999379648343463e-05, "loss": 0.5621, "step": 435 }, { "epoch": 0.009246887658798329, "grad_norm": 0.3709598183631897, "learning_rate": 1.999937592814068e-05, "loss": 0.541, "step": 436 }, { "epoch": 0.009268096116731352, "grad_norm": 0.3513515293598175, "learning_rate": 1.999937219681668e-05, "loss": 0.5579, "step": 437 }, { "epoch": 0.009289304574664377, "grad_norm": 0.3252469599246979, "learning_rate": 1.9999368454371464e-05, "loss": 0.545, "step": 438 }, { "epoch": 0.0093105130325974, "grad_norm": 0.410153329372406, "learning_rate": 1.9999364700805037e-05, "loss": 0.6404, "step": 439 }, { "epoch": 0.009331721490530423, "grad_norm": 0.32812049984931946, "learning_rate": 1.9999360936117406e-05, "loss": 0.5043, "step": 440 }, { "epoch": 0.009352929948463448, "grad_norm": 0.35126006603240967, "learning_rate": 1.999935716030857e-05, "loss": 0.6073, "step": 441 }, { "epoch": 0.009374138406396471, "grad_norm": 0.35101068019866943, "learning_rate": 1.9999353373378535e-05, "loss": 0.5921, "step": 442 }, { "epoch": 0.009395346864329494, "grad_norm": 0.3495838940143585, "learning_rate": 1.999934957532731e-05, "loss": 0.5267, "step": 443 }, { "epoch": 0.009416555322262519, "grad_norm": 0.37457001209259033, "learning_rate": 1.999934576615489e-05, "loss": 0.6499, "step": 444 }, { "epoch": 0.009437763780195542, "grad_norm": 0.34123948216438293, "learning_rate": 1.9999341945861288e-05, "loss": 0.6133, "step": 445 }, { "epoch": 0.009458972238128565, "grad_norm": 0.38095951080322266, "learning_rate": 1.9999338114446505e-05, "loss": 0.5, "step": 446 }, { "epoch": 0.00948018069606159, "grad_norm": 0.34769919514656067, "learning_rate": 1.9999334271910546e-05, "loss": 0.6389, "step": 447 }, { "epoch": 0.009501389153994613, "grad_norm": 0.3405681252479553, "learning_rate": 1.999933041825341e-05, "loss": 0.5442, "step": 448 }, { "epoch": 0.009522597611927636, "grad_norm": 0.3728877902030945, "learning_rate": 1.999932655347511e-05, "loss": 0.5649, "step": 449 }, { "epoch": 0.00954380606986066, "grad_norm": 0.34099581837654114, "learning_rate": 1.999932267757564e-05, "loss": 0.5591, "step": 450 }, { "epoch": 0.009565014527793684, "grad_norm": 0.36134907603263855, "learning_rate": 1.9999318790555016e-05, "loss": 0.6004, "step": 451 }, { "epoch": 0.009586222985726707, "grad_norm": 0.4268510043621063, "learning_rate": 1.9999314892413234e-05, "loss": 0.5706, "step": 452 }, { "epoch": 0.009607431443659732, "grad_norm": 0.36186766624450684, "learning_rate": 1.9999310983150302e-05, "loss": 0.5807, "step": 453 }, { "epoch": 0.009628639901592755, "grad_norm": 0.40808233618736267, "learning_rate": 1.999930706276622e-05, "loss": 0.6013, "step": 454 }, { "epoch": 0.00964984835952578, "grad_norm": 0.36072519421577454, "learning_rate": 1.9999303131261e-05, "loss": 0.6185, "step": 455 }, { "epoch": 0.009671056817458803, "grad_norm": 0.3597070872783661, "learning_rate": 1.999929918863464e-05, "loss": 0.5703, "step": 456 }, { "epoch": 0.009692265275391826, "grad_norm": 0.36220377683639526, "learning_rate": 1.9999295234887145e-05, "loss": 0.5373, "step": 457 }, { "epoch": 0.00971347373332485, "grad_norm": 0.40686583518981934, "learning_rate": 1.9999291270018527e-05, "loss": 0.5499, "step": 458 }, { "epoch": 0.009734682191257874, "grad_norm": 0.3222808241844177, "learning_rate": 1.9999287294028776e-05, "loss": 0.5662, "step": 459 }, { "epoch": 0.009755890649190897, "grad_norm": 0.34298616647720337, "learning_rate": 1.9999283306917906e-05, "loss": 0.5408, "step": 460 }, { "epoch": 0.009777099107123922, "grad_norm": 0.3325575292110443, "learning_rate": 1.9999279308685924e-05, "loss": 0.5836, "step": 461 }, { "epoch": 0.009798307565056945, "grad_norm": 0.3583713173866272, "learning_rate": 1.999927529933283e-05, "loss": 0.5665, "step": 462 }, { "epoch": 0.009819516022989968, "grad_norm": 0.3725135922431946, "learning_rate": 1.9999271278858626e-05, "loss": 0.6495, "step": 463 }, { "epoch": 0.009840724480922993, "grad_norm": 0.3807242214679718, "learning_rate": 1.999926724726332e-05, "loss": 0.6531, "step": 464 }, { "epoch": 0.009861932938856016, "grad_norm": 0.3441523313522339, "learning_rate": 1.9999263204546916e-05, "loss": 0.5816, "step": 465 }, { "epoch": 0.009883141396789039, "grad_norm": 0.4009213447570801, "learning_rate": 1.999925915070942e-05, "loss": 0.5196, "step": 466 }, { "epoch": 0.009904349854722064, "grad_norm": 0.3465103507041931, "learning_rate": 1.9999255085750834e-05, "loss": 0.5789, "step": 467 }, { "epoch": 0.009925558312655087, "grad_norm": 0.35132041573524475, "learning_rate": 1.9999251009671157e-05, "loss": 0.6604, "step": 468 }, { "epoch": 0.00994676677058811, "grad_norm": 0.36808913946151733, "learning_rate": 1.999924692247041e-05, "loss": 0.5526, "step": 469 }, { "epoch": 0.009967975228521135, "grad_norm": 0.4012724757194519, "learning_rate": 1.999924282414858e-05, "loss": 0.5491, "step": 470 }, { "epoch": 0.009989183686454158, "grad_norm": 0.3620636463165283, "learning_rate": 1.9999238714705683e-05, "loss": 0.6205, "step": 471 }, { "epoch": 0.010010392144387181, "grad_norm": 0.3526698052883148, "learning_rate": 1.999923459414172e-05, "loss": 0.6113, "step": 472 }, { "epoch": 0.010031600602320206, "grad_norm": 0.37424296140670776, "learning_rate": 1.9999230462456694e-05, "loss": 0.6062, "step": 473 }, { "epoch": 0.010052809060253229, "grad_norm": 0.366551011800766, "learning_rate": 1.999922631965061e-05, "loss": 0.5925, "step": 474 }, { "epoch": 0.010074017518186252, "grad_norm": 0.33987194299697876, "learning_rate": 1.9999222165723473e-05, "loss": 0.5981, "step": 475 }, { "epoch": 0.010095225976119277, "grad_norm": 0.3265867531299591, "learning_rate": 1.9999218000675288e-05, "loss": 0.6076, "step": 476 }, { "epoch": 0.0101164344340523, "grad_norm": 0.3954448699951172, "learning_rate": 1.999921382450606e-05, "loss": 0.6357, "step": 477 }, { "epoch": 0.010137642891985323, "grad_norm": 0.310009628534317, "learning_rate": 1.9999209637215796e-05, "loss": 0.4833, "step": 478 }, { "epoch": 0.010158851349918348, "grad_norm": 0.3272819221019745, "learning_rate": 1.9999205438804494e-05, "loss": 0.5334, "step": 479 }, { "epoch": 0.01018005980785137, "grad_norm": 0.3951958119869232, "learning_rate": 1.9999201229272164e-05, "loss": 0.5292, "step": 480 }, { "epoch": 0.010201268265784396, "grad_norm": 0.36113256216049194, "learning_rate": 1.999919700861881e-05, "loss": 0.6017, "step": 481 }, { "epoch": 0.010222476723717419, "grad_norm": 0.33809125423431396, "learning_rate": 1.9999192776844438e-05, "loss": 0.4933, "step": 482 }, { "epoch": 0.010243685181650442, "grad_norm": 0.3866797387599945, "learning_rate": 1.9999188533949045e-05, "loss": 0.5861, "step": 483 }, { "epoch": 0.010264893639583467, "grad_norm": 0.3307022452354431, "learning_rate": 1.9999184279932645e-05, "loss": 0.4978, "step": 484 }, { "epoch": 0.01028610209751649, "grad_norm": 0.3872041702270508, "learning_rate": 1.9999180014795238e-05, "loss": 0.537, "step": 485 }, { "epoch": 0.010307310555449513, "grad_norm": 0.40252864360809326, "learning_rate": 1.999917573853683e-05, "loss": 0.6198, "step": 486 }, { "epoch": 0.010328519013382538, "grad_norm": 0.3285825252532959, "learning_rate": 1.9999171451157426e-05, "loss": 0.5955, "step": 487 }, { "epoch": 0.01034972747131556, "grad_norm": 0.2965048551559448, "learning_rate": 1.999916715265703e-05, "loss": 0.5677, "step": 488 }, { "epoch": 0.010370935929248584, "grad_norm": 0.3347901999950409, "learning_rate": 1.999916284303565e-05, "loss": 0.603, "step": 489 }, { "epoch": 0.010392144387181609, "grad_norm": 0.4232368469238281, "learning_rate": 1.9999158522293283e-05, "loss": 0.478, "step": 490 }, { "epoch": 0.010413352845114632, "grad_norm": 0.36070072650909424, "learning_rate": 1.999915419042994e-05, "loss": 0.6047, "step": 491 }, { "epoch": 0.010434561303047655, "grad_norm": 0.3251178562641144, "learning_rate": 1.999914984744563e-05, "loss": 0.6273, "step": 492 }, { "epoch": 0.01045576976098068, "grad_norm": 0.5224596858024597, "learning_rate": 1.9999145493340347e-05, "loss": 0.4735, "step": 493 }, { "epoch": 0.010476978218913703, "grad_norm": 0.31368595361709595, "learning_rate": 1.99991411281141e-05, "loss": 0.5848, "step": 494 }, { "epoch": 0.010498186676846726, "grad_norm": 0.32802727818489075, "learning_rate": 1.99991367517669e-05, "loss": 0.5244, "step": 495 }, { "epoch": 0.01051939513477975, "grad_norm": 0.35015055537223816, "learning_rate": 1.9999132364298744e-05, "loss": 0.6348, "step": 496 }, { "epoch": 0.010540603592712774, "grad_norm": 0.3178415596485138, "learning_rate": 1.999912796570964e-05, "loss": 0.5344, "step": 497 }, { "epoch": 0.010561812050645797, "grad_norm": 0.3282157778739929, "learning_rate": 1.9999123555999592e-05, "loss": 0.5183, "step": 498 }, { "epoch": 0.010583020508578822, "grad_norm": 0.3574131727218628, "learning_rate": 1.9999119135168608e-05, "loss": 0.5986, "step": 499 }, { "epoch": 0.010604228966511845, "grad_norm": 0.36175408959388733, "learning_rate": 1.9999114703216688e-05, "loss": 0.5476, "step": 500 }, { "epoch": 0.010625437424444868, "grad_norm": 0.4576069116592407, "learning_rate": 1.9999110260143842e-05, "loss": 0.5914, "step": 501 }, { "epoch": 0.010646645882377893, "grad_norm": 0.34866124391555786, "learning_rate": 1.999910580595007e-05, "loss": 0.5539, "step": 502 }, { "epoch": 0.010667854340310916, "grad_norm": 0.3344833552837372, "learning_rate": 1.999910134063538e-05, "loss": 0.5783, "step": 503 }, { "epoch": 0.010689062798243939, "grad_norm": 0.416098415851593, "learning_rate": 1.9999096864199776e-05, "loss": 0.5167, "step": 504 }, { "epoch": 0.010710271256176964, "grad_norm": 0.3720570206642151, "learning_rate": 1.9999092376643264e-05, "loss": 0.5223, "step": 505 }, { "epoch": 0.010731479714109987, "grad_norm": 0.34566062688827515, "learning_rate": 1.999908787796585e-05, "loss": 0.5487, "step": 506 }, { "epoch": 0.010752688172043012, "grad_norm": 0.48882293701171875, "learning_rate": 1.999908336816754e-05, "loss": 0.6146, "step": 507 }, { "epoch": 0.010773896629976035, "grad_norm": 0.3746944069862366, "learning_rate": 1.999907884724833e-05, "loss": 0.566, "step": 508 }, { "epoch": 0.010795105087909058, "grad_norm": 0.36075037717819214, "learning_rate": 1.9999074315208233e-05, "loss": 0.6569, "step": 509 }, { "epoch": 0.010816313545842083, "grad_norm": 0.32644686102867126, "learning_rate": 1.9999069772047253e-05, "loss": 0.6048, "step": 510 }, { "epoch": 0.010837522003775106, "grad_norm": 0.371557354927063, "learning_rate": 1.999906521776539e-05, "loss": 0.6178, "step": 511 }, { "epoch": 0.010858730461708129, "grad_norm": 0.41332754492759705, "learning_rate": 1.9999060652362662e-05, "loss": 0.5085, "step": 512 }, { "epoch": 0.010879938919641154, "grad_norm": 0.3284111022949219, "learning_rate": 1.999905607583906e-05, "loss": 0.6386, "step": 513 }, { "epoch": 0.010901147377574177, "grad_norm": 0.438096284866333, "learning_rate": 1.9999051488194597e-05, "loss": 0.5439, "step": 514 }, { "epoch": 0.0109223558355072, "grad_norm": 0.35007244348526, "learning_rate": 1.9999046889429276e-05, "loss": 0.5704, "step": 515 }, { "epoch": 0.010943564293440225, "grad_norm": 0.38938915729522705, "learning_rate": 1.9999042279543103e-05, "loss": 0.6453, "step": 516 }, { "epoch": 0.010964772751373248, "grad_norm": 0.44532525539398193, "learning_rate": 1.999903765853608e-05, "loss": 0.5519, "step": 517 }, { "epoch": 0.01098598120930627, "grad_norm": 0.4029428958892822, "learning_rate": 1.9999033026408214e-05, "loss": 0.6963, "step": 518 }, { "epoch": 0.011007189667239296, "grad_norm": 0.37926048040390015, "learning_rate": 1.9999028383159507e-05, "loss": 0.6519, "step": 519 }, { "epoch": 0.011028398125172319, "grad_norm": 0.38940340280532837, "learning_rate": 1.9999023728789973e-05, "loss": 0.5923, "step": 520 }, { "epoch": 0.011049606583105342, "grad_norm": 0.38112252950668335, "learning_rate": 1.999901906329961e-05, "loss": 0.6208, "step": 521 }, { "epoch": 0.011070815041038367, "grad_norm": 0.36523064970970154, "learning_rate": 1.999901438668842e-05, "loss": 0.5948, "step": 522 }, { "epoch": 0.01109202349897139, "grad_norm": 0.41746681928634644, "learning_rate": 1.999900969895642e-05, "loss": 0.6212, "step": 523 }, { "epoch": 0.011113231956904413, "grad_norm": 0.3303806483745575, "learning_rate": 1.9999005000103605e-05, "loss": 0.5832, "step": 524 }, { "epoch": 0.011134440414837438, "grad_norm": 0.35510024428367615, "learning_rate": 1.9999000290129987e-05, "loss": 0.6338, "step": 525 }, { "epoch": 0.01115564887277046, "grad_norm": 0.35266122221946716, "learning_rate": 1.9998995569035564e-05, "loss": 0.5094, "step": 526 }, { "epoch": 0.011176857330703484, "grad_norm": 0.32872605323791504, "learning_rate": 1.9998990836820348e-05, "loss": 0.5996, "step": 527 }, { "epoch": 0.011198065788636509, "grad_norm": 0.3373124599456787, "learning_rate": 1.9998986093484337e-05, "loss": 0.6222, "step": 528 }, { "epoch": 0.011219274246569532, "grad_norm": 0.34633180499076843, "learning_rate": 1.9998981339027543e-05, "loss": 0.5334, "step": 529 }, { "epoch": 0.011240482704502555, "grad_norm": 0.37099146842956543, "learning_rate": 1.999897657344997e-05, "loss": 0.5711, "step": 530 }, { "epoch": 0.01126169116243558, "grad_norm": 0.3737158179283142, "learning_rate": 1.999897179675162e-05, "loss": 0.6735, "step": 531 }, { "epoch": 0.011282899620368603, "grad_norm": 0.3399089574813843, "learning_rate": 1.99989670089325e-05, "loss": 0.6428, "step": 532 }, { "epoch": 0.011304108078301628, "grad_norm": 0.3330608904361725, "learning_rate": 1.999896220999262e-05, "loss": 0.506, "step": 533 }, { "epoch": 0.01132531653623465, "grad_norm": 0.32373669743537903, "learning_rate": 1.9998957399931978e-05, "loss": 0.5355, "step": 534 }, { "epoch": 0.011346524994167674, "grad_norm": 0.4016847610473633, "learning_rate": 1.9998952578750584e-05, "loss": 0.6205, "step": 535 }, { "epoch": 0.011367733452100699, "grad_norm": 0.3424358069896698, "learning_rate": 1.999894774644844e-05, "loss": 0.5359, "step": 536 }, { "epoch": 0.011388941910033722, "grad_norm": 0.322419136762619, "learning_rate": 1.9998942903025553e-05, "loss": 0.5036, "step": 537 }, { "epoch": 0.011410150367966745, "grad_norm": 0.3612476885318756, "learning_rate": 1.999893804848193e-05, "loss": 0.5404, "step": 538 }, { "epoch": 0.01143135882589977, "grad_norm": 0.3196505308151245, "learning_rate": 1.9998933182817574e-05, "loss": 0.5334, "step": 539 }, { "epoch": 0.011452567283832793, "grad_norm": 0.3599105477333069, "learning_rate": 1.999892830603249e-05, "loss": 0.6536, "step": 540 }, { "epoch": 0.011473775741765816, "grad_norm": 0.3642271161079407, "learning_rate": 1.9998923418126687e-05, "loss": 0.6178, "step": 541 }, { "epoch": 0.01149498419969884, "grad_norm": 0.5855377316474915, "learning_rate": 1.9998918519100166e-05, "loss": 0.5684, "step": 542 }, { "epoch": 0.011516192657631864, "grad_norm": 0.334405779838562, "learning_rate": 1.9998913608952936e-05, "loss": 0.614, "step": 543 }, { "epoch": 0.011537401115564887, "grad_norm": 0.3477785885334015, "learning_rate": 1.9998908687685e-05, "loss": 0.5313, "step": 544 }, { "epoch": 0.011558609573497912, "grad_norm": 0.4075677990913391, "learning_rate": 1.9998903755296366e-05, "loss": 0.4895, "step": 545 }, { "epoch": 0.011579818031430935, "grad_norm": 0.35828566551208496, "learning_rate": 1.999889881178704e-05, "loss": 0.565, "step": 546 }, { "epoch": 0.011601026489363958, "grad_norm": 0.34993496537208557, "learning_rate": 1.9998893857157024e-05, "loss": 0.4795, "step": 547 }, { "epoch": 0.011622234947296983, "grad_norm": 0.36797136068344116, "learning_rate": 1.9998888891406323e-05, "loss": 0.6127, "step": 548 }, { "epoch": 0.011643443405230006, "grad_norm": 0.39240869879722595, "learning_rate": 1.9998883914534947e-05, "loss": 0.6016, "step": 549 }, { "epoch": 0.011664651863163029, "grad_norm": 0.3677384555339813, "learning_rate": 1.99988789265429e-05, "loss": 0.5142, "step": 550 }, { "epoch": 0.011685860321096054, "grad_norm": 0.3595609962940216, "learning_rate": 1.9998873927430186e-05, "loss": 0.6267, "step": 551 }, { "epoch": 0.011707068779029077, "grad_norm": 0.41448676586151123, "learning_rate": 1.999886891719681e-05, "loss": 0.5824, "step": 552 }, { "epoch": 0.0117282772369621, "grad_norm": 0.36436960101127625, "learning_rate": 1.999886389584278e-05, "loss": 0.5987, "step": 553 }, { "epoch": 0.011749485694895125, "grad_norm": 0.33972251415252686, "learning_rate": 1.9998858863368097e-05, "loss": 0.6065, "step": 554 }, { "epoch": 0.011770694152828148, "grad_norm": 0.3853645920753479, "learning_rate": 1.9998853819772775e-05, "loss": 0.5333, "step": 555 }, { "epoch": 0.01179190261076117, "grad_norm": 0.34407246112823486, "learning_rate": 1.999884876505681e-05, "loss": 0.5534, "step": 556 }, { "epoch": 0.011813111068694196, "grad_norm": 0.33762067556381226, "learning_rate": 1.9998843699220216e-05, "loss": 0.533, "step": 557 }, { "epoch": 0.011834319526627219, "grad_norm": 0.3021688461303711, "learning_rate": 1.9998838622262997e-05, "loss": 0.4801, "step": 558 }, { "epoch": 0.011855527984560243, "grad_norm": 0.3558993339538574, "learning_rate": 1.9998833534185153e-05, "loss": 0.6011, "step": 559 }, { "epoch": 0.011876736442493267, "grad_norm": 0.4025760293006897, "learning_rate": 1.999882843498669e-05, "loss": 0.5746, "step": 560 }, { "epoch": 0.01189794490042629, "grad_norm": 0.3310703635215759, "learning_rate": 1.9998823324667622e-05, "loss": 0.5368, "step": 561 }, { "epoch": 0.011919153358359314, "grad_norm": 0.430838406085968, "learning_rate": 1.9998818203227946e-05, "loss": 0.6147, "step": 562 }, { "epoch": 0.011940361816292338, "grad_norm": 0.3879133462905884, "learning_rate": 1.9998813070667675e-05, "loss": 0.5687, "step": 563 }, { "epoch": 0.01196157027422536, "grad_norm": 0.35503891110420227, "learning_rate": 1.9998807926986808e-05, "loss": 0.5803, "step": 564 }, { "epoch": 0.011982778732158385, "grad_norm": 0.33709853887557983, "learning_rate": 1.9998802772185356e-05, "loss": 0.5102, "step": 565 }, { "epoch": 0.012003987190091409, "grad_norm": 0.33897116780281067, "learning_rate": 1.9998797606263318e-05, "loss": 0.4944, "step": 566 }, { "epoch": 0.012025195648024432, "grad_norm": 0.367014616727829, "learning_rate": 1.999879242922071e-05, "loss": 0.5209, "step": 567 }, { "epoch": 0.012046404105957456, "grad_norm": 0.3729635775089264, "learning_rate": 1.999878724105753e-05, "loss": 0.4977, "step": 568 }, { "epoch": 0.01206761256389048, "grad_norm": 0.34637901186943054, "learning_rate": 1.9998782041773784e-05, "loss": 0.617, "step": 569 }, { "epoch": 0.012088821021823503, "grad_norm": 0.31386232376098633, "learning_rate": 1.999877683136948e-05, "loss": 0.5052, "step": 570 }, { "epoch": 0.012110029479756527, "grad_norm": 0.35581737756729126, "learning_rate": 1.9998771609844626e-05, "loss": 0.5502, "step": 571 }, { "epoch": 0.01213123793768955, "grad_norm": 0.3113059997558594, "learning_rate": 1.999876637719922e-05, "loss": 0.5072, "step": 572 }, { "epoch": 0.012152446395622574, "grad_norm": 0.359837144613266, "learning_rate": 1.999876113343328e-05, "loss": 0.5518, "step": 573 }, { "epoch": 0.012173654853555598, "grad_norm": 0.3482804000377655, "learning_rate": 1.9998755878546797e-05, "loss": 0.6349, "step": 574 }, { "epoch": 0.012194863311488622, "grad_norm": 0.3107496500015259, "learning_rate": 1.999875061253979e-05, "loss": 0.5264, "step": 575 }, { "epoch": 0.012216071769421645, "grad_norm": 0.36274030804634094, "learning_rate": 1.9998745335412255e-05, "loss": 0.5424, "step": 576 }, { "epoch": 0.01223728022735467, "grad_norm": 0.37288615107536316, "learning_rate": 1.9998740047164206e-05, "loss": 0.566, "step": 577 }, { "epoch": 0.012258488685287693, "grad_norm": 0.32357916235923767, "learning_rate": 1.9998734747795643e-05, "loss": 0.4561, "step": 578 }, { "epoch": 0.012279697143220716, "grad_norm": 0.3652651309967041, "learning_rate": 1.9998729437306575e-05, "loss": 0.5383, "step": 579 }, { "epoch": 0.01230090560115374, "grad_norm": 0.4330403804779053, "learning_rate": 1.9998724115697007e-05, "loss": 0.5113, "step": 580 }, { "epoch": 0.012322114059086764, "grad_norm": 0.31801825761795044, "learning_rate": 1.9998718782966944e-05, "loss": 0.5966, "step": 581 }, { "epoch": 0.012343322517019787, "grad_norm": 0.3418216407299042, "learning_rate": 1.9998713439116396e-05, "loss": 0.5631, "step": 582 }, { "epoch": 0.012364530974952812, "grad_norm": 0.37147584557533264, "learning_rate": 1.999870808414536e-05, "loss": 0.5877, "step": 583 }, { "epoch": 0.012385739432885835, "grad_norm": 0.3397195041179657, "learning_rate": 1.9998702718053852e-05, "loss": 0.6613, "step": 584 }, { "epoch": 0.01240694789081886, "grad_norm": 0.43228039145469666, "learning_rate": 1.9998697340841874e-05, "loss": 0.5824, "step": 585 }, { "epoch": 0.012428156348751883, "grad_norm": 0.675093948841095, "learning_rate": 1.999869195250943e-05, "loss": 0.5775, "step": 586 }, { "epoch": 0.012449364806684906, "grad_norm": 0.3106462061405182, "learning_rate": 1.9998686553056526e-05, "loss": 0.4783, "step": 587 }, { "epoch": 0.01247057326461793, "grad_norm": 0.33859726786613464, "learning_rate": 1.9998681142483173e-05, "loss": 0.6094, "step": 588 }, { "epoch": 0.012491781722550954, "grad_norm": 0.5101841688156128, "learning_rate": 1.999867572078937e-05, "loss": 0.5779, "step": 589 }, { "epoch": 0.012512990180483977, "grad_norm": 0.3648899793624878, "learning_rate": 1.9998670287975132e-05, "loss": 0.5789, "step": 590 }, { "epoch": 0.012534198638417001, "grad_norm": 0.3338315486907959, "learning_rate": 1.9998664844040458e-05, "loss": 0.588, "step": 591 }, { "epoch": 0.012555407096350025, "grad_norm": 0.3580147325992584, "learning_rate": 1.9998659388985353e-05, "loss": 0.6258, "step": 592 }, { "epoch": 0.012576615554283048, "grad_norm": 0.3541702330112457, "learning_rate": 1.999865392280983e-05, "loss": 0.6151, "step": 593 }, { "epoch": 0.012597824012216072, "grad_norm": 0.3852824568748474, "learning_rate": 1.999864844551389e-05, "loss": 0.6123, "step": 594 }, { "epoch": 0.012619032470149096, "grad_norm": 0.33761513233184814, "learning_rate": 1.9998642957097537e-05, "loss": 0.5512, "step": 595 }, { "epoch": 0.012640240928082119, "grad_norm": 0.3570089638233185, "learning_rate": 1.9998637457560784e-05, "loss": 0.5874, "step": 596 }, { "epoch": 0.012661449386015143, "grad_norm": 0.35095396637916565, "learning_rate": 1.999863194690363e-05, "loss": 0.5586, "step": 597 }, { "epoch": 0.012682657843948167, "grad_norm": 0.35183754563331604, "learning_rate": 1.9998626425126083e-05, "loss": 0.643, "step": 598 }, { "epoch": 0.01270386630188119, "grad_norm": 0.3184170722961426, "learning_rate": 1.9998620892228155e-05, "loss": 0.5066, "step": 599 }, { "epoch": 0.012725074759814214, "grad_norm": 0.34846630692481995, "learning_rate": 1.9998615348209847e-05, "loss": 0.5858, "step": 600 }, { "epoch": 0.012746283217747238, "grad_norm": 0.3385169804096222, "learning_rate": 1.9998609793071165e-05, "loss": 0.616, "step": 601 }, { "epoch": 0.01276749167568026, "grad_norm": 0.3213094472885132, "learning_rate": 1.9998604226812116e-05, "loss": 0.5723, "step": 602 }, { "epoch": 0.012788700133613285, "grad_norm": 0.4227544963359833, "learning_rate": 1.9998598649432703e-05, "loss": 0.5268, "step": 603 }, { "epoch": 0.012809908591546309, "grad_norm": 0.3766266703605652, "learning_rate": 1.999859306093294e-05, "loss": 0.5238, "step": 604 }, { "epoch": 0.012831117049479332, "grad_norm": 0.3926542401313782, "learning_rate": 1.999858746131283e-05, "loss": 0.6207, "step": 605 }, { "epoch": 0.012852325507412356, "grad_norm": 0.3233852982521057, "learning_rate": 1.9998581850572373e-05, "loss": 0.4899, "step": 606 }, { "epoch": 0.01287353396534538, "grad_norm": 0.32334229350090027, "learning_rate": 1.999857622871158e-05, "loss": 0.4957, "step": 607 }, { "epoch": 0.012894742423278403, "grad_norm": 0.2904132008552551, "learning_rate": 1.9998570595730462e-05, "loss": 0.3776, "step": 608 }, { "epoch": 0.012915950881211427, "grad_norm": 0.3563202917575836, "learning_rate": 1.9998564951629018e-05, "loss": 0.5259, "step": 609 }, { "epoch": 0.01293715933914445, "grad_norm": 0.33854663372039795, "learning_rate": 1.9998559296407257e-05, "loss": 0.5578, "step": 610 }, { "epoch": 0.012958367797077475, "grad_norm": 0.34963980317115784, "learning_rate": 1.9998553630065183e-05, "loss": 0.5366, "step": 611 }, { "epoch": 0.012979576255010498, "grad_norm": 0.3404655456542969, "learning_rate": 1.999854795260281e-05, "loss": 0.5298, "step": 612 }, { "epoch": 0.013000784712943522, "grad_norm": 0.3436659574508667, "learning_rate": 1.9998542264020132e-05, "loss": 0.5721, "step": 613 }, { "epoch": 0.013021993170876546, "grad_norm": 0.38451316952705383, "learning_rate": 1.9998536564317168e-05, "loss": 0.5469, "step": 614 }, { "epoch": 0.01304320162880957, "grad_norm": 0.31835225224494934, "learning_rate": 1.9998530853493915e-05, "loss": 0.5071, "step": 615 }, { "epoch": 0.013064410086742593, "grad_norm": 0.3284345865249634, "learning_rate": 1.9998525131550383e-05, "loss": 0.5125, "step": 616 }, { "epoch": 0.013085618544675617, "grad_norm": 0.3773086965084076, "learning_rate": 1.9998519398486583e-05, "loss": 0.5515, "step": 617 }, { "epoch": 0.01310682700260864, "grad_norm": 0.3770412802696228, "learning_rate": 1.999851365430251e-05, "loss": 0.5197, "step": 618 }, { "epoch": 0.013128035460541664, "grad_norm": 0.34290459752082825, "learning_rate": 1.9998507898998182e-05, "loss": 0.5452, "step": 619 }, { "epoch": 0.013149243918474688, "grad_norm": 0.32768121361732483, "learning_rate": 1.99985021325736e-05, "loss": 0.506, "step": 620 }, { "epoch": 0.013170452376407711, "grad_norm": 0.3628899157047272, "learning_rate": 1.9998496355028765e-05, "loss": 0.4402, "step": 621 }, { "epoch": 0.013191660834340735, "grad_norm": 0.3434172570705414, "learning_rate": 1.9998490566363692e-05, "loss": 0.6302, "step": 622 }, { "epoch": 0.01321286929227376, "grad_norm": 0.3276948034763336, "learning_rate": 1.9998484766578388e-05, "loss": 0.5071, "step": 623 }, { "epoch": 0.013234077750206782, "grad_norm": 0.333522230386734, "learning_rate": 1.9998478955672853e-05, "loss": 0.6191, "step": 624 }, { "epoch": 0.013255286208139806, "grad_norm": 0.33930715918540955, "learning_rate": 1.99984731336471e-05, "loss": 0.5246, "step": 625 }, { "epoch": 0.01327649466607283, "grad_norm": 0.321646511554718, "learning_rate": 1.9998467300501128e-05, "loss": 0.5656, "step": 626 }, { "epoch": 0.013297703124005853, "grad_norm": 0.33148664236068726, "learning_rate": 1.999846145623495e-05, "loss": 0.5993, "step": 627 }, { "epoch": 0.013318911581938877, "grad_norm": 0.36029914021492004, "learning_rate": 1.999845560084857e-05, "loss": 0.6303, "step": 628 }, { "epoch": 0.013340120039871901, "grad_norm": 0.3458409309387207, "learning_rate": 1.9998449734341992e-05, "loss": 0.5685, "step": 629 }, { "epoch": 0.013361328497804924, "grad_norm": 0.3496597111225128, "learning_rate": 1.9998443856715228e-05, "loss": 0.5707, "step": 630 }, { "epoch": 0.013382536955737948, "grad_norm": 0.3610881268978119, "learning_rate": 1.999843796796828e-05, "loss": 0.6031, "step": 631 }, { "epoch": 0.013403745413670972, "grad_norm": 0.3272257447242737, "learning_rate": 1.9998432068101157e-05, "loss": 0.6143, "step": 632 }, { "epoch": 0.013424953871603995, "grad_norm": 0.3402248024940491, "learning_rate": 1.9998426157113864e-05, "loss": 0.5501, "step": 633 }, { "epoch": 0.013446162329537019, "grad_norm": 0.31754812598228455, "learning_rate": 1.9998420235006407e-05, "loss": 0.5683, "step": 634 }, { "epoch": 0.013467370787470043, "grad_norm": 0.5145188570022583, "learning_rate": 1.9998414301778798e-05, "loss": 0.5268, "step": 635 }, { "epoch": 0.013488579245403066, "grad_norm": 0.37430325150489807, "learning_rate": 1.9998408357431038e-05, "loss": 0.5642, "step": 636 }, { "epoch": 0.013509787703336091, "grad_norm": 0.35865288972854614, "learning_rate": 1.999840240196313e-05, "loss": 0.6095, "step": 637 }, { "epoch": 0.013530996161269114, "grad_norm": 0.3183467984199524, "learning_rate": 1.9998396435375088e-05, "loss": 0.5744, "step": 638 }, { "epoch": 0.013552204619202137, "grad_norm": 0.32156166434288025, "learning_rate": 1.9998390457666922e-05, "loss": 0.5429, "step": 639 }, { "epoch": 0.013573413077135162, "grad_norm": 0.4205792546272278, "learning_rate": 1.9998384468838626e-05, "loss": 0.5494, "step": 640 }, { "epoch": 0.013594621535068185, "grad_norm": 0.3734389543533325, "learning_rate": 1.9998378468890217e-05, "loss": 0.5744, "step": 641 }, { "epoch": 0.013615829993001208, "grad_norm": 0.3427380621433258, "learning_rate": 1.99983724578217e-05, "loss": 0.6163, "step": 642 }, { "epoch": 0.013637038450934233, "grad_norm": 0.3404308259487152, "learning_rate": 1.999836643563308e-05, "loss": 0.5767, "step": 643 }, { "epoch": 0.013658246908867256, "grad_norm": 0.3507136106491089, "learning_rate": 1.999836040232436e-05, "loss": 0.5577, "step": 644 }, { "epoch": 0.01367945536680028, "grad_norm": 0.3640138506889343, "learning_rate": 1.999835435789555e-05, "loss": 0.5912, "step": 645 }, { "epoch": 0.013700663824733304, "grad_norm": 0.34190577268600464, "learning_rate": 1.9998348302346658e-05, "loss": 0.5151, "step": 646 }, { "epoch": 0.013721872282666327, "grad_norm": 0.3031281530857086, "learning_rate": 1.999834223567769e-05, "loss": 0.4723, "step": 647 }, { "epoch": 0.01374308074059935, "grad_norm": 0.3468640446662903, "learning_rate": 1.9998336157888656e-05, "loss": 0.611, "step": 648 }, { "epoch": 0.013764289198532375, "grad_norm": 0.3380841016769409, "learning_rate": 1.9998330068979558e-05, "loss": 0.6024, "step": 649 }, { "epoch": 0.013785497656465398, "grad_norm": 0.36041519045829773, "learning_rate": 1.99983239689504e-05, "loss": 0.5975, "step": 650 }, { "epoch": 0.013806706114398421, "grad_norm": 0.3462445139884949, "learning_rate": 1.99983178578012e-05, "loss": 0.5932, "step": 651 }, { "epoch": 0.013827914572331446, "grad_norm": 0.3241754174232483, "learning_rate": 1.9998311735531954e-05, "loss": 0.5029, "step": 652 }, { "epoch": 0.01384912303026447, "grad_norm": 0.3153136074542999, "learning_rate": 1.9998305602142673e-05, "loss": 0.453, "step": 653 }, { "epoch": 0.013870331488197492, "grad_norm": 0.3593595325946808, "learning_rate": 1.999829945763336e-05, "loss": 0.6042, "step": 654 }, { "epoch": 0.013891539946130517, "grad_norm": 0.34498313069343567, "learning_rate": 1.999829330200403e-05, "loss": 0.5846, "step": 655 }, { "epoch": 0.01391274840406354, "grad_norm": 0.3574763834476471, "learning_rate": 1.999828713525468e-05, "loss": 0.5451, "step": 656 }, { "epoch": 0.013933956861996563, "grad_norm": 0.4185604751110077, "learning_rate": 1.9998280957385328e-05, "loss": 0.6426, "step": 657 }, { "epoch": 0.013955165319929588, "grad_norm": 0.3457118272781372, "learning_rate": 1.999827476839597e-05, "loss": 0.573, "step": 658 }, { "epoch": 0.013976373777862611, "grad_norm": 0.34809160232543945, "learning_rate": 1.9998268568286622e-05, "loss": 0.5167, "step": 659 }, { "epoch": 0.013997582235795634, "grad_norm": 0.33413127064704895, "learning_rate": 1.999826235705728e-05, "loss": 0.5134, "step": 660 }, { "epoch": 0.01401879069372866, "grad_norm": 0.3334694504737854, "learning_rate": 1.9998256134707964e-05, "loss": 0.5551, "step": 661 }, { "epoch": 0.014039999151661682, "grad_norm": 0.44241949915885925, "learning_rate": 1.9998249901238673e-05, "loss": 0.6138, "step": 662 }, { "epoch": 0.014061207609594707, "grad_norm": 0.3185892403125763, "learning_rate": 1.9998243656649413e-05, "loss": 0.5292, "step": 663 }, { "epoch": 0.01408241606752773, "grad_norm": 0.46379339694976807, "learning_rate": 1.9998237400940195e-05, "loss": 0.5871, "step": 664 }, { "epoch": 0.014103624525460753, "grad_norm": 0.32846593856811523, "learning_rate": 1.999823113411102e-05, "loss": 0.5287, "step": 665 }, { "epoch": 0.014124832983393778, "grad_norm": 0.4264650344848633, "learning_rate": 1.9998224856161906e-05, "loss": 0.5994, "step": 666 }, { "epoch": 0.014146041441326801, "grad_norm": 0.3576667308807373, "learning_rate": 1.9998218567092848e-05, "loss": 0.6142, "step": 667 }, { "epoch": 0.014167249899259824, "grad_norm": 0.32179734110832214, "learning_rate": 1.999821226690386e-05, "loss": 0.5625, "step": 668 }, { "epoch": 0.01418845835719285, "grad_norm": 0.3296438753604889, "learning_rate": 1.999820595559495e-05, "loss": 0.5814, "step": 669 }, { "epoch": 0.014209666815125872, "grad_norm": 0.39047709107398987, "learning_rate": 1.999819963316612e-05, "loss": 0.5601, "step": 670 }, { "epoch": 0.014230875273058895, "grad_norm": 0.33023884892463684, "learning_rate": 1.999819329961738e-05, "loss": 0.5236, "step": 671 }, { "epoch": 0.01425208373099192, "grad_norm": 0.35320723056793213, "learning_rate": 1.9998186954948732e-05, "loss": 0.4638, "step": 672 }, { "epoch": 0.014273292188924943, "grad_norm": 0.34587326645851135, "learning_rate": 1.999818059916019e-05, "loss": 0.5078, "step": 673 }, { "epoch": 0.014294500646857966, "grad_norm": 0.8021981120109558, "learning_rate": 1.999817423225176e-05, "loss": 0.5534, "step": 674 }, { "epoch": 0.014315709104790991, "grad_norm": 0.3784118592739105, "learning_rate": 1.9998167854223445e-05, "loss": 0.5764, "step": 675 }, { "epoch": 0.014336917562724014, "grad_norm": 0.35522010922431946, "learning_rate": 1.9998161465075255e-05, "loss": 0.6046, "step": 676 }, { "epoch": 0.014358126020657037, "grad_norm": 0.36122873425483704, "learning_rate": 1.99981550648072e-05, "loss": 0.5727, "step": 677 }, { "epoch": 0.014379334478590062, "grad_norm": 0.33751437067985535, "learning_rate": 1.999814865341928e-05, "loss": 0.4893, "step": 678 }, { "epoch": 0.014400542936523085, "grad_norm": 0.33720728754997253, "learning_rate": 1.9998142230911507e-05, "loss": 0.6145, "step": 679 }, { "epoch": 0.014421751394456108, "grad_norm": 0.3184647858142853, "learning_rate": 1.9998135797283887e-05, "loss": 0.5074, "step": 680 }, { "epoch": 0.014442959852389133, "grad_norm": 0.33200785517692566, "learning_rate": 1.9998129352536427e-05, "loss": 0.5657, "step": 681 }, { "epoch": 0.014464168310322156, "grad_norm": 0.336811900138855, "learning_rate": 1.9998122896669137e-05, "loss": 0.5882, "step": 682 }, { "epoch": 0.01448537676825518, "grad_norm": 0.32690274715423584, "learning_rate": 1.9998116429682018e-05, "loss": 0.5513, "step": 683 }, { "epoch": 0.014506585226188204, "grad_norm": 0.3305058181285858, "learning_rate": 1.999810995157508e-05, "loss": 0.5966, "step": 684 }, { "epoch": 0.014527793684121227, "grad_norm": 0.33907029032707214, "learning_rate": 1.9998103462348336e-05, "loss": 0.5787, "step": 685 }, { "epoch": 0.01454900214205425, "grad_norm": 0.3362204134464264, "learning_rate": 1.9998096962001786e-05, "loss": 0.591, "step": 686 }, { "epoch": 0.014570210599987275, "grad_norm": 0.42700183391571045, "learning_rate": 1.9998090450535436e-05, "loss": 0.4984, "step": 687 }, { "epoch": 0.014591419057920298, "grad_norm": 0.3360089063644409, "learning_rate": 1.9998083927949298e-05, "loss": 0.5553, "step": 688 }, { "epoch": 0.014612627515853323, "grad_norm": 0.3231200575828552, "learning_rate": 1.999807739424338e-05, "loss": 0.5375, "step": 689 }, { "epoch": 0.014633835973786346, "grad_norm": 0.3623434603214264, "learning_rate": 1.999807084941769e-05, "loss": 0.5916, "step": 690 }, { "epoch": 0.01465504443171937, "grad_norm": 0.373426228761673, "learning_rate": 1.999806429347223e-05, "loss": 0.6306, "step": 691 }, { "epoch": 0.014676252889652394, "grad_norm": 0.422313928604126, "learning_rate": 1.9998057726407005e-05, "loss": 0.608, "step": 692 }, { "epoch": 0.014697461347585417, "grad_norm": 0.37926870584487915, "learning_rate": 1.999805114822203e-05, "loss": 0.5815, "step": 693 }, { "epoch": 0.01471866980551844, "grad_norm": 0.35707658529281616, "learning_rate": 1.999804455891731e-05, "loss": 0.5103, "step": 694 }, { "epoch": 0.014739878263451465, "grad_norm": 0.3406548798084259, "learning_rate": 1.9998037958492853e-05, "loss": 0.4415, "step": 695 }, { "epoch": 0.014761086721384488, "grad_norm": 0.3424825370311737, "learning_rate": 1.999803134694866e-05, "loss": 0.605, "step": 696 }, { "epoch": 0.014782295179317511, "grad_norm": 0.32042011618614197, "learning_rate": 1.999802472428475e-05, "loss": 0.6088, "step": 697 }, { "epoch": 0.014803503637250536, "grad_norm": 0.32249540090560913, "learning_rate": 1.999801809050112e-05, "loss": 0.5568, "step": 698 }, { "epoch": 0.01482471209518356, "grad_norm": 0.6385250687599182, "learning_rate": 1.9998011445597783e-05, "loss": 0.5743, "step": 699 }, { "epoch": 0.014845920553116582, "grad_norm": 0.3570406436920166, "learning_rate": 1.9998004789574743e-05, "loss": 0.6454, "step": 700 }, { "epoch": 0.014867129011049607, "grad_norm": 0.44259321689605713, "learning_rate": 1.999799812243201e-05, "loss": 0.5696, "step": 701 }, { "epoch": 0.01488833746898263, "grad_norm": 0.3333606421947479, "learning_rate": 1.999799144416959e-05, "loss": 0.4924, "step": 702 }, { "epoch": 0.014909545926915653, "grad_norm": 0.3328414857387543, "learning_rate": 1.9997984754787492e-05, "loss": 0.611, "step": 703 }, { "epoch": 0.014930754384848678, "grad_norm": 0.396778404712677, "learning_rate": 1.9997978054285722e-05, "loss": 0.5431, "step": 704 }, { "epoch": 0.014951962842781701, "grad_norm": 0.37940219044685364, "learning_rate": 1.9997971342664283e-05, "loss": 0.5866, "step": 705 }, { "epoch": 0.014973171300714724, "grad_norm": 0.3619183301925659, "learning_rate": 1.9997964619923193e-05, "loss": 0.5338, "step": 706 }, { "epoch": 0.01499437975864775, "grad_norm": 0.34752681851387024, "learning_rate": 1.999795788606245e-05, "loss": 0.5637, "step": 707 }, { "epoch": 0.015015588216580772, "grad_norm": 0.3322727084159851, "learning_rate": 1.9997951141082065e-05, "loss": 0.5991, "step": 708 }, { "epoch": 0.015036796674513795, "grad_norm": 0.35477158427238464, "learning_rate": 1.999794438498205e-05, "loss": 0.5962, "step": 709 }, { "epoch": 0.01505800513244682, "grad_norm": 0.36807993054389954, "learning_rate": 1.9997937617762404e-05, "loss": 0.5448, "step": 710 }, { "epoch": 0.015079213590379843, "grad_norm": 0.4113149344921112, "learning_rate": 1.9997930839423138e-05, "loss": 0.5603, "step": 711 }, { "epoch": 0.015100422048312866, "grad_norm": 0.3482217788696289, "learning_rate": 1.999792404996426e-05, "loss": 0.5886, "step": 712 }, { "epoch": 0.015121630506245891, "grad_norm": 0.4525514841079712, "learning_rate": 1.9997917249385782e-05, "loss": 0.6387, "step": 713 }, { "epoch": 0.015142838964178914, "grad_norm": 0.3290618658065796, "learning_rate": 1.9997910437687705e-05, "loss": 0.566, "step": 714 }, { "epoch": 0.015164047422111939, "grad_norm": 0.3367842435836792, "learning_rate": 1.999790361487004e-05, "loss": 0.5353, "step": 715 }, { "epoch": 0.015185255880044962, "grad_norm": 0.3418280780315399, "learning_rate": 1.999789678093279e-05, "loss": 0.5733, "step": 716 }, { "epoch": 0.015206464337977985, "grad_norm": 0.47879260778427124, "learning_rate": 1.999788993587597e-05, "loss": 0.5671, "step": 717 }, { "epoch": 0.01522767279591101, "grad_norm": 0.36657920479774475, "learning_rate": 1.999788307969958e-05, "loss": 0.4839, "step": 718 }, { "epoch": 0.015248881253844033, "grad_norm": 0.3026871383190155, "learning_rate": 1.9997876212403633e-05, "loss": 0.5697, "step": 719 }, { "epoch": 0.015270089711777056, "grad_norm": 0.3495355546474457, "learning_rate": 1.9997869333988136e-05, "loss": 0.5756, "step": 720 }, { "epoch": 0.015291298169710081, "grad_norm": 0.32776695489883423, "learning_rate": 1.9997862444453095e-05, "loss": 0.4882, "step": 721 }, { "epoch": 0.015312506627643104, "grad_norm": 0.37113282084465027, "learning_rate": 1.9997855543798517e-05, "loss": 0.4631, "step": 722 }, { "epoch": 0.015333715085576127, "grad_norm": 0.30981841683387756, "learning_rate": 1.9997848632024413e-05, "loss": 0.5141, "step": 723 }, { "epoch": 0.015354923543509152, "grad_norm": 0.3200303614139557, "learning_rate": 1.9997841709130785e-05, "loss": 0.5819, "step": 724 }, { "epoch": 0.015376132001442175, "grad_norm": 0.342508465051651, "learning_rate": 1.9997834775117648e-05, "loss": 0.5247, "step": 725 }, { "epoch": 0.015397340459375198, "grad_norm": 0.38575977087020874, "learning_rate": 1.9997827829985007e-05, "loss": 0.4915, "step": 726 }, { "epoch": 0.015418548917308223, "grad_norm": 0.6649923324584961, "learning_rate": 1.9997820873732864e-05, "loss": 0.5249, "step": 727 }, { "epoch": 0.015439757375241246, "grad_norm": 0.3485809564590454, "learning_rate": 1.9997813906361235e-05, "loss": 0.4968, "step": 728 }, { "epoch": 0.01546096583317427, "grad_norm": 0.3906687796115875, "learning_rate": 1.9997806927870123e-05, "loss": 0.5858, "step": 729 }, { "epoch": 0.015482174291107294, "grad_norm": 0.3073081076145172, "learning_rate": 1.9997799938259538e-05, "loss": 0.4536, "step": 730 }, { "epoch": 0.015503382749040317, "grad_norm": 0.3032621741294861, "learning_rate": 1.9997792937529485e-05, "loss": 0.5273, "step": 731 }, { "epoch": 0.01552459120697334, "grad_norm": 0.3729771673679352, "learning_rate": 1.9997785925679976e-05, "loss": 0.6267, "step": 732 }, { "epoch": 0.015545799664906365, "grad_norm": 0.3621523082256317, "learning_rate": 1.9997778902711015e-05, "loss": 0.5358, "step": 733 }, { "epoch": 0.015567008122839388, "grad_norm": 0.31975579261779785, "learning_rate": 1.999777186862261e-05, "loss": 0.5708, "step": 734 }, { "epoch": 0.015588216580772411, "grad_norm": 0.3349486291408539, "learning_rate": 1.999776482341477e-05, "loss": 0.5419, "step": 735 }, { "epoch": 0.015609425038705436, "grad_norm": 0.9803228378295898, "learning_rate": 1.9997757767087504e-05, "loss": 0.6035, "step": 736 }, { "epoch": 0.01563063349663846, "grad_norm": 0.3560437858104706, "learning_rate": 1.999775069964082e-05, "loss": 0.698, "step": 737 }, { "epoch": 0.015651841954571484, "grad_norm": 0.3401190936565399, "learning_rate": 1.9997743621074722e-05, "loss": 0.5876, "step": 738 }, { "epoch": 0.015673050412504507, "grad_norm": 0.3759598135948181, "learning_rate": 1.999773653138922e-05, "loss": 0.5479, "step": 739 }, { "epoch": 0.01569425887043753, "grad_norm": 0.327365905046463, "learning_rate": 1.9997729430584326e-05, "loss": 0.5214, "step": 740 }, { "epoch": 0.015715467328370553, "grad_norm": 0.32585325837135315, "learning_rate": 1.999772231866004e-05, "loss": 0.611, "step": 741 }, { "epoch": 0.015736675786303576, "grad_norm": 0.38608938455581665, "learning_rate": 1.9997715195616375e-05, "loss": 0.6126, "step": 742 }, { "epoch": 0.015757884244236603, "grad_norm": 0.36367857456207275, "learning_rate": 1.999770806145334e-05, "loss": 0.6664, "step": 743 }, { "epoch": 0.015779092702169626, "grad_norm": 0.33477529883384705, "learning_rate": 1.9997700916170938e-05, "loss": 0.5874, "step": 744 }, { "epoch": 0.01580030116010265, "grad_norm": 0.32063204050064087, "learning_rate": 1.9997693759769185e-05, "loss": 0.5196, "step": 745 }, { "epoch": 0.015821509618035672, "grad_norm": 0.44161736965179443, "learning_rate": 1.9997686592248078e-05, "loss": 0.5196, "step": 746 }, { "epoch": 0.015842718075968695, "grad_norm": 0.3269207775592804, "learning_rate": 1.9997679413607637e-05, "loss": 0.5799, "step": 747 }, { "epoch": 0.01586392653390172, "grad_norm": 0.3491232097148895, "learning_rate": 1.9997672223847857e-05, "loss": 0.6184, "step": 748 }, { "epoch": 0.015885134991834745, "grad_norm": 0.4198743999004364, "learning_rate": 1.9997665022968756e-05, "loss": 0.6647, "step": 749 }, { "epoch": 0.015906343449767768, "grad_norm": 0.33319196105003357, "learning_rate": 1.999765781097034e-05, "loss": 0.5162, "step": 750 }, { "epoch": 0.01592755190770079, "grad_norm": 0.38576817512512207, "learning_rate": 1.9997650587852612e-05, "loss": 0.6227, "step": 751 }, { "epoch": 0.015948760365633814, "grad_norm": 0.32010236382484436, "learning_rate": 1.999764335361559e-05, "loss": 0.5445, "step": 752 }, { "epoch": 0.015969968823566837, "grad_norm": 0.367937833070755, "learning_rate": 1.999763610825927e-05, "loss": 0.5898, "step": 753 }, { "epoch": 0.015991177281499864, "grad_norm": 0.3585968315601349, "learning_rate": 1.9997628851783667e-05, "loss": 0.5493, "step": 754 }, { "epoch": 0.016012385739432887, "grad_norm": 0.34597545862197876, "learning_rate": 1.9997621584188792e-05, "loss": 0.5301, "step": 755 }, { "epoch": 0.01603359419736591, "grad_norm": 0.3198436498641968, "learning_rate": 1.9997614305474648e-05, "loss": 0.5569, "step": 756 }, { "epoch": 0.016054802655298933, "grad_norm": 0.3749242424964905, "learning_rate": 1.999760701564124e-05, "loss": 0.5499, "step": 757 }, { "epoch": 0.016076011113231956, "grad_norm": 0.36731263995170593, "learning_rate": 1.9997599714688586e-05, "loss": 0.6044, "step": 758 }, { "epoch": 0.01609721957116498, "grad_norm": 0.36432361602783203, "learning_rate": 1.9997592402616686e-05, "loss": 0.5488, "step": 759 }, { "epoch": 0.016118428029098006, "grad_norm": 0.3443848788738251, "learning_rate": 1.9997585079425546e-05, "loss": 0.5085, "step": 760 }, { "epoch": 0.01613963648703103, "grad_norm": 0.34110739827156067, "learning_rate": 1.9997577745115185e-05, "loss": 0.5958, "step": 761 }, { "epoch": 0.016160844944964052, "grad_norm": 0.34585100412368774, "learning_rate": 1.9997570399685603e-05, "loss": 0.555, "step": 762 }, { "epoch": 0.016182053402897075, "grad_norm": 0.3514409065246582, "learning_rate": 1.9997563043136808e-05, "loss": 0.5197, "step": 763 }, { "epoch": 0.016203261860830098, "grad_norm": 0.3327063024044037, "learning_rate": 1.9997555675468812e-05, "loss": 0.4655, "step": 764 }, { "epoch": 0.01622447031876312, "grad_norm": 0.36146003007888794, "learning_rate": 1.9997548296681622e-05, "loss": 0.5453, "step": 765 }, { "epoch": 0.016245678776696148, "grad_norm": 0.43913477659225464, "learning_rate": 1.9997540906775247e-05, "loss": 0.6201, "step": 766 }, { "epoch": 0.01626688723462917, "grad_norm": 0.31593355536460876, "learning_rate": 1.999753350574969e-05, "loss": 0.5669, "step": 767 }, { "epoch": 0.016288095692562194, "grad_norm": 0.3896650969982147, "learning_rate": 1.9997526093604968e-05, "loss": 0.6411, "step": 768 }, { "epoch": 0.016309304150495217, "grad_norm": 0.3585692048072815, "learning_rate": 1.999751867034108e-05, "loss": 0.4947, "step": 769 }, { "epoch": 0.01633051260842824, "grad_norm": 0.3327234387397766, "learning_rate": 1.999751123595804e-05, "loss": 0.5824, "step": 770 }, { "epoch": 0.016351721066361263, "grad_norm": 0.3443603217601776, "learning_rate": 1.9997503790455854e-05, "loss": 0.5734, "step": 771 }, { "epoch": 0.01637292952429429, "grad_norm": 0.3548787832260132, "learning_rate": 1.9997496333834535e-05, "loss": 0.6068, "step": 772 }, { "epoch": 0.016394137982227313, "grad_norm": 0.3552882671356201, "learning_rate": 1.9997488866094082e-05, "loss": 0.4963, "step": 773 }, { "epoch": 0.016415346440160336, "grad_norm": 0.34954941272735596, "learning_rate": 1.999748138723451e-05, "loss": 0.6108, "step": 774 }, { "epoch": 0.01643655489809336, "grad_norm": 0.3431664705276489, "learning_rate": 1.9997473897255827e-05, "loss": 0.5152, "step": 775 }, { "epoch": 0.016457763356026382, "grad_norm": 0.36746975779533386, "learning_rate": 1.999746639615804e-05, "loss": 0.6017, "step": 776 }, { "epoch": 0.016478971813959405, "grad_norm": 0.34905555844306946, "learning_rate": 1.9997458883941162e-05, "loss": 0.6748, "step": 777 }, { "epoch": 0.016500180271892432, "grad_norm": 0.48690229654312134, "learning_rate": 1.999745136060519e-05, "loss": 0.5767, "step": 778 }, { "epoch": 0.016521388729825455, "grad_norm": 0.5530807971954346, "learning_rate": 1.9997443826150143e-05, "loss": 0.6356, "step": 779 }, { "epoch": 0.016542597187758478, "grad_norm": 0.36677059531211853, "learning_rate": 1.9997436280576026e-05, "loss": 0.5582, "step": 780 }, { "epoch": 0.0165638056456915, "grad_norm": 0.3481108546257019, "learning_rate": 1.9997428723882847e-05, "loss": 0.5907, "step": 781 }, { "epoch": 0.016585014103624524, "grad_norm": 0.3329372704029083, "learning_rate": 1.9997421156070615e-05, "loss": 0.4424, "step": 782 }, { "epoch": 0.01660622256155755, "grad_norm": 0.30792099237442017, "learning_rate": 1.9997413577139338e-05, "loss": 0.5335, "step": 783 }, { "epoch": 0.016627431019490574, "grad_norm": 0.33223703503608704, "learning_rate": 1.999740598708902e-05, "loss": 0.5343, "step": 784 }, { "epoch": 0.016648639477423597, "grad_norm": 0.39744052290916443, "learning_rate": 1.999739838591968e-05, "loss": 0.5225, "step": 785 }, { "epoch": 0.01666984793535662, "grad_norm": 0.3335467278957367, "learning_rate": 1.999739077363132e-05, "loss": 0.5272, "step": 786 }, { "epoch": 0.016691056393289643, "grad_norm": 0.35722705721855164, "learning_rate": 1.999738315022395e-05, "loss": 0.6017, "step": 787 }, { "epoch": 0.016712264851222666, "grad_norm": 0.40598562359809875, "learning_rate": 1.999737551569757e-05, "loss": 0.6395, "step": 788 }, { "epoch": 0.016733473309155693, "grad_norm": 0.31505659222602844, "learning_rate": 1.99973678700522e-05, "loss": 0.6015, "step": 789 }, { "epoch": 0.016754681767088716, "grad_norm": 0.33635276556015015, "learning_rate": 1.9997360213287846e-05, "loss": 0.5504, "step": 790 }, { "epoch": 0.01677589022502174, "grad_norm": 0.29893097281455994, "learning_rate": 1.9997352545404513e-05, "loss": 0.508, "step": 791 }, { "epoch": 0.016797098682954762, "grad_norm": 0.31433531641960144, "learning_rate": 1.9997344866402212e-05, "loss": 0.5448, "step": 792 }, { "epoch": 0.016818307140887785, "grad_norm": 0.3167221248149872, "learning_rate": 1.999733717628095e-05, "loss": 0.5047, "step": 793 }, { "epoch": 0.016839515598820808, "grad_norm": 0.35047677159309387, "learning_rate": 1.9997329475040737e-05, "loss": 0.5857, "step": 794 }, { "epoch": 0.016860724056753835, "grad_norm": 0.34294119477272034, "learning_rate": 1.999732176268158e-05, "loss": 0.5689, "step": 795 }, { "epoch": 0.016881932514686858, "grad_norm": 0.3437771201133728, "learning_rate": 1.999731403920349e-05, "loss": 0.601, "step": 796 }, { "epoch": 0.01690314097261988, "grad_norm": 0.3240935206413269, "learning_rate": 1.9997306304606474e-05, "loss": 0.5111, "step": 797 }, { "epoch": 0.016924349430552904, "grad_norm": 0.3773409128189087, "learning_rate": 1.999729855889054e-05, "loss": 0.6409, "step": 798 }, { "epoch": 0.016945557888485927, "grad_norm": 0.3465387225151062, "learning_rate": 1.9997290802055696e-05, "loss": 0.6043, "step": 799 }, { "epoch": 0.01696676634641895, "grad_norm": 0.35742121934890747, "learning_rate": 1.9997283034101956e-05, "loss": 0.5185, "step": 800 }, { "epoch": 0.016987974804351977, "grad_norm": 0.35225072503089905, "learning_rate": 1.9997275255029322e-05, "loss": 0.6381, "step": 801 }, { "epoch": 0.017009183262285, "grad_norm": 0.36140018701553345, "learning_rate": 1.9997267464837806e-05, "loss": 0.5695, "step": 802 }, { "epoch": 0.017030391720218023, "grad_norm": 0.3519362509250641, "learning_rate": 1.9997259663527417e-05, "loss": 0.5298, "step": 803 }, { "epoch": 0.017051600178151046, "grad_norm": 0.33007290959358215, "learning_rate": 1.999725185109816e-05, "loss": 0.5296, "step": 804 }, { "epoch": 0.01707280863608407, "grad_norm": 0.3652852773666382, "learning_rate": 1.9997244027550047e-05, "loss": 0.6281, "step": 805 }, { "epoch": 0.017094017094017096, "grad_norm": 0.35421091318130493, "learning_rate": 1.9997236192883087e-05, "loss": 0.4752, "step": 806 }, { "epoch": 0.01711522555195012, "grad_norm": 0.33429571986198425, "learning_rate": 1.999722834709729e-05, "loss": 0.5319, "step": 807 }, { "epoch": 0.017136434009883142, "grad_norm": 0.3712928891181946, "learning_rate": 1.9997220490192658e-05, "loss": 0.6244, "step": 808 }, { "epoch": 0.017157642467816165, "grad_norm": 0.3493068516254425, "learning_rate": 1.999721262216921e-05, "loss": 0.6249, "step": 809 }, { "epoch": 0.017178850925749188, "grad_norm": 0.45213401317596436, "learning_rate": 1.9997204743026944e-05, "loss": 0.4993, "step": 810 }, { "epoch": 0.01720005938368221, "grad_norm": 0.3590027093887329, "learning_rate": 1.9997196852765874e-05, "loss": 0.5771, "step": 811 }, { "epoch": 0.017221267841615238, "grad_norm": 0.37217220664024353, "learning_rate": 1.999718895138601e-05, "loss": 0.6574, "step": 812 }, { "epoch": 0.01724247629954826, "grad_norm": 0.3579828143119812, "learning_rate": 1.9997181038887357e-05, "loss": 0.6049, "step": 813 }, { "epoch": 0.017263684757481284, "grad_norm": 0.3535394072532654, "learning_rate": 1.9997173115269927e-05, "loss": 0.5757, "step": 814 }, { "epoch": 0.017284893215414307, "grad_norm": 0.3472994267940521, "learning_rate": 1.9997165180533732e-05, "loss": 0.6122, "step": 815 }, { "epoch": 0.01730610167334733, "grad_norm": 0.5974505543708801, "learning_rate": 1.9997157234678773e-05, "loss": 0.5044, "step": 816 }, { "epoch": 0.017327310131280353, "grad_norm": 0.3132387697696686, "learning_rate": 1.9997149277705064e-05, "loss": 0.4831, "step": 817 }, { "epoch": 0.01734851858921338, "grad_norm": 0.3711230754852295, "learning_rate": 1.999714130961261e-05, "loss": 0.598, "step": 818 }, { "epoch": 0.017369727047146403, "grad_norm": 0.37964752316474915, "learning_rate": 1.9997133330401422e-05, "loss": 0.5681, "step": 819 }, { "epoch": 0.017390935505079426, "grad_norm": 0.30904877185821533, "learning_rate": 1.999712534007151e-05, "loss": 0.474, "step": 820 }, { "epoch": 0.01741214396301245, "grad_norm": 0.3389516770839691, "learning_rate": 1.9997117338622883e-05, "loss": 0.5586, "step": 821 }, { "epoch": 0.017433352420945472, "grad_norm": 0.32502463459968567, "learning_rate": 1.999710932605555e-05, "loss": 0.5352, "step": 822 }, { "epoch": 0.017454560878878495, "grad_norm": 0.322028249502182, "learning_rate": 1.999710130236952e-05, "loss": 0.4875, "step": 823 }, { "epoch": 0.017475769336811522, "grad_norm": 0.488053560256958, "learning_rate": 1.99970932675648e-05, "loss": 0.607, "step": 824 }, { "epoch": 0.017496977794744545, "grad_norm": 0.35096630454063416, "learning_rate": 1.9997085221641398e-05, "loss": 0.5204, "step": 825 }, { "epoch": 0.017518186252677568, "grad_norm": 0.3346728980541229, "learning_rate": 1.9997077164599324e-05, "loss": 0.6277, "step": 826 }, { "epoch": 0.01753939471061059, "grad_norm": 0.37878143787384033, "learning_rate": 1.9997069096438588e-05, "loss": 0.4913, "step": 827 }, { "epoch": 0.017560603168543614, "grad_norm": 0.3090042769908905, "learning_rate": 1.99970610171592e-05, "loss": 0.5882, "step": 828 }, { "epoch": 0.017581811626476637, "grad_norm": 0.47952505946159363, "learning_rate": 1.9997052926761164e-05, "loss": 0.5049, "step": 829 }, { "epoch": 0.017603020084409664, "grad_norm": 0.3197227716445923, "learning_rate": 1.9997044825244497e-05, "loss": 0.5313, "step": 830 }, { "epoch": 0.017624228542342687, "grad_norm": 0.35027334094047546, "learning_rate": 1.99970367126092e-05, "loss": 0.6624, "step": 831 }, { "epoch": 0.01764543700027571, "grad_norm": 0.37239181995391846, "learning_rate": 1.999702858885529e-05, "loss": 0.564, "step": 832 }, { "epoch": 0.017666645458208733, "grad_norm": 0.3573537766933441, "learning_rate": 1.9997020453982768e-05, "loss": 0.5827, "step": 833 }, { "epoch": 0.017687853916141756, "grad_norm": 0.40177449584007263, "learning_rate": 1.9997012307991647e-05, "loss": 0.5954, "step": 834 }, { "epoch": 0.017709062374074783, "grad_norm": 1.10407555103302, "learning_rate": 1.999700415088194e-05, "loss": 0.5037, "step": 835 }, { "epoch": 0.017730270832007806, "grad_norm": 0.355918824672699, "learning_rate": 1.9996995982653647e-05, "loss": 0.53, "step": 836 }, { "epoch": 0.01775147928994083, "grad_norm": 0.34367987513542175, "learning_rate": 1.9996987803306785e-05, "loss": 0.5126, "step": 837 }, { "epoch": 0.017772687747873852, "grad_norm": 0.3516104817390442, "learning_rate": 1.999697961284136e-05, "loss": 0.6196, "step": 838 }, { "epoch": 0.017793896205806875, "grad_norm": 0.3439938724040985, "learning_rate": 1.999697141125738e-05, "loss": 0.5128, "step": 839 }, { "epoch": 0.017815104663739898, "grad_norm": 0.324179470539093, "learning_rate": 1.999696319855485e-05, "loss": 0.5649, "step": 840 }, { "epoch": 0.017836313121672925, "grad_norm": 0.3515211343765259, "learning_rate": 1.9996954974733792e-05, "loss": 0.5674, "step": 841 }, { "epoch": 0.017857521579605948, "grad_norm": 0.4709100127220154, "learning_rate": 1.9996946739794203e-05, "loss": 0.4052, "step": 842 }, { "epoch": 0.01787873003753897, "grad_norm": 0.3560628592967987, "learning_rate": 1.9996938493736097e-05, "loss": 0.6441, "step": 843 }, { "epoch": 0.017899938495471994, "grad_norm": 0.3235477805137634, "learning_rate": 1.9996930236559487e-05, "loss": 0.4955, "step": 844 }, { "epoch": 0.017921146953405017, "grad_norm": 0.32390928268432617, "learning_rate": 1.9996921968264374e-05, "loss": 0.528, "step": 845 }, { "epoch": 0.01794235541133804, "grad_norm": 0.3467140793800354, "learning_rate": 1.999691368885077e-05, "loss": 0.5386, "step": 846 }, { "epoch": 0.017963563869271067, "grad_norm": 0.34721630811691284, "learning_rate": 1.999690539831869e-05, "loss": 0.5502, "step": 847 }, { "epoch": 0.01798477232720409, "grad_norm": 0.36378762125968933, "learning_rate": 1.9996897096668137e-05, "loss": 0.5826, "step": 848 }, { "epoch": 0.018005980785137113, "grad_norm": 0.30168095231056213, "learning_rate": 1.9996888783899118e-05, "loss": 0.5447, "step": 849 }, { "epoch": 0.018027189243070136, "grad_norm": 0.3312585949897766, "learning_rate": 1.999688046001165e-05, "loss": 0.5606, "step": 850 }, { "epoch": 0.01804839770100316, "grad_norm": 0.3143177628517151, "learning_rate": 1.999687212500574e-05, "loss": 0.4983, "step": 851 }, { "epoch": 0.018069606158936182, "grad_norm": 0.36077967286109924, "learning_rate": 1.9996863778881395e-05, "loss": 0.5839, "step": 852 }, { "epoch": 0.01809081461686921, "grad_norm": 0.33496221899986267, "learning_rate": 1.9996855421638624e-05, "loss": 0.5608, "step": 853 }, { "epoch": 0.018112023074802232, "grad_norm": 0.34256476163864136, "learning_rate": 1.9996847053277434e-05, "loss": 0.5933, "step": 854 }, { "epoch": 0.018133231532735255, "grad_norm": 0.36488571763038635, "learning_rate": 1.9996838673797842e-05, "loss": 0.5863, "step": 855 }, { "epoch": 0.018154439990668278, "grad_norm": 0.38981306552886963, "learning_rate": 1.9996830283199852e-05, "loss": 0.4537, "step": 856 }, { "epoch": 0.0181756484486013, "grad_norm": 0.3380298316478729, "learning_rate": 1.9996821881483473e-05, "loss": 0.5146, "step": 857 }, { "epoch": 0.018196856906534328, "grad_norm": 0.31170162558555603, "learning_rate": 1.9996813468648714e-05, "loss": 0.5508, "step": 858 }, { "epoch": 0.01821806536446735, "grad_norm": 0.6266687512397766, "learning_rate": 1.999680504469559e-05, "loss": 0.5648, "step": 859 }, { "epoch": 0.018239273822400374, "grad_norm": 0.34234875440597534, "learning_rate": 1.9996796609624104e-05, "loss": 0.5655, "step": 860 }, { "epoch": 0.018260482280333397, "grad_norm": 0.32899829745292664, "learning_rate": 1.9996788163434267e-05, "loss": 0.5348, "step": 861 }, { "epoch": 0.01828169073826642, "grad_norm": 0.3457389175891876, "learning_rate": 1.999677970612609e-05, "loss": 0.5975, "step": 862 }, { "epoch": 0.018302899196199443, "grad_norm": 0.36175158619880676, "learning_rate": 1.9996771237699582e-05, "loss": 0.6229, "step": 863 }, { "epoch": 0.01832410765413247, "grad_norm": 0.32851436734199524, "learning_rate": 1.999676275815475e-05, "loss": 0.5715, "step": 864 }, { "epoch": 0.018345316112065493, "grad_norm": 0.42485398054122925, "learning_rate": 1.9996754267491608e-05, "loss": 0.6365, "step": 865 }, { "epoch": 0.018366524569998516, "grad_norm": 0.3533354699611664, "learning_rate": 1.9996745765710162e-05, "loss": 0.5627, "step": 866 }, { "epoch": 0.01838773302793154, "grad_norm": 0.3413473665714264, "learning_rate": 1.999673725281042e-05, "loss": 0.6181, "step": 867 }, { "epoch": 0.018408941485864562, "grad_norm": 0.3245302438735962, "learning_rate": 1.9996728728792397e-05, "loss": 0.5261, "step": 868 }, { "epoch": 0.018430149943797585, "grad_norm": 0.3427068591117859, "learning_rate": 1.9996720193656095e-05, "loss": 0.5788, "step": 869 }, { "epoch": 0.01845135840173061, "grad_norm": 0.3842363655567169, "learning_rate": 1.999671164740153e-05, "loss": 0.5895, "step": 870 }, { "epoch": 0.018472566859663635, "grad_norm": 0.3560446500778198, "learning_rate": 1.9996703090028712e-05, "loss": 0.5729, "step": 871 }, { "epoch": 0.018493775317596658, "grad_norm": 0.35361889004707336, "learning_rate": 1.9996694521537642e-05, "loss": 0.637, "step": 872 }, { "epoch": 0.01851498377552968, "grad_norm": 0.3544501066207886, "learning_rate": 1.999668594192834e-05, "loss": 0.5323, "step": 873 }, { "epoch": 0.018536192233462704, "grad_norm": 0.32351312041282654, "learning_rate": 1.9996677351200807e-05, "loss": 0.5208, "step": 874 }, { "epoch": 0.018557400691395727, "grad_norm": 0.37609270215034485, "learning_rate": 1.999666874935506e-05, "loss": 0.5691, "step": 875 }, { "epoch": 0.018578609149328754, "grad_norm": 0.34979861974716187, "learning_rate": 1.99966601363911e-05, "loss": 0.5833, "step": 876 }, { "epoch": 0.018599817607261777, "grad_norm": 0.3080311119556427, "learning_rate": 1.9996651512308946e-05, "loss": 0.5277, "step": 877 }, { "epoch": 0.0186210260651948, "grad_norm": 0.3288978636264801, "learning_rate": 1.99966428771086e-05, "loss": 0.5402, "step": 878 }, { "epoch": 0.018642234523127823, "grad_norm": 0.3136023283004761, "learning_rate": 1.9996634230790075e-05, "loss": 0.5067, "step": 879 }, { "epoch": 0.018663442981060846, "grad_norm": 0.39174824953079224, "learning_rate": 1.9996625573353383e-05, "loss": 0.5019, "step": 880 }, { "epoch": 0.01868465143899387, "grad_norm": 0.2984659671783447, "learning_rate": 1.9996616904798526e-05, "loss": 0.4868, "step": 881 }, { "epoch": 0.018705859896926896, "grad_norm": 0.788544237613678, "learning_rate": 1.9996608225125524e-05, "loss": 0.5318, "step": 882 }, { "epoch": 0.01872706835485992, "grad_norm": 0.35654526948928833, "learning_rate": 1.999659953433438e-05, "loss": 0.599, "step": 883 }, { "epoch": 0.018748276812792942, "grad_norm": 0.32866370677948, "learning_rate": 1.9996590832425102e-05, "loss": 0.5999, "step": 884 }, { "epoch": 0.018769485270725965, "grad_norm": 0.3210759162902832, "learning_rate": 1.9996582119397705e-05, "loss": 0.5949, "step": 885 }, { "epoch": 0.018790693728658988, "grad_norm": 0.3324083685874939, "learning_rate": 1.9996573395252194e-05, "loss": 0.5756, "step": 886 }, { "epoch": 0.018811902186592015, "grad_norm": 0.3459484875202179, "learning_rate": 1.9996564659988583e-05, "loss": 0.5089, "step": 887 }, { "epoch": 0.018833110644525038, "grad_norm": 0.35659411549568176, "learning_rate": 1.999655591360688e-05, "loss": 0.6099, "step": 888 }, { "epoch": 0.01885431910245806, "grad_norm": 0.4657941460609436, "learning_rate": 1.999654715610709e-05, "loss": 0.5657, "step": 889 }, { "epoch": 0.018875527560391084, "grad_norm": 0.38481417298316956, "learning_rate": 1.9996538387489233e-05, "loss": 0.6138, "step": 890 }, { "epoch": 0.018896736018324107, "grad_norm": 0.4256061315536499, "learning_rate": 1.999652960775331e-05, "loss": 0.4956, "step": 891 }, { "epoch": 0.01891794447625713, "grad_norm": 0.3264429271221161, "learning_rate": 1.9996520816899336e-05, "loss": 0.6116, "step": 892 }, { "epoch": 0.018939152934190157, "grad_norm": 0.31666097044944763, "learning_rate": 1.9996512014927313e-05, "loss": 0.519, "step": 893 }, { "epoch": 0.01896036139212318, "grad_norm": 0.32996129989624023, "learning_rate": 1.9996503201837257e-05, "loss": 0.5641, "step": 894 }, { "epoch": 0.018981569850056203, "grad_norm": 0.3515917658805847, "learning_rate": 1.9996494377629183e-05, "loss": 0.5436, "step": 895 }, { "epoch": 0.019002778307989226, "grad_norm": 0.3129362165927887, "learning_rate": 1.999648554230309e-05, "loss": 0.525, "step": 896 }, { "epoch": 0.01902398676592225, "grad_norm": 0.3298971652984619, "learning_rate": 1.9996476695858994e-05, "loss": 0.5524, "step": 897 }, { "epoch": 0.019045195223855272, "grad_norm": 0.7074176669120789, "learning_rate": 1.9996467838296904e-05, "loss": 0.6119, "step": 898 }, { "epoch": 0.0190664036817883, "grad_norm": 0.3172169029712677, "learning_rate": 1.999645896961683e-05, "loss": 0.5582, "step": 899 }, { "epoch": 0.01908761213972132, "grad_norm": 0.3144143521785736, "learning_rate": 1.9996450089818778e-05, "loss": 0.4967, "step": 900 }, { "epoch": 0.019108820597654345, "grad_norm": 0.38509634137153625, "learning_rate": 1.999644119890276e-05, "loss": 0.599, "step": 901 }, { "epoch": 0.019130029055587368, "grad_norm": 0.34590667486190796, "learning_rate": 1.999643229686879e-05, "loss": 0.4965, "step": 902 }, { "epoch": 0.01915123751352039, "grad_norm": 0.4046694338321686, "learning_rate": 1.9996423383716876e-05, "loss": 0.529, "step": 903 }, { "epoch": 0.019172445971453414, "grad_norm": 0.34399011731147766, "learning_rate": 1.9996414459447024e-05, "loss": 0.5196, "step": 904 }, { "epoch": 0.01919365442938644, "grad_norm": 0.29578667879104614, "learning_rate": 1.999640552405925e-05, "loss": 0.4434, "step": 905 }, { "epoch": 0.019214862887319464, "grad_norm": 0.37392231822013855, "learning_rate": 1.999639657755356e-05, "loss": 0.5598, "step": 906 }, { "epoch": 0.019236071345252487, "grad_norm": 0.3375287652015686, "learning_rate": 1.999638761992996e-05, "loss": 0.4955, "step": 907 }, { "epoch": 0.01925727980318551, "grad_norm": 0.3853868544101715, "learning_rate": 1.9996378651188468e-05, "loss": 0.5647, "step": 908 }, { "epoch": 0.019278488261118533, "grad_norm": 0.40606868267059326, "learning_rate": 1.999636967132909e-05, "loss": 0.4752, "step": 909 }, { "epoch": 0.01929969671905156, "grad_norm": 0.3529070019721985, "learning_rate": 1.9996360680351836e-05, "loss": 0.5551, "step": 910 }, { "epoch": 0.019320905176984583, "grad_norm": 0.39903607964515686, "learning_rate": 1.999635167825672e-05, "loss": 0.5376, "step": 911 }, { "epoch": 0.019342113634917606, "grad_norm": 0.34779009222984314, "learning_rate": 1.9996342665043743e-05, "loss": 0.6265, "step": 912 }, { "epoch": 0.01936332209285063, "grad_norm": 0.335847944021225, "learning_rate": 1.9996333640712923e-05, "loss": 0.5372, "step": 913 }, { "epoch": 0.019384530550783652, "grad_norm": 0.3738034665584564, "learning_rate": 1.9996324605264268e-05, "loss": 0.6163, "step": 914 }, { "epoch": 0.019405739008716675, "grad_norm": 0.3526040315628052, "learning_rate": 1.9996315558697787e-05, "loss": 0.524, "step": 915 }, { "epoch": 0.0194269474666497, "grad_norm": 0.4234098196029663, "learning_rate": 1.999630650101349e-05, "loss": 0.5888, "step": 916 }, { "epoch": 0.019448155924582725, "grad_norm": 0.32716700434684753, "learning_rate": 1.999629743221139e-05, "loss": 0.5121, "step": 917 }, { "epoch": 0.019469364382515748, "grad_norm": 0.3652024269104004, "learning_rate": 1.9996288352291497e-05, "loss": 0.5888, "step": 918 }, { "epoch": 0.01949057284044877, "grad_norm": 0.3240652084350586, "learning_rate": 1.9996279261253814e-05, "loss": 0.5143, "step": 919 }, { "epoch": 0.019511781298381794, "grad_norm": 0.34563925862312317, "learning_rate": 1.9996270159098356e-05, "loss": 0.6013, "step": 920 }, { "epoch": 0.019532989756314817, "grad_norm": 0.34467121958732605, "learning_rate": 1.9996261045825137e-05, "loss": 0.5156, "step": 921 }, { "epoch": 0.019554198214247843, "grad_norm": 0.3047303855419159, "learning_rate": 1.999625192143416e-05, "loss": 0.5148, "step": 922 }, { "epoch": 0.019575406672180867, "grad_norm": 0.3350793123245239, "learning_rate": 1.9996242785925437e-05, "loss": 0.5729, "step": 923 }, { "epoch": 0.01959661513011389, "grad_norm": 0.34508365392684937, "learning_rate": 1.9996233639298985e-05, "loss": 0.5704, "step": 924 }, { "epoch": 0.019617823588046913, "grad_norm": 0.3703109323978424, "learning_rate": 1.9996224481554804e-05, "loss": 0.7024, "step": 925 }, { "epoch": 0.019639032045979936, "grad_norm": 0.34912267327308655, "learning_rate": 1.999621531269291e-05, "loss": 0.5567, "step": 926 }, { "epoch": 0.01966024050391296, "grad_norm": 1.122619390487671, "learning_rate": 1.9996206132713315e-05, "loss": 0.54, "step": 927 }, { "epoch": 0.019681448961845985, "grad_norm": 0.3599822223186493, "learning_rate": 1.9996196941616023e-05, "loss": 0.682, "step": 928 }, { "epoch": 0.01970265741977901, "grad_norm": 0.31577444076538086, "learning_rate": 1.9996187739401048e-05, "loss": 0.5126, "step": 929 }, { "epoch": 0.01972386587771203, "grad_norm": 0.3384547531604767, "learning_rate": 1.9996178526068402e-05, "loss": 0.5591, "step": 930 }, { "epoch": 0.019745074335645055, "grad_norm": 0.6278819441795349, "learning_rate": 1.999616930161809e-05, "loss": 0.563, "step": 931 }, { "epoch": 0.019766282793578078, "grad_norm": 0.4609120786190033, "learning_rate": 1.999616006605013e-05, "loss": 0.5694, "step": 932 }, { "epoch": 0.0197874912515111, "grad_norm": 0.346291720867157, "learning_rate": 1.9996150819364525e-05, "loss": 0.589, "step": 933 }, { "epoch": 0.019808699709444128, "grad_norm": 0.3329117000102997, "learning_rate": 1.9996141561561287e-05, "loss": 0.6036, "step": 934 }, { "epoch": 0.01982990816737715, "grad_norm": 0.33966779708862305, "learning_rate": 1.9996132292640428e-05, "loss": 0.6136, "step": 935 }, { "epoch": 0.019851116625310174, "grad_norm": 0.33216792345046997, "learning_rate": 1.9996123012601956e-05, "loss": 0.5459, "step": 936 }, { "epoch": 0.019872325083243197, "grad_norm": 0.3563191294670105, "learning_rate": 1.9996113721445885e-05, "loss": 0.5287, "step": 937 }, { "epoch": 0.01989353354117622, "grad_norm": 0.3225248456001282, "learning_rate": 1.9996104419172222e-05, "loss": 0.6463, "step": 938 }, { "epoch": 0.019914741999109246, "grad_norm": 0.39457404613494873, "learning_rate": 1.999609510578098e-05, "loss": 0.6285, "step": 939 }, { "epoch": 0.01993595045704227, "grad_norm": 0.3387983739376068, "learning_rate": 1.9996085781272168e-05, "loss": 0.5178, "step": 940 }, { "epoch": 0.019957158914975293, "grad_norm": 0.35566362738609314, "learning_rate": 1.9996076445645793e-05, "loss": 0.6251, "step": 941 }, { "epoch": 0.019978367372908316, "grad_norm": 0.3533126413822174, "learning_rate": 1.9996067098901874e-05, "loss": 0.622, "step": 942 }, { "epoch": 0.01999957583084134, "grad_norm": 0.33285897970199585, "learning_rate": 1.999605774104041e-05, "loss": 0.5545, "step": 943 }, { "epoch": 0.020020784288774362, "grad_norm": 0.4220219552516937, "learning_rate": 1.9996048372061422e-05, "loss": 0.6141, "step": 944 }, { "epoch": 0.02004199274670739, "grad_norm": 0.3136822581291199, "learning_rate": 1.9996038991964913e-05, "loss": 0.5581, "step": 945 }, { "epoch": 0.02006320120464041, "grad_norm": 0.3498736619949341, "learning_rate": 1.9996029600750897e-05, "loss": 0.5629, "step": 946 }, { "epoch": 0.020084409662573435, "grad_norm": 0.3806127905845642, "learning_rate": 1.9996020198419383e-05, "loss": 0.5505, "step": 947 }, { "epoch": 0.020105618120506458, "grad_norm": 0.322933554649353, "learning_rate": 1.999601078497038e-05, "loss": 0.548, "step": 948 }, { "epoch": 0.02012682657843948, "grad_norm": 0.3683631420135498, "learning_rate": 1.999600136040391e-05, "loss": 0.4794, "step": 949 }, { "epoch": 0.020148035036372504, "grad_norm": 0.4034537076950073, "learning_rate": 1.9995991924719965e-05, "loss": 0.5299, "step": 950 }, { "epoch": 0.02016924349430553, "grad_norm": 0.4534336030483246, "learning_rate": 1.9995982477918568e-05, "loss": 0.6763, "step": 951 }, { "epoch": 0.020190451952238554, "grad_norm": 0.32237040996551514, "learning_rate": 1.9995973019999724e-05, "loss": 0.5408, "step": 952 }, { "epoch": 0.020211660410171577, "grad_norm": 0.4735819697380066, "learning_rate": 1.9995963550963448e-05, "loss": 0.4969, "step": 953 }, { "epoch": 0.0202328688681046, "grad_norm": 0.3463580906391144, "learning_rate": 1.9995954070809746e-05, "loss": 0.6109, "step": 954 }, { "epoch": 0.020254077326037623, "grad_norm": 0.36575570702552795, "learning_rate": 1.999594457953863e-05, "loss": 0.6397, "step": 955 }, { "epoch": 0.020275285783970646, "grad_norm": 0.3785610795021057, "learning_rate": 1.9995935077150114e-05, "loss": 0.4848, "step": 956 }, { "epoch": 0.020296494241903672, "grad_norm": 0.34477680921554565, "learning_rate": 1.9995925563644208e-05, "loss": 0.6093, "step": 957 }, { "epoch": 0.020317702699836696, "grad_norm": 0.3378797471523285, "learning_rate": 1.9995916039020917e-05, "loss": 0.5214, "step": 958 }, { "epoch": 0.02033891115776972, "grad_norm": 0.38425201177597046, "learning_rate": 1.9995906503280254e-05, "loss": 0.5142, "step": 959 }, { "epoch": 0.02036011961570274, "grad_norm": 0.3696412146091461, "learning_rate": 1.9995896956422233e-05, "loss": 0.5961, "step": 960 }, { "epoch": 0.020381328073635765, "grad_norm": 0.32783642411231995, "learning_rate": 1.9995887398446864e-05, "loss": 0.5554, "step": 961 }, { "epoch": 0.02040253653156879, "grad_norm": 0.29621249437332153, "learning_rate": 1.999587782935415e-05, "loss": 0.5772, "step": 962 }, { "epoch": 0.020423744989501814, "grad_norm": 0.32740578055381775, "learning_rate": 1.999586824914411e-05, "loss": 0.4799, "step": 963 }, { "epoch": 0.020444953447434838, "grad_norm": 0.4352588653564453, "learning_rate": 1.9995858657816755e-05, "loss": 0.5622, "step": 964 }, { "epoch": 0.02046616190536786, "grad_norm": 0.3663594424724579, "learning_rate": 1.999584905537209e-05, "loss": 0.5912, "step": 965 }, { "epoch": 0.020487370363300884, "grad_norm": 0.34862077236175537, "learning_rate": 1.999583944181013e-05, "loss": 0.5623, "step": 966 }, { "epoch": 0.020508578821233907, "grad_norm": 0.38266798853874207, "learning_rate": 1.9995829817130884e-05, "loss": 0.6394, "step": 967 }, { "epoch": 0.020529787279166933, "grad_norm": 0.29355940222740173, "learning_rate": 1.9995820181334362e-05, "loss": 0.5488, "step": 968 }, { "epoch": 0.020550995737099956, "grad_norm": 0.34029069542884827, "learning_rate": 1.9995810534420578e-05, "loss": 0.5496, "step": 969 }, { "epoch": 0.02057220419503298, "grad_norm": 0.3872211277484894, "learning_rate": 1.9995800876389536e-05, "loss": 0.4767, "step": 970 }, { "epoch": 0.020593412652966003, "grad_norm": 0.3320959806442261, "learning_rate": 1.9995791207241255e-05, "loss": 0.5031, "step": 971 }, { "epoch": 0.020614621110899026, "grad_norm": 0.34662431478500366, "learning_rate": 1.9995781526975738e-05, "loss": 0.5889, "step": 972 }, { "epoch": 0.02063582956883205, "grad_norm": 0.34503665566444397, "learning_rate": 1.9995771835593e-05, "loss": 0.6224, "step": 973 }, { "epoch": 0.020657038026765075, "grad_norm": 0.3331945538520813, "learning_rate": 1.9995762133093056e-05, "loss": 0.6518, "step": 974 }, { "epoch": 0.0206782464846981, "grad_norm": 0.7897850871086121, "learning_rate": 1.999575241947591e-05, "loss": 0.5429, "step": 975 }, { "epoch": 0.02069945494263112, "grad_norm": 0.31476926803588867, "learning_rate": 1.9995742694741577e-05, "loss": 0.4957, "step": 976 }, { "epoch": 0.020720663400564145, "grad_norm": 0.3499990999698639, "learning_rate": 1.999573295889006e-05, "loss": 0.543, "step": 977 }, { "epoch": 0.020741871858497168, "grad_norm": 0.3372594714164734, "learning_rate": 1.999572321192138e-05, "loss": 0.5536, "step": 978 }, { "epoch": 0.02076308031643019, "grad_norm": 0.39163193106651306, "learning_rate": 1.9995713453835545e-05, "loss": 0.6677, "step": 979 }, { "epoch": 0.020784288774363217, "grad_norm": 0.3177301287651062, "learning_rate": 1.999570368463256e-05, "loss": 0.5624, "step": 980 }, { "epoch": 0.02080549723229624, "grad_norm": 0.35044533014297485, "learning_rate": 1.9995693904312442e-05, "loss": 0.6063, "step": 981 }, { "epoch": 0.020826705690229264, "grad_norm": 0.345533549785614, "learning_rate": 1.9995684112875203e-05, "loss": 0.5728, "step": 982 }, { "epoch": 0.020847914148162287, "grad_norm": 0.3119488060474396, "learning_rate": 1.999567431032085e-05, "loss": 0.5143, "step": 983 }, { "epoch": 0.02086912260609531, "grad_norm": 0.3608759641647339, "learning_rate": 1.999566449664939e-05, "loss": 0.6202, "step": 984 }, { "epoch": 0.020890331064028333, "grad_norm": 0.30901187658309937, "learning_rate": 1.999565467186084e-05, "loss": 0.5561, "step": 985 }, { "epoch": 0.02091153952196136, "grad_norm": 0.3494356572628021, "learning_rate": 1.9995644835955212e-05, "loss": 0.5178, "step": 986 }, { "epoch": 0.020932747979894382, "grad_norm": 0.3294984698295593, "learning_rate": 1.9995634988932516e-05, "loss": 0.5798, "step": 987 }, { "epoch": 0.020953956437827406, "grad_norm": 0.33828067779541016, "learning_rate": 1.9995625130792757e-05, "loss": 0.536, "step": 988 }, { "epoch": 0.02097516489576043, "grad_norm": 0.3033390939235687, "learning_rate": 1.9995615261535954e-05, "loss": 0.5339, "step": 989 }, { "epoch": 0.02099637335369345, "grad_norm": 0.32303309440612793, "learning_rate": 1.9995605381162115e-05, "loss": 0.5246, "step": 990 }, { "epoch": 0.02101758181162648, "grad_norm": 0.3201538920402527, "learning_rate": 1.999559548967125e-05, "loss": 0.4952, "step": 991 }, { "epoch": 0.0210387902695595, "grad_norm": 0.39637091755867004, "learning_rate": 1.9995585587063368e-05, "loss": 0.6174, "step": 992 }, { "epoch": 0.021059998727492524, "grad_norm": 0.34784606099128723, "learning_rate": 1.9995575673338484e-05, "loss": 0.4732, "step": 993 }, { "epoch": 0.021081207185425548, "grad_norm": 0.32173365354537964, "learning_rate": 1.999556574849661e-05, "loss": 0.5273, "step": 994 }, { "epoch": 0.02110241564335857, "grad_norm": 0.3163820803165436, "learning_rate": 1.9995555812537753e-05, "loss": 0.567, "step": 995 }, { "epoch": 0.021123624101291594, "grad_norm": 0.3254072666168213, "learning_rate": 1.9995545865461926e-05, "loss": 0.5961, "step": 996 }, { "epoch": 0.02114483255922462, "grad_norm": 0.7111180424690247, "learning_rate": 1.9995535907269137e-05, "loss": 0.6078, "step": 997 }, { "epoch": 0.021166041017157643, "grad_norm": 0.32866933941841125, "learning_rate": 1.99955259379594e-05, "loss": 0.626, "step": 998 }, { "epoch": 0.021187249475090666, "grad_norm": 0.3670860230922699, "learning_rate": 1.9995515957532728e-05, "loss": 0.533, "step": 999 }, { "epoch": 0.02120845793302369, "grad_norm": 0.32072708010673523, "learning_rate": 1.9995505965989132e-05, "loss": 0.5234, "step": 1000 }, { "epoch": 0.021229666390956713, "grad_norm": 0.6785367727279663, "learning_rate": 1.999549596332862e-05, "loss": 0.5514, "step": 1001 }, { "epoch": 0.021250874848889736, "grad_norm": 0.3282259702682495, "learning_rate": 1.9995485949551202e-05, "loss": 0.5798, "step": 1002 }, { "epoch": 0.021272083306822762, "grad_norm": 0.2986038327217102, "learning_rate": 1.9995475924656893e-05, "loss": 0.5027, "step": 1003 }, { "epoch": 0.021293291764755785, "grad_norm": 0.34117692708969116, "learning_rate": 1.99954658886457e-05, "loss": 0.5887, "step": 1004 }, { "epoch": 0.02131450022268881, "grad_norm": 0.3046203851699829, "learning_rate": 1.999545584151764e-05, "loss": 0.5818, "step": 1005 }, { "epoch": 0.02133570868062183, "grad_norm": 0.3387924134731293, "learning_rate": 1.9995445783272716e-05, "loss": 0.5739, "step": 1006 }, { "epoch": 0.021356917138554855, "grad_norm": 0.38744884729385376, "learning_rate": 1.999543571391095e-05, "loss": 0.5946, "step": 1007 }, { "epoch": 0.021378125596487878, "grad_norm": 0.31010738015174866, "learning_rate": 1.9995425633432343e-05, "loss": 0.5516, "step": 1008 }, { "epoch": 0.021399334054420904, "grad_norm": 0.3105522692203522, "learning_rate": 1.9995415541836913e-05, "loss": 0.4615, "step": 1009 }, { "epoch": 0.021420542512353927, "grad_norm": 0.3538978397846222, "learning_rate": 1.999540543912467e-05, "loss": 0.5289, "step": 1010 }, { "epoch": 0.02144175097028695, "grad_norm": 0.37611573934555054, "learning_rate": 1.999539532529562e-05, "loss": 0.6161, "step": 1011 }, { "epoch": 0.021462959428219974, "grad_norm": 0.32642704248428345, "learning_rate": 1.999538520034978e-05, "loss": 0.5868, "step": 1012 }, { "epoch": 0.021484167886152997, "grad_norm": 0.37879320979118347, "learning_rate": 1.9995375064287158e-05, "loss": 0.555, "step": 1013 }, { "epoch": 0.021505376344086023, "grad_norm": 0.33252671360969543, "learning_rate": 1.9995364917107766e-05, "loss": 0.5356, "step": 1014 }, { "epoch": 0.021526584802019046, "grad_norm": 0.4925040900707245, "learning_rate": 1.999535475881162e-05, "loss": 0.6287, "step": 1015 }, { "epoch": 0.02154779325995207, "grad_norm": 0.3331615626811981, "learning_rate": 1.9995344589398727e-05, "loss": 0.4669, "step": 1016 }, { "epoch": 0.021569001717885092, "grad_norm": 0.3771783411502838, "learning_rate": 1.9995334408869093e-05, "loss": 0.5574, "step": 1017 }, { "epoch": 0.021590210175818116, "grad_norm": 0.36818772554397583, "learning_rate": 1.999532421722274e-05, "loss": 0.5203, "step": 1018 }, { "epoch": 0.02161141863375114, "grad_norm": 0.3192877173423767, "learning_rate": 1.9995314014459675e-05, "loss": 0.4823, "step": 1019 }, { "epoch": 0.021632627091684165, "grad_norm": 0.3074426054954529, "learning_rate": 1.9995303800579905e-05, "loss": 0.5476, "step": 1020 }, { "epoch": 0.02165383554961719, "grad_norm": 0.335205078125, "learning_rate": 1.9995293575583447e-05, "loss": 0.6383, "step": 1021 }, { "epoch": 0.02167504400755021, "grad_norm": 0.3442005515098572, "learning_rate": 1.999528333947031e-05, "loss": 0.5224, "step": 1022 }, { "epoch": 0.021696252465483234, "grad_norm": 0.31695857644081116, "learning_rate": 1.9995273092240503e-05, "loss": 0.6052, "step": 1023 }, { "epoch": 0.021717460923416258, "grad_norm": 0.3317469656467438, "learning_rate": 1.9995262833894044e-05, "loss": 0.5872, "step": 1024 }, { "epoch": 0.02173866938134928, "grad_norm": 0.6343816518783569, "learning_rate": 1.9995252564430938e-05, "loss": 0.621, "step": 1025 }, { "epoch": 0.021759877839282307, "grad_norm": 0.3198464810848236, "learning_rate": 1.99952422838512e-05, "loss": 0.5028, "step": 1026 }, { "epoch": 0.02178108629721533, "grad_norm": 0.3325045704841614, "learning_rate": 1.999523199215484e-05, "loss": 0.5862, "step": 1027 }, { "epoch": 0.021802294755148353, "grad_norm": 0.329731822013855, "learning_rate": 1.999522168934187e-05, "loss": 0.5311, "step": 1028 }, { "epoch": 0.021823503213081377, "grad_norm": 0.35023924708366394, "learning_rate": 1.99952113754123e-05, "loss": 0.5981, "step": 1029 }, { "epoch": 0.0218447116710144, "grad_norm": 0.4117108881473541, "learning_rate": 1.9995201050366145e-05, "loss": 0.5069, "step": 1030 }, { "epoch": 0.021865920128947423, "grad_norm": 0.355259507894516, "learning_rate": 1.9995190714203413e-05, "loss": 0.5546, "step": 1031 }, { "epoch": 0.02188712858688045, "grad_norm": 0.34123194217681885, "learning_rate": 1.999518036692412e-05, "loss": 0.5574, "step": 1032 }, { "epoch": 0.021908337044813472, "grad_norm": 0.31196439266204834, "learning_rate": 1.999517000852827e-05, "loss": 0.5056, "step": 1033 }, { "epoch": 0.021929545502746495, "grad_norm": 0.3072093725204468, "learning_rate": 1.999515963901588e-05, "loss": 0.4843, "step": 1034 }, { "epoch": 0.02195075396067952, "grad_norm": 1.6349029541015625, "learning_rate": 1.9995149258386962e-05, "loss": 0.5566, "step": 1035 }, { "epoch": 0.02197196241861254, "grad_norm": 0.3109411299228668, "learning_rate": 1.999513886664152e-05, "loss": 0.5586, "step": 1036 }, { "epoch": 0.021993170876545565, "grad_norm": 0.33426445722579956, "learning_rate": 1.999512846377958e-05, "loss": 0.5554, "step": 1037 }, { "epoch": 0.02201437933447859, "grad_norm": 0.3917391002178192, "learning_rate": 1.999511804980114e-05, "loss": 0.5368, "step": 1038 }, { "epoch": 0.022035587792411614, "grad_norm": 0.3929995894432068, "learning_rate": 1.9995107624706213e-05, "loss": 0.5515, "step": 1039 }, { "epoch": 0.022056796250344637, "grad_norm": 0.32563188672065735, "learning_rate": 1.9995097188494822e-05, "loss": 0.5635, "step": 1040 }, { "epoch": 0.02207800470827766, "grad_norm": 0.334580659866333, "learning_rate": 1.9995086741166964e-05, "loss": 0.4848, "step": 1041 }, { "epoch": 0.022099213166210684, "grad_norm": 0.36411964893341064, "learning_rate": 1.999507628272266e-05, "loss": 0.5751, "step": 1042 }, { "epoch": 0.02212042162414371, "grad_norm": 0.34039777517318726, "learning_rate": 1.999506581316192e-05, "loss": 0.504, "step": 1043 }, { "epoch": 0.022141630082076733, "grad_norm": 0.36341220140457153, "learning_rate": 1.9995055332484753e-05, "loss": 0.5309, "step": 1044 }, { "epoch": 0.022162838540009756, "grad_norm": 0.32245591282844543, "learning_rate": 1.9995044840691172e-05, "loss": 0.5244, "step": 1045 }, { "epoch": 0.02218404699794278, "grad_norm": 0.41163507103919983, "learning_rate": 1.9995034337781192e-05, "loss": 0.5776, "step": 1046 }, { "epoch": 0.022205255455875803, "grad_norm": 0.35892584919929504, "learning_rate": 1.999502382375482e-05, "loss": 0.5126, "step": 1047 }, { "epoch": 0.022226463913808826, "grad_norm": 0.3082515299320221, "learning_rate": 1.999501329861207e-05, "loss": 0.4734, "step": 1048 }, { "epoch": 0.022247672371741852, "grad_norm": 0.37045207619667053, "learning_rate": 1.9995002762352952e-05, "loss": 0.5815, "step": 1049 }, { "epoch": 0.022268880829674875, "grad_norm": 0.3964911699295044, "learning_rate": 1.9994992214977476e-05, "loss": 0.5285, "step": 1050 }, { "epoch": 0.0222900892876079, "grad_norm": 0.3080492913722992, "learning_rate": 1.999498165648566e-05, "loss": 0.5519, "step": 1051 }, { "epoch": 0.02231129774554092, "grad_norm": 0.3249201476573944, "learning_rate": 1.999497108687751e-05, "loss": 0.5556, "step": 1052 }, { "epoch": 0.022332506203473945, "grad_norm": 0.32564789056777954, "learning_rate": 1.9994960506153043e-05, "loss": 0.5426, "step": 1053 }, { "epoch": 0.022353714661406968, "grad_norm": 0.5584274530410767, "learning_rate": 1.9994949914312264e-05, "loss": 0.5931, "step": 1054 }, { "epoch": 0.022374923119339994, "grad_norm": 0.3218996822834015, "learning_rate": 1.9994939311355194e-05, "loss": 0.5405, "step": 1055 }, { "epoch": 0.022396131577273017, "grad_norm": 0.4242440164089203, "learning_rate": 1.9994928697281836e-05, "loss": 0.5365, "step": 1056 }, { "epoch": 0.02241734003520604, "grad_norm": 0.32700833678245544, "learning_rate": 1.9994918072092204e-05, "loss": 0.5518, "step": 1057 }, { "epoch": 0.022438548493139063, "grad_norm": 0.3153741955757141, "learning_rate": 1.999490743578631e-05, "loss": 0.5826, "step": 1058 }, { "epoch": 0.022459756951072087, "grad_norm": 0.3319450914859772, "learning_rate": 1.9994896788364172e-05, "loss": 0.5975, "step": 1059 }, { "epoch": 0.02248096540900511, "grad_norm": 0.32822179794311523, "learning_rate": 1.9994886129825792e-05, "loss": 0.5783, "step": 1060 }, { "epoch": 0.022502173866938136, "grad_norm": 0.424462229013443, "learning_rate": 1.999487546017119e-05, "loss": 0.5999, "step": 1061 }, { "epoch": 0.02252338232487116, "grad_norm": 0.3193240165710449, "learning_rate": 1.999486477940037e-05, "loss": 0.5642, "step": 1062 }, { "epoch": 0.022544590782804182, "grad_norm": 0.3340188264846802, "learning_rate": 1.9994854087513352e-05, "loss": 0.6373, "step": 1063 }, { "epoch": 0.022565799240737205, "grad_norm": 0.304878294467926, "learning_rate": 1.9994843384510144e-05, "loss": 0.5358, "step": 1064 }, { "epoch": 0.02258700769867023, "grad_norm": 0.35051339864730835, "learning_rate": 1.9994832670390756e-05, "loss": 0.5214, "step": 1065 }, { "epoch": 0.022608216156603255, "grad_norm": 0.3796381652355194, "learning_rate": 1.9994821945155203e-05, "loss": 0.4826, "step": 1066 }, { "epoch": 0.022629424614536278, "grad_norm": 0.3106641173362732, "learning_rate": 1.999481120880349e-05, "loss": 0.5856, "step": 1067 }, { "epoch": 0.0226506330724693, "grad_norm": 0.29790595173835754, "learning_rate": 1.9994800461335645e-05, "loss": 0.6192, "step": 1068 }, { "epoch": 0.022671841530402324, "grad_norm": 0.3636934459209442, "learning_rate": 1.9994789702751666e-05, "loss": 0.5265, "step": 1069 }, { "epoch": 0.022693049988335347, "grad_norm": 0.3311063051223755, "learning_rate": 1.9994778933051566e-05, "loss": 0.5735, "step": 1070 }, { "epoch": 0.02271425844626837, "grad_norm": 0.3137318789958954, "learning_rate": 1.999476815223536e-05, "loss": 0.5661, "step": 1071 }, { "epoch": 0.022735466904201397, "grad_norm": 0.346113920211792, "learning_rate": 1.999475736030306e-05, "loss": 0.5531, "step": 1072 }, { "epoch": 0.02275667536213442, "grad_norm": 0.3289104402065277, "learning_rate": 1.999474655725468e-05, "loss": 0.4933, "step": 1073 }, { "epoch": 0.022777883820067443, "grad_norm": 0.36213934421539307, "learning_rate": 1.999473574309023e-05, "loss": 0.5343, "step": 1074 }, { "epoch": 0.022799092278000466, "grad_norm": 0.36181652545928955, "learning_rate": 1.9994724917809722e-05, "loss": 0.6079, "step": 1075 }, { "epoch": 0.02282030073593349, "grad_norm": 0.32586443424224854, "learning_rate": 1.9994714081413163e-05, "loss": 0.5845, "step": 1076 }, { "epoch": 0.022841509193866513, "grad_norm": 0.3110719919204712, "learning_rate": 1.9994703233900574e-05, "loss": 0.5685, "step": 1077 }, { "epoch": 0.02286271765179954, "grad_norm": 0.3135002553462982, "learning_rate": 1.9994692375271962e-05, "loss": 0.5225, "step": 1078 }, { "epoch": 0.022883926109732562, "grad_norm": 0.3630043864250183, "learning_rate": 1.999468150552734e-05, "loss": 0.6319, "step": 1079 }, { "epoch": 0.022905134567665585, "grad_norm": 0.32846108078956604, "learning_rate": 1.9994670624666722e-05, "loss": 0.4262, "step": 1080 }, { "epoch": 0.02292634302559861, "grad_norm": 0.32541191577911377, "learning_rate": 1.9994659732690114e-05, "loss": 0.5869, "step": 1081 }, { "epoch": 0.02294755148353163, "grad_norm": 0.38361656665802, "learning_rate": 1.9994648829597534e-05, "loss": 0.5182, "step": 1082 }, { "epoch": 0.022968759941464655, "grad_norm": 0.29630064964294434, "learning_rate": 1.9994637915388992e-05, "loss": 0.6072, "step": 1083 }, { "epoch": 0.02298996839939768, "grad_norm": 0.3493182361125946, "learning_rate": 1.99946269900645e-05, "loss": 0.5666, "step": 1084 }, { "epoch": 0.023011176857330704, "grad_norm": 0.3583405613899231, "learning_rate": 1.999461605362407e-05, "loss": 0.5771, "step": 1085 }, { "epoch": 0.023032385315263727, "grad_norm": 0.31989631056785583, "learning_rate": 1.9994605106067722e-05, "loss": 0.4944, "step": 1086 }, { "epoch": 0.02305359377319675, "grad_norm": 0.3054082989692688, "learning_rate": 1.9994594147395453e-05, "loss": 0.4644, "step": 1087 }, { "epoch": 0.023074802231129773, "grad_norm": 0.34519264101982117, "learning_rate": 1.9994583177607288e-05, "loss": 0.509, "step": 1088 }, { "epoch": 0.023096010689062797, "grad_norm": 0.33902645111083984, "learning_rate": 1.999457219670323e-05, "loss": 0.5582, "step": 1089 }, { "epoch": 0.023117219146995823, "grad_norm": 0.4349386394023895, "learning_rate": 1.99945612046833e-05, "loss": 0.5253, "step": 1090 }, { "epoch": 0.023138427604928846, "grad_norm": 0.34461644291877747, "learning_rate": 1.9994550201547502e-05, "loss": 0.5625, "step": 1091 }, { "epoch": 0.02315963606286187, "grad_norm": 0.3321519196033478, "learning_rate": 1.9994539187295854e-05, "loss": 0.5738, "step": 1092 }, { "epoch": 0.023180844520794892, "grad_norm": 0.3499690890312195, "learning_rate": 1.999452816192837e-05, "loss": 0.6675, "step": 1093 }, { "epoch": 0.023202052978727915, "grad_norm": 0.3435845971107483, "learning_rate": 1.999451712544505e-05, "loss": 0.5825, "step": 1094 }, { "epoch": 0.023223261436660942, "grad_norm": 0.3268028199672699, "learning_rate": 1.9994506077845922e-05, "loss": 0.5433, "step": 1095 }, { "epoch": 0.023244469894593965, "grad_norm": 0.30088961124420166, "learning_rate": 1.9994495019130987e-05, "loss": 0.514, "step": 1096 }, { "epoch": 0.023265678352526988, "grad_norm": 0.3258654773235321, "learning_rate": 1.9994483949300263e-05, "loss": 0.4768, "step": 1097 }, { "epoch": 0.02328688681046001, "grad_norm": 0.3154873549938202, "learning_rate": 1.9994472868353762e-05, "loss": 0.5335, "step": 1098 }, { "epoch": 0.023308095268393034, "grad_norm": 0.3739584982395172, "learning_rate": 1.9994461776291496e-05, "loss": 0.5673, "step": 1099 }, { "epoch": 0.023329303726326057, "grad_norm": 0.3029865622520447, "learning_rate": 1.9994450673113474e-05, "loss": 0.5747, "step": 1100 }, { "epoch": 0.023350512184259084, "grad_norm": 0.33896347880363464, "learning_rate": 1.999443955881971e-05, "loss": 0.5662, "step": 1101 }, { "epoch": 0.023371720642192107, "grad_norm": 0.35510939359664917, "learning_rate": 1.9994428433410218e-05, "loss": 0.6193, "step": 1102 }, { "epoch": 0.02339292910012513, "grad_norm": 0.3325559198856354, "learning_rate": 1.999441729688501e-05, "loss": 0.5457, "step": 1103 }, { "epoch": 0.023414137558058153, "grad_norm": 0.3315606415271759, "learning_rate": 1.9994406149244098e-05, "loss": 0.6376, "step": 1104 }, { "epoch": 0.023435346015991176, "grad_norm": 0.35934823751449585, "learning_rate": 1.9994394990487497e-05, "loss": 0.5162, "step": 1105 }, { "epoch": 0.0234565544739242, "grad_norm": 0.3545888066291809, "learning_rate": 1.9994383820615212e-05, "loss": 0.5858, "step": 1106 }, { "epoch": 0.023477762931857226, "grad_norm": 0.3287457823753357, "learning_rate": 1.9994372639627263e-05, "loss": 0.4891, "step": 1107 }, { "epoch": 0.02349897138979025, "grad_norm": 0.34573328495025635, "learning_rate": 1.9994361447523657e-05, "loss": 0.5926, "step": 1108 }, { "epoch": 0.023520179847723272, "grad_norm": 0.3391743302345276, "learning_rate": 1.9994350244304413e-05, "loss": 0.6098, "step": 1109 }, { "epoch": 0.023541388305656295, "grad_norm": 0.3375917375087738, "learning_rate": 1.9994339029969536e-05, "loss": 0.5855, "step": 1110 }, { "epoch": 0.02356259676358932, "grad_norm": 0.40237873792648315, "learning_rate": 1.9994327804519042e-05, "loss": 0.6052, "step": 1111 }, { "epoch": 0.02358380522152234, "grad_norm": 0.3452609181404114, "learning_rate": 1.9994316567952948e-05, "loss": 0.6243, "step": 1112 }, { "epoch": 0.023605013679455368, "grad_norm": 0.36844220757484436, "learning_rate": 1.9994305320271258e-05, "loss": 0.6181, "step": 1113 }, { "epoch": 0.02362622213738839, "grad_norm": 0.3521406650543213, "learning_rate": 1.9994294061473988e-05, "loss": 0.6367, "step": 1114 }, { "epoch": 0.023647430595321414, "grad_norm": 0.39873892068862915, "learning_rate": 1.9994282791561153e-05, "loss": 0.5606, "step": 1115 }, { "epoch": 0.023668639053254437, "grad_norm": 0.35456162691116333, "learning_rate": 1.999427151053276e-05, "loss": 0.5377, "step": 1116 }, { "epoch": 0.02368984751118746, "grad_norm": 0.39491841197013855, "learning_rate": 1.9994260218388828e-05, "loss": 0.6216, "step": 1117 }, { "epoch": 0.023711055969120487, "grad_norm": 0.3297867178916931, "learning_rate": 1.9994248915129365e-05, "loss": 0.6152, "step": 1118 }, { "epoch": 0.02373226442705351, "grad_norm": 0.31440916657447815, "learning_rate": 1.9994237600754388e-05, "loss": 0.5087, "step": 1119 }, { "epoch": 0.023753472884986533, "grad_norm": 0.3463260233402252, "learning_rate": 1.9994226275263905e-05, "loss": 0.5953, "step": 1120 }, { "epoch": 0.023774681342919556, "grad_norm": 1.2917344570159912, "learning_rate": 1.9994214938657928e-05, "loss": 0.5542, "step": 1121 }, { "epoch": 0.02379588980085258, "grad_norm": 0.5940647125244141, "learning_rate": 1.9994203590936477e-05, "loss": 0.561, "step": 1122 }, { "epoch": 0.023817098258785602, "grad_norm": 0.33609306812286377, "learning_rate": 1.9994192232099557e-05, "loss": 0.6057, "step": 1123 }, { "epoch": 0.02383830671671863, "grad_norm": 0.29962387681007385, "learning_rate": 1.999418086214718e-05, "loss": 0.4908, "step": 1124 }, { "epoch": 0.023859515174651652, "grad_norm": 0.3421957492828369, "learning_rate": 1.999416948107937e-05, "loss": 0.5966, "step": 1125 }, { "epoch": 0.023880723632584675, "grad_norm": 0.3084448277950287, "learning_rate": 1.9994158088896122e-05, "loss": 0.5263, "step": 1126 }, { "epoch": 0.023901932090517698, "grad_norm": 0.3114085793495178, "learning_rate": 1.9994146685597468e-05, "loss": 0.4961, "step": 1127 }, { "epoch": 0.02392314054845072, "grad_norm": 0.33289864659309387, "learning_rate": 1.9994135271183403e-05, "loss": 0.5597, "step": 1128 }, { "epoch": 0.023944349006383744, "grad_norm": 0.32050150632858276, "learning_rate": 1.999412384565395e-05, "loss": 0.535, "step": 1129 }, { "epoch": 0.02396555746431677, "grad_norm": 0.3519909381866455, "learning_rate": 1.999411240900912e-05, "loss": 0.5586, "step": 1130 }, { "epoch": 0.023986765922249794, "grad_norm": 0.3641664683818817, "learning_rate": 1.9994100961248922e-05, "loss": 0.5738, "step": 1131 }, { "epoch": 0.024007974380182817, "grad_norm": 0.35201919078826904, "learning_rate": 1.9994089502373376e-05, "loss": 0.6481, "step": 1132 }, { "epoch": 0.02402918283811584, "grad_norm": 0.35472288727760315, "learning_rate": 1.999407803238249e-05, "loss": 0.6024, "step": 1133 }, { "epoch": 0.024050391296048863, "grad_norm": 0.3677460551261902, "learning_rate": 1.9994066551276275e-05, "loss": 0.587, "step": 1134 }, { "epoch": 0.024071599753981886, "grad_norm": 0.3738883137702942, "learning_rate": 1.9994055059054747e-05, "loss": 0.4897, "step": 1135 }, { "epoch": 0.024092808211914913, "grad_norm": 0.33945658802986145, "learning_rate": 1.9994043555717916e-05, "loss": 0.5569, "step": 1136 }, { "epoch": 0.024114016669847936, "grad_norm": 0.32860228419303894, "learning_rate": 1.9994032041265798e-05, "loss": 0.5622, "step": 1137 }, { "epoch": 0.02413522512778096, "grad_norm": 0.33429840207099915, "learning_rate": 1.9994020515698405e-05, "loss": 0.6698, "step": 1138 }, { "epoch": 0.024156433585713982, "grad_norm": 0.3160455524921417, "learning_rate": 1.999400897901575e-05, "loss": 0.5243, "step": 1139 }, { "epoch": 0.024177642043647005, "grad_norm": 0.36250871419906616, "learning_rate": 1.9993997431217843e-05, "loss": 0.5807, "step": 1140 }, { "epoch": 0.02419885050158003, "grad_norm": 0.3155589699745178, "learning_rate": 1.99939858723047e-05, "loss": 0.5339, "step": 1141 }, { "epoch": 0.024220058959513055, "grad_norm": 0.3634220063686371, "learning_rate": 1.9993974302276332e-05, "loss": 0.6103, "step": 1142 }, { "epoch": 0.024241267417446078, "grad_norm": 0.4157020151615143, "learning_rate": 1.9993962721132753e-05, "loss": 0.6372, "step": 1143 }, { "epoch": 0.0242624758753791, "grad_norm": 0.3319365084171295, "learning_rate": 1.9993951128873977e-05, "loss": 0.5953, "step": 1144 }, { "epoch": 0.024283684333312124, "grad_norm": 0.33867090940475464, "learning_rate": 1.999393952550001e-05, "loss": 0.6141, "step": 1145 }, { "epoch": 0.024304892791245147, "grad_norm": 0.2981909513473511, "learning_rate": 1.9993927911010876e-05, "loss": 0.5198, "step": 1146 }, { "epoch": 0.024326101249178174, "grad_norm": 0.355169415473938, "learning_rate": 1.999391628540658e-05, "loss": 0.5053, "step": 1147 }, { "epoch": 0.024347309707111197, "grad_norm": 0.5000734329223633, "learning_rate": 1.9993904648687138e-05, "loss": 0.5668, "step": 1148 }, { "epoch": 0.02436851816504422, "grad_norm": 0.33928459882736206, "learning_rate": 1.999389300085256e-05, "loss": 0.5534, "step": 1149 }, { "epoch": 0.024389726622977243, "grad_norm": 0.2829208970069885, "learning_rate": 1.9993881341902867e-05, "loss": 0.4923, "step": 1150 }, { "epoch": 0.024410935080910266, "grad_norm": 0.31083810329437256, "learning_rate": 1.999386967183806e-05, "loss": 0.5136, "step": 1151 }, { "epoch": 0.02443214353884329, "grad_norm": 0.33909833431243896, "learning_rate": 1.9993857990658162e-05, "loss": 0.5616, "step": 1152 }, { "epoch": 0.024453351996776316, "grad_norm": 0.34168729186058044, "learning_rate": 1.999384629836318e-05, "loss": 0.5515, "step": 1153 }, { "epoch": 0.02447456045470934, "grad_norm": 0.683480978012085, "learning_rate": 1.9993834594953124e-05, "loss": 0.5687, "step": 1154 }, { "epoch": 0.024495768912642362, "grad_norm": 0.30611518025398254, "learning_rate": 1.999382288042802e-05, "loss": 0.5435, "step": 1155 }, { "epoch": 0.024516977370575385, "grad_norm": 0.48352816700935364, "learning_rate": 1.9993811154787866e-05, "loss": 0.5193, "step": 1156 }, { "epoch": 0.024538185828508408, "grad_norm": 0.3494653105735779, "learning_rate": 1.9993799418032686e-05, "loss": 0.5163, "step": 1157 }, { "epoch": 0.02455939428644143, "grad_norm": 0.3331741988658905, "learning_rate": 1.999378767016249e-05, "loss": 0.6163, "step": 1158 }, { "epoch": 0.024580602744374458, "grad_norm": 0.3994694650173187, "learning_rate": 1.999377591117729e-05, "loss": 0.6055, "step": 1159 }, { "epoch": 0.02460181120230748, "grad_norm": 0.5102160573005676, "learning_rate": 1.9993764141077098e-05, "loss": 0.5389, "step": 1160 }, { "epoch": 0.024623019660240504, "grad_norm": 0.6728770732879639, "learning_rate": 1.9993752359861928e-05, "loss": 0.5721, "step": 1161 }, { "epoch": 0.024644228118173527, "grad_norm": 0.34327080845832825, "learning_rate": 1.999374056753179e-05, "loss": 0.5794, "step": 1162 }, { "epoch": 0.02466543657610655, "grad_norm": 0.31892022490501404, "learning_rate": 1.999372876408671e-05, "loss": 0.4682, "step": 1163 }, { "epoch": 0.024686645034039573, "grad_norm": 0.3700375556945801, "learning_rate": 1.9993716949526683e-05, "loss": 0.576, "step": 1164 }, { "epoch": 0.0247078534919726, "grad_norm": 0.31013166904449463, "learning_rate": 1.9993705123851733e-05, "loss": 0.585, "step": 1165 }, { "epoch": 0.024729061949905623, "grad_norm": 0.33348020911216736, "learning_rate": 1.9993693287061874e-05, "loss": 0.6409, "step": 1166 }, { "epoch": 0.024750270407838646, "grad_norm": 0.3407253921031952, "learning_rate": 1.9993681439157113e-05, "loss": 0.6035, "step": 1167 }, { "epoch": 0.02477147886577167, "grad_norm": 0.30926668643951416, "learning_rate": 1.999366958013747e-05, "loss": 0.4873, "step": 1168 }, { "epoch": 0.024792687323704692, "grad_norm": 0.3141099810600281, "learning_rate": 1.999365771000295e-05, "loss": 0.5391, "step": 1169 }, { "epoch": 0.02481389578163772, "grad_norm": 0.322692334651947, "learning_rate": 1.9993645828753574e-05, "loss": 0.5406, "step": 1170 }, { "epoch": 0.024835104239570742, "grad_norm": 0.3748439848423004, "learning_rate": 1.999363393638935e-05, "loss": 0.5105, "step": 1171 }, { "epoch": 0.024856312697503765, "grad_norm": 0.340665340423584, "learning_rate": 1.999362203291029e-05, "loss": 0.5627, "step": 1172 }, { "epoch": 0.024877521155436788, "grad_norm": 0.3761069178581238, "learning_rate": 1.9993610118316417e-05, "loss": 0.5868, "step": 1173 }, { "epoch": 0.02489872961336981, "grad_norm": 0.35287541151046753, "learning_rate": 1.9993598192607736e-05, "loss": 0.5476, "step": 1174 }, { "epoch": 0.024919938071302834, "grad_norm": 0.3239499032497406, "learning_rate": 1.9993586255784258e-05, "loss": 0.4808, "step": 1175 }, { "epoch": 0.02494114652923586, "grad_norm": 0.3751889169216156, "learning_rate": 1.9993574307846003e-05, "loss": 0.6192, "step": 1176 }, { "epoch": 0.024962354987168884, "grad_norm": 0.3739444613456726, "learning_rate": 1.999356234879298e-05, "loss": 0.599, "step": 1177 }, { "epoch": 0.024983563445101907, "grad_norm": 0.37200114130973816, "learning_rate": 1.999355037862521e-05, "loss": 0.5628, "step": 1178 }, { "epoch": 0.02500477190303493, "grad_norm": 0.328281432390213, "learning_rate": 1.999353839734269e-05, "loss": 0.5741, "step": 1179 }, { "epoch": 0.025025980360967953, "grad_norm": 0.3498342037200928, "learning_rate": 1.999352640494545e-05, "loss": 0.5323, "step": 1180 }, { "epoch": 0.025047188818900976, "grad_norm": 0.34031498432159424, "learning_rate": 1.9993514401433497e-05, "loss": 0.5336, "step": 1181 }, { "epoch": 0.025068397276834003, "grad_norm": 0.44234561920166016, "learning_rate": 1.999350238680684e-05, "loss": 0.5973, "step": 1182 }, { "epoch": 0.025089605734767026, "grad_norm": 0.32732975482940674, "learning_rate": 1.99934903610655e-05, "loss": 0.6006, "step": 1183 }, { "epoch": 0.02511081419270005, "grad_norm": 0.35561496019363403, "learning_rate": 1.9993478324209485e-05, "loss": 0.5136, "step": 1184 }, { "epoch": 0.025132022650633072, "grad_norm": 0.3221929669380188, "learning_rate": 1.999346627623881e-05, "loss": 0.5041, "step": 1185 }, { "epoch": 0.025153231108566095, "grad_norm": 0.34234702587127686, "learning_rate": 1.999345421715349e-05, "loss": 0.5892, "step": 1186 }, { "epoch": 0.02517443956649912, "grad_norm": 0.32967591285705566, "learning_rate": 1.9993442146953537e-05, "loss": 0.5423, "step": 1187 }, { "epoch": 0.025195648024432145, "grad_norm": 0.29898926615715027, "learning_rate": 1.9993430065638964e-05, "loss": 0.5691, "step": 1188 }, { "epoch": 0.025216856482365168, "grad_norm": 0.34510329365730286, "learning_rate": 1.9993417973209784e-05, "loss": 0.6154, "step": 1189 }, { "epoch": 0.02523806494029819, "grad_norm": 0.35180211067199707, "learning_rate": 1.9993405869666014e-05, "loss": 0.5864, "step": 1190 }, { "epoch": 0.025259273398231214, "grad_norm": 0.30399519205093384, "learning_rate": 1.999339375500766e-05, "loss": 0.5869, "step": 1191 }, { "epoch": 0.025280481856164237, "grad_norm": 0.3162119388580322, "learning_rate": 1.9993381629234748e-05, "loss": 0.5691, "step": 1192 }, { "epoch": 0.02530169031409726, "grad_norm": 0.3472139239311218, "learning_rate": 1.9993369492347278e-05, "loss": 0.5479, "step": 1193 }, { "epoch": 0.025322898772030287, "grad_norm": 0.3804076910018921, "learning_rate": 1.999335734434527e-05, "loss": 0.5419, "step": 1194 }, { "epoch": 0.02534410722996331, "grad_norm": 0.2996656596660614, "learning_rate": 1.9993345185228735e-05, "loss": 0.4846, "step": 1195 }, { "epoch": 0.025365315687896333, "grad_norm": 0.4532587230205536, "learning_rate": 1.9993333014997695e-05, "loss": 0.6393, "step": 1196 }, { "epoch": 0.025386524145829356, "grad_norm": 0.318452388048172, "learning_rate": 1.999332083365215e-05, "loss": 0.4714, "step": 1197 }, { "epoch": 0.02540773260376238, "grad_norm": 0.35099461674690247, "learning_rate": 1.9993308641192122e-05, "loss": 0.4954, "step": 1198 }, { "epoch": 0.025428941061695406, "grad_norm": 0.4097159206867218, "learning_rate": 1.9993296437617624e-05, "loss": 0.53, "step": 1199 }, { "epoch": 0.02545014951962843, "grad_norm": 0.34025585651397705, "learning_rate": 1.9993284222928667e-05, "loss": 0.6382, "step": 1200 }, { "epoch": 0.025471357977561452, "grad_norm": 0.3255443274974823, "learning_rate": 1.999327199712527e-05, "loss": 0.6259, "step": 1201 }, { "epoch": 0.025492566435494475, "grad_norm": 0.3343309164047241, "learning_rate": 1.9993259760207437e-05, "loss": 0.5173, "step": 1202 }, { "epoch": 0.025513774893427498, "grad_norm": 0.32505327463150024, "learning_rate": 1.999324751217519e-05, "loss": 0.5794, "step": 1203 }, { "epoch": 0.02553498335136052, "grad_norm": 0.32770517468452454, "learning_rate": 1.999323525302854e-05, "loss": 0.5981, "step": 1204 }, { "epoch": 0.025556191809293548, "grad_norm": 0.31418249011039734, "learning_rate": 1.99932229827675e-05, "loss": 0.5446, "step": 1205 }, { "epoch": 0.02557740026722657, "grad_norm": 0.3376062214374542, "learning_rate": 1.9993210701392084e-05, "loss": 0.5923, "step": 1206 }, { "epoch": 0.025598608725159594, "grad_norm": 0.4560432732105255, "learning_rate": 1.9993198408902307e-05, "loss": 0.5079, "step": 1207 }, { "epoch": 0.025619817183092617, "grad_norm": 0.3238236904144287, "learning_rate": 1.9993186105298185e-05, "loss": 0.5648, "step": 1208 }, { "epoch": 0.02564102564102564, "grad_norm": 0.3497025668621063, "learning_rate": 1.9993173790579722e-05, "loss": 0.4009, "step": 1209 }, { "epoch": 0.025662234098958663, "grad_norm": 0.3039166033267975, "learning_rate": 1.999316146474694e-05, "loss": 0.5208, "step": 1210 }, { "epoch": 0.02568344255689169, "grad_norm": 0.36255964636802673, "learning_rate": 1.999314912779985e-05, "loss": 0.4741, "step": 1211 }, { "epoch": 0.025704651014824713, "grad_norm": 0.35387173295021057, "learning_rate": 1.999313677973847e-05, "loss": 0.5576, "step": 1212 }, { "epoch": 0.025725859472757736, "grad_norm": 0.3671327829360962, "learning_rate": 1.9993124420562808e-05, "loss": 0.6315, "step": 1213 }, { "epoch": 0.02574706793069076, "grad_norm": 0.3113884925842285, "learning_rate": 1.999311205027288e-05, "loss": 0.5981, "step": 1214 }, { "epoch": 0.025768276388623782, "grad_norm": 0.3603948652744293, "learning_rate": 1.9993099668868695e-05, "loss": 0.7078, "step": 1215 }, { "epoch": 0.025789484846556805, "grad_norm": 0.32220929861068726, "learning_rate": 1.9993087276350277e-05, "loss": 0.5155, "step": 1216 }, { "epoch": 0.025810693304489832, "grad_norm": 0.3209960460662842, "learning_rate": 1.999307487271763e-05, "loss": 0.5846, "step": 1217 }, { "epoch": 0.025831901762422855, "grad_norm": 0.3133351504802704, "learning_rate": 1.9993062457970778e-05, "loss": 0.5932, "step": 1218 }, { "epoch": 0.025853110220355878, "grad_norm": 0.3667941391468048, "learning_rate": 1.9993050032109725e-05, "loss": 0.5097, "step": 1219 }, { "epoch": 0.0258743186782889, "grad_norm": 0.3406156599521637, "learning_rate": 1.9993037595134488e-05, "loss": 0.4653, "step": 1220 }, { "epoch": 0.025895527136221924, "grad_norm": 0.707590639591217, "learning_rate": 1.999302514704508e-05, "loss": 0.5314, "step": 1221 }, { "epoch": 0.02591673559415495, "grad_norm": 0.33082345128059387, "learning_rate": 1.999301268784152e-05, "loss": 0.582, "step": 1222 }, { "epoch": 0.025937944052087974, "grad_norm": 0.3228348195552826, "learning_rate": 1.9993000217523817e-05, "loss": 0.5309, "step": 1223 }, { "epoch": 0.025959152510020997, "grad_norm": 0.34069353342056274, "learning_rate": 1.9992987736091988e-05, "loss": 0.5627, "step": 1224 }, { "epoch": 0.02598036096795402, "grad_norm": 0.32726290822029114, "learning_rate": 1.999297524354604e-05, "loss": 0.5405, "step": 1225 }, { "epoch": 0.026001569425887043, "grad_norm": 0.33963537216186523, "learning_rate": 1.9992962739885993e-05, "loss": 0.5426, "step": 1226 }, { "epoch": 0.026022777883820066, "grad_norm": 0.45731282234191895, "learning_rate": 1.999295022511186e-05, "loss": 0.5493, "step": 1227 }, { "epoch": 0.026043986341753093, "grad_norm": 0.3294660747051239, "learning_rate": 1.999293769922366e-05, "loss": 0.4725, "step": 1228 }, { "epoch": 0.026065194799686116, "grad_norm": 0.343619167804718, "learning_rate": 1.9992925162221393e-05, "loss": 0.5815, "step": 1229 }, { "epoch": 0.02608640325761914, "grad_norm": 0.43612733483314514, "learning_rate": 1.9992912614105083e-05, "loss": 0.638, "step": 1230 }, { "epoch": 0.026107611715552162, "grad_norm": 0.36632683873176575, "learning_rate": 1.9992900054874748e-05, "loss": 0.6151, "step": 1231 }, { "epoch": 0.026128820173485185, "grad_norm": 0.30544164776802063, "learning_rate": 1.9992887484530392e-05, "loss": 0.5203, "step": 1232 }, { "epoch": 0.026150028631418208, "grad_norm": 0.3340272903442383, "learning_rate": 1.9992874903072032e-05, "loss": 0.5995, "step": 1233 }, { "epoch": 0.026171237089351235, "grad_norm": 0.48275646567344666, "learning_rate": 1.999286231049969e-05, "loss": 0.5123, "step": 1234 }, { "epoch": 0.026192445547284258, "grad_norm": 0.3953699767589569, "learning_rate": 1.9992849706813365e-05, "loss": 0.556, "step": 1235 }, { "epoch": 0.02621365400521728, "grad_norm": 0.3485969603061676, "learning_rate": 1.9992837092013085e-05, "loss": 0.6312, "step": 1236 }, { "epoch": 0.026234862463150304, "grad_norm": 0.3319435119628906, "learning_rate": 1.9992824466098858e-05, "loss": 0.4616, "step": 1237 }, { "epoch": 0.026256070921083327, "grad_norm": 0.44861385226249695, "learning_rate": 1.9992811829070695e-05, "loss": 0.5066, "step": 1238 }, { "epoch": 0.02627727937901635, "grad_norm": 0.3641761839389801, "learning_rate": 1.9992799180928616e-05, "loss": 0.5487, "step": 1239 }, { "epoch": 0.026298487836949377, "grad_norm": 0.4309374690055847, "learning_rate": 1.9992786521672635e-05, "loss": 0.5416, "step": 1240 }, { "epoch": 0.0263196962948824, "grad_norm": 0.3655303418636322, "learning_rate": 1.9992773851302758e-05, "loss": 0.5645, "step": 1241 }, { "epoch": 0.026340904752815423, "grad_norm": 0.3428822457790375, "learning_rate": 1.999276116981901e-05, "loss": 0.6618, "step": 1242 }, { "epoch": 0.026362113210748446, "grad_norm": 0.39701834321022034, "learning_rate": 1.9992748477221395e-05, "loss": 0.6065, "step": 1243 }, { "epoch": 0.02638332166868147, "grad_norm": 0.3961612582206726, "learning_rate": 1.9992735773509933e-05, "loss": 0.5812, "step": 1244 }, { "epoch": 0.026404530126614492, "grad_norm": 0.35424715280532837, "learning_rate": 1.999272305868464e-05, "loss": 0.5898, "step": 1245 }, { "epoch": 0.02642573858454752, "grad_norm": 0.3353767991065979, "learning_rate": 1.9992710332745523e-05, "loss": 0.5328, "step": 1246 }, { "epoch": 0.026446947042480542, "grad_norm": 0.362174928188324, "learning_rate": 1.9992697595692604e-05, "loss": 0.5331, "step": 1247 }, { "epoch": 0.026468155500413565, "grad_norm": 0.35540571808815, "learning_rate": 1.9992684847525895e-05, "loss": 0.6123, "step": 1248 }, { "epoch": 0.026489363958346588, "grad_norm": 0.8546055555343628, "learning_rate": 1.9992672088245406e-05, "loss": 0.5011, "step": 1249 }, { "epoch": 0.02651057241627961, "grad_norm": 0.3201626241207123, "learning_rate": 1.9992659317851153e-05, "loss": 0.6256, "step": 1250 }, { "epoch": 0.026531780874212638, "grad_norm": 0.32531997561454773, "learning_rate": 1.9992646536343154e-05, "loss": 0.5841, "step": 1251 }, { "epoch": 0.02655298933214566, "grad_norm": 0.3318432569503784, "learning_rate": 1.999263374372142e-05, "loss": 0.3813, "step": 1252 }, { "epoch": 0.026574197790078684, "grad_norm": 0.35374733805656433, "learning_rate": 1.999262093998596e-05, "loss": 0.5407, "step": 1253 }, { "epoch": 0.026595406248011707, "grad_norm": 0.3482794165611267, "learning_rate": 1.99926081251368e-05, "loss": 0.5746, "step": 1254 }, { "epoch": 0.02661661470594473, "grad_norm": 0.3632204234600067, "learning_rate": 1.9992595299173943e-05, "loss": 0.5272, "step": 1255 }, { "epoch": 0.026637823163877753, "grad_norm": 0.36404669284820557, "learning_rate": 1.9992582462097415e-05, "loss": 0.5406, "step": 1256 }, { "epoch": 0.02665903162181078, "grad_norm": 0.33548039197921753, "learning_rate": 1.9992569613907218e-05, "loss": 0.5139, "step": 1257 }, { "epoch": 0.026680240079743803, "grad_norm": 0.3354564905166626, "learning_rate": 1.9992556754603372e-05, "loss": 0.5248, "step": 1258 }, { "epoch": 0.026701448537676826, "grad_norm": 0.30907678604125977, "learning_rate": 1.9992543884185896e-05, "loss": 0.6224, "step": 1259 }, { "epoch": 0.02672265699560985, "grad_norm": 0.34065333008766174, "learning_rate": 1.9992531002654795e-05, "loss": 0.4389, "step": 1260 }, { "epoch": 0.026743865453542872, "grad_norm": 0.3300098478794098, "learning_rate": 1.999251811001009e-05, "loss": 0.545, "step": 1261 }, { "epoch": 0.026765073911475895, "grad_norm": 0.3300396800041199, "learning_rate": 1.9992505206251793e-05, "loss": 0.568, "step": 1262 }, { "epoch": 0.02678628236940892, "grad_norm": 0.37160706520080566, "learning_rate": 1.9992492291379917e-05, "loss": 0.5714, "step": 1263 }, { "epoch": 0.026807490827341945, "grad_norm": 0.32172083854675293, "learning_rate": 1.999247936539448e-05, "loss": 0.5537, "step": 1264 }, { "epoch": 0.026828699285274968, "grad_norm": 0.3214629292488098, "learning_rate": 1.999246642829549e-05, "loss": 0.5892, "step": 1265 }, { "epoch": 0.02684990774320799, "grad_norm": 0.33026981353759766, "learning_rate": 1.999245348008297e-05, "loss": 0.5721, "step": 1266 }, { "epoch": 0.026871116201141014, "grad_norm": 0.29978883266448975, "learning_rate": 1.999244052075693e-05, "loss": 0.4559, "step": 1267 }, { "epoch": 0.026892324659074037, "grad_norm": 0.3288995921611786, "learning_rate": 1.9992427550317383e-05, "loss": 0.4643, "step": 1268 }, { "epoch": 0.026913533117007064, "grad_norm": 0.3607150614261627, "learning_rate": 1.9992414568764345e-05, "loss": 0.6088, "step": 1269 }, { "epoch": 0.026934741574940087, "grad_norm": 0.4207507073879242, "learning_rate": 1.9992401576097832e-05, "loss": 0.5236, "step": 1270 }, { "epoch": 0.02695595003287311, "grad_norm": 0.349168062210083, "learning_rate": 1.9992388572317854e-05, "loss": 0.5101, "step": 1271 }, { "epoch": 0.026977158490806133, "grad_norm": 0.30617254972457886, "learning_rate": 1.999237555742443e-05, "loss": 0.5659, "step": 1272 }, { "epoch": 0.026998366948739156, "grad_norm": 0.3679562509059906, "learning_rate": 1.9992362531417573e-05, "loss": 0.5736, "step": 1273 }, { "epoch": 0.027019575406672183, "grad_norm": 0.3783669173717499, "learning_rate": 1.9992349494297297e-05, "loss": 0.6496, "step": 1274 }, { "epoch": 0.027040783864605206, "grad_norm": 0.3381105363368988, "learning_rate": 1.9992336446063617e-05, "loss": 0.5714, "step": 1275 }, { "epoch": 0.02706199232253823, "grad_norm": 0.3376785218715668, "learning_rate": 1.9992323386716546e-05, "loss": 0.6407, "step": 1276 }, { "epoch": 0.027083200780471252, "grad_norm": 0.3168334662914276, "learning_rate": 1.9992310316256098e-05, "loss": 0.5449, "step": 1277 }, { "epoch": 0.027104409238404275, "grad_norm": 0.3677845001220703, "learning_rate": 1.9992297234682293e-05, "loss": 0.5678, "step": 1278 }, { "epoch": 0.027125617696337298, "grad_norm": 0.3743898570537567, "learning_rate": 1.999228414199514e-05, "loss": 0.5473, "step": 1279 }, { "epoch": 0.027146826154270325, "grad_norm": 0.34056514501571655, "learning_rate": 1.9992271038194656e-05, "loss": 0.5249, "step": 1280 }, { "epoch": 0.027168034612203348, "grad_norm": 0.3083377778530121, "learning_rate": 1.9992257923280855e-05, "loss": 0.5481, "step": 1281 }, { "epoch": 0.02718924307013637, "grad_norm": 0.39655932784080505, "learning_rate": 1.999224479725375e-05, "loss": 0.6142, "step": 1282 }, { "epoch": 0.027210451528069394, "grad_norm": 0.31626489758491516, "learning_rate": 1.9992231660113358e-05, "loss": 0.5711, "step": 1283 }, { "epoch": 0.027231659986002417, "grad_norm": 0.39877957105636597, "learning_rate": 1.9992218511859693e-05, "loss": 0.5904, "step": 1284 }, { "epoch": 0.02725286844393544, "grad_norm": 0.3121827244758606, "learning_rate": 1.999220535249277e-05, "loss": 0.515, "step": 1285 }, { "epoch": 0.027274076901868467, "grad_norm": 0.3378162682056427, "learning_rate": 1.99921921820126e-05, "loss": 0.469, "step": 1286 }, { "epoch": 0.02729528535980149, "grad_norm": 0.37188252806663513, "learning_rate": 1.9992179000419204e-05, "loss": 0.5691, "step": 1287 }, { "epoch": 0.027316493817734513, "grad_norm": 0.3711283802986145, "learning_rate": 1.9992165807712593e-05, "loss": 0.5209, "step": 1288 }, { "epoch": 0.027337702275667536, "grad_norm": 0.31711041927337646, "learning_rate": 1.999215260389278e-05, "loss": 0.5629, "step": 1289 }, { "epoch": 0.02735891073360056, "grad_norm": 0.3228372633457184, "learning_rate": 1.999213938895978e-05, "loss": 0.4753, "step": 1290 }, { "epoch": 0.027380119191533582, "grad_norm": 0.3931206464767456, "learning_rate": 1.9992126162913617e-05, "loss": 0.5392, "step": 1291 }, { "epoch": 0.02740132764946661, "grad_norm": 0.326364666223526, "learning_rate": 1.9992112925754293e-05, "loss": 0.568, "step": 1292 }, { "epoch": 0.02742253610739963, "grad_norm": 0.33570969104766846, "learning_rate": 1.9992099677481825e-05, "loss": 0.5413, "step": 1293 }, { "epoch": 0.027443744565332655, "grad_norm": 0.3075372874736786, "learning_rate": 1.9992086418096238e-05, "loss": 0.524, "step": 1294 }, { "epoch": 0.027464953023265678, "grad_norm": 0.38759222626686096, "learning_rate": 1.999207314759753e-05, "loss": 0.516, "step": 1295 }, { "epoch": 0.0274861614811987, "grad_norm": 0.5391276478767395, "learning_rate": 1.9992059865985732e-05, "loss": 0.6229, "step": 1296 }, { "epoch": 0.027507369939131724, "grad_norm": 0.3282458484172821, "learning_rate": 1.999204657326085e-05, "loss": 0.6393, "step": 1297 }, { "epoch": 0.02752857839706475, "grad_norm": 0.4499047100543976, "learning_rate": 1.9992033269422903e-05, "loss": 0.5627, "step": 1298 }, { "epoch": 0.027549786854997774, "grad_norm": 0.37420734763145447, "learning_rate": 1.99920199544719e-05, "loss": 0.5595, "step": 1299 }, { "epoch": 0.027570995312930797, "grad_norm": 0.3288578689098358, "learning_rate": 1.999200662840786e-05, "loss": 0.4942, "step": 1300 }, { "epoch": 0.02759220377086382, "grad_norm": 0.4009118378162384, "learning_rate": 1.9991993291230797e-05, "loss": 0.6884, "step": 1301 }, { "epoch": 0.027613412228796843, "grad_norm": 0.35327965021133423, "learning_rate": 1.9991979942940726e-05, "loss": 0.5831, "step": 1302 }, { "epoch": 0.02763462068672987, "grad_norm": 0.3419669568538666, "learning_rate": 1.999196658353766e-05, "loss": 0.6813, "step": 1303 }, { "epoch": 0.027655829144662893, "grad_norm": 0.38144591450691223, "learning_rate": 1.9991953213021616e-05, "loss": 0.6219, "step": 1304 }, { "epoch": 0.027677037602595916, "grad_norm": 0.31947094202041626, "learning_rate": 1.9991939831392613e-05, "loss": 0.4796, "step": 1305 }, { "epoch": 0.02769824606052894, "grad_norm": 0.3510657250881195, "learning_rate": 1.9991926438650657e-05, "loss": 0.547, "step": 1306 }, { "epoch": 0.027719454518461962, "grad_norm": 0.29206493496894836, "learning_rate": 1.9991913034795768e-05, "loss": 0.4923, "step": 1307 }, { "epoch": 0.027740662976394985, "grad_norm": 0.4191288352012634, "learning_rate": 1.999189961982796e-05, "loss": 0.5956, "step": 1308 }, { "epoch": 0.02776187143432801, "grad_norm": 0.4719637334346771, "learning_rate": 1.9991886193747246e-05, "loss": 0.5918, "step": 1309 }, { "epoch": 0.027783079892261035, "grad_norm": 0.31970664858818054, "learning_rate": 1.999187275655365e-05, "loss": 0.5786, "step": 1310 }, { "epoch": 0.027804288350194058, "grad_norm": 0.4438095688819885, "learning_rate": 1.9991859308247174e-05, "loss": 0.4739, "step": 1311 }, { "epoch": 0.02782549680812708, "grad_norm": 0.3283400237560272, "learning_rate": 1.999184584882784e-05, "loss": 0.59, "step": 1312 }, { "epoch": 0.027846705266060104, "grad_norm": 0.32851821184158325, "learning_rate": 1.9991832378295663e-05, "loss": 0.6576, "step": 1313 }, { "epoch": 0.027867913723993127, "grad_norm": 0.33528605103492737, "learning_rate": 1.9991818896650653e-05, "loss": 0.568, "step": 1314 }, { "epoch": 0.027889122181926154, "grad_norm": 0.3389872610569, "learning_rate": 1.9991805403892832e-05, "loss": 0.5069, "step": 1315 }, { "epoch": 0.027910330639859177, "grad_norm": 0.3718460202217102, "learning_rate": 1.9991791900022213e-05, "loss": 0.5592, "step": 1316 }, { "epoch": 0.0279315390977922, "grad_norm": 0.3772730827331543, "learning_rate": 1.9991778385038806e-05, "loss": 0.6237, "step": 1317 }, { "epoch": 0.027952747555725223, "grad_norm": 0.3113984167575836, "learning_rate": 1.9991764858942636e-05, "loss": 0.5663, "step": 1318 }, { "epoch": 0.027973956013658246, "grad_norm": 0.4441734552383423, "learning_rate": 1.9991751321733708e-05, "loss": 0.5234, "step": 1319 }, { "epoch": 0.02799516447159127, "grad_norm": 0.31630465388298035, "learning_rate": 1.999173777341204e-05, "loss": 0.4808, "step": 1320 }, { "epoch": 0.028016372929524296, "grad_norm": 0.33925777673721313, "learning_rate": 1.999172421397765e-05, "loss": 0.5902, "step": 1321 }, { "epoch": 0.02803758138745732, "grad_norm": 0.3373272120952606, "learning_rate": 1.999171064343055e-05, "loss": 0.5778, "step": 1322 }, { "epoch": 0.02805878984539034, "grad_norm": 0.43182116746902466, "learning_rate": 1.9991697061770757e-05, "loss": 0.542, "step": 1323 }, { "epoch": 0.028079998303323365, "grad_norm": 0.47302863001823425, "learning_rate": 1.999168346899828e-05, "loss": 0.5228, "step": 1324 }, { "epoch": 0.028101206761256388, "grad_norm": 0.3954488933086395, "learning_rate": 1.999166986511315e-05, "loss": 0.5215, "step": 1325 }, { "epoch": 0.028122415219189414, "grad_norm": 0.40466436743736267, "learning_rate": 1.9991656250115365e-05, "loss": 0.56, "step": 1326 }, { "epoch": 0.028143623677122438, "grad_norm": 0.3401874601840973, "learning_rate": 1.999164262400495e-05, "loss": 0.619, "step": 1327 }, { "epoch": 0.02816483213505546, "grad_norm": 0.32141581177711487, "learning_rate": 1.999162898678191e-05, "loss": 0.5354, "step": 1328 }, { "epoch": 0.028186040592988484, "grad_norm": 0.3092856705188751, "learning_rate": 1.9991615338446273e-05, "loss": 0.5583, "step": 1329 }, { "epoch": 0.028207249050921507, "grad_norm": 0.34447482228279114, "learning_rate": 1.9991601678998046e-05, "loss": 0.5447, "step": 1330 }, { "epoch": 0.02822845750885453, "grad_norm": 0.3439100384712219, "learning_rate": 1.999158800843725e-05, "loss": 0.6055, "step": 1331 }, { "epoch": 0.028249665966787556, "grad_norm": 0.3066610097885132, "learning_rate": 1.999157432676389e-05, "loss": 0.4723, "step": 1332 }, { "epoch": 0.02827087442472058, "grad_norm": 0.3568776547908783, "learning_rate": 1.9991560633977992e-05, "loss": 0.5169, "step": 1333 }, { "epoch": 0.028292082882653603, "grad_norm": 0.3510456085205078, "learning_rate": 1.999154693007957e-05, "loss": 0.5546, "step": 1334 }, { "epoch": 0.028313291340586626, "grad_norm": 0.3329434394836426, "learning_rate": 1.999153321506863e-05, "loss": 0.6021, "step": 1335 }, { "epoch": 0.02833449979851965, "grad_norm": 0.43591201305389404, "learning_rate": 1.9991519488945197e-05, "loss": 0.5681, "step": 1336 }, { "epoch": 0.028355708256452672, "grad_norm": 0.3383048176765442, "learning_rate": 1.9991505751709283e-05, "loss": 0.5657, "step": 1337 }, { "epoch": 0.0283769167143857, "grad_norm": 0.31237363815307617, "learning_rate": 1.9991492003360903e-05, "loss": 0.5941, "step": 1338 }, { "epoch": 0.02839812517231872, "grad_norm": 0.3661760985851288, "learning_rate": 1.9991478243900067e-05, "loss": 0.5028, "step": 1339 }, { "epoch": 0.028419333630251745, "grad_norm": 0.30692505836486816, "learning_rate": 1.9991464473326803e-05, "loss": 0.5879, "step": 1340 }, { "epoch": 0.028440542088184768, "grad_norm": 0.31535786390304565, "learning_rate": 1.9991450691641117e-05, "loss": 0.5557, "step": 1341 }, { "epoch": 0.02846175054611779, "grad_norm": 0.31644707918167114, "learning_rate": 1.9991436898843027e-05, "loss": 0.5985, "step": 1342 }, { "epoch": 0.028482959004050814, "grad_norm": 0.3356207609176636, "learning_rate": 1.999142309493255e-05, "loss": 0.6573, "step": 1343 }, { "epoch": 0.02850416746198384, "grad_norm": 0.35109764337539673, "learning_rate": 1.9991409279909692e-05, "loss": 0.5894, "step": 1344 }, { "epoch": 0.028525375919916864, "grad_norm": 0.33852964639663696, "learning_rate": 1.999139545377448e-05, "loss": 0.5075, "step": 1345 }, { "epoch": 0.028546584377849887, "grad_norm": 0.354320764541626, "learning_rate": 1.999138161652692e-05, "loss": 0.5828, "step": 1346 }, { "epoch": 0.02856779283578291, "grad_norm": 0.3418082892894745, "learning_rate": 1.9991367768167037e-05, "loss": 0.5892, "step": 1347 }, { "epoch": 0.028589001293715933, "grad_norm": 0.40261051058769226, "learning_rate": 1.9991353908694843e-05, "loss": 0.608, "step": 1348 }, { "epoch": 0.028610209751648956, "grad_norm": 0.3499436378479004, "learning_rate": 1.9991340038110346e-05, "loss": 0.543, "step": 1349 }, { "epoch": 0.028631418209581982, "grad_norm": 0.32799994945526123, "learning_rate": 1.9991326156413572e-05, "loss": 0.613, "step": 1350 }, { "epoch": 0.028652626667515006, "grad_norm": 0.35342127084732056, "learning_rate": 1.999131226360453e-05, "loss": 0.5103, "step": 1351 }, { "epoch": 0.02867383512544803, "grad_norm": 0.3609023690223694, "learning_rate": 1.999129835968324e-05, "loss": 0.659, "step": 1352 }, { "epoch": 0.028695043583381052, "grad_norm": 0.3821169435977936, "learning_rate": 1.999128444464971e-05, "loss": 0.5754, "step": 1353 }, { "epoch": 0.028716252041314075, "grad_norm": 0.36905965209007263, "learning_rate": 1.9991270518503965e-05, "loss": 0.6296, "step": 1354 }, { "epoch": 0.0287374604992471, "grad_norm": 0.35971611738204956, "learning_rate": 1.9991256581246015e-05, "loss": 0.6006, "step": 1355 }, { "epoch": 0.028758668957180124, "grad_norm": 0.3623155951499939, "learning_rate": 1.9991242632875873e-05, "loss": 0.5269, "step": 1356 }, { "epoch": 0.028779877415113148, "grad_norm": 0.3195771276950836, "learning_rate": 1.999122867339356e-05, "loss": 0.6088, "step": 1357 }, { "epoch": 0.02880108587304617, "grad_norm": 0.35453712940216064, "learning_rate": 1.999121470279909e-05, "loss": 0.6532, "step": 1358 }, { "epoch": 0.028822294330979194, "grad_norm": 0.4388962984085083, "learning_rate": 1.9991200721092476e-05, "loss": 0.6021, "step": 1359 }, { "epoch": 0.028843502788912217, "grad_norm": 0.35402774810791016, "learning_rate": 1.999118672827374e-05, "loss": 0.5606, "step": 1360 }, { "epoch": 0.028864711246845243, "grad_norm": 0.32002148032188416, "learning_rate": 1.9991172724342884e-05, "loss": 0.589, "step": 1361 }, { "epoch": 0.028885919704778266, "grad_norm": 0.3195905387401581, "learning_rate": 1.999115870929994e-05, "loss": 0.5247, "step": 1362 }, { "epoch": 0.02890712816271129, "grad_norm": 0.35186490416526794, "learning_rate": 1.9991144683144912e-05, "loss": 0.5909, "step": 1363 }, { "epoch": 0.028928336620644313, "grad_norm": 0.3217103183269501, "learning_rate": 1.9991130645877823e-05, "loss": 0.4999, "step": 1364 }, { "epoch": 0.028949545078577336, "grad_norm": 0.3636799454689026, "learning_rate": 1.9991116597498684e-05, "loss": 0.4969, "step": 1365 }, { "epoch": 0.02897075353651036, "grad_norm": 0.3369251787662506, "learning_rate": 1.9991102538007506e-05, "loss": 0.5812, "step": 1366 }, { "epoch": 0.028991961994443385, "grad_norm": 0.34059447050094604, "learning_rate": 1.999108846740432e-05, "loss": 0.5832, "step": 1367 }, { "epoch": 0.02901317045237641, "grad_norm": 0.4422863721847534, "learning_rate": 1.999107438568913e-05, "loss": 0.5016, "step": 1368 }, { "epoch": 0.02903437891030943, "grad_norm": 0.3962318003177643, "learning_rate": 1.999106029286195e-05, "loss": 0.5533, "step": 1369 }, { "epoch": 0.029055587368242455, "grad_norm": 0.33579325675964355, "learning_rate": 1.99910461889228e-05, "loss": 0.5562, "step": 1370 }, { "epoch": 0.029076795826175478, "grad_norm": 0.41442781686782837, "learning_rate": 1.9991032073871698e-05, "loss": 0.5462, "step": 1371 }, { "epoch": 0.0290980042841085, "grad_norm": 0.33980974555015564, "learning_rate": 1.9991017947708656e-05, "loss": 0.6106, "step": 1372 }, { "epoch": 0.029119212742041527, "grad_norm": 0.3030170798301697, "learning_rate": 1.9991003810433687e-05, "loss": 0.6172, "step": 1373 }, { "epoch": 0.02914042119997455, "grad_norm": 0.29767051339149475, "learning_rate": 1.999098966204682e-05, "loss": 0.4272, "step": 1374 }, { "epoch": 0.029161629657907574, "grad_norm": 0.3179580569267273, "learning_rate": 1.999097550254805e-05, "loss": 0.5301, "step": 1375 }, { "epoch": 0.029182838115840597, "grad_norm": 0.32329896092414856, "learning_rate": 1.9990961331937408e-05, "loss": 0.5791, "step": 1376 }, { "epoch": 0.02920404657377362, "grad_norm": 0.3118455410003662, "learning_rate": 1.9990947150214907e-05, "loss": 0.4963, "step": 1377 }, { "epoch": 0.029225255031706646, "grad_norm": 0.33293330669403076, "learning_rate": 1.9990932957380564e-05, "loss": 0.5564, "step": 1378 }, { "epoch": 0.02924646348963967, "grad_norm": 0.35606828331947327, "learning_rate": 1.9990918753434388e-05, "loss": 0.4848, "step": 1379 }, { "epoch": 0.029267671947572692, "grad_norm": 0.37055838108062744, "learning_rate": 1.9990904538376397e-05, "loss": 0.5478, "step": 1380 }, { "epoch": 0.029288880405505716, "grad_norm": 0.3581787943840027, "learning_rate": 1.999089031220661e-05, "loss": 0.5462, "step": 1381 }, { "epoch": 0.02931008886343874, "grad_norm": 0.3484707474708557, "learning_rate": 1.9990876074925044e-05, "loss": 0.5421, "step": 1382 }, { "epoch": 0.029331297321371762, "grad_norm": 0.33666783571243286, "learning_rate": 1.9990861826531713e-05, "loss": 0.5516, "step": 1383 }, { "epoch": 0.02935250577930479, "grad_norm": 0.32701659202575684, "learning_rate": 1.9990847567026634e-05, "loss": 0.4256, "step": 1384 }, { "epoch": 0.02937371423723781, "grad_norm": 0.3712594211101532, "learning_rate": 1.9990833296409814e-05, "loss": 0.5777, "step": 1385 }, { "epoch": 0.029394922695170835, "grad_norm": 0.3228375315666199, "learning_rate": 1.999081901468128e-05, "loss": 0.5576, "step": 1386 }, { "epoch": 0.029416131153103858, "grad_norm": 0.3412027955055237, "learning_rate": 1.9990804721841047e-05, "loss": 0.5732, "step": 1387 }, { "epoch": 0.02943733961103688, "grad_norm": 0.3588094413280487, "learning_rate": 1.999079041788912e-05, "loss": 0.5831, "step": 1388 }, { "epoch": 0.029458548068969904, "grad_norm": 0.32496729493141174, "learning_rate": 1.999077610282553e-05, "loss": 0.5338, "step": 1389 }, { "epoch": 0.02947975652690293, "grad_norm": 0.3276945948600769, "learning_rate": 1.999076177665028e-05, "loss": 0.587, "step": 1390 }, { "epoch": 0.029500964984835953, "grad_norm": 0.3156758248806, "learning_rate": 1.9990747439363396e-05, "loss": 0.5513, "step": 1391 }, { "epoch": 0.029522173442768977, "grad_norm": 0.3570685088634491, "learning_rate": 1.999073309096489e-05, "loss": 0.592, "step": 1392 }, { "epoch": 0.029543381900702, "grad_norm": 0.3137660026550293, "learning_rate": 1.9990718731454774e-05, "loss": 0.5532, "step": 1393 }, { "epoch": 0.029564590358635023, "grad_norm": 0.32207322120666504, "learning_rate": 1.999070436083307e-05, "loss": 0.6453, "step": 1394 }, { "epoch": 0.029585798816568046, "grad_norm": 0.3194875717163086, "learning_rate": 1.9990689979099788e-05, "loss": 0.6432, "step": 1395 }, { "epoch": 0.029607007274501072, "grad_norm": 0.3093317151069641, "learning_rate": 1.9990675586254952e-05, "loss": 0.4922, "step": 1396 }, { "epoch": 0.029628215732434095, "grad_norm": 0.34348514676094055, "learning_rate": 1.9990661182298568e-05, "loss": 0.6363, "step": 1397 }, { "epoch": 0.02964942419036712, "grad_norm": 0.346896231174469, "learning_rate": 1.999064676723066e-05, "loss": 0.5381, "step": 1398 }, { "epoch": 0.02967063264830014, "grad_norm": 0.341810017824173, "learning_rate": 1.9990632341051242e-05, "loss": 0.6934, "step": 1399 }, { "epoch": 0.029691841106233165, "grad_norm": 0.3952643573284149, "learning_rate": 1.999061790376033e-05, "loss": 0.5327, "step": 1400 }, { "epoch": 0.029713049564166188, "grad_norm": 0.3523610830307007, "learning_rate": 1.999060345535794e-05, "loss": 0.5302, "step": 1401 }, { "epoch": 0.029734258022099214, "grad_norm": 0.33315882086753845, "learning_rate": 1.999058899584409e-05, "loss": 0.5835, "step": 1402 }, { "epoch": 0.029755466480032237, "grad_norm": 0.5124685764312744, "learning_rate": 1.9990574525218788e-05, "loss": 0.5818, "step": 1403 }, { "epoch": 0.02977667493796526, "grad_norm": 0.34510213136672974, "learning_rate": 1.9990560043482055e-05, "loss": 0.502, "step": 1404 }, { "epoch": 0.029797883395898284, "grad_norm": 0.3253914713859558, "learning_rate": 1.999054555063391e-05, "loss": 0.4862, "step": 1405 }, { "epoch": 0.029819091853831307, "grad_norm": 0.3475536108016968, "learning_rate": 1.999053104667437e-05, "loss": 0.4943, "step": 1406 }, { "epoch": 0.029840300311764333, "grad_norm": 0.28318068385124207, "learning_rate": 1.999051653160345e-05, "loss": 0.4648, "step": 1407 }, { "epoch": 0.029861508769697356, "grad_norm": 0.3202584683895111, "learning_rate": 1.9990502005421156e-05, "loss": 0.4439, "step": 1408 }, { "epoch": 0.02988271722763038, "grad_norm": 0.3144548535346985, "learning_rate": 1.9990487468127516e-05, "loss": 0.5603, "step": 1409 }, { "epoch": 0.029903925685563403, "grad_norm": 0.33974364399909973, "learning_rate": 1.9990472919722545e-05, "loss": 0.5978, "step": 1410 }, { "epoch": 0.029925134143496426, "grad_norm": 0.3219192922115326, "learning_rate": 1.9990458360206256e-05, "loss": 0.5161, "step": 1411 }, { "epoch": 0.02994634260142945, "grad_norm": 0.32724663615226746, "learning_rate": 1.9990443789578664e-05, "loss": 0.5077, "step": 1412 }, { "epoch": 0.029967551059362475, "grad_norm": 0.3906638026237488, "learning_rate": 1.9990429207839788e-05, "loss": 0.6274, "step": 1413 }, { "epoch": 0.0299887595172955, "grad_norm": 0.3348609209060669, "learning_rate": 1.9990414614989646e-05, "loss": 0.5651, "step": 1414 }, { "epoch": 0.03000996797522852, "grad_norm": 0.5090267658233643, "learning_rate": 1.9990400011028248e-05, "loss": 0.6429, "step": 1415 }, { "epoch": 0.030031176433161545, "grad_norm": 0.3213863968849182, "learning_rate": 1.9990385395955617e-05, "loss": 0.5383, "step": 1416 }, { "epoch": 0.030052384891094568, "grad_norm": 0.3414023816585541, "learning_rate": 1.9990370769771764e-05, "loss": 0.569, "step": 1417 }, { "epoch": 0.03007359334902759, "grad_norm": 0.3449949324131012, "learning_rate": 1.9990356132476706e-05, "loss": 0.5907, "step": 1418 }, { "epoch": 0.030094801806960617, "grad_norm": 0.32000163197517395, "learning_rate": 1.9990341484070462e-05, "loss": 0.5509, "step": 1419 }, { "epoch": 0.03011601026489364, "grad_norm": 0.30295896530151367, "learning_rate": 1.999032682455305e-05, "loss": 0.6097, "step": 1420 }, { "epoch": 0.030137218722826663, "grad_norm": 0.3352426588535309, "learning_rate": 1.999031215392448e-05, "loss": 0.5707, "step": 1421 }, { "epoch": 0.030158427180759687, "grad_norm": 1.0543057918548584, "learning_rate": 1.9990297472184773e-05, "loss": 0.6552, "step": 1422 }, { "epoch": 0.03017963563869271, "grad_norm": 0.33940210938453674, "learning_rate": 1.9990282779333942e-05, "loss": 0.5552, "step": 1423 }, { "epoch": 0.030200844096625733, "grad_norm": 0.34898942708969116, "learning_rate": 1.999026807537201e-05, "loss": 0.5581, "step": 1424 }, { "epoch": 0.03022205255455876, "grad_norm": 0.36505424976348877, "learning_rate": 1.9990253360298985e-05, "loss": 0.5415, "step": 1425 }, { "epoch": 0.030243261012491782, "grad_norm": 0.33573266863822937, "learning_rate": 1.9990238634114886e-05, "loss": 0.4845, "step": 1426 }, { "epoch": 0.030264469470424805, "grad_norm": 0.44649332761764526, "learning_rate": 1.999022389681973e-05, "loss": 0.5648, "step": 1427 }, { "epoch": 0.03028567792835783, "grad_norm": 0.34909337759017944, "learning_rate": 1.9990209148413537e-05, "loss": 0.5442, "step": 1428 }, { "epoch": 0.03030688638629085, "grad_norm": 0.35046517848968506, "learning_rate": 1.999019438889632e-05, "loss": 0.5657, "step": 1429 }, { "epoch": 0.030328094844223878, "grad_norm": 0.38797473907470703, "learning_rate": 1.9990179618268094e-05, "loss": 0.6778, "step": 1430 }, { "epoch": 0.0303493033021569, "grad_norm": 0.3215062916278839, "learning_rate": 1.9990164836528877e-05, "loss": 0.6212, "step": 1431 }, { "epoch": 0.030370511760089924, "grad_norm": 0.3612697124481201, "learning_rate": 1.9990150043678687e-05, "loss": 0.5666, "step": 1432 }, { "epoch": 0.030391720218022947, "grad_norm": 0.45237278938293457, "learning_rate": 1.999013523971754e-05, "loss": 0.5514, "step": 1433 }, { "epoch": 0.03041292867595597, "grad_norm": 0.33766141533851624, "learning_rate": 1.9990120424645448e-05, "loss": 0.4889, "step": 1434 }, { "epoch": 0.030434137133888994, "grad_norm": 0.3327227830886841, "learning_rate": 1.999010559846243e-05, "loss": 0.5468, "step": 1435 }, { "epoch": 0.03045534559182202, "grad_norm": 0.3274146616458893, "learning_rate": 1.9990090761168505e-05, "loss": 0.5231, "step": 1436 }, { "epoch": 0.030476554049755043, "grad_norm": 0.32064512372016907, "learning_rate": 1.999007591276369e-05, "loss": 0.541, "step": 1437 }, { "epoch": 0.030497762507688066, "grad_norm": 0.3435177803039551, "learning_rate": 1.9990061053247998e-05, "loss": 0.5282, "step": 1438 }, { "epoch": 0.03051897096562109, "grad_norm": 0.32512709498405457, "learning_rate": 1.9990046182621444e-05, "loss": 0.4734, "step": 1439 }, { "epoch": 0.030540179423554113, "grad_norm": 0.3405003845691681, "learning_rate": 1.999003130088405e-05, "loss": 0.5128, "step": 1440 }, { "epoch": 0.030561387881487136, "grad_norm": 0.34842392802238464, "learning_rate": 1.999001640803583e-05, "loss": 0.5453, "step": 1441 }, { "epoch": 0.030582596339420162, "grad_norm": 0.3566507399082184, "learning_rate": 1.99900015040768e-05, "loss": 0.5764, "step": 1442 }, { "epoch": 0.030603804797353185, "grad_norm": 0.34333476424217224, "learning_rate": 1.998998658900698e-05, "loss": 0.5826, "step": 1443 }, { "epoch": 0.03062501325528621, "grad_norm": 0.8748303055763245, "learning_rate": 1.998997166282638e-05, "loss": 0.6262, "step": 1444 }, { "epoch": 0.03064622171321923, "grad_norm": 0.34252622723579407, "learning_rate": 1.9989956725535022e-05, "loss": 0.5374, "step": 1445 }, { "epoch": 0.030667430171152255, "grad_norm": 0.3714712858200073, "learning_rate": 1.998994177713292e-05, "loss": 0.5427, "step": 1446 }, { "epoch": 0.030688638629085278, "grad_norm": 0.3649067282676697, "learning_rate": 1.998992681762009e-05, "loss": 0.6311, "step": 1447 }, { "epoch": 0.030709847087018304, "grad_norm": 0.33157864212989807, "learning_rate": 1.9989911846996555e-05, "loss": 0.6009, "step": 1448 }, { "epoch": 0.030731055544951327, "grad_norm": 0.3841365873813629, "learning_rate": 1.9989896865262325e-05, "loss": 0.588, "step": 1449 }, { "epoch": 0.03075226400288435, "grad_norm": 0.39561253786087036, "learning_rate": 1.9989881872417415e-05, "loss": 0.5613, "step": 1450 }, { "epoch": 0.030773472460817373, "grad_norm": 0.38312211632728577, "learning_rate": 1.9989866868461848e-05, "loss": 0.6275, "step": 1451 }, { "epoch": 0.030794680918750397, "grad_norm": 0.337054044008255, "learning_rate": 1.9989851853395638e-05, "loss": 0.6393, "step": 1452 }, { "epoch": 0.03081588937668342, "grad_norm": 0.3497236967086792, "learning_rate": 1.99898368272188e-05, "loss": 0.6014, "step": 1453 }, { "epoch": 0.030837097834616446, "grad_norm": 0.33462968468666077, "learning_rate": 1.9989821789931353e-05, "loss": 0.523, "step": 1454 }, { "epoch": 0.03085830629254947, "grad_norm": 0.42689335346221924, "learning_rate": 1.9989806741533313e-05, "loss": 0.5355, "step": 1455 }, { "epoch": 0.030879514750482492, "grad_norm": 0.3387523889541626, "learning_rate": 1.9989791682024697e-05, "loss": 0.5396, "step": 1456 }, { "epoch": 0.030900723208415515, "grad_norm": 0.33400651812553406, "learning_rate": 1.9989776611405523e-05, "loss": 0.5105, "step": 1457 }, { "epoch": 0.03092193166634854, "grad_norm": 0.32379162311553955, "learning_rate": 1.9989761529675804e-05, "loss": 0.5711, "step": 1458 }, { "epoch": 0.030943140124281565, "grad_norm": 0.32411277294158936, "learning_rate": 1.998974643683556e-05, "loss": 0.6471, "step": 1459 }, { "epoch": 0.030964348582214588, "grad_norm": 0.3576846718788147, "learning_rate": 1.9989731332884803e-05, "loss": 0.6321, "step": 1460 }, { "epoch": 0.03098555704014761, "grad_norm": 0.33629855513572693, "learning_rate": 1.998971621782356e-05, "loss": 0.5414, "step": 1461 }, { "epoch": 0.031006765498080634, "grad_norm": 0.36355698108673096, "learning_rate": 1.9989701091651837e-05, "loss": 0.5401, "step": 1462 }, { "epoch": 0.031027973956013657, "grad_norm": 0.36664581298828125, "learning_rate": 1.998968595436966e-05, "loss": 0.5599, "step": 1463 }, { "epoch": 0.03104918241394668, "grad_norm": 0.3570162355899811, "learning_rate": 1.9989670805977034e-05, "loss": 0.5497, "step": 1464 }, { "epoch": 0.031070390871879707, "grad_norm": 0.37416866421699524, "learning_rate": 1.9989655646473987e-05, "loss": 0.5683, "step": 1465 }, { "epoch": 0.03109159932981273, "grad_norm": 0.33253148198127747, "learning_rate": 1.998964047586053e-05, "loss": 0.6004, "step": 1466 }, { "epoch": 0.031112807787745753, "grad_norm": 0.42915067076683044, "learning_rate": 1.9989625294136685e-05, "loss": 0.4008, "step": 1467 }, { "epoch": 0.031134016245678776, "grad_norm": 0.3441256582736969, "learning_rate": 1.9989610101302464e-05, "loss": 0.6274, "step": 1468 }, { "epoch": 0.0311552247036118, "grad_norm": 0.3221849501132965, "learning_rate": 1.998959489735788e-05, "loss": 0.5703, "step": 1469 }, { "epoch": 0.031176433161544823, "grad_norm": 0.3438684642314911, "learning_rate": 1.9989579682302964e-05, "loss": 0.5325, "step": 1470 }, { "epoch": 0.03119764161947785, "grad_norm": 0.331563264131546, "learning_rate": 1.998956445613772e-05, "loss": 0.6294, "step": 1471 }, { "epoch": 0.031218850077410872, "grad_norm": 0.39878690242767334, "learning_rate": 1.9989549218862173e-05, "loss": 0.5157, "step": 1472 }, { "epoch": 0.031240058535343895, "grad_norm": 0.4396231174468994, "learning_rate": 1.998953397047633e-05, "loss": 0.5806, "step": 1473 }, { "epoch": 0.03126126699327692, "grad_norm": 0.4600769579410553, "learning_rate": 1.998951871098022e-05, "loss": 0.5883, "step": 1474 }, { "epoch": 0.03128247545120994, "grad_norm": 0.6617733240127563, "learning_rate": 1.9989503440373846e-05, "loss": 0.5733, "step": 1475 }, { "epoch": 0.03130368390914297, "grad_norm": 0.5232219696044922, "learning_rate": 1.998948815865724e-05, "loss": 0.6481, "step": 1476 }, { "epoch": 0.03132489236707599, "grad_norm": 0.3718774914741516, "learning_rate": 1.998947286583041e-05, "loss": 0.5072, "step": 1477 }, { "epoch": 0.031346100825009014, "grad_norm": 0.3197239637374878, "learning_rate": 1.9989457561893374e-05, "loss": 0.5917, "step": 1478 }, { "epoch": 0.031367309282942034, "grad_norm": 0.34539639949798584, "learning_rate": 1.9989442246846153e-05, "loss": 0.5896, "step": 1479 }, { "epoch": 0.03138851774087506, "grad_norm": 0.3284091055393219, "learning_rate": 1.998942692068876e-05, "loss": 0.5542, "step": 1480 }, { "epoch": 0.03140972619880809, "grad_norm": 0.3267226219177246, "learning_rate": 1.9989411583421212e-05, "loss": 0.6095, "step": 1481 }, { "epoch": 0.03143093465674111, "grad_norm": 0.48488330841064453, "learning_rate": 1.998939623504353e-05, "loss": 0.6038, "step": 1482 }, { "epoch": 0.03145214311467413, "grad_norm": 0.3210887908935547, "learning_rate": 1.9989380875555725e-05, "loss": 0.5719, "step": 1483 }, { "epoch": 0.03147335157260715, "grad_norm": 0.3229188024997711, "learning_rate": 1.9989365504957817e-05, "loss": 0.5239, "step": 1484 }, { "epoch": 0.03149456003054018, "grad_norm": 0.45884689688682556, "learning_rate": 1.9989350123249825e-05, "loss": 0.562, "step": 1485 }, { "epoch": 0.031515768488473206, "grad_norm": 0.36812642216682434, "learning_rate": 1.9989334730431765e-05, "loss": 0.5814, "step": 1486 }, { "epoch": 0.031536976946406226, "grad_norm": 0.3079797327518463, "learning_rate": 1.9989319326503653e-05, "loss": 0.5656, "step": 1487 }, { "epoch": 0.03155818540433925, "grad_norm": 0.34147727489471436, "learning_rate": 1.9989303911465507e-05, "loss": 0.5765, "step": 1488 }, { "epoch": 0.03157939386227227, "grad_norm": 0.3302502930164337, "learning_rate": 1.9989288485317345e-05, "loss": 0.494, "step": 1489 }, { "epoch": 0.0316006023202053, "grad_norm": 0.42163223028182983, "learning_rate": 1.998927304805918e-05, "loss": 0.5189, "step": 1490 }, { "epoch": 0.031621810778138325, "grad_norm": 0.3577633798122406, "learning_rate": 1.9989257599691037e-05, "loss": 0.5247, "step": 1491 }, { "epoch": 0.031643019236071344, "grad_norm": 0.29769036173820496, "learning_rate": 1.9989242140212925e-05, "loss": 0.518, "step": 1492 }, { "epoch": 0.03166422769400437, "grad_norm": 0.3970264196395874, "learning_rate": 1.9989226669624867e-05, "loss": 0.5661, "step": 1493 }, { "epoch": 0.03168543615193739, "grad_norm": 0.2977975308895111, "learning_rate": 1.9989211187926875e-05, "loss": 0.4827, "step": 1494 }, { "epoch": 0.03170664460987042, "grad_norm": 0.3040255904197693, "learning_rate": 1.9989195695118972e-05, "loss": 0.5341, "step": 1495 }, { "epoch": 0.03172785306780344, "grad_norm": 0.3163955807685852, "learning_rate": 1.998918019120117e-05, "loss": 0.5415, "step": 1496 }, { "epoch": 0.03174906152573646, "grad_norm": 0.455920934677124, "learning_rate": 1.998916467617349e-05, "loss": 0.556, "step": 1497 }, { "epoch": 0.03177026998366949, "grad_norm": 0.33380627632141113, "learning_rate": 1.9989149150035944e-05, "loss": 0.5548, "step": 1498 }, { "epoch": 0.03179147844160251, "grad_norm": 0.3067489564418793, "learning_rate": 1.998913361278856e-05, "loss": 0.5369, "step": 1499 }, { "epoch": 0.031812686899535536, "grad_norm": 0.3225211203098297, "learning_rate": 1.9989118064431344e-05, "loss": 0.4618, "step": 1500 }, { "epoch": 0.031833895357468556, "grad_norm": 0.34688106179237366, "learning_rate": 1.9989102504964316e-05, "loss": 0.5392, "step": 1501 }, { "epoch": 0.03185510381540158, "grad_norm": 0.30716031789779663, "learning_rate": 1.99890869343875e-05, "loss": 0.5409, "step": 1502 }, { "epoch": 0.03187631227333461, "grad_norm": 0.4192601144313812, "learning_rate": 1.9989071352700903e-05, "loss": 0.4987, "step": 1503 }, { "epoch": 0.03189752073126763, "grad_norm": 0.35825273394584656, "learning_rate": 1.998905575990455e-05, "loss": 0.5716, "step": 1504 }, { "epoch": 0.031918729189200655, "grad_norm": 0.30905935168266296, "learning_rate": 1.9989040155998455e-05, "loss": 0.5224, "step": 1505 }, { "epoch": 0.031939937647133675, "grad_norm": 0.33884063363075256, "learning_rate": 1.998902454098264e-05, "loss": 0.4977, "step": 1506 }, { "epoch": 0.0319611461050667, "grad_norm": 0.3242473602294922, "learning_rate": 1.9989008914857115e-05, "loss": 0.5226, "step": 1507 }, { "epoch": 0.03198235456299973, "grad_norm": 0.32569780945777893, "learning_rate": 1.99889932776219e-05, "loss": 0.5091, "step": 1508 }, { "epoch": 0.03200356302093275, "grad_norm": 0.31753623485565186, "learning_rate": 1.998897762927702e-05, "loss": 0.5736, "step": 1509 }, { "epoch": 0.032024771478865774, "grad_norm": 0.2933645248413086, "learning_rate": 1.998896196982248e-05, "loss": 0.4875, "step": 1510 }, { "epoch": 0.032045979936798794, "grad_norm": 0.33817020058631897, "learning_rate": 1.9988946299258306e-05, "loss": 0.507, "step": 1511 }, { "epoch": 0.03206718839473182, "grad_norm": 0.3270431458950043, "learning_rate": 1.9988930617584512e-05, "loss": 0.5407, "step": 1512 }, { "epoch": 0.03208839685266484, "grad_norm": 0.3220902383327484, "learning_rate": 1.9988914924801115e-05, "loss": 0.5318, "step": 1513 }, { "epoch": 0.032109605310597866, "grad_norm": 0.3085906505584717, "learning_rate": 1.9988899220908132e-05, "loss": 0.6389, "step": 1514 }, { "epoch": 0.03213081376853089, "grad_norm": 0.33297696709632874, "learning_rate": 1.9988883505905584e-05, "loss": 0.5284, "step": 1515 }, { "epoch": 0.03215202222646391, "grad_norm": 0.34983840584754944, "learning_rate": 1.9988867779793487e-05, "loss": 0.591, "step": 1516 }, { "epoch": 0.03217323068439694, "grad_norm": 0.33137238025665283, "learning_rate": 1.998885204257186e-05, "loss": 0.5071, "step": 1517 }, { "epoch": 0.03219443914232996, "grad_norm": 0.3087601363658905, "learning_rate": 1.9988836294240715e-05, "loss": 0.5509, "step": 1518 }, { "epoch": 0.032215647600262985, "grad_norm": 0.3048601448535919, "learning_rate": 1.9988820534800074e-05, "loss": 0.5465, "step": 1519 }, { "epoch": 0.03223685605819601, "grad_norm": 0.47471365332603455, "learning_rate": 1.9988804764249956e-05, "loss": 0.4708, "step": 1520 }, { "epoch": 0.03225806451612903, "grad_norm": 0.3493764400482178, "learning_rate": 1.9988788982590373e-05, "loss": 0.5672, "step": 1521 }, { "epoch": 0.03227927297406206, "grad_norm": 0.42088398337364197, "learning_rate": 1.9988773189821348e-05, "loss": 0.5548, "step": 1522 }, { "epoch": 0.03230048143199508, "grad_norm": 0.4173782169818878, "learning_rate": 1.9988757385942895e-05, "loss": 0.6456, "step": 1523 }, { "epoch": 0.032321689889928104, "grad_norm": 0.3488677144050598, "learning_rate": 1.9988741570955033e-05, "loss": 0.503, "step": 1524 }, { "epoch": 0.032342898347861124, "grad_norm": 0.34718337655067444, "learning_rate": 1.9988725744857778e-05, "loss": 0.596, "step": 1525 }, { "epoch": 0.03236410680579415, "grad_norm": 0.35100239515304565, "learning_rate": 1.998870990765115e-05, "loss": 0.521, "step": 1526 }, { "epoch": 0.03238531526372718, "grad_norm": 0.42324382066726685, "learning_rate": 1.9988694059335167e-05, "loss": 0.6402, "step": 1527 }, { "epoch": 0.032406523721660196, "grad_norm": 0.33572131395339966, "learning_rate": 1.9988678199909842e-05, "loss": 0.5382, "step": 1528 }, { "epoch": 0.03242773217959322, "grad_norm": 0.33941298723220825, "learning_rate": 1.9988662329375202e-05, "loss": 0.6318, "step": 1529 }, { "epoch": 0.03244894063752624, "grad_norm": 0.36479902267456055, "learning_rate": 1.998864644773125e-05, "loss": 0.5688, "step": 1530 }, { "epoch": 0.03247014909545927, "grad_norm": 0.3787119686603546, "learning_rate": 1.998863055497802e-05, "loss": 0.5901, "step": 1531 }, { "epoch": 0.032491357553392296, "grad_norm": 0.302115797996521, "learning_rate": 1.998861465111552e-05, "loss": 0.4661, "step": 1532 }, { "epoch": 0.032512566011325315, "grad_norm": 0.3346666395664215, "learning_rate": 1.9988598736143766e-05, "loss": 0.5264, "step": 1533 }, { "epoch": 0.03253377446925834, "grad_norm": 0.33130791783332825, "learning_rate": 1.9988582810062783e-05, "loss": 0.5776, "step": 1534 }, { "epoch": 0.03255498292719136, "grad_norm": 0.3132098317146301, "learning_rate": 1.9988566872872582e-05, "loss": 0.5363, "step": 1535 }, { "epoch": 0.03257619138512439, "grad_norm": 0.3509593904018402, "learning_rate": 1.9988550924573186e-05, "loss": 0.5327, "step": 1536 }, { "epoch": 0.032597399843057415, "grad_norm": 0.3628557622432709, "learning_rate": 1.998853496516461e-05, "loss": 0.6057, "step": 1537 }, { "epoch": 0.032618608300990434, "grad_norm": 0.3528401255607605, "learning_rate": 1.998851899464687e-05, "loss": 0.512, "step": 1538 }, { "epoch": 0.03263981675892346, "grad_norm": 0.2965620458126068, "learning_rate": 1.9988503013019986e-05, "loss": 0.5129, "step": 1539 }, { "epoch": 0.03266102521685648, "grad_norm": 0.32929977774620056, "learning_rate": 1.998848702028398e-05, "loss": 0.5644, "step": 1540 }, { "epoch": 0.03268223367478951, "grad_norm": 0.3227764666080475, "learning_rate": 1.998847101643886e-05, "loss": 0.514, "step": 1541 }, { "epoch": 0.03270344213272253, "grad_norm": 0.5511286854743958, "learning_rate": 1.9988455001484653e-05, "loss": 0.4862, "step": 1542 }, { "epoch": 0.03272465059065555, "grad_norm": 0.4093215763568878, "learning_rate": 1.9988438975421374e-05, "loss": 0.6593, "step": 1543 }, { "epoch": 0.03274585904858858, "grad_norm": 0.37372320890426636, "learning_rate": 1.9988422938249038e-05, "loss": 0.5628, "step": 1544 }, { "epoch": 0.0327670675065216, "grad_norm": 0.3354209065437317, "learning_rate": 1.9988406889967665e-05, "loss": 0.6515, "step": 1545 }, { "epoch": 0.032788275964454626, "grad_norm": 0.3681719899177551, "learning_rate": 1.9988390830577275e-05, "loss": 0.4867, "step": 1546 }, { "epoch": 0.032809484422387646, "grad_norm": 0.5328938961029053, "learning_rate": 1.998837476007788e-05, "loss": 0.6259, "step": 1547 }, { "epoch": 0.03283069288032067, "grad_norm": 0.3514869213104248, "learning_rate": 1.9988358678469503e-05, "loss": 0.5439, "step": 1548 }, { "epoch": 0.0328519013382537, "grad_norm": 0.3675353527069092, "learning_rate": 1.9988342585752155e-05, "loss": 0.5524, "step": 1549 }, { "epoch": 0.03287310979618672, "grad_norm": 0.3459165096282959, "learning_rate": 1.9988326481925868e-05, "loss": 0.5249, "step": 1550 }, { "epoch": 0.032894318254119745, "grad_norm": 0.3099340796470642, "learning_rate": 1.9988310366990645e-05, "loss": 0.5297, "step": 1551 }, { "epoch": 0.032915526712052764, "grad_norm": 0.3256842792034149, "learning_rate": 1.9988294240946514e-05, "loss": 0.5744, "step": 1552 }, { "epoch": 0.03293673516998579, "grad_norm": 0.3339405059814453, "learning_rate": 1.9988278103793486e-05, "loss": 0.5389, "step": 1553 }, { "epoch": 0.03295794362791881, "grad_norm": 0.32520177960395813, "learning_rate": 1.998826195553158e-05, "loss": 0.6032, "step": 1554 }, { "epoch": 0.03297915208585184, "grad_norm": 0.3936346173286438, "learning_rate": 1.998824579616082e-05, "loss": 0.5575, "step": 1555 }, { "epoch": 0.033000360543784864, "grad_norm": 0.361832857131958, "learning_rate": 1.998822962568122e-05, "loss": 0.5247, "step": 1556 }, { "epoch": 0.03302156900171788, "grad_norm": 0.36056599020957947, "learning_rate": 1.9988213444092793e-05, "loss": 0.636, "step": 1557 }, { "epoch": 0.03304277745965091, "grad_norm": 0.3367122709751129, "learning_rate": 1.998819725139557e-05, "loss": 0.5248, "step": 1558 }, { "epoch": 0.03306398591758393, "grad_norm": 0.3789592683315277, "learning_rate": 1.998818104758955e-05, "loss": 0.5652, "step": 1559 }, { "epoch": 0.033085194375516956, "grad_norm": 0.35772886872291565, "learning_rate": 1.998816483267477e-05, "loss": 0.5015, "step": 1560 }, { "epoch": 0.03310640283344998, "grad_norm": 0.3380836248397827, "learning_rate": 1.9988148606651235e-05, "loss": 0.6381, "step": 1561 }, { "epoch": 0.033127611291383, "grad_norm": 0.31847289204597473, "learning_rate": 1.998813236951897e-05, "loss": 0.6398, "step": 1562 }, { "epoch": 0.03314881974931603, "grad_norm": 0.3381936848163605, "learning_rate": 1.998811612127799e-05, "loss": 0.5959, "step": 1563 }, { "epoch": 0.03317002820724905, "grad_norm": 0.37629249691963196, "learning_rate": 1.9988099861928316e-05, "loss": 0.5409, "step": 1564 }, { "epoch": 0.033191236665182075, "grad_norm": 0.4161582887172699, "learning_rate": 1.9988083591469964e-05, "loss": 0.6112, "step": 1565 }, { "epoch": 0.0332124451231151, "grad_norm": 0.3622797727584839, "learning_rate": 1.998806730990295e-05, "loss": 0.7052, "step": 1566 }, { "epoch": 0.03323365358104812, "grad_norm": 0.33107706904411316, "learning_rate": 1.99880510172273e-05, "loss": 0.58, "step": 1567 }, { "epoch": 0.03325486203898115, "grad_norm": 0.3081870377063751, "learning_rate": 1.998803471344302e-05, "loss": 0.574, "step": 1568 }, { "epoch": 0.03327607049691417, "grad_norm": 0.32454147934913635, "learning_rate": 1.998801839855014e-05, "loss": 0.5624, "step": 1569 }, { "epoch": 0.033297278954847194, "grad_norm": 0.3160974383354187, "learning_rate": 1.9988002072548668e-05, "loss": 0.5099, "step": 1570 }, { "epoch": 0.033318487412780214, "grad_norm": 0.3544299006462097, "learning_rate": 1.9987985735438628e-05, "loss": 0.5152, "step": 1571 }, { "epoch": 0.03333969587071324, "grad_norm": 0.3196443021297455, "learning_rate": 1.9987969387220035e-05, "loss": 0.533, "step": 1572 }, { "epoch": 0.03336090432864627, "grad_norm": 0.3429868519306183, "learning_rate": 1.9987953027892914e-05, "loss": 0.6178, "step": 1573 }, { "epoch": 0.033382112786579286, "grad_norm": 0.33808383345603943, "learning_rate": 1.9987936657457274e-05, "loss": 0.5703, "step": 1574 }, { "epoch": 0.03340332124451231, "grad_norm": 0.3868117928504944, "learning_rate": 1.998792027591314e-05, "loss": 0.5641, "step": 1575 }, { "epoch": 0.03342452970244533, "grad_norm": 0.32712095975875854, "learning_rate": 1.998790388326053e-05, "loss": 0.5687, "step": 1576 }, { "epoch": 0.03344573816037836, "grad_norm": 0.3778836131095886, "learning_rate": 1.9987887479499456e-05, "loss": 0.704, "step": 1577 }, { "epoch": 0.033466946618311386, "grad_norm": 0.34113025665283203, "learning_rate": 1.998787106462994e-05, "loss": 0.5726, "step": 1578 }, { "epoch": 0.033488155076244405, "grad_norm": 0.3549773693084717, "learning_rate": 1.9987854638652003e-05, "loss": 0.5803, "step": 1579 }, { "epoch": 0.03350936353417743, "grad_norm": 0.34153619408607483, "learning_rate": 1.9987838201565658e-05, "loss": 0.5424, "step": 1580 }, { "epoch": 0.03353057199211045, "grad_norm": 0.3454766571521759, "learning_rate": 1.998782175337093e-05, "loss": 0.5723, "step": 1581 }, { "epoch": 0.03355178045004348, "grad_norm": 0.43311893939971924, "learning_rate": 1.9987805294067832e-05, "loss": 0.5321, "step": 1582 }, { "epoch": 0.0335729889079765, "grad_norm": 0.30675560235977173, "learning_rate": 1.9987788823656382e-05, "loss": 0.4577, "step": 1583 }, { "epoch": 0.033594197365909524, "grad_norm": 0.3566882014274597, "learning_rate": 1.9987772342136603e-05, "loss": 0.5556, "step": 1584 }, { "epoch": 0.03361540582384255, "grad_norm": 0.3664534091949463, "learning_rate": 1.9987755849508508e-05, "loss": 0.5645, "step": 1585 }, { "epoch": 0.03363661428177557, "grad_norm": 0.3283439576625824, "learning_rate": 1.998773934577212e-05, "loss": 0.5757, "step": 1586 }, { "epoch": 0.0336578227397086, "grad_norm": 0.30781641602516174, "learning_rate": 1.998772283092745e-05, "loss": 0.4699, "step": 1587 }, { "epoch": 0.033679031197641617, "grad_norm": 0.32192692160606384, "learning_rate": 1.998770630497453e-05, "loss": 0.569, "step": 1588 }, { "epoch": 0.03370023965557464, "grad_norm": 0.4225517213344574, "learning_rate": 1.9987689767913363e-05, "loss": 0.5601, "step": 1589 }, { "epoch": 0.03372144811350767, "grad_norm": 0.30930599570274353, "learning_rate": 1.9987673219743977e-05, "loss": 0.4866, "step": 1590 }, { "epoch": 0.03374265657144069, "grad_norm": 0.4462014138698578, "learning_rate": 1.9987656660466386e-05, "loss": 0.6182, "step": 1591 }, { "epoch": 0.033763865029373716, "grad_norm": 0.3410101532936096, "learning_rate": 1.998764009008061e-05, "loss": 0.513, "step": 1592 }, { "epoch": 0.033785073487306735, "grad_norm": 0.32653018832206726, "learning_rate": 1.998762350858667e-05, "loss": 0.5118, "step": 1593 }, { "epoch": 0.03380628194523976, "grad_norm": 0.35484257340431213, "learning_rate": 1.9987606915984578e-05, "loss": 0.5032, "step": 1594 }, { "epoch": 0.03382749040317279, "grad_norm": 0.33357518911361694, "learning_rate": 1.998759031227436e-05, "loss": 0.5895, "step": 1595 }, { "epoch": 0.03384869886110581, "grad_norm": 0.3576677143573761, "learning_rate": 1.9987573697456027e-05, "loss": 0.6116, "step": 1596 }, { "epoch": 0.033869907319038835, "grad_norm": 0.3965832591056824, "learning_rate": 1.9987557071529605e-05, "loss": 0.6373, "step": 1597 }, { "epoch": 0.033891115776971854, "grad_norm": 0.33890774846076965, "learning_rate": 1.998754043449511e-05, "loss": 0.5529, "step": 1598 }, { "epoch": 0.03391232423490488, "grad_norm": 0.42806780338287354, "learning_rate": 1.9987523786352555e-05, "loss": 0.6365, "step": 1599 }, { "epoch": 0.0339335326928379, "grad_norm": 0.35770562291145325, "learning_rate": 1.9987507127101963e-05, "loss": 0.6706, "step": 1600 }, { "epoch": 0.03395474115077093, "grad_norm": 0.3410753011703491, "learning_rate": 1.9987490456743354e-05, "loss": 0.576, "step": 1601 }, { "epoch": 0.033975949608703954, "grad_norm": 0.31219178438186646, "learning_rate": 1.9987473775276745e-05, "loss": 0.6272, "step": 1602 }, { "epoch": 0.03399715806663697, "grad_norm": 0.3275754749774933, "learning_rate": 1.9987457082702153e-05, "loss": 0.5073, "step": 1603 }, { "epoch": 0.03401836652457, "grad_norm": 0.30578434467315674, "learning_rate": 1.9987440379019597e-05, "loss": 0.5955, "step": 1604 }, { "epoch": 0.03403957498250302, "grad_norm": 0.43547922372817993, "learning_rate": 1.99874236642291e-05, "loss": 0.6135, "step": 1605 }, { "epoch": 0.034060783440436046, "grad_norm": 0.30837947130203247, "learning_rate": 1.9987406938330676e-05, "loss": 0.5045, "step": 1606 }, { "epoch": 0.03408199189836907, "grad_norm": 0.3221975862979889, "learning_rate": 1.9987390201324346e-05, "loss": 0.6262, "step": 1607 }, { "epoch": 0.03410320035630209, "grad_norm": 0.3056246042251587, "learning_rate": 1.9987373453210122e-05, "loss": 0.5492, "step": 1608 }, { "epoch": 0.03412440881423512, "grad_norm": 0.35157737135887146, "learning_rate": 1.9987356693988033e-05, "loss": 0.5118, "step": 1609 }, { "epoch": 0.03414561727216814, "grad_norm": 0.5327381491661072, "learning_rate": 1.9987339923658093e-05, "loss": 0.5578, "step": 1610 }, { "epoch": 0.034166825730101165, "grad_norm": 0.34865817427635193, "learning_rate": 1.998732314222032e-05, "loss": 0.5344, "step": 1611 }, { "epoch": 0.03418803418803419, "grad_norm": 0.29242077469825745, "learning_rate": 1.998730634967473e-05, "loss": 0.5676, "step": 1612 }, { "epoch": 0.03420924264596721, "grad_norm": 0.4376832842826843, "learning_rate": 1.9987289546021345e-05, "loss": 0.5474, "step": 1613 }, { "epoch": 0.03423045110390024, "grad_norm": 0.366172194480896, "learning_rate": 1.9987272731260187e-05, "loss": 0.5621, "step": 1614 }, { "epoch": 0.03425165956183326, "grad_norm": 0.33490923047065735, "learning_rate": 1.998725590539127e-05, "loss": 0.5247, "step": 1615 }, { "epoch": 0.034272868019766284, "grad_norm": 0.7437321543693542, "learning_rate": 1.998723906841461e-05, "loss": 0.6058, "step": 1616 }, { "epoch": 0.0342940764776993, "grad_norm": 0.3611621856689453, "learning_rate": 1.998722222033023e-05, "loss": 0.4476, "step": 1617 }, { "epoch": 0.03431528493563233, "grad_norm": 0.3904166519641876, "learning_rate": 1.9987205361138153e-05, "loss": 0.6191, "step": 1618 }, { "epoch": 0.03433649339356536, "grad_norm": 0.3199140727519989, "learning_rate": 1.9987188490838393e-05, "loss": 0.5703, "step": 1619 }, { "epoch": 0.034357701851498376, "grad_norm": 0.4520297646522522, "learning_rate": 1.9987171609430964e-05, "loss": 0.5367, "step": 1620 }, { "epoch": 0.0343789103094314, "grad_norm": 0.4094850718975067, "learning_rate": 1.998715471691589e-05, "loss": 0.6169, "step": 1621 }, { "epoch": 0.03440011876736442, "grad_norm": 0.3334827721118927, "learning_rate": 1.998713781329319e-05, "loss": 0.5091, "step": 1622 }, { "epoch": 0.03442132722529745, "grad_norm": 0.3240543603897095, "learning_rate": 1.9987120898562883e-05, "loss": 0.6037, "step": 1623 }, { "epoch": 0.034442535683230475, "grad_norm": 0.35608378052711487, "learning_rate": 1.9987103972724985e-05, "loss": 0.6456, "step": 1624 }, { "epoch": 0.034463744141163495, "grad_norm": 0.32471850514411926, "learning_rate": 1.9987087035779524e-05, "loss": 0.5723, "step": 1625 }, { "epoch": 0.03448495259909652, "grad_norm": 0.4691865146160126, "learning_rate": 1.9987070087726503e-05, "loss": 0.5935, "step": 1626 }, { "epoch": 0.03450616105702954, "grad_norm": 0.32277950644493103, "learning_rate": 1.9987053128565953e-05, "loss": 0.5523, "step": 1627 }, { "epoch": 0.03452736951496257, "grad_norm": 0.35020625591278076, "learning_rate": 1.9987036158297887e-05, "loss": 0.424, "step": 1628 }, { "epoch": 0.03454857797289559, "grad_norm": 0.3211125433444977, "learning_rate": 1.998701917692233e-05, "loss": 0.5515, "step": 1629 }, { "epoch": 0.034569786430828614, "grad_norm": 0.31826484203338623, "learning_rate": 1.9987002184439292e-05, "loss": 0.5425, "step": 1630 }, { "epoch": 0.03459099488876164, "grad_norm": 0.35803231596946716, "learning_rate": 1.99869851808488e-05, "loss": 0.5447, "step": 1631 }, { "epoch": 0.03461220334669466, "grad_norm": 0.3450668752193451, "learning_rate": 1.9986968166150872e-05, "loss": 0.4733, "step": 1632 }, { "epoch": 0.03463341180462769, "grad_norm": 0.3606686592102051, "learning_rate": 1.9986951140345522e-05, "loss": 0.5844, "step": 1633 }, { "epoch": 0.034654620262560706, "grad_norm": 0.3913233280181885, "learning_rate": 1.9986934103432772e-05, "loss": 0.578, "step": 1634 }, { "epoch": 0.03467582872049373, "grad_norm": 0.3120036721229553, "learning_rate": 1.9986917055412638e-05, "loss": 0.5181, "step": 1635 }, { "epoch": 0.03469703717842676, "grad_norm": 0.3866932690143585, "learning_rate": 1.9986899996285144e-05, "loss": 0.5859, "step": 1636 }, { "epoch": 0.03471824563635978, "grad_norm": 0.3430354595184326, "learning_rate": 1.9986882926050307e-05, "loss": 0.5561, "step": 1637 }, { "epoch": 0.034739454094292806, "grad_norm": 0.349485844373703, "learning_rate": 1.9986865844708148e-05, "loss": 0.5708, "step": 1638 }, { "epoch": 0.034760662552225825, "grad_norm": 0.3301217555999756, "learning_rate": 1.998684875225868e-05, "loss": 0.531, "step": 1639 }, { "epoch": 0.03478187101015885, "grad_norm": 0.35032975673675537, "learning_rate": 1.9986831648701925e-05, "loss": 0.5782, "step": 1640 }, { "epoch": 0.03480307946809188, "grad_norm": 0.4068504571914673, "learning_rate": 1.9986814534037907e-05, "loss": 0.5229, "step": 1641 }, { "epoch": 0.0348242879260249, "grad_norm": 0.32888293266296387, "learning_rate": 1.9986797408266636e-05, "loss": 0.596, "step": 1642 }, { "epoch": 0.034845496383957925, "grad_norm": 0.3925340175628662, "learning_rate": 1.998678027138814e-05, "loss": 0.5371, "step": 1643 }, { "epoch": 0.034866704841890944, "grad_norm": 0.322107195854187, "learning_rate": 1.998676312340243e-05, "loss": 0.5314, "step": 1644 }, { "epoch": 0.03488791329982397, "grad_norm": 0.3289991319179535, "learning_rate": 1.9986745964309527e-05, "loss": 0.6414, "step": 1645 }, { "epoch": 0.03490912175775699, "grad_norm": 0.6041913032531738, "learning_rate": 1.9986728794109454e-05, "loss": 0.557, "step": 1646 }, { "epoch": 0.03493033021569002, "grad_norm": 0.38108861446380615, "learning_rate": 1.998671161280223e-05, "loss": 0.5531, "step": 1647 }, { "epoch": 0.034951538673623044, "grad_norm": 0.3367345929145813, "learning_rate": 1.9986694420387873e-05, "loss": 0.4967, "step": 1648 }, { "epoch": 0.03497274713155606, "grad_norm": 0.371073842048645, "learning_rate": 1.9986677216866398e-05, "loss": 0.589, "step": 1649 }, { "epoch": 0.03499395558948909, "grad_norm": 0.3645532429218292, "learning_rate": 1.998666000223783e-05, "loss": 0.5106, "step": 1650 }, { "epoch": 0.03501516404742211, "grad_norm": 0.3107593357563019, "learning_rate": 1.998664277650218e-05, "loss": 0.5579, "step": 1651 }, { "epoch": 0.035036372505355136, "grad_norm": 0.3237669765949249, "learning_rate": 1.9986625539659478e-05, "loss": 0.653, "step": 1652 }, { "epoch": 0.03505758096328816, "grad_norm": 0.32949602603912354, "learning_rate": 1.998660829170974e-05, "loss": 0.49, "step": 1653 }, { "epoch": 0.03507878942122118, "grad_norm": 0.3260342478752136, "learning_rate": 1.998659103265298e-05, "loss": 0.5787, "step": 1654 }, { "epoch": 0.03509999787915421, "grad_norm": 0.4556964039802551, "learning_rate": 1.9986573762489215e-05, "loss": 0.5515, "step": 1655 }, { "epoch": 0.03512120633708723, "grad_norm": 0.33797916769981384, "learning_rate": 1.9986556481218478e-05, "loss": 0.5413, "step": 1656 }, { "epoch": 0.035142414795020255, "grad_norm": 0.41802018880844116, "learning_rate": 1.9986539188840774e-05, "loss": 0.5337, "step": 1657 }, { "epoch": 0.035163623252953274, "grad_norm": 0.305612713098526, "learning_rate": 1.998652188535613e-05, "loss": 0.5807, "step": 1658 }, { "epoch": 0.0351848317108863, "grad_norm": 0.35363128781318665, "learning_rate": 1.9986504570764566e-05, "loss": 0.5438, "step": 1659 }, { "epoch": 0.03520604016881933, "grad_norm": 0.34372812509536743, "learning_rate": 1.9986487245066093e-05, "loss": 0.5566, "step": 1660 }, { "epoch": 0.03522724862675235, "grad_norm": 0.34495416283607483, "learning_rate": 1.998646990826074e-05, "loss": 0.5289, "step": 1661 }, { "epoch": 0.035248457084685374, "grad_norm": 0.32497918605804443, "learning_rate": 1.998645256034852e-05, "loss": 0.4618, "step": 1662 }, { "epoch": 0.03526966554261839, "grad_norm": 0.3359975814819336, "learning_rate": 1.998643520132945e-05, "loss": 0.5277, "step": 1663 }, { "epoch": 0.03529087400055142, "grad_norm": 0.27563154697418213, "learning_rate": 1.998641783120356e-05, "loss": 0.4562, "step": 1664 }, { "epoch": 0.035312082458484446, "grad_norm": 0.40487661957740784, "learning_rate": 1.9986400449970856e-05, "loss": 0.652, "step": 1665 }, { "epoch": 0.035333290916417466, "grad_norm": 0.39584022760391235, "learning_rate": 1.9986383057631372e-05, "loss": 0.6188, "step": 1666 }, { "epoch": 0.03535449937435049, "grad_norm": 0.35288286209106445, "learning_rate": 1.9986365654185117e-05, "loss": 0.5731, "step": 1667 }, { "epoch": 0.03537570783228351, "grad_norm": 0.34333333373069763, "learning_rate": 1.998634823963211e-05, "loss": 0.6275, "step": 1668 }, { "epoch": 0.03539691629021654, "grad_norm": 0.33116382360458374, "learning_rate": 1.9986330813972375e-05, "loss": 0.5738, "step": 1669 }, { "epoch": 0.035418124748149565, "grad_norm": 0.38624608516693115, "learning_rate": 1.998631337720593e-05, "loss": 0.5573, "step": 1670 }, { "epoch": 0.035439333206082585, "grad_norm": 0.38443735241889954, "learning_rate": 1.9986295929332795e-05, "loss": 0.5919, "step": 1671 }, { "epoch": 0.03546054166401561, "grad_norm": 0.36592376232147217, "learning_rate": 1.9986278470352983e-05, "loss": 0.5906, "step": 1672 }, { "epoch": 0.03548175012194863, "grad_norm": 0.4189577102661133, "learning_rate": 1.9986261000266526e-05, "loss": 0.5653, "step": 1673 }, { "epoch": 0.03550295857988166, "grad_norm": 0.3303574323654175, "learning_rate": 1.9986243519073435e-05, "loss": 0.4995, "step": 1674 }, { "epoch": 0.03552416703781468, "grad_norm": 0.31975284218788147, "learning_rate": 1.998622602677373e-05, "loss": 0.5803, "step": 1675 }, { "epoch": 0.035545375495747704, "grad_norm": 0.33047351241111755, "learning_rate": 1.9986208523367425e-05, "loss": 0.5001, "step": 1676 }, { "epoch": 0.03556658395368073, "grad_norm": 0.3517349362373352, "learning_rate": 1.9986191008854554e-05, "loss": 0.5581, "step": 1677 }, { "epoch": 0.03558779241161375, "grad_norm": 0.4196995794773102, "learning_rate": 1.9986173483235123e-05, "loss": 0.5799, "step": 1678 }, { "epoch": 0.03560900086954678, "grad_norm": 0.2958984971046448, "learning_rate": 1.998615594650916e-05, "loss": 0.4779, "step": 1679 }, { "epoch": 0.035630209327479796, "grad_norm": 0.3327327370643616, "learning_rate": 1.998613839867668e-05, "loss": 0.519, "step": 1680 }, { "epoch": 0.03565141778541282, "grad_norm": 0.38037604093551636, "learning_rate": 1.9986120839737703e-05, "loss": 0.5376, "step": 1681 }, { "epoch": 0.03567262624334585, "grad_norm": 0.3073549270629883, "learning_rate": 1.9986103269692248e-05, "loss": 0.5573, "step": 1682 }, { "epoch": 0.03569383470127887, "grad_norm": 0.3395389914512634, "learning_rate": 1.9986085688540337e-05, "loss": 0.5447, "step": 1683 }, { "epoch": 0.035715043159211896, "grad_norm": 0.30582818388938904, "learning_rate": 1.998606809628199e-05, "loss": 0.4994, "step": 1684 }, { "epoch": 0.035736251617144915, "grad_norm": 0.34157270193099976, "learning_rate": 1.9986050492917222e-05, "loss": 0.6208, "step": 1685 }, { "epoch": 0.03575746007507794, "grad_norm": 0.3780338168144226, "learning_rate": 1.9986032878446054e-05, "loss": 0.5837, "step": 1686 }, { "epoch": 0.03577866853301096, "grad_norm": 0.3655281364917755, "learning_rate": 1.9986015252868512e-05, "loss": 0.566, "step": 1687 }, { "epoch": 0.03579987699094399, "grad_norm": 0.32708004117012024, "learning_rate": 1.9985997616184607e-05, "loss": 0.5273, "step": 1688 }, { "epoch": 0.035821085448877014, "grad_norm": 0.3339475691318512, "learning_rate": 1.998597996839436e-05, "loss": 0.5488, "step": 1689 }, { "epoch": 0.035842293906810034, "grad_norm": 0.2987084984779358, "learning_rate": 1.99859623094978e-05, "loss": 0.5712, "step": 1690 }, { "epoch": 0.03586350236474306, "grad_norm": 0.3589285910129547, "learning_rate": 1.9985944639494935e-05, "loss": 0.5917, "step": 1691 }, { "epoch": 0.03588471082267608, "grad_norm": 0.3727550208568573, "learning_rate": 1.9985926958385788e-05, "loss": 0.5593, "step": 1692 }, { "epoch": 0.03590591928060911, "grad_norm": 0.3540547788143158, "learning_rate": 1.9985909266170386e-05, "loss": 0.5998, "step": 1693 }, { "epoch": 0.03592712773854213, "grad_norm": 0.3111687898635864, "learning_rate": 1.9985891562848735e-05, "loss": 0.5611, "step": 1694 }, { "epoch": 0.03594833619647515, "grad_norm": 0.3223099112510681, "learning_rate": 1.9985873848420866e-05, "loss": 0.5463, "step": 1695 }, { "epoch": 0.03596954465440818, "grad_norm": 0.40248894691467285, "learning_rate": 1.9985856122886792e-05, "loss": 0.542, "step": 1696 }, { "epoch": 0.0359907531123412, "grad_norm": 0.3269663453102112, "learning_rate": 1.9985838386246538e-05, "loss": 0.5424, "step": 1697 }, { "epoch": 0.036011961570274226, "grad_norm": 0.322777658700943, "learning_rate": 1.9985820638500123e-05, "loss": 0.4814, "step": 1698 }, { "epoch": 0.03603317002820725, "grad_norm": 0.3299839496612549, "learning_rate": 1.998580287964756e-05, "loss": 0.5234, "step": 1699 }, { "epoch": 0.03605437848614027, "grad_norm": 0.28880253434181213, "learning_rate": 1.998578510968888e-05, "loss": 0.4807, "step": 1700 }, { "epoch": 0.0360755869440733, "grad_norm": 0.5660514235496521, "learning_rate": 1.998576732862409e-05, "loss": 0.6206, "step": 1701 }, { "epoch": 0.03609679540200632, "grad_norm": 0.40494054555892944, "learning_rate": 1.998574953645322e-05, "loss": 0.5287, "step": 1702 }, { "epoch": 0.036118003859939345, "grad_norm": 0.31831124424934387, "learning_rate": 1.998573173317629e-05, "loss": 0.5827, "step": 1703 }, { "epoch": 0.036139212317872364, "grad_norm": 0.35231488943099976, "learning_rate": 1.9985713918793307e-05, "loss": 0.6645, "step": 1704 }, { "epoch": 0.03616042077580539, "grad_norm": 0.34806716442108154, "learning_rate": 1.9985696093304304e-05, "loss": 0.6333, "step": 1705 }, { "epoch": 0.03618162923373842, "grad_norm": 0.3550221920013428, "learning_rate": 1.9985678256709297e-05, "loss": 0.517, "step": 1706 }, { "epoch": 0.03620283769167144, "grad_norm": 0.3794066607952118, "learning_rate": 1.998566040900831e-05, "loss": 0.5466, "step": 1707 }, { "epoch": 0.036224046149604464, "grad_norm": 0.3235571086406708, "learning_rate": 1.998564255020135e-05, "loss": 0.554, "step": 1708 }, { "epoch": 0.03624525460753748, "grad_norm": 0.41951262950897217, "learning_rate": 1.998562468028845e-05, "loss": 0.5095, "step": 1709 }, { "epoch": 0.03626646306547051, "grad_norm": 0.5028435587882996, "learning_rate": 1.998560679926962e-05, "loss": 0.5473, "step": 1710 }, { "epoch": 0.036287671523403536, "grad_norm": 0.3387802839279175, "learning_rate": 1.998558890714489e-05, "loss": 0.5589, "step": 1711 }, { "epoch": 0.036308879981336556, "grad_norm": 0.3209361732006073, "learning_rate": 1.9985571003914273e-05, "loss": 0.4168, "step": 1712 }, { "epoch": 0.03633008843926958, "grad_norm": 0.3464232087135315, "learning_rate": 1.9985553089577792e-05, "loss": 0.5691, "step": 1713 }, { "epoch": 0.0363512968972026, "grad_norm": 0.30504751205444336, "learning_rate": 1.9985535164135462e-05, "loss": 0.5391, "step": 1714 }, { "epoch": 0.03637250535513563, "grad_norm": 0.3815433979034424, "learning_rate": 1.998551722758731e-05, "loss": 0.4472, "step": 1715 }, { "epoch": 0.036393713813068655, "grad_norm": 0.3947257995605469, "learning_rate": 1.998549927993335e-05, "loss": 0.5444, "step": 1716 }, { "epoch": 0.036414922271001675, "grad_norm": 0.3271303176879883, "learning_rate": 1.9985481321173608e-05, "loss": 0.6069, "step": 1717 }, { "epoch": 0.0364361307289347, "grad_norm": 0.34430378675460815, "learning_rate": 1.9985463351308096e-05, "loss": 0.5745, "step": 1718 }, { "epoch": 0.03645733918686772, "grad_norm": 0.3424847424030304, "learning_rate": 1.998544537033684e-05, "loss": 0.52, "step": 1719 }, { "epoch": 0.03647854764480075, "grad_norm": 0.652563750743866, "learning_rate": 1.9985427378259863e-05, "loss": 0.5039, "step": 1720 }, { "epoch": 0.03649975610273377, "grad_norm": 0.7262839078903198, "learning_rate": 1.9985409375077175e-05, "loss": 0.5632, "step": 1721 }, { "epoch": 0.036520964560666794, "grad_norm": 0.38577786087989807, "learning_rate": 1.9985391360788804e-05, "loss": 0.5266, "step": 1722 }, { "epoch": 0.03654217301859982, "grad_norm": 0.31080910563468933, "learning_rate": 1.9985373335394764e-05, "loss": 0.4988, "step": 1723 }, { "epoch": 0.03656338147653284, "grad_norm": 0.38825979828834534, "learning_rate": 1.9985355298895085e-05, "loss": 0.5543, "step": 1724 }, { "epoch": 0.036584589934465866, "grad_norm": 0.35961639881134033, "learning_rate": 1.9985337251289774e-05, "loss": 0.6022, "step": 1725 }, { "epoch": 0.036605798392398886, "grad_norm": 0.311441570520401, "learning_rate": 1.998531919257886e-05, "loss": 0.5615, "step": 1726 }, { "epoch": 0.03662700685033191, "grad_norm": 0.35554638504981995, "learning_rate": 1.998530112276236e-05, "loss": 0.5614, "step": 1727 }, { "epoch": 0.03664821530826494, "grad_norm": 0.35766103863716125, "learning_rate": 1.9985283041840297e-05, "loss": 0.5364, "step": 1728 }, { "epoch": 0.03666942376619796, "grad_norm": 0.36995089054107666, "learning_rate": 1.9985264949812685e-05, "loss": 0.591, "step": 1729 }, { "epoch": 0.036690632224130985, "grad_norm": 0.30686911940574646, "learning_rate": 1.9985246846679554e-05, "loss": 0.5894, "step": 1730 }, { "epoch": 0.036711840682064005, "grad_norm": 0.3217824697494507, "learning_rate": 1.9985228732440912e-05, "loss": 0.5084, "step": 1731 }, { "epoch": 0.03673304913999703, "grad_norm": 0.3561584949493408, "learning_rate": 1.998521060709679e-05, "loss": 0.5339, "step": 1732 }, { "epoch": 0.03675425759793005, "grad_norm": 0.32968464493751526, "learning_rate": 1.9985192470647204e-05, "loss": 0.593, "step": 1733 }, { "epoch": 0.03677546605586308, "grad_norm": 0.37955528497695923, "learning_rate": 1.9985174323092168e-05, "loss": 0.6191, "step": 1734 }, { "epoch": 0.036796674513796104, "grad_norm": 0.3031679391860962, "learning_rate": 1.998515616443171e-05, "loss": 0.5065, "step": 1735 }, { "epoch": 0.036817882971729124, "grad_norm": 0.3591052293777466, "learning_rate": 1.998513799466585e-05, "loss": 0.6151, "step": 1736 }, { "epoch": 0.03683909142966215, "grad_norm": 0.31528687477111816, "learning_rate": 1.9985119813794607e-05, "loss": 0.5633, "step": 1737 }, { "epoch": 0.03686029988759517, "grad_norm": 0.34866663813591003, "learning_rate": 1.9985101621818002e-05, "loss": 0.5141, "step": 1738 }, { "epoch": 0.0368815083455282, "grad_norm": 0.3257739245891571, "learning_rate": 1.998508341873605e-05, "loss": 0.5948, "step": 1739 }, { "epoch": 0.03690271680346122, "grad_norm": 0.3222659230232239, "learning_rate": 1.9985065204548774e-05, "loss": 0.4785, "step": 1740 }, { "epoch": 0.03692392526139424, "grad_norm": 0.3420979380607605, "learning_rate": 1.9985046979256198e-05, "loss": 0.5479, "step": 1741 }, { "epoch": 0.03694513371932727, "grad_norm": 0.38071027398109436, "learning_rate": 1.998502874285834e-05, "loss": 0.5565, "step": 1742 }, { "epoch": 0.03696634217726029, "grad_norm": 0.33299991488456726, "learning_rate": 1.9985010495355218e-05, "loss": 0.5911, "step": 1743 }, { "epoch": 0.036987550635193316, "grad_norm": 0.3120419383049011, "learning_rate": 1.9984992236746855e-05, "loss": 0.5364, "step": 1744 }, { "epoch": 0.03700875909312634, "grad_norm": 0.38828861713409424, "learning_rate": 1.998497396703327e-05, "loss": 0.662, "step": 1745 }, { "epoch": 0.03702996755105936, "grad_norm": 0.30244454741477966, "learning_rate": 1.998495568621448e-05, "loss": 0.5334, "step": 1746 }, { "epoch": 0.03705117600899239, "grad_norm": 0.3524201214313507, "learning_rate": 1.9984937394290513e-05, "loss": 0.5237, "step": 1747 }, { "epoch": 0.03707238446692541, "grad_norm": 0.3440740704536438, "learning_rate": 1.9984919091261384e-05, "loss": 0.6303, "step": 1748 }, { "epoch": 0.037093592924858435, "grad_norm": 0.4078585207462311, "learning_rate": 1.9984900777127117e-05, "loss": 0.5389, "step": 1749 }, { "epoch": 0.037114801382791454, "grad_norm": 0.33682259917259216, "learning_rate": 1.9984882451887727e-05, "loss": 0.5823, "step": 1750 }, { "epoch": 0.03713600984072448, "grad_norm": 0.39061644673347473, "learning_rate": 1.998486411554324e-05, "loss": 0.5841, "step": 1751 }, { "epoch": 0.03715721829865751, "grad_norm": 0.32773929834365845, "learning_rate": 1.9984845768093673e-05, "loss": 0.5099, "step": 1752 }, { "epoch": 0.03717842675659053, "grad_norm": 0.34176141023635864, "learning_rate": 1.9984827409539047e-05, "loss": 0.5959, "step": 1753 }, { "epoch": 0.03719963521452355, "grad_norm": 0.3823026716709137, "learning_rate": 1.9984809039879382e-05, "loss": 0.6185, "step": 1754 }, { "epoch": 0.03722084367245657, "grad_norm": 0.36084413528442383, "learning_rate": 1.9984790659114705e-05, "loss": 0.5529, "step": 1755 }, { "epoch": 0.0372420521303896, "grad_norm": 0.3622978925704956, "learning_rate": 1.9984772267245023e-05, "loss": 0.5251, "step": 1756 }, { "epoch": 0.037263260588322626, "grad_norm": 0.34685713052749634, "learning_rate": 1.9984753864270366e-05, "loss": 0.5118, "step": 1757 }, { "epoch": 0.037284469046255646, "grad_norm": 0.3132157623767853, "learning_rate": 1.9984735450190752e-05, "loss": 0.6064, "step": 1758 }, { "epoch": 0.03730567750418867, "grad_norm": 0.3790903091430664, "learning_rate": 1.9984717025006203e-05, "loss": 0.5895, "step": 1759 }, { "epoch": 0.03732688596212169, "grad_norm": 0.3405190110206604, "learning_rate": 1.998469858871674e-05, "loss": 0.6082, "step": 1760 }, { "epoch": 0.03734809442005472, "grad_norm": 0.32705920934677124, "learning_rate": 1.9984680141322383e-05, "loss": 0.5622, "step": 1761 }, { "epoch": 0.03736930287798774, "grad_norm": 0.2923683226108551, "learning_rate": 1.9984661682823144e-05, "loss": 0.5465, "step": 1762 }, { "epoch": 0.037390511335920765, "grad_norm": 0.3100428879261017, "learning_rate": 1.998464321321906e-05, "loss": 0.4772, "step": 1763 }, { "epoch": 0.03741171979385379, "grad_norm": 0.3654898405075073, "learning_rate": 1.9984624732510136e-05, "loss": 0.6009, "step": 1764 }, { "epoch": 0.03743292825178681, "grad_norm": 0.3651096820831299, "learning_rate": 1.9984606240696404e-05, "loss": 0.4823, "step": 1765 }, { "epoch": 0.03745413670971984, "grad_norm": 0.3965774476528168, "learning_rate": 1.9984587737777877e-05, "loss": 0.5442, "step": 1766 }, { "epoch": 0.03747534516765286, "grad_norm": 0.34379515051841736, "learning_rate": 1.998456922375458e-05, "loss": 0.5452, "step": 1767 }, { "epoch": 0.037496553625585884, "grad_norm": 0.3238995373249054, "learning_rate": 1.998455069862653e-05, "loss": 0.5588, "step": 1768 }, { "epoch": 0.03751776208351891, "grad_norm": 0.4001045227050781, "learning_rate": 1.9984532162393752e-05, "loss": 0.6017, "step": 1769 }, { "epoch": 0.03753897054145193, "grad_norm": 0.3260335922241211, "learning_rate": 1.9984513615056258e-05, "loss": 0.5234, "step": 1770 }, { "epoch": 0.037560178999384956, "grad_norm": 0.32899609208106995, "learning_rate": 1.998449505661408e-05, "loss": 0.6138, "step": 1771 }, { "epoch": 0.037581387457317976, "grad_norm": 0.33136841654777527, "learning_rate": 1.9984476487067234e-05, "loss": 0.5959, "step": 1772 }, { "epoch": 0.037602595915251, "grad_norm": 0.3090762197971344, "learning_rate": 1.9984457906415734e-05, "loss": 0.5488, "step": 1773 }, { "epoch": 0.03762380437318403, "grad_norm": 0.31219056248664856, "learning_rate": 1.9984439314659612e-05, "loss": 0.4171, "step": 1774 }, { "epoch": 0.03764501283111705, "grad_norm": 0.35069286823272705, "learning_rate": 1.998442071179888e-05, "loss": 0.5213, "step": 1775 }, { "epoch": 0.037666221289050075, "grad_norm": 0.32997339963912964, "learning_rate": 1.9984402097833566e-05, "loss": 0.5077, "step": 1776 }, { "epoch": 0.037687429746983095, "grad_norm": 0.37291523814201355, "learning_rate": 1.9984383472763686e-05, "loss": 0.6373, "step": 1777 }, { "epoch": 0.03770863820491612, "grad_norm": 0.3164112865924835, "learning_rate": 1.9984364836589257e-05, "loss": 0.5725, "step": 1778 }, { "epoch": 0.03772984666284914, "grad_norm": 0.3795333802700043, "learning_rate": 1.9984346189310312e-05, "loss": 0.5661, "step": 1779 }, { "epoch": 0.03775105512078217, "grad_norm": 0.3448769748210907, "learning_rate": 1.9984327530926856e-05, "loss": 0.5251, "step": 1780 }, { "epoch": 0.037772263578715194, "grad_norm": 0.3048517405986786, "learning_rate": 1.998430886143892e-05, "loss": 0.5655, "step": 1781 }, { "epoch": 0.037793472036648214, "grad_norm": 0.3389712870121002, "learning_rate": 1.9984290180846525e-05, "loss": 0.5782, "step": 1782 }, { "epoch": 0.03781468049458124, "grad_norm": 0.3659127950668335, "learning_rate": 1.998427148914969e-05, "loss": 0.6052, "step": 1783 }, { "epoch": 0.03783588895251426, "grad_norm": 0.4173765778541565, "learning_rate": 1.998425278634843e-05, "loss": 0.5902, "step": 1784 }, { "epoch": 0.03785709741044729, "grad_norm": 0.33142712712287903, "learning_rate": 1.9984234072442775e-05, "loss": 0.5669, "step": 1785 }, { "epoch": 0.03787830586838031, "grad_norm": 0.3431355357170105, "learning_rate": 1.9984215347432737e-05, "loss": 0.536, "step": 1786 }, { "epoch": 0.03789951432631333, "grad_norm": 0.3527736961841583, "learning_rate": 1.9984196611318345e-05, "loss": 0.5919, "step": 1787 }, { "epoch": 0.03792072278424636, "grad_norm": 0.39097321033477783, "learning_rate": 1.9984177864099616e-05, "loss": 0.622, "step": 1788 }, { "epoch": 0.03794193124217938, "grad_norm": 0.31010162830352783, "learning_rate": 1.9984159105776572e-05, "loss": 0.5483, "step": 1789 }, { "epoch": 0.037963139700112405, "grad_norm": 0.31646963953971863, "learning_rate": 1.998414033634923e-05, "loss": 0.5396, "step": 1790 }, { "epoch": 0.037984348158045425, "grad_norm": 0.3291371464729309, "learning_rate": 1.9984121555817617e-05, "loss": 0.545, "step": 1791 }, { "epoch": 0.03800555661597845, "grad_norm": 0.33414432406425476, "learning_rate": 1.998410276418175e-05, "loss": 0.6022, "step": 1792 }, { "epoch": 0.03802676507391148, "grad_norm": 0.33240175247192383, "learning_rate": 1.998408396144165e-05, "loss": 0.4962, "step": 1793 }, { "epoch": 0.0380479735318445, "grad_norm": 0.3423152565956116, "learning_rate": 1.998406514759734e-05, "loss": 0.5757, "step": 1794 }, { "epoch": 0.038069181989777524, "grad_norm": 0.3213205337524414, "learning_rate": 1.9984046322648835e-05, "loss": 0.5388, "step": 1795 }, { "epoch": 0.038090390447710544, "grad_norm": 0.3335356116294861, "learning_rate": 1.9984027486596163e-05, "loss": 0.5384, "step": 1796 }, { "epoch": 0.03811159890564357, "grad_norm": 0.2876453399658203, "learning_rate": 1.9984008639439343e-05, "loss": 0.4748, "step": 1797 }, { "epoch": 0.0381328073635766, "grad_norm": 0.3210739195346832, "learning_rate": 1.9983989781178396e-05, "loss": 0.5538, "step": 1798 }, { "epoch": 0.03815401582150962, "grad_norm": 0.4035017192363739, "learning_rate": 1.998397091181334e-05, "loss": 0.5538, "step": 1799 }, { "epoch": 0.03817522427944264, "grad_norm": 0.4229133129119873, "learning_rate": 1.99839520313442e-05, "loss": 0.5673, "step": 1800 }, { "epoch": 0.03819643273737566, "grad_norm": 0.34848323464393616, "learning_rate": 1.9983933139770998e-05, "loss": 0.5638, "step": 1801 }, { "epoch": 0.03821764119530869, "grad_norm": 0.3417253792285919, "learning_rate": 1.9983914237093746e-05, "loss": 0.5211, "step": 1802 }, { "epoch": 0.038238849653241716, "grad_norm": 0.37514621019363403, "learning_rate": 1.9983895323312476e-05, "loss": 0.5418, "step": 1803 }, { "epoch": 0.038260058111174736, "grad_norm": 0.41384899616241455, "learning_rate": 1.9983876398427203e-05, "loss": 0.5755, "step": 1804 }, { "epoch": 0.03828126656910776, "grad_norm": 0.3177149295806885, "learning_rate": 1.998385746243795e-05, "loss": 0.5713, "step": 1805 }, { "epoch": 0.03830247502704078, "grad_norm": 0.2993027865886688, "learning_rate": 1.9983838515344732e-05, "loss": 0.4871, "step": 1806 }, { "epoch": 0.03832368348497381, "grad_norm": 0.7928825616836548, "learning_rate": 1.9983819557147585e-05, "loss": 0.5446, "step": 1807 }, { "epoch": 0.03834489194290683, "grad_norm": 0.35159575939178467, "learning_rate": 1.9983800587846513e-05, "loss": 0.6323, "step": 1808 }, { "epoch": 0.038366100400839855, "grad_norm": 0.3376838266849518, "learning_rate": 1.9983781607441548e-05, "loss": 0.5439, "step": 1809 }, { "epoch": 0.03838730885877288, "grad_norm": 0.4424509108066559, "learning_rate": 1.9983762615932706e-05, "loss": 0.6672, "step": 1810 }, { "epoch": 0.0384085173167059, "grad_norm": 0.3959980309009552, "learning_rate": 1.9983743613320014e-05, "loss": 0.5486, "step": 1811 }, { "epoch": 0.03842972577463893, "grad_norm": 0.3555510640144348, "learning_rate": 1.9983724599603485e-05, "loss": 0.4934, "step": 1812 }, { "epoch": 0.03845093423257195, "grad_norm": 0.3144210875034332, "learning_rate": 1.9983705574783148e-05, "loss": 0.569, "step": 1813 }, { "epoch": 0.038472142690504973, "grad_norm": 0.32346656918525696, "learning_rate": 1.998368653885902e-05, "loss": 0.5189, "step": 1814 }, { "epoch": 0.038493351148438, "grad_norm": 0.3349877595901489, "learning_rate": 1.9983667491831117e-05, "loss": 0.5278, "step": 1815 }, { "epoch": 0.03851455960637102, "grad_norm": 0.33470064401626587, "learning_rate": 1.998364843369947e-05, "loss": 0.5733, "step": 1816 }, { "epoch": 0.038535768064304046, "grad_norm": 0.5106126070022583, "learning_rate": 1.9983629364464093e-05, "loss": 0.5913, "step": 1817 }, { "epoch": 0.038556976522237066, "grad_norm": 0.3891933262348175, "learning_rate": 1.9983610284125015e-05, "loss": 0.5928, "step": 1818 }, { "epoch": 0.03857818498017009, "grad_norm": 0.3353281021118164, "learning_rate": 1.998359119268225e-05, "loss": 0.6781, "step": 1819 }, { "epoch": 0.03859939343810312, "grad_norm": 0.2924518883228302, "learning_rate": 1.998357209013582e-05, "loss": 0.4684, "step": 1820 }, { "epoch": 0.03862060189603614, "grad_norm": 0.3687579035758972, "learning_rate": 1.998355297648575e-05, "loss": 0.5545, "step": 1821 }, { "epoch": 0.038641810353969165, "grad_norm": 0.323833167552948, "learning_rate": 1.998353385173206e-05, "loss": 0.5302, "step": 1822 }, { "epoch": 0.038663018811902185, "grad_norm": 0.3188692629337311, "learning_rate": 1.998351471587477e-05, "loss": 0.5419, "step": 1823 }, { "epoch": 0.03868422726983521, "grad_norm": 0.3473614752292633, "learning_rate": 1.9983495568913896e-05, "loss": 0.5267, "step": 1824 }, { "epoch": 0.03870543572776823, "grad_norm": 0.33213603496551514, "learning_rate": 1.998347641084947e-05, "loss": 0.6065, "step": 1825 }, { "epoch": 0.03872664418570126, "grad_norm": 0.3167925477027893, "learning_rate": 1.998345724168151e-05, "loss": 0.5434, "step": 1826 }, { "epoch": 0.038747852643634284, "grad_norm": 0.3880646526813507, "learning_rate": 1.998343806141003e-05, "loss": 0.5638, "step": 1827 }, { "epoch": 0.038769061101567304, "grad_norm": 0.341676265001297, "learning_rate": 1.9983418870035064e-05, "loss": 0.5493, "step": 1828 }, { "epoch": 0.03879026955950033, "grad_norm": 0.3336414098739624, "learning_rate": 1.998339966755662e-05, "loss": 0.5655, "step": 1829 }, { "epoch": 0.03881147801743335, "grad_norm": 0.35182997584342957, "learning_rate": 1.998338045397473e-05, "loss": 0.5353, "step": 1830 }, { "epoch": 0.038832686475366376, "grad_norm": 0.370891273021698, "learning_rate": 1.9983361229289406e-05, "loss": 0.6725, "step": 1831 }, { "epoch": 0.0388538949332994, "grad_norm": 0.3282492458820343, "learning_rate": 1.998334199350068e-05, "loss": 0.5489, "step": 1832 }, { "epoch": 0.03887510339123242, "grad_norm": 0.34675875306129456, "learning_rate": 1.9983322746608564e-05, "loss": 0.5603, "step": 1833 }, { "epoch": 0.03889631184916545, "grad_norm": 0.3763808310031891, "learning_rate": 1.9983303488613084e-05, "loss": 0.5887, "step": 1834 }, { "epoch": 0.03891752030709847, "grad_norm": 0.3318595290184021, "learning_rate": 1.9983284219514262e-05, "loss": 0.5774, "step": 1835 }, { "epoch": 0.038938728765031495, "grad_norm": 0.3275100886821747, "learning_rate": 1.998326493931212e-05, "loss": 0.5432, "step": 1836 }, { "epoch": 0.038959937222964515, "grad_norm": 0.35103780031204224, "learning_rate": 1.998324564800667e-05, "loss": 0.563, "step": 1837 }, { "epoch": 0.03898114568089754, "grad_norm": 0.3087127208709717, "learning_rate": 1.9983226345597947e-05, "loss": 0.4849, "step": 1838 }, { "epoch": 0.03900235413883057, "grad_norm": 0.37645843625068665, "learning_rate": 1.998320703208596e-05, "loss": 0.5654, "step": 1839 }, { "epoch": 0.03902356259676359, "grad_norm": 0.35348811745643616, "learning_rate": 1.9983187707470747e-05, "loss": 0.5579, "step": 1840 }, { "epoch": 0.039044771054696614, "grad_norm": 0.287818044424057, "learning_rate": 1.998316837175231e-05, "loss": 0.5053, "step": 1841 }, { "epoch": 0.039065979512629634, "grad_norm": 0.3160885274410248, "learning_rate": 1.9983149024930686e-05, "loss": 0.5197, "step": 1842 }, { "epoch": 0.03908718797056266, "grad_norm": 0.31475919485092163, "learning_rate": 1.9983129667005887e-05, "loss": 0.5005, "step": 1843 }, { "epoch": 0.03910839642849569, "grad_norm": 0.3513064384460449, "learning_rate": 1.998311029797794e-05, "loss": 0.5738, "step": 1844 }, { "epoch": 0.03912960488642871, "grad_norm": 0.3438945412635803, "learning_rate": 1.998309091784686e-05, "loss": 0.4582, "step": 1845 }, { "epoch": 0.03915081334436173, "grad_norm": 0.3130645751953125, "learning_rate": 1.9983071526612678e-05, "loss": 0.4482, "step": 1846 }, { "epoch": 0.03917202180229475, "grad_norm": 0.41191279888153076, "learning_rate": 1.998305212427541e-05, "loss": 0.558, "step": 1847 }, { "epoch": 0.03919323026022778, "grad_norm": 0.32575565576553345, "learning_rate": 1.9983032710835076e-05, "loss": 0.5682, "step": 1848 }, { "epoch": 0.039214438718160806, "grad_norm": 0.3976095914840698, "learning_rate": 1.9983013286291702e-05, "loss": 0.5407, "step": 1849 }, { "epoch": 0.039235647176093826, "grad_norm": 0.37927570939064026, "learning_rate": 1.9982993850645308e-05, "loss": 0.5121, "step": 1850 }, { "epoch": 0.03925685563402685, "grad_norm": 0.3466268479824066, "learning_rate": 1.998297440389591e-05, "loss": 0.4643, "step": 1851 }, { "epoch": 0.03927806409195987, "grad_norm": 0.3124309182167053, "learning_rate": 1.998295494604354e-05, "loss": 0.4815, "step": 1852 }, { "epoch": 0.0392992725498929, "grad_norm": 0.3216899633407593, "learning_rate": 1.998293547708821e-05, "loss": 0.5128, "step": 1853 }, { "epoch": 0.03932048100782592, "grad_norm": 0.3614428639411926, "learning_rate": 1.998291599702995e-05, "loss": 0.5186, "step": 1854 }, { "epoch": 0.039341689465758944, "grad_norm": 0.30390989780426025, "learning_rate": 1.9982896505868775e-05, "loss": 0.5352, "step": 1855 }, { "epoch": 0.03936289792369197, "grad_norm": 0.36462366580963135, "learning_rate": 1.998287700360471e-05, "loss": 0.562, "step": 1856 }, { "epoch": 0.03938410638162499, "grad_norm": 0.3376551866531372, "learning_rate": 1.9982857490237774e-05, "loss": 0.6063, "step": 1857 }, { "epoch": 0.03940531483955802, "grad_norm": 0.29277217388153076, "learning_rate": 1.9982837965767995e-05, "loss": 0.4605, "step": 1858 }, { "epoch": 0.03942652329749104, "grad_norm": 0.5597720146179199, "learning_rate": 1.9982818430195386e-05, "loss": 0.4892, "step": 1859 }, { "epoch": 0.03944773175542406, "grad_norm": 0.33366668224334717, "learning_rate": 1.9982798883519975e-05, "loss": 0.5029, "step": 1860 }, { "epoch": 0.03946894021335709, "grad_norm": 0.297687828540802, "learning_rate": 1.9982779325741784e-05, "loss": 0.5162, "step": 1861 }, { "epoch": 0.03949014867129011, "grad_norm": 0.31323838233947754, "learning_rate": 1.9982759756860827e-05, "loss": 0.5137, "step": 1862 }, { "epoch": 0.039511357129223136, "grad_norm": 0.43360012769699097, "learning_rate": 1.9982740176877136e-05, "loss": 0.6592, "step": 1863 }, { "epoch": 0.039532565587156156, "grad_norm": 0.3006550371646881, "learning_rate": 1.9982720585790726e-05, "loss": 0.5197, "step": 1864 }, { "epoch": 0.03955377404508918, "grad_norm": 0.300723135471344, "learning_rate": 1.9982700983601622e-05, "loss": 0.5013, "step": 1865 }, { "epoch": 0.0395749825030222, "grad_norm": 0.31027933955192566, "learning_rate": 1.9982681370309844e-05, "loss": 0.465, "step": 1866 }, { "epoch": 0.03959619096095523, "grad_norm": 0.5506749153137207, "learning_rate": 1.9982661745915415e-05, "loss": 0.5241, "step": 1867 }, { "epoch": 0.039617399418888255, "grad_norm": 0.36487624049186707, "learning_rate": 1.9982642110418357e-05, "loss": 0.6102, "step": 1868 }, { "epoch": 0.039638607876821275, "grad_norm": 0.3344014883041382, "learning_rate": 1.998262246381869e-05, "loss": 0.6023, "step": 1869 }, { "epoch": 0.0396598163347543, "grad_norm": 0.3112836480140686, "learning_rate": 1.9982602806116434e-05, "loss": 0.5668, "step": 1870 }, { "epoch": 0.03968102479268732, "grad_norm": 0.4138396382331848, "learning_rate": 1.9982583137311618e-05, "loss": 0.6339, "step": 1871 }, { "epoch": 0.03970223325062035, "grad_norm": 0.3674389719963074, "learning_rate": 1.998256345740426e-05, "loss": 0.6466, "step": 1872 }, { "epoch": 0.039723441708553374, "grad_norm": 0.33207178115844727, "learning_rate": 1.998254376639438e-05, "loss": 0.5469, "step": 1873 }, { "epoch": 0.039744650166486394, "grad_norm": 0.41540926694869995, "learning_rate": 1.9982524064282e-05, "loss": 0.6191, "step": 1874 }, { "epoch": 0.03976585862441942, "grad_norm": 0.3576764166355133, "learning_rate": 1.9982504351067145e-05, "loss": 0.5695, "step": 1875 }, { "epoch": 0.03978706708235244, "grad_norm": 0.3189852833747864, "learning_rate": 1.9982484626749836e-05, "loss": 0.5281, "step": 1876 }, { "epoch": 0.039808275540285466, "grad_norm": 0.3564063608646393, "learning_rate": 1.9982464891330092e-05, "loss": 0.6095, "step": 1877 }, { "epoch": 0.03982948399821849, "grad_norm": 0.3790127635002136, "learning_rate": 1.9982445144807943e-05, "loss": 0.5776, "step": 1878 }, { "epoch": 0.03985069245615151, "grad_norm": 0.3024339973926544, "learning_rate": 1.99824253871834e-05, "loss": 0.4966, "step": 1879 }, { "epoch": 0.03987190091408454, "grad_norm": 0.3106319010257721, "learning_rate": 1.9982405618456492e-05, "loss": 0.5108, "step": 1880 }, { "epoch": 0.03989310937201756, "grad_norm": 0.32705843448638916, "learning_rate": 1.998238583862724e-05, "loss": 0.4929, "step": 1881 }, { "epoch": 0.039914317829950585, "grad_norm": 0.31437361240386963, "learning_rate": 1.9982366047695664e-05, "loss": 0.5469, "step": 1882 }, { "epoch": 0.039935526287883605, "grad_norm": 0.31119537353515625, "learning_rate": 1.9982346245661787e-05, "loss": 0.6285, "step": 1883 }, { "epoch": 0.03995673474581663, "grad_norm": 0.3352918326854706, "learning_rate": 1.9982326432525632e-05, "loss": 0.584, "step": 1884 }, { "epoch": 0.03997794320374966, "grad_norm": 0.33167269825935364, "learning_rate": 1.9982306608287217e-05, "loss": 0.5146, "step": 1885 }, { "epoch": 0.03999915166168268, "grad_norm": 0.31795310974121094, "learning_rate": 1.9982286772946572e-05, "loss": 0.5842, "step": 1886 }, { "epoch": 0.040020360119615704, "grad_norm": 0.3143123388290405, "learning_rate": 1.9982266926503713e-05, "loss": 0.4856, "step": 1887 }, { "epoch": 0.040041568577548724, "grad_norm": 0.35630834102630615, "learning_rate": 1.998224706895866e-05, "loss": 0.6089, "step": 1888 }, { "epoch": 0.04006277703548175, "grad_norm": 0.3924027383327484, "learning_rate": 1.9982227200311444e-05, "loss": 0.6067, "step": 1889 }, { "epoch": 0.04008398549341478, "grad_norm": 0.33087947964668274, "learning_rate": 1.9982207320562078e-05, "loss": 0.545, "step": 1890 }, { "epoch": 0.040105193951347796, "grad_norm": 0.4637828469276428, "learning_rate": 1.998218742971059e-05, "loss": 0.5746, "step": 1891 }, { "epoch": 0.04012640240928082, "grad_norm": 0.30880972743034363, "learning_rate": 1.9982167527757e-05, "loss": 0.5512, "step": 1892 }, { "epoch": 0.04014761086721384, "grad_norm": 0.3720370829105377, "learning_rate": 1.9982147614701325e-05, "loss": 0.4885, "step": 1893 }, { "epoch": 0.04016881932514687, "grad_norm": 0.38138970732688904, "learning_rate": 1.9982127690543597e-05, "loss": 0.6234, "step": 1894 }, { "epoch": 0.04019002778307989, "grad_norm": 0.3807353079319, "learning_rate": 1.998210775528383e-05, "loss": 0.6127, "step": 1895 }, { "epoch": 0.040211236241012915, "grad_norm": 0.3464992344379425, "learning_rate": 1.9982087808922052e-05, "loss": 0.5491, "step": 1896 }, { "epoch": 0.04023244469894594, "grad_norm": 0.3442661464214325, "learning_rate": 1.9982067851458284e-05, "loss": 0.5036, "step": 1897 }, { "epoch": 0.04025365315687896, "grad_norm": 0.4431553781032562, "learning_rate": 1.9982047882892542e-05, "loss": 0.6121, "step": 1898 }, { "epoch": 0.04027486161481199, "grad_norm": 0.3519727289676666, "learning_rate": 1.998202790322486e-05, "loss": 0.5649, "step": 1899 }, { "epoch": 0.04029607007274501, "grad_norm": 0.3670773208141327, "learning_rate": 1.9982007912455247e-05, "loss": 0.6036, "step": 1900 }, { "epoch": 0.040317278530678034, "grad_norm": 0.35942304134368896, "learning_rate": 1.9981987910583736e-05, "loss": 0.4966, "step": 1901 }, { "epoch": 0.04033848698861106, "grad_norm": 0.3337857723236084, "learning_rate": 1.998196789761034e-05, "loss": 0.5358, "step": 1902 }, { "epoch": 0.04035969544654408, "grad_norm": 0.42034831643104553, "learning_rate": 1.998194787353509e-05, "loss": 0.5152, "step": 1903 }, { "epoch": 0.04038090390447711, "grad_norm": 0.35854846239089966, "learning_rate": 1.9981927838358005e-05, "loss": 0.6126, "step": 1904 }, { "epoch": 0.04040211236241013, "grad_norm": 0.3668428659439087, "learning_rate": 1.99819077920791e-05, "loss": 0.5561, "step": 1905 }, { "epoch": 0.04042332082034315, "grad_norm": 0.34038347005844116, "learning_rate": 1.998188773469841e-05, "loss": 0.5808, "step": 1906 }, { "epoch": 0.04044452927827618, "grad_norm": 0.333181232213974, "learning_rate": 1.9981867666215952e-05, "loss": 0.5775, "step": 1907 }, { "epoch": 0.0404657377362092, "grad_norm": 0.37552207708358765, "learning_rate": 1.9981847586631744e-05, "loss": 0.5357, "step": 1908 }, { "epoch": 0.040486946194142226, "grad_norm": 0.33745113015174866, "learning_rate": 1.998182749594581e-05, "loss": 0.5861, "step": 1909 }, { "epoch": 0.040508154652075246, "grad_norm": 0.29809093475341797, "learning_rate": 1.998180739415818e-05, "loss": 0.4862, "step": 1910 }, { "epoch": 0.04052936311000827, "grad_norm": 0.34325268864631653, "learning_rate": 1.9981787281268868e-05, "loss": 0.5173, "step": 1911 }, { "epoch": 0.04055057156794129, "grad_norm": 0.33481407165527344, "learning_rate": 1.99817671572779e-05, "loss": 0.5066, "step": 1912 }, { "epoch": 0.04057178002587432, "grad_norm": 0.3554627001285553, "learning_rate": 1.9981747022185295e-05, "loss": 0.5694, "step": 1913 }, { "epoch": 0.040592988483807345, "grad_norm": 0.3108213543891907, "learning_rate": 1.9981726875991076e-05, "loss": 0.5542, "step": 1914 }, { "epoch": 0.040614196941740364, "grad_norm": 0.4251183569431305, "learning_rate": 1.998170671869527e-05, "loss": 0.6115, "step": 1915 }, { "epoch": 0.04063540539967339, "grad_norm": 0.3519827425479889, "learning_rate": 1.9981686550297896e-05, "loss": 0.4707, "step": 1916 }, { "epoch": 0.04065661385760641, "grad_norm": 0.30501046776771545, "learning_rate": 1.998166637079898e-05, "loss": 0.5549, "step": 1917 }, { "epoch": 0.04067782231553944, "grad_norm": 0.3319173753261566, "learning_rate": 1.9981646180198536e-05, "loss": 0.6055, "step": 1918 }, { "epoch": 0.040699030773472464, "grad_norm": 0.3269084095954895, "learning_rate": 1.9981625978496594e-05, "loss": 0.5395, "step": 1919 }, { "epoch": 0.04072023923140548, "grad_norm": 0.31895822286605835, "learning_rate": 1.9981605765693172e-05, "loss": 0.47, "step": 1920 }, { "epoch": 0.04074144768933851, "grad_norm": 0.4177064299583435, "learning_rate": 1.9981585541788298e-05, "loss": 0.5142, "step": 1921 }, { "epoch": 0.04076265614727153, "grad_norm": 0.31775686144828796, "learning_rate": 1.998156530678199e-05, "loss": 0.553, "step": 1922 }, { "epoch": 0.040783864605204556, "grad_norm": 0.31265002489089966, "learning_rate": 1.9981545060674276e-05, "loss": 0.5382, "step": 1923 }, { "epoch": 0.04080507306313758, "grad_norm": 0.305481880903244, "learning_rate": 1.9981524803465168e-05, "loss": 0.4439, "step": 1924 }, { "epoch": 0.0408262815210706, "grad_norm": 0.36065325140953064, "learning_rate": 1.99815045351547e-05, "loss": 0.6145, "step": 1925 }, { "epoch": 0.04084748997900363, "grad_norm": 0.34499743580818176, "learning_rate": 1.9981484255742884e-05, "loss": 0.5296, "step": 1926 }, { "epoch": 0.04086869843693665, "grad_norm": 0.34594228863716125, "learning_rate": 1.998146396522975e-05, "loss": 0.5905, "step": 1927 }, { "epoch": 0.040889906894869675, "grad_norm": 0.3120577335357666, "learning_rate": 1.998144366361532e-05, "loss": 0.5212, "step": 1928 }, { "epoch": 0.040911115352802695, "grad_norm": 0.3205433189868927, "learning_rate": 1.9981423350899615e-05, "loss": 0.5358, "step": 1929 }, { "epoch": 0.04093232381073572, "grad_norm": 0.3038367033004761, "learning_rate": 1.998140302708266e-05, "loss": 0.4792, "step": 1930 }, { "epoch": 0.04095353226866875, "grad_norm": 0.3359050154685974, "learning_rate": 1.998138269216447e-05, "loss": 0.5276, "step": 1931 }, { "epoch": 0.04097474072660177, "grad_norm": 0.3281975984573364, "learning_rate": 1.9981362346145073e-05, "loss": 0.6041, "step": 1932 }, { "epoch": 0.040995949184534794, "grad_norm": 0.33886653184890747, "learning_rate": 1.9981341989024496e-05, "loss": 0.577, "step": 1933 }, { "epoch": 0.041017157642467814, "grad_norm": 0.3714391589164734, "learning_rate": 1.9981321620802755e-05, "loss": 0.5523, "step": 1934 }, { "epoch": 0.04103836610040084, "grad_norm": 0.36014169454574585, "learning_rate": 1.9981301241479875e-05, "loss": 0.5599, "step": 1935 }, { "epoch": 0.04105957455833387, "grad_norm": 0.3720541298389435, "learning_rate": 1.9981280851055876e-05, "loss": 0.5042, "step": 1936 }, { "epoch": 0.041080783016266886, "grad_norm": 0.3150661587715149, "learning_rate": 1.9981260449530785e-05, "loss": 0.4919, "step": 1937 }, { "epoch": 0.04110199147419991, "grad_norm": 0.3023146986961365, "learning_rate": 1.9981240036904623e-05, "loss": 0.5529, "step": 1938 }, { "epoch": 0.04112319993213293, "grad_norm": 0.3696354031562805, "learning_rate": 1.9981219613177413e-05, "loss": 0.6354, "step": 1939 }, { "epoch": 0.04114440839006596, "grad_norm": 0.35578373074531555, "learning_rate": 1.9981199178349175e-05, "loss": 0.5666, "step": 1940 }, { "epoch": 0.04116561684799898, "grad_norm": 0.41368162631988525, "learning_rate": 1.9981178732419937e-05, "loss": 0.5076, "step": 1941 }, { "epoch": 0.041186825305932005, "grad_norm": 0.3269716799259186, "learning_rate": 1.9981158275389716e-05, "loss": 0.5046, "step": 1942 }, { "epoch": 0.04120803376386503, "grad_norm": 0.29546430706977844, "learning_rate": 1.998113780725854e-05, "loss": 0.5139, "step": 1943 }, { "epoch": 0.04122924222179805, "grad_norm": 0.32137784361839294, "learning_rate": 1.998111732802643e-05, "loss": 0.5222, "step": 1944 }, { "epoch": 0.04125045067973108, "grad_norm": 0.32640540599823, "learning_rate": 1.9981096837693403e-05, "loss": 0.6359, "step": 1945 }, { "epoch": 0.0412716591376641, "grad_norm": 0.2993171811103821, "learning_rate": 1.998107633625949e-05, "loss": 0.4938, "step": 1946 }, { "epoch": 0.041292867595597124, "grad_norm": 0.332664430141449, "learning_rate": 1.9981055823724712e-05, "loss": 0.5493, "step": 1947 }, { "epoch": 0.04131407605353015, "grad_norm": 0.34434399008750916, "learning_rate": 1.998103530008909e-05, "loss": 0.5115, "step": 1948 }, { "epoch": 0.04133528451146317, "grad_norm": 0.31576457619667053, "learning_rate": 1.9981014765352644e-05, "loss": 0.5212, "step": 1949 }, { "epoch": 0.0413564929693962, "grad_norm": 0.35426825284957886, "learning_rate": 1.9980994219515403e-05, "loss": 0.5757, "step": 1950 }, { "epoch": 0.041377701427329217, "grad_norm": 0.3492608666419983, "learning_rate": 1.9980973662577384e-05, "loss": 0.5565, "step": 1951 }, { "epoch": 0.04139890988526224, "grad_norm": 0.39327603578567505, "learning_rate": 1.9980953094538616e-05, "loss": 0.5093, "step": 1952 }, { "epoch": 0.04142011834319527, "grad_norm": 0.31830158829689026, "learning_rate": 1.998093251539912e-05, "loss": 0.6042, "step": 1953 }, { "epoch": 0.04144132680112829, "grad_norm": 0.29647698998451233, "learning_rate": 1.9980911925158913e-05, "loss": 0.46, "step": 1954 }, { "epoch": 0.041462535259061316, "grad_norm": 0.3294065594673157, "learning_rate": 1.998089132381803e-05, "loss": 0.589, "step": 1955 }, { "epoch": 0.041483743716994335, "grad_norm": 0.3186299204826355, "learning_rate": 1.998087071137648e-05, "loss": 0.5486, "step": 1956 }, { "epoch": 0.04150495217492736, "grad_norm": 0.3327064514160156, "learning_rate": 1.998085008783429e-05, "loss": 0.601, "step": 1957 }, { "epoch": 0.04152616063286038, "grad_norm": 0.32593411207199097, "learning_rate": 1.998082945319149e-05, "loss": 0.5429, "step": 1958 }, { "epoch": 0.04154736909079341, "grad_norm": 0.3170648217201233, "learning_rate": 1.9980808807448094e-05, "loss": 0.535, "step": 1959 }, { "epoch": 0.041568577548726435, "grad_norm": 0.3561059534549713, "learning_rate": 1.9980788150604136e-05, "loss": 0.5226, "step": 1960 }, { "epoch": 0.041589786006659454, "grad_norm": 0.3426947593688965, "learning_rate": 1.9980767482659628e-05, "loss": 0.5551, "step": 1961 }, { "epoch": 0.04161099446459248, "grad_norm": 0.34689435362815857, "learning_rate": 1.9980746803614597e-05, "loss": 0.5685, "step": 1962 }, { "epoch": 0.0416322029225255, "grad_norm": 0.30191534757614136, "learning_rate": 1.9980726113469067e-05, "loss": 0.5598, "step": 1963 }, { "epoch": 0.04165341138045853, "grad_norm": 0.3147541582584381, "learning_rate": 1.9980705412223058e-05, "loss": 0.5227, "step": 1964 }, { "epoch": 0.041674619838391554, "grad_norm": 0.2899840474128723, "learning_rate": 1.9980684699876595e-05, "loss": 0.5389, "step": 1965 }, { "epoch": 0.04169582829632457, "grad_norm": 0.582439124584198, "learning_rate": 1.9980663976429704e-05, "loss": 0.5367, "step": 1966 }, { "epoch": 0.0417170367542576, "grad_norm": 0.3067116439342499, "learning_rate": 1.9980643241882402e-05, "loss": 0.5715, "step": 1967 }, { "epoch": 0.04173824521219062, "grad_norm": 0.3313163220882416, "learning_rate": 1.998062249623472e-05, "loss": 0.4983, "step": 1968 }, { "epoch": 0.041759453670123646, "grad_norm": 0.31681859493255615, "learning_rate": 1.9980601739486674e-05, "loss": 0.5469, "step": 1969 }, { "epoch": 0.041780662128056666, "grad_norm": 0.3596760630607605, "learning_rate": 1.998058097163829e-05, "loss": 0.5533, "step": 1970 }, { "epoch": 0.04180187058598969, "grad_norm": 0.327589750289917, "learning_rate": 1.9980560192689588e-05, "loss": 0.5485, "step": 1971 }, { "epoch": 0.04182307904392272, "grad_norm": 0.3550763726234436, "learning_rate": 1.9980539402640594e-05, "loss": 0.5207, "step": 1972 }, { "epoch": 0.04184428750185574, "grad_norm": 0.31585827469825745, "learning_rate": 1.9980518601491332e-05, "loss": 0.4972, "step": 1973 }, { "epoch": 0.041865495959788765, "grad_norm": 0.34033825993537903, "learning_rate": 1.9980497789241825e-05, "loss": 0.5295, "step": 1974 }, { "epoch": 0.041886704417721785, "grad_norm": 0.4429134428501129, "learning_rate": 1.9980476965892094e-05, "loss": 0.4482, "step": 1975 }, { "epoch": 0.04190791287565481, "grad_norm": 0.34787118434906006, "learning_rate": 1.9980456131442162e-05, "loss": 0.6006, "step": 1976 }, { "epoch": 0.04192912133358784, "grad_norm": 0.3586668372154236, "learning_rate": 1.9980435285892056e-05, "loss": 0.6654, "step": 1977 }, { "epoch": 0.04195032979152086, "grad_norm": 0.32160499691963196, "learning_rate": 1.9980414429241794e-05, "loss": 0.6065, "step": 1978 }, { "epoch": 0.041971538249453884, "grad_norm": 0.3464931845664978, "learning_rate": 1.9980393561491405e-05, "loss": 0.5324, "step": 1979 }, { "epoch": 0.0419927467073869, "grad_norm": 0.3176870346069336, "learning_rate": 1.9980372682640907e-05, "loss": 0.4868, "step": 1980 }, { "epoch": 0.04201395516531993, "grad_norm": 0.32992279529571533, "learning_rate": 1.9980351792690324e-05, "loss": 0.5742, "step": 1981 }, { "epoch": 0.04203516362325296, "grad_norm": 0.30577442049980164, "learning_rate": 1.9980330891639682e-05, "loss": 0.5107, "step": 1982 }, { "epoch": 0.042056372081185976, "grad_norm": 0.31421220302581787, "learning_rate": 1.9980309979489002e-05, "loss": 0.5302, "step": 1983 }, { "epoch": 0.042077580539119, "grad_norm": 0.2906486690044403, "learning_rate": 1.9980289056238308e-05, "loss": 0.4837, "step": 1984 }, { "epoch": 0.04209878899705202, "grad_norm": 0.40679219365119934, "learning_rate": 1.9980268121887626e-05, "loss": 0.5414, "step": 1985 }, { "epoch": 0.04211999745498505, "grad_norm": 0.36191093921661377, "learning_rate": 1.998024717643697e-05, "loss": 0.5535, "step": 1986 }, { "epoch": 0.04214120591291807, "grad_norm": 0.29371538758277893, "learning_rate": 1.9980226219886375e-05, "loss": 0.5326, "step": 1987 }, { "epoch": 0.042162414370851095, "grad_norm": 0.3157428801059723, "learning_rate": 1.998020525223586e-05, "loss": 0.5567, "step": 1988 }, { "epoch": 0.04218362282878412, "grad_norm": 0.4588944911956787, "learning_rate": 1.9980184273485443e-05, "loss": 0.5401, "step": 1989 }, { "epoch": 0.04220483128671714, "grad_norm": 0.31681686639785767, "learning_rate": 1.9980163283635153e-05, "loss": 0.5254, "step": 1990 }, { "epoch": 0.04222603974465017, "grad_norm": 0.36463406682014465, "learning_rate": 1.9980142282685015e-05, "loss": 0.5695, "step": 1991 }, { "epoch": 0.04224724820258319, "grad_norm": 0.36349791288375854, "learning_rate": 1.998012127063505e-05, "loss": 0.6149, "step": 1992 }, { "epoch": 0.042268456660516214, "grad_norm": 0.41912803053855896, "learning_rate": 1.9980100247485275e-05, "loss": 0.5967, "step": 1993 }, { "epoch": 0.04228966511844924, "grad_norm": 0.33645957708358765, "learning_rate": 1.9980079213235724e-05, "loss": 0.583, "step": 1994 }, { "epoch": 0.04231087357638226, "grad_norm": 0.38873356580734253, "learning_rate": 1.9980058167886413e-05, "loss": 0.6954, "step": 1995 }, { "epoch": 0.04233208203431529, "grad_norm": 0.33593660593032837, "learning_rate": 1.9980037111437372e-05, "loss": 0.481, "step": 1996 }, { "epoch": 0.042353290492248306, "grad_norm": 0.3269520401954651, "learning_rate": 1.9980016043888618e-05, "loss": 0.5876, "step": 1997 }, { "epoch": 0.04237449895018133, "grad_norm": 0.3767275810241699, "learning_rate": 1.997999496524017e-05, "loss": 0.5189, "step": 1998 }, { "epoch": 0.04239570740811435, "grad_norm": 0.32528987526893616, "learning_rate": 1.997997387549207e-05, "loss": 0.4924, "step": 1999 }, { "epoch": 0.04241691586604738, "grad_norm": 0.318997323513031, "learning_rate": 1.9979952774644323e-05, "loss": 0.5191, "step": 2000 }, { "epoch": 0.042438124323980406, "grad_norm": 0.3498484790325165, "learning_rate": 1.997993166269696e-05, "loss": 0.5486, "step": 2001 }, { "epoch": 0.042459332781913425, "grad_norm": 0.31820034980773926, "learning_rate": 1.9979910539650006e-05, "loss": 0.533, "step": 2002 }, { "epoch": 0.04248054123984645, "grad_norm": 0.36356213688850403, "learning_rate": 1.997988940550348e-05, "loss": 0.5697, "step": 2003 }, { "epoch": 0.04250174969777947, "grad_norm": 0.3546191453933716, "learning_rate": 1.997986826025741e-05, "loss": 0.5132, "step": 2004 }, { "epoch": 0.0425229581557125, "grad_norm": 0.3306904733181, "learning_rate": 1.9979847103911813e-05, "loss": 0.5192, "step": 2005 }, { "epoch": 0.042544166613645525, "grad_norm": 0.3040636479854584, "learning_rate": 1.9979825936466717e-05, "loss": 0.503, "step": 2006 }, { "epoch": 0.042565375071578544, "grad_norm": 0.3281967043876648, "learning_rate": 1.9979804757922153e-05, "loss": 0.5594, "step": 2007 }, { "epoch": 0.04258658352951157, "grad_norm": 0.3467049300670624, "learning_rate": 1.997978356827813e-05, "loss": 0.5226, "step": 2008 }, { "epoch": 0.04260779198744459, "grad_norm": 0.33528557419776917, "learning_rate": 1.9979762367534678e-05, "loss": 0.5958, "step": 2009 }, { "epoch": 0.04262900044537762, "grad_norm": 0.3315563499927521, "learning_rate": 1.9979741155691825e-05, "loss": 0.5143, "step": 2010 }, { "epoch": 0.042650208903310644, "grad_norm": 0.32341861724853516, "learning_rate": 1.9979719932749585e-05, "loss": 0.5866, "step": 2011 }, { "epoch": 0.04267141736124366, "grad_norm": 0.36949366331100464, "learning_rate": 1.9979698698707994e-05, "loss": 0.5004, "step": 2012 }, { "epoch": 0.04269262581917669, "grad_norm": 0.3554312586784363, "learning_rate": 1.9979677453567063e-05, "loss": 0.5159, "step": 2013 }, { "epoch": 0.04271383427710971, "grad_norm": 0.3036960959434509, "learning_rate": 1.9979656197326826e-05, "loss": 0.5668, "step": 2014 }, { "epoch": 0.042735042735042736, "grad_norm": 0.3839181959629059, "learning_rate": 1.99796349299873e-05, "loss": 0.538, "step": 2015 }, { "epoch": 0.042756251192975755, "grad_norm": 0.30920538306236267, "learning_rate": 1.997961365154851e-05, "loss": 0.5934, "step": 2016 }, { "epoch": 0.04277745965090878, "grad_norm": 0.3290720283985138, "learning_rate": 1.997959236201048e-05, "loss": 0.5081, "step": 2017 }, { "epoch": 0.04279866810884181, "grad_norm": 0.2841282784938812, "learning_rate": 1.9979571061373234e-05, "loss": 0.4784, "step": 2018 }, { "epoch": 0.04281987656677483, "grad_norm": 0.39238834381103516, "learning_rate": 1.9979549749636798e-05, "loss": 0.5955, "step": 2019 }, { "epoch": 0.042841085024707855, "grad_norm": 0.3013613522052765, "learning_rate": 1.9979528426801194e-05, "loss": 0.5437, "step": 2020 }, { "epoch": 0.042862293482640874, "grad_norm": 0.30316129326820374, "learning_rate": 1.9979507092866442e-05, "loss": 0.426, "step": 2021 }, { "epoch": 0.0428835019405739, "grad_norm": 0.3637685775756836, "learning_rate": 1.997948574783257e-05, "loss": 0.4737, "step": 2022 }, { "epoch": 0.04290471039850693, "grad_norm": 0.3277389109134674, "learning_rate": 1.99794643916996e-05, "loss": 0.4781, "step": 2023 }, { "epoch": 0.04292591885643995, "grad_norm": 0.32488155364990234, "learning_rate": 1.9979443024467558e-05, "loss": 0.5736, "step": 2024 }, { "epoch": 0.042947127314372974, "grad_norm": 0.32507064938545227, "learning_rate": 1.9979421646136466e-05, "loss": 0.5414, "step": 2025 }, { "epoch": 0.04296833577230599, "grad_norm": 0.31429076194763184, "learning_rate": 1.997940025670635e-05, "loss": 0.5138, "step": 2026 }, { "epoch": 0.04298954423023902, "grad_norm": 0.32204779982566833, "learning_rate": 1.9979378856177227e-05, "loss": 0.6278, "step": 2027 }, { "epoch": 0.043010752688172046, "grad_norm": 0.3398773968219757, "learning_rate": 1.9979357444549128e-05, "loss": 0.593, "step": 2028 }, { "epoch": 0.043031961146105066, "grad_norm": 0.32753998041152954, "learning_rate": 1.9979336021822075e-05, "loss": 0.5905, "step": 2029 }, { "epoch": 0.04305316960403809, "grad_norm": 0.3209560811519623, "learning_rate": 1.997931458799609e-05, "loss": 0.49, "step": 2030 }, { "epoch": 0.04307437806197111, "grad_norm": 0.3144736886024475, "learning_rate": 1.9979293143071198e-05, "loss": 0.4949, "step": 2031 }, { "epoch": 0.04309558651990414, "grad_norm": 0.41035038232803345, "learning_rate": 1.9979271687047422e-05, "loss": 0.5999, "step": 2032 }, { "epoch": 0.04311679497783716, "grad_norm": 0.34198853373527527, "learning_rate": 1.997925021992479e-05, "loss": 0.5976, "step": 2033 }, { "epoch": 0.043138003435770185, "grad_norm": 0.34086334705352783, "learning_rate": 1.997922874170332e-05, "loss": 0.591, "step": 2034 }, { "epoch": 0.04315921189370321, "grad_norm": 0.41992172598838806, "learning_rate": 1.997920725238304e-05, "loss": 0.6149, "step": 2035 }, { "epoch": 0.04318042035163623, "grad_norm": 0.3723890781402588, "learning_rate": 1.9979185751963967e-05, "loss": 0.6006, "step": 2036 }, { "epoch": 0.04320162880956926, "grad_norm": 0.3216175138950348, "learning_rate": 1.997916424044614e-05, "loss": 0.5235, "step": 2037 }, { "epoch": 0.04322283726750228, "grad_norm": 0.32092711329460144, "learning_rate": 1.9979142717829565e-05, "loss": 0.5355, "step": 2038 }, { "epoch": 0.043244045725435304, "grad_norm": 0.32502293586730957, "learning_rate": 1.9979121184114277e-05, "loss": 0.5651, "step": 2039 }, { "epoch": 0.04326525418336833, "grad_norm": 0.31145724654197693, "learning_rate": 1.9979099639300296e-05, "loss": 0.5547, "step": 2040 }, { "epoch": 0.04328646264130135, "grad_norm": 0.31088000535964966, "learning_rate": 1.997907808338765e-05, "loss": 0.5711, "step": 2041 }, { "epoch": 0.04330767109923438, "grad_norm": 0.35746580362319946, "learning_rate": 1.997905651637636e-05, "loss": 0.6409, "step": 2042 }, { "epoch": 0.043328879557167396, "grad_norm": 0.3773840069770813, "learning_rate": 1.9979034938266445e-05, "loss": 0.5505, "step": 2043 }, { "epoch": 0.04335008801510042, "grad_norm": 0.3570621907711029, "learning_rate": 1.9979013349057936e-05, "loss": 0.5869, "step": 2044 }, { "epoch": 0.04337129647303344, "grad_norm": 0.3607560396194458, "learning_rate": 1.997899174875086e-05, "loss": 0.5375, "step": 2045 }, { "epoch": 0.04339250493096647, "grad_norm": 0.34170734882354736, "learning_rate": 1.997897013734523e-05, "loss": 0.5702, "step": 2046 }, { "epoch": 0.043413713388899496, "grad_norm": 0.3242536187171936, "learning_rate": 1.997894851484108e-05, "loss": 0.5349, "step": 2047 }, { "epoch": 0.043434921846832515, "grad_norm": 0.3030790090560913, "learning_rate": 1.997892688123843e-05, "loss": 0.5504, "step": 2048 }, { "epoch": 0.04345613030476554, "grad_norm": 0.3897925913333893, "learning_rate": 1.99789052365373e-05, "loss": 0.5667, "step": 2049 }, { "epoch": 0.04347733876269856, "grad_norm": 0.33447137475013733, "learning_rate": 1.997888358073772e-05, "loss": 0.5774, "step": 2050 }, { "epoch": 0.04349854722063159, "grad_norm": 0.3119889795780182, "learning_rate": 1.9978861913839716e-05, "loss": 0.5508, "step": 2051 }, { "epoch": 0.043519755678564614, "grad_norm": 0.33481982350349426, "learning_rate": 1.9978840235843306e-05, "loss": 0.525, "step": 2052 }, { "epoch": 0.043540964136497634, "grad_norm": 0.33218759298324585, "learning_rate": 1.9978818546748514e-05, "loss": 0.5868, "step": 2053 }, { "epoch": 0.04356217259443066, "grad_norm": 0.3502645194530487, "learning_rate": 1.997879684655537e-05, "loss": 0.5944, "step": 2054 }, { "epoch": 0.04358338105236368, "grad_norm": 0.36508968472480774, "learning_rate": 1.9978775135263895e-05, "loss": 0.5243, "step": 2055 }, { "epoch": 0.04360458951029671, "grad_norm": 0.28094232082366943, "learning_rate": 1.9978753412874112e-05, "loss": 0.5057, "step": 2056 }, { "epoch": 0.04362579796822973, "grad_norm": 0.4128984212875366, "learning_rate": 1.997873167938605e-05, "loss": 0.6068, "step": 2057 }, { "epoch": 0.04364700642616275, "grad_norm": 0.3216591775417328, "learning_rate": 1.997870993479972e-05, "loss": 0.5826, "step": 2058 }, { "epoch": 0.04366821488409578, "grad_norm": 0.31875747442245483, "learning_rate": 1.9978688179115162e-05, "loss": 0.5751, "step": 2059 }, { "epoch": 0.0436894233420288, "grad_norm": 0.32066717743873596, "learning_rate": 1.9978666412332392e-05, "loss": 0.6295, "step": 2060 }, { "epoch": 0.043710631799961826, "grad_norm": 0.3261774480342865, "learning_rate": 1.9978644634451438e-05, "loss": 0.4866, "step": 2061 }, { "epoch": 0.043731840257894845, "grad_norm": 0.3242272734642029, "learning_rate": 1.9978622845472322e-05, "loss": 0.5492, "step": 2062 }, { "epoch": 0.04375304871582787, "grad_norm": 0.3032897412776947, "learning_rate": 1.9978601045395065e-05, "loss": 0.5483, "step": 2063 }, { "epoch": 0.0437742571737609, "grad_norm": 0.3230975270271301, "learning_rate": 1.99785792342197e-05, "loss": 0.6209, "step": 2064 }, { "epoch": 0.04379546563169392, "grad_norm": 0.30254805088043213, "learning_rate": 1.997855741194624e-05, "loss": 0.4571, "step": 2065 }, { "epoch": 0.043816674089626945, "grad_norm": 0.40232792496681213, "learning_rate": 1.9978535578574716e-05, "loss": 0.5886, "step": 2066 }, { "epoch": 0.043837882547559964, "grad_norm": 0.4323533773422241, "learning_rate": 1.9978513734105154e-05, "loss": 0.5732, "step": 2067 }, { "epoch": 0.04385909100549299, "grad_norm": 0.3223327100276947, "learning_rate": 1.9978491878537576e-05, "loss": 0.5843, "step": 2068 }, { "epoch": 0.04388029946342602, "grad_norm": 0.30246901512145996, "learning_rate": 1.9978470011872003e-05, "loss": 0.5829, "step": 2069 }, { "epoch": 0.04390150792135904, "grad_norm": 0.3715958595275879, "learning_rate": 1.9978448134108466e-05, "loss": 0.4871, "step": 2070 }, { "epoch": 0.043922716379292064, "grad_norm": 0.31173548102378845, "learning_rate": 1.997842624524698e-05, "loss": 0.6008, "step": 2071 }, { "epoch": 0.04394392483722508, "grad_norm": 0.3495468497276306, "learning_rate": 1.997840434528758e-05, "loss": 0.5406, "step": 2072 }, { "epoch": 0.04396513329515811, "grad_norm": 0.3020080327987671, "learning_rate": 1.9978382434230288e-05, "loss": 0.4996, "step": 2073 }, { "epoch": 0.04398634175309113, "grad_norm": 0.3476172983646393, "learning_rate": 1.9978360512075122e-05, "loss": 0.5812, "step": 2074 }, { "epoch": 0.044007550211024156, "grad_norm": 0.2986823320388794, "learning_rate": 1.9978338578822108e-05, "loss": 0.4941, "step": 2075 }, { "epoch": 0.04402875866895718, "grad_norm": 0.2959996461868286, "learning_rate": 1.9978316634471275e-05, "loss": 0.5043, "step": 2076 }, { "epoch": 0.0440499671268902, "grad_norm": 0.3892432153224945, "learning_rate": 1.9978294679022646e-05, "loss": 0.5213, "step": 2077 }, { "epoch": 0.04407117558482323, "grad_norm": 0.3878769278526306, "learning_rate": 1.9978272712476242e-05, "loss": 0.5616, "step": 2078 }, { "epoch": 0.04409238404275625, "grad_norm": 0.34792548418045044, "learning_rate": 1.997825073483209e-05, "loss": 0.6042, "step": 2079 }, { "epoch": 0.044113592500689275, "grad_norm": 0.3509308993816376, "learning_rate": 1.9978228746090216e-05, "loss": 0.5244, "step": 2080 }, { "epoch": 0.0441348009586223, "grad_norm": 0.4255318343639374, "learning_rate": 1.997820674625064e-05, "loss": 0.4893, "step": 2081 }, { "epoch": 0.04415600941655532, "grad_norm": 0.34447726607322693, "learning_rate": 1.9978184735313392e-05, "loss": 0.5457, "step": 2082 }, { "epoch": 0.04417721787448835, "grad_norm": 0.31750622391700745, "learning_rate": 1.997816271327849e-05, "loss": 0.508, "step": 2083 }, { "epoch": 0.04419842633242137, "grad_norm": 0.2994402050971985, "learning_rate": 1.9978140680145966e-05, "loss": 0.5118, "step": 2084 }, { "epoch": 0.044219634790354394, "grad_norm": 0.37407973408699036, "learning_rate": 1.9978118635915837e-05, "loss": 0.5786, "step": 2085 }, { "epoch": 0.04424084324828742, "grad_norm": 0.32554593682289124, "learning_rate": 1.9978096580588136e-05, "loss": 0.5994, "step": 2086 }, { "epoch": 0.04426205170622044, "grad_norm": 0.3184836208820343, "learning_rate": 1.997807451416288e-05, "loss": 0.5168, "step": 2087 }, { "epoch": 0.044283260164153466, "grad_norm": 0.3582797348499298, "learning_rate": 1.9978052436640094e-05, "loss": 0.5332, "step": 2088 }, { "epoch": 0.044304468622086486, "grad_norm": 0.3455512225627899, "learning_rate": 1.9978030348019807e-05, "loss": 0.5711, "step": 2089 }, { "epoch": 0.04432567708001951, "grad_norm": 0.3940318524837494, "learning_rate": 1.997800824830204e-05, "loss": 0.537, "step": 2090 }, { "epoch": 0.04434688553795253, "grad_norm": 0.33111217617988586, "learning_rate": 1.9977986137486823e-05, "loss": 0.5719, "step": 2091 }, { "epoch": 0.04436809399588556, "grad_norm": 0.37665697932243347, "learning_rate": 1.997796401557417e-05, "loss": 0.4824, "step": 2092 }, { "epoch": 0.044389302453818585, "grad_norm": 0.39848092198371887, "learning_rate": 1.997794188256412e-05, "loss": 0.5872, "step": 2093 }, { "epoch": 0.044410510911751605, "grad_norm": 0.3210288882255554, "learning_rate": 1.9977919738456682e-05, "loss": 0.5345, "step": 2094 }, { "epoch": 0.04443171936968463, "grad_norm": 0.31014055013656616, "learning_rate": 1.997789758325189e-05, "loss": 0.558, "step": 2095 }, { "epoch": 0.04445292782761765, "grad_norm": 0.3430032730102539, "learning_rate": 1.997787541694977e-05, "loss": 0.5046, "step": 2096 }, { "epoch": 0.04447413628555068, "grad_norm": 0.34333762526512146, "learning_rate": 1.9977853239550345e-05, "loss": 0.5296, "step": 2097 }, { "epoch": 0.044495344743483704, "grad_norm": 0.3665967881679535, "learning_rate": 1.9977831051053632e-05, "loss": 0.5814, "step": 2098 }, { "epoch": 0.044516553201416724, "grad_norm": 1.041954517364502, "learning_rate": 1.9977808851459666e-05, "loss": 0.5046, "step": 2099 }, { "epoch": 0.04453776165934975, "grad_norm": 0.36144858598709106, "learning_rate": 1.997778664076847e-05, "loss": 0.6528, "step": 2100 }, { "epoch": 0.04455897011728277, "grad_norm": 0.329365998506546, "learning_rate": 1.9977764418980062e-05, "loss": 0.5828, "step": 2101 }, { "epoch": 0.0445801785752158, "grad_norm": 0.3165629208087921, "learning_rate": 1.9977742186094472e-05, "loss": 0.5351, "step": 2102 }, { "epoch": 0.044601387033148816, "grad_norm": 0.31734123826026917, "learning_rate": 1.9977719942111724e-05, "loss": 0.5408, "step": 2103 }, { "epoch": 0.04462259549108184, "grad_norm": 0.321304589509964, "learning_rate": 1.9977697687031844e-05, "loss": 0.5682, "step": 2104 }, { "epoch": 0.04464380394901487, "grad_norm": 0.33247607946395874, "learning_rate": 1.9977675420854854e-05, "loss": 0.5366, "step": 2105 }, { "epoch": 0.04466501240694789, "grad_norm": 0.30056628584861755, "learning_rate": 1.9977653143580784e-05, "loss": 0.4633, "step": 2106 }, { "epoch": 0.044686220864880916, "grad_norm": 0.31533291935920715, "learning_rate": 1.9977630855209646e-05, "loss": 0.5411, "step": 2107 }, { "epoch": 0.044707429322813935, "grad_norm": 0.31032851338386536, "learning_rate": 1.997760855574148e-05, "loss": 0.5256, "step": 2108 }, { "epoch": 0.04472863778074696, "grad_norm": 0.340219110250473, "learning_rate": 1.9977586245176304e-05, "loss": 0.5314, "step": 2109 }, { "epoch": 0.04474984623867999, "grad_norm": 0.3463495969772339, "learning_rate": 1.9977563923514143e-05, "loss": 0.5107, "step": 2110 }, { "epoch": 0.04477105469661301, "grad_norm": 0.39736247062683105, "learning_rate": 1.9977541590755023e-05, "loss": 0.5572, "step": 2111 }, { "epoch": 0.044792263154546035, "grad_norm": 0.3048366606235504, "learning_rate": 1.9977519246898966e-05, "loss": 0.5251, "step": 2112 }, { "epoch": 0.044813471612479054, "grad_norm": 0.3591163456439972, "learning_rate": 1.9977496891946e-05, "loss": 0.5452, "step": 2113 }, { "epoch": 0.04483468007041208, "grad_norm": 0.31778329610824585, "learning_rate": 1.997747452589615e-05, "loss": 0.4938, "step": 2114 }, { "epoch": 0.04485588852834511, "grad_norm": 0.35919663310050964, "learning_rate": 1.9977452148749433e-05, "loss": 0.6018, "step": 2115 }, { "epoch": 0.04487709698627813, "grad_norm": 0.32652002573013306, "learning_rate": 1.9977429760505888e-05, "loss": 0.5576, "step": 2116 }, { "epoch": 0.04489830544421115, "grad_norm": 0.3663879334926605, "learning_rate": 1.9977407361165527e-05, "loss": 0.6079, "step": 2117 }, { "epoch": 0.04491951390214417, "grad_norm": 0.29601263999938965, "learning_rate": 1.9977384950728383e-05, "loss": 0.4939, "step": 2118 }, { "epoch": 0.0449407223600772, "grad_norm": 0.29818040132522583, "learning_rate": 1.9977362529194478e-05, "loss": 0.4726, "step": 2119 }, { "epoch": 0.04496193081801022, "grad_norm": 0.3257771134376526, "learning_rate": 1.997734009656384e-05, "loss": 0.5046, "step": 2120 }, { "epoch": 0.044983139275943246, "grad_norm": 0.32298150658607483, "learning_rate": 1.9977317652836484e-05, "loss": 0.4918, "step": 2121 }, { "epoch": 0.04500434773387627, "grad_norm": 0.3813590407371521, "learning_rate": 1.9977295198012446e-05, "loss": 0.5806, "step": 2122 }, { "epoch": 0.04502555619180929, "grad_norm": 0.3433612883090973, "learning_rate": 1.9977272732091745e-05, "loss": 0.521, "step": 2123 }, { "epoch": 0.04504676464974232, "grad_norm": 0.3772689700126648, "learning_rate": 1.9977250255074413e-05, "loss": 0.5731, "step": 2124 }, { "epoch": 0.04506797310767534, "grad_norm": 0.7586131691932678, "learning_rate": 1.9977227766960463e-05, "loss": 0.5858, "step": 2125 }, { "epoch": 0.045089181565608365, "grad_norm": 0.35493096709251404, "learning_rate": 1.9977205267749933e-05, "loss": 0.6063, "step": 2126 }, { "epoch": 0.04511039002354139, "grad_norm": 0.34226030111312866, "learning_rate": 1.9977182757442838e-05, "loss": 0.5309, "step": 2127 }, { "epoch": 0.04513159848147441, "grad_norm": 0.3078337609767914, "learning_rate": 1.997716023603921e-05, "loss": 0.4724, "step": 2128 }, { "epoch": 0.04515280693940744, "grad_norm": 0.32941755652427673, "learning_rate": 1.9977137703539068e-05, "loss": 0.4572, "step": 2129 }, { "epoch": 0.04517401539734046, "grad_norm": 0.30192044377326965, "learning_rate": 1.9977115159942443e-05, "loss": 0.552, "step": 2130 }, { "epoch": 0.045195223855273484, "grad_norm": 0.33596596121788025, "learning_rate": 1.9977092605249355e-05, "loss": 0.4386, "step": 2131 }, { "epoch": 0.04521643231320651, "grad_norm": 0.3542340099811554, "learning_rate": 1.9977070039459833e-05, "loss": 0.5534, "step": 2132 }, { "epoch": 0.04523764077113953, "grad_norm": 0.4690416157245636, "learning_rate": 1.99770474625739e-05, "loss": 0.5584, "step": 2133 }, { "epoch": 0.045258849229072556, "grad_norm": 0.348863422870636, "learning_rate": 1.997702487459158e-05, "loss": 0.4978, "step": 2134 }, { "epoch": 0.045280057687005576, "grad_norm": 0.3355666995048523, "learning_rate": 1.99770022755129e-05, "loss": 0.4841, "step": 2135 }, { "epoch": 0.0453012661449386, "grad_norm": 0.34640198945999146, "learning_rate": 1.9976979665337883e-05, "loss": 0.5601, "step": 2136 }, { "epoch": 0.04532247460287162, "grad_norm": 0.31229567527770996, "learning_rate": 1.997695704406656e-05, "loss": 0.5462, "step": 2137 }, { "epoch": 0.04534368306080465, "grad_norm": 0.3438103497028351, "learning_rate": 1.997693441169895e-05, "loss": 0.6259, "step": 2138 }, { "epoch": 0.045364891518737675, "grad_norm": 0.3745654225349426, "learning_rate": 1.997691176823508e-05, "loss": 0.5643, "step": 2139 }, { "epoch": 0.045386099976670695, "grad_norm": 0.3358254134654999, "learning_rate": 1.9976889113674974e-05, "loss": 0.5985, "step": 2140 }, { "epoch": 0.04540730843460372, "grad_norm": 0.3082582354545593, "learning_rate": 1.9976866448018662e-05, "loss": 0.556, "step": 2141 }, { "epoch": 0.04542851689253674, "grad_norm": 0.31658005714416504, "learning_rate": 1.9976843771266164e-05, "loss": 0.4317, "step": 2142 }, { "epoch": 0.04544972535046977, "grad_norm": 0.259307324886322, "learning_rate": 1.9976821083417508e-05, "loss": 0.4442, "step": 2143 }, { "epoch": 0.045470933808402794, "grad_norm": 0.36391183733940125, "learning_rate": 1.9976798384472716e-05, "loss": 0.5591, "step": 2144 }, { "epoch": 0.045492142266335814, "grad_norm": 0.35380852222442627, "learning_rate": 1.997677567443182e-05, "loss": 0.5193, "step": 2145 }, { "epoch": 0.04551335072426884, "grad_norm": 0.29591572284698486, "learning_rate": 1.9976752953294837e-05, "loss": 0.5402, "step": 2146 }, { "epoch": 0.04553455918220186, "grad_norm": 0.3313496708869934, "learning_rate": 1.9976730221061796e-05, "loss": 0.4908, "step": 2147 }, { "epoch": 0.04555576764013489, "grad_norm": 0.36697763204574585, "learning_rate": 1.9976707477732722e-05, "loss": 0.5613, "step": 2148 }, { "epoch": 0.045576976098067906, "grad_norm": 0.32805362343788147, "learning_rate": 1.9976684723307642e-05, "loss": 0.5934, "step": 2149 }, { "epoch": 0.04559818455600093, "grad_norm": 0.34369322657585144, "learning_rate": 1.997666195778658e-05, "loss": 0.5109, "step": 2150 }, { "epoch": 0.04561939301393396, "grad_norm": 0.3282856345176697, "learning_rate": 1.997663918116956e-05, "loss": 0.5483, "step": 2151 }, { "epoch": 0.04564060147186698, "grad_norm": 0.5474342703819275, "learning_rate": 1.997661639345661e-05, "loss": 0.6252, "step": 2152 }, { "epoch": 0.045661809929800005, "grad_norm": 0.4210938811302185, "learning_rate": 1.9976593594647756e-05, "loss": 0.5408, "step": 2153 }, { "epoch": 0.045683018387733025, "grad_norm": 0.3440816402435303, "learning_rate": 1.9976570784743018e-05, "loss": 0.6126, "step": 2154 }, { "epoch": 0.04570422684566605, "grad_norm": 0.32114487886428833, "learning_rate": 1.9976547963742425e-05, "loss": 0.5121, "step": 2155 }, { "epoch": 0.04572543530359908, "grad_norm": 0.3411702811717987, "learning_rate": 1.9976525131646002e-05, "loss": 0.498, "step": 2156 }, { "epoch": 0.0457466437615321, "grad_norm": 0.35275623202323914, "learning_rate": 1.9976502288453773e-05, "loss": 0.4865, "step": 2157 }, { "epoch": 0.045767852219465124, "grad_norm": 0.3183431923389435, "learning_rate": 1.997647943416577e-05, "loss": 0.615, "step": 2158 }, { "epoch": 0.045789060677398144, "grad_norm": 0.31469210982322693, "learning_rate": 1.9976456568782008e-05, "loss": 0.5511, "step": 2159 }, { "epoch": 0.04581026913533117, "grad_norm": 0.3533659279346466, "learning_rate": 1.997643369230252e-05, "loss": 0.59, "step": 2160 }, { "epoch": 0.0458314775932642, "grad_norm": 0.32288283109664917, "learning_rate": 1.997641080472733e-05, "loss": 0.4966, "step": 2161 }, { "epoch": 0.04585268605119722, "grad_norm": 0.39010754227638245, "learning_rate": 1.9976387906056458e-05, "loss": 0.6799, "step": 2162 }, { "epoch": 0.04587389450913024, "grad_norm": 0.3411151170730591, "learning_rate": 1.997636499628994e-05, "loss": 0.5061, "step": 2163 }, { "epoch": 0.04589510296706326, "grad_norm": 0.3495028018951416, "learning_rate": 1.997634207542779e-05, "loss": 0.5754, "step": 2164 }, { "epoch": 0.04591631142499629, "grad_norm": 0.31794247031211853, "learning_rate": 1.9976319143470043e-05, "loss": 0.5839, "step": 2165 }, { "epoch": 0.04593751988292931, "grad_norm": 0.4058001935482025, "learning_rate": 1.997629620041672e-05, "loss": 0.6534, "step": 2166 }, { "epoch": 0.045958728340862336, "grad_norm": 0.38413888216018677, "learning_rate": 1.9976273246267843e-05, "loss": 0.4487, "step": 2167 }, { "epoch": 0.04597993679879536, "grad_norm": 0.29704946279525757, "learning_rate": 1.9976250281023445e-05, "loss": 0.5407, "step": 2168 }, { "epoch": 0.04600114525672838, "grad_norm": 0.30459651350975037, "learning_rate": 1.997622730468355e-05, "loss": 0.5042, "step": 2169 }, { "epoch": 0.04602235371466141, "grad_norm": 0.3242877721786499, "learning_rate": 1.997620431724818e-05, "loss": 0.5343, "step": 2170 }, { "epoch": 0.04604356217259443, "grad_norm": 0.3277152478694916, "learning_rate": 1.997618131871736e-05, "loss": 0.5183, "step": 2171 }, { "epoch": 0.046064770630527455, "grad_norm": 0.35557594895362854, "learning_rate": 1.997615830909112e-05, "loss": 0.5778, "step": 2172 }, { "epoch": 0.04608597908846048, "grad_norm": 0.40912339091300964, "learning_rate": 1.9976135288369483e-05, "loss": 0.5919, "step": 2173 }, { "epoch": 0.0461071875463935, "grad_norm": 0.3235670030117035, "learning_rate": 1.9976112256552476e-05, "loss": 0.496, "step": 2174 }, { "epoch": 0.04612839600432653, "grad_norm": 0.48430392146110535, "learning_rate": 1.997608921364012e-05, "loss": 0.5328, "step": 2175 }, { "epoch": 0.04614960446225955, "grad_norm": 0.36062192916870117, "learning_rate": 1.9976066159632446e-05, "loss": 0.5384, "step": 2176 }, { "epoch": 0.046170812920192573, "grad_norm": 0.3924926519393921, "learning_rate": 1.9976043094529484e-05, "loss": 0.5613, "step": 2177 }, { "epoch": 0.04619202137812559, "grad_norm": 0.33175304532051086, "learning_rate": 1.9976020018331244e-05, "loss": 0.5584, "step": 2178 }, { "epoch": 0.04621322983605862, "grad_norm": 0.2938650846481323, "learning_rate": 1.9975996931037767e-05, "loss": 0.5251, "step": 2179 }, { "epoch": 0.046234438293991646, "grad_norm": 0.3269276022911072, "learning_rate": 1.997597383264907e-05, "loss": 0.5831, "step": 2180 }, { "epoch": 0.046255646751924666, "grad_norm": 0.30519992113113403, "learning_rate": 1.9975950723165183e-05, "loss": 0.5375, "step": 2181 }, { "epoch": 0.04627685520985769, "grad_norm": 0.34460359811782837, "learning_rate": 1.997592760258613e-05, "loss": 0.5319, "step": 2182 }, { "epoch": 0.04629806366779071, "grad_norm": 0.3441993296146393, "learning_rate": 1.997590447091194e-05, "loss": 0.6049, "step": 2183 }, { "epoch": 0.04631927212572374, "grad_norm": 0.35432788729667664, "learning_rate": 1.997588132814263e-05, "loss": 0.6192, "step": 2184 }, { "epoch": 0.046340480583656765, "grad_norm": 0.35697972774505615, "learning_rate": 1.9975858174278236e-05, "loss": 0.5407, "step": 2185 }, { "epoch": 0.046361689041589785, "grad_norm": 0.3277674615383148, "learning_rate": 1.9975835009318777e-05, "loss": 0.5633, "step": 2186 }, { "epoch": 0.04638289749952281, "grad_norm": 0.9638996720314026, "learning_rate": 1.9975811833264284e-05, "loss": 0.4527, "step": 2187 }, { "epoch": 0.04640410595745583, "grad_norm": 0.34600743651390076, "learning_rate": 1.9975788646114774e-05, "loss": 0.6309, "step": 2188 }, { "epoch": 0.04642531441538886, "grad_norm": 0.33923810720443726, "learning_rate": 1.997576544787028e-05, "loss": 0.6158, "step": 2189 }, { "epoch": 0.046446522873321884, "grad_norm": 0.31003978848457336, "learning_rate": 1.997574223853083e-05, "loss": 0.527, "step": 2190 }, { "epoch": 0.046467731331254904, "grad_norm": 0.356876403093338, "learning_rate": 1.9975719018096443e-05, "loss": 0.5456, "step": 2191 }, { "epoch": 0.04648893978918793, "grad_norm": 0.3386654555797577, "learning_rate": 1.997569578656715e-05, "loss": 0.5877, "step": 2192 }, { "epoch": 0.04651014824712095, "grad_norm": 0.304960161447525, "learning_rate": 1.9975672543942976e-05, "loss": 0.6475, "step": 2193 }, { "epoch": 0.046531356705053976, "grad_norm": 0.34727421402931213, "learning_rate": 1.9975649290223943e-05, "loss": 0.5624, "step": 2194 }, { "epoch": 0.046552565162986996, "grad_norm": 0.6385757923126221, "learning_rate": 1.9975626025410077e-05, "loss": 0.6339, "step": 2195 }, { "epoch": 0.04657377362092002, "grad_norm": 0.34612002968788147, "learning_rate": 1.997560274950141e-05, "loss": 0.5621, "step": 2196 }, { "epoch": 0.04659498207885305, "grad_norm": 0.4382152259349823, "learning_rate": 1.9975579462497965e-05, "loss": 0.5101, "step": 2197 }, { "epoch": 0.04661619053678607, "grad_norm": 0.38743355870246887, "learning_rate": 1.9975556164399766e-05, "loss": 0.5644, "step": 2198 }, { "epoch": 0.046637398994719095, "grad_norm": 0.30791160464286804, "learning_rate": 1.9975532855206842e-05, "loss": 0.5169, "step": 2199 }, { "epoch": 0.046658607452652115, "grad_norm": 0.3374626338481903, "learning_rate": 1.9975509534919216e-05, "loss": 0.6221, "step": 2200 }, { "epoch": 0.04667981591058514, "grad_norm": 0.3290281891822815, "learning_rate": 1.9975486203536912e-05, "loss": 0.5861, "step": 2201 }, { "epoch": 0.04670102436851817, "grad_norm": 0.3801724314689636, "learning_rate": 1.9975462861059963e-05, "loss": 0.584, "step": 2202 }, { "epoch": 0.04672223282645119, "grad_norm": 0.3223809599876404, "learning_rate": 1.9975439507488388e-05, "loss": 0.504, "step": 2203 }, { "epoch": 0.046743441284384214, "grad_norm": 0.3280421793460846, "learning_rate": 1.9975416142822215e-05, "loss": 0.6131, "step": 2204 }, { "epoch": 0.046764649742317234, "grad_norm": 0.32018738985061646, "learning_rate": 1.9975392767061473e-05, "loss": 0.4851, "step": 2205 }, { "epoch": 0.04678585820025026, "grad_norm": 0.3542773723602295, "learning_rate": 1.9975369380206185e-05, "loss": 0.451, "step": 2206 }, { "epoch": 0.04680706665818328, "grad_norm": 0.2933593690395355, "learning_rate": 1.997534598225638e-05, "loss": 0.4398, "step": 2207 }, { "epoch": 0.04682827511611631, "grad_norm": 0.3873673677444458, "learning_rate": 1.9975322573212076e-05, "loss": 0.5703, "step": 2208 }, { "epoch": 0.04684948357404933, "grad_norm": 0.34993791580200195, "learning_rate": 1.997529915307331e-05, "loss": 0.5, "step": 2209 }, { "epoch": 0.04687069203198235, "grad_norm": 0.3586730659008026, "learning_rate": 1.9975275721840105e-05, "loss": 0.5481, "step": 2210 }, { "epoch": 0.04689190048991538, "grad_norm": 0.3960600793361664, "learning_rate": 1.9975252279512477e-05, "loss": 0.6441, "step": 2211 }, { "epoch": 0.0469131089478484, "grad_norm": 0.35775846242904663, "learning_rate": 1.9975228826090466e-05, "loss": 0.5779, "step": 2212 }, { "epoch": 0.046934317405781426, "grad_norm": 0.3241201341152191, "learning_rate": 1.997520536157409e-05, "loss": 0.5203, "step": 2213 }, { "epoch": 0.04695552586371445, "grad_norm": 0.2969808280467987, "learning_rate": 1.997518188596338e-05, "loss": 0.5423, "step": 2214 }, { "epoch": 0.04697673432164747, "grad_norm": 0.3096660375595093, "learning_rate": 1.9975158399258358e-05, "loss": 0.526, "step": 2215 }, { "epoch": 0.0469979427795805, "grad_norm": 0.312267005443573, "learning_rate": 1.997513490145905e-05, "loss": 0.5413, "step": 2216 }, { "epoch": 0.04701915123751352, "grad_norm": 0.29937228560447693, "learning_rate": 1.9975111392565484e-05, "loss": 0.5725, "step": 2217 }, { "epoch": 0.047040359695446544, "grad_norm": 0.33556029200553894, "learning_rate": 1.9975087872577688e-05, "loss": 0.5144, "step": 2218 }, { "epoch": 0.04706156815337957, "grad_norm": 0.3213222622871399, "learning_rate": 1.9975064341495684e-05, "loss": 0.4868, "step": 2219 }, { "epoch": 0.04708277661131259, "grad_norm": 0.35373833775520325, "learning_rate": 1.9975040799319504e-05, "loss": 0.5475, "step": 2220 }, { "epoch": 0.04710398506924562, "grad_norm": 0.35981741547584534, "learning_rate": 1.9975017246049164e-05, "loss": 0.5871, "step": 2221 }, { "epoch": 0.04712519352717864, "grad_norm": 0.39609870314598083, "learning_rate": 1.99749936816847e-05, "loss": 0.5738, "step": 2222 }, { "epoch": 0.04714640198511166, "grad_norm": 0.34491580724716187, "learning_rate": 1.9974970106226133e-05, "loss": 0.6769, "step": 2223 }, { "epoch": 0.04716761044304468, "grad_norm": 0.31659436225891113, "learning_rate": 1.9974946519673494e-05, "loss": 0.5186, "step": 2224 }, { "epoch": 0.04718881890097771, "grad_norm": 0.3947129249572754, "learning_rate": 1.9974922922026803e-05, "loss": 0.6113, "step": 2225 }, { "epoch": 0.047210027358910736, "grad_norm": 0.33368855714797974, "learning_rate": 1.997489931328609e-05, "loss": 0.5249, "step": 2226 }, { "epoch": 0.047231235816843756, "grad_norm": 0.34961479902267456, "learning_rate": 1.9974875693451382e-05, "loss": 0.5694, "step": 2227 }, { "epoch": 0.04725244427477678, "grad_norm": 0.32783791422843933, "learning_rate": 1.9974852062522703e-05, "loss": 0.5748, "step": 2228 }, { "epoch": 0.0472736527327098, "grad_norm": 0.3358798623085022, "learning_rate": 1.9974828420500078e-05, "loss": 0.4517, "step": 2229 }, { "epoch": 0.04729486119064283, "grad_norm": 0.32805147767066956, "learning_rate": 1.9974804767383537e-05, "loss": 0.5358, "step": 2230 }, { "epoch": 0.047316069648575855, "grad_norm": 0.30385836958885193, "learning_rate": 1.9974781103173108e-05, "loss": 0.565, "step": 2231 }, { "epoch": 0.047337278106508875, "grad_norm": 0.3359735906124115, "learning_rate": 1.9974757427868813e-05, "loss": 0.5495, "step": 2232 }, { "epoch": 0.0473584865644419, "grad_norm": 0.3092138469219208, "learning_rate": 1.9974733741470674e-05, "loss": 0.5067, "step": 2233 }, { "epoch": 0.04737969502237492, "grad_norm": 0.38451316952705383, "learning_rate": 1.9974710043978728e-05, "loss": 0.623, "step": 2234 }, { "epoch": 0.04740090348030795, "grad_norm": 0.3435843586921692, "learning_rate": 1.9974686335392994e-05, "loss": 0.4575, "step": 2235 }, { "epoch": 0.047422111938240974, "grad_norm": 0.33896175026893616, "learning_rate": 1.9974662615713504e-05, "loss": 0.5389, "step": 2236 }, { "epoch": 0.047443320396173994, "grad_norm": 0.3346558213233948, "learning_rate": 1.9974638884940275e-05, "loss": 0.5733, "step": 2237 }, { "epoch": 0.04746452885410702, "grad_norm": 0.4064931571483612, "learning_rate": 1.9974615143073343e-05, "loss": 0.6386, "step": 2238 }, { "epoch": 0.04748573731204004, "grad_norm": 0.35625317692756653, "learning_rate": 1.997459139011273e-05, "loss": 0.5908, "step": 2239 }, { "epoch": 0.047506945769973066, "grad_norm": 0.380704790353775, "learning_rate": 1.9974567626058463e-05, "loss": 0.553, "step": 2240 }, { "epoch": 0.047528154227906086, "grad_norm": 0.3457302749156952, "learning_rate": 1.997454385091057e-05, "loss": 0.5798, "step": 2241 }, { "epoch": 0.04754936268583911, "grad_norm": 0.4072100520133972, "learning_rate": 1.9974520064669076e-05, "loss": 0.4777, "step": 2242 }, { "epoch": 0.04757057114377214, "grad_norm": 0.32628872990608215, "learning_rate": 1.9974496267334005e-05, "loss": 0.524, "step": 2243 }, { "epoch": 0.04759177960170516, "grad_norm": 0.35038647055625916, "learning_rate": 1.9974472458905388e-05, "loss": 0.6155, "step": 2244 }, { "epoch": 0.047612988059638185, "grad_norm": 0.37569287419319153, "learning_rate": 1.9974448639383248e-05, "loss": 0.5536, "step": 2245 }, { "epoch": 0.047634196517571205, "grad_norm": 0.32639628648757935, "learning_rate": 1.9974424808767613e-05, "loss": 0.6141, "step": 2246 }, { "epoch": 0.04765540497550423, "grad_norm": 0.326035737991333, "learning_rate": 1.9974400967058512e-05, "loss": 0.5666, "step": 2247 }, { "epoch": 0.04767661343343726, "grad_norm": 0.34531116485595703, "learning_rate": 1.9974377114255966e-05, "loss": 0.6182, "step": 2248 }, { "epoch": 0.04769782189137028, "grad_norm": 0.35636991262435913, "learning_rate": 1.9974353250360003e-05, "loss": 0.5445, "step": 2249 }, { "epoch": 0.047719030349303304, "grad_norm": 0.3228687644004822, "learning_rate": 1.9974329375370652e-05, "loss": 0.5348, "step": 2250 }, { "epoch": 0.047740238807236324, "grad_norm": 0.3432667851448059, "learning_rate": 1.997430548928794e-05, "loss": 0.5568, "step": 2251 }, { "epoch": 0.04776144726516935, "grad_norm": 0.3098829388618469, "learning_rate": 1.9974281592111894e-05, "loss": 0.4731, "step": 2252 }, { "epoch": 0.04778265572310237, "grad_norm": 0.3344138562679291, "learning_rate": 1.9974257683842535e-05, "loss": 0.5844, "step": 2253 }, { "epoch": 0.047803864181035396, "grad_norm": 0.32826167345046997, "learning_rate": 1.9974233764479896e-05, "loss": 0.5352, "step": 2254 }, { "epoch": 0.04782507263896842, "grad_norm": 0.4539552927017212, "learning_rate": 1.9974209834024e-05, "loss": 0.6203, "step": 2255 }, { "epoch": 0.04784628109690144, "grad_norm": 0.3422110676765442, "learning_rate": 1.9974185892474874e-05, "loss": 0.49, "step": 2256 }, { "epoch": 0.04786748955483447, "grad_norm": 0.5812528133392334, "learning_rate": 1.9974161939832545e-05, "loss": 0.551, "step": 2257 }, { "epoch": 0.04788869801276749, "grad_norm": 0.3304707705974579, "learning_rate": 1.9974137976097038e-05, "loss": 0.5535, "step": 2258 }, { "epoch": 0.047909906470700515, "grad_norm": 0.48529133200645447, "learning_rate": 1.9974114001268385e-05, "loss": 0.5122, "step": 2259 }, { "epoch": 0.04793111492863354, "grad_norm": 0.30451124906539917, "learning_rate": 1.9974090015346606e-05, "loss": 0.4672, "step": 2260 }, { "epoch": 0.04795232338656656, "grad_norm": 0.31890740990638733, "learning_rate": 1.997406601833173e-05, "loss": 0.5717, "step": 2261 }, { "epoch": 0.04797353184449959, "grad_norm": 0.3792099058628082, "learning_rate": 1.997404201022379e-05, "loss": 0.5476, "step": 2262 }, { "epoch": 0.04799474030243261, "grad_norm": 0.357378214597702, "learning_rate": 1.99740179910228e-05, "loss": 0.5836, "step": 2263 }, { "epoch": 0.048015948760365634, "grad_norm": 0.4506392478942871, "learning_rate": 1.99739939607288e-05, "loss": 0.6206, "step": 2264 }, { "epoch": 0.04803715721829866, "grad_norm": 0.4100002348423004, "learning_rate": 1.9973969919341806e-05, "loss": 0.5644, "step": 2265 }, { "epoch": 0.04805836567623168, "grad_norm": 0.3183671236038208, "learning_rate": 1.997394586686185e-05, "loss": 0.5504, "step": 2266 }, { "epoch": 0.04807957413416471, "grad_norm": 0.3380313217639923, "learning_rate": 1.997392180328896e-05, "loss": 0.5462, "step": 2267 }, { "epoch": 0.04810078259209773, "grad_norm": 0.34117457270622253, "learning_rate": 1.9973897728623163e-05, "loss": 0.5627, "step": 2268 }, { "epoch": 0.04812199105003075, "grad_norm": 0.3103187680244446, "learning_rate": 1.997387364286448e-05, "loss": 0.5393, "step": 2269 }, { "epoch": 0.04814319950796377, "grad_norm": 0.35905179381370544, "learning_rate": 1.997384954601294e-05, "loss": 0.5682, "step": 2270 }, { "epoch": 0.0481644079658968, "grad_norm": 0.3512156009674072, "learning_rate": 1.9973825438068574e-05, "loss": 0.5996, "step": 2271 }, { "epoch": 0.048185616423829826, "grad_norm": 0.3565283715724945, "learning_rate": 1.9973801319031407e-05, "loss": 0.602, "step": 2272 }, { "epoch": 0.048206824881762846, "grad_norm": 0.6341794729232788, "learning_rate": 1.9973777188901458e-05, "loss": 0.5229, "step": 2273 }, { "epoch": 0.04822803333969587, "grad_norm": 0.3340074419975281, "learning_rate": 1.9973753047678766e-05, "loss": 0.4961, "step": 2274 }, { "epoch": 0.04824924179762889, "grad_norm": 0.3437650203704834, "learning_rate": 1.9973728895363354e-05, "loss": 0.5517, "step": 2275 }, { "epoch": 0.04827045025556192, "grad_norm": 0.3446674942970276, "learning_rate": 1.9973704731955247e-05, "loss": 0.6374, "step": 2276 }, { "epoch": 0.048291658713494945, "grad_norm": 0.3195776045322418, "learning_rate": 1.997368055745447e-05, "loss": 0.5359, "step": 2277 }, { "epoch": 0.048312867171427964, "grad_norm": 0.3177553415298462, "learning_rate": 1.997365637186105e-05, "loss": 0.5142, "step": 2278 }, { "epoch": 0.04833407562936099, "grad_norm": 0.3352469205856323, "learning_rate": 1.997363217517502e-05, "loss": 0.5785, "step": 2279 }, { "epoch": 0.04835528408729401, "grad_norm": 0.31703490018844604, "learning_rate": 1.99736079673964e-05, "loss": 0.5613, "step": 2280 }, { "epoch": 0.04837649254522704, "grad_norm": 0.34892335534095764, "learning_rate": 1.9973583748525226e-05, "loss": 0.5546, "step": 2281 }, { "epoch": 0.04839770100316006, "grad_norm": 0.4256194233894348, "learning_rate": 1.9973559518561515e-05, "loss": 0.5802, "step": 2282 }, { "epoch": 0.04841890946109308, "grad_norm": 0.32447749376296997, "learning_rate": 1.9973535277505297e-05, "loss": 0.5461, "step": 2283 }, { "epoch": 0.04844011791902611, "grad_norm": 0.346442312002182, "learning_rate": 1.9973511025356597e-05, "loss": 0.5895, "step": 2284 }, { "epoch": 0.04846132637695913, "grad_norm": 0.37572765350341797, "learning_rate": 1.9973486762115448e-05, "loss": 0.5037, "step": 2285 }, { "epoch": 0.048482534834892156, "grad_norm": 0.35092946887016296, "learning_rate": 1.9973462487781874e-05, "loss": 0.6243, "step": 2286 }, { "epoch": 0.048503743292825176, "grad_norm": 0.3247150480747223, "learning_rate": 1.99734382023559e-05, "loss": 0.5097, "step": 2287 }, { "epoch": 0.0485249517507582, "grad_norm": 0.37849780917167664, "learning_rate": 1.997341390583756e-05, "loss": 0.557, "step": 2288 }, { "epoch": 0.04854616020869123, "grad_norm": 0.34183886647224426, "learning_rate": 1.997338959822687e-05, "loss": 0.5223, "step": 2289 }, { "epoch": 0.04856736866662425, "grad_norm": 0.301710307598114, "learning_rate": 1.9973365279523863e-05, "loss": 0.5244, "step": 2290 }, { "epoch": 0.048588577124557275, "grad_norm": 0.32886040210723877, "learning_rate": 1.9973340949728567e-05, "loss": 0.4726, "step": 2291 }, { "epoch": 0.048609785582490295, "grad_norm": 0.30036258697509766, "learning_rate": 1.9973316608841005e-05, "loss": 0.5655, "step": 2292 }, { "epoch": 0.04863099404042332, "grad_norm": 0.34550178050994873, "learning_rate": 1.9973292256861212e-05, "loss": 0.5562, "step": 2293 }, { "epoch": 0.04865220249835635, "grad_norm": 0.3866398334503174, "learning_rate": 1.9973267893789207e-05, "loss": 0.5652, "step": 2294 }, { "epoch": 0.04867341095628937, "grad_norm": 0.47489655017852783, "learning_rate": 1.997324351962502e-05, "loss": 0.5983, "step": 2295 }, { "epoch": 0.048694619414222394, "grad_norm": 0.3185524642467499, "learning_rate": 1.997321913436868e-05, "loss": 0.5499, "step": 2296 }, { "epoch": 0.048715827872155414, "grad_norm": 0.4912630021572113, "learning_rate": 1.997319473802021e-05, "loss": 0.4562, "step": 2297 }, { "epoch": 0.04873703633008844, "grad_norm": 0.3799733817577362, "learning_rate": 1.997317033057964e-05, "loss": 0.5299, "step": 2298 }, { "epoch": 0.04875824478802146, "grad_norm": 0.33956798911094666, "learning_rate": 1.9973145912047e-05, "loss": 0.6414, "step": 2299 }, { "epoch": 0.048779453245954486, "grad_norm": 0.34767380356788635, "learning_rate": 1.997312148242231e-05, "loss": 0.6064, "step": 2300 }, { "epoch": 0.04880066170388751, "grad_norm": 0.3282814025878906, "learning_rate": 1.9973097041705604e-05, "loss": 0.5065, "step": 2301 }, { "epoch": 0.04882187016182053, "grad_norm": 0.367078572511673, "learning_rate": 1.99730725898969e-05, "loss": 0.5529, "step": 2302 }, { "epoch": 0.04884307861975356, "grad_norm": 0.39438140392303467, "learning_rate": 1.9973048126996234e-05, "loss": 0.5191, "step": 2303 }, { "epoch": 0.04886428707768658, "grad_norm": 0.29453301429748535, "learning_rate": 1.9973023653003635e-05, "loss": 0.4744, "step": 2304 }, { "epoch": 0.048885495535619605, "grad_norm": 0.3474818766117096, "learning_rate": 1.9972999167919122e-05, "loss": 0.5259, "step": 2305 }, { "epoch": 0.04890670399355263, "grad_norm": 0.32112959027290344, "learning_rate": 1.9972974671742727e-05, "loss": 0.4847, "step": 2306 }, { "epoch": 0.04892791245148565, "grad_norm": 0.35121357440948486, "learning_rate": 1.9972950164474472e-05, "loss": 0.5026, "step": 2307 }, { "epoch": 0.04894912090941868, "grad_norm": 0.4377143979072571, "learning_rate": 1.9972925646114393e-05, "loss": 0.5438, "step": 2308 }, { "epoch": 0.0489703293673517, "grad_norm": 0.34160783886909485, "learning_rate": 1.9972901116662514e-05, "loss": 0.6414, "step": 2309 }, { "epoch": 0.048991537825284724, "grad_norm": 0.3170364499092102, "learning_rate": 1.9972876576118862e-05, "loss": 0.6348, "step": 2310 }, { "epoch": 0.049012746283217744, "grad_norm": 0.34154805541038513, "learning_rate": 1.9972852024483455e-05, "loss": 0.5528, "step": 2311 }, { "epoch": 0.04903395474115077, "grad_norm": 0.335067480802536, "learning_rate": 1.9972827461756335e-05, "loss": 0.6361, "step": 2312 }, { "epoch": 0.0490551631990838, "grad_norm": 0.34096774458885193, "learning_rate": 1.9972802887937522e-05, "loss": 0.5771, "step": 2313 }, { "epoch": 0.049076371657016817, "grad_norm": 0.3862219750881195, "learning_rate": 1.9972778303027046e-05, "loss": 0.5273, "step": 2314 }, { "epoch": 0.04909758011494984, "grad_norm": 0.31517279148101807, "learning_rate": 1.997275370702493e-05, "loss": 0.4764, "step": 2315 }, { "epoch": 0.04911878857288286, "grad_norm": 0.3323840796947479, "learning_rate": 1.9972729099931204e-05, "loss": 0.5889, "step": 2316 }, { "epoch": 0.04913999703081589, "grad_norm": 0.3730940520763397, "learning_rate": 1.9972704481745896e-05, "loss": 0.6134, "step": 2317 }, { "epoch": 0.049161205488748916, "grad_norm": 0.29834312200546265, "learning_rate": 1.9972679852469034e-05, "loss": 0.5596, "step": 2318 }, { "epoch": 0.049182413946681935, "grad_norm": 0.34565824270248413, "learning_rate": 1.997265521210064e-05, "loss": 0.5797, "step": 2319 }, { "epoch": 0.04920362240461496, "grad_norm": 0.3273184299468994, "learning_rate": 1.9972630560640748e-05, "loss": 0.5338, "step": 2320 }, { "epoch": 0.04922483086254798, "grad_norm": 0.9239223599433899, "learning_rate": 1.9972605898089386e-05, "loss": 0.5563, "step": 2321 }, { "epoch": 0.04924603932048101, "grad_norm": 0.3326113820075989, "learning_rate": 1.9972581224446574e-05, "loss": 0.5474, "step": 2322 }, { "epoch": 0.049267247778414035, "grad_norm": 0.354912132024765, "learning_rate": 1.9972556539712347e-05, "loss": 0.5125, "step": 2323 }, { "epoch": 0.049288456236347054, "grad_norm": 0.33473119139671326, "learning_rate": 1.9972531843886725e-05, "loss": 0.5595, "step": 2324 }, { "epoch": 0.04930966469428008, "grad_norm": 0.34137243032455444, "learning_rate": 1.9972507136969742e-05, "loss": 0.4626, "step": 2325 }, { "epoch": 0.0493308731522131, "grad_norm": 0.35258379578590393, "learning_rate": 1.997248241896142e-05, "loss": 0.5499, "step": 2326 }, { "epoch": 0.04935208161014613, "grad_norm": 0.3762582838535309, "learning_rate": 1.9972457689861798e-05, "loss": 0.5682, "step": 2327 }, { "epoch": 0.04937329006807915, "grad_norm": 0.4531934857368469, "learning_rate": 1.997243294967089e-05, "loss": 0.5977, "step": 2328 }, { "epoch": 0.04939449852601217, "grad_norm": 0.30115416646003723, "learning_rate": 1.9972408198388727e-05, "loss": 0.5416, "step": 2329 }, { "epoch": 0.0494157069839452, "grad_norm": 0.31677159667015076, "learning_rate": 1.9972383436015345e-05, "loss": 0.5282, "step": 2330 }, { "epoch": 0.04943691544187822, "grad_norm": 0.3510551154613495, "learning_rate": 1.9972358662550758e-05, "loss": 0.6149, "step": 2331 }, { "epoch": 0.049458123899811246, "grad_norm": 0.3253784477710724, "learning_rate": 1.9972333877995002e-05, "loss": 0.6052, "step": 2332 }, { "epoch": 0.049479332357744266, "grad_norm": 0.4310617446899414, "learning_rate": 1.9972309082348102e-05, "loss": 0.5608, "step": 2333 }, { "epoch": 0.04950054081567729, "grad_norm": 0.2967837154865265, "learning_rate": 1.997228427561009e-05, "loss": 0.4692, "step": 2334 }, { "epoch": 0.04952174927361032, "grad_norm": 0.35288503766059875, "learning_rate": 1.9972259457780986e-05, "loss": 0.5481, "step": 2335 }, { "epoch": 0.04954295773154334, "grad_norm": 0.38861897587776184, "learning_rate": 1.9972234628860826e-05, "loss": 0.4978, "step": 2336 }, { "epoch": 0.049564166189476365, "grad_norm": 0.3303236663341522, "learning_rate": 1.9972209788849627e-05, "loss": 0.5934, "step": 2337 }, { "epoch": 0.049585374647409385, "grad_norm": 0.3762779235839844, "learning_rate": 1.9972184937747426e-05, "loss": 0.5215, "step": 2338 }, { "epoch": 0.04960658310534241, "grad_norm": 0.31352391839027405, "learning_rate": 1.997216007555425e-05, "loss": 0.5426, "step": 2339 }, { "epoch": 0.04962779156327544, "grad_norm": 0.2994822859764099, "learning_rate": 1.9972135202270123e-05, "loss": 0.5294, "step": 2340 }, { "epoch": 0.04964900002120846, "grad_norm": 0.32497408986091614, "learning_rate": 1.9972110317895072e-05, "loss": 0.5006, "step": 2341 }, { "epoch": 0.049670208479141484, "grad_norm": 0.6255596876144409, "learning_rate": 1.9972085422429128e-05, "loss": 0.5791, "step": 2342 }, { "epoch": 0.0496914169370745, "grad_norm": 0.34463173151016235, "learning_rate": 1.9972060515872317e-05, "loss": 0.5692, "step": 2343 }, { "epoch": 0.04971262539500753, "grad_norm": 0.3144073784351349, "learning_rate": 1.9972035598224666e-05, "loss": 0.4671, "step": 2344 }, { "epoch": 0.04973383385294055, "grad_norm": 0.31465262174606323, "learning_rate": 1.9972010669486207e-05, "loss": 0.5097, "step": 2345 }, { "epoch": 0.049755042310873576, "grad_norm": 0.3645740747451782, "learning_rate": 1.997198572965696e-05, "loss": 0.5899, "step": 2346 }, { "epoch": 0.0497762507688066, "grad_norm": 0.34876689314842224, "learning_rate": 1.9971960778736957e-05, "loss": 0.4752, "step": 2347 }, { "epoch": 0.04979745922673962, "grad_norm": 0.4830370545387268, "learning_rate": 1.997193581672623e-05, "loss": 0.5516, "step": 2348 }, { "epoch": 0.04981866768467265, "grad_norm": 0.35628893971443176, "learning_rate": 1.9971910843624803e-05, "loss": 0.5814, "step": 2349 }, { "epoch": 0.04983987614260567, "grad_norm": 0.38644739985466003, "learning_rate": 1.99718858594327e-05, "loss": 0.5145, "step": 2350 }, { "epoch": 0.049861084600538695, "grad_norm": 0.6778594255447388, "learning_rate": 1.997186086414995e-05, "loss": 0.5632, "step": 2351 }, { "epoch": 0.04988229305847172, "grad_norm": 0.3632998764514923, "learning_rate": 1.9971835857776588e-05, "loss": 0.6193, "step": 2352 }, { "epoch": 0.04990350151640474, "grad_norm": 0.34912344813346863, "learning_rate": 1.9971810840312633e-05, "loss": 0.4883, "step": 2353 }, { "epoch": 0.04992470997433777, "grad_norm": 0.34033113718032837, "learning_rate": 1.997178581175812e-05, "loss": 0.5109, "step": 2354 }, { "epoch": 0.04994591843227079, "grad_norm": 0.36119964718818665, "learning_rate": 1.997176077211307e-05, "loss": 0.569, "step": 2355 }, { "epoch": 0.049967126890203814, "grad_norm": 0.3864743411540985, "learning_rate": 1.9971735721377517e-05, "loss": 0.5083, "step": 2356 }, { "epoch": 0.049988335348136834, "grad_norm": 0.29591673612594604, "learning_rate": 1.9971710659551486e-05, "loss": 0.5169, "step": 2357 }, { "epoch": 0.05000954380606986, "grad_norm": 0.39581096172332764, "learning_rate": 1.9971685586635005e-05, "loss": 0.5374, "step": 2358 }, { "epoch": 0.05003075226400289, "grad_norm": 0.37963858246803284, "learning_rate": 1.9971660502628102e-05, "loss": 0.4604, "step": 2359 }, { "epoch": 0.050051960721935906, "grad_norm": 0.3933435082435608, "learning_rate": 1.9971635407530803e-05, "loss": 0.6579, "step": 2360 }, { "epoch": 0.05007316917986893, "grad_norm": 0.3377869427204132, "learning_rate": 1.997161030134314e-05, "loss": 0.6575, "step": 2361 }, { "epoch": 0.05009437763780195, "grad_norm": 0.3253692388534546, "learning_rate": 1.9971585184065138e-05, "loss": 0.5454, "step": 2362 }, { "epoch": 0.05011558609573498, "grad_norm": 0.35738405585289, "learning_rate": 1.9971560055696822e-05, "loss": 0.6445, "step": 2363 }, { "epoch": 0.050136794553668006, "grad_norm": 0.3148169219493866, "learning_rate": 1.9971534916238226e-05, "loss": 0.5589, "step": 2364 }, { "epoch": 0.050158003011601025, "grad_norm": 0.3395877182483673, "learning_rate": 1.997150976568938e-05, "loss": 0.5419, "step": 2365 }, { "epoch": 0.05017921146953405, "grad_norm": 0.39494189620018005, "learning_rate": 1.99714846040503e-05, "loss": 0.5234, "step": 2366 }, { "epoch": 0.05020041992746707, "grad_norm": 0.3552054464817047, "learning_rate": 1.9971459431321026e-05, "loss": 0.6122, "step": 2367 }, { "epoch": 0.0502216283854001, "grad_norm": 0.3796484172344208, "learning_rate": 1.997143424750158e-05, "loss": 0.5499, "step": 2368 }, { "epoch": 0.050242836843333125, "grad_norm": 0.3952259421348572, "learning_rate": 1.9971409052591992e-05, "loss": 0.5219, "step": 2369 }, { "epoch": 0.050264045301266144, "grad_norm": 0.29659631848335266, "learning_rate": 1.9971383846592287e-05, "loss": 0.4255, "step": 2370 }, { "epoch": 0.05028525375919917, "grad_norm": 0.340384840965271, "learning_rate": 1.99713586295025e-05, "loss": 0.5088, "step": 2371 }, { "epoch": 0.05030646221713219, "grad_norm": 0.3353286385536194, "learning_rate": 1.9971333401322652e-05, "loss": 0.5489, "step": 2372 }, { "epoch": 0.05032767067506522, "grad_norm": 0.3436596691608429, "learning_rate": 1.9971308162052773e-05, "loss": 0.5721, "step": 2373 }, { "epoch": 0.05034887913299824, "grad_norm": 0.33076727390289307, "learning_rate": 1.997128291169289e-05, "loss": 0.5846, "step": 2374 }, { "epoch": 0.05037008759093126, "grad_norm": 0.3808562755584717, "learning_rate": 1.997125765024304e-05, "loss": 0.5332, "step": 2375 }, { "epoch": 0.05039129604886429, "grad_norm": 0.3159383237361908, "learning_rate": 1.9971232377703237e-05, "loss": 0.516, "step": 2376 }, { "epoch": 0.05041250450679731, "grad_norm": 0.3654981553554535, "learning_rate": 1.997120709407352e-05, "loss": 0.5836, "step": 2377 }, { "epoch": 0.050433712964730336, "grad_norm": 0.3706683814525604, "learning_rate": 1.997118179935391e-05, "loss": 0.5982, "step": 2378 }, { "epoch": 0.050454921422663356, "grad_norm": 0.33294686675071716, "learning_rate": 1.997115649354444e-05, "loss": 0.4756, "step": 2379 }, { "epoch": 0.05047612988059638, "grad_norm": 0.37703585624694824, "learning_rate": 1.9971131176645134e-05, "loss": 0.5892, "step": 2380 }, { "epoch": 0.05049733833852941, "grad_norm": 0.4330299496650696, "learning_rate": 1.9971105848656025e-05, "loss": 0.4625, "step": 2381 }, { "epoch": 0.05051854679646243, "grad_norm": 0.34628668427467346, "learning_rate": 1.997108050957714e-05, "loss": 0.5049, "step": 2382 }, { "epoch": 0.050539755254395455, "grad_norm": 0.3664391040802002, "learning_rate": 1.9971055159408503e-05, "loss": 0.6199, "step": 2383 }, { "epoch": 0.050560963712328474, "grad_norm": 0.3195611238479614, "learning_rate": 1.9971029798150146e-05, "loss": 0.552, "step": 2384 }, { "epoch": 0.0505821721702615, "grad_norm": 0.33959391713142395, "learning_rate": 1.9971004425802097e-05, "loss": 0.6546, "step": 2385 }, { "epoch": 0.05060338062819452, "grad_norm": 0.30860885977745056, "learning_rate": 1.997097904236438e-05, "loss": 0.5802, "step": 2386 }, { "epoch": 0.05062458908612755, "grad_norm": 0.30886030197143555, "learning_rate": 1.9970953647837033e-05, "loss": 0.5012, "step": 2387 }, { "epoch": 0.050645797544060574, "grad_norm": 0.3692067861557007, "learning_rate": 1.9970928242220075e-05, "loss": 0.5897, "step": 2388 }, { "epoch": 0.05066700600199359, "grad_norm": 0.3160094916820526, "learning_rate": 1.9970902825513538e-05, "loss": 0.5611, "step": 2389 }, { "epoch": 0.05068821445992662, "grad_norm": 0.3183479309082031, "learning_rate": 1.9970877397717445e-05, "loss": 0.5184, "step": 2390 }, { "epoch": 0.05070942291785964, "grad_norm": 0.4048576056957245, "learning_rate": 1.9970851958831835e-05, "loss": 0.5599, "step": 2391 }, { "epoch": 0.050730631375792666, "grad_norm": 0.36299896240234375, "learning_rate": 1.9970826508856723e-05, "loss": 0.6023, "step": 2392 }, { "epoch": 0.05075183983372569, "grad_norm": 0.3336614668369293, "learning_rate": 1.997080104779215e-05, "loss": 0.4943, "step": 2393 }, { "epoch": 0.05077304829165871, "grad_norm": 0.4711558520793915, "learning_rate": 1.997077557563814e-05, "loss": 0.5558, "step": 2394 }, { "epoch": 0.05079425674959174, "grad_norm": 0.43070098757743835, "learning_rate": 1.9970750092394715e-05, "loss": 0.5399, "step": 2395 }, { "epoch": 0.05081546520752476, "grad_norm": 0.32846322655677795, "learning_rate": 1.997072459806191e-05, "loss": 0.5952, "step": 2396 }, { "epoch": 0.050836673665457785, "grad_norm": 0.3717276453971863, "learning_rate": 1.9970699092639752e-05, "loss": 0.5317, "step": 2397 }, { "epoch": 0.05085788212339081, "grad_norm": 0.3413659334182739, "learning_rate": 1.997067357612827e-05, "loss": 0.5696, "step": 2398 }, { "epoch": 0.05087909058132383, "grad_norm": 0.31546127796173096, "learning_rate": 1.997064804852749e-05, "loss": 0.5439, "step": 2399 }, { "epoch": 0.05090029903925686, "grad_norm": 0.3207571804523468, "learning_rate": 1.9970622509837444e-05, "loss": 0.4853, "step": 2400 }, { "epoch": 0.05092150749718988, "grad_norm": 0.35447755455970764, "learning_rate": 1.9970596960058155e-05, "loss": 0.585, "step": 2401 }, { "epoch": 0.050942715955122904, "grad_norm": 0.3720360994338989, "learning_rate": 1.997057139918966e-05, "loss": 0.5406, "step": 2402 }, { "epoch": 0.050963924413055924, "grad_norm": 0.36287593841552734, "learning_rate": 1.9970545827231978e-05, "loss": 0.592, "step": 2403 }, { "epoch": 0.05098513287098895, "grad_norm": 0.3103538155555725, "learning_rate": 1.997052024418514e-05, "loss": 0.5163, "step": 2404 }, { "epoch": 0.05100634132892198, "grad_norm": 0.3240635395050049, "learning_rate": 1.997049465004918e-05, "loss": 0.5327, "step": 2405 }, { "epoch": 0.051027549786854996, "grad_norm": 0.3124209940433502, "learning_rate": 1.997046904482412e-05, "loss": 0.5929, "step": 2406 }, { "epoch": 0.05104875824478802, "grad_norm": 0.4325157701969147, "learning_rate": 1.9970443428509992e-05, "loss": 0.5367, "step": 2407 }, { "epoch": 0.05106996670272104, "grad_norm": 0.320663183927536, "learning_rate": 1.9970417801106822e-05, "loss": 0.6106, "step": 2408 }, { "epoch": 0.05109117516065407, "grad_norm": 0.31604427099227905, "learning_rate": 1.997039216261464e-05, "loss": 0.6148, "step": 2409 }, { "epoch": 0.051112383618587096, "grad_norm": 0.3754670023918152, "learning_rate": 1.997036651303348e-05, "loss": 0.5275, "step": 2410 }, { "epoch": 0.051133592076520115, "grad_norm": 0.37586814165115356, "learning_rate": 1.9970340852363356e-05, "loss": 0.561, "step": 2411 }, { "epoch": 0.05115480053445314, "grad_norm": 0.31601640582084656, "learning_rate": 1.9970315180604313e-05, "loss": 0.4577, "step": 2412 }, { "epoch": 0.05117600899238616, "grad_norm": 0.3850913643836975, "learning_rate": 1.9970289497756368e-05, "loss": 0.5297, "step": 2413 }, { "epoch": 0.05119721745031919, "grad_norm": 0.34286776185035706, "learning_rate": 1.9970263803819555e-05, "loss": 0.5646, "step": 2414 }, { "epoch": 0.05121842590825221, "grad_norm": 0.7840570211410522, "learning_rate": 1.99702380987939e-05, "loss": 0.5376, "step": 2415 }, { "epoch": 0.051239634366185234, "grad_norm": 0.3606361150741577, "learning_rate": 1.9970212382679433e-05, "loss": 0.6722, "step": 2416 }, { "epoch": 0.05126084282411826, "grad_norm": 0.3911840617656708, "learning_rate": 1.9970186655476184e-05, "loss": 0.555, "step": 2417 }, { "epoch": 0.05128205128205128, "grad_norm": 0.3562273681163788, "learning_rate": 1.9970160917184177e-05, "loss": 0.5666, "step": 2418 }, { "epoch": 0.05130325973998431, "grad_norm": 0.33651503920555115, "learning_rate": 1.9970135167803447e-05, "loss": 0.5768, "step": 2419 }, { "epoch": 0.051324468197917326, "grad_norm": 0.3368266522884369, "learning_rate": 1.997010940733402e-05, "loss": 0.5046, "step": 2420 }, { "epoch": 0.05134567665585035, "grad_norm": 0.31542909145355225, "learning_rate": 1.9970083635775924e-05, "loss": 0.5024, "step": 2421 }, { "epoch": 0.05136688511378338, "grad_norm": 0.3360316753387451, "learning_rate": 1.9970057853129182e-05, "loss": 0.5048, "step": 2422 }, { "epoch": 0.0513880935717164, "grad_norm": 0.3240255117416382, "learning_rate": 1.9970032059393833e-05, "loss": 0.5191, "step": 2423 }, { "epoch": 0.051409302029649426, "grad_norm": 0.32670503854751587, "learning_rate": 1.99700062545699e-05, "loss": 0.5595, "step": 2424 }, { "epoch": 0.051430510487582445, "grad_norm": 0.4107622802257538, "learning_rate": 1.9969980438657413e-05, "loss": 0.6157, "step": 2425 }, { "epoch": 0.05145171894551547, "grad_norm": 0.341012567281723, "learning_rate": 1.99699546116564e-05, "loss": 0.5533, "step": 2426 }, { "epoch": 0.0514729274034485, "grad_norm": 1.2641583681106567, "learning_rate": 1.996992877356689e-05, "loss": 0.5844, "step": 2427 }, { "epoch": 0.05149413586138152, "grad_norm": 0.3621880114078522, "learning_rate": 1.996990292438891e-05, "loss": 0.5638, "step": 2428 }, { "epoch": 0.051515344319314545, "grad_norm": 0.33572861552238464, "learning_rate": 1.9969877064122492e-05, "loss": 0.5413, "step": 2429 }, { "epoch": 0.051536552777247564, "grad_norm": 0.35644346475601196, "learning_rate": 1.9969851192767663e-05, "loss": 0.5368, "step": 2430 }, { "epoch": 0.05155776123518059, "grad_norm": 0.3263204097747803, "learning_rate": 1.9969825310324454e-05, "loss": 0.4938, "step": 2431 }, { "epoch": 0.05157896969311361, "grad_norm": 0.3737124800682068, "learning_rate": 1.996979941679289e-05, "loss": 0.608, "step": 2432 }, { "epoch": 0.05160017815104664, "grad_norm": 0.322641521692276, "learning_rate": 1.9969773512173003e-05, "loss": 0.5377, "step": 2433 }, { "epoch": 0.051621386608979664, "grad_norm": 0.3271580636501312, "learning_rate": 1.996974759646482e-05, "loss": 0.5572, "step": 2434 }, { "epoch": 0.05164259506691268, "grad_norm": 0.33497318625450134, "learning_rate": 1.996972166966837e-05, "loss": 0.6288, "step": 2435 }, { "epoch": 0.05166380352484571, "grad_norm": 0.37970346212387085, "learning_rate": 1.9969695731783682e-05, "loss": 0.5856, "step": 2436 }, { "epoch": 0.05168501198277873, "grad_norm": 0.3885490298271179, "learning_rate": 1.9969669782810785e-05, "loss": 0.6202, "step": 2437 }, { "epoch": 0.051706220440711756, "grad_norm": 0.325967937707901, "learning_rate": 1.996964382274971e-05, "loss": 0.5, "step": 2438 }, { "epoch": 0.05172742889864478, "grad_norm": 0.3320331573486328, "learning_rate": 1.996961785160048e-05, "loss": 0.5929, "step": 2439 }, { "epoch": 0.0517486373565778, "grad_norm": 0.3555184602737427, "learning_rate": 1.9969591869363133e-05, "loss": 0.5852, "step": 2440 }, { "epoch": 0.05176984581451083, "grad_norm": 0.32807210087776184, "learning_rate": 1.9969565876037687e-05, "loss": 0.5552, "step": 2441 }, { "epoch": 0.05179105427244385, "grad_norm": 0.6464670300483704, "learning_rate": 1.9969539871624176e-05, "loss": 0.5334, "step": 2442 }, { "epoch": 0.051812262730376875, "grad_norm": 0.31234923005104065, "learning_rate": 1.9969513856122632e-05, "loss": 0.4357, "step": 2443 }, { "epoch": 0.0518334711883099, "grad_norm": 0.3240095376968384, "learning_rate": 1.9969487829533084e-05, "loss": 0.5425, "step": 2444 }, { "epoch": 0.05185467964624292, "grad_norm": 0.3343755900859833, "learning_rate": 1.9969461791855557e-05, "loss": 0.6072, "step": 2445 }, { "epoch": 0.05187588810417595, "grad_norm": 0.3050932288169861, "learning_rate": 1.9969435743090076e-05, "loss": 0.4814, "step": 2446 }, { "epoch": 0.05189709656210897, "grad_norm": 1.6532410383224487, "learning_rate": 1.996940968323668e-05, "loss": 0.5193, "step": 2447 }, { "epoch": 0.051918305020041994, "grad_norm": 0.3326416611671448, "learning_rate": 1.9969383612295393e-05, "loss": 0.5458, "step": 2448 }, { "epoch": 0.05193951347797501, "grad_norm": 0.3183394968509674, "learning_rate": 1.996935753026624e-05, "loss": 0.531, "step": 2449 }, { "epoch": 0.05196072193590804, "grad_norm": 0.33080631494522095, "learning_rate": 1.996933143714926e-05, "loss": 0.513, "step": 2450 }, { "epoch": 0.051981930393841067, "grad_norm": 0.32910463213920593, "learning_rate": 1.9969305332944473e-05, "loss": 0.536, "step": 2451 }, { "epoch": 0.052003138851774086, "grad_norm": 0.3591657876968384, "learning_rate": 1.9969279217651914e-05, "loss": 0.6969, "step": 2452 }, { "epoch": 0.05202434730970711, "grad_norm": 0.6508049368858337, "learning_rate": 1.9969253091271605e-05, "loss": 0.5673, "step": 2453 }, { "epoch": 0.05204555576764013, "grad_norm": 0.3381224572658539, "learning_rate": 1.996922695380358e-05, "loss": 0.6276, "step": 2454 }, { "epoch": 0.05206676422557316, "grad_norm": 0.3551100790500641, "learning_rate": 1.9969200805247872e-05, "loss": 0.5784, "step": 2455 }, { "epoch": 0.052087972683506185, "grad_norm": 0.3326418101787567, "learning_rate": 1.99691746456045e-05, "loss": 0.5043, "step": 2456 }, { "epoch": 0.052109181141439205, "grad_norm": 0.317680299282074, "learning_rate": 1.99691484748735e-05, "loss": 0.6156, "step": 2457 }, { "epoch": 0.05213038959937223, "grad_norm": 0.35307642817497253, "learning_rate": 1.9969122293054902e-05, "loss": 0.51, "step": 2458 }, { "epoch": 0.05215159805730525, "grad_norm": 0.3170062303543091, "learning_rate": 1.996909610014873e-05, "loss": 0.5288, "step": 2459 }, { "epoch": 0.05217280651523828, "grad_norm": 0.7601625919342041, "learning_rate": 1.9969069896155017e-05, "loss": 0.5319, "step": 2460 }, { "epoch": 0.0521940149731713, "grad_norm": 0.3282287120819092, "learning_rate": 1.996904368107379e-05, "loss": 0.5048, "step": 2461 }, { "epoch": 0.052215223431104324, "grad_norm": 0.37206870317459106, "learning_rate": 1.9969017454905083e-05, "loss": 0.5498, "step": 2462 }, { "epoch": 0.05223643188903735, "grad_norm": 0.3571438193321228, "learning_rate": 1.9968991217648917e-05, "loss": 0.6085, "step": 2463 }, { "epoch": 0.05225764034697037, "grad_norm": 0.3346205949783325, "learning_rate": 1.9968964969305327e-05, "loss": 0.5772, "step": 2464 }, { "epoch": 0.0522788488049034, "grad_norm": 0.33706071972846985, "learning_rate": 1.9968938709874344e-05, "loss": 0.5337, "step": 2465 }, { "epoch": 0.052300057262836416, "grad_norm": 0.31627652049064636, "learning_rate": 1.996891243935599e-05, "loss": 0.55, "step": 2466 }, { "epoch": 0.05232126572076944, "grad_norm": 0.29724225401878357, "learning_rate": 1.99688861577503e-05, "loss": 0.5089, "step": 2467 }, { "epoch": 0.05234247417870247, "grad_norm": 0.33577510714530945, "learning_rate": 1.99688598650573e-05, "loss": 0.5741, "step": 2468 }, { "epoch": 0.05236368263663549, "grad_norm": 0.360937237739563, "learning_rate": 1.9968833561277022e-05, "loss": 0.5578, "step": 2469 }, { "epoch": 0.052384891094568516, "grad_norm": 0.3748127222061157, "learning_rate": 1.996880724640949e-05, "loss": 0.4978, "step": 2470 }, { "epoch": 0.052406099552501535, "grad_norm": 0.3423115313053131, "learning_rate": 1.9968780920454742e-05, "loss": 0.5144, "step": 2471 }, { "epoch": 0.05242730801043456, "grad_norm": 0.29975125193595886, "learning_rate": 1.9968754583412803e-05, "loss": 0.5094, "step": 2472 }, { "epoch": 0.05244851646836759, "grad_norm": 0.3487979769706726, "learning_rate": 1.99687282352837e-05, "loss": 0.454, "step": 2473 }, { "epoch": 0.05246972492630061, "grad_norm": 0.31005972623825073, "learning_rate": 1.996870187606746e-05, "loss": 0.4984, "step": 2474 }, { "epoch": 0.052490933384233635, "grad_norm": 0.33487600088119507, "learning_rate": 1.996867550576412e-05, "loss": 0.5749, "step": 2475 }, { "epoch": 0.052512141842166654, "grad_norm": 0.32238271832466125, "learning_rate": 1.9968649124373708e-05, "loss": 0.539, "step": 2476 }, { "epoch": 0.05253335030009968, "grad_norm": 0.34503284096717834, "learning_rate": 1.9968622731896247e-05, "loss": 0.4996, "step": 2477 }, { "epoch": 0.0525545587580327, "grad_norm": 0.31348904967308044, "learning_rate": 1.9968596328331773e-05, "loss": 0.5331, "step": 2478 }, { "epoch": 0.05257576721596573, "grad_norm": 0.37065181136131287, "learning_rate": 1.996856991368031e-05, "loss": 0.5399, "step": 2479 }, { "epoch": 0.05259697567389875, "grad_norm": 0.3353996276855469, "learning_rate": 1.9968543487941892e-05, "loss": 0.5403, "step": 2480 }, { "epoch": 0.05261818413183177, "grad_norm": 0.37834686040878296, "learning_rate": 1.9968517051116548e-05, "loss": 0.4784, "step": 2481 }, { "epoch": 0.0526393925897648, "grad_norm": 0.337542325258255, "learning_rate": 1.9968490603204308e-05, "loss": 0.5179, "step": 2482 }, { "epoch": 0.05266060104769782, "grad_norm": 0.35758450627326965, "learning_rate": 1.9968464144205193e-05, "loss": 0.5083, "step": 2483 }, { "epoch": 0.052681809505630846, "grad_norm": 0.3281085789203644, "learning_rate": 1.996843767411924e-05, "loss": 0.5313, "step": 2484 }, { "epoch": 0.05270301796356387, "grad_norm": 0.36070337891578674, "learning_rate": 1.996841119294648e-05, "loss": 0.5604, "step": 2485 }, { "epoch": 0.05272422642149689, "grad_norm": 0.34160032868385315, "learning_rate": 1.9968384700686938e-05, "loss": 0.6134, "step": 2486 }, { "epoch": 0.05274543487942992, "grad_norm": 0.3377569019794464, "learning_rate": 1.9968358197340647e-05, "loss": 0.5153, "step": 2487 }, { "epoch": 0.05276664333736294, "grad_norm": 0.3396364450454712, "learning_rate": 1.9968331682907633e-05, "loss": 0.5803, "step": 2488 }, { "epoch": 0.052787851795295965, "grad_norm": 0.3363138437271118, "learning_rate": 1.9968305157387927e-05, "loss": 0.4663, "step": 2489 }, { "epoch": 0.052809060253228984, "grad_norm": 0.3270035684108734, "learning_rate": 1.996827862078156e-05, "loss": 0.5833, "step": 2490 }, { "epoch": 0.05283026871116201, "grad_norm": 0.35322245955467224, "learning_rate": 1.9968252073088557e-05, "loss": 0.4935, "step": 2491 }, { "epoch": 0.05285147716909504, "grad_norm": 0.31137192249298096, "learning_rate": 1.9968225514308954e-05, "loss": 0.5141, "step": 2492 }, { "epoch": 0.05287268562702806, "grad_norm": 0.30796611309051514, "learning_rate": 1.9968198944442777e-05, "loss": 0.5143, "step": 2493 }, { "epoch": 0.052893894084961084, "grad_norm": 0.3423152565956116, "learning_rate": 1.9968172363490054e-05, "loss": 0.5636, "step": 2494 }, { "epoch": 0.0529151025428941, "grad_norm": 0.6089782118797302, "learning_rate": 1.9968145771450818e-05, "loss": 0.5169, "step": 2495 }, { "epoch": 0.05293631100082713, "grad_norm": 0.2971324026584625, "learning_rate": 1.9968119168325096e-05, "loss": 0.4474, "step": 2496 }, { "epoch": 0.052957519458760156, "grad_norm": 0.31315380334854126, "learning_rate": 1.9968092554112916e-05, "loss": 0.5206, "step": 2497 }, { "epoch": 0.052978727916693176, "grad_norm": 0.3639402389526367, "learning_rate": 1.9968065928814314e-05, "loss": 0.588, "step": 2498 }, { "epoch": 0.0529999363746262, "grad_norm": 0.3383685350418091, "learning_rate": 1.9968039292429312e-05, "loss": 0.5822, "step": 2499 }, { "epoch": 0.05302114483255922, "grad_norm": 0.35696613788604736, "learning_rate": 1.996801264495795e-05, "loss": 0.5709, "step": 2500 }, { "epoch": 0.05304235329049225, "grad_norm": 0.3297877609729767, "learning_rate": 1.9967985986400243e-05, "loss": 0.6067, "step": 2501 }, { "epoch": 0.053063561748425275, "grad_norm": 0.30203327536582947, "learning_rate": 1.996795931675623e-05, "loss": 0.547, "step": 2502 }, { "epoch": 0.053084770206358295, "grad_norm": 0.3352437913417816, "learning_rate": 1.9967932636025944e-05, "loss": 0.6025, "step": 2503 }, { "epoch": 0.05310597866429132, "grad_norm": 0.3773057162761688, "learning_rate": 1.9967905944209405e-05, "loss": 0.5959, "step": 2504 }, { "epoch": 0.05312718712222434, "grad_norm": 0.4244353473186493, "learning_rate": 1.996787924130665e-05, "loss": 0.5742, "step": 2505 }, { "epoch": 0.05314839558015737, "grad_norm": 0.32183128595352173, "learning_rate": 1.9967852527317706e-05, "loss": 0.5259, "step": 2506 }, { "epoch": 0.05316960403809039, "grad_norm": 0.3105335533618927, "learning_rate": 1.9967825802242604e-05, "loss": 0.5354, "step": 2507 }, { "epoch": 0.053190812496023414, "grad_norm": 0.3091784715652466, "learning_rate": 1.996779906608137e-05, "loss": 0.5419, "step": 2508 }, { "epoch": 0.05321202095395644, "grad_norm": 0.350390762090683, "learning_rate": 1.996777231883404e-05, "loss": 0.5742, "step": 2509 }, { "epoch": 0.05323322941188946, "grad_norm": 0.3468160033226013, "learning_rate": 1.996774556050064e-05, "loss": 0.5776, "step": 2510 }, { "epoch": 0.05325443786982249, "grad_norm": 0.339805006980896, "learning_rate": 1.9967718791081194e-05, "loss": 0.5673, "step": 2511 }, { "epoch": 0.053275646327755506, "grad_norm": 0.37355750799179077, "learning_rate": 1.9967692010575742e-05, "loss": 0.646, "step": 2512 }, { "epoch": 0.05329685478568853, "grad_norm": 0.3406389653682709, "learning_rate": 1.9967665218984308e-05, "loss": 0.596, "step": 2513 }, { "epoch": 0.05331806324362156, "grad_norm": 0.32335975766181946, "learning_rate": 1.9967638416306928e-05, "loss": 0.4848, "step": 2514 }, { "epoch": 0.05333927170155458, "grad_norm": 0.339670866727829, "learning_rate": 1.996761160254362e-05, "loss": 0.5843, "step": 2515 }, { "epoch": 0.053360480159487605, "grad_norm": 0.3592018485069275, "learning_rate": 1.9967584777694428e-05, "loss": 0.6202, "step": 2516 }, { "epoch": 0.053381688617420625, "grad_norm": 0.35789260268211365, "learning_rate": 1.9967557941759373e-05, "loss": 0.5565, "step": 2517 }, { "epoch": 0.05340289707535365, "grad_norm": 0.341484010219574, "learning_rate": 1.9967531094738483e-05, "loss": 0.5043, "step": 2518 }, { "epoch": 0.05342410553328667, "grad_norm": 0.36102738976478577, "learning_rate": 1.9967504236631795e-05, "loss": 0.5698, "step": 2519 }, { "epoch": 0.0534453139912197, "grad_norm": 0.31756314635276794, "learning_rate": 1.9967477367439334e-05, "loss": 0.5444, "step": 2520 }, { "epoch": 0.053466522449152724, "grad_norm": 0.40494322776794434, "learning_rate": 1.996745048716113e-05, "loss": 0.5494, "step": 2521 }, { "epoch": 0.053487730907085744, "grad_norm": 0.31818467378616333, "learning_rate": 1.9967423595797216e-05, "loss": 0.474, "step": 2522 }, { "epoch": 0.05350893936501877, "grad_norm": 0.31477245688438416, "learning_rate": 1.996739669334762e-05, "loss": 0.5321, "step": 2523 }, { "epoch": 0.05353014782295179, "grad_norm": 0.3820747435092926, "learning_rate": 1.9967369779812372e-05, "loss": 0.5194, "step": 2524 }, { "epoch": 0.05355135628088482, "grad_norm": 0.3180956244468689, "learning_rate": 1.9967342855191504e-05, "loss": 0.5268, "step": 2525 }, { "epoch": 0.05357256473881784, "grad_norm": 0.3190140128135681, "learning_rate": 1.996731591948504e-05, "loss": 0.459, "step": 2526 }, { "epoch": 0.05359377319675086, "grad_norm": 0.30020973086357117, "learning_rate": 1.9967288972693017e-05, "loss": 0.5486, "step": 2527 }, { "epoch": 0.05361498165468389, "grad_norm": 0.3266112208366394, "learning_rate": 1.996726201481546e-05, "loss": 0.6084, "step": 2528 }, { "epoch": 0.05363619011261691, "grad_norm": 0.3332166373729706, "learning_rate": 1.99672350458524e-05, "loss": 0.6159, "step": 2529 }, { "epoch": 0.053657398570549936, "grad_norm": 0.31377485394477844, "learning_rate": 1.9967208065803868e-05, "loss": 0.4978, "step": 2530 }, { "epoch": 0.05367860702848296, "grad_norm": 0.30076348781585693, "learning_rate": 1.9967181074669897e-05, "loss": 0.5331, "step": 2531 }, { "epoch": 0.05369981548641598, "grad_norm": 0.3314976990222931, "learning_rate": 1.996715407245051e-05, "loss": 0.5494, "step": 2532 }, { "epoch": 0.05372102394434901, "grad_norm": 0.344335675239563, "learning_rate": 1.996712705914574e-05, "loss": 0.5602, "step": 2533 }, { "epoch": 0.05374223240228203, "grad_norm": 0.3185831606388092, "learning_rate": 1.9967100034755623e-05, "loss": 0.5514, "step": 2534 }, { "epoch": 0.053763440860215055, "grad_norm": 0.32630565762519836, "learning_rate": 1.9967072999280178e-05, "loss": 0.5615, "step": 2535 }, { "epoch": 0.053784649318148074, "grad_norm": 0.3436538875102997, "learning_rate": 1.9967045952719447e-05, "loss": 0.557, "step": 2536 }, { "epoch": 0.0538058577760811, "grad_norm": 0.315807044506073, "learning_rate": 1.996701889507345e-05, "loss": 0.5286, "step": 2537 }, { "epoch": 0.05382706623401413, "grad_norm": 0.3249390721321106, "learning_rate": 1.9966991826342222e-05, "loss": 0.6012, "step": 2538 }, { "epoch": 0.05384827469194715, "grad_norm": 0.38503241539001465, "learning_rate": 1.9966964746525792e-05, "loss": 0.5759, "step": 2539 }, { "epoch": 0.053869483149880174, "grad_norm": 0.32386553287506104, "learning_rate": 1.996693765562419e-05, "loss": 0.5282, "step": 2540 }, { "epoch": 0.05389069160781319, "grad_norm": 0.3418847620487213, "learning_rate": 1.996691055363745e-05, "loss": 0.5922, "step": 2541 }, { "epoch": 0.05391190006574622, "grad_norm": 0.3032986521720886, "learning_rate": 1.9966883440565594e-05, "loss": 0.5103, "step": 2542 }, { "epoch": 0.053933108523679246, "grad_norm": 0.33476367592811584, "learning_rate": 1.996685631640866e-05, "loss": 0.5838, "step": 2543 }, { "epoch": 0.053954316981612266, "grad_norm": 0.2797754406929016, "learning_rate": 1.9966829181166672e-05, "loss": 0.478, "step": 2544 }, { "epoch": 0.05397552543954529, "grad_norm": 0.34042423963546753, "learning_rate": 1.9966802034839667e-05, "loss": 0.554, "step": 2545 }, { "epoch": 0.05399673389747831, "grad_norm": 0.32076945900917053, "learning_rate": 1.996677487742767e-05, "loss": 0.4859, "step": 2546 }, { "epoch": 0.05401794235541134, "grad_norm": 0.3274913728237152, "learning_rate": 1.996674770893071e-05, "loss": 0.5128, "step": 2547 }, { "epoch": 0.054039150813344365, "grad_norm": 0.3338906764984131, "learning_rate": 1.9966720529348823e-05, "loss": 0.5817, "step": 2548 }, { "epoch": 0.054060359271277385, "grad_norm": 0.3471888303756714, "learning_rate": 1.9966693338682032e-05, "loss": 0.5656, "step": 2549 }, { "epoch": 0.05408156772921041, "grad_norm": 0.651026725769043, "learning_rate": 1.9966666136930374e-05, "loss": 0.5982, "step": 2550 }, { "epoch": 0.05410277618714343, "grad_norm": 0.36260858178138733, "learning_rate": 1.9966638924093877e-05, "loss": 0.6208, "step": 2551 }, { "epoch": 0.05412398464507646, "grad_norm": 0.3347008228302002, "learning_rate": 1.9966611700172572e-05, "loss": 0.4881, "step": 2552 }, { "epoch": 0.05414519310300948, "grad_norm": 0.4407888948917389, "learning_rate": 1.9966584465166485e-05, "loss": 0.5989, "step": 2553 }, { "epoch": 0.054166401560942504, "grad_norm": 0.33136799931526184, "learning_rate": 1.996655721907565e-05, "loss": 0.5136, "step": 2554 }, { "epoch": 0.05418761001887553, "grad_norm": 0.3264228403568268, "learning_rate": 1.9966529961900097e-05, "loss": 0.6223, "step": 2555 }, { "epoch": 0.05420881847680855, "grad_norm": 0.3567871153354645, "learning_rate": 1.9966502693639856e-05, "loss": 0.5611, "step": 2556 }, { "epoch": 0.054230026934741576, "grad_norm": 0.33400601148605347, "learning_rate": 1.996647541429496e-05, "loss": 0.5026, "step": 2557 }, { "epoch": 0.054251235392674596, "grad_norm": 0.3737683892250061, "learning_rate": 1.9966448123865433e-05, "loss": 0.6316, "step": 2558 }, { "epoch": 0.05427244385060762, "grad_norm": 0.41542840003967285, "learning_rate": 1.996642082235131e-05, "loss": 0.5466, "step": 2559 }, { "epoch": 0.05429365230854065, "grad_norm": 0.31895604729652405, "learning_rate": 1.996639350975262e-05, "loss": 0.5042, "step": 2560 }, { "epoch": 0.05431486076647367, "grad_norm": 0.3138403594493866, "learning_rate": 1.9966366186069394e-05, "loss": 0.4769, "step": 2561 }, { "epoch": 0.054336069224406695, "grad_norm": 0.33452555537223816, "learning_rate": 1.9966338851301664e-05, "loss": 0.5659, "step": 2562 }, { "epoch": 0.054357277682339715, "grad_norm": 0.683912992477417, "learning_rate": 1.9966311505449455e-05, "loss": 0.5635, "step": 2563 }, { "epoch": 0.05437848614027274, "grad_norm": 0.31578490138053894, "learning_rate": 1.9966284148512802e-05, "loss": 0.5099, "step": 2564 }, { "epoch": 0.05439969459820576, "grad_norm": 0.32925528287887573, "learning_rate": 1.9966256780491738e-05, "loss": 0.5262, "step": 2565 }, { "epoch": 0.05442090305613879, "grad_norm": 0.3718441128730774, "learning_rate": 1.9966229401386282e-05, "loss": 0.5492, "step": 2566 }, { "epoch": 0.054442111514071814, "grad_norm": 0.33350813388824463, "learning_rate": 1.996620201119648e-05, "loss": 0.6177, "step": 2567 }, { "epoch": 0.054463319972004834, "grad_norm": 0.32484567165374756, "learning_rate": 1.996617460992235e-05, "loss": 0.5672, "step": 2568 }, { "epoch": 0.05448452842993786, "grad_norm": 0.45349523425102234, "learning_rate": 1.996614719756393e-05, "loss": 0.596, "step": 2569 }, { "epoch": 0.05450573688787088, "grad_norm": 0.3420409858226776, "learning_rate": 1.9966119774121246e-05, "loss": 0.5383, "step": 2570 }, { "epoch": 0.05452694534580391, "grad_norm": 0.3539605736732483, "learning_rate": 1.9966092339594333e-05, "loss": 0.5838, "step": 2571 }, { "epoch": 0.05454815380373693, "grad_norm": 0.3792569637298584, "learning_rate": 1.9966064893983214e-05, "loss": 0.463, "step": 2572 }, { "epoch": 0.05456936226166995, "grad_norm": 0.3326251208782196, "learning_rate": 1.9966037437287928e-05, "loss": 0.5516, "step": 2573 }, { "epoch": 0.05459057071960298, "grad_norm": 0.33772557973861694, "learning_rate": 1.99660099695085e-05, "loss": 0.554, "step": 2574 }, { "epoch": 0.054611779177536, "grad_norm": 0.33549267053604126, "learning_rate": 1.9965982490644965e-05, "loss": 0.6258, "step": 2575 }, { "epoch": 0.054632987635469026, "grad_norm": 0.3054536283016205, "learning_rate": 1.9965955000697347e-05, "loss": 0.5291, "step": 2576 }, { "epoch": 0.05465419609340205, "grad_norm": 0.3710511326789856, "learning_rate": 1.996592749966568e-05, "loss": 0.5208, "step": 2577 }, { "epoch": 0.05467540455133507, "grad_norm": 0.314704567193985, "learning_rate": 1.996589998755e-05, "loss": 0.6184, "step": 2578 }, { "epoch": 0.0546966130092681, "grad_norm": 0.3328072726726532, "learning_rate": 1.9965872464350328e-05, "loss": 0.5975, "step": 2579 }, { "epoch": 0.05471782146720112, "grad_norm": 0.31669116020202637, "learning_rate": 1.99658449300667e-05, "loss": 0.5118, "step": 2580 }, { "epoch": 0.054739029925134144, "grad_norm": 0.3495599627494812, "learning_rate": 1.9965817384699147e-05, "loss": 0.5267, "step": 2581 }, { "epoch": 0.054760238383067164, "grad_norm": 0.33905431628227234, "learning_rate": 1.9965789828247696e-05, "loss": 0.6582, "step": 2582 }, { "epoch": 0.05478144684100019, "grad_norm": 0.3067270815372467, "learning_rate": 1.9965762260712385e-05, "loss": 0.6349, "step": 2583 }, { "epoch": 0.05480265529893322, "grad_norm": 0.28430992364883423, "learning_rate": 1.9965734682093238e-05, "loss": 0.5362, "step": 2584 }, { "epoch": 0.05482386375686624, "grad_norm": 0.353571355342865, "learning_rate": 1.996570709239028e-05, "loss": 0.6101, "step": 2585 }, { "epoch": 0.05484507221479926, "grad_norm": 0.9266538619995117, "learning_rate": 1.996567949160356e-05, "loss": 0.6837, "step": 2586 }, { "epoch": 0.05486628067273228, "grad_norm": 0.35247814655303955, "learning_rate": 1.9965651879733095e-05, "loss": 0.4988, "step": 2587 }, { "epoch": 0.05488748913066531, "grad_norm": 0.3430350422859192, "learning_rate": 1.9965624256778915e-05, "loss": 0.5298, "step": 2588 }, { "epoch": 0.054908697588598336, "grad_norm": 0.42397215962409973, "learning_rate": 1.9965596622741055e-05, "loss": 0.5705, "step": 2589 }, { "epoch": 0.054929906046531356, "grad_norm": 0.3186924457550049, "learning_rate": 1.9965568977619546e-05, "loss": 0.4979, "step": 2590 }, { "epoch": 0.05495111450446438, "grad_norm": 0.3660106956958771, "learning_rate": 1.996554132141442e-05, "loss": 0.537, "step": 2591 }, { "epoch": 0.0549723229623974, "grad_norm": 0.3419508934020996, "learning_rate": 1.9965513654125703e-05, "loss": 0.5623, "step": 2592 }, { "epoch": 0.05499353142033043, "grad_norm": 0.3235423266887665, "learning_rate": 1.996548597575343e-05, "loss": 0.5594, "step": 2593 }, { "epoch": 0.05501473987826345, "grad_norm": 0.338544100522995, "learning_rate": 1.996545828629763e-05, "loss": 0.5906, "step": 2594 }, { "epoch": 0.055035948336196475, "grad_norm": 0.3367423713207245, "learning_rate": 1.9965430585758332e-05, "loss": 0.5007, "step": 2595 }, { "epoch": 0.0550571567941295, "grad_norm": 0.34659087657928467, "learning_rate": 1.9965402874135568e-05, "loss": 0.5581, "step": 2596 }, { "epoch": 0.05507836525206252, "grad_norm": 0.49316999316215515, "learning_rate": 1.996537515142937e-05, "loss": 0.5312, "step": 2597 }, { "epoch": 0.05509957370999555, "grad_norm": 0.3553631901741028, "learning_rate": 1.996534741763977e-05, "loss": 0.4971, "step": 2598 }, { "epoch": 0.05512078216792857, "grad_norm": 0.33283671736717224, "learning_rate": 1.9965319672766796e-05, "loss": 0.5726, "step": 2599 }, { "epoch": 0.055141990625861594, "grad_norm": 0.3046357035636902, "learning_rate": 1.996529191681048e-05, "loss": 0.5811, "step": 2600 }, { "epoch": 0.05516319908379462, "grad_norm": 0.3292158842086792, "learning_rate": 1.9965264149770856e-05, "loss": 0.592, "step": 2601 }, { "epoch": 0.05518440754172764, "grad_norm": 0.3003045320510864, "learning_rate": 1.9965236371647945e-05, "loss": 0.4984, "step": 2602 }, { "epoch": 0.055205615999660666, "grad_norm": 0.44457605481147766, "learning_rate": 1.9965208582441794e-05, "loss": 0.5458, "step": 2603 }, { "epoch": 0.055226824457593686, "grad_norm": 0.36482691764831543, "learning_rate": 1.9965180782152414e-05, "loss": 0.5701, "step": 2604 }, { "epoch": 0.05524803291552671, "grad_norm": 0.3222735822200775, "learning_rate": 1.9965152970779855e-05, "loss": 0.5868, "step": 2605 }, { "epoch": 0.05526924137345974, "grad_norm": 0.34441661834716797, "learning_rate": 1.9965125148324137e-05, "loss": 0.4932, "step": 2606 }, { "epoch": 0.05529044983139276, "grad_norm": 0.3382149636745453, "learning_rate": 1.9965097314785292e-05, "loss": 0.4914, "step": 2607 }, { "epoch": 0.055311658289325785, "grad_norm": 0.31131693720817566, "learning_rate": 1.9965069470163355e-05, "loss": 0.503, "step": 2608 }, { "epoch": 0.055332866747258805, "grad_norm": 0.2846130132675171, "learning_rate": 1.996504161445835e-05, "loss": 0.4798, "step": 2609 }, { "epoch": 0.05535407520519183, "grad_norm": 0.3685024380683899, "learning_rate": 1.9965013747670316e-05, "loss": 0.6618, "step": 2610 }, { "epoch": 0.05537528366312485, "grad_norm": 0.35911381244659424, "learning_rate": 1.9964985869799277e-05, "loss": 0.5211, "step": 2611 }, { "epoch": 0.05539649212105788, "grad_norm": 0.3376264274120331, "learning_rate": 1.996495798084527e-05, "loss": 0.6569, "step": 2612 }, { "epoch": 0.055417700578990904, "grad_norm": 0.3436902165412903, "learning_rate": 1.9964930080808323e-05, "loss": 0.5022, "step": 2613 }, { "epoch": 0.055438909036923924, "grad_norm": 0.3658807575702667, "learning_rate": 1.9964902169688466e-05, "loss": 0.6557, "step": 2614 }, { "epoch": 0.05546011749485695, "grad_norm": 0.3322509527206421, "learning_rate": 1.9964874247485732e-05, "loss": 0.6023, "step": 2615 }, { "epoch": 0.05548132595278997, "grad_norm": 0.35315656661987305, "learning_rate": 1.996484631420015e-05, "loss": 0.5734, "step": 2616 }, { "epoch": 0.055502534410722996, "grad_norm": 0.3638940453529358, "learning_rate": 1.9964818369831754e-05, "loss": 0.5787, "step": 2617 }, { "epoch": 0.05552374286865602, "grad_norm": 0.30413275957107544, "learning_rate": 1.9964790414380574e-05, "loss": 0.565, "step": 2618 }, { "epoch": 0.05554495132658904, "grad_norm": 0.2919301986694336, "learning_rate": 1.9964762447846643e-05, "loss": 0.5508, "step": 2619 }, { "epoch": 0.05556615978452207, "grad_norm": 0.33802253007888794, "learning_rate": 1.9964734470229987e-05, "loss": 0.5502, "step": 2620 }, { "epoch": 0.05558736824245509, "grad_norm": 0.3321255147457123, "learning_rate": 1.996470648153064e-05, "loss": 0.5476, "step": 2621 }, { "epoch": 0.055608576700388115, "grad_norm": 0.3419671654701233, "learning_rate": 1.9964678481748635e-05, "loss": 0.5654, "step": 2622 }, { "epoch": 0.055629785158321135, "grad_norm": 0.31408822536468506, "learning_rate": 1.9964650470883998e-05, "loss": 0.6006, "step": 2623 }, { "epoch": 0.05565099361625416, "grad_norm": 0.6249896287918091, "learning_rate": 1.9964622448936765e-05, "loss": 0.5785, "step": 2624 }, { "epoch": 0.05567220207418719, "grad_norm": 0.6193453073501587, "learning_rate": 1.9964594415906962e-05, "loss": 0.5042, "step": 2625 }, { "epoch": 0.05569341053212021, "grad_norm": 0.3277309536933899, "learning_rate": 1.9964566371794628e-05, "loss": 0.5876, "step": 2626 }, { "epoch": 0.055714618990053234, "grad_norm": 0.3955991566181183, "learning_rate": 1.9964538316599786e-05, "loss": 0.6035, "step": 2627 }, { "epoch": 0.055735827447986254, "grad_norm": 0.3480524718761444, "learning_rate": 1.9964510250322474e-05, "loss": 0.5368, "step": 2628 }, { "epoch": 0.05575703590591928, "grad_norm": 0.3131469488143921, "learning_rate": 1.996448217296272e-05, "loss": 0.6209, "step": 2629 }, { "epoch": 0.05577824436385231, "grad_norm": 0.3297197222709656, "learning_rate": 1.9964454084520555e-05, "loss": 0.5764, "step": 2630 }, { "epoch": 0.05579945282178533, "grad_norm": 0.3125487267971039, "learning_rate": 1.9964425984996013e-05, "loss": 0.5058, "step": 2631 }, { "epoch": 0.05582066127971835, "grad_norm": 0.37130221724510193, "learning_rate": 1.996439787438912e-05, "loss": 0.5076, "step": 2632 }, { "epoch": 0.05584186973765137, "grad_norm": 0.3595365881919861, "learning_rate": 1.996436975269991e-05, "loss": 0.6468, "step": 2633 }, { "epoch": 0.0558630781955844, "grad_norm": 0.5022901892662048, "learning_rate": 1.996434161992842e-05, "loss": 0.4585, "step": 2634 }, { "epoch": 0.055884286653517426, "grad_norm": 0.36465999484062195, "learning_rate": 1.9964313476074668e-05, "loss": 0.5825, "step": 2635 }, { "epoch": 0.055905495111450446, "grad_norm": 0.3265637755393982, "learning_rate": 1.9964285321138698e-05, "loss": 0.4877, "step": 2636 }, { "epoch": 0.05592670356938347, "grad_norm": 0.4771261513233185, "learning_rate": 1.9964257155120535e-05, "loss": 0.6625, "step": 2637 }, { "epoch": 0.05594791202731649, "grad_norm": 0.360411137342453, "learning_rate": 1.9964228978020214e-05, "loss": 0.5481, "step": 2638 }, { "epoch": 0.05596912048524952, "grad_norm": 0.3763447403907776, "learning_rate": 1.996420078983776e-05, "loss": 0.5989, "step": 2639 }, { "epoch": 0.05599032894318254, "grad_norm": 0.3453352451324463, "learning_rate": 1.996417259057321e-05, "loss": 0.6082, "step": 2640 }, { "epoch": 0.056011537401115565, "grad_norm": 0.3435741662979126, "learning_rate": 1.9964144380226593e-05, "loss": 0.5739, "step": 2641 }, { "epoch": 0.05603274585904859, "grad_norm": 0.34053152799606323, "learning_rate": 1.9964116158797944e-05, "loss": 0.5585, "step": 2642 }, { "epoch": 0.05605395431698161, "grad_norm": 0.35525500774383545, "learning_rate": 1.9964087926287288e-05, "loss": 0.5679, "step": 2643 }, { "epoch": 0.05607516277491464, "grad_norm": 0.32501593232154846, "learning_rate": 1.9964059682694663e-05, "loss": 0.5278, "step": 2644 }, { "epoch": 0.05609637123284766, "grad_norm": 0.3103535771369934, "learning_rate": 1.9964031428020096e-05, "loss": 0.5369, "step": 2645 }, { "epoch": 0.05611757969078068, "grad_norm": 0.35703662037849426, "learning_rate": 1.996400316226362e-05, "loss": 0.5541, "step": 2646 }, { "epoch": 0.05613878814871371, "grad_norm": 0.3588786721229553, "learning_rate": 1.9963974885425267e-05, "loss": 0.6107, "step": 2647 }, { "epoch": 0.05615999660664673, "grad_norm": 0.3263324499130249, "learning_rate": 1.9963946597505067e-05, "loss": 0.6277, "step": 2648 }, { "epoch": 0.056181205064579756, "grad_norm": 0.36209526658058167, "learning_rate": 1.996391829850305e-05, "loss": 0.6141, "step": 2649 }, { "epoch": 0.056202413522512776, "grad_norm": 0.6543034911155701, "learning_rate": 1.9963889988419253e-05, "loss": 0.4862, "step": 2650 }, { "epoch": 0.0562236219804458, "grad_norm": 0.33232933282852173, "learning_rate": 1.9963861667253705e-05, "loss": 0.5624, "step": 2651 }, { "epoch": 0.05624483043837883, "grad_norm": 0.3069153130054474, "learning_rate": 1.9963833335006434e-05, "loss": 0.5614, "step": 2652 }, { "epoch": 0.05626603889631185, "grad_norm": 0.33088016510009766, "learning_rate": 1.9963804991677475e-05, "loss": 0.4977, "step": 2653 }, { "epoch": 0.056287247354244875, "grad_norm": 0.388028621673584, "learning_rate": 1.9963776637266855e-05, "loss": 0.4995, "step": 2654 }, { "epoch": 0.056308455812177895, "grad_norm": 0.3051193356513977, "learning_rate": 1.9963748271774615e-05, "loss": 0.4778, "step": 2655 }, { "epoch": 0.05632966427011092, "grad_norm": 0.38128411769866943, "learning_rate": 1.996371989520078e-05, "loss": 0.5896, "step": 2656 }, { "epoch": 0.05635087272804394, "grad_norm": 0.31528669595718384, "learning_rate": 1.9963691507545376e-05, "loss": 0.5736, "step": 2657 }, { "epoch": 0.05637208118597697, "grad_norm": 0.4079664945602417, "learning_rate": 1.996366310880845e-05, "loss": 0.5248, "step": 2658 }, { "epoch": 0.056393289643909994, "grad_norm": 0.31954649090766907, "learning_rate": 1.996363469899002e-05, "loss": 0.5725, "step": 2659 }, { "epoch": 0.056414498101843014, "grad_norm": 0.40817755460739136, "learning_rate": 1.9963606278090118e-05, "loss": 0.6529, "step": 2660 }, { "epoch": 0.05643570655977604, "grad_norm": 0.30625149607658386, "learning_rate": 1.9963577846108784e-05, "loss": 0.4686, "step": 2661 }, { "epoch": 0.05645691501770906, "grad_norm": 0.3251015841960907, "learning_rate": 1.9963549403046046e-05, "loss": 0.6188, "step": 2662 }, { "epoch": 0.056478123475642086, "grad_norm": 0.3740597069263458, "learning_rate": 1.996352094890193e-05, "loss": 0.5051, "step": 2663 }, { "epoch": 0.05649933193357511, "grad_norm": 0.33675679564476013, "learning_rate": 1.9963492483676478e-05, "loss": 0.5901, "step": 2664 }, { "epoch": 0.05652054039150813, "grad_norm": 0.31602147221565247, "learning_rate": 1.9963464007369716e-05, "loss": 0.5211, "step": 2665 }, { "epoch": 0.05654174884944116, "grad_norm": 0.3214581608772278, "learning_rate": 1.9963435519981673e-05, "loss": 0.4711, "step": 2666 }, { "epoch": 0.05656295730737418, "grad_norm": 0.31912070512771606, "learning_rate": 1.9963407021512386e-05, "loss": 0.6008, "step": 2667 }, { "epoch": 0.056584165765307205, "grad_norm": 0.3402352035045624, "learning_rate": 1.9963378511961885e-05, "loss": 0.5811, "step": 2668 }, { "epoch": 0.056605374223240225, "grad_norm": 0.3260883390903473, "learning_rate": 1.9963349991330197e-05, "loss": 0.4954, "step": 2669 }, { "epoch": 0.05662658268117325, "grad_norm": 0.3054596483707428, "learning_rate": 1.996332145961736e-05, "loss": 0.4616, "step": 2670 }, { "epoch": 0.05664779113910628, "grad_norm": 0.3137643039226532, "learning_rate": 1.99632929168234e-05, "loss": 0.5661, "step": 2671 }, { "epoch": 0.0566689995970393, "grad_norm": 0.3362816870212555, "learning_rate": 1.9963264362948357e-05, "loss": 0.6128, "step": 2672 }, { "epoch": 0.056690208054972324, "grad_norm": 0.3182905614376068, "learning_rate": 1.996323579799226e-05, "loss": 0.5764, "step": 2673 }, { "epoch": 0.056711416512905344, "grad_norm": 0.3589418828487396, "learning_rate": 1.9963207221955133e-05, "loss": 0.5588, "step": 2674 }, { "epoch": 0.05673262497083837, "grad_norm": 0.34856072068214417, "learning_rate": 1.9963178634837013e-05, "loss": 0.5869, "step": 2675 }, { "epoch": 0.0567538334287714, "grad_norm": 0.32052603363990784, "learning_rate": 1.9963150036637935e-05, "loss": 0.5719, "step": 2676 }, { "epoch": 0.05677504188670442, "grad_norm": 0.4832001328468323, "learning_rate": 1.996312142735793e-05, "loss": 0.5753, "step": 2677 }, { "epoch": 0.05679625034463744, "grad_norm": 0.3410321772098541, "learning_rate": 1.9963092806997024e-05, "loss": 0.5122, "step": 2678 }, { "epoch": 0.05681745880257046, "grad_norm": 0.3086654245853424, "learning_rate": 1.9963064175555256e-05, "loss": 0.6004, "step": 2679 }, { "epoch": 0.05683866726050349, "grad_norm": 0.31024178862571716, "learning_rate": 1.9963035533032654e-05, "loss": 0.5333, "step": 2680 }, { "epoch": 0.056859875718436516, "grad_norm": 0.34985584020614624, "learning_rate": 1.996300687942925e-05, "loss": 0.6327, "step": 2681 }, { "epoch": 0.056881084176369535, "grad_norm": 0.3646358847618103, "learning_rate": 1.9962978214745073e-05, "loss": 0.6019, "step": 2682 }, { "epoch": 0.05690229263430256, "grad_norm": 0.3060615658760071, "learning_rate": 1.9962949538980162e-05, "loss": 0.54, "step": 2683 }, { "epoch": 0.05692350109223558, "grad_norm": 0.3163577914237976, "learning_rate": 1.9962920852134545e-05, "loss": 0.5436, "step": 2684 }, { "epoch": 0.05694470955016861, "grad_norm": 0.4364694654941559, "learning_rate": 1.9962892154208253e-05, "loss": 0.5459, "step": 2685 }, { "epoch": 0.05696591800810163, "grad_norm": 0.3094148635864258, "learning_rate": 1.9962863445201317e-05, "loss": 0.5406, "step": 2686 }, { "epoch": 0.056987126466034654, "grad_norm": 0.3286258280277252, "learning_rate": 1.9962834725113774e-05, "loss": 0.5434, "step": 2687 }, { "epoch": 0.05700833492396768, "grad_norm": 0.2862294316291809, "learning_rate": 1.996280599394565e-05, "loss": 0.4608, "step": 2688 }, { "epoch": 0.0570295433819007, "grad_norm": 0.3211304843425751, "learning_rate": 1.9962777251696984e-05, "loss": 0.5633, "step": 2689 }, { "epoch": 0.05705075183983373, "grad_norm": 0.3334934115409851, "learning_rate": 1.99627484983678e-05, "loss": 0.6067, "step": 2690 }, { "epoch": 0.05707196029776675, "grad_norm": 0.45154017210006714, "learning_rate": 1.9962719733958135e-05, "loss": 0.5016, "step": 2691 }, { "epoch": 0.05709316875569977, "grad_norm": 0.5561495423316956, "learning_rate": 1.9962690958468016e-05, "loss": 0.5114, "step": 2692 }, { "epoch": 0.0571143772136328, "grad_norm": 0.30064356327056885, "learning_rate": 1.9962662171897483e-05, "loss": 0.5463, "step": 2693 }, { "epoch": 0.05713558567156582, "grad_norm": 0.353432834148407, "learning_rate": 1.9962633374246564e-05, "loss": 0.488, "step": 2694 }, { "epoch": 0.057156794129498846, "grad_norm": 0.3792407810688019, "learning_rate": 1.9962604565515288e-05, "loss": 0.5116, "step": 2695 }, { "epoch": 0.057178002587431866, "grad_norm": 0.332980215549469, "learning_rate": 1.9962575745703693e-05, "loss": 0.5327, "step": 2696 }, { "epoch": 0.05719921104536489, "grad_norm": 0.3374582529067993, "learning_rate": 1.9962546914811805e-05, "loss": 0.5583, "step": 2697 }, { "epoch": 0.05722041950329791, "grad_norm": 0.3331095278263092, "learning_rate": 1.996251807283966e-05, "loss": 0.5144, "step": 2698 }, { "epoch": 0.05724162796123094, "grad_norm": 0.3126887381076813, "learning_rate": 1.9962489219787286e-05, "loss": 0.5098, "step": 2699 }, { "epoch": 0.057262836419163965, "grad_norm": 0.31832048296928406, "learning_rate": 1.9962460355654723e-05, "loss": 0.4609, "step": 2700 }, { "epoch": 0.057284044877096985, "grad_norm": 0.39688342809677124, "learning_rate": 1.9962431480441996e-05, "loss": 0.6007, "step": 2701 }, { "epoch": 0.05730525333503001, "grad_norm": 0.3443857431411743, "learning_rate": 1.996240259414914e-05, "loss": 0.5641, "step": 2702 }, { "epoch": 0.05732646179296303, "grad_norm": 0.29847976565361023, "learning_rate": 1.9962373696776184e-05, "loss": 0.5572, "step": 2703 }, { "epoch": 0.05734767025089606, "grad_norm": 0.33302202820777893, "learning_rate": 1.9962344788323163e-05, "loss": 0.4687, "step": 2704 }, { "epoch": 0.057368878708829084, "grad_norm": 0.3085920214653015, "learning_rate": 1.996231586879011e-05, "loss": 0.4915, "step": 2705 }, { "epoch": 0.057390087166762103, "grad_norm": 0.35418230295181274, "learning_rate": 1.9962286938177055e-05, "loss": 0.5764, "step": 2706 }, { "epoch": 0.05741129562469513, "grad_norm": 0.3347742259502411, "learning_rate": 1.9962257996484033e-05, "loss": 0.523, "step": 2707 }, { "epoch": 0.05743250408262815, "grad_norm": 0.3663382828235626, "learning_rate": 1.9962229043711074e-05, "loss": 0.4935, "step": 2708 }, { "epoch": 0.057453712540561176, "grad_norm": 0.3432545065879822, "learning_rate": 1.996220007985821e-05, "loss": 0.6454, "step": 2709 }, { "epoch": 0.0574749209984942, "grad_norm": 0.5614304542541504, "learning_rate": 1.996217110492547e-05, "loss": 0.608, "step": 2710 }, { "epoch": 0.05749612945642722, "grad_norm": 0.2917245030403137, "learning_rate": 1.9962142118912894e-05, "loss": 0.514, "step": 2711 }, { "epoch": 0.05751733791436025, "grad_norm": 0.41495203971862793, "learning_rate": 1.9962113121820508e-05, "loss": 0.5636, "step": 2712 }, { "epoch": 0.05753854637229327, "grad_norm": 0.3206419050693512, "learning_rate": 1.9962084113648345e-05, "loss": 0.5036, "step": 2713 }, { "epoch": 0.057559754830226295, "grad_norm": 0.3278835415840149, "learning_rate": 1.9962055094396442e-05, "loss": 0.5255, "step": 2714 }, { "epoch": 0.057580963288159315, "grad_norm": 0.3686562180519104, "learning_rate": 1.9962026064064826e-05, "loss": 0.5732, "step": 2715 }, { "epoch": 0.05760217174609234, "grad_norm": 0.30982476472854614, "learning_rate": 1.996199702265353e-05, "loss": 0.5453, "step": 2716 }, { "epoch": 0.05762338020402537, "grad_norm": 0.3531337082386017, "learning_rate": 1.996196797016259e-05, "loss": 0.536, "step": 2717 }, { "epoch": 0.05764458866195839, "grad_norm": 0.41006243228912354, "learning_rate": 1.9961938906592032e-05, "loss": 0.5512, "step": 2718 }, { "epoch": 0.057665797119891414, "grad_norm": 0.3311755061149597, "learning_rate": 1.9961909831941897e-05, "loss": 0.5513, "step": 2719 }, { "epoch": 0.057687005577824434, "grad_norm": 0.34115707874298096, "learning_rate": 1.996188074621221e-05, "loss": 0.5577, "step": 2720 }, { "epoch": 0.05770821403575746, "grad_norm": 0.34515416622161865, "learning_rate": 1.996185164940301e-05, "loss": 0.5522, "step": 2721 }, { "epoch": 0.05772942249369049, "grad_norm": 0.3160700500011444, "learning_rate": 1.996182254151432e-05, "loss": 0.4117, "step": 2722 }, { "epoch": 0.057750630951623506, "grad_norm": 0.33877819776535034, "learning_rate": 1.9961793422546177e-05, "loss": 0.5957, "step": 2723 }, { "epoch": 0.05777183940955653, "grad_norm": 0.3861977756023407, "learning_rate": 1.9961764292498614e-05, "loss": 0.5602, "step": 2724 }, { "epoch": 0.05779304786748955, "grad_norm": 0.3110409677028656, "learning_rate": 1.9961735151371667e-05, "loss": 0.5677, "step": 2725 }, { "epoch": 0.05781425632542258, "grad_norm": 0.3742147982120514, "learning_rate": 1.9961705999165363e-05, "loss": 0.5434, "step": 2726 }, { "epoch": 0.0578354647833556, "grad_norm": 0.2892496883869171, "learning_rate": 1.9961676835879732e-05, "loss": 0.5537, "step": 2727 }, { "epoch": 0.057856673241288625, "grad_norm": 0.4574771225452423, "learning_rate": 1.996164766151482e-05, "loss": 0.5964, "step": 2728 }, { "epoch": 0.05787788169922165, "grad_norm": 0.4698134958744049, "learning_rate": 1.996161847607064e-05, "loss": 0.5043, "step": 2729 }, { "epoch": 0.05789909015715467, "grad_norm": 0.3541792929172516, "learning_rate": 1.996158927954724e-05, "loss": 0.5634, "step": 2730 }, { "epoch": 0.0579202986150877, "grad_norm": 0.34364908933639526, "learning_rate": 1.9961560071944647e-05, "loss": 0.5925, "step": 2731 }, { "epoch": 0.05794150707302072, "grad_norm": 0.37049055099487305, "learning_rate": 1.996153085326289e-05, "loss": 0.637, "step": 2732 }, { "epoch": 0.057962715530953744, "grad_norm": 0.3250134587287903, "learning_rate": 1.9961501623502007e-05, "loss": 0.5156, "step": 2733 }, { "epoch": 0.05798392398888677, "grad_norm": 0.3473033308982849, "learning_rate": 1.9961472382662027e-05, "loss": 0.4909, "step": 2734 }, { "epoch": 0.05800513244681979, "grad_norm": 0.3366163969039917, "learning_rate": 1.9961443130742987e-05, "loss": 0.556, "step": 2735 }, { "epoch": 0.05802634090475282, "grad_norm": 0.3481552004814148, "learning_rate": 1.9961413867744916e-05, "loss": 0.5574, "step": 2736 }, { "epoch": 0.05804754936268584, "grad_norm": 0.3284747302532196, "learning_rate": 1.9961384593667847e-05, "loss": 0.5595, "step": 2737 }, { "epoch": 0.05806875782061886, "grad_norm": 0.4742571711540222, "learning_rate": 1.996135530851181e-05, "loss": 0.6724, "step": 2738 }, { "epoch": 0.05808996627855189, "grad_norm": 0.30743029713630676, "learning_rate": 1.996132601227684e-05, "loss": 0.5641, "step": 2739 }, { "epoch": 0.05811117473648491, "grad_norm": 0.307208776473999, "learning_rate": 1.996129670496297e-05, "loss": 0.5773, "step": 2740 }, { "epoch": 0.058132383194417936, "grad_norm": 0.30607396364212036, "learning_rate": 1.9961267386570236e-05, "loss": 0.5028, "step": 2741 }, { "epoch": 0.058153591652350956, "grad_norm": 0.320551335811615, "learning_rate": 1.9961238057098666e-05, "loss": 0.5399, "step": 2742 }, { "epoch": 0.05817480011028398, "grad_norm": 0.34375354647636414, "learning_rate": 1.9961208716548294e-05, "loss": 0.6406, "step": 2743 }, { "epoch": 0.058196008568217, "grad_norm": 0.33429643511772156, "learning_rate": 1.9961179364919148e-05, "loss": 0.4834, "step": 2744 }, { "epoch": 0.05821721702615003, "grad_norm": 0.31803545355796814, "learning_rate": 1.9961150002211265e-05, "loss": 0.6153, "step": 2745 }, { "epoch": 0.058238425484083055, "grad_norm": 0.3240765631198883, "learning_rate": 1.9961120628424683e-05, "loss": 0.5252, "step": 2746 }, { "epoch": 0.058259633942016074, "grad_norm": 0.3357656002044678, "learning_rate": 1.9961091243559424e-05, "loss": 0.5212, "step": 2747 }, { "epoch": 0.0582808423999491, "grad_norm": 0.36168888211250305, "learning_rate": 1.9961061847615527e-05, "loss": 0.5273, "step": 2748 }, { "epoch": 0.05830205085788212, "grad_norm": 0.3659411370754242, "learning_rate": 1.9961032440593025e-05, "loss": 0.5961, "step": 2749 }, { "epoch": 0.05832325931581515, "grad_norm": 0.35807517170906067, "learning_rate": 1.996100302249195e-05, "loss": 0.5594, "step": 2750 }, { "epoch": 0.058344467773748174, "grad_norm": 0.3253190517425537, "learning_rate": 1.9960973593312333e-05, "loss": 0.4746, "step": 2751 }, { "epoch": 0.05836567623168119, "grad_norm": 0.33298787474632263, "learning_rate": 1.9960944153054206e-05, "loss": 0.4751, "step": 2752 }, { "epoch": 0.05838688468961422, "grad_norm": 0.38527148962020874, "learning_rate": 1.9960914701717607e-05, "loss": 0.5824, "step": 2753 }, { "epoch": 0.05840809314754724, "grad_norm": 0.38156208395957947, "learning_rate": 1.9960885239302562e-05, "loss": 0.4913, "step": 2754 }, { "epoch": 0.058429301605480266, "grad_norm": 0.32983461022377014, "learning_rate": 1.9960855765809105e-05, "loss": 0.5389, "step": 2755 }, { "epoch": 0.05845051006341329, "grad_norm": 0.3122197091579437, "learning_rate": 1.9960826281237275e-05, "loss": 0.4984, "step": 2756 }, { "epoch": 0.05847171852134631, "grad_norm": 0.339948445558548, "learning_rate": 1.99607967855871e-05, "loss": 0.5511, "step": 2757 }, { "epoch": 0.05849292697927934, "grad_norm": 0.348781019449234, "learning_rate": 1.996076727885861e-05, "loss": 0.6182, "step": 2758 }, { "epoch": 0.05851413543721236, "grad_norm": 0.3321453630924225, "learning_rate": 1.9960737761051847e-05, "loss": 0.544, "step": 2759 }, { "epoch": 0.058535343895145385, "grad_norm": 0.31413164734840393, "learning_rate": 1.9960708232166834e-05, "loss": 0.5431, "step": 2760 }, { "epoch": 0.058556552353078405, "grad_norm": 0.3575782775878906, "learning_rate": 1.996067869220361e-05, "loss": 0.5148, "step": 2761 }, { "epoch": 0.05857776081101143, "grad_norm": 0.43624746799468994, "learning_rate": 1.9960649141162202e-05, "loss": 0.5827, "step": 2762 }, { "epoch": 0.05859896926894446, "grad_norm": 0.31761908531188965, "learning_rate": 1.996061957904265e-05, "loss": 0.4781, "step": 2763 }, { "epoch": 0.05862017772687748, "grad_norm": 0.2912736237049103, "learning_rate": 1.996059000584498e-05, "loss": 0.4514, "step": 2764 }, { "epoch": 0.058641386184810504, "grad_norm": 0.4397518038749695, "learning_rate": 1.9960560421569234e-05, "loss": 0.6055, "step": 2765 }, { "epoch": 0.058662594642743524, "grad_norm": 0.40290725231170654, "learning_rate": 1.9960530826215434e-05, "loss": 0.5451, "step": 2766 }, { "epoch": 0.05868380310067655, "grad_norm": 0.3028576672077179, "learning_rate": 1.9960501219783622e-05, "loss": 0.4935, "step": 2767 }, { "epoch": 0.05870501155860958, "grad_norm": 0.3540213406085968, "learning_rate": 1.9960471602273823e-05, "loss": 0.5491, "step": 2768 }, { "epoch": 0.058726220016542596, "grad_norm": 0.3220618665218353, "learning_rate": 1.9960441973686076e-05, "loss": 0.502, "step": 2769 }, { "epoch": 0.05874742847447562, "grad_norm": 0.3504464030265808, "learning_rate": 1.996041233402041e-05, "loss": 0.5355, "step": 2770 }, { "epoch": 0.05876863693240864, "grad_norm": 0.3145533800125122, "learning_rate": 1.9960382683276864e-05, "loss": 0.5222, "step": 2771 }, { "epoch": 0.05878984539034167, "grad_norm": 0.3497001528739929, "learning_rate": 1.9960353021455464e-05, "loss": 0.5802, "step": 2772 }, { "epoch": 0.05881105384827469, "grad_norm": 0.38922053575515747, "learning_rate": 1.996032334855625e-05, "loss": 0.6333, "step": 2773 }, { "epoch": 0.058832262306207715, "grad_norm": 0.34982386231422424, "learning_rate": 1.9960293664579245e-05, "loss": 0.494, "step": 2774 }, { "epoch": 0.05885347076414074, "grad_norm": 0.3376711905002594, "learning_rate": 1.996026396952449e-05, "loss": 0.4909, "step": 2775 }, { "epoch": 0.05887467922207376, "grad_norm": 0.3348621726036072, "learning_rate": 1.9960234263392016e-05, "loss": 0.5393, "step": 2776 }, { "epoch": 0.05889588768000679, "grad_norm": 0.2924371063709259, "learning_rate": 1.9960204546181857e-05, "loss": 0.5683, "step": 2777 }, { "epoch": 0.05891709613793981, "grad_norm": 0.3522714376449585, "learning_rate": 1.9960174817894046e-05, "loss": 0.5899, "step": 2778 }, { "epoch": 0.058938304595872834, "grad_norm": 0.3448334038257599, "learning_rate": 1.9960145078528613e-05, "loss": 0.5685, "step": 2779 }, { "epoch": 0.05895951305380586, "grad_norm": 0.29512128233909607, "learning_rate": 1.996011532808559e-05, "loss": 0.4971, "step": 2780 }, { "epoch": 0.05898072151173888, "grad_norm": 0.40952327847480774, "learning_rate": 1.996008556656502e-05, "loss": 0.6457, "step": 2781 }, { "epoch": 0.05900192996967191, "grad_norm": 0.4768999516963959, "learning_rate": 1.9960055793966925e-05, "loss": 0.6103, "step": 2782 }, { "epoch": 0.059023138427604926, "grad_norm": 0.3549824059009552, "learning_rate": 1.9960026010291345e-05, "loss": 0.6452, "step": 2783 }, { "epoch": 0.05904434688553795, "grad_norm": 0.3661080300807953, "learning_rate": 1.995999621553831e-05, "loss": 0.5938, "step": 2784 }, { "epoch": 0.05906555534347098, "grad_norm": 0.3336692452430725, "learning_rate": 1.995996640970785e-05, "loss": 0.6237, "step": 2785 }, { "epoch": 0.059086763801404, "grad_norm": 0.35755306482315063, "learning_rate": 1.9959936592800006e-05, "loss": 0.4798, "step": 2786 }, { "epoch": 0.059107972259337026, "grad_norm": 0.3574981987476349, "learning_rate": 1.9959906764814808e-05, "loss": 0.5497, "step": 2787 }, { "epoch": 0.059129180717270045, "grad_norm": 0.3205941915512085, "learning_rate": 1.9959876925752282e-05, "loss": 0.6506, "step": 2788 }, { "epoch": 0.05915038917520307, "grad_norm": 0.42365914583206177, "learning_rate": 1.9959847075612476e-05, "loss": 0.6508, "step": 2789 }, { "epoch": 0.05917159763313609, "grad_norm": 0.3040931224822998, "learning_rate": 1.995981721439541e-05, "loss": 0.4244, "step": 2790 }, { "epoch": 0.05919280609106912, "grad_norm": 0.3525548279285431, "learning_rate": 1.995978734210112e-05, "loss": 0.6542, "step": 2791 }, { "epoch": 0.059214014549002145, "grad_norm": 0.33028286695480347, "learning_rate": 1.9959757458729646e-05, "loss": 0.547, "step": 2792 }, { "epoch": 0.059235223006935164, "grad_norm": 0.3671293556690216, "learning_rate": 1.9959727564281012e-05, "loss": 0.5662, "step": 2793 }, { "epoch": 0.05925643146486819, "grad_norm": 0.3435755670070648, "learning_rate": 1.9959697658755256e-05, "loss": 0.5592, "step": 2794 }, { "epoch": 0.05927763992280121, "grad_norm": 0.3272441029548645, "learning_rate": 1.995966774215241e-05, "loss": 0.6073, "step": 2795 }, { "epoch": 0.05929884838073424, "grad_norm": 0.426736444234848, "learning_rate": 1.9959637814472512e-05, "loss": 0.4786, "step": 2796 }, { "epoch": 0.059320056838667264, "grad_norm": 0.3986131548881531, "learning_rate": 1.995960787571559e-05, "loss": 0.5439, "step": 2797 }, { "epoch": 0.05934126529660028, "grad_norm": 0.32126328349113464, "learning_rate": 1.9959577925881677e-05, "loss": 0.5252, "step": 2798 }, { "epoch": 0.05936247375453331, "grad_norm": 0.36646392941474915, "learning_rate": 1.995954796497081e-05, "loss": 0.494, "step": 2799 }, { "epoch": 0.05938368221246633, "grad_norm": 0.380016952753067, "learning_rate": 1.9959517992983017e-05, "loss": 0.5421, "step": 2800 }, { "epoch": 0.059404890670399356, "grad_norm": 0.355412095785141, "learning_rate": 1.9959488009918337e-05, "loss": 0.5978, "step": 2801 }, { "epoch": 0.059426099128332376, "grad_norm": 0.34336772561073303, "learning_rate": 1.99594580157768e-05, "loss": 0.5223, "step": 2802 }, { "epoch": 0.0594473075862654, "grad_norm": 0.332001656293869, "learning_rate": 1.9959428010558442e-05, "loss": 0.6083, "step": 2803 }, { "epoch": 0.05946851604419843, "grad_norm": 0.33490872383117676, "learning_rate": 1.9959397994263292e-05, "loss": 0.5402, "step": 2804 }, { "epoch": 0.05948972450213145, "grad_norm": 0.41967618465423584, "learning_rate": 1.9959367966891386e-05, "loss": 0.5808, "step": 2805 }, { "epoch": 0.059510932960064475, "grad_norm": 0.3383139967918396, "learning_rate": 1.9959337928442763e-05, "loss": 0.5114, "step": 2806 }, { "epoch": 0.059532141417997494, "grad_norm": 0.35713285207748413, "learning_rate": 1.9959307878917446e-05, "loss": 0.5615, "step": 2807 }, { "epoch": 0.05955334987593052, "grad_norm": 0.2988758683204651, "learning_rate": 1.9959277818315472e-05, "loss": 0.4541, "step": 2808 }, { "epoch": 0.05957455833386355, "grad_norm": 0.33905842900276184, "learning_rate": 1.9959247746636882e-05, "loss": 0.5712, "step": 2809 }, { "epoch": 0.05959576679179657, "grad_norm": 0.3193584084510803, "learning_rate": 1.9959217663881697e-05, "loss": 0.6078, "step": 2810 }, { "epoch": 0.059616975249729594, "grad_norm": 0.34712842106819153, "learning_rate": 1.995918757004996e-05, "loss": 0.5781, "step": 2811 }, { "epoch": 0.05963818370766261, "grad_norm": 0.3347832262516022, "learning_rate": 1.9959157465141698e-05, "loss": 0.5272, "step": 2812 }, { "epoch": 0.05965939216559564, "grad_norm": 0.4238032102584839, "learning_rate": 1.995912734915695e-05, "loss": 0.5315, "step": 2813 }, { "epoch": 0.059680600623528667, "grad_norm": 0.355518102645874, "learning_rate": 1.9959097222095747e-05, "loss": 0.5812, "step": 2814 }, { "epoch": 0.059701809081461686, "grad_norm": 0.33141615986824036, "learning_rate": 1.9959067083958122e-05, "loss": 0.5249, "step": 2815 }, { "epoch": 0.05972301753939471, "grad_norm": 0.3390752375125885, "learning_rate": 1.995903693474411e-05, "loss": 0.5774, "step": 2816 }, { "epoch": 0.05974422599732773, "grad_norm": 0.3253145217895508, "learning_rate": 1.9959006774453743e-05, "loss": 0.5184, "step": 2817 }, { "epoch": 0.05976543445526076, "grad_norm": 0.370900958776474, "learning_rate": 1.9958976603087056e-05, "loss": 0.5606, "step": 2818 }, { "epoch": 0.05978664291319378, "grad_norm": 0.28752079606056213, "learning_rate": 1.995894642064408e-05, "loss": 0.5747, "step": 2819 }, { "epoch": 0.059807851371126805, "grad_norm": 0.31642627716064453, "learning_rate": 1.9958916227124848e-05, "loss": 0.5956, "step": 2820 }, { "epoch": 0.05982905982905983, "grad_norm": 0.3789512515068054, "learning_rate": 1.99588860225294e-05, "loss": 0.6135, "step": 2821 }, { "epoch": 0.05985026828699285, "grad_norm": 0.30164438486099243, "learning_rate": 1.9958855806857764e-05, "loss": 0.466, "step": 2822 }, { "epoch": 0.05987147674492588, "grad_norm": 0.3455377519130707, "learning_rate": 1.9958825580109976e-05, "loss": 0.5352, "step": 2823 }, { "epoch": 0.0598926852028589, "grad_norm": 0.5031490325927734, "learning_rate": 1.9958795342286067e-05, "loss": 0.6099, "step": 2824 }, { "epoch": 0.059913893660791924, "grad_norm": 0.32502302527427673, "learning_rate": 1.9958765093386074e-05, "loss": 0.6065, "step": 2825 }, { "epoch": 0.05993510211872495, "grad_norm": 0.31859830021858215, "learning_rate": 1.995873483341003e-05, "loss": 0.5345, "step": 2826 }, { "epoch": 0.05995631057665797, "grad_norm": 0.3405471444129944, "learning_rate": 1.9958704562357964e-05, "loss": 0.6028, "step": 2827 }, { "epoch": 0.059977519034591, "grad_norm": 0.40442556142807007, "learning_rate": 1.995867428022992e-05, "loss": 0.5496, "step": 2828 }, { "epoch": 0.059998727492524016, "grad_norm": 0.30852606892585754, "learning_rate": 1.9958643987025918e-05, "loss": 0.4791, "step": 2829 }, { "epoch": 0.06001993595045704, "grad_norm": 0.3534466028213501, "learning_rate": 1.9958613682746e-05, "loss": 0.5973, "step": 2830 }, { "epoch": 0.06004114440839006, "grad_norm": 0.359220027923584, "learning_rate": 1.99585833673902e-05, "loss": 0.6314, "step": 2831 }, { "epoch": 0.06006235286632309, "grad_norm": 0.34360918402671814, "learning_rate": 1.995855304095855e-05, "loss": 0.5587, "step": 2832 }, { "epoch": 0.060083561324256116, "grad_norm": 0.36066868901252747, "learning_rate": 1.9958522703451083e-05, "loss": 0.5716, "step": 2833 }, { "epoch": 0.060104769782189135, "grad_norm": 0.309345006942749, "learning_rate": 1.9958492354867835e-05, "loss": 0.5269, "step": 2834 }, { "epoch": 0.06012597824012216, "grad_norm": 0.36438143253326416, "learning_rate": 1.9958461995208838e-05, "loss": 0.3978, "step": 2835 }, { "epoch": 0.06014718669805518, "grad_norm": 0.3890838027000427, "learning_rate": 1.9958431624474122e-05, "loss": 0.5261, "step": 2836 }, { "epoch": 0.06016839515598821, "grad_norm": 0.36856216192245483, "learning_rate": 1.995840124266373e-05, "loss": 0.5888, "step": 2837 }, { "epoch": 0.060189603613921235, "grad_norm": 0.36931267380714417, "learning_rate": 1.995837084977769e-05, "loss": 0.5278, "step": 2838 }, { "epoch": 0.060210812071854254, "grad_norm": 0.352304071187973, "learning_rate": 1.9958340445816033e-05, "loss": 0.511, "step": 2839 }, { "epoch": 0.06023202052978728, "grad_norm": 0.33641740679740906, "learning_rate": 1.9958310030778798e-05, "loss": 0.4781, "step": 2840 }, { "epoch": 0.0602532289877203, "grad_norm": 0.33479440212249756, "learning_rate": 1.9958279604666017e-05, "loss": 0.5518, "step": 2841 }, { "epoch": 0.06027443744565333, "grad_norm": 0.3995935022830963, "learning_rate": 1.9958249167477728e-05, "loss": 0.5125, "step": 2842 }, { "epoch": 0.06029564590358635, "grad_norm": 0.3024890422821045, "learning_rate": 1.9958218719213955e-05, "loss": 0.4535, "step": 2843 }, { "epoch": 0.06031685436151937, "grad_norm": 0.48284077644348145, "learning_rate": 1.995818825987474e-05, "loss": 0.5884, "step": 2844 }, { "epoch": 0.0603380628194524, "grad_norm": 0.34950751066207886, "learning_rate": 1.9958157789460115e-05, "loss": 0.5248, "step": 2845 }, { "epoch": 0.06035927127738542, "grad_norm": 0.3179190456867218, "learning_rate": 1.9958127307970116e-05, "loss": 0.5623, "step": 2846 }, { "epoch": 0.060380479735318446, "grad_norm": 0.46730300784111023, "learning_rate": 1.9958096815404768e-05, "loss": 0.5737, "step": 2847 }, { "epoch": 0.060401688193251465, "grad_norm": 0.3722928762435913, "learning_rate": 1.9958066311764115e-05, "loss": 0.5683, "step": 2848 }, { "epoch": 0.06042289665118449, "grad_norm": 0.3328583538532257, "learning_rate": 1.9958035797048185e-05, "loss": 0.5401, "step": 2849 }, { "epoch": 0.06044410510911752, "grad_norm": 0.2981826364994049, "learning_rate": 1.9958005271257017e-05, "loss": 0.5144, "step": 2850 }, { "epoch": 0.06046531356705054, "grad_norm": 0.32304736971855164, "learning_rate": 1.995797473439064e-05, "loss": 0.5772, "step": 2851 }, { "epoch": 0.060486522024983565, "grad_norm": 0.3741312026977539, "learning_rate": 1.9957944186449093e-05, "loss": 0.479, "step": 2852 }, { "epoch": 0.060507730482916584, "grad_norm": 0.3062640428543091, "learning_rate": 1.9957913627432405e-05, "loss": 0.5398, "step": 2853 }, { "epoch": 0.06052893894084961, "grad_norm": 0.3774738907814026, "learning_rate": 1.995788305734061e-05, "loss": 0.6012, "step": 2854 }, { "epoch": 0.06055014739878264, "grad_norm": 0.42697465419769287, "learning_rate": 1.9957852476173743e-05, "loss": 0.5117, "step": 2855 }, { "epoch": 0.06057135585671566, "grad_norm": 0.3187839388847351, "learning_rate": 1.9957821883931845e-05, "loss": 0.4962, "step": 2856 }, { "epoch": 0.060592564314648684, "grad_norm": 0.4476613402366638, "learning_rate": 1.9957791280614944e-05, "loss": 0.5828, "step": 2857 }, { "epoch": 0.0606137727725817, "grad_norm": 0.3496324121952057, "learning_rate": 1.9957760666223066e-05, "loss": 0.5436, "step": 2858 }, { "epoch": 0.06063498123051473, "grad_norm": 0.34391555190086365, "learning_rate": 1.995773004075626e-05, "loss": 0.499, "step": 2859 }, { "epoch": 0.060656189688447756, "grad_norm": 0.3723052144050598, "learning_rate": 1.995769940421455e-05, "loss": 0.5537, "step": 2860 }, { "epoch": 0.060677398146380776, "grad_norm": 0.33910250663757324, "learning_rate": 1.9957668756597973e-05, "loss": 0.5038, "step": 2861 }, { "epoch": 0.0606986066043138, "grad_norm": 0.3485940396785736, "learning_rate": 1.9957638097906567e-05, "loss": 0.5922, "step": 2862 }, { "epoch": 0.06071981506224682, "grad_norm": 0.3463563919067383, "learning_rate": 1.995760742814036e-05, "loss": 0.578, "step": 2863 }, { "epoch": 0.06074102352017985, "grad_norm": 0.32744479179382324, "learning_rate": 1.9957576747299386e-05, "loss": 0.5513, "step": 2864 }, { "epoch": 0.06076223197811287, "grad_norm": 0.9449393153190613, "learning_rate": 1.9957546055383685e-05, "loss": 0.556, "step": 2865 }, { "epoch": 0.060783440436045895, "grad_norm": 0.275898814201355, "learning_rate": 1.995751535239329e-05, "loss": 0.5049, "step": 2866 }, { "epoch": 0.06080464889397892, "grad_norm": 0.31676962971687317, "learning_rate": 1.995748463832823e-05, "loss": 0.4573, "step": 2867 }, { "epoch": 0.06082585735191194, "grad_norm": 0.32767996191978455, "learning_rate": 1.995745391318854e-05, "loss": 0.5649, "step": 2868 }, { "epoch": 0.06084706580984497, "grad_norm": 0.31560084223747253, "learning_rate": 1.995742317697426e-05, "loss": 0.5274, "step": 2869 }, { "epoch": 0.06086827426777799, "grad_norm": 0.33715003728866577, "learning_rate": 1.9957392429685416e-05, "loss": 0.5667, "step": 2870 }, { "epoch": 0.060889482725711014, "grad_norm": 0.3349931240081787, "learning_rate": 1.9957361671322047e-05, "loss": 0.5919, "step": 2871 }, { "epoch": 0.06091069118364404, "grad_norm": 0.3275941014289856, "learning_rate": 1.995733090188419e-05, "loss": 0.4714, "step": 2872 }, { "epoch": 0.06093189964157706, "grad_norm": 0.36328965425491333, "learning_rate": 1.9957300121371876e-05, "loss": 0.5535, "step": 2873 }, { "epoch": 0.06095310809951009, "grad_norm": 0.319261759519577, "learning_rate": 1.9957269329785137e-05, "loss": 0.4839, "step": 2874 }, { "epoch": 0.060974316557443106, "grad_norm": 0.36888372898101807, "learning_rate": 1.9957238527124012e-05, "loss": 0.5232, "step": 2875 }, { "epoch": 0.06099552501537613, "grad_norm": 0.34926044940948486, "learning_rate": 1.9957207713388534e-05, "loss": 0.5295, "step": 2876 }, { "epoch": 0.06101673347330915, "grad_norm": 0.3462076783180237, "learning_rate": 1.995717688857873e-05, "loss": 0.5998, "step": 2877 }, { "epoch": 0.06103794193124218, "grad_norm": 0.3800881505012512, "learning_rate": 1.9957146052694646e-05, "loss": 0.5757, "step": 2878 }, { "epoch": 0.061059150389175205, "grad_norm": 0.3238185942173004, "learning_rate": 1.995711520573631e-05, "loss": 0.6396, "step": 2879 }, { "epoch": 0.061080358847108225, "grad_norm": 0.38560354709625244, "learning_rate": 1.9957084347703754e-05, "loss": 0.6196, "step": 2880 }, { "epoch": 0.06110156730504125, "grad_norm": 0.3137253224849701, "learning_rate": 1.9957053478597018e-05, "loss": 0.5532, "step": 2881 }, { "epoch": 0.06112277576297427, "grad_norm": 0.32916194200515747, "learning_rate": 1.9957022598416135e-05, "loss": 0.5166, "step": 2882 }, { "epoch": 0.0611439842209073, "grad_norm": 0.33670446276664734, "learning_rate": 1.9956991707161134e-05, "loss": 0.6089, "step": 2883 }, { "epoch": 0.061165192678840324, "grad_norm": 0.3364875018596649, "learning_rate": 1.9956960804832054e-05, "loss": 0.5299, "step": 2884 }, { "epoch": 0.061186401136773344, "grad_norm": 0.310920387506485, "learning_rate": 1.995692989142893e-05, "loss": 0.5043, "step": 2885 }, { "epoch": 0.06120760959470637, "grad_norm": 0.628017783164978, "learning_rate": 1.9956898966951796e-05, "loss": 0.5298, "step": 2886 }, { "epoch": 0.06122881805263939, "grad_norm": 0.3583092987537384, "learning_rate": 1.9956868031400685e-05, "loss": 0.5536, "step": 2887 }, { "epoch": 0.06125002651057242, "grad_norm": 0.3061988353729248, "learning_rate": 1.995683708477563e-05, "loss": 0.4255, "step": 2888 }, { "epoch": 0.06127123496850544, "grad_norm": 0.3537364900112152, "learning_rate": 1.995680612707667e-05, "loss": 0.5701, "step": 2889 }, { "epoch": 0.06129244342643846, "grad_norm": 0.3310873806476593, "learning_rate": 1.9956775158303834e-05, "loss": 0.519, "step": 2890 }, { "epoch": 0.06131365188437149, "grad_norm": 0.46666449308395386, "learning_rate": 1.9956744178457162e-05, "loss": 0.4184, "step": 2891 }, { "epoch": 0.06133486034230451, "grad_norm": 0.3218550980091095, "learning_rate": 1.9956713187536683e-05, "loss": 0.5787, "step": 2892 }, { "epoch": 0.061356068800237536, "grad_norm": 0.3380405604839325, "learning_rate": 1.9956682185542434e-05, "loss": 0.4776, "step": 2893 }, { "epoch": 0.061377277258170555, "grad_norm": 0.34686219692230225, "learning_rate": 1.9956651172474452e-05, "loss": 0.5877, "step": 2894 }, { "epoch": 0.06139848571610358, "grad_norm": 0.34760186076164246, "learning_rate": 1.9956620148332765e-05, "loss": 0.5372, "step": 2895 }, { "epoch": 0.06141969417403661, "grad_norm": 0.35492485761642456, "learning_rate": 1.9956589113117416e-05, "loss": 0.5967, "step": 2896 }, { "epoch": 0.06144090263196963, "grad_norm": 0.37211230397224426, "learning_rate": 1.9956558066828433e-05, "loss": 0.5014, "step": 2897 }, { "epoch": 0.061462111089902655, "grad_norm": 0.31923583149909973, "learning_rate": 1.9956527009465852e-05, "loss": 0.6283, "step": 2898 }, { "epoch": 0.061483319547835674, "grad_norm": 0.3225003182888031, "learning_rate": 1.995649594102971e-05, "loss": 0.5801, "step": 2899 }, { "epoch": 0.0615045280057687, "grad_norm": 0.38069579005241394, "learning_rate": 1.995646486152004e-05, "loss": 0.6125, "step": 2900 }, { "epoch": 0.06152573646370173, "grad_norm": 0.3101454973220825, "learning_rate": 1.9956433770936874e-05, "loss": 0.5533, "step": 2901 }, { "epoch": 0.06154694492163475, "grad_norm": 0.3929891884326935, "learning_rate": 1.995640266928025e-05, "loss": 0.6336, "step": 2902 }, { "epoch": 0.061568153379567774, "grad_norm": 0.28555938601493835, "learning_rate": 1.99563715565502e-05, "loss": 0.5785, "step": 2903 }, { "epoch": 0.06158936183750079, "grad_norm": 0.3032074570655823, "learning_rate": 1.9956340432746765e-05, "loss": 0.5528, "step": 2904 }, { "epoch": 0.06161057029543382, "grad_norm": 0.31400397419929504, "learning_rate": 1.9956309297869966e-05, "loss": 0.5853, "step": 2905 }, { "epoch": 0.06163177875336684, "grad_norm": 0.3111499845981598, "learning_rate": 1.9956278151919856e-05, "loss": 0.5245, "step": 2906 }, { "epoch": 0.061652987211299866, "grad_norm": 0.3225533664226532, "learning_rate": 1.9956246994896453e-05, "loss": 0.6437, "step": 2907 }, { "epoch": 0.06167419566923289, "grad_norm": 0.4813494086265564, "learning_rate": 1.99562158267998e-05, "loss": 0.485, "step": 2908 }, { "epoch": 0.06169540412716591, "grad_norm": 0.34809625148773193, "learning_rate": 1.995618464762993e-05, "loss": 0.5516, "step": 2909 }, { "epoch": 0.06171661258509894, "grad_norm": 0.33171913027763367, "learning_rate": 1.995615345738688e-05, "loss": 0.5262, "step": 2910 }, { "epoch": 0.06173782104303196, "grad_norm": 0.36549893021583557, "learning_rate": 1.995612225607068e-05, "loss": 0.5242, "step": 2911 }, { "epoch": 0.061759029500964985, "grad_norm": 0.39337077736854553, "learning_rate": 1.9956091043681373e-05, "loss": 0.5032, "step": 2912 }, { "epoch": 0.06178023795889801, "grad_norm": 0.4012179970741272, "learning_rate": 1.9956059820218982e-05, "loss": 0.5443, "step": 2913 }, { "epoch": 0.06180144641683103, "grad_norm": 0.3504864573478699, "learning_rate": 1.995602858568355e-05, "loss": 0.5049, "step": 2914 }, { "epoch": 0.06182265487476406, "grad_norm": 0.32273972034454346, "learning_rate": 1.9955997340075107e-05, "loss": 0.5426, "step": 2915 }, { "epoch": 0.06184386333269708, "grad_norm": 0.33104538917541504, "learning_rate": 1.9955966083393697e-05, "loss": 0.5142, "step": 2916 }, { "epoch": 0.061865071790630104, "grad_norm": 0.32810041308403015, "learning_rate": 1.995593481563934e-05, "loss": 0.5169, "step": 2917 }, { "epoch": 0.06188628024856313, "grad_norm": 0.3440902531147003, "learning_rate": 1.9955903536812085e-05, "loss": 0.5147, "step": 2918 }, { "epoch": 0.06190748870649615, "grad_norm": 0.3670969307422638, "learning_rate": 1.9955872246911957e-05, "loss": 0.5885, "step": 2919 }, { "epoch": 0.061928697164429176, "grad_norm": 0.2946217358112335, "learning_rate": 1.9955840945938995e-05, "loss": 0.48, "step": 2920 }, { "epoch": 0.061949905622362196, "grad_norm": 0.31538933515548706, "learning_rate": 1.9955809633893234e-05, "loss": 0.6481, "step": 2921 }, { "epoch": 0.06197111408029522, "grad_norm": 0.31644144654273987, "learning_rate": 1.9955778310774707e-05, "loss": 0.4896, "step": 2922 }, { "epoch": 0.06199232253822824, "grad_norm": 0.2937915027141571, "learning_rate": 1.995574697658345e-05, "loss": 0.4672, "step": 2923 }, { "epoch": 0.06201353099616127, "grad_norm": 0.33064690232276917, "learning_rate": 1.9955715631319497e-05, "loss": 0.5267, "step": 2924 }, { "epoch": 0.062034739454094295, "grad_norm": 0.34208446741104126, "learning_rate": 1.9955684274982886e-05, "loss": 0.525, "step": 2925 }, { "epoch": 0.062055947912027315, "grad_norm": 0.7240343689918518, "learning_rate": 1.995565290757365e-05, "loss": 0.5798, "step": 2926 }, { "epoch": 0.06207715636996034, "grad_norm": 0.3416404128074646, "learning_rate": 1.995562152909182e-05, "loss": 0.5241, "step": 2927 }, { "epoch": 0.06209836482789336, "grad_norm": 0.4188418388366699, "learning_rate": 1.9955590139537435e-05, "loss": 0.5646, "step": 2928 }, { "epoch": 0.06211957328582639, "grad_norm": 0.34134146571159363, "learning_rate": 1.995555873891053e-05, "loss": 0.581, "step": 2929 }, { "epoch": 0.062140781743759414, "grad_norm": 0.34477609395980835, "learning_rate": 1.9955527327211138e-05, "loss": 0.5553, "step": 2930 }, { "epoch": 0.062161990201692434, "grad_norm": 0.3300216794013977, "learning_rate": 1.9955495904439296e-05, "loss": 0.4818, "step": 2931 }, { "epoch": 0.06218319865962546, "grad_norm": 0.32564109563827515, "learning_rate": 1.995546447059504e-05, "loss": 0.5474, "step": 2932 }, { "epoch": 0.06220440711755848, "grad_norm": 0.3980270326137543, "learning_rate": 1.99554330256784e-05, "loss": 0.555, "step": 2933 }, { "epoch": 0.06222561557549151, "grad_norm": 0.34132593870162964, "learning_rate": 1.995540156968941e-05, "loss": 0.6384, "step": 2934 }, { "epoch": 0.062246824033424526, "grad_norm": 0.3077263832092285, "learning_rate": 1.9955370102628114e-05, "loss": 0.5217, "step": 2935 }, { "epoch": 0.06226803249135755, "grad_norm": 0.34880930185317993, "learning_rate": 1.995533862449454e-05, "loss": 0.6216, "step": 2936 }, { "epoch": 0.06228924094929058, "grad_norm": 0.3230782747268677, "learning_rate": 1.9955307135288727e-05, "loss": 0.5963, "step": 2937 }, { "epoch": 0.0623104494072236, "grad_norm": 0.3178178369998932, "learning_rate": 1.995527563501071e-05, "loss": 0.5953, "step": 2938 }, { "epoch": 0.062331657865156626, "grad_norm": 0.31770429015159607, "learning_rate": 1.9955244123660517e-05, "loss": 0.5087, "step": 2939 }, { "epoch": 0.062352866323089645, "grad_norm": 0.3178301751613617, "learning_rate": 1.995521260123819e-05, "loss": 0.4925, "step": 2940 }, { "epoch": 0.06237407478102267, "grad_norm": 0.34101101756095886, "learning_rate": 1.9955181067743764e-05, "loss": 0.4998, "step": 2941 }, { "epoch": 0.0623952832389557, "grad_norm": 0.36331048607826233, "learning_rate": 1.9955149523177268e-05, "loss": 0.5998, "step": 2942 }, { "epoch": 0.06241649169688872, "grad_norm": 0.27978014945983887, "learning_rate": 1.995511796753874e-05, "loss": 0.4657, "step": 2943 }, { "epoch": 0.062437700154821744, "grad_norm": 0.3214883506298065, "learning_rate": 1.995508640082822e-05, "loss": 0.5334, "step": 2944 }, { "epoch": 0.062458908612754764, "grad_norm": 0.3184570372104645, "learning_rate": 1.9955054823045742e-05, "loss": 0.5623, "step": 2945 }, { "epoch": 0.06248011707068779, "grad_norm": 0.357322096824646, "learning_rate": 1.9955023234191336e-05, "loss": 0.6161, "step": 2946 }, { "epoch": 0.06250132552862081, "grad_norm": 0.3342704772949219, "learning_rate": 1.9954991634265037e-05, "loss": 0.5203, "step": 2947 }, { "epoch": 0.06252253398655384, "grad_norm": 0.3185162842273712, "learning_rate": 1.9954960023266886e-05, "loss": 0.5186, "step": 2948 }, { "epoch": 0.06254374244448686, "grad_norm": 0.3699676990509033, "learning_rate": 1.9954928401196912e-05, "loss": 0.5658, "step": 2949 }, { "epoch": 0.06256495090241988, "grad_norm": 0.32375767827033997, "learning_rate": 1.9954896768055155e-05, "loss": 0.5642, "step": 2950 }, { "epoch": 0.0625861593603529, "grad_norm": 0.34398043155670166, "learning_rate": 1.9954865123841647e-05, "loss": 0.5583, "step": 2951 }, { "epoch": 0.06260736781828594, "grad_norm": 0.310258686542511, "learning_rate": 1.995483346855643e-05, "loss": 0.5031, "step": 2952 }, { "epoch": 0.06262857627621896, "grad_norm": 0.4136074483394623, "learning_rate": 1.9954801802199527e-05, "loss": 0.4954, "step": 2953 }, { "epoch": 0.06264978473415198, "grad_norm": 0.36076244711875916, "learning_rate": 1.995477012477098e-05, "loss": 0.5979, "step": 2954 }, { "epoch": 0.06267099319208501, "grad_norm": 0.30878502130508423, "learning_rate": 1.9954738436270826e-05, "loss": 0.5624, "step": 2955 }, { "epoch": 0.06269220165001803, "grad_norm": 0.33941850066185, "learning_rate": 1.99547067366991e-05, "loss": 0.5003, "step": 2956 }, { "epoch": 0.06271341010795105, "grad_norm": 0.3244805335998535, "learning_rate": 1.9954675026055833e-05, "loss": 0.5569, "step": 2957 }, { "epoch": 0.06273461856588407, "grad_norm": 0.3489895761013031, "learning_rate": 1.9954643304341067e-05, "loss": 0.5236, "step": 2958 }, { "epoch": 0.0627558270238171, "grad_norm": 0.33377712965011597, "learning_rate": 1.9954611571554828e-05, "loss": 0.5497, "step": 2959 }, { "epoch": 0.06277703548175012, "grad_norm": 0.3508075773715973, "learning_rate": 1.995457982769716e-05, "loss": 0.6127, "step": 2960 }, { "epoch": 0.06279824393968314, "grad_norm": 0.3015231490135193, "learning_rate": 1.9954548072768094e-05, "loss": 0.5605, "step": 2961 }, { "epoch": 0.06281945239761617, "grad_norm": 0.419391393661499, "learning_rate": 1.9954516306767667e-05, "loss": 0.6462, "step": 2962 }, { "epoch": 0.0628406608555492, "grad_norm": 0.3225061893463135, "learning_rate": 1.995448452969591e-05, "loss": 0.5933, "step": 2963 }, { "epoch": 0.06286186931348221, "grad_norm": 0.305772989988327, "learning_rate": 1.9954452741552865e-05, "loss": 0.4932, "step": 2964 }, { "epoch": 0.06288307777141525, "grad_norm": 0.321000874042511, "learning_rate": 1.995442094233856e-05, "loss": 0.5867, "step": 2965 }, { "epoch": 0.06290428622934827, "grad_norm": 0.3827943205833435, "learning_rate": 1.995438913205304e-05, "loss": 0.4987, "step": 2966 }, { "epoch": 0.06292549468728129, "grad_norm": 0.2873689830303192, "learning_rate": 1.9954357310696332e-05, "loss": 0.4516, "step": 2967 }, { "epoch": 0.0629467031452143, "grad_norm": 0.5826525092124939, "learning_rate": 1.9954325478268478e-05, "loss": 0.5282, "step": 2968 }, { "epoch": 0.06296791160314734, "grad_norm": 0.30758944153785706, "learning_rate": 1.9954293634769503e-05, "loss": 0.4863, "step": 2969 }, { "epoch": 0.06298912006108036, "grad_norm": 0.30652403831481934, "learning_rate": 1.9954261780199452e-05, "loss": 0.5463, "step": 2970 }, { "epoch": 0.06301032851901338, "grad_norm": 0.6949805617332458, "learning_rate": 1.995422991455836e-05, "loss": 0.5418, "step": 2971 }, { "epoch": 0.06303153697694641, "grad_norm": 0.3293025493621826, "learning_rate": 1.9954198037846256e-05, "loss": 0.556, "step": 2972 }, { "epoch": 0.06305274543487943, "grad_norm": 0.3703502118587494, "learning_rate": 1.9954166150063182e-05, "loss": 0.5448, "step": 2973 }, { "epoch": 0.06307395389281245, "grad_norm": 0.341787189245224, "learning_rate": 1.995413425120917e-05, "loss": 0.4955, "step": 2974 }, { "epoch": 0.06309516235074547, "grad_norm": 0.3332841694355011, "learning_rate": 1.9954102341284257e-05, "loss": 0.5178, "step": 2975 }, { "epoch": 0.0631163708086785, "grad_norm": 0.3439767360687256, "learning_rate": 1.995407042028848e-05, "loss": 0.57, "step": 2976 }, { "epoch": 0.06313757926661152, "grad_norm": 0.3155915439128876, "learning_rate": 1.995403848822187e-05, "loss": 0.545, "step": 2977 }, { "epoch": 0.06315878772454454, "grad_norm": 0.3154694437980652, "learning_rate": 1.9954006545084464e-05, "loss": 0.4733, "step": 2978 }, { "epoch": 0.06317999618247758, "grad_norm": 0.35108688473701477, "learning_rate": 1.99539745908763e-05, "loss": 0.6111, "step": 2979 }, { "epoch": 0.0632012046404106, "grad_norm": 0.3026961386203766, "learning_rate": 1.995394262559741e-05, "loss": 0.4591, "step": 2980 }, { "epoch": 0.06322241309834362, "grad_norm": 0.33420538902282715, "learning_rate": 1.9953910649247833e-05, "loss": 0.4272, "step": 2981 }, { "epoch": 0.06324362155627665, "grad_norm": 0.32571014761924744, "learning_rate": 1.9953878661827603e-05, "loss": 0.5228, "step": 2982 }, { "epoch": 0.06326483001420967, "grad_norm": 0.29876527190208435, "learning_rate": 1.9953846663336757e-05, "loss": 0.4679, "step": 2983 }, { "epoch": 0.06328603847214269, "grad_norm": 0.3476393222808838, "learning_rate": 1.9953814653775326e-05, "loss": 0.5435, "step": 2984 }, { "epoch": 0.06330724693007571, "grad_norm": 0.3108503222465515, "learning_rate": 1.995378263314335e-05, "loss": 0.5495, "step": 2985 }, { "epoch": 0.06332845538800874, "grad_norm": 0.34354594349861145, "learning_rate": 1.9953750601440866e-05, "loss": 0.5674, "step": 2986 }, { "epoch": 0.06334966384594176, "grad_norm": 0.3225001394748688, "learning_rate": 1.99537185586679e-05, "loss": 0.5757, "step": 2987 }, { "epoch": 0.06337087230387478, "grad_norm": 0.4190840423107147, "learning_rate": 1.99536865048245e-05, "loss": 0.5909, "step": 2988 }, { "epoch": 0.06339208076180781, "grad_norm": 0.37231290340423584, "learning_rate": 1.9953654439910697e-05, "loss": 0.516, "step": 2989 }, { "epoch": 0.06341328921974083, "grad_norm": 0.31823551654815674, "learning_rate": 1.9953622363926525e-05, "loss": 0.4711, "step": 2990 }, { "epoch": 0.06343449767767385, "grad_norm": 0.3913917541503906, "learning_rate": 1.9953590276872022e-05, "loss": 0.5572, "step": 2991 }, { "epoch": 0.06345570613560687, "grad_norm": 0.31756386160850525, "learning_rate": 1.9953558178747218e-05, "loss": 0.6064, "step": 2992 }, { "epoch": 0.06347691459353991, "grad_norm": 0.3681689500808716, "learning_rate": 1.9953526069552155e-05, "loss": 0.5563, "step": 2993 }, { "epoch": 0.06349812305147293, "grad_norm": 0.36239659786224365, "learning_rate": 1.995349394928687e-05, "loss": 0.5232, "step": 2994 }, { "epoch": 0.06351933150940595, "grad_norm": 0.3562355041503906, "learning_rate": 1.995346181795139e-05, "loss": 0.4884, "step": 2995 }, { "epoch": 0.06354053996733898, "grad_norm": 0.34987157583236694, "learning_rate": 1.995342967554576e-05, "loss": 0.5466, "step": 2996 }, { "epoch": 0.063561748425272, "grad_norm": 0.31456834077835083, "learning_rate": 1.9953397522070014e-05, "loss": 0.5012, "step": 2997 }, { "epoch": 0.06358295688320502, "grad_norm": 0.3617520034313202, "learning_rate": 1.9953365357524183e-05, "loss": 0.536, "step": 2998 }, { "epoch": 0.06360416534113805, "grad_norm": 0.3388080894947052, "learning_rate": 1.9953333181908306e-05, "loss": 0.4821, "step": 2999 }, { "epoch": 0.06362537379907107, "grad_norm": 0.32820913195610046, "learning_rate": 1.995330099522242e-05, "loss": 0.5175, "step": 3000 }, { "epoch": 0.06364658225700409, "grad_norm": 0.3099275231361389, "learning_rate": 1.9953268797466554e-05, "loss": 0.4806, "step": 3001 }, { "epoch": 0.06366779071493711, "grad_norm": 0.7515498995780945, "learning_rate": 1.995323658864075e-05, "loss": 0.5562, "step": 3002 }, { "epoch": 0.06368899917287014, "grad_norm": 0.5195586681365967, "learning_rate": 1.9953204368745048e-05, "loss": 0.5564, "step": 3003 }, { "epoch": 0.06371020763080316, "grad_norm": 0.3439151346683502, "learning_rate": 1.9953172137779474e-05, "loss": 0.4999, "step": 3004 }, { "epoch": 0.06373141608873618, "grad_norm": 0.3084142208099365, "learning_rate": 1.9953139895744068e-05, "loss": 0.4595, "step": 3005 }, { "epoch": 0.06375262454666922, "grad_norm": 0.34244194626808167, "learning_rate": 1.995310764263887e-05, "loss": 0.4908, "step": 3006 }, { "epoch": 0.06377383300460224, "grad_norm": 0.36343148350715637, "learning_rate": 1.9953075378463908e-05, "loss": 0.5622, "step": 3007 }, { "epoch": 0.06379504146253526, "grad_norm": 0.4302687346935272, "learning_rate": 1.9953043103219226e-05, "loss": 0.5339, "step": 3008 }, { "epoch": 0.06381624992046828, "grad_norm": 0.3241651952266693, "learning_rate": 1.9953010816904854e-05, "loss": 0.5111, "step": 3009 }, { "epoch": 0.06383745837840131, "grad_norm": 0.41197821497917175, "learning_rate": 1.9952978519520828e-05, "loss": 0.5356, "step": 3010 }, { "epoch": 0.06385866683633433, "grad_norm": 0.36252567172050476, "learning_rate": 1.995294621106719e-05, "loss": 0.5836, "step": 3011 }, { "epoch": 0.06387987529426735, "grad_norm": 0.35238274931907654, "learning_rate": 1.995291389154397e-05, "loss": 0.5738, "step": 3012 }, { "epoch": 0.06390108375220038, "grad_norm": 0.2870694398880005, "learning_rate": 1.9952881560951203e-05, "loss": 0.4458, "step": 3013 }, { "epoch": 0.0639222922101334, "grad_norm": 0.3277500569820404, "learning_rate": 1.995284921928893e-05, "loss": 0.483, "step": 3014 }, { "epoch": 0.06394350066806642, "grad_norm": 0.3320966362953186, "learning_rate": 1.9952816866557182e-05, "loss": 0.4816, "step": 3015 }, { "epoch": 0.06396470912599946, "grad_norm": 0.34300318360328674, "learning_rate": 1.9952784502756e-05, "loss": 0.6113, "step": 3016 }, { "epoch": 0.06398591758393248, "grad_norm": 0.3077634274959564, "learning_rate": 1.995275212788542e-05, "loss": 0.5595, "step": 3017 }, { "epoch": 0.0640071260418655, "grad_norm": 0.3250662088394165, "learning_rate": 1.995271974194547e-05, "loss": 0.5435, "step": 3018 }, { "epoch": 0.06402833449979851, "grad_norm": 0.3465721905231476, "learning_rate": 1.9952687344936194e-05, "loss": 0.5709, "step": 3019 }, { "epoch": 0.06404954295773155, "grad_norm": 0.5878958106040955, "learning_rate": 1.9952654936857624e-05, "loss": 0.572, "step": 3020 }, { "epoch": 0.06407075141566457, "grad_norm": 0.28289148211479187, "learning_rate": 1.99526225177098e-05, "loss": 0.519, "step": 3021 }, { "epoch": 0.06409195987359759, "grad_norm": 0.3125613033771515, "learning_rate": 1.9952590087492756e-05, "loss": 0.6151, "step": 3022 }, { "epoch": 0.06411316833153062, "grad_norm": 0.32105013728141785, "learning_rate": 1.995255764620653e-05, "loss": 0.5354, "step": 3023 }, { "epoch": 0.06413437678946364, "grad_norm": 0.35269269347190857, "learning_rate": 1.995252519385115e-05, "loss": 0.6041, "step": 3024 }, { "epoch": 0.06415558524739666, "grad_norm": 0.3437778651714325, "learning_rate": 1.9952492730426662e-05, "loss": 0.5506, "step": 3025 }, { "epoch": 0.06417679370532968, "grad_norm": 0.5259015560150146, "learning_rate": 1.99524602559331e-05, "loss": 0.479, "step": 3026 }, { "epoch": 0.06419800216326271, "grad_norm": 0.2935953438282013, "learning_rate": 1.995242777037049e-05, "loss": 0.453, "step": 3027 }, { "epoch": 0.06421921062119573, "grad_norm": 0.32045426964759827, "learning_rate": 1.9952395273738882e-05, "loss": 0.5181, "step": 3028 }, { "epoch": 0.06424041907912875, "grad_norm": 0.3049894869327545, "learning_rate": 1.9952362766038303e-05, "loss": 0.5146, "step": 3029 }, { "epoch": 0.06426162753706179, "grad_norm": 0.3463037312030792, "learning_rate": 1.99523302472688e-05, "loss": 0.5519, "step": 3030 }, { "epoch": 0.0642828359949948, "grad_norm": 0.3560141921043396, "learning_rate": 1.9952297717430395e-05, "loss": 0.5349, "step": 3031 }, { "epoch": 0.06430404445292782, "grad_norm": 0.3379417359828949, "learning_rate": 1.9952265176523133e-05, "loss": 0.5478, "step": 3032 }, { "epoch": 0.06432525291086084, "grad_norm": 0.33208954334259033, "learning_rate": 1.9952232624547048e-05, "loss": 0.487, "step": 3033 }, { "epoch": 0.06434646136879388, "grad_norm": 0.3531070351600647, "learning_rate": 1.9952200061502176e-05, "loss": 0.5203, "step": 3034 }, { "epoch": 0.0643676698267269, "grad_norm": 0.3417522609233856, "learning_rate": 1.9952167487388553e-05, "loss": 0.5598, "step": 3035 }, { "epoch": 0.06438887828465992, "grad_norm": 0.35384896397590637, "learning_rate": 1.995213490220622e-05, "loss": 0.6476, "step": 3036 }, { "epoch": 0.06441008674259295, "grad_norm": 0.4198814630508423, "learning_rate": 1.9952102305955203e-05, "loss": 0.5294, "step": 3037 }, { "epoch": 0.06443129520052597, "grad_norm": 0.34602540731430054, "learning_rate": 1.9952069698635547e-05, "loss": 0.628, "step": 3038 }, { "epoch": 0.06445250365845899, "grad_norm": 0.32746338844299316, "learning_rate": 1.9952037080247287e-05, "loss": 0.5573, "step": 3039 }, { "epoch": 0.06447371211639202, "grad_norm": 0.3170110881328583, "learning_rate": 1.9952004450790456e-05, "loss": 0.5743, "step": 3040 }, { "epoch": 0.06449492057432504, "grad_norm": 0.32523781061172485, "learning_rate": 1.9951971810265092e-05, "loss": 0.6191, "step": 3041 }, { "epoch": 0.06451612903225806, "grad_norm": 0.2939624488353729, "learning_rate": 1.995193915867123e-05, "loss": 0.5349, "step": 3042 }, { "epoch": 0.06453733749019108, "grad_norm": 0.3185725212097168, "learning_rate": 1.9951906496008912e-05, "loss": 0.5879, "step": 3043 }, { "epoch": 0.06455854594812412, "grad_norm": 0.3437069058418274, "learning_rate": 1.9951873822278166e-05, "loss": 0.5386, "step": 3044 }, { "epoch": 0.06457975440605714, "grad_norm": 0.3139245808124542, "learning_rate": 1.9951841137479034e-05, "loss": 0.5364, "step": 3045 }, { "epoch": 0.06460096286399016, "grad_norm": 0.336847722530365, "learning_rate": 1.9951808441611553e-05, "loss": 0.5443, "step": 3046 }, { "epoch": 0.06462217132192319, "grad_norm": 0.4240720272064209, "learning_rate": 1.9951775734675754e-05, "loss": 0.4998, "step": 3047 }, { "epoch": 0.06464337977985621, "grad_norm": 0.3183218836784363, "learning_rate": 1.9951743016671677e-05, "loss": 0.6088, "step": 3048 }, { "epoch": 0.06466458823778923, "grad_norm": 0.3851470351219177, "learning_rate": 1.995171028759936e-05, "loss": 0.5638, "step": 3049 }, { "epoch": 0.06468579669572225, "grad_norm": 0.2937968373298645, "learning_rate": 1.9951677547458832e-05, "loss": 0.5368, "step": 3050 }, { "epoch": 0.06470700515365528, "grad_norm": 0.3327985405921936, "learning_rate": 1.995164479625014e-05, "loss": 0.5096, "step": 3051 }, { "epoch": 0.0647282136115883, "grad_norm": 0.31094086170196533, "learning_rate": 1.9951612033973312e-05, "loss": 0.5499, "step": 3052 }, { "epoch": 0.06474942206952132, "grad_norm": 0.31764936447143555, "learning_rate": 1.995157926062839e-05, "loss": 0.5013, "step": 3053 }, { "epoch": 0.06477063052745435, "grad_norm": 0.3489755690097809, "learning_rate": 1.9951546476215407e-05, "loss": 0.6093, "step": 3054 }, { "epoch": 0.06479183898538737, "grad_norm": 0.28957197070121765, "learning_rate": 1.99515136807344e-05, "loss": 0.4564, "step": 3055 }, { "epoch": 0.06481304744332039, "grad_norm": 0.36774513125419617, "learning_rate": 1.9951480874185404e-05, "loss": 0.5402, "step": 3056 }, { "epoch": 0.06483425590125343, "grad_norm": 0.35510867834091187, "learning_rate": 1.995144805656846e-05, "loss": 0.6013, "step": 3057 }, { "epoch": 0.06485546435918645, "grad_norm": 0.30457061529159546, "learning_rate": 1.99514152278836e-05, "loss": 0.5562, "step": 3058 }, { "epoch": 0.06487667281711947, "grad_norm": 0.34825190901756287, "learning_rate": 1.9951382388130867e-05, "loss": 0.5949, "step": 3059 }, { "epoch": 0.06489788127505249, "grad_norm": 0.4352380335330963, "learning_rate": 1.995134953731029e-05, "loss": 0.519, "step": 3060 }, { "epoch": 0.06491908973298552, "grad_norm": 0.34269508719444275, "learning_rate": 1.9951316675421907e-05, "loss": 0.5167, "step": 3061 }, { "epoch": 0.06494029819091854, "grad_norm": 0.3351514935493469, "learning_rate": 1.995128380246576e-05, "loss": 0.5406, "step": 3062 }, { "epoch": 0.06496150664885156, "grad_norm": 0.3138357996940613, "learning_rate": 1.9951250918441877e-05, "loss": 0.4685, "step": 3063 }, { "epoch": 0.06498271510678459, "grad_norm": 0.35230013728141785, "learning_rate": 1.9951218023350302e-05, "loss": 0.5518, "step": 3064 }, { "epoch": 0.06500392356471761, "grad_norm": 0.3499331474304199, "learning_rate": 1.9951185117191066e-05, "loss": 0.5635, "step": 3065 }, { "epoch": 0.06502513202265063, "grad_norm": 0.5108399987220764, "learning_rate": 1.9951152199964213e-05, "loss": 0.5936, "step": 3066 }, { "epoch": 0.06504634048058365, "grad_norm": 0.38572677969932556, "learning_rate": 1.9951119271669773e-05, "loss": 0.4926, "step": 3067 }, { "epoch": 0.06506754893851668, "grad_norm": 0.3169403076171875, "learning_rate": 1.9951086332307784e-05, "loss": 0.4688, "step": 3068 }, { "epoch": 0.0650887573964497, "grad_norm": 0.3451535403728485, "learning_rate": 1.9951053381878283e-05, "loss": 0.6141, "step": 3069 }, { "epoch": 0.06510996585438272, "grad_norm": 0.35174018144607544, "learning_rate": 1.9951020420381307e-05, "loss": 0.5873, "step": 3070 }, { "epoch": 0.06513117431231576, "grad_norm": 0.40827131271362305, "learning_rate": 1.995098744781689e-05, "loss": 0.5324, "step": 3071 }, { "epoch": 0.06515238277024878, "grad_norm": 0.32818472385406494, "learning_rate": 1.9950954464185073e-05, "loss": 0.5692, "step": 3072 }, { "epoch": 0.0651735912281818, "grad_norm": 0.3120453357696533, "learning_rate": 1.9950921469485893e-05, "loss": 0.5223, "step": 3073 }, { "epoch": 0.06519479968611483, "grad_norm": 0.307580828666687, "learning_rate": 1.9950888463719384e-05, "loss": 0.5246, "step": 3074 }, { "epoch": 0.06521600814404785, "grad_norm": 0.3886992037296295, "learning_rate": 1.9950855446885583e-05, "loss": 0.5676, "step": 3075 }, { "epoch": 0.06523721660198087, "grad_norm": 0.3422949016094208, "learning_rate": 1.9950822418984524e-05, "loss": 0.5787, "step": 3076 }, { "epoch": 0.06525842505991389, "grad_norm": 0.32291796803474426, "learning_rate": 1.995078938001625e-05, "loss": 0.5579, "step": 3077 }, { "epoch": 0.06527963351784692, "grad_norm": 0.3236961364746094, "learning_rate": 1.9950756329980795e-05, "loss": 0.4765, "step": 3078 }, { "epoch": 0.06530084197577994, "grad_norm": 0.32965537905693054, "learning_rate": 1.9950723268878193e-05, "loss": 0.5905, "step": 3079 }, { "epoch": 0.06532205043371296, "grad_norm": 0.34408968687057495, "learning_rate": 1.9950690196708485e-05, "loss": 0.5306, "step": 3080 }, { "epoch": 0.065343258891646, "grad_norm": 0.3626607358455658, "learning_rate": 1.9950657113471706e-05, "loss": 0.5717, "step": 3081 }, { "epoch": 0.06536446734957901, "grad_norm": 0.42465728521347046, "learning_rate": 1.995062401916789e-05, "loss": 0.593, "step": 3082 }, { "epoch": 0.06538567580751203, "grad_norm": 0.3244973123073578, "learning_rate": 1.9950590913797078e-05, "loss": 0.5214, "step": 3083 }, { "epoch": 0.06540688426544505, "grad_norm": 0.369511216878891, "learning_rate": 1.9950557797359304e-05, "loss": 0.5312, "step": 3084 }, { "epoch": 0.06542809272337809, "grad_norm": 0.3037993609905243, "learning_rate": 1.995052466985461e-05, "loss": 0.5518, "step": 3085 }, { "epoch": 0.0654493011813111, "grad_norm": 0.3359954059123993, "learning_rate": 1.9950491531283026e-05, "loss": 0.5438, "step": 3086 }, { "epoch": 0.06547050963924413, "grad_norm": 0.31368857622146606, "learning_rate": 1.995045838164459e-05, "loss": 0.5654, "step": 3087 }, { "epoch": 0.06549171809717716, "grad_norm": 0.3611810803413391, "learning_rate": 1.9950425220939342e-05, "loss": 0.493, "step": 3088 }, { "epoch": 0.06551292655511018, "grad_norm": 0.3220893442630768, "learning_rate": 1.995039204916732e-05, "loss": 0.6027, "step": 3089 }, { "epoch": 0.0655341350130432, "grad_norm": 0.3677935302257538, "learning_rate": 1.9950358866328554e-05, "loss": 0.4336, "step": 3090 }, { "epoch": 0.06555534347097622, "grad_norm": 0.31349098682403564, "learning_rate": 1.995032567242309e-05, "loss": 0.5584, "step": 3091 }, { "epoch": 0.06557655192890925, "grad_norm": 0.3382042348384857, "learning_rate": 1.9950292467450957e-05, "loss": 0.5807, "step": 3092 }, { "epoch": 0.06559776038684227, "grad_norm": 0.31864455342292786, "learning_rate": 1.9950259251412195e-05, "loss": 0.5492, "step": 3093 }, { "epoch": 0.06561896884477529, "grad_norm": 0.3294074535369873, "learning_rate": 1.9950226024306842e-05, "loss": 0.5568, "step": 3094 }, { "epoch": 0.06564017730270832, "grad_norm": 0.31073006987571716, "learning_rate": 1.9950192786134933e-05, "loss": 0.5053, "step": 3095 }, { "epoch": 0.06566138576064134, "grad_norm": 0.3541753888130188, "learning_rate": 1.9950159536896508e-05, "loss": 0.5625, "step": 3096 }, { "epoch": 0.06568259421857436, "grad_norm": 0.36408933997154236, "learning_rate": 1.9950126276591604e-05, "loss": 0.5749, "step": 3097 }, { "epoch": 0.0657038026765074, "grad_norm": 0.37209761142730713, "learning_rate": 1.9950093005220252e-05, "loss": 0.5623, "step": 3098 }, { "epoch": 0.06572501113444042, "grad_norm": 0.38674214482307434, "learning_rate": 1.9950059722782492e-05, "loss": 0.5663, "step": 3099 }, { "epoch": 0.06574621959237344, "grad_norm": 0.299698144197464, "learning_rate": 1.9950026429278366e-05, "loss": 0.4605, "step": 3100 }, { "epoch": 0.06576742805030646, "grad_norm": 0.344574898481369, "learning_rate": 1.9949993124707903e-05, "loss": 0.5692, "step": 3101 }, { "epoch": 0.06578863650823949, "grad_norm": 0.31998661160469055, "learning_rate": 1.9949959809071148e-05, "loss": 0.5383, "step": 3102 }, { "epoch": 0.06580984496617251, "grad_norm": 0.32195335626602173, "learning_rate": 1.994992648236813e-05, "loss": 0.5297, "step": 3103 }, { "epoch": 0.06583105342410553, "grad_norm": 0.3315741717815399, "learning_rate": 1.9949893144598896e-05, "loss": 0.5511, "step": 3104 }, { "epoch": 0.06585226188203856, "grad_norm": 0.34506192803382874, "learning_rate": 1.9949859795763474e-05, "loss": 0.5126, "step": 3105 }, { "epoch": 0.06587347033997158, "grad_norm": 0.32084769010543823, "learning_rate": 1.9949826435861905e-05, "loss": 0.6199, "step": 3106 }, { "epoch": 0.0658946787979046, "grad_norm": 0.3350769281387329, "learning_rate": 1.9949793064894224e-05, "loss": 0.5005, "step": 3107 }, { "epoch": 0.06591588725583762, "grad_norm": 0.30981746315956116, "learning_rate": 1.994975968286047e-05, "loss": 0.4521, "step": 3108 }, { "epoch": 0.06593709571377065, "grad_norm": 0.9964089393615723, "learning_rate": 1.9949726289760682e-05, "loss": 0.6309, "step": 3109 }, { "epoch": 0.06595830417170367, "grad_norm": 0.30722421407699585, "learning_rate": 1.9949692885594893e-05, "loss": 0.4896, "step": 3110 }, { "epoch": 0.0659795126296367, "grad_norm": 0.3642430603504181, "learning_rate": 1.994965947036314e-05, "loss": 0.4898, "step": 3111 }, { "epoch": 0.06600072108756973, "grad_norm": 0.2983950078487396, "learning_rate": 1.9949626044065463e-05, "loss": 0.5409, "step": 3112 }, { "epoch": 0.06602192954550275, "grad_norm": 0.3525468111038208, "learning_rate": 1.99495926067019e-05, "loss": 0.59, "step": 3113 }, { "epoch": 0.06604313800343577, "grad_norm": 0.31437328457832336, "learning_rate": 1.9949559158272488e-05, "loss": 0.5092, "step": 3114 }, { "epoch": 0.0660643464613688, "grad_norm": 0.29674646258354187, "learning_rate": 1.9949525698777263e-05, "loss": 0.5573, "step": 3115 }, { "epoch": 0.06608555491930182, "grad_norm": 0.3273182511329651, "learning_rate": 1.9949492228216257e-05, "loss": 0.547, "step": 3116 }, { "epoch": 0.06610676337723484, "grad_norm": 0.3125087320804596, "learning_rate": 1.9949458746589515e-05, "loss": 0.5302, "step": 3117 }, { "epoch": 0.06612797183516786, "grad_norm": 0.31859344244003296, "learning_rate": 1.994942525389707e-05, "loss": 0.6082, "step": 3118 }, { "epoch": 0.06614918029310089, "grad_norm": 0.35193854570388794, "learning_rate": 1.9949391750138962e-05, "loss": 0.5045, "step": 3119 }, { "epoch": 0.06617038875103391, "grad_norm": 0.2961507737636566, "learning_rate": 1.9949358235315227e-05, "loss": 0.5028, "step": 3120 }, { "epoch": 0.06619159720896693, "grad_norm": 0.348482221364975, "learning_rate": 1.9949324709425904e-05, "loss": 0.5762, "step": 3121 }, { "epoch": 0.06621280566689997, "grad_norm": 0.3534942865371704, "learning_rate": 1.9949291172471023e-05, "loss": 0.5676, "step": 3122 }, { "epoch": 0.06623401412483299, "grad_norm": 0.3453405797481537, "learning_rate": 1.994925762445063e-05, "loss": 0.5257, "step": 3123 }, { "epoch": 0.066255222582766, "grad_norm": 0.30376356840133667, "learning_rate": 1.994922406536476e-05, "loss": 0.5737, "step": 3124 }, { "epoch": 0.06627643104069902, "grad_norm": 0.35423749685287476, "learning_rate": 1.994919049521345e-05, "loss": 0.6321, "step": 3125 }, { "epoch": 0.06629763949863206, "grad_norm": 0.3553937077522278, "learning_rate": 1.9949156913996738e-05, "loss": 0.5637, "step": 3126 }, { "epoch": 0.06631884795656508, "grad_norm": 0.29747363924980164, "learning_rate": 1.9949123321714655e-05, "loss": 0.4864, "step": 3127 }, { "epoch": 0.0663400564144981, "grad_norm": 0.294191837310791, "learning_rate": 1.994908971836725e-05, "loss": 0.5253, "step": 3128 }, { "epoch": 0.06636126487243113, "grad_norm": 0.33743777871131897, "learning_rate": 1.9949056103954546e-05, "loss": 0.5589, "step": 3129 }, { "epoch": 0.06638247333036415, "grad_norm": 0.3968440592288971, "learning_rate": 1.9949022478476594e-05, "loss": 0.463, "step": 3130 }, { "epoch": 0.06640368178829717, "grad_norm": 0.3136105537414551, "learning_rate": 1.9948988841933425e-05, "loss": 0.5133, "step": 3131 }, { "epoch": 0.0664248902462302, "grad_norm": 0.33039963245391846, "learning_rate": 1.9948955194325074e-05, "loss": 0.5474, "step": 3132 }, { "epoch": 0.06644609870416322, "grad_norm": 0.3498435914516449, "learning_rate": 1.9948921535651583e-05, "loss": 0.5511, "step": 3133 }, { "epoch": 0.06646730716209624, "grad_norm": 0.2880587875843048, "learning_rate": 1.994888786591299e-05, "loss": 0.5011, "step": 3134 }, { "epoch": 0.06648851562002926, "grad_norm": 0.32698485255241394, "learning_rate": 1.994885418510933e-05, "loss": 0.4557, "step": 3135 }, { "epoch": 0.0665097240779623, "grad_norm": 0.3297328054904938, "learning_rate": 1.9948820493240637e-05, "loss": 0.5253, "step": 3136 }, { "epoch": 0.06653093253589532, "grad_norm": 0.333943635225296, "learning_rate": 1.9948786790306954e-05, "loss": 0.5605, "step": 3137 }, { "epoch": 0.06655214099382833, "grad_norm": 0.32261738181114197, "learning_rate": 1.994875307630832e-05, "loss": 0.5654, "step": 3138 }, { "epoch": 0.06657334945176137, "grad_norm": 0.35600319504737854, "learning_rate": 1.9948719351244766e-05, "loss": 0.5477, "step": 3139 }, { "epoch": 0.06659455790969439, "grad_norm": 0.48160919547080994, "learning_rate": 1.9948685615116332e-05, "loss": 0.5109, "step": 3140 }, { "epoch": 0.06661576636762741, "grad_norm": 0.32451897859573364, "learning_rate": 1.994865186792306e-05, "loss": 0.4915, "step": 3141 }, { "epoch": 0.06663697482556043, "grad_norm": 0.5625641942024231, "learning_rate": 1.994861810966498e-05, "loss": 0.5107, "step": 3142 }, { "epoch": 0.06665818328349346, "grad_norm": 0.3354037404060364, "learning_rate": 1.9948584340342135e-05, "loss": 0.5847, "step": 3143 }, { "epoch": 0.06667939174142648, "grad_norm": 0.48575130105018616, "learning_rate": 1.9948550559954563e-05, "loss": 0.559, "step": 3144 }, { "epoch": 0.0667006001993595, "grad_norm": 0.32462334632873535, "learning_rate": 1.9948516768502294e-05, "loss": 0.5668, "step": 3145 }, { "epoch": 0.06672180865729253, "grad_norm": 0.3574446141719818, "learning_rate": 1.9948482965985377e-05, "loss": 0.6393, "step": 3146 }, { "epoch": 0.06674301711522555, "grad_norm": 0.3362714648246765, "learning_rate": 1.994844915240384e-05, "loss": 0.5695, "step": 3147 }, { "epoch": 0.06676422557315857, "grad_norm": 0.5497178435325623, "learning_rate": 1.9948415327757727e-05, "loss": 0.562, "step": 3148 }, { "epoch": 0.0667854340310916, "grad_norm": 0.3499455451965332, "learning_rate": 1.9948381492047072e-05, "loss": 0.4752, "step": 3149 }, { "epoch": 0.06680664248902463, "grad_norm": 0.3122965693473816, "learning_rate": 1.994834764527191e-05, "loss": 0.5365, "step": 3150 }, { "epoch": 0.06682785094695765, "grad_norm": 0.3187670111656189, "learning_rate": 1.9948313787432286e-05, "loss": 0.5415, "step": 3151 }, { "epoch": 0.06684905940489067, "grad_norm": 0.31115350127220154, "learning_rate": 1.9948279918528233e-05, "loss": 0.5297, "step": 3152 }, { "epoch": 0.0668702678628237, "grad_norm": 0.3260204792022705, "learning_rate": 1.9948246038559788e-05, "loss": 0.5385, "step": 3153 }, { "epoch": 0.06689147632075672, "grad_norm": 0.3189789354801178, "learning_rate": 1.9948212147526993e-05, "loss": 0.5936, "step": 3154 }, { "epoch": 0.06691268477868974, "grad_norm": 0.3447343111038208, "learning_rate": 1.994817824542988e-05, "loss": 0.5987, "step": 3155 }, { "epoch": 0.06693389323662277, "grad_norm": 0.35317665338516235, "learning_rate": 1.994814433226849e-05, "loss": 0.5979, "step": 3156 }, { "epoch": 0.06695510169455579, "grad_norm": 0.3180806636810303, "learning_rate": 1.9948110408042863e-05, "loss": 0.5347, "step": 3157 }, { "epoch": 0.06697631015248881, "grad_norm": 0.33065715432167053, "learning_rate": 1.9948076472753032e-05, "loss": 0.5406, "step": 3158 }, { "epoch": 0.06699751861042183, "grad_norm": 0.3218424618244171, "learning_rate": 1.9948042526399038e-05, "loss": 0.5776, "step": 3159 }, { "epoch": 0.06701872706835486, "grad_norm": 0.29404979944229126, "learning_rate": 1.9948008568980916e-05, "loss": 0.5055, "step": 3160 }, { "epoch": 0.06703993552628788, "grad_norm": 0.3327849507331848, "learning_rate": 1.9947974600498704e-05, "loss": 0.5713, "step": 3161 }, { "epoch": 0.0670611439842209, "grad_norm": 0.37461209297180176, "learning_rate": 1.9947940620952442e-05, "loss": 0.5664, "step": 3162 }, { "epoch": 0.06708235244215394, "grad_norm": 0.32930076122283936, "learning_rate": 1.9947906630342172e-05, "loss": 0.5695, "step": 3163 }, { "epoch": 0.06710356090008696, "grad_norm": 0.3954690098762512, "learning_rate": 1.994787262866792e-05, "loss": 0.5235, "step": 3164 }, { "epoch": 0.06712476935801998, "grad_norm": 0.4127572476863861, "learning_rate": 1.9947838615929735e-05, "loss": 0.4915, "step": 3165 }, { "epoch": 0.067145977815953, "grad_norm": 0.30911341309547424, "learning_rate": 1.9947804592127648e-05, "loss": 0.5616, "step": 3166 }, { "epoch": 0.06716718627388603, "grad_norm": 0.31165197491645813, "learning_rate": 1.99477705572617e-05, "loss": 0.5512, "step": 3167 }, { "epoch": 0.06718839473181905, "grad_norm": 0.32786110043525696, "learning_rate": 1.9947736511331924e-05, "loss": 0.5367, "step": 3168 }, { "epoch": 0.06720960318975207, "grad_norm": 0.3814947307109833, "learning_rate": 1.9947702454338367e-05, "loss": 0.5812, "step": 3169 }, { "epoch": 0.0672308116476851, "grad_norm": 0.3122961223125458, "learning_rate": 1.994766838628106e-05, "loss": 0.5316, "step": 3170 }, { "epoch": 0.06725202010561812, "grad_norm": 0.32438087463378906, "learning_rate": 1.994763430716004e-05, "loss": 0.5058, "step": 3171 }, { "epoch": 0.06727322856355114, "grad_norm": 0.3351493775844574, "learning_rate": 1.9947600216975355e-05, "loss": 0.5753, "step": 3172 }, { "epoch": 0.06729443702148417, "grad_norm": 0.34274497628211975, "learning_rate": 1.9947566115727027e-05, "loss": 0.4396, "step": 3173 }, { "epoch": 0.0673156454794172, "grad_norm": 0.534324586391449, "learning_rate": 1.9947532003415107e-05, "loss": 0.5872, "step": 3174 }, { "epoch": 0.06733685393735021, "grad_norm": 0.33299529552459717, "learning_rate": 1.994749788003963e-05, "loss": 0.622, "step": 3175 }, { "epoch": 0.06735806239528323, "grad_norm": 0.34266430139541626, "learning_rate": 1.9947463745600627e-05, "loss": 0.5533, "step": 3176 }, { "epoch": 0.06737927085321627, "grad_norm": 0.3021200895309448, "learning_rate": 1.9947429600098144e-05, "loss": 0.5012, "step": 3177 }, { "epoch": 0.06740047931114929, "grad_norm": 0.3331015408039093, "learning_rate": 1.9947395443532216e-05, "loss": 0.572, "step": 3178 }, { "epoch": 0.0674216877690823, "grad_norm": 0.324989378452301, "learning_rate": 1.994736127590288e-05, "loss": 0.4951, "step": 3179 }, { "epoch": 0.06744289622701534, "grad_norm": 0.3330480456352234, "learning_rate": 1.994732709721018e-05, "loss": 0.5457, "step": 3180 }, { "epoch": 0.06746410468494836, "grad_norm": 0.34513407945632935, "learning_rate": 1.9947292907454145e-05, "loss": 0.4845, "step": 3181 }, { "epoch": 0.06748531314288138, "grad_norm": 0.3236338794231415, "learning_rate": 1.9947258706634817e-05, "loss": 0.5637, "step": 3182 }, { "epoch": 0.0675065216008144, "grad_norm": 0.3287273943424225, "learning_rate": 1.9947224494752236e-05, "loss": 0.541, "step": 3183 }, { "epoch": 0.06752773005874743, "grad_norm": 0.3631773293018341, "learning_rate": 1.9947190271806436e-05, "loss": 0.5966, "step": 3184 }, { "epoch": 0.06754893851668045, "grad_norm": 0.32427576184272766, "learning_rate": 1.994715603779746e-05, "loss": 0.5529, "step": 3185 }, { "epoch": 0.06757014697461347, "grad_norm": 0.3282376229763031, "learning_rate": 1.994712179272534e-05, "loss": 0.5886, "step": 3186 }, { "epoch": 0.0675913554325465, "grad_norm": 0.33902737498283386, "learning_rate": 1.9947087536590122e-05, "loss": 0.5387, "step": 3187 }, { "epoch": 0.06761256389047952, "grad_norm": 0.36913520097732544, "learning_rate": 1.9947053269391835e-05, "loss": 0.5838, "step": 3188 }, { "epoch": 0.06763377234841254, "grad_norm": 0.36112773418426514, "learning_rate": 1.9947018991130528e-05, "loss": 0.4888, "step": 3189 }, { "epoch": 0.06765498080634558, "grad_norm": 0.3181290626525879, "learning_rate": 1.9946984701806228e-05, "loss": 0.4952, "step": 3190 }, { "epoch": 0.0676761892642786, "grad_norm": 0.34161025285720825, "learning_rate": 1.9946950401418977e-05, "loss": 0.535, "step": 3191 }, { "epoch": 0.06769739772221162, "grad_norm": 0.28512775897979736, "learning_rate": 1.9946916089968818e-05, "loss": 0.478, "step": 3192 }, { "epoch": 0.06771860618014464, "grad_norm": 0.3811487555503845, "learning_rate": 1.9946881767455783e-05, "loss": 0.4672, "step": 3193 }, { "epoch": 0.06773981463807767, "grad_norm": 0.4862750172615051, "learning_rate": 1.9946847433879914e-05, "loss": 0.6073, "step": 3194 }, { "epoch": 0.06776102309601069, "grad_norm": 0.32028743624687195, "learning_rate": 1.9946813089241246e-05, "loss": 0.5234, "step": 3195 }, { "epoch": 0.06778223155394371, "grad_norm": 0.3352445065975189, "learning_rate": 1.994677873353982e-05, "loss": 0.5363, "step": 3196 }, { "epoch": 0.06780344001187674, "grad_norm": 0.3401595652103424, "learning_rate": 1.9946744366775675e-05, "loss": 0.5543, "step": 3197 }, { "epoch": 0.06782464846980976, "grad_norm": 0.31147950887680054, "learning_rate": 1.9946709988948843e-05, "loss": 0.5357, "step": 3198 }, { "epoch": 0.06784585692774278, "grad_norm": 0.37399187684059143, "learning_rate": 1.994667560005937e-05, "loss": 0.5208, "step": 3199 }, { "epoch": 0.0678670653856758, "grad_norm": 0.36518460512161255, "learning_rate": 1.9946641200107287e-05, "loss": 0.6608, "step": 3200 }, { "epoch": 0.06788827384360883, "grad_norm": 0.3187735080718994, "learning_rate": 1.994660678909264e-05, "loss": 0.6516, "step": 3201 }, { "epoch": 0.06790948230154185, "grad_norm": 0.5425991415977478, "learning_rate": 1.9946572367015466e-05, "loss": 0.5765, "step": 3202 }, { "epoch": 0.06793069075947487, "grad_norm": 0.30314719676971436, "learning_rate": 1.9946537933875794e-05, "loss": 0.4793, "step": 3203 }, { "epoch": 0.06795189921740791, "grad_norm": 0.34885871410369873, "learning_rate": 1.994650348967367e-05, "loss": 0.5126, "step": 3204 }, { "epoch": 0.06797310767534093, "grad_norm": 0.5101318359375, "learning_rate": 1.9946469034409136e-05, "loss": 0.5104, "step": 3205 }, { "epoch": 0.06799431613327395, "grad_norm": 0.3479830324649811, "learning_rate": 1.994643456808222e-05, "loss": 0.5566, "step": 3206 }, { "epoch": 0.06801552459120698, "grad_norm": 0.32876822352409363, "learning_rate": 1.994640009069297e-05, "loss": 0.6327, "step": 3207 }, { "epoch": 0.06803673304914, "grad_norm": 0.3456188440322876, "learning_rate": 1.994636560224142e-05, "loss": 0.5923, "step": 3208 }, { "epoch": 0.06805794150707302, "grad_norm": 0.35123109817504883, "learning_rate": 1.9946331102727604e-05, "loss": 0.4634, "step": 3209 }, { "epoch": 0.06807914996500604, "grad_norm": 0.3310061991214752, "learning_rate": 1.994629659215157e-05, "loss": 0.5194, "step": 3210 }, { "epoch": 0.06810035842293907, "grad_norm": 0.3089287281036377, "learning_rate": 1.9946262070513347e-05, "loss": 0.5623, "step": 3211 }, { "epoch": 0.06812156688087209, "grad_norm": 0.3397464454174042, "learning_rate": 1.994622753781298e-05, "loss": 0.4746, "step": 3212 }, { "epoch": 0.06814277533880511, "grad_norm": 0.37082114815711975, "learning_rate": 1.9946192994050505e-05, "loss": 0.6747, "step": 3213 }, { "epoch": 0.06816398379673815, "grad_norm": 0.37616050243377686, "learning_rate": 1.994615843922596e-05, "loss": 0.6152, "step": 3214 }, { "epoch": 0.06818519225467116, "grad_norm": 0.3376854658126831, "learning_rate": 1.9946123873339387e-05, "loss": 0.5483, "step": 3215 }, { "epoch": 0.06820640071260418, "grad_norm": 0.32481175661087036, "learning_rate": 1.994608929639082e-05, "loss": 0.4918, "step": 3216 }, { "epoch": 0.0682276091705372, "grad_norm": 0.7087582349777222, "learning_rate": 1.9946054708380297e-05, "loss": 0.6259, "step": 3217 }, { "epoch": 0.06824881762847024, "grad_norm": 0.31898486614227295, "learning_rate": 1.994602010930786e-05, "loss": 0.53, "step": 3218 }, { "epoch": 0.06827002608640326, "grad_norm": 0.3089039623737335, "learning_rate": 1.9945985499173542e-05, "loss": 0.5013, "step": 3219 }, { "epoch": 0.06829123454433628, "grad_norm": 0.28018179535865784, "learning_rate": 1.994595087797739e-05, "loss": 0.499, "step": 3220 }, { "epoch": 0.06831244300226931, "grad_norm": 0.29533907771110535, "learning_rate": 1.994591624571944e-05, "loss": 0.5419, "step": 3221 }, { "epoch": 0.06833365146020233, "grad_norm": 0.32231056690216064, "learning_rate": 1.9945881602399725e-05, "loss": 0.5285, "step": 3222 }, { "epoch": 0.06835485991813535, "grad_norm": 0.2968407869338989, "learning_rate": 1.9945846948018283e-05, "loss": 0.5021, "step": 3223 }, { "epoch": 0.06837606837606838, "grad_norm": 0.3252612352371216, "learning_rate": 1.9945812282575162e-05, "loss": 0.5548, "step": 3224 }, { "epoch": 0.0683972768340014, "grad_norm": 0.32936304807662964, "learning_rate": 1.9945777606070392e-05, "loss": 0.5669, "step": 3225 }, { "epoch": 0.06841848529193442, "grad_norm": 0.3188678026199341, "learning_rate": 1.9945742918504018e-05, "loss": 0.5618, "step": 3226 }, { "epoch": 0.06843969374986744, "grad_norm": 0.3352755904197693, "learning_rate": 1.994570821987607e-05, "loss": 0.5783, "step": 3227 }, { "epoch": 0.06846090220780048, "grad_norm": 0.8670774698257446, "learning_rate": 1.9945673510186596e-05, "loss": 0.5347, "step": 3228 }, { "epoch": 0.0684821106657335, "grad_norm": 0.37750768661499023, "learning_rate": 1.994563878943563e-05, "loss": 0.5012, "step": 3229 }, { "epoch": 0.06850331912366651, "grad_norm": 0.3545053005218506, "learning_rate": 1.9945604057623206e-05, "loss": 0.5762, "step": 3230 }, { "epoch": 0.06852452758159955, "grad_norm": 0.35399413108825684, "learning_rate": 1.9945569314749372e-05, "loss": 0.5818, "step": 3231 }, { "epoch": 0.06854573603953257, "grad_norm": 0.36655405163764954, "learning_rate": 1.9945534560814162e-05, "loss": 0.601, "step": 3232 }, { "epoch": 0.06856694449746559, "grad_norm": 0.3262752592563629, "learning_rate": 1.9945499795817613e-05, "loss": 0.5457, "step": 3233 }, { "epoch": 0.0685881529553986, "grad_norm": 0.35224974155426025, "learning_rate": 1.994546501975977e-05, "loss": 0.5262, "step": 3234 }, { "epoch": 0.06860936141333164, "grad_norm": 0.38598182797431946, "learning_rate": 1.994543023264066e-05, "loss": 0.5938, "step": 3235 }, { "epoch": 0.06863056987126466, "grad_norm": 0.36430296301841736, "learning_rate": 1.9945395434460336e-05, "loss": 0.654, "step": 3236 }, { "epoch": 0.06865177832919768, "grad_norm": 0.5214024186134338, "learning_rate": 1.9945360625218824e-05, "loss": 0.5046, "step": 3237 }, { "epoch": 0.06867298678713071, "grad_norm": 0.3426717519760132, "learning_rate": 1.994532580491617e-05, "loss": 0.6165, "step": 3238 }, { "epoch": 0.06869419524506373, "grad_norm": 0.34658095240592957, "learning_rate": 1.9945290973552415e-05, "loss": 0.5493, "step": 3239 }, { "epoch": 0.06871540370299675, "grad_norm": 0.3740401566028595, "learning_rate": 1.994525613112759e-05, "loss": 0.6123, "step": 3240 }, { "epoch": 0.06873661216092977, "grad_norm": 0.30718737840652466, "learning_rate": 1.994522127764174e-05, "loss": 0.4554, "step": 3241 }, { "epoch": 0.0687578206188628, "grad_norm": 0.34882211685180664, "learning_rate": 1.99451864130949e-05, "loss": 0.5304, "step": 3242 }, { "epoch": 0.06877902907679583, "grad_norm": 0.31080666184425354, "learning_rate": 1.9945151537487107e-05, "loss": 0.5399, "step": 3243 }, { "epoch": 0.06880023753472884, "grad_norm": 0.29617881774902344, "learning_rate": 1.9945116650818406e-05, "loss": 0.4945, "step": 3244 }, { "epoch": 0.06882144599266188, "grad_norm": 0.36125001311302185, "learning_rate": 1.9945081753088833e-05, "loss": 0.5364, "step": 3245 }, { "epoch": 0.0688426544505949, "grad_norm": 0.4009350538253784, "learning_rate": 1.9945046844298422e-05, "loss": 0.6052, "step": 3246 }, { "epoch": 0.06886386290852792, "grad_norm": 0.3259163200855255, "learning_rate": 1.994501192444722e-05, "loss": 0.4737, "step": 3247 }, { "epoch": 0.06888507136646095, "grad_norm": 0.32533034682273865, "learning_rate": 1.994497699353526e-05, "loss": 0.4927, "step": 3248 }, { "epoch": 0.06890627982439397, "grad_norm": 0.3189549446105957, "learning_rate": 1.994494205156259e-05, "loss": 0.5759, "step": 3249 }, { "epoch": 0.06892748828232699, "grad_norm": 0.35605666041374207, "learning_rate": 1.9944907098529236e-05, "loss": 0.5932, "step": 3250 }, { "epoch": 0.06894869674026001, "grad_norm": 0.34307196736335754, "learning_rate": 1.994487213443524e-05, "loss": 0.4583, "step": 3251 }, { "epoch": 0.06896990519819304, "grad_norm": 0.3782796561717987, "learning_rate": 1.994483715928065e-05, "loss": 0.608, "step": 3252 }, { "epoch": 0.06899111365612606, "grad_norm": 0.3618784546852112, "learning_rate": 1.9944802173065495e-05, "loss": 0.5761, "step": 3253 }, { "epoch": 0.06901232211405908, "grad_norm": 0.38205501437187195, "learning_rate": 1.994476717578982e-05, "loss": 0.4872, "step": 3254 }, { "epoch": 0.06903353057199212, "grad_norm": 0.3063679337501526, "learning_rate": 1.994473216745366e-05, "loss": 0.4977, "step": 3255 }, { "epoch": 0.06905473902992514, "grad_norm": 0.34362733364105225, "learning_rate": 1.9944697148057054e-05, "loss": 0.5834, "step": 3256 }, { "epoch": 0.06907594748785816, "grad_norm": 0.30719271302223206, "learning_rate": 1.9944662117600044e-05, "loss": 0.4917, "step": 3257 }, { "epoch": 0.06909715594579117, "grad_norm": 0.4039190113544464, "learning_rate": 1.9944627076082664e-05, "loss": 0.5355, "step": 3258 }, { "epoch": 0.06911836440372421, "grad_norm": 0.5602083206176758, "learning_rate": 1.9944592023504958e-05, "loss": 0.5128, "step": 3259 }, { "epoch": 0.06913957286165723, "grad_norm": 0.3013627231121063, "learning_rate": 1.9944556959866963e-05, "loss": 0.5154, "step": 3260 }, { "epoch": 0.06916078131959025, "grad_norm": 0.3609350025653839, "learning_rate": 1.9944521885168718e-05, "loss": 0.5641, "step": 3261 }, { "epoch": 0.06918198977752328, "grad_norm": 0.3753776550292969, "learning_rate": 1.994448679941026e-05, "loss": 0.5298, "step": 3262 }, { "epoch": 0.0692031982354563, "grad_norm": 0.36395370960235596, "learning_rate": 1.9944451702591635e-05, "loss": 0.6349, "step": 3263 }, { "epoch": 0.06922440669338932, "grad_norm": 0.3512234389781952, "learning_rate": 1.9944416594712876e-05, "loss": 0.5853, "step": 3264 }, { "epoch": 0.06924561515132235, "grad_norm": 0.33255186676979065, "learning_rate": 1.9944381475774022e-05, "loss": 0.6253, "step": 3265 }, { "epoch": 0.06926682360925537, "grad_norm": 0.3446909189224243, "learning_rate": 1.9944346345775113e-05, "loss": 0.6051, "step": 3266 }, { "epoch": 0.0692880320671884, "grad_norm": 0.31043708324432373, "learning_rate": 1.9944311204716193e-05, "loss": 0.6242, "step": 3267 }, { "epoch": 0.06930924052512141, "grad_norm": 0.34882599115371704, "learning_rate": 1.994427605259729e-05, "loss": 0.5151, "step": 3268 }, { "epoch": 0.06933044898305445, "grad_norm": 0.3025883436203003, "learning_rate": 1.9944240889418453e-05, "loss": 0.4683, "step": 3269 }, { "epoch": 0.06935165744098747, "grad_norm": 0.3409661650657654, "learning_rate": 1.9944205715179714e-05, "loss": 0.5816, "step": 3270 }, { "epoch": 0.06937286589892049, "grad_norm": 0.33960577845573425, "learning_rate": 1.994417052988112e-05, "loss": 0.5059, "step": 3271 }, { "epoch": 0.06939407435685352, "grad_norm": 0.36003392934799194, "learning_rate": 1.9944135333522703e-05, "loss": 0.5669, "step": 3272 }, { "epoch": 0.06941528281478654, "grad_norm": 0.33079758286476135, "learning_rate": 1.9944100126104508e-05, "loss": 0.5047, "step": 3273 }, { "epoch": 0.06943649127271956, "grad_norm": 0.35108551383018494, "learning_rate": 1.9944064907626566e-05, "loss": 0.5274, "step": 3274 }, { "epoch": 0.06945769973065258, "grad_norm": 0.3376059830188751, "learning_rate": 1.9944029678088926e-05, "loss": 0.5283, "step": 3275 }, { "epoch": 0.06947890818858561, "grad_norm": 0.3943825960159302, "learning_rate": 1.9943994437491624e-05, "loss": 0.5372, "step": 3276 }, { "epoch": 0.06950011664651863, "grad_norm": 0.30637893080711365, "learning_rate": 1.9943959185834694e-05, "loss": 0.5146, "step": 3277 }, { "epoch": 0.06952132510445165, "grad_norm": 0.3409709632396698, "learning_rate": 1.994392392311818e-05, "loss": 0.5353, "step": 3278 }, { "epoch": 0.06954253356238468, "grad_norm": 0.32773539423942566, "learning_rate": 1.9943888649342123e-05, "loss": 0.562, "step": 3279 }, { "epoch": 0.0695637420203177, "grad_norm": 0.4205581843852997, "learning_rate": 1.9943853364506553e-05, "loss": 0.5887, "step": 3280 }, { "epoch": 0.06958495047825072, "grad_norm": 0.3510764539241791, "learning_rate": 1.994381806861152e-05, "loss": 0.5422, "step": 3281 }, { "epoch": 0.06960615893618376, "grad_norm": 0.3316039741039276, "learning_rate": 1.994378276165706e-05, "loss": 0.4761, "step": 3282 }, { "epoch": 0.06962736739411678, "grad_norm": 0.3842739760875702, "learning_rate": 1.9943747443643212e-05, "loss": 0.5976, "step": 3283 }, { "epoch": 0.0696485758520498, "grad_norm": 0.3067176043987274, "learning_rate": 1.994371211457001e-05, "loss": 0.4433, "step": 3284 }, { "epoch": 0.06966978430998282, "grad_norm": 0.35189542174339294, "learning_rate": 1.9943676774437502e-05, "loss": 0.5384, "step": 3285 }, { "epoch": 0.06969099276791585, "grad_norm": 0.35693132877349854, "learning_rate": 1.994364142324572e-05, "loss": 0.5357, "step": 3286 }, { "epoch": 0.06971220122584887, "grad_norm": 0.3221482038497925, "learning_rate": 1.9943606060994707e-05, "loss": 0.4715, "step": 3287 }, { "epoch": 0.06973340968378189, "grad_norm": 0.3543417453765869, "learning_rate": 1.9943570687684503e-05, "loss": 0.5564, "step": 3288 }, { "epoch": 0.06975461814171492, "grad_norm": 0.31479552388191223, "learning_rate": 1.9943535303315145e-05, "loss": 0.5347, "step": 3289 }, { "epoch": 0.06977582659964794, "grad_norm": 0.3437034487724304, "learning_rate": 1.9943499907886674e-05, "loss": 0.5281, "step": 3290 }, { "epoch": 0.06979703505758096, "grad_norm": 0.3327730894088745, "learning_rate": 1.9943464501399128e-05, "loss": 0.5512, "step": 3291 }, { "epoch": 0.06981824351551398, "grad_norm": 0.43324366211891174, "learning_rate": 1.9943429083852547e-05, "loss": 0.5977, "step": 3292 }, { "epoch": 0.06983945197344701, "grad_norm": 0.31215739250183105, "learning_rate": 1.994339365524697e-05, "loss": 0.5143, "step": 3293 }, { "epoch": 0.06986066043138003, "grad_norm": 0.3423929810523987, "learning_rate": 1.994335821558244e-05, "loss": 0.5474, "step": 3294 }, { "epoch": 0.06988186888931305, "grad_norm": 0.34414929151535034, "learning_rate": 1.994332276485899e-05, "loss": 0.578, "step": 3295 }, { "epoch": 0.06990307734724609, "grad_norm": 0.4736418128013611, "learning_rate": 1.9943287303076663e-05, "loss": 0.5348, "step": 3296 }, { "epoch": 0.0699242858051791, "grad_norm": 0.3287462890148163, "learning_rate": 1.9943251830235503e-05, "loss": 0.5544, "step": 3297 }, { "epoch": 0.06994549426311213, "grad_norm": 0.3155789375305176, "learning_rate": 1.9943216346335542e-05, "loss": 0.5455, "step": 3298 }, { "epoch": 0.06996670272104515, "grad_norm": 0.3353477120399475, "learning_rate": 1.9943180851376818e-05, "loss": 0.4932, "step": 3299 }, { "epoch": 0.06998791117897818, "grad_norm": 0.2922815978527069, "learning_rate": 1.994314534535938e-05, "loss": 0.5206, "step": 3300 }, { "epoch": 0.0700091196369112, "grad_norm": 0.32885777950286865, "learning_rate": 1.994310982828326e-05, "loss": 0.4592, "step": 3301 }, { "epoch": 0.07003032809484422, "grad_norm": 0.419705331325531, "learning_rate": 1.99430743001485e-05, "loss": 0.5774, "step": 3302 }, { "epoch": 0.07005153655277725, "grad_norm": 0.3949402868747711, "learning_rate": 1.9943038760955138e-05, "loss": 0.5499, "step": 3303 }, { "epoch": 0.07007274501071027, "grad_norm": 0.34391820430755615, "learning_rate": 1.9943003210703217e-05, "loss": 0.5697, "step": 3304 }, { "epoch": 0.07009395346864329, "grad_norm": 0.32093533873558044, "learning_rate": 1.9942967649392773e-05, "loss": 0.5683, "step": 3305 }, { "epoch": 0.07011516192657632, "grad_norm": 0.42900004982948303, "learning_rate": 1.9942932077023846e-05, "loss": 0.5876, "step": 3306 }, { "epoch": 0.07013637038450934, "grad_norm": 0.29432791471481323, "learning_rate": 1.9942896493596474e-05, "loss": 0.5371, "step": 3307 }, { "epoch": 0.07015757884244236, "grad_norm": 0.3314080536365509, "learning_rate": 1.9942860899110704e-05, "loss": 0.5416, "step": 3308 }, { "epoch": 0.07017878730037538, "grad_norm": 0.3455040454864502, "learning_rate": 1.994282529356657e-05, "loss": 0.5021, "step": 3309 }, { "epoch": 0.07019999575830842, "grad_norm": 0.3187744617462158, "learning_rate": 1.9942789676964108e-05, "loss": 0.6085, "step": 3310 }, { "epoch": 0.07022120421624144, "grad_norm": 0.36705482006073, "learning_rate": 1.9942754049303364e-05, "loss": 0.5666, "step": 3311 }, { "epoch": 0.07024241267417446, "grad_norm": 0.29031404852867126, "learning_rate": 1.9942718410584375e-05, "loss": 0.4416, "step": 3312 }, { "epoch": 0.07026362113210749, "grad_norm": 0.32808005809783936, "learning_rate": 1.994268276080718e-05, "loss": 0.5989, "step": 3313 }, { "epoch": 0.07028482959004051, "grad_norm": 0.3272189497947693, "learning_rate": 1.9942647099971822e-05, "loss": 0.5197, "step": 3314 }, { "epoch": 0.07030603804797353, "grad_norm": 0.3902880549430847, "learning_rate": 1.9942611428078337e-05, "loss": 0.5667, "step": 3315 }, { "epoch": 0.07032724650590655, "grad_norm": 0.34265896677970886, "learning_rate": 1.9942575745126765e-05, "loss": 0.6502, "step": 3316 }, { "epoch": 0.07034845496383958, "grad_norm": 0.3231754004955292, "learning_rate": 1.994254005111715e-05, "loss": 0.5828, "step": 3317 }, { "epoch": 0.0703696634217726, "grad_norm": 0.31094038486480713, "learning_rate": 1.9942504346049526e-05, "loss": 0.5887, "step": 3318 }, { "epoch": 0.07039087187970562, "grad_norm": 0.33012855052948, "learning_rate": 1.9942468629923936e-05, "loss": 0.5707, "step": 3319 }, { "epoch": 0.07041208033763866, "grad_norm": 0.3681350648403168, "learning_rate": 1.9942432902740415e-05, "loss": 0.5841, "step": 3320 }, { "epoch": 0.07043328879557167, "grad_norm": 0.3324680030345917, "learning_rate": 1.994239716449901e-05, "loss": 0.5261, "step": 3321 }, { "epoch": 0.0704544972535047, "grad_norm": 0.5436466932296753, "learning_rate": 1.994236141519976e-05, "loss": 0.5139, "step": 3322 }, { "epoch": 0.07047570571143773, "grad_norm": 0.3286544382572174, "learning_rate": 1.9942325654842695e-05, "loss": 0.5732, "step": 3323 }, { "epoch": 0.07049691416937075, "grad_norm": 0.32236364483833313, "learning_rate": 1.9942289883427866e-05, "loss": 0.4956, "step": 3324 }, { "epoch": 0.07051812262730377, "grad_norm": 0.32828107476234436, "learning_rate": 1.9942254100955307e-05, "loss": 0.5615, "step": 3325 }, { "epoch": 0.07053933108523679, "grad_norm": 0.3301233649253845, "learning_rate": 1.994221830742506e-05, "loss": 0.496, "step": 3326 }, { "epoch": 0.07056053954316982, "grad_norm": 0.33582961559295654, "learning_rate": 1.9942182502837166e-05, "loss": 0.5613, "step": 3327 }, { "epoch": 0.07058174800110284, "grad_norm": 0.45915862917900085, "learning_rate": 1.9942146687191663e-05, "loss": 0.5572, "step": 3328 }, { "epoch": 0.07060295645903586, "grad_norm": 0.3735507130622864, "learning_rate": 1.9942110860488588e-05, "loss": 0.5485, "step": 3329 }, { "epoch": 0.07062416491696889, "grad_norm": 0.3639492690563202, "learning_rate": 1.9942075022727986e-05, "loss": 0.5721, "step": 3330 }, { "epoch": 0.07064537337490191, "grad_norm": 0.412221223115921, "learning_rate": 1.9942039173909894e-05, "loss": 0.5369, "step": 3331 }, { "epoch": 0.07066658183283493, "grad_norm": 0.3451150953769684, "learning_rate": 1.994200331403435e-05, "loss": 0.5367, "step": 3332 }, { "epoch": 0.07068779029076795, "grad_norm": 0.38033726811408997, "learning_rate": 1.9941967443101396e-05, "loss": 0.6072, "step": 3333 }, { "epoch": 0.07070899874870099, "grad_norm": 0.3236117660999298, "learning_rate": 1.9941931561111075e-05, "loss": 0.4405, "step": 3334 }, { "epoch": 0.070730207206634, "grad_norm": 0.3042357563972473, "learning_rate": 1.9941895668063426e-05, "loss": 0.4802, "step": 3335 }, { "epoch": 0.07075141566456702, "grad_norm": 0.33555325865745544, "learning_rate": 1.994185976395848e-05, "loss": 0.5889, "step": 3336 }, { "epoch": 0.07077262412250006, "grad_norm": 0.3697091042995453, "learning_rate": 1.9941823848796293e-05, "loss": 0.6374, "step": 3337 }, { "epoch": 0.07079383258043308, "grad_norm": 0.3198397159576416, "learning_rate": 1.994178792257689e-05, "loss": 0.5645, "step": 3338 }, { "epoch": 0.0708150410383661, "grad_norm": 0.45133665204048157, "learning_rate": 1.9941751985300317e-05, "loss": 0.4974, "step": 3339 }, { "epoch": 0.07083624949629913, "grad_norm": 0.39921367168426514, "learning_rate": 1.9941716036966615e-05, "loss": 0.584, "step": 3340 }, { "epoch": 0.07085745795423215, "grad_norm": 0.33208492398262024, "learning_rate": 1.9941680077575822e-05, "loss": 0.4874, "step": 3341 }, { "epoch": 0.07087866641216517, "grad_norm": 0.30643364787101746, "learning_rate": 1.994164410712798e-05, "loss": 0.5031, "step": 3342 }, { "epoch": 0.07089987487009819, "grad_norm": 0.32057684659957886, "learning_rate": 1.9941608125623125e-05, "loss": 0.5999, "step": 3343 }, { "epoch": 0.07092108332803122, "grad_norm": 0.3481036126613617, "learning_rate": 1.9941572133061306e-05, "loss": 0.5594, "step": 3344 }, { "epoch": 0.07094229178596424, "grad_norm": 0.3142896890640259, "learning_rate": 1.9941536129442552e-05, "loss": 0.4618, "step": 3345 }, { "epoch": 0.07096350024389726, "grad_norm": 0.3118472397327423, "learning_rate": 1.994150011476691e-05, "loss": 0.611, "step": 3346 }, { "epoch": 0.0709847087018303, "grad_norm": 0.38619324564933777, "learning_rate": 1.9941464089034415e-05, "loss": 0.5005, "step": 3347 }, { "epoch": 0.07100591715976332, "grad_norm": 0.3247150480747223, "learning_rate": 1.9941428052245113e-05, "loss": 0.5173, "step": 3348 }, { "epoch": 0.07102712561769634, "grad_norm": 0.3649487793445587, "learning_rate": 1.994139200439904e-05, "loss": 0.5438, "step": 3349 }, { "epoch": 0.07104833407562935, "grad_norm": 0.32325080037117004, "learning_rate": 1.9941355945496233e-05, "loss": 0.4494, "step": 3350 }, { "epoch": 0.07106954253356239, "grad_norm": 0.3235478401184082, "learning_rate": 1.994131987553674e-05, "loss": 0.5733, "step": 3351 }, { "epoch": 0.07109075099149541, "grad_norm": 0.28185439109802246, "learning_rate": 1.99412837945206e-05, "loss": 0.4658, "step": 3352 }, { "epoch": 0.07111195944942843, "grad_norm": 0.3193177282810211, "learning_rate": 1.994124770244785e-05, "loss": 0.5684, "step": 3353 }, { "epoch": 0.07113316790736146, "grad_norm": 0.3321031928062439, "learning_rate": 1.9941211599318526e-05, "loss": 0.5026, "step": 3354 }, { "epoch": 0.07115437636529448, "grad_norm": 0.4023045301437378, "learning_rate": 1.9941175485132676e-05, "loss": 0.5247, "step": 3355 }, { "epoch": 0.0711755848232275, "grad_norm": 0.34295937418937683, "learning_rate": 1.9941139359890335e-05, "loss": 0.5512, "step": 3356 }, { "epoch": 0.07119679328116053, "grad_norm": 0.3298870921134949, "learning_rate": 1.994110322359155e-05, "loss": 0.5542, "step": 3357 }, { "epoch": 0.07121800173909355, "grad_norm": 0.31314781308174133, "learning_rate": 1.994106707623635e-05, "loss": 0.5514, "step": 3358 }, { "epoch": 0.07123921019702657, "grad_norm": 0.36052730679512024, "learning_rate": 1.9941030917824786e-05, "loss": 0.5753, "step": 3359 }, { "epoch": 0.07126041865495959, "grad_norm": 0.40783998370170593, "learning_rate": 1.9940994748356893e-05, "loss": 0.6627, "step": 3360 }, { "epoch": 0.07128162711289263, "grad_norm": 0.4232538044452667, "learning_rate": 1.994095856783271e-05, "loss": 0.5459, "step": 3361 }, { "epoch": 0.07130283557082565, "grad_norm": 0.3423579931259155, "learning_rate": 1.9940922376252283e-05, "loss": 0.4711, "step": 3362 }, { "epoch": 0.07132404402875867, "grad_norm": 0.3044768273830414, "learning_rate": 1.9940886173615644e-05, "loss": 0.6079, "step": 3363 }, { "epoch": 0.0713452524866917, "grad_norm": 0.3275354206562042, "learning_rate": 1.994084995992284e-05, "loss": 0.5336, "step": 3364 }, { "epoch": 0.07136646094462472, "grad_norm": 0.30174320936203003, "learning_rate": 1.9940813735173908e-05, "loss": 0.5826, "step": 3365 }, { "epoch": 0.07138766940255774, "grad_norm": 0.3257342278957367, "learning_rate": 1.9940777499368892e-05, "loss": 0.6145, "step": 3366 }, { "epoch": 0.07140887786049076, "grad_norm": 0.30325931310653687, "learning_rate": 1.9940741252507826e-05, "loss": 0.6192, "step": 3367 }, { "epoch": 0.07143008631842379, "grad_norm": 0.35733267664909363, "learning_rate": 1.9940704994590757e-05, "loss": 0.5363, "step": 3368 }, { "epoch": 0.07145129477635681, "grad_norm": 0.31152260303497314, "learning_rate": 1.9940668725617723e-05, "loss": 0.5826, "step": 3369 }, { "epoch": 0.07147250323428983, "grad_norm": 0.3342256247997284, "learning_rate": 1.994063244558876e-05, "loss": 0.5602, "step": 3370 }, { "epoch": 0.07149371169222286, "grad_norm": 0.32192060351371765, "learning_rate": 1.994059615450391e-05, "loss": 0.4719, "step": 3371 }, { "epoch": 0.07151492015015588, "grad_norm": 0.31220516562461853, "learning_rate": 1.9940559852363225e-05, "loss": 0.488, "step": 3372 }, { "epoch": 0.0715361286080889, "grad_norm": 0.3584897816181183, "learning_rate": 1.994052353916673e-05, "loss": 0.5657, "step": 3373 }, { "epoch": 0.07155733706602192, "grad_norm": 0.3232174515724182, "learning_rate": 1.994048721491447e-05, "loss": 0.5086, "step": 3374 }, { "epoch": 0.07157854552395496, "grad_norm": 0.3349131643772125, "learning_rate": 1.9940450879606487e-05, "loss": 0.5186, "step": 3375 }, { "epoch": 0.07159975398188798, "grad_norm": 0.31658026576042175, "learning_rate": 1.994041453324282e-05, "loss": 0.5356, "step": 3376 }, { "epoch": 0.071620962439821, "grad_norm": 0.3503831923007965, "learning_rate": 1.994037817582351e-05, "loss": 0.4799, "step": 3377 }, { "epoch": 0.07164217089775403, "grad_norm": 0.2880134582519531, "learning_rate": 1.99403418073486e-05, "loss": 0.5053, "step": 3378 }, { "epoch": 0.07166337935568705, "grad_norm": 0.3835964500904083, "learning_rate": 1.9940305427818127e-05, "loss": 0.5851, "step": 3379 }, { "epoch": 0.07168458781362007, "grad_norm": 0.35036513209342957, "learning_rate": 1.9940269037232134e-05, "loss": 0.5619, "step": 3380 }, { "epoch": 0.0717057962715531, "grad_norm": 0.32658445835113525, "learning_rate": 1.994023263559066e-05, "loss": 0.5425, "step": 3381 }, { "epoch": 0.07172700472948612, "grad_norm": 0.43552857637405396, "learning_rate": 1.9940196222893744e-05, "loss": 0.5206, "step": 3382 }, { "epoch": 0.07174821318741914, "grad_norm": 0.3437120318412781, "learning_rate": 1.994015979914143e-05, "loss": 0.5127, "step": 3383 }, { "epoch": 0.07176942164535216, "grad_norm": 0.41543740034103394, "learning_rate": 1.9940123364333755e-05, "loss": 0.5805, "step": 3384 }, { "epoch": 0.0717906301032852, "grad_norm": 0.31433966755867004, "learning_rate": 1.994008691847076e-05, "loss": 0.5947, "step": 3385 }, { "epoch": 0.07181183856121821, "grad_norm": 0.33283543586730957, "learning_rate": 1.994005046155249e-05, "loss": 0.5803, "step": 3386 }, { "epoch": 0.07183304701915123, "grad_norm": 0.33417779207229614, "learning_rate": 1.994001399357898e-05, "loss": 0.4711, "step": 3387 }, { "epoch": 0.07185425547708427, "grad_norm": 0.3508148193359375, "learning_rate": 1.993997751455027e-05, "loss": 0.5306, "step": 3388 }, { "epoch": 0.07187546393501729, "grad_norm": 0.32219186425209045, "learning_rate": 1.993994102446641e-05, "loss": 0.5402, "step": 3389 }, { "epoch": 0.0718966723929503, "grad_norm": 0.32049116492271423, "learning_rate": 1.993990452332743e-05, "loss": 0.5082, "step": 3390 }, { "epoch": 0.07191788085088333, "grad_norm": 0.37797221541404724, "learning_rate": 1.9939868011133375e-05, "loss": 0.5507, "step": 3391 }, { "epoch": 0.07193908930881636, "grad_norm": 0.3824981153011322, "learning_rate": 1.9939831487884286e-05, "loss": 0.5836, "step": 3392 }, { "epoch": 0.07196029776674938, "grad_norm": 0.33435481786727905, "learning_rate": 1.99397949535802e-05, "loss": 0.512, "step": 3393 }, { "epoch": 0.0719815062246824, "grad_norm": 0.3181103765964508, "learning_rate": 1.9939758408221163e-05, "loss": 0.5557, "step": 3394 }, { "epoch": 0.07200271468261543, "grad_norm": 0.3849862217903137, "learning_rate": 1.9939721851807214e-05, "loss": 0.5168, "step": 3395 }, { "epoch": 0.07202392314054845, "grad_norm": 0.294001966714859, "learning_rate": 1.993968528433839e-05, "loss": 0.5283, "step": 3396 }, { "epoch": 0.07204513159848147, "grad_norm": 0.36652636528015137, "learning_rate": 1.9939648705814736e-05, "loss": 0.5852, "step": 3397 }, { "epoch": 0.0720663400564145, "grad_norm": 0.3431258797645569, "learning_rate": 1.9939612116236293e-05, "loss": 0.6252, "step": 3398 }, { "epoch": 0.07208754851434752, "grad_norm": 0.3251439034938812, "learning_rate": 1.9939575515603097e-05, "loss": 0.5178, "step": 3399 }, { "epoch": 0.07210875697228054, "grad_norm": 0.40968477725982666, "learning_rate": 1.993953890391519e-05, "loss": 0.5271, "step": 3400 }, { "epoch": 0.07212996543021356, "grad_norm": 0.3823264539241791, "learning_rate": 1.9939502281172617e-05, "loss": 0.5553, "step": 3401 }, { "epoch": 0.0721511738881466, "grad_norm": 0.5000187158584595, "learning_rate": 1.9939465647375415e-05, "loss": 0.5662, "step": 3402 }, { "epoch": 0.07217238234607962, "grad_norm": 0.3651352524757385, "learning_rate": 1.9939429002523623e-05, "loss": 0.6144, "step": 3403 }, { "epoch": 0.07219359080401264, "grad_norm": 0.34384027123451233, "learning_rate": 1.993939234661729e-05, "loss": 0.6186, "step": 3404 }, { "epoch": 0.07221479926194567, "grad_norm": 0.3235175907611847, "learning_rate": 1.9939355679656446e-05, "loss": 0.634, "step": 3405 }, { "epoch": 0.07223600771987869, "grad_norm": 0.47352397441864014, "learning_rate": 1.993931900164114e-05, "loss": 0.5404, "step": 3406 }, { "epoch": 0.07225721617781171, "grad_norm": 0.31734248995780945, "learning_rate": 1.993928231257141e-05, "loss": 0.4765, "step": 3407 }, { "epoch": 0.07227842463574473, "grad_norm": 0.33287274837493896, "learning_rate": 1.9939245612447295e-05, "loss": 0.5583, "step": 3408 }, { "epoch": 0.07229963309367776, "grad_norm": 0.3421096205711365, "learning_rate": 1.9939208901268835e-05, "loss": 0.5568, "step": 3409 }, { "epoch": 0.07232084155161078, "grad_norm": 0.3380131721496582, "learning_rate": 1.9939172179036077e-05, "loss": 0.5796, "step": 3410 }, { "epoch": 0.0723420500095438, "grad_norm": 0.37780553102493286, "learning_rate": 1.9939135445749058e-05, "loss": 0.5743, "step": 3411 }, { "epoch": 0.07236325846747683, "grad_norm": 0.36058440804481506, "learning_rate": 1.993909870140782e-05, "loss": 0.6124, "step": 3412 }, { "epoch": 0.07238446692540985, "grad_norm": 0.33576446771621704, "learning_rate": 1.9939061946012396e-05, "loss": 0.5446, "step": 3413 }, { "epoch": 0.07240567538334287, "grad_norm": 0.35574910044670105, "learning_rate": 1.993902517956284e-05, "loss": 0.559, "step": 3414 }, { "epoch": 0.07242688384127591, "grad_norm": 0.3370197117328644, "learning_rate": 1.9938988402059185e-05, "loss": 0.5796, "step": 3415 }, { "epoch": 0.07244809229920893, "grad_norm": 0.3082643151283264, "learning_rate": 1.9938951613501474e-05, "loss": 0.5128, "step": 3416 }, { "epoch": 0.07246930075714195, "grad_norm": 0.3308422267436981, "learning_rate": 1.9938914813889745e-05, "loss": 0.5231, "step": 3417 }, { "epoch": 0.07249050921507497, "grad_norm": 0.4116445481777191, "learning_rate": 1.9938878003224044e-05, "loss": 0.6087, "step": 3418 }, { "epoch": 0.072511717673008, "grad_norm": 0.31348875164985657, "learning_rate": 1.9938841181504407e-05, "loss": 0.5206, "step": 3419 }, { "epoch": 0.07253292613094102, "grad_norm": 0.37248653173446655, "learning_rate": 1.993880434873088e-05, "loss": 0.6012, "step": 3420 }, { "epoch": 0.07255413458887404, "grad_norm": 0.30099689960479736, "learning_rate": 1.9938767504903498e-05, "loss": 0.4607, "step": 3421 }, { "epoch": 0.07257534304680707, "grad_norm": 0.3574754297733307, "learning_rate": 1.9938730650022308e-05, "loss": 0.4951, "step": 3422 }, { "epoch": 0.07259655150474009, "grad_norm": 0.3373386263847351, "learning_rate": 1.9938693784087347e-05, "loss": 0.5348, "step": 3423 }, { "epoch": 0.07261775996267311, "grad_norm": 0.3668428659439087, "learning_rate": 1.9938656907098658e-05, "loss": 0.5407, "step": 3424 }, { "epoch": 0.07263896842060613, "grad_norm": 0.30348148941993713, "learning_rate": 1.9938620019056277e-05, "loss": 0.5294, "step": 3425 }, { "epoch": 0.07266017687853916, "grad_norm": 0.36287540197372437, "learning_rate": 1.9938583119960254e-05, "loss": 0.5015, "step": 3426 }, { "epoch": 0.07268138533647218, "grad_norm": 0.3299969732761383, "learning_rate": 1.993854620981062e-05, "loss": 0.5426, "step": 3427 }, { "epoch": 0.0727025937944052, "grad_norm": 0.325562447309494, "learning_rate": 1.993850928860743e-05, "loss": 0.5266, "step": 3428 }, { "epoch": 0.07272380225233824, "grad_norm": 0.33827635645866394, "learning_rate": 1.9938472356350707e-05, "loss": 0.5314, "step": 3429 }, { "epoch": 0.07274501071027126, "grad_norm": 0.3320232331752777, "learning_rate": 1.9938435413040507e-05, "loss": 0.5972, "step": 3430 }, { "epoch": 0.07276621916820428, "grad_norm": 0.4383350908756256, "learning_rate": 1.9938398458676862e-05, "loss": 0.4916, "step": 3431 }, { "epoch": 0.07278742762613731, "grad_norm": 0.3400631248950958, "learning_rate": 1.993836149325982e-05, "loss": 0.5916, "step": 3432 }, { "epoch": 0.07280863608407033, "grad_norm": 0.34543684124946594, "learning_rate": 1.993832451678942e-05, "loss": 0.5536, "step": 3433 }, { "epoch": 0.07282984454200335, "grad_norm": 0.31696245074272156, "learning_rate": 1.9938287529265694e-05, "loss": 0.4909, "step": 3434 }, { "epoch": 0.07285105299993637, "grad_norm": 0.3484676778316498, "learning_rate": 1.9938250530688698e-05, "loss": 0.5922, "step": 3435 }, { "epoch": 0.0728722614578694, "grad_norm": 0.40839457511901855, "learning_rate": 1.9938213521058464e-05, "loss": 0.5349, "step": 3436 }, { "epoch": 0.07289346991580242, "grad_norm": 0.3406071364879608, "learning_rate": 1.9938176500375036e-05, "loss": 0.5123, "step": 3437 }, { "epoch": 0.07291467837373544, "grad_norm": 0.3305734694004059, "learning_rate": 1.9938139468638452e-05, "loss": 0.5529, "step": 3438 }, { "epoch": 0.07293588683166848, "grad_norm": 0.32083362340927124, "learning_rate": 1.993810242584876e-05, "loss": 0.4886, "step": 3439 }, { "epoch": 0.0729570952896015, "grad_norm": 0.3346279263496399, "learning_rate": 1.993806537200599e-05, "loss": 0.5396, "step": 3440 }, { "epoch": 0.07297830374753451, "grad_norm": 0.3327640891075134, "learning_rate": 1.9938028307110194e-05, "loss": 0.58, "step": 3441 }, { "epoch": 0.07299951220546753, "grad_norm": 0.358345627784729, "learning_rate": 1.993799123116141e-05, "loss": 0.5495, "step": 3442 }, { "epoch": 0.07302072066340057, "grad_norm": 0.33157244324684143, "learning_rate": 1.9937954144159677e-05, "loss": 0.5089, "step": 3443 }, { "epoch": 0.07304192912133359, "grad_norm": 0.35638079047203064, "learning_rate": 1.9937917046105038e-05, "loss": 0.5569, "step": 3444 }, { "epoch": 0.07306313757926661, "grad_norm": 0.3077659010887146, "learning_rate": 1.9937879936997537e-05, "loss": 0.5284, "step": 3445 }, { "epoch": 0.07308434603719964, "grad_norm": 0.3415648937225342, "learning_rate": 1.9937842816837208e-05, "loss": 0.5157, "step": 3446 }, { "epoch": 0.07310555449513266, "grad_norm": 0.30292069911956787, "learning_rate": 1.9937805685624098e-05, "loss": 0.5345, "step": 3447 }, { "epoch": 0.07312676295306568, "grad_norm": 0.35527315735816956, "learning_rate": 1.9937768543358246e-05, "loss": 0.551, "step": 3448 }, { "epoch": 0.0731479714109987, "grad_norm": 0.3369770646095276, "learning_rate": 1.9937731390039697e-05, "loss": 0.5598, "step": 3449 }, { "epoch": 0.07316917986893173, "grad_norm": 0.32555681467056274, "learning_rate": 1.9937694225668487e-05, "loss": 0.6017, "step": 3450 }, { "epoch": 0.07319038832686475, "grad_norm": 0.3230086863040924, "learning_rate": 1.9937657050244662e-05, "loss": 0.5177, "step": 3451 }, { "epoch": 0.07321159678479777, "grad_norm": 0.34240323305130005, "learning_rate": 1.9937619863768257e-05, "loss": 0.6525, "step": 3452 }, { "epoch": 0.0732328052427308, "grad_norm": 0.45544156432151794, "learning_rate": 1.9937582666239322e-05, "loss": 0.5303, "step": 3453 }, { "epoch": 0.07325401370066383, "grad_norm": 0.3127272427082062, "learning_rate": 1.993754545765789e-05, "loss": 0.489, "step": 3454 }, { "epoch": 0.07327522215859684, "grad_norm": 0.33341121673583984, "learning_rate": 1.993750823802401e-05, "loss": 0.5738, "step": 3455 }, { "epoch": 0.07329643061652988, "grad_norm": 0.331725150346756, "learning_rate": 1.9937471007337715e-05, "loss": 0.5753, "step": 3456 }, { "epoch": 0.0733176390744629, "grad_norm": 0.30346837639808655, "learning_rate": 1.9937433765599058e-05, "loss": 0.5022, "step": 3457 }, { "epoch": 0.07333884753239592, "grad_norm": 0.3103903532028198, "learning_rate": 1.993739651280807e-05, "loss": 0.5986, "step": 3458 }, { "epoch": 0.07336005599032894, "grad_norm": 0.3276510536670685, "learning_rate": 1.9937359248964794e-05, "loss": 0.4534, "step": 3459 }, { "epoch": 0.07338126444826197, "grad_norm": 0.3851799964904785, "learning_rate": 1.9937321974069275e-05, "loss": 0.606, "step": 3460 }, { "epoch": 0.07340247290619499, "grad_norm": 0.3152892291545868, "learning_rate": 1.9937284688121553e-05, "loss": 0.5679, "step": 3461 }, { "epoch": 0.07342368136412801, "grad_norm": 0.32321926951408386, "learning_rate": 1.993724739112167e-05, "loss": 0.5135, "step": 3462 }, { "epoch": 0.07344488982206104, "grad_norm": 0.34739580750465393, "learning_rate": 1.9937210083069663e-05, "loss": 0.4777, "step": 3463 }, { "epoch": 0.07346609827999406, "grad_norm": 0.39466291666030884, "learning_rate": 1.993717276396558e-05, "loss": 0.484, "step": 3464 }, { "epoch": 0.07348730673792708, "grad_norm": 0.41094696521759033, "learning_rate": 1.9937135433809464e-05, "loss": 0.537, "step": 3465 }, { "epoch": 0.0735085151958601, "grad_norm": 0.3229794502258301, "learning_rate": 1.9937098092601345e-05, "loss": 0.5436, "step": 3466 }, { "epoch": 0.07352972365379314, "grad_norm": 0.30314695835113525, "learning_rate": 1.9937060740341273e-05, "loss": 0.5406, "step": 3467 }, { "epoch": 0.07355093211172616, "grad_norm": 0.32073456048965454, "learning_rate": 1.993702337702929e-05, "loss": 0.4853, "step": 3468 }, { "epoch": 0.07357214056965918, "grad_norm": 0.33745038509368896, "learning_rate": 1.993698600266544e-05, "loss": 0.5429, "step": 3469 }, { "epoch": 0.07359334902759221, "grad_norm": 0.30796289443969727, "learning_rate": 1.9936948617249755e-05, "loss": 0.5869, "step": 3470 }, { "epoch": 0.07361455748552523, "grad_norm": 0.3221871852874756, "learning_rate": 1.9936911220782284e-05, "loss": 0.4996, "step": 3471 }, { "epoch": 0.07363576594345825, "grad_norm": 0.378913551568985, "learning_rate": 1.9936873813263067e-05, "loss": 0.4553, "step": 3472 }, { "epoch": 0.07365697440139128, "grad_norm": 0.31693825125694275, "learning_rate": 1.9936836394692146e-05, "loss": 0.6158, "step": 3473 }, { "epoch": 0.0736781828593243, "grad_norm": 0.3014770746231079, "learning_rate": 1.993679896506956e-05, "loss": 0.5017, "step": 3474 }, { "epoch": 0.07369939131725732, "grad_norm": 0.3628746271133423, "learning_rate": 1.9936761524395352e-05, "loss": 0.574, "step": 3475 }, { "epoch": 0.07372059977519034, "grad_norm": 0.3259855806827545, "learning_rate": 1.9936724072669563e-05, "loss": 0.5095, "step": 3476 }, { "epoch": 0.07374180823312337, "grad_norm": 0.33171507716178894, "learning_rate": 1.993668660989224e-05, "loss": 0.5042, "step": 3477 }, { "epoch": 0.0737630166910564, "grad_norm": 0.3270484209060669, "learning_rate": 1.993664913606342e-05, "loss": 0.5058, "step": 3478 }, { "epoch": 0.07378422514898941, "grad_norm": 0.360908567905426, "learning_rate": 1.9936611651183143e-05, "loss": 0.574, "step": 3479 }, { "epoch": 0.07380543360692245, "grad_norm": 0.32487353682518005, "learning_rate": 1.9936574155251453e-05, "loss": 0.5676, "step": 3480 }, { "epoch": 0.07382664206485547, "grad_norm": 0.35462939739227295, "learning_rate": 1.9936536648268392e-05, "loss": 0.5304, "step": 3481 }, { "epoch": 0.07384785052278849, "grad_norm": 0.31109127402305603, "learning_rate": 1.9936499130234e-05, "loss": 0.5081, "step": 3482 }, { "epoch": 0.0738690589807215, "grad_norm": 0.3517652750015259, "learning_rate": 1.9936461601148323e-05, "loss": 0.5738, "step": 3483 }, { "epoch": 0.07389026743865454, "grad_norm": 0.35076335072517395, "learning_rate": 1.99364240610114e-05, "loss": 0.5648, "step": 3484 }, { "epoch": 0.07391147589658756, "grad_norm": 0.3601747751235962, "learning_rate": 1.993638650982327e-05, "loss": 0.5761, "step": 3485 }, { "epoch": 0.07393268435452058, "grad_norm": 0.36828354001045227, "learning_rate": 1.9936348947583977e-05, "loss": 0.4719, "step": 3486 }, { "epoch": 0.07395389281245361, "grad_norm": 0.29879218339920044, "learning_rate": 1.993631137429356e-05, "loss": 0.5561, "step": 3487 }, { "epoch": 0.07397510127038663, "grad_norm": 0.3326307535171509, "learning_rate": 1.9936273789952068e-05, "loss": 0.533, "step": 3488 }, { "epoch": 0.07399630972831965, "grad_norm": 0.3436114490032196, "learning_rate": 1.993623619455954e-05, "loss": 0.5991, "step": 3489 }, { "epoch": 0.07401751818625268, "grad_norm": 0.32909873127937317, "learning_rate": 1.9936198588116017e-05, "loss": 0.5902, "step": 3490 }, { "epoch": 0.0740387266441857, "grad_norm": 0.31125137209892273, "learning_rate": 1.9936160970621537e-05, "loss": 0.5374, "step": 3491 }, { "epoch": 0.07405993510211872, "grad_norm": 0.3638637661933899, "learning_rate": 1.9936123342076146e-05, "loss": 0.5151, "step": 3492 }, { "epoch": 0.07408114356005174, "grad_norm": 0.30903011560440063, "learning_rate": 1.9936085702479886e-05, "loss": 0.4421, "step": 3493 }, { "epoch": 0.07410235201798478, "grad_norm": 0.32365676760673523, "learning_rate": 1.9936048051832797e-05, "loss": 0.5615, "step": 3494 }, { "epoch": 0.0741235604759178, "grad_norm": 0.3432924747467041, "learning_rate": 1.993601039013492e-05, "loss": 0.6186, "step": 3495 }, { "epoch": 0.07414476893385082, "grad_norm": 0.31339767575263977, "learning_rate": 1.9935972717386298e-05, "loss": 0.5395, "step": 3496 }, { "epoch": 0.07416597739178385, "grad_norm": 0.3676799535751343, "learning_rate": 1.9935935033586976e-05, "loss": 0.4769, "step": 3497 }, { "epoch": 0.07418718584971687, "grad_norm": 0.3882718086242676, "learning_rate": 1.9935897338736995e-05, "loss": 0.5612, "step": 3498 }, { "epoch": 0.07420839430764989, "grad_norm": 0.31558194756507874, "learning_rate": 1.9935859632836392e-05, "loss": 0.5239, "step": 3499 }, { "epoch": 0.07422960276558291, "grad_norm": 0.39555370807647705, "learning_rate": 1.993582191588521e-05, "loss": 0.558, "step": 3500 }, { "epoch": 0.07425081122351594, "grad_norm": 0.3300986588001251, "learning_rate": 1.99357841878835e-05, "loss": 0.4851, "step": 3501 }, { "epoch": 0.07427201968144896, "grad_norm": 0.49155393242836, "learning_rate": 1.9935746448831295e-05, "loss": 0.5703, "step": 3502 }, { "epoch": 0.07429322813938198, "grad_norm": 0.3820534646511078, "learning_rate": 1.9935708698728637e-05, "loss": 0.57, "step": 3503 }, { "epoch": 0.07431443659731501, "grad_norm": 0.368425190448761, "learning_rate": 1.993567093757557e-05, "loss": 0.4998, "step": 3504 }, { "epoch": 0.07433564505524803, "grad_norm": 0.3761482238769531, "learning_rate": 1.9935633165372137e-05, "loss": 0.517, "step": 3505 }, { "epoch": 0.07435685351318105, "grad_norm": 0.33116644620895386, "learning_rate": 1.9935595382118378e-05, "loss": 0.5444, "step": 3506 }, { "epoch": 0.07437806197111407, "grad_norm": 0.3169419467449188, "learning_rate": 1.9935557587814338e-05, "loss": 0.5241, "step": 3507 }, { "epoch": 0.0743992704290471, "grad_norm": 0.30155330896377563, "learning_rate": 1.993551978246006e-05, "loss": 0.49, "step": 3508 }, { "epoch": 0.07442047888698013, "grad_norm": 0.34964603185653687, "learning_rate": 1.9935481966055575e-05, "loss": 0.5891, "step": 3509 }, { "epoch": 0.07444168734491315, "grad_norm": 0.33202993869781494, "learning_rate": 1.993544413860094e-05, "loss": 0.6074, "step": 3510 }, { "epoch": 0.07446289580284618, "grad_norm": 0.3326171338558197, "learning_rate": 1.9935406300096186e-05, "loss": 0.5518, "step": 3511 }, { "epoch": 0.0744841042607792, "grad_norm": 0.35913899540901184, "learning_rate": 1.9935368450541363e-05, "loss": 0.5067, "step": 3512 }, { "epoch": 0.07450531271871222, "grad_norm": 0.36324888467788696, "learning_rate": 1.9935330589936507e-05, "loss": 0.4763, "step": 3513 }, { "epoch": 0.07452652117664525, "grad_norm": 0.35501423478126526, "learning_rate": 1.9935292718281666e-05, "loss": 0.5013, "step": 3514 }, { "epoch": 0.07454772963457827, "grad_norm": 0.5712665319442749, "learning_rate": 1.9935254835576874e-05, "loss": 0.5111, "step": 3515 }, { "epoch": 0.07456893809251129, "grad_norm": 0.3246420919895172, "learning_rate": 1.993521694182218e-05, "loss": 0.5706, "step": 3516 }, { "epoch": 0.07459014655044431, "grad_norm": 0.31571856141090393, "learning_rate": 1.9935179037017628e-05, "loss": 0.4958, "step": 3517 }, { "epoch": 0.07461135500837734, "grad_norm": 0.3132537603378296, "learning_rate": 1.993514112116325e-05, "loss": 0.5592, "step": 3518 }, { "epoch": 0.07463256346631036, "grad_norm": 0.3205328583717346, "learning_rate": 1.9935103194259095e-05, "loss": 0.6117, "step": 3519 }, { "epoch": 0.07465377192424338, "grad_norm": 0.36165785789489746, "learning_rate": 1.993506525630521e-05, "loss": 0.5621, "step": 3520 }, { "epoch": 0.07467498038217642, "grad_norm": 0.29878881573677063, "learning_rate": 1.9935027307301627e-05, "loss": 0.4105, "step": 3521 }, { "epoch": 0.07469618884010944, "grad_norm": 0.3649044632911682, "learning_rate": 1.9934989347248392e-05, "loss": 0.5127, "step": 3522 }, { "epoch": 0.07471739729804246, "grad_norm": 0.2999210059642792, "learning_rate": 1.9934951376145554e-05, "loss": 0.529, "step": 3523 }, { "epoch": 0.07473860575597548, "grad_norm": 0.33666643500328064, "learning_rate": 1.9934913393993144e-05, "loss": 0.4716, "step": 3524 }, { "epoch": 0.07475981421390851, "grad_norm": 0.32756879925727844, "learning_rate": 1.9934875400791213e-05, "loss": 0.4955, "step": 3525 }, { "epoch": 0.07478102267184153, "grad_norm": 0.3254339098930359, "learning_rate": 1.9934837396539794e-05, "loss": 0.5143, "step": 3526 }, { "epoch": 0.07480223112977455, "grad_norm": 0.3328246474266052, "learning_rate": 1.993479938123894e-05, "loss": 0.5757, "step": 3527 }, { "epoch": 0.07482343958770758, "grad_norm": 0.3185037076473236, "learning_rate": 1.993476135488869e-05, "loss": 0.5002, "step": 3528 }, { "epoch": 0.0748446480456406, "grad_norm": 0.3210792541503906, "learning_rate": 1.993472331748908e-05, "loss": 0.4705, "step": 3529 }, { "epoch": 0.07486585650357362, "grad_norm": 0.32978731393814087, "learning_rate": 1.993468526904016e-05, "loss": 0.5908, "step": 3530 }, { "epoch": 0.07488706496150666, "grad_norm": 0.31810101866722107, "learning_rate": 1.9934647209541967e-05, "loss": 0.5615, "step": 3531 }, { "epoch": 0.07490827341943967, "grad_norm": 0.3078446090221405, "learning_rate": 1.993460913899455e-05, "loss": 0.5228, "step": 3532 }, { "epoch": 0.0749294818773727, "grad_norm": 0.31439724564552307, "learning_rate": 1.9934571057397945e-05, "loss": 0.4869, "step": 3533 }, { "epoch": 0.07495069033530571, "grad_norm": 0.35435524582862854, "learning_rate": 1.9934532964752195e-05, "loss": 0.5111, "step": 3534 }, { "epoch": 0.07497189879323875, "grad_norm": 0.31408390402793884, "learning_rate": 1.9934494861057347e-05, "loss": 0.5102, "step": 3535 }, { "epoch": 0.07499310725117177, "grad_norm": 0.3446716070175171, "learning_rate": 1.9934456746313435e-05, "loss": 0.4681, "step": 3536 }, { "epoch": 0.07501431570910479, "grad_norm": 0.6068913340568542, "learning_rate": 1.993441862052051e-05, "loss": 0.5528, "step": 3537 }, { "epoch": 0.07503552416703782, "grad_norm": 0.32414868474006653, "learning_rate": 1.993438048367861e-05, "loss": 0.4879, "step": 3538 }, { "epoch": 0.07505673262497084, "grad_norm": 0.33811110258102417, "learning_rate": 1.993434233578778e-05, "loss": 0.5455, "step": 3539 }, { "epoch": 0.07507794108290386, "grad_norm": 0.3215664029121399, "learning_rate": 1.9934304176848056e-05, "loss": 0.6304, "step": 3540 }, { "epoch": 0.07509914954083688, "grad_norm": 0.34032127261161804, "learning_rate": 1.993426600685949e-05, "loss": 0.5758, "step": 3541 }, { "epoch": 0.07512035799876991, "grad_norm": 0.30421584844589233, "learning_rate": 1.9934227825822118e-05, "loss": 0.6045, "step": 3542 }, { "epoch": 0.07514156645670293, "grad_norm": 0.37311577796936035, "learning_rate": 1.9934189633735987e-05, "loss": 0.4793, "step": 3543 }, { "epoch": 0.07516277491463595, "grad_norm": 0.3388522267341614, "learning_rate": 1.993415143060113e-05, "loss": 0.6434, "step": 3544 }, { "epoch": 0.07518398337256899, "grad_norm": 0.31364500522613525, "learning_rate": 1.99341132164176e-05, "loss": 0.4871, "step": 3545 }, { "epoch": 0.075205191830502, "grad_norm": 0.3217265009880066, "learning_rate": 1.9934074991185434e-05, "loss": 0.5219, "step": 3546 }, { "epoch": 0.07522640028843502, "grad_norm": 0.31899258494377136, "learning_rate": 1.993403675490468e-05, "loss": 0.609, "step": 3547 }, { "epoch": 0.07524760874636806, "grad_norm": 0.3191778361797333, "learning_rate": 1.9933998507575373e-05, "loss": 0.5118, "step": 3548 }, { "epoch": 0.07526881720430108, "grad_norm": 0.3029082119464874, "learning_rate": 1.993396024919756e-05, "loss": 0.5194, "step": 3549 }, { "epoch": 0.0752900256622341, "grad_norm": 0.34182512760162354, "learning_rate": 1.9933921979771284e-05, "loss": 0.6174, "step": 3550 }, { "epoch": 0.07531123412016712, "grad_norm": 0.35960617661476135, "learning_rate": 1.9933883699296587e-05, "loss": 0.5688, "step": 3551 }, { "epoch": 0.07533244257810015, "grad_norm": 0.42524242401123047, "learning_rate": 1.9933845407773508e-05, "loss": 0.5575, "step": 3552 }, { "epoch": 0.07535365103603317, "grad_norm": 0.30504465103149414, "learning_rate": 1.9933807105202093e-05, "loss": 0.534, "step": 3553 }, { "epoch": 0.07537485949396619, "grad_norm": 0.3124942183494568, "learning_rate": 1.9933768791582384e-05, "loss": 0.5534, "step": 3554 }, { "epoch": 0.07539606795189922, "grad_norm": 0.31718194484710693, "learning_rate": 1.9933730466914424e-05, "loss": 0.5014, "step": 3555 }, { "epoch": 0.07541727640983224, "grad_norm": 0.4116455018520355, "learning_rate": 1.9933692131198258e-05, "loss": 0.6116, "step": 3556 }, { "epoch": 0.07543848486776526, "grad_norm": 0.32541897892951965, "learning_rate": 1.9933653784433923e-05, "loss": 0.5143, "step": 3557 }, { "epoch": 0.07545969332569828, "grad_norm": 0.33775338530540466, "learning_rate": 1.9933615426621465e-05, "loss": 0.5866, "step": 3558 }, { "epoch": 0.07548090178363132, "grad_norm": 0.3645058274269104, "learning_rate": 1.9933577057760924e-05, "loss": 0.6122, "step": 3559 }, { "epoch": 0.07550211024156434, "grad_norm": 0.3692950904369354, "learning_rate": 1.993353867785235e-05, "loss": 0.5093, "step": 3560 }, { "epoch": 0.07552331869949735, "grad_norm": 0.34861111640930176, "learning_rate": 1.9933500286895777e-05, "loss": 0.5661, "step": 3561 }, { "epoch": 0.07554452715743039, "grad_norm": 0.33559849858283997, "learning_rate": 1.9933461884891252e-05, "loss": 0.5539, "step": 3562 }, { "epoch": 0.07556573561536341, "grad_norm": 0.4025788903236389, "learning_rate": 1.9933423471838816e-05, "loss": 0.6145, "step": 3563 }, { "epoch": 0.07558694407329643, "grad_norm": 0.3977574408054352, "learning_rate": 1.9933385047738513e-05, "loss": 0.5795, "step": 3564 }, { "epoch": 0.07560815253122946, "grad_norm": 0.3117772042751312, "learning_rate": 1.993334661259039e-05, "loss": 0.4426, "step": 3565 }, { "epoch": 0.07562936098916248, "grad_norm": 0.3362230360507965, "learning_rate": 1.993330816639448e-05, "loss": 0.4448, "step": 3566 }, { "epoch": 0.0756505694470955, "grad_norm": 0.3828495442867279, "learning_rate": 1.993326970915083e-05, "loss": 0.5168, "step": 3567 }, { "epoch": 0.07567177790502852, "grad_norm": 0.33678603172302246, "learning_rate": 1.9933231240859486e-05, "loss": 0.5948, "step": 3568 }, { "epoch": 0.07569298636296155, "grad_norm": 0.31574055552482605, "learning_rate": 1.9933192761520488e-05, "loss": 0.5649, "step": 3569 }, { "epoch": 0.07571419482089457, "grad_norm": 0.35004785656929016, "learning_rate": 1.993315427113388e-05, "loss": 0.5948, "step": 3570 }, { "epoch": 0.07573540327882759, "grad_norm": 0.3606180250644684, "learning_rate": 1.9933115769699705e-05, "loss": 0.5692, "step": 3571 }, { "epoch": 0.07575661173676063, "grad_norm": 0.33912578225135803, "learning_rate": 1.9933077257218e-05, "loss": 0.5108, "step": 3572 }, { "epoch": 0.07577782019469365, "grad_norm": 0.34530386328697205, "learning_rate": 1.993303873368882e-05, "loss": 0.5922, "step": 3573 }, { "epoch": 0.07579902865262667, "grad_norm": 0.2982379198074341, "learning_rate": 1.9933000199112195e-05, "loss": 0.5265, "step": 3574 }, { "epoch": 0.07582023711055969, "grad_norm": 0.28241339325904846, "learning_rate": 1.9932961653488176e-05, "loss": 0.4789, "step": 3575 }, { "epoch": 0.07584144556849272, "grad_norm": 0.3505716323852539, "learning_rate": 1.9932923096816804e-05, "loss": 0.5288, "step": 3576 }, { "epoch": 0.07586265402642574, "grad_norm": 0.31045234203338623, "learning_rate": 1.993288452909812e-05, "loss": 0.5346, "step": 3577 }, { "epoch": 0.07588386248435876, "grad_norm": 0.32544660568237305, "learning_rate": 1.9932845950332166e-05, "loss": 0.5445, "step": 3578 }, { "epoch": 0.07590507094229179, "grad_norm": 0.3463183343410492, "learning_rate": 1.993280736051899e-05, "loss": 0.6112, "step": 3579 }, { "epoch": 0.07592627940022481, "grad_norm": 0.5576089024543762, "learning_rate": 1.9932768759658635e-05, "loss": 0.5932, "step": 3580 }, { "epoch": 0.07594748785815783, "grad_norm": 0.4012240469455719, "learning_rate": 1.9932730147751137e-05, "loss": 0.5655, "step": 3581 }, { "epoch": 0.07596869631609085, "grad_norm": 0.31003209948539734, "learning_rate": 1.993269152479654e-05, "loss": 0.5554, "step": 3582 }, { "epoch": 0.07598990477402388, "grad_norm": 0.32855746150016785, "learning_rate": 1.9932652890794896e-05, "loss": 0.4809, "step": 3583 }, { "epoch": 0.0760111132319569, "grad_norm": 0.34629639983177185, "learning_rate": 1.9932614245746235e-05, "loss": 0.5811, "step": 3584 }, { "epoch": 0.07603232168988992, "grad_norm": 0.3320378363132477, "learning_rate": 1.993257558965061e-05, "loss": 0.5357, "step": 3585 }, { "epoch": 0.07605353014782296, "grad_norm": 0.35558319091796875, "learning_rate": 1.9932536922508063e-05, "loss": 0.5698, "step": 3586 }, { "epoch": 0.07607473860575598, "grad_norm": 0.31518372893333435, "learning_rate": 1.993249824431863e-05, "loss": 0.508, "step": 3587 }, { "epoch": 0.076095947063689, "grad_norm": 0.3127618432044983, "learning_rate": 1.993245955508236e-05, "loss": 0.4374, "step": 3588 }, { "epoch": 0.07611715552162203, "grad_norm": 0.31938183307647705, "learning_rate": 1.9932420854799298e-05, "loss": 0.5672, "step": 3589 }, { "epoch": 0.07613836397955505, "grad_norm": 0.33281299471855164, "learning_rate": 1.993238214346948e-05, "loss": 0.5763, "step": 3590 }, { "epoch": 0.07615957243748807, "grad_norm": 0.32326823472976685, "learning_rate": 1.993234342109296e-05, "loss": 0.5654, "step": 3591 }, { "epoch": 0.07618078089542109, "grad_norm": 0.33616673946380615, "learning_rate": 1.9932304687669766e-05, "loss": 0.5648, "step": 3592 }, { "epoch": 0.07620198935335412, "grad_norm": 0.29251083731651306, "learning_rate": 1.9932265943199952e-05, "loss": 0.4974, "step": 3593 }, { "epoch": 0.07622319781128714, "grad_norm": 0.30347901582717896, "learning_rate": 1.993222718768356e-05, "loss": 0.559, "step": 3594 }, { "epoch": 0.07624440626922016, "grad_norm": 0.3121647536754608, "learning_rate": 1.9932188421120627e-05, "loss": 0.5148, "step": 3595 }, { "epoch": 0.0762656147271532, "grad_norm": 0.28321918845176697, "learning_rate": 1.9932149643511204e-05, "loss": 0.4574, "step": 3596 }, { "epoch": 0.07628682318508621, "grad_norm": 0.35422271490097046, "learning_rate": 1.9932110854855326e-05, "loss": 0.5935, "step": 3597 }, { "epoch": 0.07630803164301923, "grad_norm": 0.4010635018348694, "learning_rate": 1.9932072055153045e-05, "loss": 0.5473, "step": 3598 }, { "epoch": 0.07632924010095225, "grad_norm": 0.2910021245479584, "learning_rate": 1.99320332444044e-05, "loss": 0.5548, "step": 3599 }, { "epoch": 0.07635044855888529, "grad_norm": 0.368461549282074, "learning_rate": 1.993199442260943e-05, "loss": 0.5882, "step": 3600 }, { "epoch": 0.0763716570168183, "grad_norm": 0.3001369535923004, "learning_rate": 1.9931955589768182e-05, "loss": 0.557, "step": 3601 }, { "epoch": 0.07639286547475133, "grad_norm": 0.37164345383644104, "learning_rate": 1.9931916745880705e-05, "loss": 0.5028, "step": 3602 }, { "epoch": 0.07641407393268436, "grad_norm": 0.34343910217285156, "learning_rate": 1.993187789094703e-05, "loss": 0.5001, "step": 3603 }, { "epoch": 0.07643528239061738, "grad_norm": 0.3242245316505432, "learning_rate": 1.993183902496721e-05, "loss": 0.4493, "step": 3604 }, { "epoch": 0.0764564908485504, "grad_norm": 0.33152878284454346, "learning_rate": 1.9931800147941285e-05, "loss": 0.5862, "step": 3605 }, { "epoch": 0.07647769930648343, "grad_norm": 0.350074827671051, "learning_rate": 1.99317612598693e-05, "loss": 0.5516, "step": 3606 }, { "epoch": 0.07649890776441645, "grad_norm": 0.3240226209163666, "learning_rate": 1.9931722360751293e-05, "loss": 0.5091, "step": 3607 }, { "epoch": 0.07652011622234947, "grad_norm": 0.3465574383735657, "learning_rate": 1.993168345058731e-05, "loss": 0.5137, "step": 3608 }, { "epoch": 0.07654132468028249, "grad_norm": 0.3125419318675995, "learning_rate": 1.9931644529377395e-05, "loss": 0.4809, "step": 3609 }, { "epoch": 0.07656253313821552, "grad_norm": 0.4096183478832245, "learning_rate": 1.9931605597121596e-05, "loss": 0.5178, "step": 3610 }, { "epoch": 0.07658374159614854, "grad_norm": 0.36514851450920105, "learning_rate": 1.9931566653819946e-05, "loss": 0.5826, "step": 3611 }, { "epoch": 0.07660495005408156, "grad_norm": 0.3657795786857605, "learning_rate": 1.9931527699472497e-05, "loss": 0.5667, "step": 3612 }, { "epoch": 0.0766261585120146, "grad_norm": 0.3326996862888336, "learning_rate": 1.993148873407929e-05, "loss": 0.552, "step": 3613 }, { "epoch": 0.07664736696994762, "grad_norm": 0.3430565595626831, "learning_rate": 1.9931449757640364e-05, "loss": 0.5177, "step": 3614 }, { "epoch": 0.07666857542788064, "grad_norm": 0.31225016713142395, "learning_rate": 1.9931410770155767e-05, "loss": 0.5066, "step": 3615 }, { "epoch": 0.07668978388581366, "grad_norm": 0.35499244928359985, "learning_rate": 1.9931371771625545e-05, "loss": 0.5483, "step": 3616 }, { "epoch": 0.07671099234374669, "grad_norm": 0.3291803002357483, "learning_rate": 1.9931332762049733e-05, "loss": 0.605, "step": 3617 }, { "epoch": 0.07673220080167971, "grad_norm": 0.3207278549671173, "learning_rate": 1.993129374142838e-05, "loss": 0.5608, "step": 3618 }, { "epoch": 0.07675340925961273, "grad_norm": 0.3051820397377014, "learning_rate": 1.993125470976153e-05, "loss": 0.5681, "step": 3619 }, { "epoch": 0.07677461771754576, "grad_norm": 0.3325388431549072, "learning_rate": 1.993121566704922e-05, "loss": 0.4954, "step": 3620 }, { "epoch": 0.07679582617547878, "grad_norm": 0.3182115852832794, "learning_rate": 1.9931176613291506e-05, "loss": 0.5692, "step": 3621 }, { "epoch": 0.0768170346334118, "grad_norm": 0.6684937477111816, "learning_rate": 1.993113754848842e-05, "loss": 0.6068, "step": 3622 }, { "epoch": 0.07683824309134483, "grad_norm": 0.29675257205963135, "learning_rate": 1.9931098472640008e-05, "loss": 0.4944, "step": 3623 }, { "epoch": 0.07685945154927785, "grad_norm": 0.34715864062309265, "learning_rate": 1.9931059385746313e-05, "loss": 0.5798, "step": 3624 }, { "epoch": 0.07688066000721087, "grad_norm": 0.3293284475803375, "learning_rate": 1.9931020287807383e-05, "loss": 0.5733, "step": 3625 }, { "epoch": 0.0769018684651439, "grad_norm": 0.33949142694473267, "learning_rate": 1.993098117882326e-05, "loss": 0.512, "step": 3626 }, { "epoch": 0.07692307692307693, "grad_norm": 0.37118425965309143, "learning_rate": 1.9930942058793984e-05, "loss": 0.6085, "step": 3627 }, { "epoch": 0.07694428538100995, "grad_norm": 0.3177777826786041, "learning_rate": 1.9930902927719596e-05, "loss": 0.4586, "step": 3628 }, { "epoch": 0.07696549383894297, "grad_norm": 0.36410292983055115, "learning_rate": 1.993086378560015e-05, "loss": 0.5524, "step": 3629 }, { "epoch": 0.076986702296876, "grad_norm": 0.3202318251132965, "learning_rate": 1.9930824632435684e-05, "loss": 0.5463, "step": 3630 }, { "epoch": 0.07700791075480902, "grad_norm": 0.3188798427581787, "learning_rate": 1.993078546822624e-05, "loss": 0.5188, "step": 3631 }, { "epoch": 0.07702911921274204, "grad_norm": 0.3176670968532562, "learning_rate": 1.993074629297186e-05, "loss": 0.5787, "step": 3632 }, { "epoch": 0.07705032767067506, "grad_norm": 0.3480730652809143, "learning_rate": 1.993070710667259e-05, "loss": 0.6464, "step": 3633 }, { "epoch": 0.07707153612860809, "grad_norm": 0.3430778980255127, "learning_rate": 1.9930667909328475e-05, "loss": 0.5181, "step": 3634 }, { "epoch": 0.07709274458654111, "grad_norm": 0.3039397895336151, "learning_rate": 1.993062870093956e-05, "loss": 0.5018, "step": 3635 }, { "epoch": 0.07711395304447413, "grad_norm": 0.2994823753833771, "learning_rate": 1.9930589481505886e-05, "loss": 0.5124, "step": 3636 }, { "epoch": 0.07713516150240717, "grad_norm": 0.2945428788661957, "learning_rate": 1.9930550251027492e-05, "loss": 0.5225, "step": 3637 }, { "epoch": 0.07715636996034018, "grad_norm": 0.35446855425834656, "learning_rate": 1.993051100950443e-05, "loss": 0.4919, "step": 3638 }, { "epoch": 0.0771775784182732, "grad_norm": 0.4121154546737671, "learning_rate": 1.993047175693674e-05, "loss": 0.5228, "step": 3639 }, { "epoch": 0.07719878687620624, "grad_norm": 0.361889511346817, "learning_rate": 1.9930432493324463e-05, "loss": 0.4457, "step": 3640 }, { "epoch": 0.07721999533413926, "grad_norm": 0.31859663128852844, "learning_rate": 1.9930393218667646e-05, "loss": 0.5561, "step": 3641 }, { "epoch": 0.07724120379207228, "grad_norm": 0.44680145382881165, "learning_rate": 1.9930353932966337e-05, "loss": 0.5289, "step": 3642 }, { "epoch": 0.0772624122500053, "grad_norm": 0.32003191113471985, "learning_rate": 1.9930314636220568e-05, "loss": 0.5563, "step": 3643 }, { "epoch": 0.07728362070793833, "grad_norm": 0.2907082140445709, "learning_rate": 1.993027532843039e-05, "loss": 0.4992, "step": 3644 }, { "epoch": 0.07730482916587135, "grad_norm": 0.3720782697200775, "learning_rate": 1.993023600959585e-05, "loss": 0.553, "step": 3645 }, { "epoch": 0.07732603762380437, "grad_norm": 0.3596644103527069, "learning_rate": 1.9930196679716984e-05, "loss": 0.6063, "step": 3646 }, { "epoch": 0.0773472460817374, "grad_norm": 0.3195468783378601, "learning_rate": 1.993015733879384e-05, "loss": 0.509, "step": 3647 }, { "epoch": 0.07736845453967042, "grad_norm": 0.3240601718425751, "learning_rate": 1.9930117986826466e-05, "loss": 0.4914, "step": 3648 }, { "epoch": 0.07738966299760344, "grad_norm": 0.32508033514022827, "learning_rate": 1.9930078623814894e-05, "loss": 0.624, "step": 3649 }, { "epoch": 0.07741087145553646, "grad_norm": 0.30223527550697327, "learning_rate": 1.9930039249759178e-05, "loss": 0.5942, "step": 3650 }, { "epoch": 0.0774320799134695, "grad_norm": 0.27942955493927, "learning_rate": 1.992999986465936e-05, "loss": 0.4213, "step": 3651 }, { "epoch": 0.07745328837140251, "grad_norm": 0.3322221636772156, "learning_rate": 1.992996046851548e-05, "loss": 0.5773, "step": 3652 }, { "epoch": 0.07747449682933553, "grad_norm": 0.3256630301475525, "learning_rate": 1.9929921061327585e-05, "loss": 0.5307, "step": 3653 }, { "epoch": 0.07749570528726857, "grad_norm": 0.3714809715747833, "learning_rate": 1.992988164309572e-05, "loss": 0.5981, "step": 3654 }, { "epoch": 0.07751691374520159, "grad_norm": 0.34589141607284546, "learning_rate": 1.9929842213819927e-05, "loss": 0.5157, "step": 3655 }, { "epoch": 0.07753812220313461, "grad_norm": 0.32059210538864136, "learning_rate": 1.9929802773500247e-05, "loss": 0.5151, "step": 3656 }, { "epoch": 0.07755933066106763, "grad_norm": 0.3116569519042969, "learning_rate": 1.992976332213673e-05, "loss": 0.5876, "step": 3657 }, { "epoch": 0.07758053911900066, "grad_norm": 0.3273245096206665, "learning_rate": 1.992972385972941e-05, "loss": 0.5186, "step": 3658 }, { "epoch": 0.07760174757693368, "grad_norm": 0.509803056716919, "learning_rate": 1.9929684386278343e-05, "loss": 0.4898, "step": 3659 }, { "epoch": 0.0776229560348667, "grad_norm": 0.38152042031288147, "learning_rate": 1.9929644901783565e-05, "loss": 0.5578, "step": 3660 }, { "epoch": 0.07764416449279973, "grad_norm": 0.4007568955421448, "learning_rate": 1.9929605406245126e-05, "loss": 0.56, "step": 3661 }, { "epoch": 0.07766537295073275, "grad_norm": 0.34244561195373535, "learning_rate": 1.9929565899663062e-05, "loss": 0.5365, "step": 3662 }, { "epoch": 0.07768658140866577, "grad_norm": 0.3295951187610626, "learning_rate": 1.9929526382037423e-05, "loss": 0.564, "step": 3663 }, { "epoch": 0.0777077898665988, "grad_norm": 0.3172720670700073, "learning_rate": 1.992948685336825e-05, "loss": 0.5889, "step": 3664 }, { "epoch": 0.07772899832453183, "grad_norm": 0.31856343150138855, "learning_rate": 1.9929447313655587e-05, "loss": 0.5227, "step": 3665 }, { "epoch": 0.07775020678246485, "grad_norm": 0.3201015293598175, "learning_rate": 1.992940776289948e-05, "loss": 0.4382, "step": 3666 }, { "epoch": 0.07777141524039786, "grad_norm": 0.34670355916023254, "learning_rate": 1.9929368201099972e-05, "loss": 0.5581, "step": 3667 }, { "epoch": 0.0777926236983309, "grad_norm": 0.31424984335899353, "learning_rate": 1.9929328628257106e-05, "loss": 0.5018, "step": 3668 }, { "epoch": 0.07781383215626392, "grad_norm": 0.33572325110435486, "learning_rate": 1.992928904437093e-05, "loss": 0.548, "step": 3669 }, { "epoch": 0.07783504061419694, "grad_norm": 0.33907729387283325, "learning_rate": 1.9929249449441482e-05, "loss": 0.5431, "step": 3670 }, { "epoch": 0.07785624907212997, "grad_norm": 0.3212750554084778, "learning_rate": 1.9929209843468812e-05, "loss": 0.4978, "step": 3671 }, { "epoch": 0.07787745753006299, "grad_norm": 0.33053529262542725, "learning_rate": 1.9929170226452958e-05, "loss": 0.5047, "step": 3672 }, { "epoch": 0.07789866598799601, "grad_norm": 0.32277557253837585, "learning_rate": 1.992913059839397e-05, "loss": 0.4472, "step": 3673 }, { "epoch": 0.07791987444592903, "grad_norm": 0.35500258207321167, "learning_rate": 1.9929090959291885e-05, "loss": 0.5736, "step": 3674 }, { "epoch": 0.07794108290386206, "grad_norm": 0.34606504440307617, "learning_rate": 1.9929051309146752e-05, "loss": 0.562, "step": 3675 }, { "epoch": 0.07796229136179508, "grad_norm": 0.36418846249580383, "learning_rate": 1.9929011647958617e-05, "loss": 0.6229, "step": 3676 }, { "epoch": 0.0779834998197281, "grad_norm": 0.3449963629245758, "learning_rate": 1.9928971975727524e-05, "loss": 0.6241, "step": 3677 }, { "epoch": 0.07800470827766114, "grad_norm": 0.3085576593875885, "learning_rate": 1.992893229245351e-05, "loss": 0.571, "step": 3678 }, { "epoch": 0.07802591673559416, "grad_norm": 0.3133513927459717, "learning_rate": 1.9928892598136625e-05, "loss": 0.4714, "step": 3679 }, { "epoch": 0.07804712519352718, "grad_norm": 0.29119136929512024, "learning_rate": 1.992885289277691e-05, "loss": 0.4922, "step": 3680 }, { "epoch": 0.07806833365146021, "grad_norm": 0.31488290429115295, "learning_rate": 1.9928813176374415e-05, "loss": 0.5721, "step": 3681 }, { "epoch": 0.07808954210939323, "grad_norm": 0.31345367431640625, "learning_rate": 1.9928773448929177e-05, "loss": 0.4516, "step": 3682 }, { "epoch": 0.07811075056732625, "grad_norm": 0.3154347836971283, "learning_rate": 1.9928733710441246e-05, "loss": 0.5271, "step": 3683 }, { "epoch": 0.07813195902525927, "grad_norm": 0.34966108202934265, "learning_rate": 1.992869396091066e-05, "loss": 0.5184, "step": 3684 }, { "epoch": 0.0781531674831923, "grad_norm": 0.3585110306739807, "learning_rate": 1.9928654200337473e-05, "loss": 0.6071, "step": 3685 }, { "epoch": 0.07817437594112532, "grad_norm": 0.35850009322166443, "learning_rate": 1.9928614428721717e-05, "loss": 0.5898, "step": 3686 }, { "epoch": 0.07819558439905834, "grad_norm": 0.3214147984981537, "learning_rate": 1.9928574646063447e-05, "loss": 0.5729, "step": 3687 }, { "epoch": 0.07821679285699137, "grad_norm": 0.389639288187027, "learning_rate": 1.99285348523627e-05, "loss": 0.5451, "step": 3688 }, { "epoch": 0.0782380013149244, "grad_norm": 0.3425115644931793, "learning_rate": 1.9928495047619522e-05, "loss": 0.5112, "step": 3689 }, { "epoch": 0.07825920977285741, "grad_norm": 0.31959590315818787, "learning_rate": 1.992845523183396e-05, "loss": 0.5929, "step": 3690 }, { "epoch": 0.07828041823079043, "grad_norm": 0.3537054657936096, "learning_rate": 1.992841540500605e-05, "loss": 0.56, "step": 3691 }, { "epoch": 0.07830162668872347, "grad_norm": 0.3697081506252289, "learning_rate": 1.992837556713585e-05, "loss": 0.6733, "step": 3692 }, { "epoch": 0.07832283514665649, "grad_norm": 0.334004282951355, "learning_rate": 1.9928335718223396e-05, "loss": 0.4964, "step": 3693 }, { "epoch": 0.0783440436045895, "grad_norm": 0.31872329115867615, "learning_rate": 1.992829585826873e-05, "loss": 0.5339, "step": 3694 }, { "epoch": 0.07836525206252254, "grad_norm": 0.3705868124961853, "learning_rate": 1.9928255987271903e-05, "loss": 0.5579, "step": 3695 }, { "epoch": 0.07838646052045556, "grad_norm": 0.33607712388038635, "learning_rate": 1.9928216105232955e-05, "loss": 0.5165, "step": 3696 }, { "epoch": 0.07840766897838858, "grad_norm": 0.309808611869812, "learning_rate": 1.992817621215193e-05, "loss": 0.5484, "step": 3697 }, { "epoch": 0.07842887743632161, "grad_norm": 0.2986443042755127, "learning_rate": 1.9928136308028873e-05, "loss": 0.5131, "step": 3698 }, { "epoch": 0.07845008589425463, "grad_norm": 0.35309484601020813, "learning_rate": 1.992809639286383e-05, "loss": 0.4907, "step": 3699 }, { "epoch": 0.07847129435218765, "grad_norm": 0.3166668713092804, "learning_rate": 1.9928056466656847e-05, "loss": 0.5257, "step": 3700 }, { "epoch": 0.07849250281012067, "grad_norm": 0.3633171319961548, "learning_rate": 1.9928016529407962e-05, "loss": 0.5026, "step": 3701 }, { "epoch": 0.0785137112680537, "grad_norm": 0.42933157086372375, "learning_rate": 1.9927976581117227e-05, "loss": 0.5228, "step": 3702 }, { "epoch": 0.07853491972598672, "grad_norm": 0.34673842787742615, "learning_rate": 1.9927936621784682e-05, "loss": 0.5634, "step": 3703 }, { "epoch": 0.07855612818391974, "grad_norm": 0.3387942910194397, "learning_rate": 1.9927896651410368e-05, "loss": 0.5159, "step": 3704 }, { "epoch": 0.07857733664185278, "grad_norm": 0.30818331241607666, "learning_rate": 1.9927856669994336e-05, "loss": 0.531, "step": 3705 }, { "epoch": 0.0785985450997858, "grad_norm": 0.31276223063468933, "learning_rate": 1.992781667753663e-05, "loss": 0.5615, "step": 3706 }, { "epoch": 0.07861975355771882, "grad_norm": 0.4078735411167145, "learning_rate": 1.992777667403729e-05, "loss": 0.5385, "step": 3707 }, { "epoch": 0.07864096201565184, "grad_norm": 0.3389255106449127, "learning_rate": 1.9927736659496367e-05, "loss": 0.5229, "step": 3708 }, { "epoch": 0.07866217047358487, "grad_norm": 0.3421410024166107, "learning_rate": 1.99276966339139e-05, "loss": 0.6233, "step": 3709 }, { "epoch": 0.07868337893151789, "grad_norm": 0.36867615580558777, "learning_rate": 1.992765659728993e-05, "loss": 0.5585, "step": 3710 }, { "epoch": 0.07870458738945091, "grad_norm": 0.3475898504257202, "learning_rate": 1.992761654962451e-05, "loss": 0.4928, "step": 3711 }, { "epoch": 0.07872579584738394, "grad_norm": 0.3119983375072479, "learning_rate": 1.992757649091768e-05, "loss": 0.5302, "step": 3712 }, { "epoch": 0.07874700430531696, "grad_norm": 0.3468345105648041, "learning_rate": 1.992753642116949e-05, "loss": 0.5283, "step": 3713 }, { "epoch": 0.07876821276324998, "grad_norm": 0.3225080668926239, "learning_rate": 1.9927496340379977e-05, "loss": 0.4994, "step": 3714 }, { "epoch": 0.07878942122118301, "grad_norm": 0.38672733306884766, "learning_rate": 1.992745624854919e-05, "loss": 0.4789, "step": 3715 }, { "epoch": 0.07881062967911603, "grad_norm": 0.3665793538093567, "learning_rate": 1.992741614567717e-05, "loss": 0.5791, "step": 3716 }, { "epoch": 0.07883183813704905, "grad_norm": 0.3158472776412964, "learning_rate": 1.9927376031763965e-05, "loss": 0.4928, "step": 3717 }, { "epoch": 0.07885304659498207, "grad_norm": 0.31260305643081665, "learning_rate": 1.9927335906809618e-05, "loss": 0.5219, "step": 3718 }, { "epoch": 0.07887425505291511, "grad_norm": 0.3355560004711151, "learning_rate": 1.9927295770814176e-05, "loss": 0.5272, "step": 3719 }, { "epoch": 0.07889546351084813, "grad_norm": 0.33301684260368347, "learning_rate": 1.992725562377768e-05, "loss": 0.5182, "step": 3720 }, { "epoch": 0.07891667196878115, "grad_norm": 0.34613341093063354, "learning_rate": 1.992721546570018e-05, "loss": 0.558, "step": 3721 }, { "epoch": 0.07893788042671418, "grad_norm": 0.30388110876083374, "learning_rate": 1.9927175296581715e-05, "loss": 0.544, "step": 3722 }, { "epoch": 0.0789590888846472, "grad_norm": 0.31427738070487976, "learning_rate": 1.9927135116422334e-05, "loss": 0.5985, "step": 3723 }, { "epoch": 0.07898029734258022, "grad_norm": 0.4630886912345886, "learning_rate": 1.9927094925222075e-05, "loss": 0.623, "step": 3724 }, { "epoch": 0.07900150580051324, "grad_norm": 0.32028043270111084, "learning_rate": 1.992705472298099e-05, "loss": 0.4589, "step": 3725 }, { "epoch": 0.07902271425844627, "grad_norm": 0.3477146327495575, "learning_rate": 1.992701450969912e-05, "loss": 0.636, "step": 3726 }, { "epoch": 0.07904392271637929, "grad_norm": 0.3089326322078705, "learning_rate": 1.9926974285376513e-05, "loss": 0.5207, "step": 3727 }, { "epoch": 0.07906513117431231, "grad_norm": 0.3412814438343048, "learning_rate": 1.992693405001321e-05, "loss": 0.5378, "step": 3728 }, { "epoch": 0.07908633963224534, "grad_norm": 0.39125847816467285, "learning_rate": 1.9926893803609256e-05, "loss": 0.589, "step": 3729 }, { "epoch": 0.07910754809017836, "grad_norm": 0.30059531331062317, "learning_rate": 1.99268535461647e-05, "loss": 0.5088, "step": 3730 }, { "epoch": 0.07912875654811138, "grad_norm": 0.3318430185317993, "learning_rate": 1.9926813277679578e-05, "loss": 0.6534, "step": 3731 }, { "epoch": 0.0791499650060444, "grad_norm": 0.31745606660842896, "learning_rate": 1.9926772998153946e-05, "loss": 0.5456, "step": 3732 }, { "epoch": 0.07917117346397744, "grad_norm": 0.34723713994026184, "learning_rate": 1.9926732707587842e-05, "loss": 0.5815, "step": 3733 }, { "epoch": 0.07919238192191046, "grad_norm": 0.33861473202705383, "learning_rate": 1.992669240598131e-05, "loss": 0.5365, "step": 3734 }, { "epoch": 0.07921359037984348, "grad_norm": 0.3188377320766449, "learning_rate": 1.99266520933344e-05, "loss": 0.5978, "step": 3735 }, { "epoch": 0.07923479883777651, "grad_norm": 0.30095356702804565, "learning_rate": 1.992661176964715e-05, "loss": 0.5073, "step": 3736 }, { "epoch": 0.07925600729570953, "grad_norm": 0.3415878415107727, "learning_rate": 1.992657143491961e-05, "loss": 0.585, "step": 3737 }, { "epoch": 0.07927721575364255, "grad_norm": 0.3300704061985016, "learning_rate": 1.9926531089151825e-05, "loss": 0.5055, "step": 3738 }, { "epoch": 0.07929842421157558, "grad_norm": 0.3121482729911804, "learning_rate": 1.9926490732343838e-05, "loss": 0.5232, "step": 3739 }, { "epoch": 0.0793196326695086, "grad_norm": 0.32808539271354675, "learning_rate": 1.9926450364495695e-05, "loss": 0.5576, "step": 3740 }, { "epoch": 0.07934084112744162, "grad_norm": 0.3348873257637024, "learning_rate": 1.992640998560744e-05, "loss": 0.5075, "step": 3741 }, { "epoch": 0.07936204958537464, "grad_norm": 0.3135927617549896, "learning_rate": 1.9926369595679117e-05, "loss": 0.5151, "step": 3742 }, { "epoch": 0.07938325804330768, "grad_norm": 0.3257668912410736, "learning_rate": 1.9926329194710772e-05, "loss": 0.5304, "step": 3743 }, { "epoch": 0.0794044665012407, "grad_norm": 0.3265121877193451, "learning_rate": 1.992628878270245e-05, "loss": 0.5136, "step": 3744 }, { "epoch": 0.07942567495917371, "grad_norm": 0.33757203817367554, "learning_rate": 1.9926248359654195e-05, "loss": 0.625, "step": 3745 }, { "epoch": 0.07944688341710675, "grad_norm": 0.31272760033607483, "learning_rate": 1.9926207925566056e-05, "loss": 0.5252, "step": 3746 }, { "epoch": 0.07946809187503977, "grad_norm": 0.44777247309684753, "learning_rate": 1.992616748043807e-05, "loss": 0.5431, "step": 3747 }, { "epoch": 0.07948930033297279, "grad_norm": 0.326456218957901, "learning_rate": 1.992612702427029e-05, "loss": 0.5352, "step": 3748 }, { "epoch": 0.0795105087909058, "grad_norm": 0.3584713637828827, "learning_rate": 1.992608655706276e-05, "loss": 0.5959, "step": 3749 }, { "epoch": 0.07953171724883884, "grad_norm": 0.3341144621372223, "learning_rate": 1.992604607881552e-05, "loss": 0.5172, "step": 3750 }, { "epoch": 0.07955292570677186, "grad_norm": 0.31195637583732605, "learning_rate": 1.9926005589528615e-05, "loss": 0.4878, "step": 3751 }, { "epoch": 0.07957413416470488, "grad_norm": 0.31180647015571594, "learning_rate": 1.9925965089202096e-05, "loss": 0.5406, "step": 3752 }, { "epoch": 0.07959534262263791, "grad_norm": 0.30078497529029846, "learning_rate": 1.9925924577836e-05, "loss": 0.559, "step": 3753 }, { "epoch": 0.07961655108057093, "grad_norm": 0.3272033929824829, "learning_rate": 1.9925884055430383e-05, "loss": 0.5326, "step": 3754 }, { "epoch": 0.07963775953850395, "grad_norm": 0.3522736430168152, "learning_rate": 1.9925843521985283e-05, "loss": 0.5885, "step": 3755 }, { "epoch": 0.07965896799643699, "grad_norm": 0.32095786929130554, "learning_rate": 1.9925802977500745e-05, "loss": 0.4534, "step": 3756 }, { "epoch": 0.07968017645437, "grad_norm": 0.32955312728881836, "learning_rate": 1.9925762421976816e-05, "loss": 0.5461, "step": 3757 }, { "epoch": 0.07970138491230302, "grad_norm": 0.3059951663017273, "learning_rate": 1.992572185541354e-05, "loss": 0.5515, "step": 3758 }, { "epoch": 0.07972259337023604, "grad_norm": 0.32608526945114136, "learning_rate": 1.992568127781096e-05, "loss": 0.4792, "step": 3759 }, { "epoch": 0.07974380182816908, "grad_norm": 0.28406503796577454, "learning_rate": 1.9925640689169127e-05, "loss": 0.5073, "step": 3760 }, { "epoch": 0.0797650102861021, "grad_norm": 0.32882413268089294, "learning_rate": 1.9925600089488083e-05, "loss": 0.5459, "step": 3761 }, { "epoch": 0.07978621874403512, "grad_norm": 0.3492322564125061, "learning_rate": 1.992555947876787e-05, "loss": 0.6105, "step": 3762 }, { "epoch": 0.07980742720196815, "grad_norm": 0.34307557344436646, "learning_rate": 1.9925518857008536e-05, "loss": 0.5643, "step": 3763 }, { "epoch": 0.07982863565990117, "grad_norm": 0.3160533010959625, "learning_rate": 1.9925478224210127e-05, "loss": 0.5619, "step": 3764 }, { "epoch": 0.07984984411783419, "grad_norm": 0.480612576007843, "learning_rate": 1.9925437580372686e-05, "loss": 0.4832, "step": 3765 }, { "epoch": 0.07987105257576721, "grad_norm": 0.368335098028183, "learning_rate": 1.9925396925496262e-05, "loss": 0.4753, "step": 3766 }, { "epoch": 0.07989226103370024, "grad_norm": 0.31168848276138306, "learning_rate": 1.9925356259580897e-05, "loss": 0.5375, "step": 3767 }, { "epoch": 0.07991346949163326, "grad_norm": 0.35381391644477844, "learning_rate": 1.9925315582626634e-05, "loss": 0.5612, "step": 3768 }, { "epoch": 0.07993467794956628, "grad_norm": 0.36441606283187866, "learning_rate": 1.9925274894633525e-05, "loss": 0.5737, "step": 3769 }, { "epoch": 0.07995588640749932, "grad_norm": 0.3417079448699951, "learning_rate": 1.9925234195601612e-05, "loss": 0.5143, "step": 3770 }, { "epoch": 0.07997709486543234, "grad_norm": 0.28718385100364685, "learning_rate": 1.9925193485530938e-05, "loss": 0.463, "step": 3771 }, { "epoch": 0.07999830332336536, "grad_norm": 0.2934138774871826, "learning_rate": 1.992515276442155e-05, "loss": 0.548, "step": 3772 }, { "epoch": 0.08001951178129839, "grad_norm": 0.329345166683197, "learning_rate": 1.9925112032273493e-05, "loss": 0.5151, "step": 3773 }, { "epoch": 0.08004072023923141, "grad_norm": 0.40652114152908325, "learning_rate": 1.9925071289086813e-05, "loss": 0.5317, "step": 3774 }, { "epoch": 0.08006192869716443, "grad_norm": 0.34677472710609436, "learning_rate": 1.9925030534861555e-05, "loss": 0.6225, "step": 3775 }, { "epoch": 0.08008313715509745, "grad_norm": 0.33871445059776306, "learning_rate": 1.9924989769597764e-05, "loss": 0.5603, "step": 3776 }, { "epoch": 0.08010434561303048, "grad_norm": 0.34018799662590027, "learning_rate": 1.9924948993295484e-05, "loss": 0.5673, "step": 3777 }, { "epoch": 0.0801255540709635, "grad_norm": 0.30428168177604675, "learning_rate": 1.992490820595477e-05, "loss": 0.5273, "step": 3778 }, { "epoch": 0.08014676252889652, "grad_norm": 0.3222500681877136, "learning_rate": 1.9924867407575648e-05, "loss": 0.5095, "step": 3779 }, { "epoch": 0.08016797098682955, "grad_norm": 0.30800268054008484, "learning_rate": 1.992482659815818e-05, "loss": 0.4628, "step": 3780 }, { "epoch": 0.08018917944476257, "grad_norm": 0.3673870861530304, "learning_rate": 1.9924785777702408e-05, "loss": 0.5671, "step": 3781 }, { "epoch": 0.08021038790269559, "grad_norm": 0.3483523428440094, "learning_rate": 1.9924744946208372e-05, "loss": 0.5089, "step": 3782 }, { "epoch": 0.08023159636062861, "grad_norm": 0.34916046261787415, "learning_rate": 1.992470410367612e-05, "loss": 0.5696, "step": 3783 }, { "epoch": 0.08025280481856165, "grad_norm": 0.3308577239513397, "learning_rate": 1.9924663250105703e-05, "loss": 0.5167, "step": 3784 }, { "epoch": 0.08027401327649467, "grad_norm": 0.3372441828250885, "learning_rate": 1.992462238549716e-05, "loss": 0.5759, "step": 3785 }, { "epoch": 0.08029522173442769, "grad_norm": 0.3232755959033966, "learning_rate": 1.9924581509850534e-05, "loss": 0.5275, "step": 3786 }, { "epoch": 0.08031643019236072, "grad_norm": 0.35105565190315247, "learning_rate": 1.992454062316588e-05, "loss": 0.5498, "step": 3787 }, { "epoch": 0.08033763865029374, "grad_norm": 0.30067548155784607, "learning_rate": 1.9924499725443237e-05, "loss": 0.4551, "step": 3788 }, { "epoch": 0.08035884710822676, "grad_norm": 0.3166419565677643, "learning_rate": 1.9924458816682653e-05, "loss": 0.5655, "step": 3789 }, { "epoch": 0.08038005556615978, "grad_norm": 0.3260050415992737, "learning_rate": 1.992441789688417e-05, "loss": 0.5371, "step": 3790 }, { "epoch": 0.08040126402409281, "grad_norm": 0.334239661693573, "learning_rate": 1.9924376966047835e-05, "loss": 0.5749, "step": 3791 }, { "epoch": 0.08042247248202583, "grad_norm": 0.3270573318004608, "learning_rate": 1.9924336024173694e-05, "loss": 0.6304, "step": 3792 }, { "epoch": 0.08044368093995885, "grad_norm": 0.30177897214889526, "learning_rate": 1.9924295071261795e-05, "loss": 0.5728, "step": 3793 }, { "epoch": 0.08046488939789188, "grad_norm": 0.3231731653213501, "learning_rate": 1.992425410731218e-05, "loss": 0.4878, "step": 3794 }, { "epoch": 0.0804860978558249, "grad_norm": 0.3456801176071167, "learning_rate": 1.9924213132324893e-05, "loss": 0.4815, "step": 3795 }, { "epoch": 0.08050730631375792, "grad_norm": 0.3377254009246826, "learning_rate": 1.9924172146299986e-05, "loss": 0.5556, "step": 3796 }, { "epoch": 0.08052851477169096, "grad_norm": 0.34339767694473267, "learning_rate": 1.9924131149237498e-05, "loss": 0.5541, "step": 3797 }, { "epoch": 0.08054972322962398, "grad_norm": 0.47685977816581726, "learning_rate": 1.992409014113748e-05, "loss": 0.6354, "step": 3798 }, { "epoch": 0.080570931687557, "grad_norm": 0.42184212803840637, "learning_rate": 1.9924049121999977e-05, "loss": 0.5342, "step": 3799 }, { "epoch": 0.08059214014549002, "grad_norm": 0.3408306837081909, "learning_rate": 1.992400809182503e-05, "loss": 0.5444, "step": 3800 }, { "epoch": 0.08061334860342305, "grad_norm": 0.3186613619327545, "learning_rate": 1.9923967050612686e-05, "loss": 0.533, "step": 3801 }, { "epoch": 0.08063455706135607, "grad_norm": 0.399548202753067, "learning_rate": 1.9923925998362996e-05, "loss": 0.5053, "step": 3802 }, { "epoch": 0.08065576551928909, "grad_norm": 0.3301224708557129, "learning_rate": 1.9923884935076e-05, "loss": 0.5222, "step": 3803 }, { "epoch": 0.08067697397722212, "grad_norm": 0.35008272528648376, "learning_rate": 1.9923843860751742e-05, "loss": 0.5574, "step": 3804 }, { "epoch": 0.08069818243515514, "grad_norm": 0.35471776127815247, "learning_rate": 1.9923802775390273e-05, "loss": 0.5539, "step": 3805 }, { "epoch": 0.08071939089308816, "grad_norm": 0.3189281225204468, "learning_rate": 1.9923761678991637e-05, "loss": 0.5134, "step": 3806 }, { "epoch": 0.08074059935102118, "grad_norm": 0.35365721583366394, "learning_rate": 1.9923720571555883e-05, "loss": 0.5011, "step": 3807 }, { "epoch": 0.08076180780895421, "grad_norm": 0.3048893213272095, "learning_rate": 1.992367945308305e-05, "loss": 0.4154, "step": 3808 }, { "epoch": 0.08078301626688723, "grad_norm": 0.3036755919456482, "learning_rate": 1.9923638323573187e-05, "loss": 0.5898, "step": 3809 }, { "epoch": 0.08080422472482025, "grad_norm": 0.34441471099853516, "learning_rate": 1.992359718302634e-05, "loss": 0.592, "step": 3810 }, { "epoch": 0.08082543318275329, "grad_norm": 0.3269899785518646, "learning_rate": 1.9923556031442554e-05, "loss": 0.5969, "step": 3811 }, { "epoch": 0.0808466416406863, "grad_norm": 0.3855137825012207, "learning_rate": 1.9923514868821875e-05, "loss": 0.6905, "step": 3812 }, { "epoch": 0.08086785009861933, "grad_norm": 0.3134619891643524, "learning_rate": 1.992347369516435e-05, "loss": 0.5323, "step": 3813 }, { "epoch": 0.08088905855655236, "grad_norm": 0.39214658737182617, "learning_rate": 1.9923432510470024e-05, "loss": 0.6284, "step": 3814 }, { "epoch": 0.08091026701448538, "grad_norm": 0.3280870020389557, "learning_rate": 1.992339131473894e-05, "loss": 0.5463, "step": 3815 }, { "epoch": 0.0809314754724184, "grad_norm": 0.3128315508365631, "learning_rate": 1.992335010797115e-05, "loss": 0.539, "step": 3816 }, { "epoch": 0.08095268393035142, "grad_norm": 0.3180510997772217, "learning_rate": 1.9923308890166692e-05, "loss": 0.5472, "step": 3817 }, { "epoch": 0.08097389238828445, "grad_norm": 0.36247462034225464, "learning_rate": 1.992326766132562e-05, "loss": 0.5735, "step": 3818 }, { "epoch": 0.08099510084621747, "grad_norm": 0.3401390016078949, "learning_rate": 1.9923226421447976e-05, "loss": 0.5709, "step": 3819 }, { "epoch": 0.08101630930415049, "grad_norm": 0.3216424286365509, "learning_rate": 1.9923185170533802e-05, "loss": 0.6275, "step": 3820 }, { "epoch": 0.08103751776208352, "grad_norm": 0.3007332384586334, "learning_rate": 1.9923143908583152e-05, "loss": 0.5168, "step": 3821 }, { "epoch": 0.08105872622001654, "grad_norm": 0.3217267096042633, "learning_rate": 1.9923102635596066e-05, "loss": 0.5775, "step": 3822 }, { "epoch": 0.08107993467794956, "grad_norm": 0.3028106689453125, "learning_rate": 1.992306135157259e-05, "loss": 0.5352, "step": 3823 }, { "epoch": 0.08110114313588258, "grad_norm": 0.3534447252750397, "learning_rate": 1.992302005651277e-05, "loss": 0.5855, "step": 3824 }, { "epoch": 0.08112235159381562, "grad_norm": 0.33891910314559937, "learning_rate": 1.9922978750416658e-05, "loss": 0.5702, "step": 3825 }, { "epoch": 0.08114356005174864, "grad_norm": 0.3200433552265167, "learning_rate": 1.992293743328429e-05, "loss": 0.5055, "step": 3826 }, { "epoch": 0.08116476850968166, "grad_norm": 0.4048544764518738, "learning_rate": 1.9922896105115723e-05, "loss": 0.5263, "step": 3827 }, { "epoch": 0.08118597696761469, "grad_norm": 0.36910951137542725, "learning_rate": 1.9922854765910992e-05, "loss": 0.5298, "step": 3828 }, { "epoch": 0.08120718542554771, "grad_norm": 0.33937153220176697, "learning_rate": 1.992281341567015e-05, "loss": 0.5077, "step": 3829 }, { "epoch": 0.08122839388348073, "grad_norm": 0.3165932297706604, "learning_rate": 1.9922772054393246e-05, "loss": 0.5303, "step": 3830 }, { "epoch": 0.08124960234141376, "grad_norm": 0.4296859800815582, "learning_rate": 1.9922730682080315e-05, "loss": 0.549, "step": 3831 }, { "epoch": 0.08127081079934678, "grad_norm": 0.3520396947860718, "learning_rate": 1.9922689298731412e-05, "loss": 0.5596, "step": 3832 }, { "epoch": 0.0812920192572798, "grad_norm": 0.2969440519809723, "learning_rate": 1.9922647904346574e-05, "loss": 0.6003, "step": 3833 }, { "epoch": 0.08131322771521282, "grad_norm": 0.311458021402359, "learning_rate": 1.992260649892586e-05, "loss": 0.5305, "step": 3834 }, { "epoch": 0.08133443617314585, "grad_norm": 0.32377517223358154, "learning_rate": 1.992256508246931e-05, "loss": 0.5292, "step": 3835 }, { "epoch": 0.08135564463107887, "grad_norm": 0.31920796632766724, "learning_rate": 1.9922523654976965e-05, "loss": 0.5283, "step": 3836 }, { "epoch": 0.0813768530890119, "grad_norm": 0.3174648582935333, "learning_rate": 1.9922482216448877e-05, "loss": 0.59, "step": 3837 }, { "epoch": 0.08139806154694493, "grad_norm": 0.4578002691268921, "learning_rate": 1.992244076688509e-05, "loss": 0.5152, "step": 3838 }, { "epoch": 0.08141927000487795, "grad_norm": 0.4256555438041687, "learning_rate": 1.992239930628565e-05, "loss": 0.5895, "step": 3839 }, { "epoch": 0.08144047846281097, "grad_norm": 0.3311035931110382, "learning_rate": 1.9922357834650606e-05, "loss": 0.5919, "step": 3840 }, { "epoch": 0.08146168692074399, "grad_norm": 0.3343440890312195, "learning_rate": 1.992231635198e-05, "loss": 0.613, "step": 3841 }, { "epoch": 0.08148289537867702, "grad_norm": 0.31081563234329224, "learning_rate": 1.992227485827388e-05, "loss": 0.5847, "step": 3842 }, { "epoch": 0.08150410383661004, "grad_norm": 0.30186110734939575, "learning_rate": 1.992223335353229e-05, "loss": 0.5302, "step": 3843 }, { "epoch": 0.08152531229454306, "grad_norm": 0.32733532786369324, "learning_rate": 1.9922191837755283e-05, "loss": 0.5795, "step": 3844 }, { "epoch": 0.08154652075247609, "grad_norm": 0.3658483624458313, "learning_rate": 1.99221503109429e-05, "loss": 0.5736, "step": 3845 }, { "epoch": 0.08156772921040911, "grad_norm": 0.38041791319847107, "learning_rate": 1.9922108773095183e-05, "loss": 0.5711, "step": 3846 }, { "epoch": 0.08158893766834213, "grad_norm": 0.3681657314300537, "learning_rate": 1.9922067224212186e-05, "loss": 0.532, "step": 3847 }, { "epoch": 0.08161014612627517, "grad_norm": 0.3109898567199707, "learning_rate": 1.9922025664293953e-05, "loss": 0.4629, "step": 3848 }, { "epoch": 0.08163135458420819, "grad_norm": 0.33062365651130676, "learning_rate": 1.9921984093340527e-05, "loss": 0.5001, "step": 3849 }, { "epoch": 0.0816525630421412, "grad_norm": 0.31762921810150146, "learning_rate": 1.992194251135196e-05, "loss": 0.6135, "step": 3850 }, { "epoch": 0.08167377150007422, "grad_norm": 0.35903429985046387, "learning_rate": 1.992190091832829e-05, "loss": 0.5684, "step": 3851 }, { "epoch": 0.08169497995800726, "grad_norm": 0.327575147151947, "learning_rate": 1.992185931426957e-05, "loss": 0.5426, "step": 3852 }, { "epoch": 0.08171618841594028, "grad_norm": 0.36474502086639404, "learning_rate": 1.9921817699175844e-05, "loss": 0.5564, "step": 3853 }, { "epoch": 0.0817373968738733, "grad_norm": 0.48594576120376587, "learning_rate": 1.992177607304716e-05, "loss": 0.4601, "step": 3854 }, { "epoch": 0.08175860533180633, "grad_norm": 0.3304283618927002, "learning_rate": 1.992173443588356e-05, "loss": 0.5015, "step": 3855 }, { "epoch": 0.08177981378973935, "grad_norm": 0.3516501486301422, "learning_rate": 1.9921692787685097e-05, "loss": 0.4933, "step": 3856 }, { "epoch": 0.08180102224767237, "grad_norm": 0.31246280670166016, "learning_rate": 1.992165112845181e-05, "loss": 0.5028, "step": 3857 }, { "epoch": 0.08182223070560539, "grad_norm": 0.35477808117866516, "learning_rate": 1.992160945818375e-05, "loss": 0.5222, "step": 3858 }, { "epoch": 0.08184343916353842, "grad_norm": 0.31780683994293213, "learning_rate": 1.9921567776880963e-05, "loss": 0.5182, "step": 3859 }, { "epoch": 0.08186464762147144, "grad_norm": 0.3088952600955963, "learning_rate": 1.9921526084543494e-05, "loss": 0.6035, "step": 3860 }, { "epoch": 0.08188585607940446, "grad_norm": 0.33957770466804504, "learning_rate": 1.992148438117139e-05, "loss": 0.4772, "step": 3861 }, { "epoch": 0.0819070645373375, "grad_norm": 0.33610060811042786, "learning_rate": 1.9921442666764695e-05, "loss": 0.6097, "step": 3862 }, { "epoch": 0.08192827299527052, "grad_norm": 0.3269990384578705, "learning_rate": 1.992140094132346e-05, "loss": 0.4563, "step": 3863 }, { "epoch": 0.08194948145320353, "grad_norm": 0.3041713535785675, "learning_rate": 1.992135920484773e-05, "loss": 0.5687, "step": 3864 }, { "epoch": 0.08197068991113655, "grad_norm": 0.2864619791507721, "learning_rate": 1.992131745733755e-05, "loss": 0.4473, "step": 3865 }, { "epoch": 0.08199189836906959, "grad_norm": 0.3233056962490082, "learning_rate": 1.9921275698792966e-05, "loss": 0.5261, "step": 3866 }, { "epoch": 0.08201310682700261, "grad_norm": 0.32307809591293335, "learning_rate": 1.9921233929214027e-05, "loss": 0.5644, "step": 3867 }, { "epoch": 0.08203431528493563, "grad_norm": 0.33146747946739197, "learning_rate": 1.9921192148600773e-05, "loss": 0.4763, "step": 3868 }, { "epoch": 0.08205552374286866, "grad_norm": 0.3149113357067108, "learning_rate": 1.9921150356953262e-05, "loss": 0.4737, "step": 3869 }, { "epoch": 0.08207673220080168, "grad_norm": 0.3107772171497345, "learning_rate": 1.992110855427153e-05, "loss": 0.496, "step": 3870 }, { "epoch": 0.0820979406587347, "grad_norm": 0.3687222898006439, "learning_rate": 1.9921066740555625e-05, "loss": 0.5499, "step": 3871 }, { "epoch": 0.08211914911666773, "grad_norm": 0.3103746473789215, "learning_rate": 1.99210249158056e-05, "loss": 0.5175, "step": 3872 }, { "epoch": 0.08214035757460075, "grad_norm": 0.33572012186050415, "learning_rate": 1.9920983080021494e-05, "loss": 0.5925, "step": 3873 }, { "epoch": 0.08216156603253377, "grad_norm": 0.2994047999382019, "learning_rate": 1.992094123320336e-05, "loss": 0.5229, "step": 3874 }, { "epoch": 0.08218277449046679, "grad_norm": 0.3580612242221832, "learning_rate": 1.9920899375351238e-05, "loss": 0.5571, "step": 3875 }, { "epoch": 0.08220398294839983, "grad_norm": 0.3305562734603882, "learning_rate": 1.992085750646518e-05, "loss": 0.4815, "step": 3876 }, { "epoch": 0.08222519140633285, "grad_norm": 0.3536771833896637, "learning_rate": 1.992081562654523e-05, "loss": 0.5283, "step": 3877 }, { "epoch": 0.08224639986426587, "grad_norm": 0.3448227345943451, "learning_rate": 1.9920773735591435e-05, "loss": 0.5188, "step": 3878 }, { "epoch": 0.0822676083221989, "grad_norm": 0.3348742127418518, "learning_rate": 1.9920731833603845e-05, "loss": 0.564, "step": 3879 }, { "epoch": 0.08228881678013192, "grad_norm": 0.328453004360199, "learning_rate": 1.9920689920582496e-05, "loss": 0.558, "step": 3880 }, { "epoch": 0.08231002523806494, "grad_norm": 0.3265362083911896, "learning_rate": 1.9920647996527448e-05, "loss": 0.5296, "step": 3881 }, { "epoch": 0.08233123369599796, "grad_norm": 0.36966434121131897, "learning_rate": 1.992060606143874e-05, "loss": 0.5195, "step": 3882 }, { "epoch": 0.08235244215393099, "grad_norm": 0.373318612575531, "learning_rate": 1.992056411531642e-05, "loss": 0.5483, "step": 3883 }, { "epoch": 0.08237365061186401, "grad_norm": 0.31127190589904785, "learning_rate": 1.9920522158160533e-05, "loss": 0.4692, "step": 3884 }, { "epoch": 0.08239485906979703, "grad_norm": 0.3392300009727478, "learning_rate": 1.992048018997113e-05, "loss": 0.6035, "step": 3885 }, { "epoch": 0.08241606752773006, "grad_norm": 0.34025803208351135, "learning_rate": 1.9920438210748252e-05, "loss": 0.637, "step": 3886 }, { "epoch": 0.08243727598566308, "grad_norm": 0.3352659046649933, "learning_rate": 1.9920396220491952e-05, "loss": 0.5759, "step": 3887 }, { "epoch": 0.0824584844435961, "grad_norm": 0.3210453987121582, "learning_rate": 1.992035421920227e-05, "loss": 0.5902, "step": 3888 }, { "epoch": 0.08247969290152914, "grad_norm": 0.3552902638912201, "learning_rate": 1.992031220687926e-05, "loss": 0.5129, "step": 3889 }, { "epoch": 0.08250090135946216, "grad_norm": 0.3175475001335144, "learning_rate": 1.9920270183522964e-05, "loss": 0.6493, "step": 3890 }, { "epoch": 0.08252210981739518, "grad_norm": 0.34296220541000366, "learning_rate": 1.9920228149133428e-05, "loss": 0.5169, "step": 3891 }, { "epoch": 0.0825433182753282, "grad_norm": 0.34883347153663635, "learning_rate": 1.99201861037107e-05, "loss": 0.5574, "step": 3892 }, { "epoch": 0.08256452673326123, "grad_norm": 0.34700238704681396, "learning_rate": 1.992014404725483e-05, "loss": 0.6132, "step": 3893 }, { "epoch": 0.08258573519119425, "grad_norm": 0.3185758590698242, "learning_rate": 1.992010197976586e-05, "loss": 0.5214, "step": 3894 }, { "epoch": 0.08260694364912727, "grad_norm": 0.3342382311820984, "learning_rate": 1.992005990124384e-05, "loss": 0.5244, "step": 3895 }, { "epoch": 0.0826281521070603, "grad_norm": 0.3038395643234253, "learning_rate": 1.9920017811688816e-05, "loss": 0.5595, "step": 3896 }, { "epoch": 0.08264936056499332, "grad_norm": 0.36238378286361694, "learning_rate": 1.9919975711100833e-05, "loss": 0.523, "step": 3897 }, { "epoch": 0.08267056902292634, "grad_norm": 0.31873780488967896, "learning_rate": 1.9919933599479937e-05, "loss": 0.4707, "step": 3898 }, { "epoch": 0.08269177748085936, "grad_norm": 0.3528134524822235, "learning_rate": 1.991989147682618e-05, "loss": 0.5562, "step": 3899 }, { "epoch": 0.0827129859387924, "grad_norm": 0.34206926822662354, "learning_rate": 1.9919849343139606e-05, "loss": 0.5722, "step": 3900 }, { "epoch": 0.08273419439672541, "grad_norm": 0.32941141724586487, "learning_rate": 1.991980719842026e-05, "loss": 0.6086, "step": 3901 }, { "epoch": 0.08275540285465843, "grad_norm": 0.38782772421836853, "learning_rate": 1.9919765042668193e-05, "loss": 0.5572, "step": 3902 }, { "epoch": 0.08277661131259147, "grad_norm": 0.35850605368614197, "learning_rate": 1.9919722875883447e-05, "loss": 0.5598, "step": 3903 }, { "epoch": 0.08279781977052449, "grad_norm": 0.3463386595249176, "learning_rate": 1.9919680698066073e-05, "loss": 0.555, "step": 3904 }, { "epoch": 0.0828190282284575, "grad_norm": 0.3140659034252167, "learning_rate": 1.991963850921612e-05, "loss": 0.5566, "step": 3905 }, { "epoch": 0.08284023668639054, "grad_norm": 0.4010949730873108, "learning_rate": 1.991959630933362e-05, "loss": 0.5483, "step": 3906 }, { "epoch": 0.08286144514432356, "grad_norm": 0.3201977014541626, "learning_rate": 1.991955409841864e-05, "loss": 0.5187, "step": 3907 }, { "epoch": 0.08288265360225658, "grad_norm": 0.3192313313484192, "learning_rate": 1.991951187647122e-05, "loss": 0.5496, "step": 3908 }, { "epoch": 0.0829038620601896, "grad_norm": 0.34592172503471375, "learning_rate": 1.9919469643491397e-05, "loss": 0.5727, "step": 3909 }, { "epoch": 0.08292507051812263, "grad_norm": 0.33898961544036865, "learning_rate": 1.9919427399479232e-05, "loss": 0.5637, "step": 3910 }, { "epoch": 0.08294627897605565, "grad_norm": 0.325575590133667, "learning_rate": 1.9919385144434764e-05, "loss": 0.4929, "step": 3911 }, { "epoch": 0.08296748743398867, "grad_norm": 0.3039776682853699, "learning_rate": 1.9919342878358044e-05, "loss": 0.5296, "step": 3912 }, { "epoch": 0.0829886958919217, "grad_norm": 0.5404266119003296, "learning_rate": 1.9919300601249113e-05, "loss": 0.5706, "step": 3913 }, { "epoch": 0.08300990434985472, "grad_norm": 0.3275658190250397, "learning_rate": 1.991925831310803e-05, "loss": 0.4583, "step": 3914 }, { "epoch": 0.08303111280778774, "grad_norm": 0.3271043300628662, "learning_rate": 1.9919216013934825e-05, "loss": 0.6138, "step": 3915 }, { "epoch": 0.08305232126572076, "grad_norm": 0.3523138463497162, "learning_rate": 1.991917370372956e-05, "loss": 0.6319, "step": 3916 }, { "epoch": 0.0830735297236538, "grad_norm": 0.3271919786930084, "learning_rate": 1.991913138249227e-05, "loss": 0.5484, "step": 3917 }, { "epoch": 0.08309473818158682, "grad_norm": 0.3981100916862488, "learning_rate": 1.9919089050223013e-05, "loss": 0.4958, "step": 3918 }, { "epoch": 0.08311594663951984, "grad_norm": 0.3262622356414795, "learning_rate": 1.9919046706921832e-05, "loss": 0.5279, "step": 3919 }, { "epoch": 0.08313715509745287, "grad_norm": 0.32708844542503357, "learning_rate": 1.9919004352588768e-05, "loss": 0.4431, "step": 3920 }, { "epoch": 0.08315836355538589, "grad_norm": 0.30384761095046997, "learning_rate": 1.9918961987223883e-05, "loss": 0.6018, "step": 3921 }, { "epoch": 0.08317957201331891, "grad_norm": 0.30480387806892395, "learning_rate": 1.9918919610827207e-05, "loss": 0.4648, "step": 3922 }, { "epoch": 0.08320078047125194, "grad_norm": 0.3124915361404419, "learning_rate": 1.9918877223398798e-05, "loss": 0.441, "step": 3923 }, { "epoch": 0.08322198892918496, "grad_norm": 0.36341017484664917, "learning_rate": 1.9918834824938695e-05, "loss": 0.5998, "step": 3924 }, { "epoch": 0.08324319738711798, "grad_norm": 0.35191813111305237, "learning_rate": 1.9918792415446955e-05, "loss": 0.575, "step": 3925 }, { "epoch": 0.083264405845051, "grad_norm": 0.3693563640117645, "learning_rate": 1.991874999492362e-05, "loss": 0.6122, "step": 3926 }, { "epoch": 0.08328561430298403, "grad_norm": 0.40490061044692993, "learning_rate": 1.9918707563368737e-05, "loss": 0.5338, "step": 3927 }, { "epoch": 0.08330682276091705, "grad_norm": 0.4010772109031677, "learning_rate": 1.9918665120782352e-05, "loss": 0.5503, "step": 3928 }, { "epoch": 0.08332803121885007, "grad_norm": 0.3229183852672577, "learning_rate": 1.9918622667164514e-05, "loss": 0.4577, "step": 3929 }, { "epoch": 0.08334923967678311, "grad_norm": 0.427979052066803, "learning_rate": 1.9918580202515272e-05, "loss": 0.5672, "step": 3930 }, { "epoch": 0.08337044813471613, "grad_norm": 0.32484665513038635, "learning_rate": 1.991853772683467e-05, "loss": 0.5434, "step": 3931 }, { "epoch": 0.08339165659264915, "grad_norm": 0.30459389090538025, "learning_rate": 1.9918495240122757e-05, "loss": 0.4961, "step": 3932 }, { "epoch": 0.08341286505058217, "grad_norm": 0.5003608465194702, "learning_rate": 1.9918452742379578e-05, "loss": 0.4218, "step": 3933 }, { "epoch": 0.0834340735085152, "grad_norm": 0.33856192231178284, "learning_rate": 1.9918410233605185e-05, "loss": 0.5575, "step": 3934 }, { "epoch": 0.08345528196644822, "grad_norm": 0.45241618156433105, "learning_rate": 1.991836771379962e-05, "loss": 0.5071, "step": 3935 }, { "epoch": 0.08347649042438124, "grad_norm": 0.7587847709655762, "learning_rate": 1.9918325182962932e-05, "loss": 0.5623, "step": 3936 }, { "epoch": 0.08349769888231427, "grad_norm": 0.3130665421485901, "learning_rate": 1.991828264109517e-05, "loss": 0.5148, "step": 3937 }, { "epoch": 0.08351890734024729, "grad_norm": 0.319254070520401, "learning_rate": 1.9918240088196382e-05, "loss": 0.6233, "step": 3938 }, { "epoch": 0.08354011579818031, "grad_norm": 0.35999974608421326, "learning_rate": 1.991819752426661e-05, "loss": 0.589, "step": 3939 }, { "epoch": 0.08356132425611333, "grad_norm": 0.29035472869873047, "learning_rate": 1.9918154949305907e-05, "loss": 0.4302, "step": 3940 }, { "epoch": 0.08358253271404636, "grad_norm": 0.3666682541370392, "learning_rate": 1.9918112363314323e-05, "loss": 0.6236, "step": 3941 }, { "epoch": 0.08360374117197938, "grad_norm": 0.328429639339447, "learning_rate": 1.9918069766291893e-05, "loss": 0.5509, "step": 3942 }, { "epoch": 0.0836249496299124, "grad_norm": 0.3326505720615387, "learning_rate": 1.9918027158238672e-05, "loss": 0.5409, "step": 3943 }, { "epoch": 0.08364615808784544, "grad_norm": 0.32536929845809937, "learning_rate": 1.9917984539154714e-05, "loss": 0.6277, "step": 3944 }, { "epoch": 0.08366736654577846, "grad_norm": 0.32393375039100647, "learning_rate": 1.9917941909040052e-05, "loss": 0.5622, "step": 3945 }, { "epoch": 0.08368857500371148, "grad_norm": 0.32959794998168945, "learning_rate": 1.9917899267894747e-05, "loss": 0.4673, "step": 3946 }, { "epoch": 0.08370978346164451, "grad_norm": 0.3841785788536072, "learning_rate": 1.9917856615718838e-05, "loss": 0.6482, "step": 3947 }, { "epoch": 0.08373099191957753, "grad_norm": 0.3448648452758789, "learning_rate": 1.9917813952512378e-05, "loss": 0.5577, "step": 3948 }, { "epoch": 0.08375220037751055, "grad_norm": 0.3178967237472534, "learning_rate": 1.9917771278275407e-05, "loss": 0.5452, "step": 3949 }, { "epoch": 0.08377340883544357, "grad_norm": 0.2899917662143707, "learning_rate": 1.991772859300798e-05, "loss": 0.5501, "step": 3950 }, { "epoch": 0.0837946172933766, "grad_norm": 0.3834804594516754, "learning_rate": 1.991768589671014e-05, "loss": 0.5839, "step": 3951 }, { "epoch": 0.08381582575130962, "grad_norm": 0.34141653776168823, "learning_rate": 1.9917643189381935e-05, "loss": 0.593, "step": 3952 }, { "epoch": 0.08383703420924264, "grad_norm": 0.30800682306289673, "learning_rate": 1.9917600471023415e-05, "loss": 0.5593, "step": 3953 }, { "epoch": 0.08385824266717568, "grad_norm": 0.3293534517288208, "learning_rate": 1.9917557741634628e-05, "loss": 0.5452, "step": 3954 }, { "epoch": 0.0838794511251087, "grad_norm": 0.3101208806037903, "learning_rate": 1.9917515001215616e-05, "loss": 0.5828, "step": 3955 }, { "epoch": 0.08390065958304171, "grad_norm": 0.36223405599594116, "learning_rate": 1.991747224976643e-05, "loss": 0.4945, "step": 3956 }, { "epoch": 0.08392186804097473, "grad_norm": 0.3199591636657715, "learning_rate": 1.991742948728712e-05, "loss": 0.483, "step": 3957 }, { "epoch": 0.08394307649890777, "grad_norm": 0.3780869245529175, "learning_rate": 1.991738671377773e-05, "loss": 0.5689, "step": 3958 }, { "epoch": 0.08396428495684079, "grad_norm": 0.33367887139320374, "learning_rate": 1.9917343929238305e-05, "loss": 0.5308, "step": 3959 }, { "epoch": 0.0839854934147738, "grad_norm": 0.29375362396240234, "learning_rate": 1.9917301133668903e-05, "loss": 0.4977, "step": 3960 }, { "epoch": 0.08400670187270684, "grad_norm": 0.3169552981853485, "learning_rate": 1.991725832706956e-05, "loss": 0.6021, "step": 3961 }, { "epoch": 0.08402791033063986, "grad_norm": 0.3629116117954254, "learning_rate": 1.991721550944033e-05, "loss": 0.5566, "step": 3962 }, { "epoch": 0.08404911878857288, "grad_norm": 0.4126042127609253, "learning_rate": 1.9917172680781257e-05, "loss": 0.5653, "step": 3963 }, { "epoch": 0.08407032724650591, "grad_norm": 0.3280884027481079, "learning_rate": 1.9917129841092392e-05, "loss": 0.4956, "step": 3964 }, { "epoch": 0.08409153570443893, "grad_norm": 0.38803327083587646, "learning_rate": 1.9917086990373783e-05, "loss": 0.4931, "step": 3965 }, { "epoch": 0.08411274416237195, "grad_norm": 0.31969940662384033, "learning_rate": 1.9917044128625473e-05, "loss": 0.5291, "step": 3966 }, { "epoch": 0.08413395262030497, "grad_norm": 0.3327477276325226, "learning_rate": 1.9917001255847517e-05, "loss": 0.4888, "step": 3967 }, { "epoch": 0.084155161078238, "grad_norm": 0.3524967133998871, "learning_rate": 1.9916958372039955e-05, "loss": 0.5901, "step": 3968 }, { "epoch": 0.08417636953617103, "grad_norm": 0.3174650967121124, "learning_rate": 1.9916915477202838e-05, "loss": 0.5267, "step": 3969 }, { "epoch": 0.08419757799410404, "grad_norm": 0.31942427158355713, "learning_rate": 1.9916872571336217e-05, "loss": 0.4621, "step": 3970 }, { "epoch": 0.08421878645203708, "grad_norm": 0.35054728388786316, "learning_rate": 1.9916829654440133e-05, "loss": 0.4987, "step": 3971 }, { "epoch": 0.0842399949099701, "grad_norm": 0.32620003819465637, "learning_rate": 1.991678672651464e-05, "loss": 0.543, "step": 3972 }, { "epoch": 0.08426120336790312, "grad_norm": 0.3226051926612854, "learning_rate": 1.991674378755978e-05, "loss": 0.5071, "step": 3973 }, { "epoch": 0.08428241182583614, "grad_norm": 0.3305819034576416, "learning_rate": 1.9916700837575603e-05, "loss": 0.5681, "step": 3974 }, { "epoch": 0.08430362028376917, "grad_norm": 0.6068106889724731, "learning_rate": 1.9916657876562163e-05, "loss": 0.5323, "step": 3975 }, { "epoch": 0.08432482874170219, "grad_norm": 0.35164061188697815, "learning_rate": 1.9916614904519495e-05, "loss": 0.5464, "step": 3976 }, { "epoch": 0.08434603719963521, "grad_norm": 0.3271360695362091, "learning_rate": 1.991657192144766e-05, "loss": 0.5539, "step": 3977 }, { "epoch": 0.08436724565756824, "grad_norm": 0.3331727981567383, "learning_rate": 1.9916528927346695e-05, "loss": 0.5737, "step": 3978 }, { "epoch": 0.08438845411550126, "grad_norm": 0.3555724024772644, "learning_rate": 1.9916485922216656e-05, "loss": 0.5396, "step": 3979 }, { "epoch": 0.08440966257343428, "grad_norm": 0.32367515563964844, "learning_rate": 1.9916442906057587e-05, "loss": 0.5528, "step": 3980 }, { "epoch": 0.08443087103136732, "grad_norm": 0.3133643865585327, "learning_rate": 1.991639987886954e-05, "loss": 0.4896, "step": 3981 }, { "epoch": 0.08445207948930034, "grad_norm": 0.3972967565059662, "learning_rate": 1.9916356840652554e-05, "loss": 0.5417, "step": 3982 }, { "epoch": 0.08447328794723336, "grad_norm": 0.31992611289024353, "learning_rate": 1.9916313791406684e-05, "loss": 0.5432, "step": 3983 }, { "epoch": 0.08449449640516637, "grad_norm": 0.3052099347114563, "learning_rate": 1.991627073113197e-05, "loss": 0.5525, "step": 3984 }, { "epoch": 0.08451570486309941, "grad_norm": 0.354322612285614, "learning_rate": 1.9916227659828476e-05, "loss": 0.4793, "step": 3985 }, { "epoch": 0.08453691332103243, "grad_norm": 0.31785547733306885, "learning_rate": 1.9916184577496235e-05, "loss": 0.5064, "step": 3986 }, { "epoch": 0.08455812177896545, "grad_norm": 0.3493156433105469, "learning_rate": 1.9916141484135297e-05, "loss": 0.6237, "step": 3987 }, { "epoch": 0.08457933023689848, "grad_norm": 0.3163145184516907, "learning_rate": 1.9916098379745716e-05, "loss": 0.4678, "step": 3988 }, { "epoch": 0.0846005386948315, "grad_norm": 0.34797462821006775, "learning_rate": 1.9916055264327535e-05, "loss": 0.5818, "step": 3989 }, { "epoch": 0.08462174715276452, "grad_norm": 0.2764665484428406, "learning_rate": 1.9916012137880806e-05, "loss": 0.4555, "step": 3990 }, { "epoch": 0.08464295561069754, "grad_norm": 0.32672369480133057, "learning_rate": 1.9915969000405572e-05, "loss": 0.4895, "step": 3991 }, { "epoch": 0.08466416406863057, "grad_norm": 0.32853254675865173, "learning_rate": 1.9915925851901884e-05, "loss": 0.5401, "step": 3992 }, { "epoch": 0.0846853725265636, "grad_norm": 0.34205710887908936, "learning_rate": 1.991588269236979e-05, "loss": 0.444, "step": 3993 }, { "epoch": 0.08470658098449661, "grad_norm": 0.3611625134944916, "learning_rate": 1.9915839521809338e-05, "loss": 0.5524, "step": 3994 }, { "epoch": 0.08472778944242965, "grad_norm": 0.4464031457901001, "learning_rate": 1.9915796340220574e-05, "loss": 0.5548, "step": 3995 }, { "epoch": 0.08474899790036267, "grad_norm": 0.4021095037460327, "learning_rate": 1.9915753147603546e-05, "loss": 0.5185, "step": 3996 }, { "epoch": 0.08477020635829569, "grad_norm": 0.3362791836261749, "learning_rate": 1.9915709943958305e-05, "loss": 0.6509, "step": 3997 }, { "epoch": 0.0847914148162287, "grad_norm": 0.3286195993423462, "learning_rate": 1.99156667292849e-05, "loss": 0.5437, "step": 3998 }, { "epoch": 0.08481262327416174, "grad_norm": 0.37265050411224365, "learning_rate": 1.9915623503583373e-05, "loss": 0.5691, "step": 3999 }, { "epoch": 0.08483383173209476, "grad_norm": 0.3171764612197876, "learning_rate": 1.9915580266853774e-05, "loss": 0.6159, "step": 4000 }, { "epoch": 0.08485504019002778, "grad_norm": 0.28631827235221863, "learning_rate": 1.9915537019096162e-05, "loss": 0.4099, "step": 4001 }, { "epoch": 0.08487624864796081, "grad_norm": 1.1833857297897339, "learning_rate": 1.9915493760310567e-05, "loss": 0.4875, "step": 4002 }, { "epoch": 0.08489745710589383, "grad_norm": 0.37716713547706604, "learning_rate": 1.9915450490497046e-05, "loss": 0.5193, "step": 4003 }, { "epoch": 0.08491866556382685, "grad_norm": 0.7162646055221558, "learning_rate": 1.9915407209655652e-05, "loss": 0.5008, "step": 4004 }, { "epoch": 0.08493987402175988, "grad_norm": 0.3252946436405182, "learning_rate": 1.9915363917786427e-05, "loss": 0.4985, "step": 4005 }, { "epoch": 0.0849610824796929, "grad_norm": 0.3359781503677368, "learning_rate": 1.991532061488942e-05, "loss": 0.5354, "step": 4006 }, { "epoch": 0.08498229093762592, "grad_norm": 0.4687052369117737, "learning_rate": 1.9915277300964676e-05, "loss": 0.5749, "step": 4007 }, { "epoch": 0.08500349939555894, "grad_norm": 0.3327188789844513, "learning_rate": 1.9915233976012252e-05, "loss": 0.6186, "step": 4008 }, { "epoch": 0.08502470785349198, "grad_norm": 0.29759204387664795, "learning_rate": 1.9915190640032188e-05, "loss": 0.5275, "step": 4009 }, { "epoch": 0.085045916311425, "grad_norm": 0.3446033000946045, "learning_rate": 1.9915147293024536e-05, "loss": 0.5877, "step": 4010 }, { "epoch": 0.08506712476935802, "grad_norm": 0.3570135831832886, "learning_rate": 1.9915103934989343e-05, "loss": 0.5115, "step": 4011 }, { "epoch": 0.08508833322729105, "grad_norm": 0.3371436297893524, "learning_rate": 1.9915060565926655e-05, "loss": 0.5109, "step": 4012 }, { "epoch": 0.08510954168522407, "grad_norm": 0.30915069580078125, "learning_rate": 1.9915017185836526e-05, "loss": 0.4643, "step": 4013 }, { "epoch": 0.08513075014315709, "grad_norm": 0.5815343856811523, "learning_rate": 1.9914973794719e-05, "loss": 0.5111, "step": 4014 }, { "epoch": 0.08515195860109011, "grad_norm": 0.3486214578151703, "learning_rate": 1.9914930392574126e-05, "loss": 0.5564, "step": 4015 }, { "epoch": 0.08517316705902314, "grad_norm": 0.35429292917251587, "learning_rate": 1.9914886979401954e-05, "loss": 0.4901, "step": 4016 }, { "epoch": 0.08519437551695616, "grad_norm": 0.3058789074420929, "learning_rate": 1.9914843555202533e-05, "loss": 0.5299, "step": 4017 }, { "epoch": 0.08521558397488918, "grad_norm": 0.3462844789028168, "learning_rate": 1.9914800119975903e-05, "loss": 0.4703, "step": 4018 }, { "epoch": 0.08523679243282221, "grad_norm": 0.3424871861934662, "learning_rate": 1.9914756673722123e-05, "loss": 0.531, "step": 4019 }, { "epoch": 0.08525800089075523, "grad_norm": 0.3338595926761627, "learning_rate": 1.9914713216441234e-05, "loss": 0.4865, "step": 4020 }, { "epoch": 0.08527920934868825, "grad_norm": 0.33697763085365295, "learning_rate": 1.991466974813329e-05, "loss": 0.5148, "step": 4021 }, { "epoch": 0.08530041780662129, "grad_norm": 0.33935055136680603, "learning_rate": 1.9914626268798335e-05, "loss": 0.5461, "step": 4022 }, { "epoch": 0.0853216262645543, "grad_norm": 0.29488644003868103, "learning_rate": 1.991458277843642e-05, "loss": 0.5799, "step": 4023 }, { "epoch": 0.08534283472248733, "grad_norm": 0.3117203414440155, "learning_rate": 1.9914539277047588e-05, "loss": 0.562, "step": 4024 }, { "epoch": 0.08536404318042035, "grad_norm": 0.6924782395362854, "learning_rate": 1.9914495764631895e-05, "loss": 0.5147, "step": 4025 }, { "epoch": 0.08538525163835338, "grad_norm": 0.31346696615219116, "learning_rate": 1.9914452241189385e-05, "loss": 0.586, "step": 4026 }, { "epoch": 0.0854064600962864, "grad_norm": 0.3711898624897003, "learning_rate": 1.991440870672011e-05, "loss": 0.5351, "step": 4027 }, { "epoch": 0.08542766855421942, "grad_norm": 0.3589540421962738, "learning_rate": 1.991436516122411e-05, "loss": 0.5846, "step": 4028 }, { "epoch": 0.08544887701215245, "grad_norm": 0.33105602860450745, "learning_rate": 1.9914321604701443e-05, "loss": 0.6193, "step": 4029 }, { "epoch": 0.08547008547008547, "grad_norm": 0.32407325506210327, "learning_rate": 1.9914278037152152e-05, "loss": 0.5513, "step": 4030 }, { "epoch": 0.08549129392801849, "grad_norm": 0.3204441964626312, "learning_rate": 1.9914234458576288e-05, "loss": 0.5594, "step": 4031 }, { "epoch": 0.08551250238595151, "grad_norm": 0.4098456799983978, "learning_rate": 1.99141908689739e-05, "loss": 0.565, "step": 4032 }, { "epoch": 0.08553371084388454, "grad_norm": 0.3087325692176819, "learning_rate": 1.9914147268345033e-05, "loss": 0.5268, "step": 4033 }, { "epoch": 0.08555491930181756, "grad_norm": 0.3151373565196991, "learning_rate": 1.991410365668974e-05, "loss": 0.5124, "step": 4034 }, { "epoch": 0.08557612775975058, "grad_norm": 0.3178536593914032, "learning_rate": 1.9914060034008065e-05, "loss": 0.5139, "step": 4035 }, { "epoch": 0.08559733621768362, "grad_norm": 0.29891669750213623, "learning_rate": 1.991401640030006e-05, "loss": 0.5167, "step": 4036 }, { "epoch": 0.08561854467561664, "grad_norm": 0.5584654808044434, "learning_rate": 1.991397275556577e-05, "loss": 0.6041, "step": 4037 }, { "epoch": 0.08563975313354966, "grad_norm": 0.3028865456581116, "learning_rate": 1.9913929099805247e-05, "loss": 0.5711, "step": 4038 }, { "epoch": 0.08566096159148269, "grad_norm": 0.34936830401420593, "learning_rate": 1.9913885433018538e-05, "loss": 0.6, "step": 4039 }, { "epoch": 0.08568217004941571, "grad_norm": 0.36550137400627136, "learning_rate": 1.9913841755205693e-05, "loss": 0.6049, "step": 4040 }, { "epoch": 0.08570337850734873, "grad_norm": 0.30652305483818054, "learning_rate": 1.9913798066366756e-05, "loss": 0.532, "step": 4041 }, { "epoch": 0.08572458696528175, "grad_norm": 0.330320805311203, "learning_rate": 1.991375436650178e-05, "loss": 0.5603, "step": 4042 }, { "epoch": 0.08574579542321478, "grad_norm": 0.33818525075912476, "learning_rate": 1.9913710655610816e-05, "loss": 0.5441, "step": 4043 }, { "epoch": 0.0857670038811478, "grad_norm": 0.30081820487976074, "learning_rate": 1.9913666933693907e-05, "loss": 0.4484, "step": 4044 }, { "epoch": 0.08578821233908082, "grad_norm": 0.3885141909122467, "learning_rate": 1.9913623200751103e-05, "loss": 0.589, "step": 4045 }, { "epoch": 0.08580942079701386, "grad_norm": 0.3032717704772949, "learning_rate": 1.9913579456782454e-05, "loss": 0.5225, "step": 4046 }, { "epoch": 0.08583062925494687, "grad_norm": 0.36501505970954895, "learning_rate": 1.9913535701788004e-05, "loss": 0.6795, "step": 4047 }, { "epoch": 0.0858518377128799, "grad_norm": 0.331224650144577, "learning_rate": 1.991349193576781e-05, "loss": 0.4771, "step": 4048 }, { "epoch": 0.08587304617081291, "grad_norm": 0.3085748553276062, "learning_rate": 1.991344815872192e-05, "loss": 0.5138, "step": 4049 }, { "epoch": 0.08589425462874595, "grad_norm": 0.3094431459903717, "learning_rate": 1.991340437065037e-05, "loss": 0.4856, "step": 4050 }, { "epoch": 0.08591546308667897, "grad_norm": 0.3772123456001282, "learning_rate": 1.9913360571553223e-05, "loss": 0.5333, "step": 4051 }, { "epoch": 0.08593667154461199, "grad_norm": 0.2885012924671173, "learning_rate": 1.9913316761430525e-05, "loss": 0.4725, "step": 4052 }, { "epoch": 0.08595788000254502, "grad_norm": 0.3096107244491577, "learning_rate": 1.9913272940282318e-05, "loss": 0.4852, "step": 4053 }, { "epoch": 0.08597908846047804, "grad_norm": 0.3341190814971924, "learning_rate": 1.991322910810866e-05, "loss": 0.625, "step": 4054 }, { "epoch": 0.08600029691841106, "grad_norm": 0.31934991478919983, "learning_rate": 1.9913185264909588e-05, "loss": 0.4855, "step": 4055 }, { "epoch": 0.08602150537634409, "grad_norm": 0.3402058184146881, "learning_rate": 1.991314141068516e-05, "loss": 0.5898, "step": 4056 }, { "epoch": 0.08604271383427711, "grad_norm": 0.3554590046405792, "learning_rate": 1.9913097545435424e-05, "loss": 0.5394, "step": 4057 }, { "epoch": 0.08606392229221013, "grad_norm": 0.29919424653053284, "learning_rate": 1.9913053669160425e-05, "loss": 0.5003, "step": 4058 }, { "epoch": 0.08608513075014315, "grad_norm": 0.35301655530929565, "learning_rate": 1.9913009781860215e-05, "loss": 0.5056, "step": 4059 }, { "epoch": 0.08610633920807619, "grad_norm": 0.3796166181564331, "learning_rate": 1.991296588353484e-05, "loss": 0.6185, "step": 4060 }, { "epoch": 0.0861275476660092, "grad_norm": 0.33738771080970764, "learning_rate": 1.991292197418435e-05, "loss": 0.4912, "step": 4061 }, { "epoch": 0.08614875612394222, "grad_norm": 0.32861900329589844, "learning_rate": 1.9912878053808798e-05, "loss": 0.5933, "step": 4062 }, { "epoch": 0.08616996458187526, "grad_norm": 0.3333924412727356, "learning_rate": 1.9912834122408228e-05, "loss": 0.5324, "step": 4063 }, { "epoch": 0.08619117303980828, "grad_norm": 0.33411547541618347, "learning_rate": 1.991279017998269e-05, "loss": 0.5635, "step": 4064 }, { "epoch": 0.0862123814977413, "grad_norm": 0.3286474049091339, "learning_rate": 1.9912746226532232e-05, "loss": 0.578, "step": 4065 }, { "epoch": 0.08623358995567432, "grad_norm": 0.34262168407440186, "learning_rate": 1.9912702262056902e-05, "loss": 0.622, "step": 4066 }, { "epoch": 0.08625479841360735, "grad_norm": 0.37446537613868713, "learning_rate": 1.9912658286556754e-05, "loss": 0.5854, "step": 4067 }, { "epoch": 0.08627600687154037, "grad_norm": 0.38174155354499817, "learning_rate": 1.991261430003183e-05, "loss": 0.5569, "step": 4068 }, { "epoch": 0.08629721532947339, "grad_norm": 1.4282740354537964, "learning_rate": 1.9912570302482185e-05, "loss": 0.5125, "step": 4069 }, { "epoch": 0.08631842378740642, "grad_norm": 0.32901597023010254, "learning_rate": 1.9912526293907867e-05, "loss": 0.5414, "step": 4070 }, { "epoch": 0.08633963224533944, "grad_norm": 0.3276839852333069, "learning_rate": 1.991248227430892e-05, "loss": 0.4948, "step": 4071 }, { "epoch": 0.08636084070327246, "grad_norm": 0.33822837471961975, "learning_rate": 1.9912438243685395e-05, "loss": 0.5437, "step": 4072 }, { "epoch": 0.08638204916120548, "grad_norm": 0.3068612217903137, "learning_rate": 1.9912394202037347e-05, "loss": 0.5434, "step": 4073 }, { "epoch": 0.08640325761913852, "grad_norm": 0.3898123502731323, "learning_rate": 1.9912350149364817e-05, "loss": 0.5458, "step": 4074 }, { "epoch": 0.08642446607707154, "grad_norm": 0.5006074905395508, "learning_rate": 1.9912306085667857e-05, "loss": 0.5907, "step": 4075 }, { "epoch": 0.08644567453500455, "grad_norm": 0.3533743917942047, "learning_rate": 1.991226201094652e-05, "loss": 0.5992, "step": 4076 }, { "epoch": 0.08646688299293759, "grad_norm": 0.32831016182899475, "learning_rate": 1.9912217925200846e-05, "loss": 0.5387, "step": 4077 }, { "epoch": 0.08648809145087061, "grad_norm": 0.3537203073501587, "learning_rate": 1.9912173828430896e-05, "loss": 0.604, "step": 4078 }, { "epoch": 0.08650929990880363, "grad_norm": 0.31692275404930115, "learning_rate": 1.9912129720636707e-05, "loss": 0.5827, "step": 4079 }, { "epoch": 0.08653050836673666, "grad_norm": 0.2931910455226898, "learning_rate": 1.9912085601818336e-05, "loss": 0.486, "step": 4080 }, { "epoch": 0.08655171682466968, "grad_norm": 0.33323439955711365, "learning_rate": 1.991204147197583e-05, "loss": 0.5799, "step": 4081 }, { "epoch": 0.0865729252826027, "grad_norm": 0.3464677035808563, "learning_rate": 1.9911997331109235e-05, "loss": 0.5372, "step": 4082 }, { "epoch": 0.08659413374053572, "grad_norm": 0.36070993542671204, "learning_rate": 1.9911953179218605e-05, "loss": 0.5973, "step": 4083 }, { "epoch": 0.08661534219846875, "grad_norm": 0.3208327293395996, "learning_rate": 1.9911909016303984e-05, "loss": 0.4799, "step": 4084 }, { "epoch": 0.08663655065640177, "grad_norm": 0.3251133859157562, "learning_rate": 1.9911864842365426e-05, "loss": 0.531, "step": 4085 }, { "epoch": 0.08665775911433479, "grad_norm": 0.35547035932540894, "learning_rate": 1.9911820657402978e-05, "loss": 0.5629, "step": 4086 }, { "epoch": 0.08667896757226783, "grad_norm": 0.3532243072986603, "learning_rate": 1.9911776461416687e-05, "loss": 0.5351, "step": 4087 }, { "epoch": 0.08670017603020085, "grad_norm": 0.3580791652202606, "learning_rate": 1.991173225440661e-05, "loss": 0.5915, "step": 4088 }, { "epoch": 0.08672138448813387, "grad_norm": 0.3052576184272766, "learning_rate": 1.9911688036372782e-05, "loss": 0.4483, "step": 4089 }, { "epoch": 0.08674259294606688, "grad_norm": 0.36427125334739685, "learning_rate": 1.991164380731527e-05, "loss": 0.5563, "step": 4090 }, { "epoch": 0.08676380140399992, "grad_norm": 0.3596949279308319, "learning_rate": 1.9911599567234107e-05, "loss": 0.5144, "step": 4091 }, { "epoch": 0.08678500986193294, "grad_norm": 0.33504313230514526, "learning_rate": 1.9911555316129353e-05, "loss": 0.535, "step": 4092 }, { "epoch": 0.08680621831986596, "grad_norm": 0.3091961443424225, "learning_rate": 1.9911511054001048e-05, "loss": 0.4963, "step": 4093 }, { "epoch": 0.08682742677779899, "grad_norm": 0.3404417037963867, "learning_rate": 1.991146678084925e-05, "loss": 0.5555, "step": 4094 }, { "epoch": 0.08684863523573201, "grad_norm": 0.3268885612487793, "learning_rate": 1.9911422496674005e-05, "loss": 0.5823, "step": 4095 }, { "epoch": 0.08686984369366503, "grad_norm": 0.3007968068122864, "learning_rate": 1.9911378201475366e-05, "loss": 0.6114, "step": 4096 }, { "epoch": 0.08689105215159806, "grad_norm": 0.3180350065231323, "learning_rate": 1.991133389525337e-05, "loss": 0.5005, "step": 4097 }, { "epoch": 0.08691226060953108, "grad_norm": 0.3485575020313263, "learning_rate": 1.991128957800808e-05, "loss": 0.5838, "step": 4098 }, { "epoch": 0.0869334690674641, "grad_norm": 0.36371690034866333, "learning_rate": 1.991124524973954e-05, "loss": 0.5487, "step": 4099 }, { "epoch": 0.08695467752539712, "grad_norm": 0.37669384479522705, "learning_rate": 1.9911200910447793e-05, "loss": 0.5779, "step": 4100 }, { "epoch": 0.08697588598333016, "grad_norm": 0.3156534731388092, "learning_rate": 1.9911156560132903e-05, "loss": 0.6189, "step": 4101 }, { "epoch": 0.08699709444126318, "grad_norm": 0.31894704699516296, "learning_rate": 1.9911112198794905e-05, "loss": 0.5328, "step": 4102 }, { "epoch": 0.0870183028991962, "grad_norm": 0.4021362066268921, "learning_rate": 1.9911067826433858e-05, "loss": 0.6035, "step": 4103 }, { "epoch": 0.08703951135712923, "grad_norm": 0.3392866849899292, "learning_rate": 1.9911023443049805e-05, "loss": 0.4939, "step": 4104 }, { "epoch": 0.08706071981506225, "grad_norm": 0.3349984288215637, "learning_rate": 1.9910979048642797e-05, "loss": 0.5772, "step": 4105 }, { "epoch": 0.08708192827299527, "grad_norm": 0.3072693943977356, "learning_rate": 1.9910934643212885e-05, "loss": 0.502, "step": 4106 }, { "epoch": 0.08710313673092829, "grad_norm": 0.8407513499259949, "learning_rate": 1.9910890226760114e-05, "loss": 0.5813, "step": 4107 }, { "epoch": 0.08712434518886132, "grad_norm": 0.32271042466163635, "learning_rate": 1.9910845799284544e-05, "loss": 0.546, "step": 4108 }, { "epoch": 0.08714555364679434, "grad_norm": 0.3161599040031433, "learning_rate": 1.9910801360786215e-05, "loss": 0.496, "step": 4109 }, { "epoch": 0.08716676210472736, "grad_norm": 0.6974987983703613, "learning_rate": 1.991075691126518e-05, "loss": 0.5503, "step": 4110 }, { "epoch": 0.0871879705626604, "grad_norm": 0.3099864721298218, "learning_rate": 1.9910712450721483e-05, "loss": 0.5229, "step": 4111 }, { "epoch": 0.08720917902059341, "grad_norm": 0.33332377672195435, "learning_rate": 1.9910667979155178e-05, "loss": 0.5435, "step": 4112 }, { "epoch": 0.08723038747852643, "grad_norm": 0.3569093644618988, "learning_rate": 1.9910623496566318e-05, "loss": 0.6539, "step": 4113 }, { "epoch": 0.08725159593645947, "grad_norm": 0.30493032932281494, "learning_rate": 1.9910579002954947e-05, "loss": 0.5074, "step": 4114 }, { "epoch": 0.08727280439439249, "grad_norm": 0.33829644322395325, "learning_rate": 1.9910534498321116e-05, "loss": 0.4953, "step": 4115 }, { "epoch": 0.0872940128523255, "grad_norm": 1.0559484958648682, "learning_rate": 1.9910489982664876e-05, "loss": 0.6148, "step": 4116 }, { "epoch": 0.08731522131025853, "grad_norm": 0.37754738330841064, "learning_rate": 1.9910445455986274e-05, "loss": 0.51, "step": 4117 }, { "epoch": 0.08733642976819156, "grad_norm": 0.32502368092536926, "learning_rate": 1.991040091828536e-05, "loss": 0.4806, "step": 4118 }, { "epoch": 0.08735763822612458, "grad_norm": 0.3393086791038513, "learning_rate": 1.9910356369562184e-05, "loss": 0.5584, "step": 4119 }, { "epoch": 0.0873788466840576, "grad_norm": 0.34743738174438477, "learning_rate": 1.9910311809816794e-05, "loss": 0.5773, "step": 4120 }, { "epoch": 0.08740005514199063, "grad_norm": 0.3294562101364136, "learning_rate": 1.9910267239049247e-05, "loss": 0.4651, "step": 4121 }, { "epoch": 0.08742126359992365, "grad_norm": 0.34011054039001465, "learning_rate": 1.991022265725958e-05, "loss": 0.5824, "step": 4122 }, { "epoch": 0.08744247205785667, "grad_norm": 0.3026707172393799, "learning_rate": 1.9910178064447855e-05, "loss": 0.5946, "step": 4123 }, { "epoch": 0.08746368051578969, "grad_norm": 0.32225194573402405, "learning_rate": 1.9910133460614112e-05, "loss": 0.5717, "step": 4124 }, { "epoch": 0.08748488897372272, "grad_norm": 0.34561336040496826, "learning_rate": 1.9910088845758408e-05, "loss": 0.5889, "step": 4125 }, { "epoch": 0.08750609743165574, "grad_norm": 0.31024500727653503, "learning_rate": 1.991004421988079e-05, "loss": 0.5687, "step": 4126 }, { "epoch": 0.08752730588958876, "grad_norm": 0.358336865901947, "learning_rate": 1.9909999582981303e-05, "loss": 0.6607, "step": 4127 }, { "epoch": 0.0875485143475218, "grad_norm": 0.3257421553134918, "learning_rate": 1.9909954935060004e-05, "loss": 0.6227, "step": 4128 }, { "epoch": 0.08756972280545482, "grad_norm": 0.32662948966026306, "learning_rate": 1.9909910276116936e-05, "loss": 0.5358, "step": 4129 }, { "epoch": 0.08759093126338784, "grad_norm": 0.283586323261261, "learning_rate": 1.9909865606152154e-05, "loss": 0.5076, "step": 4130 }, { "epoch": 0.08761213972132087, "grad_norm": 0.33056092262268066, "learning_rate": 1.9909820925165704e-05, "loss": 0.5363, "step": 4131 }, { "epoch": 0.08763334817925389, "grad_norm": 0.298528254032135, "learning_rate": 1.990977623315764e-05, "loss": 0.598, "step": 4132 }, { "epoch": 0.08765455663718691, "grad_norm": 0.34653839468955994, "learning_rate": 1.9909731530128006e-05, "loss": 0.586, "step": 4133 }, { "epoch": 0.08767576509511993, "grad_norm": 0.53379887342453, "learning_rate": 1.9909686816076856e-05, "loss": 0.5246, "step": 4134 }, { "epoch": 0.08769697355305296, "grad_norm": 0.5828325152397156, "learning_rate": 1.990964209100424e-05, "loss": 0.4999, "step": 4135 }, { "epoch": 0.08771818201098598, "grad_norm": 0.3016301989555359, "learning_rate": 1.9909597354910205e-05, "loss": 0.5296, "step": 4136 }, { "epoch": 0.087739390468919, "grad_norm": 0.30503231287002563, "learning_rate": 1.99095526077948e-05, "loss": 0.5428, "step": 4137 }, { "epoch": 0.08776059892685203, "grad_norm": 0.3157005310058594, "learning_rate": 1.9909507849658077e-05, "loss": 0.5613, "step": 4138 }, { "epoch": 0.08778180738478505, "grad_norm": 0.3736876845359802, "learning_rate": 1.990946308050009e-05, "loss": 0.5378, "step": 4139 }, { "epoch": 0.08780301584271807, "grad_norm": 0.3329050540924072, "learning_rate": 1.990941830032088e-05, "loss": 0.6532, "step": 4140 }, { "epoch": 0.0878242243006511, "grad_norm": 0.4119981527328491, "learning_rate": 1.99093735091205e-05, "loss": 0.5951, "step": 4141 }, { "epoch": 0.08784543275858413, "grad_norm": 0.3605404794216156, "learning_rate": 1.9909328706899003e-05, "loss": 0.5045, "step": 4142 }, { "epoch": 0.08786664121651715, "grad_norm": 0.2910558879375458, "learning_rate": 1.9909283893656435e-05, "loss": 0.5611, "step": 4143 }, { "epoch": 0.08788784967445017, "grad_norm": 0.28627994656562805, "learning_rate": 1.9909239069392848e-05, "loss": 0.4507, "step": 4144 }, { "epoch": 0.0879090581323832, "grad_norm": 1.3312370777130127, "learning_rate": 1.9909194234108295e-05, "loss": 0.5888, "step": 4145 }, { "epoch": 0.08793026659031622, "grad_norm": 0.2832708954811096, "learning_rate": 1.9909149387802816e-05, "loss": 0.4452, "step": 4146 }, { "epoch": 0.08795147504824924, "grad_norm": 0.3755432665348053, "learning_rate": 1.9909104530476474e-05, "loss": 0.5915, "step": 4147 }, { "epoch": 0.08797268350618226, "grad_norm": 0.5129530429840088, "learning_rate": 1.9909059662129307e-05, "loss": 0.5473, "step": 4148 }, { "epoch": 0.08799389196411529, "grad_norm": 0.32880765199661255, "learning_rate": 1.9909014782761375e-05, "loss": 0.5987, "step": 4149 }, { "epoch": 0.08801510042204831, "grad_norm": 0.30611440539360046, "learning_rate": 1.9908969892372717e-05, "loss": 0.4957, "step": 4150 }, { "epoch": 0.08803630887998133, "grad_norm": 0.31822919845581055, "learning_rate": 1.9908924990963392e-05, "loss": 0.56, "step": 4151 }, { "epoch": 0.08805751733791436, "grad_norm": 0.40422704815864563, "learning_rate": 1.9908880078533446e-05, "loss": 0.514, "step": 4152 }, { "epoch": 0.08807872579584738, "grad_norm": 0.34757909178733826, "learning_rate": 1.9908835155082928e-05, "loss": 0.598, "step": 4153 }, { "epoch": 0.0880999342537804, "grad_norm": 0.41404011845588684, "learning_rate": 1.990879022061189e-05, "loss": 0.6656, "step": 4154 }, { "epoch": 0.08812114271171344, "grad_norm": 0.30490347743034363, "learning_rate": 1.9908745275120385e-05, "loss": 0.5419, "step": 4155 }, { "epoch": 0.08814235116964646, "grad_norm": 0.37386730313301086, "learning_rate": 1.990870031860846e-05, "loss": 0.5466, "step": 4156 }, { "epoch": 0.08816355962757948, "grad_norm": 0.355993390083313, "learning_rate": 1.990865535107616e-05, "loss": 0.573, "step": 4157 }, { "epoch": 0.0881847680855125, "grad_norm": 0.32059550285339355, "learning_rate": 1.990861037252354e-05, "loss": 0.4913, "step": 4158 }, { "epoch": 0.08820597654344553, "grad_norm": 0.3814453184604645, "learning_rate": 1.990856538295065e-05, "loss": 0.5971, "step": 4159 }, { "epoch": 0.08822718500137855, "grad_norm": 0.36925873160362244, "learning_rate": 1.9908520382357542e-05, "loss": 0.5303, "step": 4160 }, { "epoch": 0.08824839345931157, "grad_norm": 0.36128556728363037, "learning_rate": 1.9908475370744264e-05, "loss": 0.5815, "step": 4161 }, { "epoch": 0.0882696019172446, "grad_norm": 0.3414778411388397, "learning_rate": 1.9908430348110866e-05, "loss": 0.5092, "step": 4162 }, { "epoch": 0.08829081037517762, "grad_norm": 0.33692920207977295, "learning_rate": 1.9908385314457393e-05, "loss": 0.5824, "step": 4163 }, { "epoch": 0.08831201883311064, "grad_norm": 0.3318423330783844, "learning_rate": 1.9908340269783903e-05, "loss": 0.5431, "step": 4164 }, { "epoch": 0.08833322729104366, "grad_norm": 0.33305102586746216, "learning_rate": 1.9908295214090444e-05, "loss": 0.5297, "step": 4165 }, { "epoch": 0.0883544357489767, "grad_norm": 0.32993537187576294, "learning_rate": 1.9908250147377065e-05, "loss": 0.5745, "step": 4166 }, { "epoch": 0.08837564420690971, "grad_norm": 0.3558272123336792, "learning_rate": 1.9908205069643815e-05, "loss": 0.5964, "step": 4167 }, { "epoch": 0.08839685266484273, "grad_norm": 0.3300720453262329, "learning_rate": 1.9908159980890748e-05, "loss": 0.4765, "step": 4168 }, { "epoch": 0.08841806112277577, "grad_norm": 0.2939693331718445, "learning_rate": 1.9908114881117907e-05, "loss": 0.5195, "step": 4169 }, { "epoch": 0.08843926958070879, "grad_norm": 0.40086856484413147, "learning_rate": 1.990806977032535e-05, "loss": 0.477, "step": 4170 }, { "epoch": 0.08846047803864181, "grad_norm": 0.3411766290664673, "learning_rate": 1.9908024648513123e-05, "loss": 0.5605, "step": 4171 }, { "epoch": 0.08848168649657484, "grad_norm": 0.34897616505622864, "learning_rate": 1.9907979515681277e-05, "loss": 0.5621, "step": 4172 }, { "epoch": 0.08850289495450786, "grad_norm": 0.32240477204322815, "learning_rate": 1.9907934371829865e-05, "loss": 0.6146, "step": 4173 }, { "epoch": 0.08852410341244088, "grad_norm": 0.36398860812187195, "learning_rate": 1.990788921695893e-05, "loss": 0.5745, "step": 4174 }, { "epoch": 0.0885453118703739, "grad_norm": 0.3228786289691925, "learning_rate": 1.9907844051068528e-05, "loss": 0.5486, "step": 4175 }, { "epoch": 0.08856652032830693, "grad_norm": 0.3300994634628296, "learning_rate": 1.990779887415871e-05, "loss": 0.4965, "step": 4176 }, { "epoch": 0.08858772878623995, "grad_norm": 0.34076613187789917, "learning_rate": 1.990775368622952e-05, "loss": 0.5234, "step": 4177 }, { "epoch": 0.08860893724417297, "grad_norm": 0.3505801260471344, "learning_rate": 1.9907708487281017e-05, "loss": 0.6111, "step": 4178 }, { "epoch": 0.088630145702106, "grad_norm": 0.32597866654396057, "learning_rate": 1.9907663277313242e-05, "loss": 0.4634, "step": 4179 }, { "epoch": 0.08865135416003903, "grad_norm": 0.2924438714981079, "learning_rate": 1.9907618056326256e-05, "loss": 0.4097, "step": 4180 }, { "epoch": 0.08867256261797204, "grad_norm": 0.3607981503009796, "learning_rate": 1.99075728243201e-05, "loss": 0.5197, "step": 4181 }, { "epoch": 0.08869377107590506, "grad_norm": 0.3392462432384491, "learning_rate": 1.9907527581294825e-05, "loss": 0.4767, "step": 4182 }, { "epoch": 0.0887149795338381, "grad_norm": 0.36184853315353394, "learning_rate": 1.9907482327250487e-05, "loss": 0.5336, "step": 4183 }, { "epoch": 0.08873618799177112, "grad_norm": 0.34183570742607117, "learning_rate": 1.9907437062187133e-05, "loss": 0.5367, "step": 4184 }, { "epoch": 0.08875739644970414, "grad_norm": 0.3693642318248749, "learning_rate": 1.9907391786104814e-05, "loss": 0.6116, "step": 4185 }, { "epoch": 0.08877860490763717, "grad_norm": 0.3013056814670563, "learning_rate": 1.990734649900358e-05, "loss": 0.5737, "step": 4186 }, { "epoch": 0.08879981336557019, "grad_norm": 0.3491639494895935, "learning_rate": 1.990730120088348e-05, "loss": 0.5461, "step": 4187 }, { "epoch": 0.08882102182350321, "grad_norm": 0.3111128509044647, "learning_rate": 1.9907255891744562e-05, "loss": 0.5485, "step": 4188 }, { "epoch": 0.08884223028143624, "grad_norm": 0.34859803318977356, "learning_rate": 1.9907210571586888e-05, "loss": 0.5309, "step": 4189 }, { "epoch": 0.08886343873936926, "grad_norm": 0.3267183005809784, "learning_rate": 1.9907165240410494e-05, "loss": 0.4719, "step": 4190 }, { "epoch": 0.08888464719730228, "grad_norm": 0.3309048116207123, "learning_rate": 1.990711989821544e-05, "loss": 0.5192, "step": 4191 }, { "epoch": 0.0889058556552353, "grad_norm": 0.3335420787334442, "learning_rate": 1.9907074545001774e-05, "loss": 0.4972, "step": 4192 }, { "epoch": 0.08892706411316834, "grad_norm": 0.33652403950691223, "learning_rate": 1.9907029180769544e-05, "loss": 0.5676, "step": 4193 }, { "epoch": 0.08894827257110136, "grad_norm": 0.3459886610507965, "learning_rate": 1.9906983805518803e-05, "loss": 0.5495, "step": 4194 }, { "epoch": 0.08896948102903438, "grad_norm": 0.4301086962223053, "learning_rate": 1.99069384192496e-05, "loss": 0.5089, "step": 4195 }, { "epoch": 0.08899068948696741, "grad_norm": 0.31929272413253784, "learning_rate": 1.9906893021961987e-05, "loss": 0.5415, "step": 4196 }, { "epoch": 0.08901189794490043, "grad_norm": 0.31964829564094543, "learning_rate": 1.9906847613656013e-05, "loss": 0.5371, "step": 4197 }, { "epoch": 0.08903310640283345, "grad_norm": 0.5591877102851868, "learning_rate": 1.990680219433173e-05, "loss": 0.508, "step": 4198 }, { "epoch": 0.08905431486076647, "grad_norm": 0.3271026313304901, "learning_rate": 1.9906756763989187e-05, "loss": 0.527, "step": 4199 }, { "epoch": 0.0890755233186995, "grad_norm": 0.3917927145957947, "learning_rate": 1.9906711322628436e-05, "loss": 0.5668, "step": 4200 }, { "epoch": 0.08909673177663252, "grad_norm": 0.3259356915950775, "learning_rate": 1.9906665870249525e-05, "loss": 0.5263, "step": 4201 }, { "epoch": 0.08911794023456554, "grad_norm": 0.309041827917099, "learning_rate": 1.9906620406852508e-05, "loss": 0.4678, "step": 4202 }, { "epoch": 0.08913914869249857, "grad_norm": 0.3243986666202545, "learning_rate": 1.9906574932437432e-05, "loss": 0.5866, "step": 4203 }, { "epoch": 0.0891603571504316, "grad_norm": 0.4091220498085022, "learning_rate": 1.990652944700435e-05, "loss": 0.5599, "step": 4204 }, { "epoch": 0.08918156560836461, "grad_norm": 0.36482810974121094, "learning_rate": 1.9906483950553316e-05, "loss": 0.5355, "step": 4205 }, { "epoch": 0.08920277406629763, "grad_norm": 0.36974576115608215, "learning_rate": 1.990643844308437e-05, "loss": 0.663, "step": 4206 }, { "epoch": 0.08922398252423067, "grad_norm": 0.3845413327217102, "learning_rate": 1.990639292459757e-05, "loss": 0.4688, "step": 4207 }, { "epoch": 0.08924519098216369, "grad_norm": 0.31321269273757935, "learning_rate": 1.990634739509297e-05, "loss": 0.5099, "step": 4208 }, { "epoch": 0.0892663994400967, "grad_norm": 0.5491796731948853, "learning_rate": 1.9906301854570614e-05, "loss": 0.5549, "step": 4209 }, { "epoch": 0.08928760789802974, "grad_norm": 0.28612005710601807, "learning_rate": 1.990625630303056e-05, "loss": 0.4451, "step": 4210 }, { "epoch": 0.08930881635596276, "grad_norm": 0.3283970057964325, "learning_rate": 1.9906210740472848e-05, "loss": 0.5848, "step": 4211 }, { "epoch": 0.08933002481389578, "grad_norm": 0.3528957962989807, "learning_rate": 1.9906165166897536e-05, "loss": 0.5731, "step": 4212 }, { "epoch": 0.08935123327182881, "grad_norm": 0.31174641847610474, "learning_rate": 1.9906119582304673e-05, "loss": 0.5524, "step": 4213 }, { "epoch": 0.08937244172976183, "grad_norm": 0.3475569486618042, "learning_rate": 1.990607398669431e-05, "loss": 0.572, "step": 4214 }, { "epoch": 0.08939365018769485, "grad_norm": 0.33990779519081116, "learning_rate": 1.9906028380066498e-05, "loss": 0.5879, "step": 4215 }, { "epoch": 0.08941485864562787, "grad_norm": 0.2982340157032013, "learning_rate": 1.9905982762421288e-05, "loss": 0.4396, "step": 4216 }, { "epoch": 0.0894360671035609, "grad_norm": 0.3410467207431793, "learning_rate": 1.9905937133758727e-05, "loss": 0.559, "step": 4217 }, { "epoch": 0.08945727556149392, "grad_norm": 0.3162706196308136, "learning_rate": 1.990589149407887e-05, "loss": 0.6159, "step": 4218 }, { "epoch": 0.08947848401942694, "grad_norm": 0.335318386554718, "learning_rate": 1.990584584338177e-05, "loss": 0.5265, "step": 4219 }, { "epoch": 0.08949969247735998, "grad_norm": 0.33945709466934204, "learning_rate": 1.990580018166747e-05, "loss": 0.6266, "step": 4220 }, { "epoch": 0.089520900935293, "grad_norm": 0.3199300467967987, "learning_rate": 1.990575450893603e-05, "loss": 0.6272, "step": 4221 }, { "epoch": 0.08954210939322602, "grad_norm": 0.3073209822177887, "learning_rate": 1.990570882518749e-05, "loss": 0.5491, "step": 4222 }, { "epoch": 0.08956331785115904, "grad_norm": 0.47523632645606995, "learning_rate": 1.990566313042191e-05, "loss": 0.4791, "step": 4223 }, { "epoch": 0.08958452630909207, "grad_norm": 0.3363409638404846, "learning_rate": 1.9905617424639336e-05, "loss": 0.533, "step": 4224 }, { "epoch": 0.08960573476702509, "grad_norm": 0.29059553146362305, "learning_rate": 1.9905571707839822e-05, "loss": 0.5405, "step": 4225 }, { "epoch": 0.08962694322495811, "grad_norm": 0.31635037064552307, "learning_rate": 1.9905525980023415e-05, "loss": 0.5544, "step": 4226 }, { "epoch": 0.08964815168289114, "grad_norm": 0.34932732582092285, "learning_rate": 1.9905480241190168e-05, "loss": 0.5877, "step": 4227 }, { "epoch": 0.08966936014082416, "grad_norm": 0.6298927068710327, "learning_rate": 1.9905434491340134e-05, "loss": 0.5034, "step": 4228 }, { "epoch": 0.08969056859875718, "grad_norm": 0.3415224850177765, "learning_rate": 1.990538873047336e-05, "loss": 0.5813, "step": 4229 }, { "epoch": 0.08971177705669021, "grad_norm": 0.4146331250667572, "learning_rate": 1.9905342958589903e-05, "loss": 0.553, "step": 4230 }, { "epoch": 0.08973298551462323, "grad_norm": 0.30705198645591736, "learning_rate": 1.9905297175689804e-05, "loss": 0.5427, "step": 4231 }, { "epoch": 0.08975419397255625, "grad_norm": 0.3419937193393707, "learning_rate": 1.9905251381773125e-05, "loss": 0.5888, "step": 4232 }, { "epoch": 0.08977540243048927, "grad_norm": 0.3022231161594391, "learning_rate": 1.9905205576839907e-05, "loss": 0.4904, "step": 4233 }, { "epoch": 0.0897966108884223, "grad_norm": 0.30087190866470337, "learning_rate": 1.9905159760890206e-05, "loss": 0.4822, "step": 4234 }, { "epoch": 0.08981781934635533, "grad_norm": 0.3065852224826813, "learning_rate": 1.9905113933924073e-05, "loss": 0.5921, "step": 4235 }, { "epoch": 0.08983902780428835, "grad_norm": 0.334921270608902, "learning_rate": 1.9905068095941558e-05, "loss": 0.5548, "step": 4236 }, { "epoch": 0.08986023626222138, "grad_norm": 0.3500231206417084, "learning_rate": 1.9905022246942715e-05, "loss": 0.5765, "step": 4237 }, { "epoch": 0.0898814447201544, "grad_norm": 0.3301975429058075, "learning_rate": 1.990497638692759e-05, "loss": 0.5159, "step": 4238 }, { "epoch": 0.08990265317808742, "grad_norm": 0.6125115156173706, "learning_rate": 1.9904930515896233e-05, "loss": 0.5123, "step": 4239 }, { "epoch": 0.08992386163602044, "grad_norm": 0.3790571093559265, "learning_rate": 1.9904884633848703e-05, "loss": 0.5132, "step": 4240 }, { "epoch": 0.08994507009395347, "grad_norm": 0.3847695291042328, "learning_rate": 1.9904838740785048e-05, "loss": 0.6005, "step": 4241 }, { "epoch": 0.08996627855188649, "grad_norm": 0.38642483949661255, "learning_rate": 1.9904792836705313e-05, "loss": 0.5575, "step": 4242 }, { "epoch": 0.08998748700981951, "grad_norm": 0.2932489216327667, "learning_rate": 1.9904746921609555e-05, "loss": 0.4789, "step": 4243 }, { "epoch": 0.09000869546775254, "grad_norm": 0.33990082144737244, "learning_rate": 1.9904700995497822e-05, "loss": 0.5654, "step": 4244 }, { "epoch": 0.09002990392568556, "grad_norm": 0.32288625836372375, "learning_rate": 1.9904655058370168e-05, "loss": 0.5525, "step": 4245 }, { "epoch": 0.09005111238361858, "grad_norm": 0.32468920946121216, "learning_rate": 1.9904609110226644e-05, "loss": 0.5474, "step": 4246 }, { "epoch": 0.09007232084155162, "grad_norm": 0.3163609206676483, "learning_rate": 1.9904563151067297e-05, "loss": 0.5049, "step": 4247 }, { "epoch": 0.09009352929948464, "grad_norm": 0.35923337936401367, "learning_rate": 1.9904517180892182e-05, "loss": 0.4912, "step": 4248 }, { "epoch": 0.09011473775741766, "grad_norm": 0.3087043762207031, "learning_rate": 1.990447119970135e-05, "loss": 0.5392, "step": 4249 }, { "epoch": 0.09013594621535068, "grad_norm": 0.30956849455833435, "learning_rate": 1.9904425207494853e-05, "loss": 0.4916, "step": 4250 }, { "epoch": 0.09015715467328371, "grad_norm": 0.3671339750289917, "learning_rate": 1.9904379204272737e-05, "loss": 0.5273, "step": 4251 }, { "epoch": 0.09017836313121673, "grad_norm": 0.3182538151741028, "learning_rate": 1.9904333190035056e-05, "loss": 0.4463, "step": 4252 }, { "epoch": 0.09019957158914975, "grad_norm": 0.290891170501709, "learning_rate": 1.9904287164781864e-05, "loss": 0.487, "step": 4253 }, { "epoch": 0.09022078004708278, "grad_norm": 0.3429212272167206, "learning_rate": 1.9904241128513208e-05, "loss": 0.5737, "step": 4254 }, { "epoch": 0.0902419885050158, "grad_norm": 0.3256791830062866, "learning_rate": 1.9904195081229143e-05, "loss": 0.4549, "step": 4255 }, { "epoch": 0.09026319696294882, "grad_norm": 0.3319517970085144, "learning_rate": 1.9904149022929716e-05, "loss": 0.4846, "step": 4256 }, { "epoch": 0.09028440542088184, "grad_norm": 0.32174044847488403, "learning_rate": 1.9904102953614985e-05, "loss": 0.5282, "step": 4257 }, { "epoch": 0.09030561387881487, "grad_norm": 0.31170654296875, "learning_rate": 1.9904056873284993e-05, "loss": 0.5123, "step": 4258 }, { "epoch": 0.0903268223367479, "grad_norm": 0.3032299280166626, "learning_rate": 1.9904010781939792e-05, "loss": 0.4939, "step": 4259 }, { "epoch": 0.09034803079468091, "grad_norm": 0.35257458686828613, "learning_rate": 1.9903964679579442e-05, "loss": 0.5058, "step": 4260 }, { "epoch": 0.09036923925261395, "grad_norm": 0.33253729343414307, "learning_rate": 1.9903918566203985e-05, "loss": 0.5263, "step": 4261 }, { "epoch": 0.09039044771054697, "grad_norm": 0.306448757648468, "learning_rate": 1.9903872441813477e-05, "loss": 0.5176, "step": 4262 }, { "epoch": 0.09041165616847999, "grad_norm": 0.37649479508399963, "learning_rate": 1.990382630640797e-05, "loss": 0.5707, "step": 4263 }, { "epoch": 0.09043286462641302, "grad_norm": 0.30283039808273315, "learning_rate": 1.990378015998751e-05, "loss": 0.4875, "step": 4264 }, { "epoch": 0.09045407308434604, "grad_norm": 0.30817708373069763, "learning_rate": 1.9903734002552155e-05, "loss": 0.5256, "step": 4265 }, { "epoch": 0.09047528154227906, "grad_norm": 0.3119705319404602, "learning_rate": 1.990368783410195e-05, "loss": 0.5996, "step": 4266 }, { "epoch": 0.09049649000021208, "grad_norm": 0.31724756956100464, "learning_rate": 1.990364165463695e-05, "loss": 0.5265, "step": 4267 }, { "epoch": 0.09051769845814511, "grad_norm": 0.339474618434906, "learning_rate": 1.990359546415721e-05, "loss": 0.4889, "step": 4268 }, { "epoch": 0.09053890691607813, "grad_norm": 0.32726797461509705, "learning_rate": 1.990354926266277e-05, "loss": 0.4695, "step": 4269 }, { "epoch": 0.09056011537401115, "grad_norm": 0.3803546130657196, "learning_rate": 1.9903503050153693e-05, "loss": 0.6565, "step": 4270 }, { "epoch": 0.09058132383194419, "grad_norm": 0.3575212061405182, "learning_rate": 1.9903456826630027e-05, "loss": 0.5746, "step": 4271 }, { "epoch": 0.0906025322898772, "grad_norm": 0.35305753350257874, "learning_rate": 1.990341059209182e-05, "loss": 0.5197, "step": 4272 }, { "epoch": 0.09062374074781022, "grad_norm": 0.3592042624950409, "learning_rate": 1.9903364346539127e-05, "loss": 0.5472, "step": 4273 }, { "epoch": 0.09064494920574324, "grad_norm": 0.30533304810523987, "learning_rate": 1.9903318089971998e-05, "loss": 0.5027, "step": 4274 }, { "epoch": 0.09066615766367628, "grad_norm": 0.32477858662605286, "learning_rate": 1.9903271822390486e-05, "loss": 0.5579, "step": 4275 }, { "epoch": 0.0906873661216093, "grad_norm": 0.36087700724601746, "learning_rate": 1.990322554379464e-05, "loss": 0.6384, "step": 4276 }, { "epoch": 0.09070857457954232, "grad_norm": 0.33162567019462585, "learning_rate": 1.990317925418451e-05, "loss": 0.555, "step": 4277 }, { "epoch": 0.09072978303747535, "grad_norm": 0.35959169268608093, "learning_rate": 1.990313295356015e-05, "loss": 0.478, "step": 4278 }, { "epoch": 0.09075099149540837, "grad_norm": 0.33516308665275574, "learning_rate": 1.9903086641921616e-05, "loss": 0.4623, "step": 4279 }, { "epoch": 0.09077219995334139, "grad_norm": 0.33678174018859863, "learning_rate": 1.9903040319268953e-05, "loss": 0.496, "step": 4280 }, { "epoch": 0.09079340841127441, "grad_norm": 0.3459279537200928, "learning_rate": 1.990299398560221e-05, "loss": 0.5789, "step": 4281 }, { "epoch": 0.09081461686920744, "grad_norm": 0.3879348039627075, "learning_rate": 1.990294764092145e-05, "loss": 0.5261, "step": 4282 }, { "epoch": 0.09083582532714046, "grad_norm": 0.3211462199687958, "learning_rate": 1.9902901285226715e-05, "loss": 0.5437, "step": 4283 }, { "epoch": 0.09085703378507348, "grad_norm": 0.3704633414745331, "learning_rate": 1.990285491851806e-05, "loss": 0.5819, "step": 4284 }, { "epoch": 0.09087824224300652, "grad_norm": 0.3347354829311371, "learning_rate": 1.9902808540795535e-05, "loss": 0.4937, "step": 4285 }, { "epoch": 0.09089945070093954, "grad_norm": 0.3758449852466583, "learning_rate": 1.9902762152059193e-05, "loss": 0.5032, "step": 4286 }, { "epoch": 0.09092065915887255, "grad_norm": 0.3289925158023834, "learning_rate": 1.9902715752309083e-05, "loss": 0.5091, "step": 4287 }, { "epoch": 0.09094186761680559, "grad_norm": 0.3407081365585327, "learning_rate": 1.990266934154526e-05, "loss": 0.5043, "step": 4288 }, { "epoch": 0.09096307607473861, "grad_norm": 0.30081072449684143, "learning_rate": 1.9902622919767773e-05, "loss": 0.5387, "step": 4289 }, { "epoch": 0.09098428453267163, "grad_norm": 0.32263484597206116, "learning_rate": 1.9902576486976676e-05, "loss": 0.5858, "step": 4290 }, { "epoch": 0.09100549299060465, "grad_norm": 0.3408041000366211, "learning_rate": 1.990253004317202e-05, "loss": 0.5696, "step": 4291 }, { "epoch": 0.09102670144853768, "grad_norm": 0.3391294479370117, "learning_rate": 1.9902483588353857e-05, "loss": 0.5627, "step": 4292 }, { "epoch": 0.0910479099064707, "grad_norm": 0.6206053495407104, "learning_rate": 1.9902437122522237e-05, "loss": 0.4864, "step": 4293 }, { "epoch": 0.09106911836440372, "grad_norm": 0.386970579624176, "learning_rate": 1.990239064567721e-05, "loss": 0.5431, "step": 4294 }, { "epoch": 0.09109032682233675, "grad_norm": 0.30832168459892273, "learning_rate": 1.990234415781883e-05, "loss": 0.5099, "step": 4295 }, { "epoch": 0.09111153528026977, "grad_norm": 0.32430946826934814, "learning_rate": 1.9902297658947153e-05, "loss": 0.4685, "step": 4296 }, { "epoch": 0.09113274373820279, "grad_norm": 0.31523019075393677, "learning_rate": 1.990225114906222e-05, "loss": 0.4913, "step": 4297 }, { "epoch": 0.09115395219613581, "grad_norm": 0.3174244165420532, "learning_rate": 1.9902204628164097e-05, "loss": 0.5244, "step": 4298 }, { "epoch": 0.09117516065406885, "grad_norm": 0.3350011706352234, "learning_rate": 1.9902158096252825e-05, "loss": 0.4728, "step": 4299 }, { "epoch": 0.09119636911200187, "grad_norm": 0.4432368576526642, "learning_rate": 1.9902111553328458e-05, "loss": 0.5673, "step": 4300 }, { "epoch": 0.09121757756993489, "grad_norm": 0.31665781140327454, "learning_rate": 1.990206499939105e-05, "loss": 0.503, "step": 4301 }, { "epoch": 0.09123878602786792, "grad_norm": 0.47291430830955505, "learning_rate": 1.990201843444065e-05, "loss": 0.5446, "step": 4302 }, { "epoch": 0.09125999448580094, "grad_norm": 0.2988526523113251, "learning_rate": 1.990197185847731e-05, "loss": 0.4154, "step": 4303 }, { "epoch": 0.09128120294373396, "grad_norm": 0.3179165720939636, "learning_rate": 1.9901925271501085e-05, "loss": 0.5052, "step": 4304 }, { "epoch": 0.09130241140166699, "grad_norm": 0.3112322688102722, "learning_rate": 1.9901878673512024e-05, "loss": 0.5388, "step": 4305 }, { "epoch": 0.09132361985960001, "grad_norm": 0.3948257863521576, "learning_rate": 1.990183206451018e-05, "loss": 0.6009, "step": 4306 }, { "epoch": 0.09134482831753303, "grad_norm": 0.3205551505088806, "learning_rate": 1.99017854444956e-05, "loss": 0.4816, "step": 4307 }, { "epoch": 0.09136603677546605, "grad_norm": 0.348040372133255, "learning_rate": 1.990173881346835e-05, "loss": 0.5587, "step": 4308 }, { "epoch": 0.09138724523339908, "grad_norm": 0.3237541913986206, "learning_rate": 1.9901692171428465e-05, "loss": 0.5718, "step": 4309 }, { "epoch": 0.0914084536913321, "grad_norm": 0.3063626289367676, "learning_rate": 1.9901645518376004e-05, "loss": 0.5059, "step": 4310 }, { "epoch": 0.09142966214926512, "grad_norm": 0.37283599376678467, "learning_rate": 1.990159885431102e-05, "loss": 0.6018, "step": 4311 }, { "epoch": 0.09145087060719816, "grad_norm": 0.31839215755462646, "learning_rate": 1.9901552179233566e-05, "loss": 0.4923, "step": 4312 }, { "epoch": 0.09147207906513118, "grad_norm": 0.32870498299598694, "learning_rate": 1.9901505493143686e-05, "loss": 0.6225, "step": 4313 }, { "epoch": 0.0914932875230642, "grad_norm": 0.3542814254760742, "learning_rate": 1.9901458796041443e-05, "loss": 0.522, "step": 4314 }, { "epoch": 0.09151449598099722, "grad_norm": 0.3944341242313385, "learning_rate": 1.990141208792688e-05, "loss": 0.5069, "step": 4315 }, { "epoch": 0.09153570443893025, "grad_norm": 0.30261561274528503, "learning_rate": 1.9901365368800056e-05, "loss": 0.5668, "step": 4316 }, { "epoch": 0.09155691289686327, "grad_norm": 0.353459894657135, "learning_rate": 1.9901318638661017e-05, "loss": 0.5007, "step": 4317 }, { "epoch": 0.09157812135479629, "grad_norm": 0.382863312959671, "learning_rate": 1.990127189750982e-05, "loss": 0.5642, "step": 4318 }, { "epoch": 0.09159932981272932, "grad_norm": 0.3764468729496002, "learning_rate": 1.990122514534651e-05, "loss": 0.5072, "step": 4319 }, { "epoch": 0.09162053827066234, "grad_norm": 0.3553580045700073, "learning_rate": 1.9901178382171147e-05, "loss": 0.5334, "step": 4320 }, { "epoch": 0.09164174672859536, "grad_norm": 0.31074926257133484, "learning_rate": 1.990113160798378e-05, "loss": 0.5176, "step": 4321 }, { "epoch": 0.0916629551865284, "grad_norm": 0.38755014538764954, "learning_rate": 1.990108482278446e-05, "loss": 0.6359, "step": 4322 }, { "epoch": 0.09168416364446141, "grad_norm": 0.34214234352111816, "learning_rate": 1.9901038026573235e-05, "loss": 0.5664, "step": 4323 }, { "epoch": 0.09170537210239443, "grad_norm": 0.3018989861011505, "learning_rate": 1.9900991219350166e-05, "loss": 0.5064, "step": 4324 }, { "epoch": 0.09172658056032745, "grad_norm": 0.3195086121559143, "learning_rate": 1.99009444011153e-05, "loss": 0.5515, "step": 4325 }, { "epoch": 0.09174778901826049, "grad_norm": 0.3551144301891327, "learning_rate": 1.9900897571868687e-05, "loss": 0.5496, "step": 4326 }, { "epoch": 0.0917689974761935, "grad_norm": 0.3614080846309662, "learning_rate": 1.9900850731610384e-05, "loss": 0.5807, "step": 4327 }, { "epoch": 0.09179020593412653, "grad_norm": 0.3268947899341583, "learning_rate": 1.990080388034044e-05, "loss": 0.5907, "step": 4328 }, { "epoch": 0.09181141439205956, "grad_norm": 0.3034144341945648, "learning_rate": 1.990075701805891e-05, "loss": 0.4597, "step": 4329 }, { "epoch": 0.09183262284999258, "grad_norm": 0.3268119692802429, "learning_rate": 1.9900710144765843e-05, "loss": 0.5869, "step": 4330 }, { "epoch": 0.0918538313079256, "grad_norm": 0.29772046208381653, "learning_rate": 1.990066326046129e-05, "loss": 0.4466, "step": 4331 }, { "epoch": 0.09187503976585862, "grad_norm": 0.32007989287376404, "learning_rate": 1.990061636514531e-05, "loss": 0.5248, "step": 4332 }, { "epoch": 0.09189624822379165, "grad_norm": 1.1295281648635864, "learning_rate": 1.9900569458817943e-05, "loss": 0.5981, "step": 4333 }, { "epoch": 0.09191745668172467, "grad_norm": 0.40056610107421875, "learning_rate": 1.9900522541479256e-05, "loss": 0.5541, "step": 4334 }, { "epoch": 0.09193866513965769, "grad_norm": 0.29222020506858826, "learning_rate": 1.990047561312929e-05, "loss": 0.3679, "step": 4335 }, { "epoch": 0.09195987359759072, "grad_norm": 0.356332391500473, "learning_rate": 1.9900428673768102e-05, "loss": 0.576, "step": 4336 }, { "epoch": 0.09198108205552374, "grad_norm": 0.7691659331321716, "learning_rate": 1.9900381723395744e-05, "loss": 0.5538, "step": 4337 }, { "epoch": 0.09200229051345676, "grad_norm": 0.39518511295318604, "learning_rate": 1.9900334762012265e-05, "loss": 0.5704, "step": 4338 }, { "epoch": 0.0920234989713898, "grad_norm": 0.3678796887397766, "learning_rate": 1.990028778961772e-05, "loss": 0.5169, "step": 4339 }, { "epoch": 0.09204470742932282, "grad_norm": 0.2807145118713379, "learning_rate": 1.9900240806212165e-05, "loss": 0.4542, "step": 4340 }, { "epoch": 0.09206591588725584, "grad_norm": 0.3328563868999481, "learning_rate": 1.990019381179565e-05, "loss": 0.5375, "step": 4341 }, { "epoch": 0.09208712434518886, "grad_norm": 0.360928475856781, "learning_rate": 1.9900146806368215e-05, "loss": 0.588, "step": 4342 }, { "epoch": 0.09210833280312189, "grad_norm": 0.34426164627075195, "learning_rate": 1.9900099789929933e-05, "loss": 0.5001, "step": 4343 }, { "epoch": 0.09212954126105491, "grad_norm": 0.3320837914943695, "learning_rate": 1.9900052762480843e-05, "loss": 0.5388, "step": 4344 }, { "epoch": 0.09215074971898793, "grad_norm": 0.3221137225627899, "learning_rate": 1.9900005724020998e-05, "loss": 0.5521, "step": 4345 }, { "epoch": 0.09217195817692096, "grad_norm": 0.4666212201118469, "learning_rate": 1.9899958674550454e-05, "loss": 0.4731, "step": 4346 }, { "epoch": 0.09219316663485398, "grad_norm": 0.33671990036964417, "learning_rate": 1.989991161406926e-05, "loss": 0.5219, "step": 4347 }, { "epoch": 0.092214375092787, "grad_norm": 0.33714696764945984, "learning_rate": 1.989986454257747e-05, "loss": 0.4828, "step": 4348 }, { "epoch": 0.09223558355072002, "grad_norm": 0.3070316016674042, "learning_rate": 1.989981746007514e-05, "loss": 0.4674, "step": 4349 }, { "epoch": 0.09225679200865305, "grad_norm": 0.30082690715789795, "learning_rate": 1.9899770366562317e-05, "loss": 0.4873, "step": 4350 }, { "epoch": 0.09227800046658607, "grad_norm": 0.33898448944091797, "learning_rate": 1.9899723262039056e-05, "loss": 0.5637, "step": 4351 }, { "epoch": 0.0922992089245191, "grad_norm": 0.48238223791122437, "learning_rate": 1.989967614650541e-05, "loss": 0.539, "step": 4352 }, { "epoch": 0.09232041738245213, "grad_norm": 0.31506434082984924, "learning_rate": 1.9899629019961427e-05, "loss": 0.5106, "step": 4353 }, { "epoch": 0.09234162584038515, "grad_norm": 0.3369409441947937, "learning_rate": 1.9899581882407167e-05, "loss": 0.6393, "step": 4354 }, { "epoch": 0.09236283429831817, "grad_norm": 0.5010576844215393, "learning_rate": 1.9899534733842675e-05, "loss": 0.5491, "step": 4355 }, { "epoch": 0.09238404275625119, "grad_norm": 0.34116530418395996, "learning_rate": 1.9899487574268005e-05, "loss": 0.527, "step": 4356 }, { "epoch": 0.09240525121418422, "grad_norm": 0.31634804606437683, "learning_rate": 1.9899440403683215e-05, "loss": 0.5275, "step": 4357 }, { "epoch": 0.09242645967211724, "grad_norm": 0.31699079275131226, "learning_rate": 1.989939322208835e-05, "loss": 0.4741, "step": 4358 }, { "epoch": 0.09244766813005026, "grad_norm": 0.37637728452682495, "learning_rate": 1.989934602948347e-05, "loss": 0.5455, "step": 4359 }, { "epoch": 0.09246887658798329, "grad_norm": 0.3210213780403137, "learning_rate": 1.9899298825868622e-05, "loss": 0.5289, "step": 4360 }, { "epoch": 0.09249008504591631, "grad_norm": 0.3425774872303009, "learning_rate": 1.9899251611243854e-05, "loss": 0.5822, "step": 4361 }, { "epoch": 0.09251129350384933, "grad_norm": 0.35867932438850403, "learning_rate": 1.989920438560923e-05, "loss": 0.5651, "step": 4362 }, { "epoch": 0.09253250196178237, "grad_norm": 0.3158562183380127, "learning_rate": 1.9899157148964798e-05, "loss": 0.542, "step": 4363 }, { "epoch": 0.09255371041971538, "grad_norm": 0.36216819286346436, "learning_rate": 1.9899109901310604e-05, "loss": 0.5343, "step": 4364 }, { "epoch": 0.0925749188776484, "grad_norm": 0.32227078080177307, "learning_rate": 1.9899062642646713e-05, "loss": 0.6096, "step": 4365 }, { "epoch": 0.09259612733558142, "grad_norm": 0.3351494371891022, "learning_rate": 1.9899015372973162e-05, "loss": 0.6085, "step": 4366 }, { "epoch": 0.09261733579351446, "grad_norm": 0.3304573893547058, "learning_rate": 1.989896809229002e-05, "loss": 0.5526, "step": 4367 }, { "epoch": 0.09263854425144748, "grad_norm": 0.42436492443084717, "learning_rate": 1.9898920800597325e-05, "loss": 0.5615, "step": 4368 }, { "epoch": 0.0926597527093805, "grad_norm": 0.341888964176178, "learning_rate": 1.989887349789514e-05, "loss": 0.5527, "step": 4369 }, { "epoch": 0.09268096116731353, "grad_norm": 0.33371588587760925, "learning_rate": 1.9898826184183514e-05, "loss": 0.5017, "step": 4370 }, { "epoch": 0.09270216962524655, "grad_norm": 0.33436188101768494, "learning_rate": 1.98987788594625e-05, "loss": 0.4944, "step": 4371 }, { "epoch": 0.09272337808317957, "grad_norm": 0.3694901466369629, "learning_rate": 1.9898731523732148e-05, "loss": 0.4878, "step": 4372 }, { "epoch": 0.09274458654111259, "grad_norm": 0.3002481758594513, "learning_rate": 1.9898684176992514e-05, "loss": 0.5379, "step": 4373 }, { "epoch": 0.09276579499904562, "grad_norm": 0.35298794507980347, "learning_rate": 1.989863681924365e-05, "loss": 0.6618, "step": 4374 }, { "epoch": 0.09278700345697864, "grad_norm": 0.4298907518386841, "learning_rate": 1.989858945048561e-05, "loss": 0.5019, "step": 4375 }, { "epoch": 0.09280821191491166, "grad_norm": 0.3592173457145691, "learning_rate": 1.989854207071844e-05, "loss": 0.4531, "step": 4376 }, { "epoch": 0.0928294203728447, "grad_norm": 0.408743679523468, "learning_rate": 1.98984946799422e-05, "loss": 0.4918, "step": 4377 }, { "epoch": 0.09285062883077772, "grad_norm": 0.5238083004951477, "learning_rate": 1.989844727815694e-05, "loss": 0.4955, "step": 4378 }, { "epoch": 0.09287183728871073, "grad_norm": 0.3146871030330658, "learning_rate": 1.9898399865362713e-05, "loss": 0.5373, "step": 4379 }, { "epoch": 0.09289304574664377, "grad_norm": 0.305652379989624, "learning_rate": 1.989835244155957e-05, "loss": 0.5221, "step": 4380 }, { "epoch": 0.09291425420457679, "grad_norm": 0.323775053024292, "learning_rate": 1.989830500674757e-05, "loss": 0.5215, "step": 4381 }, { "epoch": 0.09293546266250981, "grad_norm": 0.3454100489616394, "learning_rate": 1.9898257560926757e-05, "loss": 0.6203, "step": 4382 }, { "epoch": 0.09295667112044283, "grad_norm": 0.3251400291919708, "learning_rate": 1.989821010409719e-05, "loss": 0.4749, "step": 4383 }, { "epoch": 0.09297787957837586, "grad_norm": 0.39074158668518066, "learning_rate": 1.9898162636258917e-05, "loss": 0.5676, "step": 4384 }, { "epoch": 0.09299908803630888, "grad_norm": 0.31725600361824036, "learning_rate": 1.9898115157411996e-05, "loss": 0.5299, "step": 4385 }, { "epoch": 0.0930202964942419, "grad_norm": 0.3065555989742279, "learning_rate": 1.989806766755648e-05, "loss": 0.4704, "step": 4386 }, { "epoch": 0.09304150495217493, "grad_norm": 0.3044293224811554, "learning_rate": 1.9898020166692414e-05, "loss": 0.4746, "step": 4387 }, { "epoch": 0.09306271341010795, "grad_norm": 0.38196173310279846, "learning_rate": 1.989797265481986e-05, "loss": 0.5445, "step": 4388 }, { "epoch": 0.09308392186804097, "grad_norm": 0.3446385860443115, "learning_rate": 1.9897925131938863e-05, "loss": 0.6061, "step": 4389 }, { "epoch": 0.09310513032597399, "grad_norm": 0.42787012457847595, "learning_rate": 1.989787759804948e-05, "loss": 0.6009, "step": 4390 }, { "epoch": 0.09312633878390703, "grad_norm": 0.3689744174480438, "learning_rate": 1.9897830053151765e-05, "loss": 0.4783, "step": 4391 }, { "epoch": 0.09314754724184005, "grad_norm": 0.2889319360256195, "learning_rate": 1.989778249724577e-05, "loss": 0.4772, "step": 4392 }, { "epoch": 0.09316875569977306, "grad_norm": 0.36695849895477295, "learning_rate": 1.9897734930331543e-05, "loss": 0.5268, "step": 4393 }, { "epoch": 0.0931899641577061, "grad_norm": 0.46781983971595764, "learning_rate": 1.9897687352409147e-05, "loss": 0.5887, "step": 4394 }, { "epoch": 0.09321117261563912, "grad_norm": 0.38403305411338806, "learning_rate": 1.9897639763478626e-05, "loss": 0.6154, "step": 4395 }, { "epoch": 0.09323238107357214, "grad_norm": 0.3498455882072449, "learning_rate": 1.9897592163540035e-05, "loss": 0.6165, "step": 4396 }, { "epoch": 0.09325358953150517, "grad_norm": 0.3137850761413574, "learning_rate": 1.9897544552593433e-05, "loss": 0.534, "step": 4397 }, { "epoch": 0.09327479798943819, "grad_norm": 0.3071429133415222, "learning_rate": 1.9897496930638863e-05, "loss": 0.4697, "step": 4398 }, { "epoch": 0.09329600644737121, "grad_norm": 0.3105306625366211, "learning_rate": 1.9897449297676384e-05, "loss": 0.5389, "step": 4399 }, { "epoch": 0.09331721490530423, "grad_norm": 0.33923661708831787, "learning_rate": 1.989740165370605e-05, "loss": 0.5413, "step": 4400 }, { "epoch": 0.09333842336323726, "grad_norm": 0.3434990644454956, "learning_rate": 1.989735399872791e-05, "loss": 0.5693, "step": 4401 }, { "epoch": 0.09335963182117028, "grad_norm": 0.40053579211235046, "learning_rate": 1.989730633274202e-05, "loss": 0.531, "step": 4402 }, { "epoch": 0.0933808402791033, "grad_norm": 0.3538215458393097, "learning_rate": 1.989725865574843e-05, "loss": 0.5873, "step": 4403 }, { "epoch": 0.09340204873703634, "grad_norm": 0.3746947944164276, "learning_rate": 1.9897210967747194e-05, "loss": 0.5458, "step": 4404 }, { "epoch": 0.09342325719496936, "grad_norm": 0.3039323687553406, "learning_rate": 1.989716326873837e-05, "loss": 0.5186, "step": 4405 }, { "epoch": 0.09344446565290238, "grad_norm": 0.32389432191848755, "learning_rate": 1.9897115558722e-05, "loss": 0.4709, "step": 4406 }, { "epoch": 0.0934656741108354, "grad_norm": 0.36144864559173584, "learning_rate": 1.9897067837698156e-05, "loss": 0.5058, "step": 4407 }, { "epoch": 0.09348688256876843, "grad_norm": 0.31782466173171997, "learning_rate": 1.989702010566687e-05, "loss": 0.578, "step": 4408 }, { "epoch": 0.09350809102670145, "grad_norm": 0.3537827134132385, "learning_rate": 1.9896972362628208e-05, "loss": 0.5485, "step": 4409 }, { "epoch": 0.09352929948463447, "grad_norm": 0.38444575667381287, "learning_rate": 1.9896924608582214e-05, "loss": 0.5985, "step": 4410 }, { "epoch": 0.0935505079425675, "grad_norm": 0.3238719403743744, "learning_rate": 1.989687684352895e-05, "loss": 0.5529, "step": 4411 }, { "epoch": 0.09357171640050052, "grad_norm": 0.3304806351661682, "learning_rate": 1.9896829067468466e-05, "loss": 0.5827, "step": 4412 }, { "epoch": 0.09359292485843354, "grad_norm": 0.4127870202064514, "learning_rate": 1.9896781280400815e-05, "loss": 0.5628, "step": 4413 }, { "epoch": 0.09361413331636656, "grad_norm": 0.28980371356010437, "learning_rate": 1.9896733482326048e-05, "loss": 0.4766, "step": 4414 }, { "epoch": 0.0936353417742996, "grad_norm": 0.3192940950393677, "learning_rate": 1.9896685673244224e-05, "loss": 0.5123, "step": 4415 }, { "epoch": 0.09365655023223261, "grad_norm": 0.3066926896572113, "learning_rate": 1.9896637853155386e-05, "loss": 0.434, "step": 4416 }, { "epoch": 0.09367775869016563, "grad_norm": 0.37141749262809753, "learning_rate": 1.9896590022059595e-05, "loss": 0.59, "step": 4417 }, { "epoch": 0.09369896714809867, "grad_norm": 0.3502371311187744, "learning_rate": 1.9896542179956906e-05, "loss": 0.6284, "step": 4418 }, { "epoch": 0.09372017560603169, "grad_norm": 0.4033423364162445, "learning_rate": 1.9896494326847367e-05, "loss": 0.4575, "step": 4419 }, { "epoch": 0.0937413840639647, "grad_norm": 0.339398592710495, "learning_rate": 1.9896446462731033e-05, "loss": 0.5785, "step": 4420 }, { "epoch": 0.09376259252189774, "grad_norm": 0.3496653735637665, "learning_rate": 1.9896398587607958e-05, "loss": 0.4921, "step": 4421 }, { "epoch": 0.09378380097983076, "grad_norm": 0.35781142115592957, "learning_rate": 1.989635070147819e-05, "loss": 0.5752, "step": 4422 }, { "epoch": 0.09380500943776378, "grad_norm": 0.43167492747306824, "learning_rate": 1.9896302804341792e-05, "loss": 0.5062, "step": 4423 }, { "epoch": 0.0938262178956968, "grad_norm": 0.33435407280921936, "learning_rate": 1.989625489619881e-05, "loss": 0.5061, "step": 4424 }, { "epoch": 0.09384742635362983, "grad_norm": 0.3408162295818329, "learning_rate": 1.98962069770493e-05, "loss": 0.4531, "step": 4425 }, { "epoch": 0.09386863481156285, "grad_norm": 0.29091519117355347, "learning_rate": 1.989615904689331e-05, "loss": 0.5281, "step": 4426 }, { "epoch": 0.09388984326949587, "grad_norm": 0.32592126727104187, "learning_rate": 1.9896111105730905e-05, "loss": 0.5538, "step": 4427 }, { "epoch": 0.0939110517274289, "grad_norm": 0.3229294717311859, "learning_rate": 1.989606315356213e-05, "loss": 0.5568, "step": 4428 }, { "epoch": 0.09393226018536192, "grad_norm": 0.35523104667663574, "learning_rate": 1.9896015190387038e-05, "loss": 0.605, "step": 4429 }, { "epoch": 0.09395346864329494, "grad_norm": 0.3297822177410126, "learning_rate": 1.9895967216205682e-05, "loss": 0.6044, "step": 4430 }, { "epoch": 0.09397467710122796, "grad_norm": 0.38151106238365173, "learning_rate": 1.989591923101812e-05, "loss": 0.5251, "step": 4431 }, { "epoch": 0.093995885559161, "grad_norm": 0.3756776452064514, "learning_rate": 1.9895871234824402e-05, "loss": 0.5895, "step": 4432 }, { "epoch": 0.09401709401709402, "grad_norm": 0.34645989537239075, "learning_rate": 1.989582322762458e-05, "loss": 0.5272, "step": 4433 }, { "epoch": 0.09403830247502704, "grad_norm": 0.3767816722393036, "learning_rate": 1.989577520941871e-05, "loss": 0.5535, "step": 4434 }, { "epoch": 0.09405951093296007, "grad_norm": 0.31807470321655273, "learning_rate": 1.9895727180206846e-05, "loss": 0.5315, "step": 4435 }, { "epoch": 0.09408071939089309, "grad_norm": 0.39125683903694153, "learning_rate": 1.989567913998904e-05, "loss": 0.652, "step": 4436 }, { "epoch": 0.09410192784882611, "grad_norm": 0.32748398184776306, "learning_rate": 1.9895631088765347e-05, "loss": 0.5434, "step": 4437 }, { "epoch": 0.09412313630675914, "grad_norm": 0.4159823954105377, "learning_rate": 1.9895583026535818e-05, "loss": 0.5625, "step": 4438 }, { "epoch": 0.09414434476469216, "grad_norm": 0.3073355257511139, "learning_rate": 1.9895534953300504e-05, "loss": 0.488, "step": 4439 }, { "epoch": 0.09416555322262518, "grad_norm": 0.372541218996048, "learning_rate": 1.9895486869059468e-05, "loss": 0.4759, "step": 4440 }, { "epoch": 0.0941867616805582, "grad_norm": 0.3222963213920593, "learning_rate": 1.9895438773812753e-05, "loss": 0.4612, "step": 4441 }, { "epoch": 0.09420797013849123, "grad_norm": 0.3332172632217407, "learning_rate": 1.9895390667560417e-05, "loss": 0.4908, "step": 4442 }, { "epoch": 0.09422917859642425, "grad_norm": 0.32729238271713257, "learning_rate": 1.9895342550302514e-05, "loss": 0.5464, "step": 4443 }, { "epoch": 0.09425038705435727, "grad_norm": 0.31363973021507263, "learning_rate": 1.98952944220391e-05, "loss": 0.4385, "step": 4444 }, { "epoch": 0.09427159551229031, "grad_norm": 0.3649331033229828, "learning_rate": 1.989524628277022e-05, "loss": 0.5603, "step": 4445 }, { "epoch": 0.09429280397022333, "grad_norm": 0.33193087577819824, "learning_rate": 1.9895198132495936e-05, "loss": 0.5488, "step": 4446 }, { "epoch": 0.09431401242815635, "grad_norm": 0.3275814652442932, "learning_rate": 1.98951499712163e-05, "loss": 0.5199, "step": 4447 }, { "epoch": 0.09433522088608937, "grad_norm": 0.39282241463661194, "learning_rate": 1.9895101798931364e-05, "loss": 0.5535, "step": 4448 }, { "epoch": 0.0943564293440224, "grad_norm": 0.31780707836151123, "learning_rate": 1.989505361564118e-05, "loss": 0.4842, "step": 4449 }, { "epoch": 0.09437763780195542, "grad_norm": 0.337049275636673, "learning_rate": 1.9895005421345805e-05, "loss": 0.5046, "step": 4450 }, { "epoch": 0.09439884625988844, "grad_norm": 0.3163037598133087, "learning_rate": 1.9894957216045288e-05, "loss": 0.5302, "step": 4451 }, { "epoch": 0.09442005471782147, "grad_norm": 0.3538593053817749, "learning_rate": 1.989490899973969e-05, "loss": 0.6244, "step": 4452 }, { "epoch": 0.09444126317575449, "grad_norm": 0.3381907641887665, "learning_rate": 1.9894860772429056e-05, "loss": 0.4897, "step": 4453 }, { "epoch": 0.09446247163368751, "grad_norm": 0.2925681173801422, "learning_rate": 1.9894812534113444e-05, "loss": 0.5324, "step": 4454 }, { "epoch": 0.09448368009162054, "grad_norm": 0.3177368640899658, "learning_rate": 1.989476428479291e-05, "loss": 0.5318, "step": 4455 }, { "epoch": 0.09450488854955356, "grad_norm": 0.35735616087913513, "learning_rate": 1.9894716024467504e-05, "loss": 0.5938, "step": 4456 }, { "epoch": 0.09452609700748658, "grad_norm": 0.3281232714653015, "learning_rate": 1.9894667753137283e-05, "loss": 0.4543, "step": 4457 }, { "epoch": 0.0945473054654196, "grad_norm": 0.3319208323955536, "learning_rate": 1.9894619470802298e-05, "loss": 0.5255, "step": 4458 }, { "epoch": 0.09456851392335264, "grad_norm": 0.33583053946495056, "learning_rate": 1.98945711774626e-05, "loss": 0.5547, "step": 4459 }, { "epoch": 0.09458972238128566, "grad_norm": 0.43215349316596985, "learning_rate": 1.9894522873118246e-05, "loss": 0.5224, "step": 4460 }, { "epoch": 0.09461093083921868, "grad_norm": 0.32547056674957275, "learning_rate": 1.989447455776929e-05, "loss": 0.4568, "step": 4461 }, { "epoch": 0.09463213929715171, "grad_norm": 0.32952889800071716, "learning_rate": 1.989442623141579e-05, "loss": 0.5721, "step": 4462 }, { "epoch": 0.09465334775508473, "grad_norm": 0.3184248208999634, "learning_rate": 1.989437789405779e-05, "loss": 0.5221, "step": 4463 }, { "epoch": 0.09467455621301775, "grad_norm": 0.3530489504337311, "learning_rate": 1.989432954569535e-05, "loss": 0.5372, "step": 4464 }, { "epoch": 0.09469576467095077, "grad_norm": 0.3350260257720947, "learning_rate": 1.9894281186328524e-05, "loss": 0.481, "step": 4465 }, { "epoch": 0.0947169731288838, "grad_norm": 0.314751535654068, "learning_rate": 1.9894232815957363e-05, "loss": 0.5203, "step": 4466 }, { "epoch": 0.09473818158681682, "grad_norm": 0.31150782108306885, "learning_rate": 1.9894184434581922e-05, "loss": 0.578, "step": 4467 }, { "epoch": 0.09475939004474984, "grad_norm": 0.3603430688381195, "learning_rate": 1.9894136042202256e-05, "loss": 0.5982, "step": 4468 }, { "epoch": 0.09478059850268288, "grad_norm": 0.3202221989631653, "learning_rate": 1.989408763881842e-05, "loss": 0.5425, "step": 4469 }, { "epoch": 0.0948018069606159, "grad_norm": 0.34498411417007446, "learning_rate": 1.9894039224430463e-05, "loss": 0.5405, "step": 4470 }, { "epoch": 0.09482301541854891, "grad_norm": 0.32656779885292053, "learning_rate": 1.989399079903844e-05, "loss": 0.5356, "step": 4471 }, { "epoch": 0.09484422387648195, "grad_norm": 0.29254767298698425, "learning_rate": 1.9893942362642405e-05, "loss": 0.4916, "step": 4472 }, { "epoch": 0.09486543233441497, "grad_norm": 0.401685506105423, "learning_rate": 1.989389391524242e-05, "loss": 0.5864, "step": 4473 }, { "epoch": 0.09488664079234799, "grad_norm": 0.2964249551296234, "learning_rate": 1.989384545683853e-05, "loss": 0.4997, "step": 4474 }, { "epoch": 0.094907849250281, "grad_norm": 0.35108307003974915, "learning_rate": 1.9893796987430788e-05, "loss": 0.5145, "step": 4475 }, { "epoch": 0.09492905770821404, "grad_norm": 0.3254283368587494, "learning_rate": 1.9893748507019252e-05, "loss": 0.4971, "step": 4476 }, { "epoch": 0.09495026616614706, "grad_norm": 0.31745025515556335, "learning_rate": 1.9893700015603976e-05, "loss": 0.6078, "step": 4477 }, { "epoch": 0.09497147462408008, "grad_norm": 0.3309163749217987, "learning_rate": 1.9893651513185013e-05, "loss": 0.6049, "step": 4478 }, { "epoch": 0.09499268308201311, "grad_norm": 0.3244384825229645, "learning_rate": 1.9893602999762414e-05, "loss": 0.4727, "step": 4479 }, { "epoch": 0.09501389153994613, "grad_norm": 0.3395080864429474, "learning_rate": 1.9893554475336238e-05, "loss": 0.5368, "step": 4480 }, { "epoch": 0.09503509999787915, "grad_norm": 0.30883467197418213, "learning_rate": 1.9893505939906538e-05, "loss": 0.5345, "step": 4481 }, { "epoch": 0.09505630845581217, "grad_norm": 0.3350468873977661, "learning_rate": 1.9893457393473364e-05, "loss": 0.5057, "step": 4482 }, { "epoch": 0.0950775169137452, "grad_norm": 0.3253026008605957, "learning_rate": 1.9893408836036773e-05, "loss": 0.4937, "step": 4483 }, { "epoch": 0.09509872537167822, "grad_norm": 0.3584539592266083, "learning_rate": 1.989336026759682e-05, "loss": 0.5576, "step": 4484 }, { "epoch": 0.09511993382961124, "grad_norm": 0.6515663862228394, "learning_rate": 1.9893311688153556e-05, "loss": 0.5452, "step": 4485 }, { "epoch": 0.09514114228754428, "grad_norm": 0.34361177682876587, "learning_rate": 1.9893263097707036e-05, "loss": 0.5458, "step": 4486 }, { "epoch": 0.0951623507454773, "grad_norm": 0.285316526889801, "learning_rate": 1.9893214496257318e-05, "loss": 0.4941, "step": 4487 }, { "epoch": 0.09518355920341032, "grad_norm": 1.4249850511550903, "learning_rate": 1.9893165883804452e-05, "loss": 0.549, "step": 4488 }, { "epoch": 0.09520476766134334, "grad_norm": 0.3165207803249359, "learning_rate": 1.9893117260348493e-05, "loss": 0.5165, "step": 4489 }, { "epoch": 0.09522597611927637, "grad_norm": 0.28780195116996765, "learning_rate": 1.989306862588949e-05, "loss": 0.4952, "step": 4490 }, { "epoch": 0.09524718457720939, "grad_norm": 1.1789716482162476, "learning_rate": 1.9893019980427507e-05, "loss": 0.4775, "step": 4491 }, { "epoch": 0.09526839303514241, "grad_norm": 0.36443406343460083, "learning_rate": 1.9892971323962592e-05, "loss": 0.5591, "step": 4492 }, { "epoch": 0.09528960149307544, "grad_norm": 0.3294370472431183, "learning_rate": 1.98929226564948e-05, "loss": 0.6014, "step": 4493 }, { "epoch": 0.09531080995100846, "grad_norm": 0.31963425874710083, "learning_rate": 1.9892873978024184e-05, "loss": 0.5404, "step": 4494 }, { "epoch": 0.09533201840894148, "grad_norm": 0.3494456112384796, "learning_rate": 1.98928252885508e-05, "loss": 0.5434, "step": 4495 }, { "epoch": 0.09535322686687452, "grad_norm": 0.36968883872032166, "learning_rate": 1.9892776588074705e-05, "loss": 0.5904, "step": 4496 }, { "epoch": 0.09537443532480754, "grad_norm": 0.33775457739830017, "learning_rate": 1.9892727876595946e-05, "loss": 0.5996, "step": 4497 }, { "epoch": 0.09539564378274056, "grad_norm": 0.4321351945400238, "learning_rate": 1.989267915411458e-05, "loss": 0.4955, "step": 4498 }, { "epoch": 0.09541685224067357, "grad_norm": 0.3264862596988678, "learning_rate": 1.9892630420630668e-05, "loss": 0.5566, "step": 4499 }, { "epoch": 0.09543806069860661, "grad_norm": 0.2990967631340027, "learning_rate": 1.9892581676144255e-05, "loss": 0.4716, "step": 4500 }, { "epoch": 0.09545926915653963, "grad_norm": 0.3305988609790802, "learning_rate": 1.9892532920655398e-05, "loss": 0.463, "step": 4501 }, { "epoch": 0.09548047761447265, "grad_norm": 0.3511427044868469, "learning_rate": 1.9892484154164153e-05, "loss": 0.5094, "step": 4502 }, { "epoch": 0.09550168607240568, "grad_norm": 0.38071078062057495, "learning_rate": 1.9892435376670573e-05, "loss": 0.5586, "step": 4503 }, { "epoch": 0.0955228945303387, "grad_norm": 0.42735767364501953, "learning_rate": 1.9892386588174713e-05, "loss": 0.4892, "step": 4504 }, { "epoch": 0.09554410298827172, "grad_norm": 0.31775060296058655, "learning_rate": 1.9892337788676626e-05, "loss": 0.5178, "step": 4505 }, { "epoch": 0.09556531144620474, "grad_norm": 0.3270639181137085, "learning_rate": 1.9892288978176365e-05, "loss": 0.4724, "step": 4506 }, { "epoch": 0.09558651990413777, "grad_norm": 0.3558734357357025, "learning_rate": 1.989224015667399e-05, "loss": 0.5251, "step": 4507 }, { "epoch": 0.09560772836207079, "grad_norm": 0.33329352736473083, "learning_rate": 1.9892191324169546e-05, "loss": 0.6083, "step": 4508 }, { "epoch": 0.09562893682000381, "grad_norm": 0.33967462182044983, "learning_rate": 1.9892142480663093e-05, "loss": 0.5321, "step": 4509 }, { "epoch": 0.09565014527793685, "grad_norm": 0.307015985250473, "learning_rate": 1.989209362615469e-05, "loss": 0.5125, "step": 4510 }, { "epoch": 0.09567135373586987, "grad_norm": 1.3058781623840332, "learning_rate": 1.989204476064438e-05, "loss": 0.5205, "step": 4511 }, { "epoch": 0.09569256219380289, "grad_norm": 0.4127606451511383, "learning_rate": 1.9891995884132233e-05, "loss": 0.514, "step": 4512 }, { "epoch": 0.09571377065173592, "grad_norm": 0.3024190068244934, "learning_rate": 1.989194699661829e-05, "loss": 0.538, "step": 4513 }, { "epoch": 0.09573497910966894, "grad_norm": 0.3493652641773224, "learning_rate": 1.9891898098102603e-05, "loss": 0.6261, "step": 4514 }, { "epoch": 0.09575618756760196, "grad_norm": 0.3076809346675873, "learning_rate": 1.989184918858524e-05, "loss": 0.5569, "step": 4515 }, { "epoch": 0.09577739602553498, "grad_norm": 0.34455206990242004, "learning_rate": 1.9891800268066248e-05, "loss": 0.5663, "step": 4516 }, { "epoch": 0.09579860448346801, "grad_norm": 0.3694130778312683, "learning_rate": 1.989175133654568e-05, "loss": 0.5101, "step": 4517 }, { "epoch": 0.09581981294140103, "grad_norm": 0.286483496427536, "learning_rate": 1.989170239402359e-05, "loss": 0.4951, "step": 4518 }, { "epoch": 0.09584102139933405, "grad_norm": 0.36598488688468933, "learning_rate": 1.989165344050004e-05, "loss": 0.5733, "step": 4519 }, { "epoch": 0.09586222985726708, "grad_norm": 0.30655738711357117, "learning_rate": 1.9891604475975073e-05, "loss": 0.6017, "step": 4520 }, { "epoch": 0.0958834383152001, "grad_norm": 0.3229007124900818, "learning_rate": 1.9891555500448754e-05, "loss": 0.5658, "step": 4521 }, { "epoch": 0.09590464677313312, "grad_norm": 0.3099710941314697, "learning_rate": 1.989150651392113e-05, "loss": 0.4856, "step": 4522 }, { "epoch": 0.09592585523106614, "grad_norm": 0.31766247749328613, "learning_rate": 1.9891457516392257e-05, "loss": 0.5352, "step": 4523 }, { "epoch": 0.09594706368899918, "grad_norm": 0.32641854882240295, "learning_rate": 1.9891408507862198e-05, "loss": 0.5071, "step": 4524 }, { "epoch": 0.0959682721469322, "grad_norm": 0.43216249346733093, "learning_rate": 1.9891359488330996e-05, "loss": 0.5509, "step": 4525 }, { "epoch": 0.09598948060486522, "grad_norm": 0.30570167303085327, "learning_rate": 1.9891310457798707e-05, "loss": 0.4747, "step": 4526 }, { "epoch": 0.09601068906279825, "grad_norm": 0.419619083404541, "learning_rate": 1.9891261416265394e-05, "loss": 0.4463, "step": 4527 }, { "epoch": 0.09603189752073127, "grad_norm": 0.34940966963768005, "learning_rate": 1.98912123637311e-05, "loss": 0.6268, "step": 4528 }, { "epoch": 0.09605310597866429, "grad_norm": 0.3130711019039154, "learning_rate": 1.989116330019589e-05, "loss": 0.6141, "step": 4529 }, { "epoch": 0.09607431443659732, "grad_norm": 0.41862982511520386, "learning_rate": 1.9891114225659814e-05, "loss": 0.6183, "step": 4530 }, { "epoch": 0.09609552289453034, "grad_norm": 0.32453763484954834, "learning_rate": 1.9891065140122923e-05, "loss": 0.4927, "step": 4531 }, { "epoch": 0.09611673135246336, "grad_norm": 0.3187149167060852, "learning_rate": 1.9891016043585277e-05, "loss": 0.521, "step": 4532 }, { "epoch": 0.09613793981039638, "grad_norm": 0.38007086515426636, "learning_rate": 1.9890966936046928e-05, "loss": 0.5882, "step": 4533 }, { "epoch": 0.09615914826832941, "grad_norm": 0.3537326455116272, "learning_rate": 1.9890917817507936e-05, "loss": 0.5786, "step": 4534 }, { "epoch": 0.09618035672626243, "grad_norm": 0.3280154764652252, "learning_rate": 1.9890868687968348e-05, "loss": 0.5794, "step": 4535 }, { "epoch": 0.09620156518419545, "grad_norm": 0.3195796608924866, "learning_rate": 1.989081954742822e-05, "loss": 0.55, "step": 4536 }, { "epoch": 0.09622277364212849, "grad_norm": 0.344962477684021, "learning_rate": 1.989077039588761e-05, "loss": 0.5726, "step": 4537 }, { "epoch": 0.0962439821000615, "grad_norm": 0.7143914103507996, "learning_rate": 1.9890721233346572e-05, "loss": 0.4386, "step": 4538 }, { "epoch": 0.09626519055799453, "grad_norm": 0.35156694054603577, "learning_rate": 1.989067205980516e-05, "loss": 0.6164, "step": 4539 }, { "epoch": 0.09628639901592755, "grad_norm": 0.4562861919403076, "learning_rate": 1.9890622875263426e-05, "loss": 0.5257, "step": 4540 }, { "epoch": 0.09630760747386058, "grad_norm": 0.34953171014785767, "learning_rate": 1.989057367972143e-05, "loss": 0.5478, "step": 4541 }, { "epoch": 0.0963288159317936, "grad_norm": 0.31084418296813965, "learning_rate": 1.9890524473179226e-05, "loss": 0.5084, "step": 4542 }, { "epoch": 0.09635002438972662, "grad_norm": 0.3326403796672821, "learning_rate": 1.989047525563686e-05, "loss": 0.4623, "step": 4543 }, { "epoch": 0.09637123284765965, "grad_norm": 0.3552161753177643, "learning_rate": 1.9890426027094398e-05, "loss": 0.5544, "step": 4544 }, { "epoch": 0.09639244130559267, "grad_norm": 0.3067667484283447, "learning_rate": 1.9890376787551888e-05, "loss": 0.457, "step": 4545 }, { "epoch": 0.09641364976352569, "grad_norm": 0.5056514739990234, "learning_rate": 1.9890327537009387e-05, "loss": 0.528, "step": 4546 }, { "epoch": 0.09643485822145872, "grad_norm": 0.3541433811187744, "learning_rate": 1.989027827546695e-05, "loss": 0.5843, "step": 4547 }, { "epoch": 0.09645606667939174, "grad_norm": 0.3690759837627411, "learning_rate": 1.9890229002924633e-05, "loss": 0.573, "step": 4548 }, { "epoch": 0.09647727513732476, "grad_norm": 0.3791924715042114, "learning_rate": 1.9890179719382487e-05, "loss": 0.4802, "step": 4549 }, { "epoch": 0.09649848359525778, "grad_norm": 0.3075946867465973, "learning_rate": 1.989013042484057e-05, "loss": 0.5119, "step": 4550 }, { "epoch": 0.09651969205319082, "grad_norm": 0.30717018246650696, "learning_rate": 1.9890081119298935e-05, "loss": 0.5922, "step": 4551 }, { "epoch": 0.09654090051112384, "grad_norm": 0.32813626527786255, "learning_rate": 1.989003180275764e-05, "loss": 0.5216, "step": 4552 }, { "epoch": 0.09656210896905686, "grad_norm": 0.32444095611572266, "learning_rate": 1.9889982475216737e-05, "loss": 0.5268, "step": 4553 }, { "epoch": 0.09658331742698989, "grad_norm": 0.34202155470848083, "learning_rate": 1.988993313667628e-05, "loss": 0.5804, "step": 4554 }, { "epoch": 0.09660452588492291, "grad_norm": 0.3436482846736908, "learning_rate": 1.9889883787136325e-05, "loss": 0.5505, "step": 4555 }, { "epoch": 0.09662573434285593, "grad_norm": 0.3183897137641907, "learning_rate": 1.988983442659693e-05, "loss": 0.4928, "step": 4556 }, { "epoch": 0.09664694280078895, "grad_norm": 0.5295714139938354, "learning_rate": 1.988978505505814e-05, "loss": 0.5586, "step": 4557 }, { "epoch": 0.09666815125872198, "grad_norm": 0.3077634572982788, "learning_rate": 1.9889735672520026e-05, "loss": 0.5116, "step": 4558 }, { "epoch": 0.096689359716655, "grad_norm": 0.34399476647377014, "learning_rate": 1.9889686278982626e-05, "loss": 0.5046, "step": 4559 }, { "epoch": 0.09671056817458802, "grad_norm": 0.32567426562309265, "learning_rate": 1.988963687444601e-05, "loss": 0.5152, "step": 4560 }, { "epoch": 0.09673177663252105, "grad_norm": 0.2883749306201935, "learning_rate": 1.988958745891022e-05, "loss": 0.4881, "step": 4561 }, { "epoch": 0.09675298509045407, "grad_norm": 0.3462862968444824, "learning_rate": 1.988953803237532e-05, "loss": 0.445, "step": 4562 }, { "epoch": 0.0967741935483871, "grad_norm": 0.3135647773742676, "learning_rate": 1.9889488594841363e-05, "loss": 0.5175, "step": 4563 }, { "epoch": 0.09679540200632011, "grad_norm": 0.30782046914100647, "learning_rate": 1.9889439146308397e-05, "loss": 0.5134, "step": 4564 }, { "epoch": 0.09681661046425315, "grad_norm": 0.31112566590309143, "learning_rate": 1.9889389686776485e-05, "loss": 0.4648, "step": 4565 }, { "epoch": 0.09683781892218617, "grad_norm": 0.316299706697464, "learning_rate": 1.9889340216245684e-05, "loss": 0.4922, "step": 4566 }, { "epoch": 0.09685902738011919, "grad_norm": 0.3672685921192169, "learning_rate": 1.9889290734716042e-05, "loss": 0.6279, "step": 4567 }, { "epoch": 0.09688023583805222, "grad_norm": 0.318343847990036, "learning_rate": 1.9889241242187616e-05, "loss": 0.5199, "step": 4568 }, { "epoch": 0.09690144429598524, "grad_norm": 0.3541485667228699, "learning_rate": 1.9889191738660463e-05, "loss": 0.4226, "step": 4569 }, { "epoch": 0.09692265275391826, "grad_norm": 0.351584255695343, "learning_rate": 1.9889142224134636e-05, "loss": 0.567, "step": 4570 }, { "epoch": 0.09694386121185129, "grad_norm": 0.33489128947257996, "learning_rate": 1.988909269861019e-05, "loss": 0.5707, "step": 4571 }, { "epoch": 0.09696506966978431, "grad_norm": 0.3165123164653778, "learning_rate": 1.988904316208718e-05, "loss": 0.6349, "step": 4572 }, { "epoch": 0.09698627812771733, "grad_norm": 0.3240211308002472, "learning_rate": 1.9888993614565666e-05, "loss": 0.5445, "step": 4573 }, { "epoch": 0.09700748658565035, "grad_norm": 0.3240644037723541, "learning_rate": 1.9888944056045696e-05, "loss": 0.6688, "step": 4574 }, { "epoch": 0.09702869504358339, "grad_norm": 0.3584204614162445, "learning_rate": 1.988889448652733e-05, "loss": 0.507, "step": 4575 }, { "epoch": 0.0970499035015164, "grad_norm": 0.31273260712623596, "learning_rate": 1.9888844906010624e-05, "loss": 0.4444, "step": 4576 }, { "epoch": 0.09707111195944942, "grad_norm": 0.328805148601532, "learning_rate": 1.9888795314495627e-05, "loss": 0.5455, "step": 4577 }, { "epoch": 0.09709232041738246, "grad_norm": 0.3205696642398834, "learning_rate": 1.9888745711982395e-05, "loss": 0.5301, "step": 4578 }, { "epoch": 0.09711352887531548, "grad_norm": 0.6204623579978943, "learning_rate": 1.988869609847099e-05, "loss": 0.6334, "step": 4579 }, { "epoch": 0.0971347373332485, "grad_norm": 0.3062382638454437, "learning_rate": 1.988864647396146e-05, "loss": 0.6176, "step": 4580 }, { "epoch": 0.09715594579118152, "grad_norm": 0.35602742433547974, "learning_rate": 1.988859683845387e-05, "loss": 0.5916, "step": 4581 }, { "epoch": 0.09717715424911455, "grad_norm": 0.3133813142776489, "learning_rate": 1.9888547191948264e-05, "loss": 0.5322, "step": 4582 }, { "epoch": 0.09719836270704757, "grad_norm": 0.46264907717704773, "learning_rate": 1.98884975344447e-05, "loss": 0.458, "step": 4583 }, { "epoch": 0.09721957116498059, "grad_norm": 0.35279032588005066, "learning_rate": 1.988844786594324e-05, "loss": 0.5385, "step": 4584 }, { "epoch": 0.09724077962291362, "grad_norm": 0.37048953771591187, "learning_rate": 1.9888398186443927e-05, "loss": 0.5095, "step": 4585 }, { "epoch": 0.09726198808084664, "grad_norm": 0.30374786257743835, "learning_rate": 1.988834849594683e-05, "loss": 0.4768, "step": 4586 }, { "epoch": 0.09728319653877966, "grad_norm": 0.329254150390625, "learning_rate": 1.9888298794451993e-05, "loss": 0.5429, "step": 4587 }, { "epoch": 0.0973044049967127, "grad_norm": 0.3603665828704834, "learning_rate": 1.9888249081959482e-05, "loss": 0.568, "step": 4588 }, { "epoch": 0.09732561345464572, "grad_norm": 0.36108487844467163, "learning_rate": 1.988819935846934e-05, "loss": 0.5916, "step": 4589 }, { "epoch": 0.09734682191257873, "grad_norm": 1.741728663444519, "learning_rate": 1.988814962398163e-05, "loss": 0.4602, "step": 4590 }, { "epoch": 0.09736803037051175, "grad_norm": 0.6854979395866394, "learning_rate": 1.9888099878496407e-05, "loss": 0.5206, "step": 4591 }, { "epoch": 0.09738923882844479, "grad_norm": 0.632657527923584, "learning_rate": 1.9888050122013727e-05, "loss": 0.49, "step": 4592 }, { "epoch": 0.09741044728637781, "grad_norm": 0.42837756872177124, "learning_rate": 1.988800035453364e-05, "loss": 0.6334, "step": 4593 }, { "epoch": 0.09743165574431083, "grad_norm": 0.40940168499946594, "learning_rate": 1.9887950576056208e-05, "loss": 0.5742, "step": 4594 }, { "epoch": 0.09745286420224386, "grad_norm": 0.36052027344703674, "learning_rate": 1.9887900786581483e-05, "loss": 0.5519, "step": 4595 }, { "epoch": 0.09747407266017688, "grad_norm": 0.3086702525615692, "learning_rate": 1.988785098610952e-05, "loss": 0.4971, "step": 4596 }, { "epoch": 0.0974952811181099, "grad_norm": 0.46713101863861084, "learning_rate": 1.9887801174640374e-05, "loss": 0.5355, "step": 4597 }, { "epoch": 0.09751648957604292, "grad_norm": 0.37815675139427185, "learning_rate": 1.98877513521741e-05, "loss": 0.5815, "step": 4598 }, { "epoch": 0.09753769803397595, "grad_norm": 0.3593353033065796, "learning_rate": 1.9887701518710755e-05, "loss": 0.5043, "step": 4599 }, { "epoch": 0.09755890649190897, "grad_norm": 0.34884145855903625, "learning_rate": 1.9887651674250398e-05, "loss": 0.4812, "step": 4600 }, { "epoch": 0.09758011494984199, "grad_norm": 0.29348886013031006, "learning_rate": 1.9887601818793077e-05, "loss": 0.4982, "step": 4601 }, { "epoch": 0.09760132340777503, "grad_norm": 0.3495955765247345, "learning_rate": 1.9887551952338857e-05, "loss": 0.5891, "step": 4602 }, { "epoch": 0.09762253186570805, "grad_norm": 0.311499685049057, "learning_rate": 1.9887502074887783e-05, "loss": 0.5204, "step": 4603 }, { "epoch": 0.09764374032364107, "grad_norm": 0.33400556445121765, "learning_rate": 1.988745218643991e-05, "loss": 0.5078, "step": 4604 }, { "epoch": 0.0976649487815741, "grad_norm": 0.3722502589225769, "learning_rate": 1.988740228699531e-05, "loss": 0.6002, "step": 4605 }, { "epoch": 0.09768615723950712, "grad_norm": 0.33724790811538696, "learning_rate": 1.988735237655402e-05, "loss": 0.5546, "step": 4606 }, { "epoch": 0.09770736569744014, "grad_norm": 0.32900765538215637, "learning_rate": 1.9887302455116106e-05, "loss": 0.5788, "step": 4607 }, { "epoch": 0.09772857415537316, "grad_norm": 0.33876365423202515, "learning_rate": 1.9887252522681616e-05, "loss": 0.4661, "step": 4608 }, { "epoch": 0.09774978261330619, "grad_norm": 0.28997769951820374, "learning_rate": 1.9887202579250613e-05, "loss": 0.4424, "step": 4609 }, { "epoch": 0.09777099107123921, "grad_norm": 0.3115832805633545, "learning_rate": 1.9887152624823147e-05, "loss": 0.5089, "step": 4610 }, { "epoch": 0.09779219952917223, "grad_norm": 0.3361284136772156, "learning_rate": 1.9887102659399276e-05, "loss": 0.5847, "step": 4611 }, { "epoch": 0.09781340798710526, "grad_norm": 0.3491755723953247, "learning_rate": 1.9887052682979057e-05, "loss": 0.5603, "step": 4612 }, { "epoch": 0.09783461644503828, "grad_norm": 0.39544859528541565, "learning_rate": 1.988700269556254e-05, "loss": 0.4608, "step": 4613 }, { "epoch": 0.0978558249029713, "grad_norm": 0.5733045935630798, "learning_rate": 1.988695269714979e-05, "loss": 0.5646, "step": 4614 }, { "epoch": 0.09787703336090432, "grad_norm": 0.3312130570411682, "learning_rate": 1.9886902687740853e-05, "loss": 0.5907, "step": 4615 }, { "epoch": 0.09789824181883736, "grad_norm": 0.35094574093818665, "learning_rate": 1.988685266733579e-05, "loss": 0.5984, "step": 4616 }, { "epoch": 0.09791945027677038, "grad_norm": 0.3617396652698517, "learning_rate": 1.9886802635934654e-05, "loss": 0.5713, "step": 4617 }, { "epoch": 0.0979406587347034, "grad_norm": 0.34143856167793274, "learning_rate": 1.9886752593537504e-05, "loss": 0.5127, "step": 4618 }, { "epoch": 0.09796186719263643, "grad_norm": 0.325784295797348, "learning_rate": 1.9886702540144392e-05, "loss": 0.5155, "step": 4619 }, { "epoch": 0.09798307565056945, "grad_norm": 0.41758474707603455, "learning_rate": 1.9886652475755377e-05, "loss": 0.5529, "step": 4620 }, { "epoch": 0.09800428410850247, "grad_norm": 0.34403252601623535, "learning_rate": 1.988660240037051e-05, "loss": 0.5048, "step": 4621 }, { "epoch": 0.09802549256643549, "grad_norm": 0.3273913264274597, "learning_rate": 1.9886552313989854e-05, "loss": 0.5785, "step": 4622 }, { "epoch": 0.09804670102436852, "grad_norm": 0.32497185468673706, "learning_rate": 1.9886502216613462e-05, "loss": 0.5503, "step": 4623 }, { "epoch": 0.09806790948230154, "grad_norm": 0.2879800796508789, "learning_rate": 1.9886452108241384e-05, "loss": 0.5222, "step": 4624 }, { "epoch": 0.09808911794023456, "grad_norm": 0.3035293221473694, "learning_rate": 1.988640198887368e-05, "loss": 0.51, "step": 4625 }, { "epoch": 0.0981103263981676, "grad_norm": 0.3236549198627472, "learning_rate": 1.9886351858510405e-05, "loss": 0.5238, "step": 4626 }, { "epoch": 0.09813153485610061, "grad_norm": 0.3529897630214691, "learning_rate": 1.9886301717151615e-05, "loss": 0.5753, "step": 4627 }, { "epoch": 0.09815274331403363, "grad_norm": 0.32507842779159546, "learning_rate": 1.9886251564797367e-05, "loss": 0.5503, "step": 4628 }, { "epoch": 0.09817395177196667, "grad_norm": 0.35950371623039246, "learning_rate": 1.9886201401447717e-05, "loss": 0.3686, "step": 4629 }, { "epoch": 0.09819516022989969, "grad_norm": 0.30963096022605896, "learning_rate": 1.9886151227102722e-05, "loss": 0.5579, "step": 4630 }, { "epoch": 0.0982163686878327, "grad_norm": 0.3448413908481598, "learning_rate": 1.988610104176243e-05, "loss": 0.5725, "step": 4631 }, { "epoch": 0.09823757714576573, "grad_norm": 0.3262839615345001, "learning_rate": 1.9886050845426904e-05, "loss": 0.5851, "step": 4632 }, { "epoch": 0.09825878560369876, "grad_norm": 0.3140266537666321, "learning_rate": 1.98860006380962e-05, "loss": 0.5168, "step": 4633 }, { "epoch": 0.09827999406163178, "grad_norm": 0.4103030860424042, "learning_rate": 1.988595041977037e-05, "loss": 0.5354, "step": 4634 }, { "epoch": 0.0983012025195648, "grad_norm": 0.34276437759399414, "learning_rate": 1.9885900190449475e-05, "loss": 0.5148, "step": 4635 }, { "epoch": 0.09832241097749783, "grad_norm": 0.3101278841495514, "learning_rate": 1.9885849950133563e-05, "loss": 0.4609, "step": 4636 }, { "epoch": 0.09834361943543085, "grad_norm": 0.3254052698612213, "learning_rate": 1.98857996988227e-05, "loss": 0.4715, "step": 4637 }, { "epoch": 0.09836482789336387, "grad_norm": 0.41990482807159424, "learning_rate": 1.988574943651693e-05, "loss": 0.4665, "step": 4638 }, { "epoch": 0.09838603635129689, "grad_norm": 0.30186083912849426, "learning_rate": 1.9885699163216323e-05, "loss": 0.5574, "step": 4639 }, { "epoch": 0.09840724480922992, "grad_norm": 0.30279502272605896, "learning_rate": 1.988564887892092e-05, "loss": 0.5074, "step": 4640 }, { "epoch": 0.09842845326716294, "grad_norm": 0.3141637444496155, "learning_rate": 1.988559858363079e-05, "loss": 0.6026, "step": 4641 }, { "epoch": 0.09844966172509596, "grad_norm": 0.31786367297172546, "learning_rate": 1.988554827734598e-05, "loss": 0.4697, "step": 4642 }, { "epoch": 0.098470870183029, "grad_norm": 0.3295067846775055, "learning_rate": 1.988549796006655e-05, "loss": 0.6131, "step": 4643 }, { "epoch": 0.09849207864096202, "grad_norm": 0.5618856549263, "learning_rate": 1.988544763179255e-05, "loss": 0.6034, "step": 4644 }, { "epoch": 0.09851328709889504, "grad_norm": 0.34256884455680847, "learning_rate": 1.9885397292524048e-05, "loss": 0.5535, "step": 4645 }, { "epoch": 0.09853449555682807, "grad_norm": 0.33236217498779297, "learning_rate": 1.9885346942261094e-05, "loss": 0.5015, "step": 4646 }, { "epoch": 0.09855570401476109, "grad_norm": 0.3384269177913666, "learning_rate": 1.9885296581003737e-05, "loss": 0.5344, "step": 4647 }, { "epoch": 0.09857691247269411, "grad_norm": 0.3087598979473114, "learning_rate": 1.988524620875204e-05, "loss": 0.4667, "step": 4648 }, { "epoch": 0.09859812093062713, "grad_norm": 0.3479098081588745, "learning_rate": 1.988519582550606e-05, "loss": 0.5461, "step": 4649 }, { "epoch": 0.09861932938856016, "grad_norm": 0.3533537983894348, "learning_rate": 1.988514543126585e-05, "loss": 0.5663, "step": 4650 }, { "epoch": 0.09864053784649318, "grad_norm": 0.32565125823020935, "learning_rate": 1.988509502603147e-05, "loss": 0.5129, "step": 4651 }, { "epoch": 0.0986617463044262, "grad_norm": 0.36493590474128723, "learning_rate": 1.988504460980297e-05, "loss": 0.5267, "step": 4652 }, { "epoch": 0.09868295476235923, "grad_norm": 0.33870404958724976, "learning_rate": 1.9884994182580412e-05, "loss": 0.4722, "step": 4653 }, { "epoch": 0.09870416322029225, "grad_norm": 0.40114831924438477, "learning_rate": 1.9884943744363844e-05, "loss": 0.5223, "step": 4654 }, { "epoch": 0.09872537167822527, "grad_norm": 0.31374654173851013, "learning_rate": 1.9884893295153334e-05, "loss": 0.5562, "step": 4655 }, { "epoch": 0.0987465801361583, "grad_norm": 0.3300725221633911, "learning_rate": 1.9884842834948926e-05, "loss": 0.5379, "step": 4656 }, { "epoch": 0.09876778859409133, "grad_norm": 0.32290127873420715, "learning_rate": 1.9884792363750684e-05, "loss": 0.5587, "step": 4657 }, { "epoch": 0.09878899705202435, "grad_norm": 0.3209746479988098, "learning_rate": 1.9884741881558666e-05, "loss": 0.5221, "step": 4658 }, { "epoch": 0.09881020550995737, "grad_norm": 0.3155078887939453, "learning_rate": 1.988469138837292e-05, "loss": 0.4892, "step": 4659 }, { "epoch": 0.0988314139678904, "grad_norm": 0.3353123068809509, "learning_rate": 1.9884640884193504e-05, "loss": 0.564, "step": 4660 }, { "epoch": 0.09885262242582342, "grad_norm": 0.31582221388816833, "learning_rate": 1.988459036902048e-05, "loss": 0.4892, "step": 4661 }, { "epoch": 0.09887383088375644, "grad_norm": 0.35179877281188965, "learning_rate": 1.9884539842853896e-05, "loss": 0.5783, "step": 4662 }, { "epoch": 0.09889503934168947, "grad_norm": 0.35144302248954773, "learning_rate": 1.9884489305693817e-05, "loss": 0.5326, "step": 4663 }, { "epoch": 0.09891624779962249, "grad_norm": 0.35379353165626526, "learning_rate": 1.988443875754029e-05, "loss": 0.5465, "step": 4664 }, { "epoch": 0.09893745625755551, "grad_norm": 0.3162241280078888, "learning_rate": 1.9884388198393384e-05, "loss": 0.5912, "step": 4665 }, { "epoch": 0.09895866471548853, "grad_norm": 0.30164268612861633, "learning_rate": 1.9884337628253143e-05, "loss": 0.4424, "step": 4666 }, { "epoch": 0.09897987317342156, "grad_norm": 0.4509297311306, "learning_rate": 1.9884287047119626e-05, "loss": 0.5853, "step": 4667 }, { "epoch": 0.09900108163135458, "grad_norm": 0.31289565563201904, "learning_rate": 1.9884236454992892e-05, "loss": 0.5925, "step": 4668 }, { "epoch": 0.0990222900892876, "grad_norm": 0.3202580511569977, "learning_rate": 1.9884185851872996e-05, "loss": 0.4868, "step": 4669 }, { "epoch": 0.09904349854722064, "grad_norm": 0.32516181468963623, "learning_rate": 1.9884135237759998e-05, "loss": 0.5241, "step": 4670 }, { "epoch": 0.09906470700515366, "grad_norm": 0.34096020460128784, "learning_rate": 1.9884084612653945e-05, "loss": 0.5114, "step": 4671 }, { "epoch": 0.09908591546308668, "grad_norm": 0.7410527467727661, "learning_rate": 1.9884033976554898e-05, "loss": 0.5322, "step": 4672 }, { "epoch": 0.0991071239210197, "grad_norm": 0.31486281752586365, "learning_rate": 1.9883983329462923e-05, "loss": 0.4207, "step": 4673 }, { "epoch": 0.09912833237895273, "grad_norm": 0.34802699089050293, "learning_rate": 1.988393267137806e-05, "loss": 0.5731, "step": 4674 }, { "epoch": 0.09914954083688575, "grad_norm": 0.3306048810482025, "learning_rate": 1.9883882002300374e-05, "loss": 0.4833, "step": 4675 }, { "epoch": 0.09917074929481877, "grad_norm": 0.3217979073524475, "learning_rate": 1.988383132222992e-05, "loss": 0.5488, "step": 4676 }, { "epoch": 0.0991919577527518, "grad_norm": 0.34785348176956177, "learning_rate": 1.9883780631166756e-05, "loss": 0.6126, "step": 4677 }, { "epoch": 0.09921316621068482, "grad_norm": 0.32879891991615295, "learning_rate": 1.988372992911094e-05, "loss": 0.5136, "step": 4678 }, { "epoch": 0.09923437466861784, "grad_norm": 0.2992701530456543, "learning_rate": 1.9883679216062523e-05, "loss": 0.5474, "step": 4679 }, { "epoch": 0.09925558312655088, "grad_norm": 0.345975786447525, "learning_rate": 1.988362849202156e-05, "loss": 0.5277, "step": 4680 }, { "epoch": 0.0992767915844839, "grad_norm": 0.3354499936103821, "learning_rate": 1.9883577756988118e-05, "loss": 0.4946, "step": 4681 }, { "epoch": 0.09929800004241691, "grad_norm": 0.3048204481601715, "learning_rate": 1.9883527010962244e-05, "loss": 0.5354, "step": 4682 }, { "epoch": 0.09931920850034993, "grad_norm": 0.3194414973258972, "learning_rate": 1.9883476253943992e-05, "loss": 0.524, "step": 4683 }, { "epoch": 0.09934041695828297, "grad_norm": 0.31029990315437317, "learning_rate": 1.988342548593343e-05, "loss": 0.5206, "step": 4684 }, { "epoch": 0.09936162541621599, "grad_norm": 0.30989736318588257, "learning_rate": 1.9883374706930603e-05, "loss": 0.5233, "step": 4685 }, { "epoch": 0.099382833874149, "grad_norm": 0.33542120456695557, "learning_rate": 1.9883323916935577e-05, "loss": 0.5585, "step": 4686 }, { "epoch": 0.09940404233208204, "grad_norm": 0.3378335237503052, "learning_rate": 1.98832731159484e-05, "loss": 0.5409, "step": 4687 }, { "epoch": 0.09942525079001506, "grad_norm": 0.3194883167743683, "learning_rate": 1.988322230396914e-05, "loss": 0.5534, "step": 4688 }, { "epoch": 0.09944645924794808, "grad_norm": 0.32380303740501404, "learning_rate": 1.988317148099784e-05, "loss": 0.5681, "step": 4689 }, { "epoch": 0.0994676677058811, "grad_norm": 0.35316482186317444, "learning_rate": 1.988312064703456e-05, "loss": 0.5645, "step": 4690 }, { "epoch": 0.09948887616381413, "grad_norm": 0.3295019268989563, "learning_rate": 1.9883069802079365e-05, "loss": 0.4218, "step": 4691 }, { "epoch": 0.09951008462174715, "grad_norm": 0.3663923442363739, "learning_rate": 1.98830189461323e-05, "loss": 0.6102, "step": 4692 }, { "epoch": 0.09953129307968017, "grad_norm": 0.32498371601104736, "learning_rate": 1.9882968079193432e-05, "loss": 0.5493, "step": 4693 }, { "epoch": 0.0995525015376132, "grad_norm": 0.31943726539611816, "learning_rate": 1.9882917201262813e-05, "loss": 0.5552, "step": 4694 }, { "epoch": 0.09957370999554623, "grad_norm": 0.3629741370677948, "learning_rate": 1.9882866312340495e-05, "loss": 0.5543, "step": 4695 }, { "epoch": 0.09959491845347924, "grad_norm": 0.35248255729675293, "learning_rate": 1.9882815412426542e-05, "loss": 0.4593, "step": 4696 }, { "epoch": 0.09961612691141226, "grad_norm": 0.342454195022583, "learning_rate": 1.988276450152101e-05, "loss": 0.5917, "step": 4697 }, { "epoch": 0.0996373353693453, "grad_norm": 0.3184548020362854, "learning_rate": 1.988271357962395e-05, "loss": 0.5102, "step": 4698 }, { "epoch": 0.09965854382727832, "grad_norm": 0.3565083146095276, "learning_rate": 1.988266264673542e-05, "loss": 0.5239, "step": 4699 }, { "epoch": 0.09967975228521134, "grad_norm": 0.32884839177131653, "learning_rate": 1.988261170285548e-05, "loss": 0.5359, "step": 4700 }, { "epoch": 0.09970096074314437, "grad_norm": 0.3305744230747223, "learning_rate": 1.9882560747984186e-05, "loss": 0.5013, "step": 4701 }, { "epoch": 0.09972216920107739, "grad_norm": 0.2984376847743988, "learning_rate": 1.9882509782121593e-05, "loss": 0.5347, "step": 4702 }, { "epoch": 0.09974337765901041, "grad_norm": 0.29696229100227356, "learning_rate": 1.988245880526776e-05, "loss": 0.5136, "step": 4703 }, { "epoch": 0.09976458611694344, "grad_norm": 0.3322579562664032, "learning_rate": 1.988240781742274e-05, "loss": 0.6153, "step": 4704 }, { "epoch": 0.09978579457487646, "grad_norm": 0.32851314544677734, "learning_rate": 1.9882356818586593e-05, "loss": 0.5682, "step": 4705 }, { "epoch": 0.09980700303280948, "grad_norm": 0.3924388587474823, "learning_rate": 1.9882305808759375e-05, "loss": 0.5571, "step": 4706 }, { "epoch": 0.0998282114907425, "grad_norm": 0.39304667711257935, "learning_rate": 1.9882254787941142e-05, "loss": 0.4824, "step": 4707 }, { "epoch": 0.09984941994867554, "grad_norm": 0.3280963599681854, "learning_rate": 1.988220375613195e-05, "loss": 0.5271, "step": 4708 }, { "epoch": 0.09987062840660856, "grad_norm": 0.3413145840167999, "learning_rate": 1.9882152713331856e-05, "loss": 0.5705, "step": 4709 }, { "epoch": 0.09989183686454157, "grad_norm": 0.33981961011886597, "learning_rate": 1.988210165954092e-05, "loss": 0.6212, "step": 4710 }, { "epoch": 0.09991304532247461, "grad_norm": 0.31659913063049316, "learning_rate": 1.9882050594759197e-05, "loss": 0.5309, "step": 4711 }, { "epoch": 0.09993425378040763, "grad_norm": 0.3147604465484619, "learning_rate": 1.988199951898674e-05, "loss": 0.5494, "step": 4712 }, { "epoch": 0.09995546223834065, "grad_norm": 0.34396523237228394, "learning_rate": 1.988194843222361e-05, "loss": 0.534, "step": 4713 }, { "epoch": 0.09997667069627367, "grad_norm": 0.4512636065483093, "learning_rate": 1.9881897334469867e-05, "loss": 0.5229, "step": 4714 }, { "epoch": 0.0999978791542067, "grad_norm": 0.3430407643318176, "learning_rate": 1.988184622572556e-05, "loss": 0.5091, "step": 4715 }, { "epoch": 0.10001908761213972, "grad_norm": 0.3244439363479614, "learning_rate": 1.988179510599075e-05, "loss": 0.5256, "step": 4716 }, { "epoch": 0.10004029607007274, "grad_norm": 0.345962256193161, "learning_rate": 1.988174397526549e-05, "loss": 0.6642, "step": 4717 }, { "epoch": 0.10006150452800577, "grad_norm": 0.3319436013698578, "learning_rate": 1.9881692833549848e-05, "loss": 0.5219, "step": 4718 }, { "epoch": 0.1000827129859388, "grad_norm": 0.3985510766506195, "learning_rate": 1.9881641680843865e-05, "loss": 0.4483, "step": 4719 }, { "epoch": 0.10010392144387181, "grad_norm": 0.3172174394130707, "learning_rate": 1.988159051714761e-05, "loss": 0.4561, "step": 4720 }, { "epoch": 0.10012512990180485, "grad_norm": 0.4193243980407715, "learning_rate": 1.9881539342461136e-05, "loss": 0.5423, "step": 4721 }, { "epoch": 0.10014633835973787, "grad_norm": 0.2857676148414612, "learning_rate": 1.9881488156784497e-05, "loss": 0.5186, "step": 4722 }, { "epoch": 0.10016754681767089, "grad_norm": 0.2930758595466614, "learning_rate": 1.9881436960117756e-05, "loss": 0.4707, "step": 4723 }, { "epoch": 0.1001887552756039, "grad_norm": 0.33678463101387024, "learning_rate": 1.9881385752460965e-05, "loss": 0.5645, "step": 4724 }, { "epoch": 0.10020996373353694, "grad_norm": 0.3468639552593231, "learning_rate": 1.9881334533814182e-05, "loss": 0.5558, "step": 4725 }, { "epoch": 0.10023117219146996, "grad_norm": 0.3177781403064728, "learning_rate": 1.9881283304177465e-05, "loss": 0.4771, "step": 4726 }, { "epoch": 0.10025238064940298, "grad_norm": 0.2995133697986603, "learning_rate": 1.9881232063550872e-05, "loss": 0.4899, "step": 4727 }, { "epoch": 0.10027358910733601, "grad_norm": 0.33923742175102234, "learning_rate": 1.9881180811934457e-05, "loss": 0.5732, "step": 4728 }, { "epoch": 0.10029479756526903, "grad_norm": 0.34477680921554565, "learning_rate": 1.9881129549328277e-05, "loss": 0.4915, "step": 4729 }, { "epoch": 0.10031600602320205, "grad_norm": 0.44889727234840393, "learning_rate": 1.9881078275732394e-05, "loss": 0.5976, "step": 4730 }, { "epoch": 0.10033721448113507, "grad_norm": 0.4542688727378845, "learning_rate": 1.988102699114686e-05, "loss": 0.5504, "step": 4731 }, { "epoch": 0.1003584229390681, "grad_norm": 0.3620469272136688, "learning_rate": 1.9880975695571733e-05, "loss": 0.5418, "step": 4732 }, { "epoch": 0.10037963139700112, "grad_norm": 0.3500796854496002, "learning_rate": 1.988092438900707e-05, "loss": 0.5219, "step": 4733 }, { "epoch": 0.10040083985493414, "grad_norm": 0.32715198397636414, "learning_rate": 1.9880873071452932e-05, "loss": 0.4681, "step": 4734 }, { "epoch": 0.10042204831286718, "grad_norm": 0.2884206473827362, "learning_rate": 1.9880821742909374e-05, "loss": 0.4473, "step": 4735 }, { "epoch": 0.1004432567708002, "grad_norm": 0.30037298798561096, "learning_rate": 1.9880770403376445e-05, "loss": 0.5108, "step": 4736 }, { "epoch": 0.10046446522873322, "grad_norm": 0.3569730222225189, "learning_rate": 1.9880719052854215e-05, "loss": 0.5529, "step": 4737 }, { "epoch": 0.10048567368666625, "grad_norm": 0.29793357849121094, "learning_rate": 1.9880667691342732e-05, "loss": 0.4979, "step": 4738 }, { "epoch": 0.10050688214459927, "grad_norm": 0.41576701402664185, "learning_rate": 1.9880616318842057e-05, "loss": 0.5189, "step": 4739 }, { "epoch": 0.10052809060253229, "grad_norm": 0.2951081693172455, "learning_rate": 1.9880564935352248e-05, "loss": 0.5579, "step": 4740 }, { "epoch": 0.10054929906046531, "grad_norm": 0.28744781017303467, "learning_rate": 1.988051354087336e-05, "loss": 0.486, "step": 4741 }, { "epoch": 0.10057050751839834, "grad_norm": 0.3387874364852905, "learning_rate": 1.9880462135405447e-05, "loss": 0.5198, "step": 4742 }, { "epoch": 0.10059171597633136, "grad_norm": 0.3305411636829376, "learning_rate": 1.9880410718948573e-05, "loss": 0.6062, "step": 4743 }, { "epoch": 0.10061292443426438, "grad_norm": 0.3111690878868103, "learning_rate": 1.9880359291502792e-05, "loss": 0.5696, "step": 4744 }, { "epoch": 0.10063413289219741, "grad_norm": 0.31529513001441956, "learning_rate": 1.988030785306816e-05, "loss": 0.5256, "step": 4745 }, { "epoch": 0.10065534135013043, "grad_norm": 0.3442894220352173, "learning_rate": 1.9880256403644738e-05, "loss": 0.5922, "step": 4746 }, { "epoch": 0.10067654980806345, "grad_norm": 0.3410980999469757, "learning_rate": 1.988020494323258e-05, "loss": 0.5308, "step": 4747 }, { "epoch": 0.10069775826599647, "grad_norm": 0.3567905128002167, "learning_rate": 1.9880153471831742e-05, "loss": 0.592, "step": 4748 }, { "epoch": 0.1007189667239295, "grad_norm": 0.30347520112991333, "learning_rate": 1.9880101989442285e-05, "loss": 0.5167, "step": 4749 }, { "epoch": 0.10074017518186253, "grad_norm": 0.4023325741291046, "learning_rate": 1.9880050496064262e-05, "loss": 0.5184, "step": 4750 }, { "epoch": 0.10076138363979555, "grad_norm": 0.3591653108596802, "learning_rate": 1.9879998991697732e-05, "loss": 0.5908, "step": 4751 }, { "epoch": 0.10078259209772858, "grad_norm": 0.3449101150035858, "learning_rate": 1.987994747634276e-05, "loss": 0.4909, "step": 4752 }, { "epoch": 0.1008038005556616, "grad_norm": 0.3276667296886444, "learning_rate": 1.987989594999939e-05, "loss": 0.5326, "step": 4753 }, { "epoch": 0.10082500901359462, "grad_norm": 0.3082127571105957, "learning_rate": 1.9879844412667685e-05, "loss": 0.4421, "step": 4754 }, { "epoch": 0.10084621747152765, "grad_norm": 0.31872251629829407, "learning_rate": 1.9879792864347704e-05, "loss": 0.5058, "step": 4755 }, { "epoch": 0.10086742592946067, "grad_norm": 0.3085978329181671, "learning_rate": 1.9879741305039503e-05, "loss": 0.5131, "step": 4756 }, { "epoch": 0.10088863438739369, "grad_norm": 0.3096526861190796, "learning_rate": 1.9879689734743143e-05, "loss": 0.5595, "step": 4757 }, { "epoch": 0.10090984284532671, "grad_norm": 0.32066935300827026, "learning_rate": 1.9879638153458676e-05, "loss": 0.5351, "step": 4758 }, { "epoch": 0.10093105130325974, "grad_norm": 0.319484680891037, "learning_rate": 1.987958656118616e-05, "loss": 0.4997, "step": 4759 }, { "epoch": 0.10095225976119276, "grad_norm": 0.8057374358177185, "learning_rate": 1.9879534957925655e-05, "loss": 0.5775, "step": 4760 }, { "epoch": 0.10097346821912578, "grad_norm": 0.3408023715019226, "learning_rate": 1.9879483343677217e-05, "loss": 0.5703, "step": 4761 }, { "epoch": 0.10099467667705882, "grad_norm": 0.35434359312057495, "learning_rate": 1.98794317184409e-05, "loss": 0.5462, "step": 4762 }, { "epoch": 0.10101588513499184, "grad_norm": 0.3651542067527771, "learning_rate": 1.987938008221677e-05, "loss": 0.556, "step": 4763 }, { "epoch": 0.10103709359292486, "grad_norm": 0.31989359855651855, "learning_rate": 1.9879328435004878e-05, "loss": 0.6621, "step": 4764 }, { "epoch": 0.10105830205085788, "grad_norm": 0.2918999493122101, "learning_rate": 1.987927677680528e-05, "loss": 0.4886, "step": 4765 }, { "epoch": 0.10107951050879091, "grad_norm": 0.3136514127254486, "learning_rate": 1.9879225107618036e-05, "loss": 0.5215, "step": 4766 }, { "epoch": 0.10110071896672393, "grad_norm": 0.33890819549560547, "learning_rate": 1.9879173427443207e-05, "loss": 0.5254, "step": 4767 }, { "epoch": 0.10112192742465695, "grad_norm": 0.3491463363170624, "learning_rate": 1.9879121736280847e-05, "loss": 0.5189, "step": 4768 }, { "epoch": 0.10114313588258998, "grad_norm": 0.31226807832717896, "learning_rate": 1.987907003413101e-05, "loss": 0.5065, "step": 4769 }, { "epoch": 0.101164344340523, "grad_norm": 0.33235064148902893, "learning_rate": 1.987901832099376e-05, "loss": 0.5483, "step": 4770 }, { "epoch": 0.10118555279845602, "grad_norm": 0.34939834475517273, "learning_rate": 1.987896659686915e-05, "loss": 0.4626, "step": 4771 }, { "epoch": 0.10120676125638904, "grad_norm": 0.3369830250740051, "learning_rate": 1.987891486175724e-05, "loss": 0.5021, "step": 4772 }, { "epoch": 0.10122796971432207, "grad_norm": 0.3213105797767639, "learning_rate": 1.987886311565809e-05, "loss": 0.549, "step": 4773 }, { "epoch": 0.1012491781722551, "grad_norm": 0.3310515582561493, "learning_rate": 1.9878811358571747e-05, "loss": 0.4717, "step": 4774 }, { "epoch": 0.10127038663018811, "grad_norm": 0.35905921459198, "learning_rate": 1.9878759590498284e-05, "loss": 0.5799, "step": 4775 }, { "epoch": 0.10129159508812115, "grad_norm": 0.3324420154094696, "learning_rate": 1.9878707811437742e-05, "loss": 0.5225, "step": 4776 }, { "epoch": 0.10131280354605417, "grad_norm": 0.28709638118743896, "learning_rate": 1.9878656021390194e-05, "loss": 0.5683, "step": 4777 }, { "epoch": 0.10133401200398719, "grad_norm": 0.29652297496795654, "learning_rate": 1.987860422035569e-05, "loss": 0.4647, "step": 4778 }, { "epoch": 0.10135522046192022, "grad_norm": 0.2936772108078003, "learning_rate": 1.9878552408334284e-05, "loss": 0.5464, "step": 4779 }, { "epoch": 0.10137642891985324, "grad_norm": 0.31105801463127136, "learning_rate": 1.987850058532604e-05, "loss": 0.5302, "step": 4780 }, { "epoch": 0.10139763737778626, "grad_norm": 0.365197092294693, "learning_rate": 1.987844875133101e-05, "loss": 0.5095, "step": 4781 }, { "epoch": 0.10141884583571928, "grad_norm": 0.3909362256526947, "learning_rate": 1.9878396906349264e-05, "loss": 0.5023, "step": 4782 }, { "epoch": 0.10144005429365231, "grad_norm": 0.3411078453063965, "learning_rate": 1.9878345050380844e-05, "loss": 0.4993, "step": 4783 }, { "epoch": 0.10146126275158533, "grad_norm": 0.3316592276096344, "learning_rate": 1.9878293183425815e-05, "loss": 0.5875, "step": 4784 }, { "epoch": 0.10148247120951835, "grad_norm": 0.3567280173301697, "learning_rate": 1.9878241305484236e-05, "loss": 0.5573, "step": 4785 }, { "epoch": 0.10150367966745139, "grad_norm": 0.3348449766635895, "learning_rate": 1.9878189416556162e-05, "loss": 0.4442, "step": 4786 }, { "epoch": 0.1015248881253844, "grad_norm": 0.3034769594669342, "learning_rate": 1.9878137516641647e-05, "loss": 0.5131, "step": 4787 }, { "epoch": 0.10154609658331742, "grad_norm": 0.344794899225235, "learning_rate": 1.987808560574076e-05, "loss": 0.5863, "step": 4788 }, { "epoch": 0.10156730504125044, "grad_norm": 0.2857883870601654, "learning_rate": 1.987803368385355e-05, "loss": 0.477, "step": 4789 }, { "epoch": 0.10158851349918348, "grad_norm": 0.297713041305542, "learning_rate": 1.9877981750980075e-05, "loss": 0.5747, "step": 4790 }, { "epoch": 0.1016097219571165, "grad_norm": 0.3060615360736847, "learning_rate": 1.9877929807120397e-05, "loss": 0.5128, "step": 4791 }, { "epoch": 0.10163093041504952, "grad_norm": 0.31836843490600586, "learning_rate": 1.987787785227457e-05, "loss": 0.5914, "step": 4792 }, { "epoch": 0.10165213887298255, "grad_norm": 0.32932931184768677, "learning_rate": 1.9877825886442656e-05, "loss": 0.5173, "step": 4793 }, { "epoch": 0.10167334733091557, "grad_norm": 0.3151474893093109, "learning_rate": 1.9877773909624705e-05, "loss": 0.5737, "step": 4794 }, { "epoch": 0.10169455578884859, "grad_norm": 0.3126355707645416, "learning_rate": 1.987772192182078e-05, "loss": 0.4505, "step": 4795 }, { "epoch": 0.10171576424678162, "grad_norm": 0.28346624970436096, "learning_rate": 1.9877669923030942e-05, "loss": 0.4863, "step": 4796 }, { "epoch": 0.10173697270471464, "grad_norm": 0.33252209424972534, "learning_rate": 1.9877617913255244e-05, "loss": 0.4924, "step": 4797 }, { "epoch": 0.10175818116264766, "grad_norm": 0.309929758310318, "learning_rate": 1.9877565892493743e-05, "loss": 0.4757, "step": 4798 }, { "epoch": 0.10177938962058068, "grad_norm": 0.34478652477264404, "learning_rate": 1.9877513860746502e-05, "loss": 0.566, "step": 4799 }, { "epoch": 0.10180059807851372, "grad_norm": 0.4221493899822235, "learning_rate": 1.9877461818013575e-05, "loss": 0.53, "step": 4800 }, { "epoch": 0.10182180653644674, "grad_norm": 0.29194721579551697, "learning_rate": 1.9877409764295018e-05, "loss": 0.5385, "step": 4801 }, { "epoch": 0.10184301499437975, "grad_norm": 0.32825955748558044, "learning_rate": 1.9877357699590894e-05, "loss": 0.4709, "step": 4802 }, { "epoch": 0.10186422345231279, "grad_norm": 0.3340986371040344, "learning_rate": 1.987730562390126e-05, "loss": 0.5121, "step": 4803 }, { "epoch": 0.10188543191024581, "grad_norm": 0.32285231351852417, "learning_rate": 1.987725353722617e-05, "loss": 0.5056, "step": 4804 }, { "epoch": 0.10190664036817883, "grad_norm": 0.3374912738800049, "learning_rate": 1.9877201439565684e-05, "loss": 0.6028, "step": 4805 }, { "epoch": 0.10192784882611185, "grad_norm": 0.3362709879875183, "learning_rate": 1.9877149330919866e-05, "loss": 0.6017, "step": 4806 }, { "epoch": 0.10194905728404488, "grad_norm": 0.31229469180107117, "learning_rate": 1.9877097211288762e-05, "loss": 0.5831, "step": 4807 }, { "epoch": 0.1019702657419779, "grad_norm": 0.35101965069770813, "learning_rate": 1.987704508067244e-05, "loss": 0.5513, "step": 4808 }, { "epoch": 0.10199147419991092, "grad_norm": 0.3307977020740509, "learning_rate": 1.9876992939070952e-05, "loss": 0.5131, "step": 4809 }, { "epoch": 0.10201268265784395, "grad_norm": 0.344899445772171, "learning_rate": 1.9876940786484358e-05, "loss": 0.6575, "step": 4810 }, { "epoch": 0.10203389111577697, "grad_norm": 0.3451317846775055, "learning_rate": 1.987688862291272e-05, "loss": 0.4904, "step": 4811 }, { "epoch": 0.10205509957370999, "grad_norm": 0.392956018447876, "learning_rate": 1.987683644835609e-05, "loss": 0.5933, "step": 4812 }, { "epoch": 0.10207630803164303, "grad_norm": 0.3374994397163391, "learning_rate": 1.9876784262814525e-05, "loss": 0.5582, "step": 4813 }, { "epoch": 0.10209751648957605, "grad_norm": 0.3216346204280853, "learning_rate": 1.987673206628809e-05, "loss": 0.5622, "step": 4814 }, { "epoch": 0.10211872494750907, "grad_norm": 0.3975948393344879, "learning_rate": 1.9876679858776837e-05, "loss": 0.6538, "step": 4815 }, { "epoch": 0.10213993340544208, "grad_norm": 0.35353395342826843, "learning_rate": 1.987662764028083e-05, "loss": 0.5398, "step": 4816 }, { "epoch": 0.10216114186337512, "grad_norm": 0.30971670150756836, "learning_rate": 1.987657541080012e-05, "loss": 0.5183, "step": 4817 }, { "epoch": 0.10218235032130814, "grad_norm": 0.2921152412891388, "learning_rate": 1.9876523170334773e-05, "loss": 0.5121, "step": 4818 }, { "epoch": 0.10220355877924116, "grad_norm": 0.3159913122653961, "learning_rate": 1.9876470918884838e-05, "loss": 0.5507, "step": 4819 }, { "epoch": 0.10222476723717419, "grad_norm": 0.3333420753479004, "learning_rate": 1.987641865645038e-05, "loss": 0.5355, "step": 4820 }, { "epoch": 0.10224597569510721, "grad_norm": 0.33442479372024536, "learning_rate": 1.9876366383031455e-05, "loss": 0.6505, "step": 4821 }, { "epoch": 0.10226718415304023, "grad_norm": 0.33735090494155884, "learning_rate": 1.987631409862812e-05, "loss": 0.5802, "step": 4822 }, { "epoch": 0.10228839261097325, "grad_norm": 0.44911712408065796, "learning_rate": 1.9876261803240434e-05, "loss": 0.4667, "step": 4823 }, { "epoch": 0.10230960106890628, "grad_norm": 0.3438829481601715, "learning_rate": 1.987620949686846e-05, "loss": 0.505, "step": 4824 }, { "epoch": 0.1023308095268393, "grad_norm": 0.3136049807071686, "learning_rate": 1.987615717951225e-05, "loss": 0.5188, "step": 4825 }, { "epoch": 0.10235201798477232, "grad_norm": 0.3375575840473175, "learning_rate": 1.9876104851171857e-05, "loss": 0.5012, "step": 4826 }, { "epoch": 0.10237322644270536, "grad_norm": 0.4015757739543915, "learning_rate": 1.9876052511847353e-05, "loss": 0.5343, "step": 4827 }, { "epoch": 0.10239443490063838, "grad_norm": 0.29875659942626953, "learning_rate": 1.9876000161538786e-05, "loss": 0.5699, "step": 4828 }, { "epoch": 0.1024156433585714, "grad_norm": 0.3381369113922119, "learning_rate": 1.9875947800246218e-05, "loss": 0.5202, "step": 4829 }, { "epoch": 0.10243685181650442, "grad_norm": 0.6289122700691223, "learning_rate": 1.987589542796971e-05, "loss": 0.5742, "step": 4830 }, { "epoch": 0.10245806027443745, "grad_norm": 0.30079978704452515, "learning_rate": 1.9875843044709314e-05, "loss": 0.4717, "step": 4831 }, { "epoch": 0.10247926873237047, "grad_norm": 0.34210410714149475, "learning_rate": 1.987579065046509e-05, "loss": 0.5828, "step": 4832 }, { "epoch": 0.10250047719030349, "grad_norm": 0.42304718494415283, "learning_rate": 1.9875738245237096e-05, "loss": 0.5522, "step": 4833 }, { "epoch": 0.10252168564823652, "grad_norm": 0.5053495168685913, "learning_rate": 1.98756858290254e-05, "loss": 0.5523, "step": 4834 }, { "epoch": 0.10254289410616954, "grad_norm": 0.33197420835494995, "learning_rate": 1.9875633401830042e-05, "loss": 0.4638, "step": 4835 }, { "epoch": 0.10256410256410256, "grad_norm": 0.39764848351478577, "learning_rate": 1.9875580963651093e-05, "loss": 0.5515, "step": 4836 }, { "epoch": 0.1025853110220356, "grad_norm": 0.3342152535915375, "learning_rate": 1.9875528514488615e-05, "loss": 0.5766, "step": 4837 }, { "epoch": 0.10260651947996861, "grad_norm": 0.4252210259437561, "learning_rate": 1.9875476054342653e-05, "loss": 0.5112, "step": 4838 }, { "epoch": 0.10262772793790163, "grad_norm": 0.3607790470123291, "learning_rate": 1.9875423583213273e-05, "loss": 0.5109, "step": 4839 }, { "epoch": 0.10264893639583465, "grad_norm": 0.3158769905567169, "learning_rate": 1.9875371101100535e-05, "loss": 0.596, "step": 4840 }, { "epoch": 0.10267014485376769, "grad_norm": 0.32496732473373413, "learning_rate": 1.9875318608004493e-05, "loss": 0.5, "step": 4841 }, { "epoch": 0.1026913533117007, "grad_norm": 0.34635642170906067, "learning_rate": 1.987526610392521e-05, "loss": 0.4453, "step": 4842 }, { "epoch": 0.10271256176963373, "grad_norm": 0.3380424976348877, "learning_rate": 1.987521358886274e-05, "loss": 0.549, "step": 4843 }, { "epoch": 0.10273377022756676, "grad_norm": 0.35251665115356445, "learning_rate": 1.9875161062817145e-05, "loss": 0.6155, "step": 4844 }, { "epoch": 0.10275497868549978, "grad_norm": 0.3378262221813202, "learning_rate": 1.987510852578848e-05, "loss": 0.4635, "step": 4845 }, { "epoch": 0.1027761871434328, "grad_norm": 0.36002060770988464, "learning_rate": 1.9875055977776805e-05, "loss": 0.6108, "step": 4846 }, { "epoch": 0.10279739560136582, "grad_norm": 0.302638977766037, "learning_rate": 1.987500341878218e-05, "loss": 0.5211, "step": 4847 }, { "epoch": 0.10281860405929885, "grad_norm": 0.34823572635650635, "learning_rate": 1.987495084880466e-05, "loss": 0.5821, "step": 4848 }, { "epoch": 0.10283981251723187, "grad_norm": 0.32837700843811035, "learning_rate": 1.9874898267844306e-05, "loss": 0.5529, "step": 4849 }, { "epoch": 0.10286102097516489, "grad_norm": 0.3542337715625763, "learning_rate": 1.9874845675901177e-05, "loss": 0.6017, "step": 4850 }, { "epoch": 0.10288222943309792, "grad_norm": 0.32835862040519714, "learning_rate": 1.987479307297533e-05, "loss": 0.537, "step": 4851 }, { "epoch": 0.10290343789103094, "grad_norm": 0.3100719153881073, "learning_rate": 1.9874740459066823e-05, "loss": 0.5522, "step": 4852 }, { "epoch": 0.10292464634896396, "grad_norm": 0.29192861914634705, "learning_rate": 1.9874687834175718e-05, "loss": 0.514, "step": 4853 }, { "epoch": 0.102945854806897, "grad_norm": 0.3395257890224457, "learning_rate": 1.987463519830207e-05, "loss": 0.446, "step": 4854 }, { "epoch": 0.10296706326483002, "grad_norm": 0.35960718989372253, "learning_rate": 1.987458255144594e-05, "loss": 0.5456, "step": 4855 }, { "epoch": 0.10298827172276304, "grad_norm": 0.3519428074359894, "learning_rate": 1.987452989360738e-05, "loss": 0.4928, "step": 4856 }, { "epoch": 0.10300948018069606, "grad_norm": 0.3367151618003845, "learning_rate": 1.9874477224786455e-05, "loss": 0.6181, "step": 4857 }, { "epoch": 0.10303068863862909, "grad_norm": 0.3754948377609253, "learning_rate": 1.9874424544983224e-05, "loss": 0.6228, "step": 4858 }, { "epoch": 0.10305189709656211, "grad_norm": 0.33620816469192505, "learning_rate": 1.9874371854197743e-05, "loss": 0.4939, "step": 4859 }, { "epoch": 0.10307310555449513, "grad_norm": 0.38827750086784363, "learning_rate": 1.9874319152430073e-05, "loss": 0.5449, "step": 4860 }, { "epoch": 0.10309431401242816, "grad_norm": 0.35374510288238525, "learning_rate": 1.987426643968027e-05, "loss": 0.5275, "step": 4861 }, { "epoch": 0.10311552247036118, "grad_norm": 0.32007262110710144, "learning_rate": 1.9874213715948394e-05, "loss": 0.4887, "step": 4862 }, { "epoch": 0.1031367309282942, "grad_norm": 0.35080352425575256, "learning_rate": 1.98741609812345e-05, "loss": 0.5297, "step": 4863 }, { "epoch": 0.10315793938622722, "grad_norm": 0.3223437964916229, "learning_rate": 1.9874108235538655e-05, "loss": 0.5239, "step": 4864 }, { "epoch": 0.10317914784416025, "grad_norm": 0.31108710169792175, "learning_rate": 1.987405547886091e-05, "loss": 0.5471, "step": 4865 }, { "epoch": 0.10320035630209327, "grad_norm": 0.3243173360824585, "learning_rate": 1.9874002711201326e-05, "loss": 0.5116, "step": 4866 }, { "epoch": 0.1032215647600263, "grad_norm": 0.3991662561893463, "learning_rate": 1.9873949932559965e-05, "loss": 0.5751, "step": 4867 }, { "epoch": 0.10324277321795933, "grad_norm": 0.32322025299072266, "learning_rate": 1.987389714293688e-05, "loss": 0.5597, "step": 4868 }, { "epoch": 0.10326398167589235, "grad_norm": 0.3560025095939636, "learning_rate": 1.9873844342332133e-05, "loss": 0.4711, "step": 4869 }, { "epoch": 0.10328519013382537, "grad_norm": 0.3511524200439453, "learning_rate": 1.9873791530745778e-05, "loss": 0.4919, "step": 4870 }, { "epoch": 0.1033063985917584, "grad_norm": 0.2835848033428192, "learning_rate": 1.9873738708177884e-05, "loss": 0.4657, "step": 4871 }, { "epoch": 0.10332760704969142, "grad_norm": 0.43762439489364624, "learning_rate": 1.98736858746285e-05, "loss": 0.4756, "step": 4872 }, { "epoch": 0.10334881550762444, "grad_norm": 0.33787283301353455, "learning_rate": 1.987363303009769e-05, "loss": 0.5037, "step": 4873 }, { "epoch": 0.10337002396555746, "grad_norm": 1.2443207502365112, "learning_rate": 1.987358017458551e-05, "loss": 0.6195, "step": 4874 }, { "epoch": 0.10339123242349049, "grad_norm": 0.34063032269477844, "learning_rate": 1.987352730809202e-05, "loss": 0.5491, "step": 4875 }, { "epoch": 0.10341244088142351, "grad_norm": 0.35227108001708984, "learning_rate": 1.987347443061728e-05, "loss": 0.5039, "step": 4876 }, { "epoch": 0.10343364933935653, "grad_norm": 0.33016178011894226, "learning_rate": 1.9873421542161347e-05, "loss": 0.5222, "step": 4877 }, { "epoch": 0.10345485779728956, "grad_norm": 0.3462058901786804, "learning_rate": 1.987336864272428e-05, "loss": 0.5637, "step": 4878 }, { "epoch": 0.10347606625522258, "grad_norm": 0.40650734305381775, "learning_rate": 1.987331573230614e-05, "loss": 0.5245, "step": 4879 }, { "epoch": 0.1034972747131556, "grad_norm": 0.34823501110076904, "learning_rate": 1.987326281090698e-05, "loss": 0.5366, "step": 4880 }, { "epoch": 0.10351848317108862, "grad_norm": 0.35458508133888245, "learning_rate": 1.9873209878526865e-05, "loss": 0.455, "step": 4881 }, { "epoch": 0.10353969162902166, "grad_norm": 0.30394816398620605, "learning_rate": 1.9873156935165852e-05, "loss": 0.5551, "step": 4882 }, { "epoch": 0.10356090008695468, "grad_norm": 0.31599748134613037, "learning_rate": 1.9873103980824e-05, "loss": 0.4485, "step": 4883 }, { "epoch": 0.1035821085448877, "grad_norm": 0.3318248391151428, "learning_rate": 1.9873051015501364e-05, "loss": 0.4914, "step": 4884 }, { "epoch": 0.10360331700282073, "grad_norm": 0.29527509212493896, "learning_rate": 1.9872998039198014e-05, "loss": 0.5438, "step": 4885 }, { "epoch": 0.10362452546075375, "grad_norm": 0.34318941831588745, "learning_rate": 1.9872945051913996e-05, "loss": 0.5713, "step": 4886 }, { "epoch": 0.10364573391868677, "grad_norm": 0.37542077898979187, "learning_rate": 1.987289205364937e-05, "loss": 0.5812, "step": 4887 }, { "epoch": 0.1036669423766198, "grad_norm": 0.47580066323280334, "learning_rate": 1.9872839044404205e-05, "loss": 0.5119, "step": 4888 }, { "epoch": 0.10368815083455282, "grad_norm": 0.3487319052219391, "learning_rate": 1.9872786024178558e-05, "loss": 0.5057, "step": 4889 }, { "epoch": 0.10370935929248584, "grad_norm": 0.37866559624671936, "learning_rate": 1.9872732992972477e-05, "loss": 0.5798, "step": 4890 }, { "epoch": 0.10373056775041886, "grad_norm": 0.3221296966075897, "learning_rate": 1.987267995078603e-05, "loss": 0.5208, "step": 4891 }, { "epoch": 0.1037517762083519, "grad_norm": 0.33678826689720154, "learning_rate": 1.9872626897619277e-05, "loss": 0.5325, "step": 4892 }, { "epoch": 0.10377298466628491, "grad_norm": 0.32211121916770935, "learning_rate": 1.987257383347227e-05, "loss": 0.4962, "step": 4893 }, { "epoch": 0.10379419312421793, "grad_norm": 0.2905695140361786, "learning_rate": 1.9872520758345075e-05, "loss": 0.4928, "step": 4894 }, { "epoch": 0.10381540158215097, "grad_norm": 0.3263687491416931, "learning_rate": 1.9872467672237747e-05, "loss": 0.4937, "step": 4895 }, { "epoch": 0.10383661004008399, "grad_norm": 0.31785041093826294, "learning_rate": 1.9872414575150345e-05, "loss": 0.5762, "step": 4896 }, { "epoch": 0.10385781849801701, "grad_norm": 0.5250889658927917, "learning_rate": 1.9872361467082932e-05, "loss": 0.533, "step": 4897 }, { "epoch": 0.10387902695595003, "grad_norm": 0.2836602032184601, "learning_rate": 1.9872308348035563e-05, "loss": 0.4473, "step": 4898 }, { "epoch": 0.10390023541388306, "grad_norm": 0.2896232604980469, "learning_rate": 1.98722552180083e-05, "loss": 0.4792, "step": 4899 }, { "epoch": 0.10392144387181608, "grad_norm": 0.4176860749721527, "learning_rate": 1.98722020770012e-05, "loss": 0.5742, "step": 4900 }, { "epoch": 0.1039426523297491, "grad_norm": 0.3366117477416992, "learning_rate": 1.9872148925014318e-05, "loss": 0.5556, "step": 4901 }, { "epoch": 0.10396386078768213, "grad_norm": 0.3381725549697876, "learning_rate": 1.987209576204772e-05, "loss": 0.5709, "step": 4902 }, { "epoch": 0.10398506924561515, "grad_norm": 0.34647655487060547, "learning_rate": 1.9872042588101464e-05, "loss": 0.5723, "step": 4903 }, { "epoch": 0.10400627770354817, "grad_norm": 0.3399573564529419, "learning_rate": 1.987198940317561e-05, "loss": 0.5166, "step": 4904 }, { "epoch": 0.10402748616148119, "grad_norm": 0.3232472240924835, "learning_rate": 1.9871936207270216e-05, "loss": 0.5036, "step": 4905 }, { "epoch": 0.10404869461941423, "grad_norm": 0.34397831559181213, "learning_rate": 1.9871883000385334e-05, "loss": 0.5991, "step": 4906 }, { "epoch": 0.10406990307734724, "grad_norm": 0.345439612865448, "learning_rate": 1.9871829782521035e-05, "loss": 0.5335, "step": 4907 }, { "epoch": 0.10409111153528026, "grad_norm": 0.30029812455177307, "learning_rate": 1.987177655367737e-05, "loss": 0.526, "step": 4908 }, { "epoch": 0.1041123199932133, "grad_norm": 0.3249521553516388, "learning_rate": 1.9871723313854402e-05, "loss": 0.5457, "step": 4909 }, { "epoch": 0.10413352845114632, "grad_norm": 0.3753929138183594, "learning_rate": 1.987167006305219e-05, "loss": 0.6197, "step": 4910 }, { "epoch": 0.10415473690907934, "grad_norm": 0.3134445548057556, "learning_rate": 1.987161680127079e-05, "loss": 0.5042, "step": 4911 }, { "epoch": 0.10417594536701237, "grad_norm": 0.3590386211872101, "learning_rate": 1.9871563528510266e-05, "loss": 0.5122, "step": 4912 }, { "epoch": 0.10419715382494539, "grad_norm": 0.3268589973449707, "learning_rate": 1.987151024477067e-05, "loss": 0.4533, "step": 4913 }, { "epoch": 0.10421836228287841, "grad_norm": 0.3545893430709839, "learning_rate": 1.9871456950052073e-05, "loss": 0.6485, "step": 4914 }, { "epoch": 0.10423957074081143, "grad_norm": 0.3118310570716858, "learning_rate": 1.9871403644354523e-05, "loss": 0.551, "step": 4915 }, { "epoch": 0.10426077919874446, "grad_norm": 0.34830114245414734, "learning_rate": 1.9871350327678085e-05, "loss": 0.5685, "step": 4916 }, { "epoch": 0.10428198765667748, "grad_norm": 0.3739362359046936, "learning_rate": 1.9871297000022815e-05, "loss": 0.6651, "step": 4917 }, { "epoch": 0.1043031961146105, "grad_norm": 0.3268127739429474, "learning_rate": 1.9871243661388778e-05, "loss": 0.5027, "step": 4918 }, { "epoch": 0.10432440457254354, "grad_norm": 0.3062172830104828, "learning_rate": 1.9871190311776026e-05, "loss": 0.5355, "step": 4919 }, { "epoch": 0.10434561303047656, "grad_norm": 0.6152536869049072, "learning_rate": 1.9871136951184622e-05, "loss": 0.4791, "step": 4920 }, { "epoch": 0.10436682148840958, "grad_norm": 0.32612940669059753, "learning_rate": 1.9871083579614624e-05, "loss": 0.4837, "step": 4921 }, { "epoch": 0.1043880299463426, "grad_norm": 0.3188636898994446, "learning_rate": 1.9871030197066097e-05, "loss": 0.4583, "step": 4922 }, { "epoch": 0.10440923840427563, "grad_norm": 0.3304890990257263, "learning_rate": 1.9870976803539094e-05, "loss": 0.5924, "step": 4923 }, { "epoch": 0.10443044686220865, "grad_norm": 0.31664106249809265, "learning_rate": 1.9870923399033673e-05, "loss": 0.5327, "step": 4924 }, { "epoch": 0.10445165532014167, "grad_norm": 0.32941651344299316, "learning_rate": 1.9870869983549903e-05, "loss": 0.5453, "step": 4925 }, { "epoch": 0.1044728637780747, "grad_norm": 0.3371448814868927, "learning_rate": 1.9870816557087833e-05, "loss": 0.4958, "step": 4926 }, { "epoch": 0.10449407223600772, "grad_norm": 0.3289448916912079, "learning_rate": 1.987076311964753e-05, "loss": 0.4559, "step": 4927 }, { "epoch": 0.10451528069394074, "grad_norm": 0.35639166831970215, "learning_rate": 1.9870709671229047e-05, "loss": 0.5272, "step": 4928 }, { "epoch": 0.10453648915187377, "grad_norm": 0.38981565833091736, "learning_rate": 1.9870656211832446e-05, "loss": 0.545, "step": 4929 }, { "epoch": 0.1045576976098068, "grad_norm": 0.31874579191207886, "learning_rate": 1.9870602741457786e-05, "loss": 0.5736, "step": 4930 }, { "epoch": 0.10457890606773981, "grad_norm": 0.4341548979282379, "learning_rate": 1.9870549260105132e-05, "loss": 0.5678, "step": 4931 }, { "epoch": 0.10460011452567283, "grad_norm": 0.35604575276374817, "learning_rate": 1.9870495767774535e-05, "loss": 0.5331, "step": 4932 }, { "epoch": 0.10462132298360587, "grad_norm": 0.5751118063926697, "learning_rate": 1.987044226446606e-05, "loss": 0.524, "step": 4933 }, { "epoch": 0.10464253144153889, "grad_norm": 0.33798304200172424, "learning_rate": 1.9870388750179767e-05, "loss": 0.5027, "step": 4934 }, { "epoch": 0.1046637398994719, "grad_norm": 0.35563239455223083, "learning_rate": 1.987033522491571e-05, "loss": 0.5634, "step": 4935 }, { "epoch": 0.10468494835740494, "grad_norm": 0.3318108320236206, "learning_rate": 1.987028168867395e-05, "loss": 0.5366, "step": 4936 }, { "epoch": 0.10470615681533796, "grad_norm": 0.393306165933609, "learning_rate": 1.987022814145455e-05, "loss": 0.5963, "step": 4937 }, { "epoch": 0.10472736527327098, "grad_norm": 0.31514880061149597, "learning_rate": 1.9870174583257574e-05, "loss": 0.506, "step": 4938 }, { "epoch": 0.104748573731204, "grad_norm": 0.3915211260318756, "learning_rate": 1.9870121014083067e-05, "loss": 0.5711, "step": 4939 }, { "epoch": 0.10476978218913703, "grad_norm": 0.3581075668334961, "learning_rate": 1.9870067433931104e-05, "loss": 0.6858, "step": 4940 }, { "epoch": 0.10479099064707005, "grad_norm": 0.3025328516960144, "learning_rate": 1.9870013842801733e-05, "loss": 0.4784, "step": 4941 }, { "epoch": 0.10481219910500307, "grad_norm": 0.337475061416626, "learning_rate": 1.9869960240695022e-05, "loss": 0.5309, "step": 4942 }, { "epoch": 0.1048334075629361, "grad_norm": 0.34338438510894775, "learning_rate": 1.986990662761102e-05, "loss": 0.5924, "step": 4943 }, { "epoch": 0.10485461602086912, "grad_norm": 0.37871474027633667, "learning_rate": 1.9869853003549802e-05, "loss": 0.5575, "step": 4944 }, { "epoch": 0.10487582447880214, "grad_norm": 0.3265075385570526, "learning_rate": 1.9869799368511416e-05, "loss": 0.5368, "step": 4945 }, { "epoch": 0.10489703293673518, "grad_norm": 0.4316108226776123, "learning_rate": 1.9869745722495923e-05, "loss": 0.5881, "step": 4946 }, { "epoch": 0.1049182413946682, "grad_norm": 0.36257004737854004, "learning_rate": 1.9869692065503387e-05, "loss": 0.4932, "step": 4947 }, { "epoch": 0.10493944985260122, "grad_norm": 0.328954815864563, "learning_rate": 1.9869638397533864e-05, "loss": 0.5712, "step": 4948 }, { "epoch": 0.10496065831053424, "grad_norm": 0.34242767095565796, "learning_rate": 1.9869584718587414e-05, "loss": 0.5444, "step": 4949 }, { "epoch": 0.10498186676846727, "grad_norm": 0.31134670972824097, "learning_rate": 1.98695310286641e-05, "loss": 0.4631, "step": 4950 }, { "epoch": 0.10500307522640029, "grad_norm": 0.32797789573669434, "learning_rate": 1.986947732776398e-05, "loss": 0.5525, "step": 4951 }, { "epoch": 0.10502428368433331, "grad_norm": 0.35191449522972107, "learning_rate": 1.986942361588711e-05, "loss": 0.5433, "step": 4952 }, { "epoch": 0.10504549214226634, "grad_norm": 0.40776851773262024, "learning_rate": 1.9869369893033553e-05, "loss": 0.5452, "step": 4953 }, { "epoch": 0.10506670060019936, "grad_norm": 0.3447471559047699, "learning_rate": 1.986931615920337e-05, "loss": 0.5274, "step": 4954 }, { "epoch": 0.10508790905813238, "grad_norm": 0.4365485906600952, "learning_rate": 1.986926241439662e-05, "loss": 0.5288, "step": 4955 }, { "epoch": 0.1051091175160654, "grad_norm": 0.3895895779132843, "learning_rate": 1.9869208658613363e-05, "loss": 0.5707, "step": 4956 }, { "epoch": 0.10513032597399843, "grad_norm": 0.3624267578125, "learning_rate": 1.9869154891853655e-05, "loss": 0.5835, "step": 4957 }, { "epoch": 0.10515153443193145, "grad_norm": 0.3220037519931793, "learning_rate": 1.986910111411756e-05, "loss": 0.5406, "step": 4958 }, { "epoch": 0.10517274288986447, "grad_norm": 0.30801481008529663, "learning_rate": 1.9869047325405136e-05, "loss": 0.5532, "step": 4959 }, { "epoch": 0.1051939513477975, "grad_norm": 0.3861192464828491, "learning_rate": 1.9868993525716443e-05, "loss": 0.5884, "step": 4960 }, { "epoch": 0.10521515980573053, "grad_norm": 0.3329497277736664, "learning_rate": 1.9868939715051542e-05, "loss": 0.5847, "step": 4961 }, { "epoch": 0.10523636826366355, "grad_norm": 0.32109296321868896, "learning_rate": 1.986888589341049e-05, "loss": 0.4808, "step": 4962 }, { "epoch": 0.10525757672159658, "grad_norm": 0.36402666568756104, "learning_rate": 1.986883206079335e-05, "loss": 0.5892, "step": 4963 }, { "epoch": 0.1052787851795296, "grad_norm": 0.35429784655570984, "learning_rate": 1.986877821720018e-05, "loss": 0.5444, "step": 4964 }, { "epoch": 0.10529999363746262, "grad_norm": 0.3445490002632141, "learning_rate": 1.9868724362631045e-05, "loss": 0.5708, "step": 4965 }, { "epoch": 0.10532120209539564, "grad_norm": 0.35178864002227783, "learning_rate": 1.9868670497085994e-05, "loss": 0.6455, "step": 4966 }, { "epoch": 0.10534241055332867, "grad_norm": 0.3463272452354431, "learning_rate": 1.9868616620565098e-05, "loss": 0.541, "step": 4967 }, { "epoch": 0.10536361901126169, "grad_norm": 0.3137154281139374, "learning_rate": 1.986856273306841e-05, "loss": 0.5242, "step": 4968 }, { "epoch": 0.10538482746919471, "grad_norm": 0.36592331528663635, "learning_rate": 1.9868508834595993e-05, "loss": 0.514, "step": 4969 }, { "epoch": 0.10540603592712774, "grad_norm": 0.3521188795566559, "learning_rate": 1.9868454925147907e-05, "loss": 0.4964, "step": 4970 }, { "epoch": 0.10542724438506076, "grad_norm": 0.3056684732437134, "learning_rate": 1.986840100472421e-05, "loss": 0.5444, "step": 4971 }, { "epoch": 0.10544845284299378, "grad_norm": 0.3149590790271759, "learning_rate": 1.986834707332496e-05, "loss": 0.495, "step": 4972 }, { "epoch": 0.1054696613009268, "grad_norm": 0.32442420721054077, "learning_rate": 1.9868293130950228e-05, "loss": 0.5358, "step": 4973 }, { "epoch": 0.10549086975885984, "grad_norm": 0.2985241115093231, "learning_rate": 1.9868239177600062e-05, "loss": 0.4805, "step": 4974 }, { "epoch": 0.10551207821679286, "grad_norm": 0.38540345430374146, "learning_rate": 1.9868185213274524e-05, "loss": 0.5049, "step": 4975 }, { "epoch": 0.10553328667472588, "grad_norm": 0.34600645303726196, "learning_rate": 1.986813123797368e-05, "loss": 0.5688, "step": 4976 }, { "epoch": 0.10555449513265891, "grad_norm": 0.35304203629493713, "learning_rate": 1.9868077251697583e-05, "loss": 0.6238, "step": 4977 }, { "epoch": 0.10557570359059193, "grad_norm": 0.3647240698337555, "learning_rate": 1.98680232544463e-05, "loss": 0.5199, "step": 4978 }, { "epoch": 0.10559691204852495, "grad_norm": 0.3521049916744232, "learning_rate": 1.9867969246219883e-05, "loss": 0.5391, "step": 4979 }, { "epoch": 0.10561812050645797, "grad_norm": 0.36736008524894714, "learning_rate": 1.98679152270184e-05, "loss": 0.6018, "step": 4980 }, { "epoch": 0.105639328964391, "grad_norm": 0.4301984906196594, "learning_rate": 1.9867861196841902e-05, "loss": 0.5443, "step": 4981 }, { "epoch": 0.10566053742232402, "grad_norm": 0.4488684833049774, "learning_rate": 1.986780715569046e-05, "loss": 0.501, "step": 4982 }, { "epoch": 0.10568174588025704, "grad_norm": 0.34257203340530396, "learning_rate": 1.9867753103564124e-05, "loss": 0.5076, "step": 4983 }, { "epoch": 0.10570295433819007, "grad_norm": 0.31381767988204956, "learning_rate": 1.9867699040462964e-05, "loss": 0.5418, "step": 4984 }, { "epoch": 0.1057241627961231, "grad_norm": 0.3553372323513031, "learning_rate": 1.986764496638703e-05, "loss": 0.4799, "step": 4985 }, { "epoch": 0.10574537125405611, "grad_norm": 0.30996209383010864, "learning_rate": 1.9867590881336388e-05, "loss": 0.5628, "step": 4986 }, { "epoch": 0.10576657971198915, "grad_norm": 0.3096071183681488, "learning_rate": 1.98675367853111e-05, "loss": 0.4216, "step": 4987 }, { "epoch": 0.10578778816992217, "grad_norm": 0.30526793003082275, "learning_rate": 1.9867482678311224e-05, "loss": 0.451, "step": 4988 }, { "epoch": 0.10580899662785519, "grad_norm": 0.3232988119125366, "learning_rate": 1.9867428560336813e-05, "loss": 0.5473, "step": 4989 }, { "epoch": 0.1058302050857882, "grad_norm": 0.3476572632789612, "learning_rate": 1.9867374431387942e-05, "loss": 0.4871, "step": 4990 }, { "epoch": 0.10585141354372124, "grad_norm": 0.3366141617298126, "learning_rate": 1.9867320291464657e-05, "loss": 0.4938, "step": 4991 }, { "epoch": 0.10587262200165426, "grad_norm": 0.3147006332874298, "learning_rate": 1.9867266140567024e-05, "loss": 0.5494, "step": 4992 }, { "epoch": 0.10589383045958728, "grad_norm": 0.3333834111690521, "learning_rate": 1.9867211978695103e-05, "loss": 0.5607, "step": 4993 }, { "epoch": 0.10591503891752031, "grad_norm": 0.3831862211227417, "learning_rate": 1.986715780584896e-05, "loss": 0.5501, "step": 4994 }, { "epoch": 0.10593624737545333, "grad_norm": 0.3445526361465454, "learning_rate": 1.9867103622028646e-05, "loss": 0.4673, "step": 4995 }, { "epoch": 0.10595745583338635, "grad_norm": 0.3446250855922699, "learning_rate": 1.9867049427234225e-05, "loss": 0.4715, "step": 4996 }, { "epoch": 0.10597866429131937, "grad_norm": 0.3271624743938446, "learning_rate": 1.9866995221465757e-05, "loss": 0.5151, "step": 4997 }, { "epoch": 0.1059998727492524, "grad_norm": 0.31980255246162415, "learning_rate": 1.9866941004723306e-05, "loss": 0.5645, "step": 4998 }, { "epoch": 0.10602108120718542, "grad_norm": 0.32752975821495056, "learning_rate": 1.9866886777006924e-05, "loss": 0.5379, "step": 4999 }, { "epoch": 0.10604228966511844, "grad_norm": 0.3325035572052002, "learning_rate": 1.986683253831668e-05, "loss": 0.523, "step": 5000 }, { "epoch": 0.10606349812305148, "grad_norm": 0.30366548895835876, "learning_rate": 1.9866778288652627e-05, "loss": 0.4096, "step": 5001 }, { "epoch": 0.1060847065809845, "grad_norm": 0.3694078028202057, "learning_rate": 1.9866724028014832e-05, "loss": 0.5841, "step": 5002 }, { "epoch": 0.10610591503891752, "grad_norm": 0.3631089925765991, "learning_rate": 1.9866669756403352e-05, "loss": 0.5507, "step": 5003 }, { "epoch": 0.10612712349685055, "grad_norm": 0.3252786099910736, "learning_rate": 1.986661547381825e-05, "loss": 0.5135, "step": 5004 }, { "epoch": 0.10614833195478357, "grad_norm": 0.3063773214817047, "learning_rate": 1.9866561180259576e-05, "loss": 0.4487, "step": 5005 }, { "epoch": 0.10616954041271659, "grad_norm": 0.36573705077171326, "learning_rate": 1.98665068757274e-05, "loss": 0.5415, "step": 5006 }, { "epoch": 0.10619074887064961, "grad_norm": 0.2952653765678406, "learning_rate": 1.986645256022179e-05, "loss": 0.4688, "step": 5007 }, { "epoch": 0.10621195732858264, "grad_norm": 0.3623667359352112, "learning_rate": 1.986639823374279e-05, "loss": 0.5185, "step": 5008 }, { "epoch": 0.10623316578651566, "grad_norm": 0.38026437163352966, "learning_rate": 1.986634389629047e-05, "loss": 0.5398, "step": 5009 }, { "epoch": 0.10625437424444868, "grad_norm": 0.3378385305404663, "learning_rate": 1.9866289547864884e-05, "loss": 0.6095, "step": 5010 }, { "epoch": 0.10627558270238172, "grad_norm": 0.31561487913131714, "learning_rate": 1.98662351884661e-05, "loss": 0.5654, "step": 5011 }, { "epoch": 0.10629679116031474, "grad_norm": 0.3300575315952301, "learning_rate": 1.9866180818094173e-05, "loss": 0.5703, "step": 5012 }, { "epoch": 0.10631799961824775, "grad_norm": 0.3483789265155792, "learning_rate": 1.986612643674917e-05, "loss": 0.4583, "step": 5013 }, { "epoch": 0.10633920807618077, "grad_norm": 0.35143348574638367, "learning_rate": 1.9866072044431143e-05, "loss": 0.5871, "step": 5014 }, { "epoch": 0.10636041653411381, "grad_norm": 0.3422878682613373, "learning_rate": 1.9866017641140155e-05, "loss": 0.5778, "step": 5015 }, { "epoch": 0.10638162499204683, "grad_norm": 0.3419683277606964, "learning_rate": 1.9865963226876272e-05, "loss": 0.538, "step": 5016 }, { "epoch": 0.10640283344997985, "grad_norm": 0.3061589002609253, "learning_rate": 1.9865908801639548e-05, "loss": 0.4927, "step": 5017 }, { "epoch": 0.10642404190791288, "grad_norm": 0.35814300179481506, "learning_rate": 1.9865854365430044e-05, "loss": 0.5945, "step": 5018 }, { "epoch": 0.1064452503658459, "grad_norm": 0.3058653473854065, "learning_rate": 1.9865799918247824e-05, "loss": 0.52, "step": 5019 }, { "epoch": 0.10646645882377892, "grad_norm": 0.4249992370605469, "learning_rate": 1.986574546009295e-05, "loss": 0.5354, "step": 5020 }, { "epoch": 0.10648766728171195, "grad_norm": 0.332520067691803, "learning_rate": 1.9865690990965473e-05, "loss": 0.4787, "step": 5021 }, { "epoch": 0.10650887573964497, "grad_norm": 0.37778010964393616, "learning_rate": 1.9865636510865466e-05, "loss": 0.5063, "step": 5022 }, { "epoch": 0.10653008419757799, "grad_norm": 0.37229788303375244, "learning_rate": 1.9865582019792984e-05, "loss": 0.5279, "step": 5023 }, { "epoch": 0.10655129265551101, "grad_norm": 0.486463338136673, "learning_rate": 1.9865527517748085e-05, "loss": 0.5495, "step": 5024 }, { "epoch": 0.10657250111344405, "grad_norm": 0.4942179322242737, "learning_rate": 1.9865473004730832e-05, "loss": 0.6209, "step": 5025 }, { "epoch": 0.10659370957137707, "grad_norm": 0.31352412700653076, "learning_rate": 1.9865418480741288e-05, "loss": 0.4883, "step": 5026 }, { "epoch": 0.10661491802931009, "grad_norm": 0.3455331027507782, "learning_rate": 1.9865363945779506e-05, "loss": 0.6384, "step": 5027 }, { "epoch": 0.10663612648724312, "grad_norm": 0.4149295687675476, "learning_rate": 1.9865309399845558e-05, "loss": 0.5299, "step": 5028 }, { "epoch": 0.10665733494517614, "grad_norm": 0.3013952374458313, "learning_rate": 1.9865254842939493e-05, "loss": 0.4779, "step": 5029 }, { "epoch": 0.10667854340310916, "grad_norm": 0.3525279760360718, "learning_rate": 1.986520027506138e-05, "loss": 0.5777, "step": 5030 }, { "epoch": 0.10669975186104218, "grad_norm": 0.3500728905200958, "learning_rate": 1.9865145696211277e-05, "loss": 0.5769, "step": 5031 }, { "epoch": 0.10672096031897521, "grad_norm": 0.3403029441833496, "learning_rate": 1.986509110638925e-05, "loss": 0.6076, "step": 5032 }, { "epoch": 0.10674216877690823, "grad_norm": 0.3244627118110657, "learning_rate": 1.9865036505595347e-05, "loss": 0.6237, "step": 5033 }, { "epoch": 0.10676337723484125, "grad_norm": 0.3524913489818573, "learning_rate": 1.986498189382964e-05, "loss": 0.4983, "step": 5034 }, { "epoch": 0.10678458569277428, "grad_norm": 0.4009362459182739, "learning_rate": 1.986492727109218e-05, "loss": 0.5305, "step": 5035 }, { "epoch": 0.1068057941507073, "grad_norm": 0.3216380178928375, "learning_rate": 1.986487263738304e-05, "loss": 0.4784, "step": 5036 }, { "epoch": 0.10682700260864032, "grad_norm": 0.34145376086235046, "learning_rate": 1.986481799270227e-05, "loss": 0.5899, "step": 5037 }, { "epoch": 0.10684821106657334, "grad_norm": 0.7992262244224548, "learning_rate": 1.9864763337049936e-05, "loss": 0.5421, "step": 5038 }, { "epoch": 0.10686941952450638, "grad_norm": 0.33569806814193726, "learning_rate": 1.98647086704261e-05, "loss": 0.5038, "step": 5039 }, { "epoch": 0.1068906279824394, "grad_norm": 0.3655130863189697, "learning_rate": 1.9864653992830822e-05, "loss": 0.5694, "step": 5040 }, { "epoch": 0.10691183644037242, "grad_norm": 0.3492888808250427, "learning_rate": 1.986459930426416e-05, "loss": 0.537, "step": 5041 }, { "epoch": 0.10693304489830545, "grad_norm": 0.30639275908470154, "learning_rate": 1.9864544604726172e-05, "loss": 0.523, "step": 5042 }, { "epoch": 0.10695425335623847, "grad_norm": 0.35953205823898315, "learning_rate": 1.986448989421693e-05, "loss": 0.5487, "step": 5043 }, { "epoch": 0.10697546181417149, "grad_norm": 0.3548646569252014, "learning_rate": 1.9864435172736484e-05, "loss": 0.6354, "step": 5044 }, { "epoch": 0.10699667027210452, "grad_norm": 0.2987287938594818, "learning_rate": 1.98643804402849e-05, "loss": 0.4946, "step": 5045 }, { "epoch": 0.10701787873003754, "grad_norm": 0.3167459964752197, "learning_rate": 1.9864325696862236e-05, "loss": 0.4184, "step": 5046 }, { "epoch": 0.10703908718797056, "grad_norm": 0.33542877435684204, "learning_rate": 1.9864270942468556e-05, "loss": 0.5043, "step": 5047 }, { "epoch": 0.10706029564590358, "grad_norm": 0.30871671438217163, "learning_rate": 1.9864216177103923e-05, "loss": 0.5028, "step": 5048 }, { "epoch": 0.10708150410383661, "grad_norm": 0.36147528886795044, "learning_rate": 1.986416140076839e-05, "loss": 0.5828, "step": 5049 }, { "epoch": 0.10710271256176963, "grad_norm": 0.32962465286254883, "learning_rate": 1.986410661346202e-05, "loss": 0.5428, "step": 5050 }, { "epoch": 0.10712392101970265, "grad_norm": 0.30616089701652527, "learning_rate": 1.9864051815184882e-05, "loss": 0.5917, "step": 5051 }, { "epoch": 0.10714512947763569, "grad_norm": 0.32171472907066345, "learning_rate": 1.986399700593703e-05, "loss": 0.5474, "step": 5052 }, { "epoch": 0.1071663379355687, "grad_norm": 0.347522109746933, "learning_rate": 1.9863942185718526e-05, "loss": 0.5364, "step": 5053 }, { "epoch": 0.10718754639350173, "grad_norm": 0.3211832046508789, "learning_rate": 1.986388735452943e-05, "loss": 0.4918, "step": 5054 }, { "epoch": 0.10720875485143475, "grad_norm": 0.3820374011993408, "learning_rate": 1.9863832512369805e-05, "loss": 0.5944, "step": 5055 }, { "epoch": 0.10722996330936778, "grad_norm": 0.3823583126068115, "learning_rate": 1.986377765923971e-05, "loss": 0.5409, "step": 5056 }, { "epoch": 0.1072511717673008, "grad_norm": 0.334909588098526, "learning_rate": 1.9863722795139207e-05, "loss": 0.6124, "step": 5057 }, { "epoch": 0.10727238022523382, "grad_norm": 0.3506239652633667, "learning_rate": 1.986366792006836e-05, "loss": 0.5044, "step": 5058 }, { "epoch": 0.10729358868316685, "grad_norm": 0.3479996919631958, "learning_rate": 1.9863613034027224e-05, "loss": 0.5554, "step": 5059 }, { "epoch": 0.10731479714109987, "grad_norm": 0.380288302898407, "learning_rate": 1.9863558137015866e-05, "loss": 0.5138, "step": 5060 }, { "epoch": 0.10733600559903289, "grad_norm": 0.35828906297683716, "learning_rate": 1.9863503229034343e-05, "loss": 0.54, "step": 5061 }, { "epoch": 0.10735721405696592, "grad_norm": 0.33695101737976074, "learning_rate": 1.9863448310082717e-05, "loss": 0.457, "step": 5062 }, { "epoch": 0.10737842251489894, "grad_norm": 0.3839670717716217, "learning_rate": 1.9863393380161053e-05, "loss": 0.6148, "step": 5063 }, { "epoch": 0.10739963097283196, "grad_norm": 0.3421170711517334, "learning_rate": 1.9863338439269403e-05, "loss": 0.4924, "step": 5064 }, { "epoch": 0.10742083943076498, "grad_norm": 0.3115857243537903, "learning_rate": 1.9863283487407837e-05, "loss": 0.5654, "step": 5065 }, { "epoch": 0.10744204788869802, "grad_norm": 0.3202979862689972, "learning_rate": 1.986322852457641e-05, "loss": 0.5508, "step": 5066 }, { "epoch": 0.10746325634663104, "grad_norm": 0.37381330132484436, "learning_rate": 1.9863173550775187e-05, "loss": 0.5706, "step": 5067 }, { "epoch": 0.10748446480456406, "grad_norm": 0.3247605264186859, "learning_rate": 1.9863118566004233e-05, "loss": 0.5643, "step": 5068 }, { "epoch": 0.10750567326249709, "grad_norm": 0.403357595205307, "learning_rate": 1.98630635702636e-05, "loss": 0.5039, "step": 5069 }, { "epoch": 0.10752688172043011, "grad_norm": 0.3512684106826782, "learning_rate": 1.9863008563553352e-05, "loss": 0.5434, "step": 5070 }, { "epoch": 0.10754809017836313, "grad_norm": 0.34956371784210205, "learning_rate": 1.9862953545873555e-05, "loss": 0.4918, "step": 5071 }, { "epoch": 0.10756929863629615, "grad_norm": 0.30493786931037903, "learning_rate": 1.9862898517224264e-05, "loss": 0.4799, "step": 5072 }, { "epoch": 0.10759050709422918, "grad_norm": 0.5087894201278687, "learning_rate": 1.986284347760554e-05, "loss": 0.5505, "step": 5073 }, { "epoch": 0.1076117155521622, "grad_norm": 0.3133924603462219, "learning_rate": 1.9862788427017453e-05, "loss": 0.5145, "step": 5074 }, { "epoch": 0.10763292401009522, "grad_norm": 0.33668896555900574, "learning_rate": 1.9862733365460055e-05, "loss": 0.5202, "step": 5075 }, { "epoch": 0.10765413246802825, "grad_norm": 0.3658277094364166, "learning_rate": 1.9862678292933414e-05, "loss": 0.5112, "step": 5076 }, { "epoch": 0.10767534092596127, "grad_norm": 0.34248796105384827, "learning_rate": 1.9862623209437585e-05, "loss": 0.6463, "step": 5077 }, { "epoch": 0.1076965493838943, "grad_norm": 0.2918865978717804, "learning_rate": 1.9862568114972635e-05, "loss": 0.4773, "step": 5078 }, { "epoch": 0.10771775784182733, "grad_norm": 0.4074249565601349, "learning_rate": 1.9862513009538618e-05, "loss": 0.6768, "step": 5079 }, { "epoch": 0.10773896629976035, "grad_norm": 0.3405279815196991, "learning_rate": 1.9862457893135603e-05, "loss": 0.5812, "step": 5080 }, { "epoch": 0.10776017475769337, "grad_norm": 0.5085097551345825, "learning_rate": 1.9862402765763646e-05, "loss": 0.516, "step": 5081 }, { "epoch": 0.10778138321562639, "grad_norm": 0.31768599152565, "learning_rate": 1.9862347627422813e-05, "loss": 0.6638, "step": 5082 }, { "epoch": 0.10780259167355942, "grad_norm": 0.3133472800254822, "learning_rate": 1.986229247811316e-05, "loss": 0.547, "step": 5083 }, { "epoch": 0.10782380013149244, "grad_norm": 0.310027152299881, "learning_rate": 1.9862237317834753e-05, "loss": 0.4557, "step": 5084 }, { "epoch": 0.10784500858942546, "grad_norm": 0.2871890366077423, "learning_rate": 1.9862182146587652e-05, "loss": 0.5046, "step": 5085 }, { "epoch": 0.10786621704735849, "grad_norm": 0.36353540420532227, "learning_rate": 1.9862126964371915e-05, "loss": 0.5703, "step": 5086 }, { "epoch": 0.10788742550529151, "grad_norm": 0.3881050944328308, "learning_rate": 1.9862071771187606e-05, "loss": 0.641, "step": 5087 }, { "epoch": 0.10790863396322453, "grad_norm": 0.40718790888786316, "learning_rate": 1.9862016567034786e-05, "loss": 0.5522, "step": 5088 }, { "epoch": 0.10792984242115755, "grad_norm": 0.30117911100387573, "learning_rate": 1.986196135191352e-05, "loss": 0.5128, "step": 5089 }, { "epoch": 0.10795105087909058, "grad_norm": 0.3325022757053375, "learning_rate": 1.9861906125823865e-05, "loss": 0.5357, "step": 5090 }, { "epoch": 0.1079722593370236, "grad_norm": 0.3262808620929718, "learning_rate": 1.9861850888765885e-05, "loss": 0.4959, "step": 5091 }, { "epoch": 0.10799346779495662, "grad_norm": 0.32498976588249207, "learning_rate": 1.9861795640739635e-05, "loss": 0.6171, "step": 5092 }, { "epoch": 0.10801467625288966, "grad_norm": 0.2967236340045929, "learning_rate": 1.986174038174519e-05, "loss": 0.5602, "step": 5093 }, { "epoch": 0.10803588471082268, "grad_norm": 0.36495742201805115, "learning_rate": 1.9861685111782595e-05, "loss": 0.5298, "step": 5094 }, { "epoch": 0.1080570931687557, "grad_norm": 0.3158922493457794, "learning_rate": 1.9861629830851923e-05, "loss": 0.4761, "step": 5095 }, { "epoch": 0.10807830162668873, "grad_norm": 0.3067660927772522, "learning_rate": 1.9861574538953233e-05, "loss": 0.4818, "step": 5096 }, { "epoch": 0.10809951008462175, "grad_norm": 0.6351698637008667, "learning_rate": 1.9861519236086583e-05, "loss": 0.5019, "step": 5097 }, { "epoch": 0.10812071854255477, "grad_norm": 0.30873867869377136, "learning_rate": 1.986146392225204e-05, "loss": 0.4972, "step": 5098 }, { "epoch": 0.10814192700048779, "grad_norm": 0.5579400062561035, "learning_rate": 1.986140859744966e-05, "loss": 0.4914, "step": 5099 }, { "epoch": 0.10816313545842082, "grad_norm": 0.32782572507858276, "learning_rate": 1.9861353261679507e-05, "loss": 0.5682, "step": 5100 }, { "epoch": 0.10818434391635384, "grad_norm": 0.3246448040008545, "learning_rate": 1.9861297914941643e-05, "loss": 0.5098, "step": 5101 }, { "epoch": 0.10820555237428686, "grad_norm": 0.2893657088279724, "learning_rate": 1.986124255723613e-05, "loss": 0.4807, "step": 5102 }, { "epoch": 0.1082267608322199, "grad_norm": 0.3658846616744995, "learning_rate": 1.9861187188563026e-05, "loss": 0.5609, "step": 5103 }, { "epoch": 0.10824796929015292, "grad_norm": 0.2969250977039337, "learning_rate": 1.9861131808922397e-05, "loss": 0.4416, "step": 5104 }, { "epoch": 0.10826917774808593, "grad_norm": 0.34344491362571716, "learning_rate": 1.9861076418314305e-05, "loss": 0.5345, "step": 5105 }, { "epoch": 0.10829038620601895, "grad_norm": 0.32001790404319763, "learning_rate": 1.9861021016738807e-05, "loss": 0.5787, "step": 5106 }, { "epoch": 0.10831159466395199, "grad_norm": 0.3123072683811188, "learning_rate": 1.9860965604195966e-05, "loss": 0.5426, "step": 5107 }, { "epoch": 0.10833280312188501, "grad_norm": 0.3575272858142853, "learning_rate": 1.9860910180685845e-05, "loss": 0.5316, "step": 5108 }, { "epoch": 0.10835401157981803, "grad_norm": 0.31317412853240967, "learning_rate": 1.986085474620851e-05, "loss": 0.491, "step": 5109 }, { "epoch": 0.10837522003775106, "grad_norm": 0.37478527426719666, "learning_rate": 1.9860799300764014e-05, "loss": 0.5639, "step": 5110 }, { "epoch": 0.10839642849568408, "grad_norm": 0.36352062225341797, "learning_rate": 1.9860743844352422e-05, "loss": 0.557, "step": 5111 }, { "epoch": 0.1084176369536171, "grad_norm": 0.32499194145202637, "learning_rate": 1.9860688376973797e-05, "loss": 0.5086, "step": 5112 }, { "epoch": 0.10843884541155012, "grad_norm": 0.36865469813346863, "learning_rate": 1.9860632898628203e-05, "loss": 0.529, "step": 5113 }, { "epoch": 0.10846005386948315, "grad_norm": 0.30286526679992676, "learning_rate": 1.9860577409315697e-05, "loss": 0.5402, "step": 5114 }, { "epoch": 0.10848126232741617, "grad_norm": 0.3066936135292053, "learning_rate": 1.986052190903634e-05, "loss": 0.4794, "step": 5115 }, { "epoch": 0.10850247078534919, "grad_norm": 0.34773409366607666, "learning_rate": 1.98604663977902e-05, "loss": 0.5356, "step": 5116 }, { "epoch": 0.10852367924328223, "grad_norm": 0.3617272973060608, "learning_rate": 1.9860410875577332e-05, "loss": 0.447, "step": 5117 }, { "epoch": 0.10854488770121525, "grad_norm": 0.3768354058265686, "learning_rate": 1.9860355342397803e-05, "loss": 0.5901, "step": 5118 }, { "epoch": 0.10856609615914826, "grad_norm": 0.3303990662097931, "learning_rate": 1.986029979825167e-05, "loss": 0.4799, "step": 5119 }, { "epoch": 0.1085873046170813, "grad_norm": 0.3235989809036255, "learning_rate": 1.9860244243139e-05, "loss": 0.4889, "step": 5120 }, { "epoch": 0.10860851307501432, "grad_norm": 0.35211268067359924, "learning_rate": 1.986018867705985e-05, "loss": 0.593, "step": 5121 }, { "epoch": 0.10862972153294734, "grad_norm": 0.32726582884788513, "learning_rate": 1.9860133100014283e-05, "loss": 0.6362, "step": 5122 }, { "epoch": 0.10865092999088036, "grad_norm": 0.33161064982414246, "learning_rate": 1.9860077512002364e-05, "loss": 0.5933, "step": 5123 }, { "epoch": 0.10867213844881339, "grad_norm": 0.31582316756248474, "learning_rate": 1.986002191302415e-05, "loss": 0.5257, "step": 5124 }, { "epoch": 0.10869334690674641, "grad_norm": 0.31604325771331787, "learning_rate": 1.9859966303079708e-05, "loss": 0.552, "step": 5125 }, { "epoch": 0.10871455536467943, "grad_norm": 0.4807720482349396, "learning_rate": 1.9859910682169094e-05, "loss": 0.4921, "step": 5126 }, { "epoch": 0.10873576382261246, "grad_norm": 0.33217206597328186, "learning_rate": 1.9859855050292378e-05, "loss": 0.495, "step": 5127 }, { "epoch": 0.10875697228054548, "grad_norm": 0.40362367033958435, "learning_rate": 1.985979940744961e-05, "loss": 0.5923, "step": 5128 }, { "epoch": 0.1087781807384785, "grad_norm": 0.3344717025756836, "learning_rate": 1.9859743753640863e-05, "loss": 0.7192, "step": 5129 }, { "epoch": 0.10879938919641152, "grad_norm": 0.3340068757534027, "learning_rate": 1.9859688088866193e-05, "loss": 0.5293, "step": 5130 }, { "epoch": 0.10882059765434456, "grad_norm": 0.36170291900634766, "learning_rate": 1.9859632413125666e-05, "loss": 0.5553, "step": 5131 }, { "epoch": 0.10884180611227758, "grad_norm": 0.3593340814113617, "learning_rate": 1.985957672641934e-05, "loss": 0.5737, "step": 5132 }, { "epoch": 0.1088630145702106, "grad_norm": 0.29740914702415466, "learning_rate": 1.9859521028747278e-05, "loss": 0.4719, "step": 5133 }, { "epoch": 0.10888422302814363, "grad_norm": 0.3331804573535919, "learning_rate": 1.9859465320109542e-05, "loss": 0.5056, "step": 5134 }, { "epoch": 0.10890543148607665, "grad_norm": 0.353380411863327, "learning_rate": 1.9859409600506196e-05, "loss": 0.5984, "step": 5135 }, { "epoch": 0.10892663994400967, "grad_norm": 0.33546674251556396, "learning_rate": 1.98593538699373e-05, "loss": 0.4655, "step": 5136 }, { "epoch": 0.1089478484019427, "grad_norm": 0.31198811531066895, "learning_rate": 1.9859298128402916e-05, "loss": 0.5601, "step": 5137 }, { "epoch": 0.10896905685987572, "grad_norm": 0.2879650592803955, "learning_rate": 1.9859242375903104e-05, "loss": 0.4358, "step": 5138 }, { "epoch": 0.10899026531780874, "grad_norm": 0.4618382751941681, "learning_rate": 1.985918661243793e-05, "loss": 0.5101, "step": 5139 }, { "epoch": 0.10901147377574176, "grad_norm": 0.34933891892433167, "learning_rate": 1.9859130838007455e-05, "loss": 0.544, "step": 5140 }, { "epoch": 0.1090326822336748, "grad_norm": 0.33609265089035034, "learning_rate": 1.985907505261174e-05, "loss": 0.5142, "step": 5141 }, { "epoch": 0.10905389069160781, "grad_norm": 0.32479050755500793, "learning_rate": 1.9859019256250847e-05, "loss": 0.5685, "step": 5142 }, { "epoch": 0.10907509914954083, "grad_norm": 0.34695151448249817, "learning_rate": 1.985896344892484e-05, "loss": 0.4992, "step": 5143 }, { "epoch": 0.10909630760747387, "grad_norm": 0.31184616684913635, "learning_rate": 1.985890763063378e-05, "loss": 0.5796, "step": 5144 }, { "epoch": 0.10911751606540689, "grad_norm": 0.29949745535850525, "learning_rate": 1.9858851801377726e-05, "loss": 0.5184, "step": 5145 }, { "epoch": 0.1091387245233399, "grad_norm": 1.2064571380615234, "learning_rate": 1.9858795961156742e-05, "loss": 0.5228, "step": 5146 }, { "epoch": 0.10915993298127293, "grad_norm": 0.3506251573562622, "learning_rate": 1.9858740109970894e-05, "loss": 0.5435, "step": 5147 }, { "epoch": 0.10918114143920596, "grad_norm": 0.32813307642936707, "learning_rate": 1.985868424782024e-05, "loss": 0.5812, "step": 5148 }, { "epoch": 0.10920234989713898, "grad_norm": 0.3195964992046356, "learning_rate": 1.985862837470484e-05, "loss": 0.424, "step": 5149 }, { "epoch": 0.109223558355072, "grad_norm": 0.3789912760257721, "learning_rate": 1.9858572490624763e-05, "loss": 0.5553, "step": 5150 }, { "epoch": 0.10924476681300503, "grad_norm": 0.40711510181427, "learning_rate": 1.9858516595580063e-05, "loss": 0.4995, "step": 5151 }, { "epoch": 0.10926597527093805, "grad_norm": 0.3225436210632324, "learning_rate": 1.985846068957081e-05, "loss": 0.473, "step": 5152 }, { "epoch": 0.10928718372887107, "grad_norm": 0.2759946286678314, "learning_rate": 1.985840477259706e-05, "loss": 0.5333, "step": 5153 }, { "epoch": 0.1093083921868041, "grad_norm": 0.4415521025657654, "learning_rate": 1.985834884465888e-05, "loss": 0.5355, "step": 5154 }, { "epoch": 0.10932960064473712, "grad_norm": 0.35352909564971924, "learning_rate": 1.9858292905756333e-05, "loss": 0.5204, "step": 5155 }, { "epoch": 0.10935080910267014, "grad_norm": 0.31637075543403625, "learning_rate": 1.9858236955889475e-05, "loss": 0.5254, "step": 5156 }, { "epoch": 0.10937201756060316, "grad_norm": 0.3514651358127594, "learning_rate": 1.9858180995058373e-05, "loss": 0.5312, "step": 5157 }, { "epoch": 0.1093932260185362, "grad_norm": 0.41652989387512207, "learning_rate": 1.9858125023263083e-05, "loss": 0.6694, "step": 5158 }, { "epoch": 0.10941443447646922, "grad_norm": 0.38786864280700684, "learning_rate": 1.9858069040503673e-05, "loss": 0.5135, "step": 5159 }, { "epoch": 0.10943564293440224, "grad_norm": 0.3190839886665344, "learning_rate": 1.9858013046780208e-05, "loss": 0.5699, "step": 5160 }, { "epoch": 0.10945685139233527, "grad_norm": 0.3149369955062866, "learning_rate": 1.9857957042092744e-05, "loss": 0.4622, "step": 5161 }, { "epoch": 0.10947805985026829, "grad_norm": 0.3186325132846832, "learning_rate": 1.9857901026441348e-05, "loss": 0.5144, "step": 5162 }, { "epoch": 0.10949926830820131, "grad_norm": 0.3022976815700531, "learning_rate": 1.985784499982608e-05, "loss": 0.5052, "step": 5163 }, { "epoch": 0.10952047676613433, "grad_norm": 0.3615899384021759, "learning_rate": 1.9857788962247e-05, "loss": 0.6063, "step": 5164 }, { "epoch": 0.10954168522406736, "grad_norm": 0.3708120882511139, "learning_rate": 1.9857732913704173e-05, "loss": 0.603, "step": 5165 }, { "epoch": 0.10956289368200038, "grad_norm": 0.583096444606781, "learning_rate": 1.9857676854197666e-05, "loss": 0.4957, "step": 5166 }, { "epoch": 0.1095841021399334, "grad_norm": 0.4833725094795227, "learning_rate": 1.985762078372753e-05, "loss": 0.5961, "step": 5167 }, { "epoch": 0.10960531059786643, "grad_norm": 0.3394676744937897, "learning_rate": 1.9857564702293837e-05, "loss": 0.5426, "step": 5168 }, { "epoch": 0.10962651905579945, "grad_norm": 0.3328695297241211, "learning_rate": 1.9857508609896644e-05, "loss": 0.5455, "step": 5169 }, { "epoch": 0.10964772751373247, "grad_norm": 0.3374538719654083, "learning_rate": 1.9857452506536015e-05, "loss": 0.5881, "step": 5170 }, { "epoch": 0.10966893597166551, "grad_norm": 0.32478681206703186, "learning_rate": 1.985739639221202e-05, "loss": 0.5243, "step": 5171 }, { "epoch": 0.10969014442959853, "grad_norm": 0.3167220652103424, "learning_rate": 1.9857340266924706e-05, "loss": 0.4786, "step": 5172 }, { "epoch": 0.10971135288753155, "grad_norm": 0.2968831956386566, "learning_rate": 1.985728413067415e-05, "loss": 0.4565, "step": 5173 }, { "epoch": 0.10973256134546457, "grad_norm": 0.3267821967601776, "learning_rate": 1.9857227983460405e-05, "loss": 0.4442, "step": 5174 }, { "epoch": 0.1097537698033976, "grad_norm": 0.37035974860191345, "learning_rate": 1.9857171825283537e-05, "loss": 0.6198, "step": 5175 }, { "epoch": 0.10977497826133062, "grad_norm": 0.3232911229133606, "learning_rate": 1.9857115656143608e-05, "loss": 0.5139, "step": 5176 }, { "epoch": 0.10979618671926364, "grad_norm": 0.3164528012275696, "learning_rate": 1.985705947604068e-05, "loss": 0.4631, "step": 5177 }, { "epoch": 0.10981739517719667, "grad_norm": 0.38899368047714233, "learning_rate": 1.9857003284974817e-05, "loss": 0.5216, "step": 5178 }, { "epoch": 0.10983860363512969, "grad_norm": 0.43053895235061646, "learning_rate": 1.9856947082946082e-05, "loss": 0.5194, "step": 5179 }, { "epoch": 0.10985981209306271, "grad_norm": 0.3509257137775421, "learning_rate": 1.9856890869954535e-05, "loss": 0.5149, "step": 5180 }, { "epoch": 0.10988102055099573, "grad_norm": 0.5032163262367249, "learning_rate": 1.9856834646000237e-05, "loss": 0.5654, "step": 5181 }, { "epoch": 0.10990222900892876, "grad_norm": 0.3340119421482086, "learning_rate": 1.9856778411083256e-05, "loss": 0.5414, "step": 5182 }, { "epoch": 0.10992343746686178, "grad_norm": 0.3183174133300781, "learning_rate": 1.985672216520365e-05, "loss": 0.5516, "step": 5183 }, { "epoch": 0.1099446459247948, "grad_norm": 0.34181147813796997, "learning_rate": 1.9856665908361487e-05, "loss": 0.5758, "step": 5184 }, { "epoch": 0.10996585438272784, "grad_norm": 0.35806092619895935, "learning_rate": 1.985660964055682e-05, "loss": 0.5645, "step": 5185 }, { "epoch": 0.10998706284066086, "grad_norm": 0.2898602783679962, "learning_rate": 1.985655336178972e-05, "loss": 0.4701, "step": 5186 }, { "epoch": 0.11000827129859388, "grad_norm": 0.32295557856559753, "learning_rate": 1.985649707206025e-05, "loss": 0.5746, "step": 5187 }, { "epoch": 0.1100294797565269, "grad_norm": 0.3072860836982727, "learning_rate": 1.9856440771368464e-05, "loss": 0.5397, "step": 5188 }, { "epoch": 0.11005068821445993, "grad_norm": 0.3369671106338501, "learning_rate": 1.9856384459714435e-05, "loss": 0.5926, "step": 5189 }, { "epoch": 0.11007189667239295, "grad_norm": 0.31137657165527344, "learning_rate": 1.9856328137098217e-05, "loss": 0.5023, "step": 5190 }, { "epoch": 0.11009310513032597, "grad_norm": 0.34288135170936584, "learning_rate": 1.985627180351988e-05, "loss": 0.5027, "step": 5191 }, { "epoch": 0.110114313588259, "grad_norm": 0.37541013956069946, "learning_rate": 1.9856215458979482e-05, "loss": 0.553, "step": 5192 }, { "epoch": 0.11013552204619202, "grad_norm": 0.3062174618244171, "learning_rate": 1.9856159103477085e-05, "loss": 0.4562, "step": 5193 }, { "epoch": 0.11015673050412504, "grad_norm": 0.32489317655563354, "learning_rate": 1.9856102737012756e-05, "loss": 0.6209, "step": 5194 }, { "epoch": 0.11017793896205808, "grad_norm": 0.3469341993331909, "learning_rate": 1.9856046359586554e-05, "loss": 0.6015, "step": 5195 }, { "epoch": 0.1101991474199911, "grad_norm": 0.43686535954475403, "learning_rate": 1.9855989971198542e-05, "loss": 0.555, "step": 5196 }, { "epoch": 0.11022035587792411, "grad_norm": 0.37719810009002686, "learning_rate": 1.9855933571848786e-05, "loss": 0.4163, "step": 5197 }, { "epoch": 0.11024156433585713, "grad_norm": 0.382529616355896, "learning_rate": 1.9855877161537346e-05, "loss": 0.551, "step": 5198 }, { "epoch": 0.11026277279379017, "grad_norm": 0.3310345411300659, "learning_rate": 1.9855820740264284e-05, "loss": 0.446, "step": 5199 }, { "epoch": 0.11028398125172319, "grad_norm": 0.32828494906425476, "learning_rate": 1.9855764308029662e-05, "loss": 0.505, "step": 5200 }, { "epoch": 0.1103051897096562, "grad_norm": 0.39532139897346497, "learning_rate": 1.9855707864833547e-05, "loss": 0.52, "step": 5201 }, { "epoch": 0.11032639816758924, "grad_norm": 0.38304615020751953, "learning_rate": 1.9855651410676e-05, "loss": 0.5333, "step": 5202 }, { "epoch": 0.11034760662552226, "grad_norm": 0.31746920943260193, "learning_rate": 1.985559494555708e-05, "loss": 0.5109, "step": 5203 }, { "epoch": 0.11036881508345528, "grad_norm": 0.31997472047805786, "learning_rate": 1.9855538469476855e-05, "loss": 0.5078, "step": 5204 }, { "epoch": 0.1103900235413883, "grad_norm": 0.3374902606010437, "learning_rate": 1.9855481982435383e-05, "loss": 0.5548, "step": 5205 }, { "epoch": 0.11041123199932133, "grad_norm": 0.4714047908782959, "learning_rate": 1.9855425484432733e-05, "loss": 0.4814, "step": 5206 }, { "epoch": 0.11043244045725435, "grad_norm": 0.3228549063205719, "learning_rate": 1.9855368975468962e-05, "loss": 0.5217, "step": 5207 }, { "epoch": 0.11045364891518737, "grad_norm": 0.33387306332588196, "learning_rate": 1.9855312455544137e-05, "loss": 0.5245, "step": 5208 }, { "epoch": 0.1104748573731204, "grad_norm": 0.3498426675796509, "learning_rate": 1.9855255924658316e-05, "loss": 0.5548, "step": 5209 }, { "epoch": 0.11049606583105342, "grad_norm": 0.3246719539165497, "learning_rate": 1.985519938281157e-05, "loss": 0.4972, "step": 5210 }, { "epoch": 0.11051727428898644, "grad_norm": 0.3035816252231598, "learning_rate": 1.9855142830003954e-05, "loss": 0.5341, "step": 5211 }, { "epoch": 0.11053848274691948, "grad_norm": 0.32165583968162537, "learning_rate": 1.9855086266235534e-05, "loss": 0.549, "step": 5212 }, { "epoch": 0.1105596912048525, "grad_norm": 0.35133010149002075, "learning_rate": 1.985502969150637e-05, "loss": 0.5624, "step": 5213 }, { "epoch": 0.11058089966278552, "grad_norm": 0.281696081161499, "learning_rate": 1.9854973105816534e-05, "loss": 0.5014, "step": 5214 }, { "epoch": 0.11060210812071854, "grad_norm": 0.3136778771877289, "learning_rate": 1.985491650916608e-05, "loss": 0.506, "step": 5215 }, { "epoch": 0.11062331657865157, "grad_norm": 0.3827126622200012, "learning_rate": 1.985485990155507e-05, "loss": 0.5706, "step": 5216 }, { "epoch": 0.11064452503658459, "grad_norm": 0.30031269788742065, "learning_rate": 1.9854803282983574e-05, "loss": 0.4864, "step": 5217 }, { "epoch": 0.11066573349451761, "grad_norm": 0.3168204128742218, "learning_rate": 1.985474665345165e-05, "loss": 0.5124, "step": 5218 }, { "epoch": 0.11068694195245064, "grad_norm": 0.35730621218681335, "learning_rate": 1.985469001295936e-05, "loss": 0.5756, "step": 5219 }, { "epoch": 0.11070815041038366, "grad_norm": 0.3122204840183258, "learning_rate": 1.9854633361506775e-05, "loss": 0.4681, "step": 5220 }, { "epoch": 0.11072935886831668, "grad_norm": 0.30402806401252747, "learning_rate": 1.9854576699093946e-05, "loss": 0.5051, "step": 5221 }, { "epoch": 0.1107505673262497, "grad_norm": 0.3313393294811249, "learning_rate": 1.9854520025720947e-05, "loss": 0.6223, "step": 5222 }, { "epoch": 0.11077177578418274, "grad_norm": 0.38321661949157715, "learning_rate": 1.9854463341387834e-05, "loss": 0.5498, "step": 5223 }, { "epoch": 0.11079298424211576, "grad_norm": 0.4212969243526459, "learning_rate": 1.9854406646094676e-05, "loss": 0.6147, "step": 5224 }, { "epoch": 0.11081419270004877, "grad_norm": 0.3467670679092407, "learning_rate": 1.985434993984153e-05, "loss": 0.5356, "step": 5225 }, { "epoch": 0.11083540115798181, "grad_norm": 0.31591281294822693, "learning_rate": 1.985429322262846e-05, "loss": 0.5354, "step": 5226 }, { "epoch": 0.11085660961591483, "grad_norm": 0.3151893615722656, "learning_rate": 1.9854236494455536e-05, "loss": 0.4896, "step": 5227 }, { "epoch": 0.11087781807384785, "grad_norm": 0.33356428146362305, "learning_rate": 1.985417975532281e-05, "loss": 0.5764, "step": 5228 }, { "epoch": 0.11089902653178088, "grad_norm": 0.32900360226631165, "learning_rate": 1.9854123005230354e-05, "loss": 0.4766, "step": 5229 }, { "epoch": 0.1109202349897139, "grad_norm": 0.3460659682750702, "learning_rate": 1.9854066244178224e-05, "loss": 0.523, "step": 5230 }, { "epoch": 0.11094144344764692, "grad_norm": 1.6858059167861938, "learning_rate": 1.985400947216649e-05, "loss": 0.5292, "step": 5231 }, { "epoch": 0.11096265190557994, "grad_norm": 0.5225509405136108, "learning_rate": 1.9853952689195214e-05, "loss": 0.5388, "step": 5232 }, { "epoch": 0.11098386036351297, "grad_norm": 0.2992072105407715, "learning_rate": 1.9853895895264456e-05, "loss": 0.5353, "step": 5233 }, { "epoch": 0.11100506882144599, "grad_norm": 0.35962194204330444, "learning_rate": 1.985383909037428e-05, "loss": 0.5039, "step": 5234 }, { "epoch": 0.11102627727937901, "grad_norm": 0.30767688155174255, "learning_rate": 1.985378227452475e-05, "loss": 0.5814, "step": 5235 }, { "epoch": 0.11104748573731205, "grad_norm": 0.38983359932899475, "learning_rate": 1.9853725447715928e-05, "loss": 0.5167, "step": 5236 }, { "epoch": 0.11106869419524507, "grad_norm": 0.49740129709243774, "learning_rate": 1.985366860994788e-05, "loss": 0.4805, "step": 5237 }, { "epoch": 0.11108990265317809, "grad_norm": 0.29040786623954773, "learning_rate": 1.985361176122067e-05, "loss": 0.5825, "step": 5238 }, { "epoch": 0.1111111111111111, "grad_norm": 0.3454415500164032, "learning_rate": 1.9853554901534354e-05, "loss": 0.5369, "step": 5239 }, { "epoch": 0.11113231956904414, "grad_norm": 0.33301153779029846, "learning_rate": 1.9853498030889e-05, "loss": 0.5558, "step": 5240 }, { "epoch": 0.11115352802697716, "grad_norm": 0.301278293132782, "learning_rate": 1.9853441149284672e-05, "loss": 0.5494, "step": 5241 }, { "epoch": 0.11117473648491018, "grad_norm": 0.34356769919395447, "learning_rate": 1.9853384256721434e-05, "loss": 0.5311, "step": 5242 }, { "epoch": 0.11119594494284321, "grad_norm": 0.3378205895423889, "learning_rate": 1.9853327353199345e-05, "loss": 0.5151, "step": 5243 }, { "epoch": 0.11121715340077623, "grad_norm": 0.3493712246417999, "learning_rate": 1.9853270438718472e-05, "loss": 0.5405, "step": 5244 }, { "epoch": 0.11123836185870925, "grad_norm": 0.34324538707733154, "learning_rate": 1.9853213513278876e-05, "loss": 0.5167, "step": 5245 }, { "epoch": 0.11125957031664227, "grad_norm": 0.35422733426094055, "learning_rate": 1.9853156576880623e-05, "loss": 0.5815, "step": 5246 }, { "epoch": 0.1112807787745753, "grad_norm": 0.32520946860313416, "learning_rate": 1.9853099629523775e-05, "loss": 0.5214, "step": 5247 }, { "epoch": 0.11130198723250832, "grad_norm": 0.33284974098205566, "learning_rate": 1.9853042671208395e-05, "loss": 0.5512, "step": 5248 }, { "epoch": 0.11132319569044134, "grad_norm": 0.3358282446861267, "learning_rate": 1.9852985701934547e-05, "loss": 0.5046, "step": 5249 }, { "epoch": 0.11134440414837438, "grad_norm": 0.3366226553916931, "learning_rate": 1.9852928721702294e-05, "loss": 0.5583, "step": 5250 }, { "epoch": 0.1113656126063074, "grad_norm": 0.3208066523075104, "learning_rate": 1.98528717305117e-05, "loss": 0.4901, "step": 5251 }, { "epoch": 0.11138682106424042, "grad_norm": 0.31565701961517334, "learning_rate": 1.9852814728362824e-05, "loss": 0.387, "step": 5252 }, { "epoch": 0.11140802952217345, "grad_norm": 0.32201752066612244, "learning_rate": 1.9852757715255735e-05, "loss": 0.4679, "step": 5253 }, { "epoch": 0.11142923798010647, "grad_norm": 0.37780874967575073, "learning_rate": 1.9852700691190497e-05, "loss": 0.5393, "step": 5254 }, { "epoch": 0.11145044643803949, "grad_norm": 0.37143760919570923, "learning_rate": 1.985264365616717e-05, "loss": 0.6512, "step": 5255 }, { "epoch": 0.11147165489597251, "grad_norm": 0.3283017575740814, "learning_rate": 1.9852586610185813e-05, "loss": 0.563, "step": 5256 }, { "epoch": 0.11149286335390554, "grad_norm": 0.3618355393409729, "learning_rate": 1.98525295532465e-05, "loss": 0.5213, "step": 5257 }, { "epoch": 0.11151407181183856, "grad_norm": 0.32499679923057556, "learning_rate": 1.985247248534929e-05, "loss": 0.5284, "step": 5258 }, { "epoch": 0.11153528026977158, "grad_norm": 0.33294981718063354, "learning_rate": 1.9852415406494243e-05, "loss": 0.5053, "step": 5259 }, { "epoch": 0.11155648872770461, "grad_norm": 0.3832356631755829, "learning_rate": 1.9852358316681423e-05, "loss": 0.5326, "step": 5260 }, { "epoch": 0.11157769718563763, "grad_norm": 0.35389944911003113, "learning_rate": 1.98523012159109e-05, "loss": 0.4938, "step": 5261 }, { "epoch": 0.11159890564357065, "grad_norm": 0.28323349356651306, "learning_rate": 1.985224410418273e-05, "loss": 0.4851, "step": 5262 }, { "epoch": 0.11162011410150367, "grad_norm": 0.31029263138771057, "learning_rate": 1.9852186981496983e-05, "loss": 0.5098, "step": 5263 }, { "epoch": 0.1116413225594367, "grad_norm": 0.3333083987236023, "learning_rate": 1.9852129847853717e-05, "loss": 0.5261, "step": 5264 }, { "epoch": 0.11166253101736973, "grad_norm": 0.3594379723072052, "learning_rate": 1.9852072703253e-05, "loss": 0.5202, "step": 5265 }, { "epoch": 0.11168373947530275, "grad_norm": 0.3104178011417389, "learning_rate": 1.9852015547694892e-05, "loss": 0.5275, "step": 5266 }, { "epoch": 0.11170494793323578, "grad_norm": 0.3268987238407135, "learning_rate": 1.9851958381179454e-05, "loss": 0.5919, "step": 5267 }, { "epoch": 0.1117261563911688, "grad_norm": 0.33793503046035767, "learning_rate": 1.985190120370676e-05, "loss": 0.508, "step": 5268 }, { "epoch": 0.11174736484910182, "grad_norm": 0.31768202781677246, "learning_rate": 1.985184401527686e-05, "loss": 0.5627, "step": 5269 }, { "epoch": 0.11176857330703485, "grad_norm": 0.3498424291610718, "learning_rate": 1.985178681588983e-05, "loss": 0.44, "step": 5270 }, { "epoch": 0.11178978176496787, "grad_norm": 0.3926907181739807, "learning_rate": 1.985172960554573e-05, "loss": 0.4791, "step": 5271 }, { "epoch": 0.11181099022290089, "grad_norm": 0.532857358455658, "learning_rate": 1.985167238424462e-05, "loss": 0.551, "step": 5272 }, { "epoch": 0.11183219868083391, "grad_norm": 0.3538873493671417, "learning_rate": 1.9851615151986563e-05, "loss": 0.5722, "step": 5273 }, { "epoch": 0.11185340713876694, "grad_norm": 0.4163001477718353, "learning_rate": 1.9851557908771628e-05, "loss": 0.46, "step": 5274 }, { "epoch": 0.11187461559669996, "grad_norm": 0.33388039469718933, "learning_rate": 1.9851500654599873e-05, "loss": 0.4132, "step": 5275 }, { "epoch": 0.11189582405463298, "grad_norm": 0.339871346950531, "learning_rate": 1.985144338947137e-05, "loss": 0.5708, "step": 5276 }, { "epoch": 0.11191703251256602, "grad_norm": 0.3334391713142395, "learning_rate": 1.9851386113386168e-05, "loss": 0.5955, "step": 5277 }, { "epoch": 0.11193824097049904, "grad_norm": 0.3228667676448822, "learning_rate": 1.9851328826344348e-05, "loss": 0.4206, "step": 5278 }, { "epoch": 0.11195944942843206, "grad_norm": 0.32157596945762634, "learning_rate": 1.985127152834596e-05, "loss": 0.5602, "step": 5279 }, { "epoch": 0.11198065788636508, "grad_norm": 0.7876967787742615, "learning_rate": 1.9851214219391077e-05, "loss": 0.5496, "step": 5280 }, { "epoch": 0.11200186634429811, "grad_norm": 0.37346774339675903, "learning_rate": 1.985115689947976e-05, "loss": 0.5945, "step": 5281 }, { "epoch": 0.11202307480223113, "grad_norm": 0.3384890556335449, "learning_rate": 1.985109956861207e-05, "loss": 0.5456, "step": 5282 }, { "epoch": 0.11204428326016415, "grad_norm": 0.3163702189922333, "learning_rate": 1.9851042226788073e-05, "loss": 0.5422, "step": 5283 }, { "epoch": 0.11206549171809718, "grad_norm": 0.30850479006767273, "learning_rate": 1.9850984874007833e-05, "loss": 0.5453, "step": 5284 }, { "epoch": 0.1120867001760302, "grad_norm": 0.4179971218109131, "learning_rate": 1.9850927510271415e-05, "loss": 0.4578, "step": 5285 }, { "epoch": 0.11210790863396322, "grad_norm": 0.31970158219337463, "learning_rate": 1.9850870135578873e-05, "loss": 0.6255, "step": 5286 }, { "epoch": 0.11212911709189625, "grad_norm": 0.34327295422554016, "learning_rate": 1.9850812749930285e-05, "loss": 0.5572, "step": 5287 }, { "epoch": 0.11215032554982927, "grad_norm": 0.34801337122917175, "learning_rate": 1.985075535332571e-05, "loss": 0.4897, "step": 5288 }, { "epoch": 0.1121715340077623, "grad_norm": 0.4127434194087982, "learning_rate": 1.985069794576521e-05, "loss": 0.5315, "step": 5289 }, { "epoch": 0.11219274246569531, "grad_norm": 0.34741586446762085, "learning_rate": 1.9850640527248847e-05, "loss": 0.5508, "step": 5290 }, { "epoch": 0.11221395092362835, "grad_norm": 0.3413953185081482, "learning_rate": 1.985058309777669e-05, "loss": 0.5382, "step": 5291 }, { "epoch": 0.11223515938156137, "grad_norm": 0.32048848271369934, "learning_rate": 1.9850525657348796e-05, "loss": 0.5554, "step": 5292 }, { "epoch": 0.11225636783949439, "grad_norm": 0.3334718644618988, "learning_rate": 1.9850468205965234e-05, "loss": 0.5896, "step": 5293 }, { "epoch": 0.11227757629742742, "grad_norm": 0.3075495660305023, "learning_rate": 1.985041074362607e-05, "loss": 0.4313, "step": 5294 }, { "epoch": 0.11229878475536044, "grad_norm": 0.32855117321014404, "learning_rate": 1.985035327033136e-05, "loss": 0.4929, "step": 5295 }, { "epoch": 0.11231999321329346, "grad_norm": 0.34229299426078796, "learning_rate": 1.9850295786081176e-05, "loss": 0.5697, "step": 5296 }, { "epoch": 0.11234120167122648, "grad_norm": 0.33342885971069336, "learning_rate": 1.985023829087558e-05, "loss": 0.56, "step": 5297 }, { "epoch": 0.11236241012915951, "grad_norm": 0.428481787443161, "learning_rate": 1.985018078471463e-05, "loss": 0.4898, "step": 5298 }, { "epoch": 0.11238361858709253, "grad_norm": 0.2980647385120392, "learning_rate": 1.9850123267598397e-05, "loss": 0.5761, "step": 5299 }, { "epoch": 0.11240482704502555, "grad_norm": 0.3259725868701935, "learning_rate": 1.9850065739526942e-05, "loss": 0.4631, "step": 5300 }, { "epoch": 0.11242603550295859, "grad_norm": 0.3164932131767273, "learning_rate": 1.9850008200500332e-05, "loss": 0.5267, "step": 5301 }, { "epoch": 0.1124472439608916, "grad_norm": 0.3196411728858948, "learning_rate": 1.9849950650518625e-05, "loss": 0.5679, "step": 5302 }, { "epoch": 0.11246845241882462, "grad_norm": 0.3032628297805786, "learning_rate": 1.984989308958189e-05, "loss": 0.528, "step": 5303 }, { "epoch": 0.11248966087675766, "grad_norm": 0.36446529626846313, "learning_rate": 1.984983551769019e-05, "loss": 0.5452, "step": 5304 }, { "epoch": 0.11251086933469068, "grad_norm": 0.3161706030368805, "learning_rate": 1.9849777934843587e-05, "loss": 0.4876, "step": 5305 }, { "epoch": 0.1125320777926237, "grad_norm": 0.3420289158821106, "learning_rate": 1.9849720341042148e-05, "loss": 0.5004, "step": 5306 }, { "epoch": 0.11255328625055672, "grad_norm": 0.32251039147377014, "learning_rate": 1.9849662736285934e-05, "loss": 0.608, "step": 5307 }, { "epoch": 0.11257449470848975, "grad_norm": 0.3136913478374481, "learning_rate": 1.9849605120575012e-05, "loss": 0.5818, "step": 5308 }, { "epoch": 0.11259570316642277, "grad_norm": 0.346674382686615, "learning_rate": 1.9849547493909444e-05, "loss": 0.6121, "step": 5309 }, { "epoch": 0.11261691162435579, "grad_norm": 0.2956903278827667, "learning_rate": 1.9849489856289293e-05, "loss": 0.5067, "step": 5310 }, { "epoch": 0.11263812008228882, "grad_norm": 0.296815425157547, "learning_rate": 1.984943220771463e-05, "loss": 0.436, "step": 5311 }, { "epoch": 0.11265932854022184, "grad_norm": 0.34195420145988464, "learning_rate": 1.984937454818551e-05, "loss": 0.5344, "step": 5312 }, { "epoch": 0.11268053699815486, "grad_norm": 0.3849796950817108, "learning_rate": 1.9849316877702e-05, "loss": 0.4683, "step": 5313 }, { "epoch": 0.11270174545608788, "grad_norm": 0.40844476222991943, "learning_rate": 1.984925919626417e-05, "loss": 0.5952, "step": 5314 }, { "epoch": 0.11272295391402092, "grad_norm": 0.38444915413856506, "learning_rate": 1.984920150387208e-05, "loss": 0.489, "step": 5315 }, { "epoch": 0.11274416237195393, "grad_norm": 0.3381255865097046, "learning_rate": 1.9849143800525787e-05, "loss": 0.5674, "step": 5316 }, { "epoch": 0.11276537082988695, "grad_norm": 0.37603428959846497, "learning_rate": 1.984908608622537e-05, "loss": 0.4519, "step": 5317 }, { "epoch": 0.11278657928781999, "grad_norm": 0.31130996346473694, "learning_rate": 1.9849028360970876e-05, "loss": 0.4973, "step": 5318 }, { "epoch": 0.11280778774575301, "grad_norm": 0.311166912317276, "learning_rate": 1.9848970624762385e-05, "loss": 0.5016, "step": 5319 }, { "epoch": 0.11282899620368603, "grad_norm": 0.4260913133621216, "learning_rate": 1.9848912877599952e-05, "loss": 0.4462, "step": 5320 }, { "epoch": 0.11285020466161905, "grad_norm": 0.36196234822273254, "learning_rate": 1.9848855119483646e-05, "loss": 0.5206, "step": 5321 }, { "epoch": 0.11287141311955208, "grad_norm": 0.3508675992488861, "learning_rate": 1.9848797350413526e-05, "loss": 0.5514, "step": 5322 }, { "epoch": 0.1128926215774851, "grad_norm": 0.3914775848388672, "learning_rate": 1.9848739570389658e-05, "loss": 0.4191, "step": 5323 }, { "epoch": 0.11291383003541812, "grad_norm": 0.333126038312912, "learning_rate": 1.9848681779412108e-05, "loss": 0.6406, "step": 5324 }, { "epoch": 0.11293503849335115, "grad_norm": 0.31245434284210205, "learning_rate": 1.9848623977480942e-05, "loss": 0.5271, "step": 5325 }, { "epoch": 0.11295624695128417, "grad_norm": 0.36993762850761414, "learning_rate": 1.9848566164596224e-05, "loss": 0.5292, "step": 5326 }, { "epoch": 0.11297745540921719, "grad_norm": 0.3980167806148529, "learning_rate": 1.9848508340758014e-05, "loss": 0.5789, "step": 5327 }, { "epoch": 0.11299866386715023, "grad_norm": 0.3271391987800598, "learning_rate": 1.9848450505966377e-05, "loss": 0.5845, "step": 5328 }, { "epoch": 0.11301987232508325, "grad_norm": 0.32253196835517883, "learning_rate": 1.984839266022138e-05, "loss": 0.5979, "step": 5329 }, { "epoch": 0.11304108078301627, "grad_norm": 0.38339605927467346, "learning_rate": 1.9848334803523085e-05, "loss": 0.5015, "step": 5330 }, { "epoch": 0.11306228924094928, "grad_norm": 0.32091063261032104, "learning_rate": 1.9848276935871557e-05, "loss": 0.5182, "step": 5331 }, { "epoch": 0.11308349769888232, "grad_norm": 0.4200285077095032, "learning_rate": 1.9848219057266864e-05, "loss": 0.5017, "step": 5332 }, { "epoch": 0.11310470615681534, "grad_norm": 0.3103872835636139, "learning_rate": 1.9848161167709065e-05, "loss": 0.5412, "step": 5333 }, { "epoch": 0.11312591461474836, "grad_norm": 0.3171074688434601, "learning_rate": 1.984810326719823e-05, "loss": 0.511, "step": 5334 }, { "epoch": 0.11314712307268139, "grad_norm": 0.36383742094039917, "learning_rate": 1.9848045355734414e-05, "loss": 0.6565, "step": 5335 }, { "epoch": 0.11316833153061441, "grad_norm": 0.4752236008644104, "learning_rate": 1.9847987433317693e-05, "loss": 0.537, "step": 5336 }, { "epoch": 0.11318953998854743, "grad_norm": 0.3168857991695404, "learning_rate": 1.9847929499948125e-05, "loss": 0.5328, "step": 5337 }, { "epoch": 0.11321074844648045, "grad_norm": 0.3442288041114807, "learning_rate": 1.9847871555625774e-05, "loss": 0.521, "step": 5338 }, { "epoch": 0.11323195690441348, "grad_norm": 0.4016864597797394, "learning_rate": 1.9847813600350707e-05, "loss": 0.5742, "step": 5339 }, { "epoch": 0.1132531653623465, "grad_norm": 0.32929500937461853, "learning_rate": 1.9847755634122986e-05, "loss": 0.4922, "step": 5340 }, { "epoch": 0.11327437382027952, "grad_norm": 0.35246366262435913, "learning_rate": 1.984769765694268e-05, "loss": 0.5656, "step": 5341 }, { "epoch": 0.11329558227821256, "grad_norm": 0.35665372014045715, "learning_rate": 1.9847639668809845e-05, "loss": 0.4717, "step": 5342 }, { "epoch": 0.11331679073614558, "grad_norm": 0.35994753241539, "learning_rate": 1.984758166972455e-05, "loss": 0.431, "step": 5343 }, { "epoch": 0.1133379991940786, "grad_norm": 0.344677209854126, "learning_rate": 1.9847523659686866e-05, "loss": 0.5138, "step": 5344 }, { "epoch": 0.11335920765201163, "grad_norm": 0.3346233069896698, "learning_rate": 1.984746563869685e-05, "loss": 0.5118, "step": 5345 }, { "epoch": 0.11338041610994465, "grad_norm": 0.35762861371040344, "learning_rate": 1.9847407606754564e-05, "loss": 0.5513, "step": 5346 }, { "epoch": 0.11340162456787767, "grad_norm": 0.35457897186279297, "learning_rate": 1.9847349563860085e-05, "loss": 0.5375, "step": 5347 }, { "epoch": 0.11342283302581069, "grad_norm": 0.29395991563796997, "learning_rate": 1.984729151001346e-05, "loss": 0.5541, "step": 5348 }, { "epoch": 0.11344404148374372, "grad_norm": 0.3289870023727417, "learning_rate": 1.984723344521477e-05, "loss": 0.5471, "step": 5349 }, { "epoch": 0.11346524994167674, "grad_norm": 0.3328453004360199, "learning_rate": 1.984717536946407e-05, "loss": 0.5273, "step": 5350 }, { "epoch": 0.11348645839960976, "grad_norm": 0.3352157473564148, "learning_rate": 1.9847117282761426e-05, "loss": 0.5158, "step": 5351 }, { "epoch": 0.1135076668575428, "grad_norm": 0.3435005247592926, "learning_rate": 1.9847059185106904e-05, "loss": 0.4665, "step": 5352 }, { "epoch": 0.11352887531547581, "grad_norm": 0.30369260907173157, "learning_rate": 1.984700107650057e-05, "loss": 0.525, "step": 5353 }, { "epoch": 0.11355008377340883, "grad_norm": 0.3399356007575989, "learning_rate": 1.9846942956942485e-05, "loss": 0.5648, "step": 5354 }, { "epoch": 0.11357129223134185, "grad_norm": 0.33622145652770996, "learning_rate": 1.9846884826432717e-05, "loss": 0.5201, "step": 5355 }, { "epoch": 0.11359250068927489, "grad_norm": 0.3170107305049896, "learning_rate": 1.984682668497133e-05, "loss": 0.5207, "step": 5356 }, { "epoch": 0.1136137091472079, "grad_norm": 0.41744935512542725, "learning_rate": 1.9846768532558386e-05, "loss": 0.5667, "step": 5357 }, { "epoch": 0.11363491760514093, "grad_norm": 0.33810874819755554, "learning_rate": 1.9846710369193952e-05, "loss": 0.5751, "step": 5358 }, { "epoch": 0.11365612606307396, "grad_norm": 0.3186854422092438, "learning_rate": 1.9846652194878093e-05, "loss": 0.5246, "step": 5359 }, { "epoch": 0.11367733452100698, "grad_norm": 0.341842919588089, "learning_rate": 1.9846594009610874e-05, "loss": 0.4901, "step": 5360 }, { "epoch": 0.11369854297894, "grad_norm": 0.30028998851776123, "learning_rate": 1.9846535813392356e-05, "loss": 0.5494, "step": 5361 }, { "epoch": 0.11371975143687303, "grad_norm": 0.3394160866737366, "learning_rate": 1.9846477606222606e-05, "loss": 0.483, "step": 5362 }, { "epoch": 0.11374095989480605, "grad_norm": 0.3265453279018402, "learning_rate": 1.9846419388101693e-05, "loss": 0.4943, "step": 5363 }, { "epoch": 0.11376216835273907, "grad_norm": 0.35395175218582153, "learning_rate": 1.9846361159029674e-05, "loss": 0.501, "step": 5364 }, { "epoch": 0.11378337681067209, "grad_norm": 0.33722689747810364, "learning_rate": 1.9846302919006623e-05, "loss": 0.5704, "step": 5365 }, { "epoch": 0.11380458526860512, "grad_norm": 0.3110581934452057, "learning_rate": 1.9846244668032595e-05, "loss": 0.5845, "step": 5366 }, { "epoch": 0.11382579372653814, "grad_norm": 0.33608517050743103, "learning_rate": 1.9846186406107658e-05, "loss": 0.5981, "step": 5367 }, { "epoch": 0.11384700218447116, "grad_norm": 0.3090724050998688, "learning_rate": 1.984612813323188e-05, "loss": 0.5299, "step": 5368 }, { "epoch": 0.1138682106424042, "grad_norm": 0.3446555733680725, "learning_rate": 1.9846069849405322e-05, "loss": 0.5202, "step": 5369 }, { "epoch": 0.11388941910033722, "grad_norm": 0.31800577044487, "learning_rate": 1.9846011554628056e-05, "loss": 0.5118, "step": 5370 }, { "epoch": 0.11391062755827024, "grad_norm": 0.3241419494152069, "learning_rate": 1.9845953248900138e-05, "loss": 0.4531, "step": 5371 }, { "epoch": 0.11393183601620326, "grad_norm": 0.3116320073604584, "learning_rate": 1.9845894932221635e-05, "loss": 0.4463, "step": 5372 }, { "epoch": 0.11395304447413629, "grad_norm": 0.30674290657043457, "learning_rate": 1.9845836604592613e-05, "loss": 0.5598, "step": 5373 }, { "epoch": 0.11397425293206931, "grad_norm": 0.34130290150642395, "learning_rate": 1.9845778266013142e-05, "loss": 0.4809, "step": 5374 }, { "epoch": 0.11399546139000233, "grad_norm": 0.32639792561531067, "learning_rate": 1.9845719916483273e-05, "loss": 0.4398, "step": 5375 }, { "epoch": 0.11401666984793536, "grad_norm": 0.345549076795578, "learning_rate": 1.9845661556003088e-05, "loss": 0.4833, "step": 5376 }, { "epoch": 0.11403787830586838, "grad_norm": 0.38420936465263367, "learning_rate": 1.984560318457264e-05, "loss": 0.5305, "step": 5377 }, { "epoch": 0.1140590867638014, "grad_norm": 0.3095080554485321, "learning_rate": 1.9845544802192e-05, "loss": 0.5771, "step": 5378 }, { "epoch": 0.11408029522173443, "grad_norm": 0.33725738525390625, "learning_rate": 1.9845486408861228e-05, "loss": 0.5829, "step": 5379 }, { "epoch": 0.11410150367966745, "grad_norm": 0.3554198145866394, "learning_rate": 1.9845428004580393e-05, "loss": 0.5554, "step": 5380 }, { "epoch": 0.11412271213760047, "grad_norm": 0.3386230170726776, "learning_rate": 1.984536958934956e-05, "loss": 0.5431, "step": 5381 }, { "epoch": 0.1141439205955335, "grad_norm": 0.3271602988243103, "learning_rate": 1.984531116316879e-05, "loss": 0.5149, "step": 5382 }, { "epoch": 0.11416512905346653, "grad_norm": 0.35685670375823975, "learning_rate": 1.9845252726038153e-05, "loss": 0.504, "step": 5383 }, { "epoch": 0.11418633751139955, "grad_norm": 0.3174417316913605, "learning_rate": 1.984519427795771e-05, "loss": 0.5222, "step": 5384 }, { "epoch": 0.11420754596933257, "grad_norm": 0.3004874587059021, "learning_rate": 1.9845135818927527e-05, "loss": 0.5405, "step": 5385 }, { "epoch": 0.1142287544272656, "grad_norm": 0.30482009053230286, "learning_rate": 1.984507734894767e-05, "loss": 0.439, "step": 5386 }, { "epoch": 0.11424996288519862, "grad_norm": 0.3552326261997223, "learning_rate": 1.9845018868018202e-05, "loss": 0.5076, "step": 5387 }, { "epoch": 0.11427117134313164, "grad_norm": 0.33837610483169556, "learning_rate": 1.984496037613919e-05, "loss": 0.5053, "step": 5388 }, { "epoch": 0.11429237980106466, "grad_norm": 0.3048795461654663, "learning_rate": 1.98449018733107e-05, "loss": 0.4979, "step": 5389 }, { "epoch": 0.11431358825899769, "grad_norm": 0.34696030616760254, "learning_rate": 1.9844843359532795e-05, "loss": 0.5397, "step": 5390 }, { "epoch": 0.11433479671693071, "grad_norm": 0.335740327835083, "learning_rate": 1.9844784834805542e-05, "loss": 0.5469, "step": 5391 }, { "epoch": 0.11435600517486373, "grad_norm": 0.362874299287796, "learning_rate": 1.9844726299129002e-05, "loss": 0.5586, "step": 5392 }, { "epoch": 0.11437721363279676, "grad_norm": 0.40122637152671814, "learning_rate": 1.9844667752503243e-05, "loss": 0.4804, "step": 5393 }, { "epoch": 0.11439842209072978, "grad_norm": 0.3172115981578827, "learning_rate": 1.9844609194928334e-05, "loss": 0.5446, "step": 5394 }, { "epoch": 0.1144196305486628, "grad_norm": 0.30947139859199524, "learning_rate": 1.9844550626404334e-05, "loss": 0.5524, "step": 5395 }, { "epoch": 0.11444083900659582, "grad_norm": 0.322002649307251, "learning_rate": 1.9844492046931308e-05, "loss": 0.5236, "step": 5396 }, { "epoch": 0.11446204746452886, "grad_norm": 0.33562418818473816, "learning_rate": 1.9844433456509328e-05, "loss": 0.5733, "step": 5397 }, { "epoch": 0.11448325592246188, "grad_norm": 0.28925278782844543, "learning_rate": 1.984437485513845e-05, "loss": 0.5867, "step": 5398 }, { "epoch": 0.1145044643803949, "grad_norm": 0.3754028379917145, "learning_rate": 1.9844316242818745e-05, "loss": 0.5279, "step": 5399 }, { "epoch": 0.11452567283832793, "grad_norm": 0.3659166097640991, "learning_rate": 1.984425761955028e-05, "loss": 0.6323, "step": 5400 }, { "epoch": 0.11454688129626095, "grad_norm": 0.4099862575531006, "learning_rate": 1.9844198985333116e-05, "loss": 0.4698, "step": 5401 }, { "epoch": 0.11456808975419397, "grad_norm": 0.3090866506099701, "learning_rate": 1.9844140340167315e-05, "loss": 0.4756, "step": 5402 }, { "epoch": 0.114589298212127, "grad_norm": 0.34192344546318054, "learning_rate": 1.9844081684052953e-05, "loss": 0.5088, "step": 5403 }, { "epoch": 0.11461050667006002, "grad_norm": 0.3189680278301239, "learning_rate": 1.9844023016990083e-05, "loss": 0.5565, "step": 5404 }, { "epoch": 0.11463171512799304, "grad_norm": 0.3210137188434601, "learning_rate": 1.984396433897878e-05, "loss": 0.5979, "step": 5405 }, { "epoch": 0.11465292358592606, "grad_norm": 0.4614016115665436, "learning_rate": 1.9843905650019106e-05, "loss": 0.4915, "step": 5406 }, { "epoch": 0.1146741320438591, "grad_norm": 0.33293411135673523, "learning_rate": 1.9843846950111125e-05, "loss": 0.5352, "step": 5407 }, { "epoch": 0.11469534050179211, "grad_norm": 0.3144708275794983, "learning_rate": 1.98437882392549e-05, "loss": 0.4801, "step": 5408 }, { "epoch": 0.11471654895972513, "grad_norm": 0.3187905550003052, "learning_rate": 1.98437295174505e-05, "loss": 0.4915, "step": 5409 }, { "epoch": 0.11473775741765817, "grad_norm": 0.35112476348876953, "learning_rate": 1.9843670784697993e-05, "loss": 0.5859, "step": 5410 }, { "epoch": 0.11475896587559119, "grad_norm": 0.3243328630924225, "learning_rate": 1.984361204099744e-05, "loss": 0.4762, "step": 5411 }, { "epoch": 0.11478017433352421, "grad_norm": 0.3098059892654419, "learning_rate": 1.9843553286348906e-05, "loss": 0.5152, "step": 5412 }, { "epoch": 0.11480138279145723, "grad_norm": 0.34087520837783813, "learning_rate": 1.9843494520752455e-05, "loss": 0.5463, "step": 5413 }, { "epoch": 0.11482259124939026, "grad_norm": 0.31612637639045715, "learning_rate": 1.9843435744208158e-05, "loss": 0.5029, "step": 5414 }, { "epoch": 0.11484379970732328, "grad_norm": 0.3194977939128876, "learning_rate": 1.9843376956716075e-05, "loss": 0.4287, "step": 5415 }, { "epoch": 0.1148650081652563, "grad_norm": 0.3237660825252533, "learning_rate": 1.9843318158276278e-05, "loss": 0.5403, "step": 5416 }, { "epoch": 0.11488621662318933, "grad_norm": 0.338508278131485, "learning_rate": 1.9843259348888824e-05, "loss": 0.4501, "step": 5417 }, { "epoch": 0.11490742508112235, "grad_norm": 0.33058321475982666, "learning_rate": 1.9843200528553784e-05, "loss": 0.5757, "step": 5418 }, { "epoch": 0.11492863353905537, "grad_norm": 0.3076834976673126, "learning_rate": 1.9843141697271222e-05, "loss": 0.5024, "step": 5419 }, { "epoch": 0.1149498419969884, "grad_norm": 0.3225138187408447, "learning_rate": 1.9843082855041204e-05, "loss": 0.5467, "step": 5420 }, { "epoch": 0.11497105045492143, "grad_norm": 0.33843570947647095, "learning_rate": 1.9843024001863793e-05, "loss": 0.5235, "step": 5421 }, { "epoch": 0.11499225891285444, "grad_norm": 0.34549885988235474, "learning_rate": 1.9842965137739057e-05, "loss": 0.5611, "step": 5422 }, { "epoch": 0.11501346737078746, "grad_norm": 0.3383813798427582, "learning_rate": 1.9842906262667057e-05, "loss": 0.5155, "step": 5423 }, { "epoch": 0.1150346758287205, "grad_norm": 0.32638436555862427, "learning_rate": 1.9842847376647868e-05, "loss": 0.487, "step": 5424 }, { "epoch": 0.11505588428665352, "grad_norm": 0.35141798853874207, "learning_rate": 1.9842788479681548e-05, "loss": 0.5211, "step": 5425 }, { "epoch": 0.11507709274458654, "grad_norm": 0.7511587738990784, "learning_rate": 1.9842729571768166e-05, "loss": 0.5871, "step": 5426 }, { "epoch": 0.11509830120251957, "grad_norm": 0.30088573694229126, "learning_rate": 1.9842670652907782e-05, "loss": 0.5004, "step": 5427 }, { "epoch": 0.11511950966045259, "grad_norm": 0.49470046162605286, "learning_rate": 1.9842611723100466e-05, "loss": 0.523, "step": 5428 }, { "epoch": 0.11514071811838561, "grad_norm": 0.3048693835735321, "learning_rate": 1.9842552782346282e-05, "loss": 0.5193, "step": 5429 }, { "epoch": 0.11516192657631863, "grad_norm": 0.3136551082134247, "learning_rate": 1.98424938306453e-05, "loss": 0.5355, "step": 5430 }, { "epoch": 0.11518313503425166, "grad_norm": 0.34853386878967285, "learning_rate": 1.9842434867997576e-05, "loss": 0.5165, "step": 5431 }, { "epoch": 0.11520434349218468, "grad_norm": 0.4284881353378296, "learning_rate": 1.9842375894403187e-05, "loss": 0.5155, "step": 5432 }, { "epoch": 0.1152255519501177, "grad_norm": 0.42070525884628296, "learning_rate": 1.984231690986219e-05, "loss": 0.533, "step": 5433 }, { "epoch": 0.11524676040805074, "grad_norm": 0.3475412130355835, "learning_rate": 1.9842257914374656e-05, "loss": 0.4491, "step": 5434 }, { "epoch": 0.11526796886598376, "grad_norm": 0.2970484495162964, "learning_rate": 1.9842198907940643e-05, "loss": 0.3891, "step": 5435 }, { "epoch": 0.11528917732391677, "grad_norm": 0.32103002071380615, "learning_rate": 1.9842139890560227e-05, "loss": 0.5527, "step": 5436 }, { "epoch": 0.11531038578184981, "grad_norm": 0.3313666582107544, "learning_rate": 1.9842080862233468e-05, "loss": 0.5255, "step": 5437 }, { "epoch": 0.11533159423978283, "grad_norm": 0.3148542046546936, "learning_rate": 1.9842021822960432e-05, "loss": 0.537, "step": 5438 }, { "epoch": 0.11535280269771585, "grad_norm": 0.3516092896461487, "learning_rate": 1.9841962772741182e-05, "loss": 0.6123, "step": 5439 }, { "epoch": 0.11537401115564887, "grad_norm": 0.3375104069709778, "learning_rate": 1.984190371157579e-05, "loss": 0.5101, "step": 5440 }, { "epoch": 0.1153952196135819, "grad_norm": 0.3366343677043915, "learning_rate": 1.9841844639464316e-05, "loss": 0.5524, "step": 5441 }, { "epoch": 0.11541642807151492, "grad_norm": 0.3190661668777466, "learning_rate": 1.9841785556406828e-05, "loss": 0.5493, "step": 5442 }, { "epoch": 0.11543763652944794, "grad_norm": 0.3258489668369293, "learning_rate": 1.9841726462403395e-05, "loss": 0.4964, "step": 5443 }, { "epoch": 0.11545884498738097, "grad_norm": 0.35236474871635437, "learning_rate": 1.9841667357454077e-05, "loss": 0.5328, "step": 5444 }, { "epoch": 0.115480053445314, "grad_norm": 0.31172239780426025, "learning_rate": 1.984160824155894e-05, "loss": 0.59, "step": 5445 }, { "epoch": 0.11550126190324701, "grad_norm": 0.3421609699726105, "learning_rate": 1.9841549114718057e-05, "loss": 0.5772, "step": 5446 }, { "epoch": 0.11552247036118003, "grad_norm": 0.30201247334480286, "learning_rate": 1.984148997693148e-05, "loss": 0.4917, "step": 5447 }, { "epoch": 0.11554367881911307, "grad_norm": 0.4257998764514923, "learning_rate": 1.984143082819929e-05, "loss": 0.456, "step": 5448 }, { "epoch": 0.11556488727704609, "grad_norm": 0.3292594254016876, "learning_rate": 1.9841371668521547e-05, "loss": 0.5797, "step": 5449 }, { "epoch": 0.1155860957349791, "grad_norm": 0.3373384177684784, "learning_rate": 1.984131249789831e-05, "loss": 0.4694, "step": 5450 }, { "epoch": 0.11560730419291214, "grad_norm": 0.3198540508747101, "learning_rate": 1.9841253316329656e-05, "loss": 0.5017, "step": 5451 }, { "epoch": 0.11562851265084516, "grad_norm": 0.3148592710494995, "learning_rate": 1.9841194123815643e-05, "loss": 0.5035, "step": 5452 }, { "epoch": 0.11564972110877818, "grad_norm": 0.35102298855781555, "learning_rate": 1.984113492035634e-05, "loss": 0.5562, "step": 5453 }, { "epoch": 0.1156709295667112, "grad_norm": 0.34759721159935, "learning_rate": 1.9841075705951814e-05, "loss": 0.5877, "step": 5454 }, { "epoch": 0.11569213802464423, "grad_norm": 0.3781633973121643, "learning_rate": 1.9841016480602126e-05, "loss": 0.61, "step": 5455 }, { "epoch": 0.11571334648257725, "grad_norm": 0.3448585569858551, "learning_rate": 1.9840957244307346e-05, "loss": 0.5049, "step": 5456 }, { "epoch": 0.11573455494051027, "grad_norm": 0.3202410936355591, "learning_rate": 1.9840897997067538e-05, "loss": 0.5657, "step": 5457 }, { "epoch": 0.1157557633984433, "grad_norm": 0.3776338994503021, "learning_rate": 1.984083873888277e-05, "loss": 0.496, "step": 5458 }, { "epoch": 0.11577697185637632, "grad_norm": 0.29369860887527466, "learning_rate": 1.984077946975311e-05, "loss": 0.4696, "step": 5459 }, { "epoch": 0.11579818031430934, "grad_norm": 0.3312641978263855, "learning_rate": 1.9840720189678615e-05, "loss": 0.5179, "step": 5460 }, { "epoch": 0.11581938877224238, "grad_norm": 0.3167448341846466, "learning_rate": 1.984066089865936e-05, "loss": 0.4913, "step": 5461 }, { "epoch": 0.1158405972301754, "grad_norm": 0.34517478942871094, "learning_rate": 1.9840601596695406e-05, "loss": 0.5062, "step": 5462 }, { "epoch": 0.11586180568810842, "grad_norm": 0.3848803639411926, "learning_rate": 1.9840542283786818e-05, "loss": 0.4857, "step": 5463 }, { "epoch": 0.11588301414604144, "grad_norm": 0.34652772545814514, "learning_rate": 1.984048295993367e-05, "loss": 0.5536, "step": 5464 }, { "epoch": 0.11590422260397447, "grad_norm": 0.3025050461292267, "learning_rate": 1.9840423625136018e-05, "loss": 0.5273, "step": 5465 }, { "epoch": 0.11592543106190749, "grad_norm": 0.3484133780002594, "learning_rate": 1.9840364279393935e-05, "loss": 0.613, "step": 5466 }, { "epoch": 0.11594663951984051, "grad_norm": 0.31828904151916504, "learning_rate": 1.9840304922707482e-05, "loss": 0.5701, "step": 5467 }, { "epoch": 0.11596784797777354, "grad_norm": 0.43032535910606384, "learning_rate": 1.9840245555076726e-05, "loss": 0.5802, "step": 5468 }, { "epoch": 0.11598905643570656, "grad_norm": 0.3255784213542938, "learning_rate": 1.984018617650174e-05, "loss": 0.5583, "step": 5469 }, { "epoch": 0.11601026489363958, "grad_norm": 0.2997155785560608, "learning_rate": 1.984012678698258e-05, "loss": 0.4357, "step": 5470 }, { "epoch": 0.1160314733515726, "grad_norm": 0.41672319173812866, "learning_rate": 1.9840067386519318e-05, "loss": 0.5634, "step": 5471 }, { "epoch": 0.11605268180950563, "grad_norm": 0.32571449875831604, "learning_rate": 1.9840007975112015e-05, "loss": 0.447, "step": 5472 }, { "epoch": 0.11607389026743865, "grad_norm": 0.47303247451782227, "learning_rate": 1.9839948552760746e-05, "loss": 0.4877, "step": 5473 }, { "epoch": 0.11609509872537167, "grad_norm": 0.3204825818538666, "learning_rate": 1.983988911946557e-05, "loss": 0.446, "step": 5474 }, { "epoch": 0.1161163071833047, "grad_norm": 0.32333675026893616, "learning_rate": 1.9839829675226553e-05, "loss": 0.5208, "step": 5475 }, { "epoch": 0.11613751564123773, "grad_norm": 0.31834694743156433, "learning_rate": 1.9839770220043765e-05, "loss": 0.6039, "step": 5476 }, { "epoch": 0.11615872409917075, "grad_norm": 0.40805912017822266, "learning_rate": 1.9839710753917267e-05, "loss": 0.5916, "step": 5477 }, { "epoch": 0.11617993255710378, "grad_norm": 0.32899460196495056, "learning_rate": 1.983965127684713e-05, "loss": 0.4906, "step": 5478 }, { "epoch": 0.1162011410150368, "grad_norm": 0.32281774282455444, "learning_rate": 1.983959178883342e-05, "loss": 0.5115, "step": 5479 }, { "epoch": 0.11622234947296982, "grad_norm": 0.3157528340816498, "learning_rate": 1.98395322898762e-05, "loss": 0.5187, "step": 5480 }, { "epoch": 0.11624355793090284, "grad_norm": 0.2809058427810669, "learning_rate": 1.9839472779975536e-05, "loss": 0.5, "step": 5481 }, { "epoch": 0.11626476638883587, "grad_norm": 0.431901752948761, "learning_rate": 1.9839413259131503e-05, "loss": 0.4647, "step": 5482 }, { "epoch": 0.11628597484676889, "grad_norm": 0.34737375378608704, "learning_rate": 1.9839353727344153e-05, "loss": 0.5077, "step": 5483 }, { "epoch": 0.11630718330470191, "grad_norm": 0.31454139947891235, "learning_rate": 1.9839294184613557e-05, "loss": 0.4671, "step": 5484 }, { "epoch": 0.11632839176263494, "grad_norm": 0.34916532039642334, "learning_rate": 1.983923463093979e-05, "loss": 0.5651, "step": 5485 }, { "epoch": 0.11634960022056796, "grad_norm": 0.32572346925735474, "learning_rate": 1.983917506632291e-05, "loss": 0.4775, "step": 5486 }, { "epoch": 0.11637080867850098, "grad_norm": 0.36370524764060974, "learning_rate": 1.9839115490762984e-05, "loss": 0.465, "step": 5487 }, { "epoch": 0.116392017136434, "grad_norm": 0.32440420985221863, "learning_rate": 1.9839055904260077e-05, "loss": 0.5735, "step": 5488 }, { "epoch": 0.11641322559436704, "grad_norm": 0.32725101709365845, "learning_rate": 1.983899630681426e-05, "loss": 0.6053, "step": 5489 }, { "epoch": 0.11643443405230006, "grad_norm": 0.3870764374732971, "learning_rate": 1.9838936698425597e-05, "loss": 0.4823, "step": 5490 }, { "epoch": 0.11645564251023308, "grad_norm": 0.3128495216369629, "learning_rate": 1.9838877079094156e-05, "loss": 0.4782, "step": 5491 }, { "epoch": 0.11647685096816611, "grad_norm": 0.344419926404953, "learning_rate": 1.9838817448819994e-05, "loss": 0.4322, "step": 5492 }, { "epoch": 0.11649805942609913, "grad_norm": 0.36516061425209045, "learning_rate": 1.9838757807603194e-05, "loss": 0.5631, "step": 5493 }, { "epoch": 0.11651926788403215, "grad_norm": 0.30276739597320557, "learning_rate": 1.983869815544381e-05, "loss": 0.5272, "step": 5494 }, { "epoch": 0.11654047634196518, "grad_norm": 0.3358875513076782, "learning_rate": 1.9838638492341906e-05, "loss": 0.5443, "step": 5495 }, { "epoch": 0.1165616847998982, "grad_norm": 0.3325512111186981, "learning_rate": 1.983857881829756e-05, "loss": 0.6389, "step": 5496 }, { "epoch": 0.11658289325783122, "grad_norm": 0.3240317404270172, "learning_rate": 1.983851913331083e-05, "loss": 0.463, "step": 5497 }, { "epoch": 0.11660410171576424, "grad_norm": 0.3217201232910156, "learning_rate": 1.9838459437381784e-05, "loss": 0.5593, "step": 5498 }, { "epoch": 0.11662531017369727, "grad_norm": 0.3214467465877533, "learning_rate": 1.983839973051049e-05, "loss": 0.5839, "step": 5499 }, { "epoch": 0.1166465186316303, "grad_norm": 0.3201436996459961, "learning_rate": 1.9838340012697013e-05, "loss": 0.5525, "step": 5500 }, { "epoch": 0.11666772708956331, "grad_norm": 0.31473103165626526, "learning_rate": 1.9838280283941417e-05, "loss": 0.5924, "step": 5501 }, { "epoch": 0.11668893554749635, "grad_norm": 0.2904552221298218, "learning_rate": 1.9838220544243775e-05, "loss": 0.4391, "step": 5502 }, { "epoch": 0.11671014400542937, "grad_norm": 0.36615851521492004, "learning_rate": 1.983816079360415e-05, "loss": 0.5169, "step": 5503 }, { "epoch": 0.11673135246336239, "grad_norm": 0.6464483141899109, "learning_rate": 1.9838101032022604e-05, "loss": 0.5516, "step": 5504 }, { "epoch": 0.1167525609212954, "grad_norm": 0.3211837112903595, "learning_rate": 1.983804125949921e-05, "loss": 0.5307, "step": 5505 }, { "epoch": 0.11677376937922844, "grad_norm": 0.3290458917617798, "learning_rate": 1.983798147603403e-05, "loss": 0.5618, "step": 5506 }, { "epoch": 0.11679497783716146, "grad_norm": 0.324641615152359, "learning_rate": 1.9837921681627134e-05, "loss": 0.5312, "step": 5507 }, { "epoch": 0.11681618629509448, "grad_norm": 0.5571150779724121, "learning_rate": 1.9837861876278588e-05, "loss": 0.5607, "step": 5508 }, { "epoch": 0.11683739475302751, "grad_norm": 0.5547189116477966, "learning_rate": 1.9837802059988456e-05, "loss": 0.5591, "step": 5509 }, { "epoch": 0.11685860321096053, "grad_norm": 0.3585481345653534, "learning_rate": 1.9837742232756807e-05, "loss": 0.5594, "step": 5510 }, { "epoch": 0.11687981166889355, "grad_norm": 0.41694214940071106, "learning_rate": 1.9837682394583707e-05, "loss": 0.5169, "step": 5511 }, { "epoch": 0.11690102012682659, "grad_norm": 0.3345957398414612, "learning_rate": 1.983762254546922e-05, "loss": 0.5185, "step": 5512 }, { "epoch": 0.1169222285847596, "grad_norm": 0.3394634425640106, "learning_rate": 1.9837562685413417e-05, "loss": 0.485, "step": 5513 }, { "epoch": 0.11694343704269262, "grad_norm": 0.46752503514289856, "learning_rate": 1.983750281441636e-05, "loss": 0.5095, "step": 5514 }, { "epoch": 0.11696464550062564, "grad_norm": 0.3331697881221771, "learning_rate": 1.983744293247812e-05, "loss": 0.4821, "step": 5515 }, { "epoch": 0.11698585395855868, "grad_norm": 0.33108827471733093, "learning_rate": 1.9837383039598757e-05, "loss": 0.5512, "step": 5516 }, { "epoch": 0.1170070624164917, "grad_norm": 0.33403897285461426, "learning_rate": 1.9837323135778348e-05, "loss": 0.5533, "step": 5517 }, { "epoch": 0.11702827087442472, "grad_norm": 0.3451194167137146, "learning_rate": 1.983726322101695e-05, "loss": 0.5318, "step": 5518 }, { "epoch": 0.11704947933235775, "grad_norm": 0.32083049416542053, "learning_rate": 1.9837203295314634e-05, "loss": 0.5797, "step": 5519 }, { "epoch": 0.11707068779029077, "grad_norm": 0.32601264119148254, "learning_rate": 1.9837143358671468e-05, "loss": 0.5348, "step": 5520 }, { "epoch": 0.11709189624822379, "grad_norm": 0.3671376705169678, "learning_rate": 1.9837083411087516e-05, "loss": 0.6035, "step": 5521 }, { "epoch": 0.11711310470615681, "grad_norm": 0.31204310059547424, "learning_rate": 1.9837023452562842e-05, "loss": 0.5371, "step": 5522 }, { "epoch": 0.11713431316408984, "grad_norm": 0.3593313992023468, "learning_rate": 1.983696348309752e-05, "loss": 0.5641, "step": 5523 }, { "epoch": 0.11715552162202286, "grad_norm": 0.324308305978775, "learning_rate": 1.9836903502691613e-05, "loss": 0.5105, "step": 5524 }, { "epoch": 0.11717673007995588, "grad_norm": 0.3294530212879181, "learning_rate": 1.983684351134518e-05, "loss": 0.5016, "step": 5525 }, { "epoch": 0.11719793853788892, "grad_norm": 0.3460775911808014, "learning_rate": 1.98367835090583e-05, "loss": 0.6167, "step": 5526 }, { "epoch": 0.11721914699582194, "grad_norm": 0.34756705164909363, "learning_rate": 1.983672349583104e-05, "loss": 0.4762, "step": 5527 }, { "epoch": 0.11724035545375495, "grad_norm": 0.32833921909332275, "learning_rate": 1.9836663471663454e-05, "loss": 0.5012, "step": 5528 }, { "epoch": 0.11726156391168797, "grad_norm": 0.3209921419620514, "learning_rate": 1.9836603436555618e-05, "loss": 0.5117, "step": 5529 }, { "epoch": 0.11728277236962101, "grad_norm": 0.27364441752433777, "learning_rate": 1.98365433905076e-05, "loss": 0.4605, "step": 5530 }, { "epoch": 0.11730398082755403, "grad_norm": 0.3593696355819702, "learning_rate": 1.983648333351946e-05, "loss": 0.5473, "step": 5531 }, { "epoch": 0.11732518928548705, "grad_norm": 0.33627355098724365, "learning_rate": 1.9836423265591272e-05, "loss": 0.5032, "step": 5532 }, { "epoch": 0.11734639774342008, "grad_norm": 0.35535144805908203, "learning_rate": 1.9836363186723098e-05, "loss": 0.5302, "step": 5533 }, { "epoch": 0.1173676062013531, "grad_norm": 0.3269789516925812, "learning_rate": 1.983630309691501e-05, "loss": 0.6071, "step": 5534 }, { "epoch": 0.11738881465928612, "grad_norm": 0.35750511288642883, "learning_rate": 1.9836242996167067e-05, "loss": 0.5353, "step": 5535 }, { "epoch": 0.11741002311721915, "grad_norm": 0.3812682330608368, "learning_rate": 1.9836182884479338e-05, "loss": 0.5539, "step": 5536 }, { "epoch": 0.11743123157515217, "grad_norm": 0.39038392901420593, "learning_rate": 1.9836122761851894e-05, "loss": 0.5703, "step": 5537 }, { "epoch": 0.11745244003308519, "grad_norm": 0.32002565264701843, "learning_rate": 1.98360626282848e-05, "loss": 0.4974, "step": 5538 }, { "epoch": 0.11747364849101821, "grad_norm": 0.32523810863494873, "learning_rate": 1.9836002483778124e-05, "loss": 0.5122, "step": 5539 }, { "epoch": 0.11749485694895125, "grad_norm": 0.3359552025794983, "learning_rate": 1.9835942328331928e-05, "loss": 0.5178, "step": 5540 }, { "epoch": 0.11751606540688427, "grad_norm": 0.32514265179634094, "learning_rate": 1.9835882161946288e-05, "loss": 0.5652, "step": 5541 }, { "epoch": 0.11753727386481728, "grad_norm": 0.33036619424819946, "learning_rate": 1.983582198462126e-05, "loss": 0.5218, "step": 5542 }, { "epoch": 0.11755848232275032, "grad_norm": 0.3500824570655823, "learning_rate": 1.9835761796356918e-05, "loss": 0.4496, "step": 5543 }, { "epoch": 0.11757969078068334, "grad_norm": 0.390601247549057, "learning_rate": 1.983570159715333e-05, "loss": 0.5873, "step": 5544 }, { "epoch": 0.11760089923861636, "grad_norm": 0.32308027148246765, "learning_rate": 1.9835641387010555e-05, "loss": 0.5214, "step": 5545 }, { "epoch": 0.11762210769654938, "grad_norm": 0.3819955885410309, "learning_rate": 1.9835581165928667e-05, "loss": 0.512, "step": 5546 }, { "epoch": 0.11764331615448241, "grad_norm": 0.3436245918273926, "learning_rate": 1.983552093390773e-05, "loss": 0.5006, "step": 5547 }, { "epoch": 0.11766452461241543, "grad_norm": 0.3477643132209778, "learning_rate": 1.9835460690947815e-05, "loss": 0.5397, "step": 5548 }, { "epoch": 0.11768573307034845, "grad_norm": 0.3184507489204407, "learning_rate": 1.9835400437048987e-05, "loss": 0.4889, "step": 5549 }, { "epoch": 0.11770694152828148, "grad_norm": 0.33591964840888977, "learning_rate": 1.983534017221131e-05, "loss": 0.5157, "step": 5550 }, { "epoch": 0.1177281499862145, "grad_norm": 0.7444807887077332, "learning_rate": 1.9835279896434852e-05, "loss": 0.5377, "step": 5551 }, { "epoch": 0.11774935844414752, "grad_norm": 0.3187522888183594, "learning_rate": 1.9835219609719684e-05, "loss": 0.5348, "step": 5552 }, { "epoch": 0.11777056690208056, "grad_norm": 0.34455230832099915, "learning_rate": 1.9835159312065868e-05, "loss": 0.5262, "step": 5553 }, { "epoch": 0.11779177536001358, "grad_norm": 0.3417348861694336, "learning_rate": 1.9835099003473472e-05, "loss": 0.5134, "step": 5554 }, { "epoch": 0.1178129838179466, "grad_norm": 0.33741384744644165, "learning_rate": 1.9835038683942567e-05, "loss": 0.5489, "step": 5555 }, { "epoch": 0.11783419227587962, "grad_norm": 0.316557914018631, "learning_rate": 1.9834978353473214e-05, "loss": 0.5109, "step": 5556 }, { "epoch": 0.11785540073381265, "grad_norm": 0.3107370138168335, "learning_rate": 1.9834918012065488e-05, "loss": 0.5531, "step": 5557 }, { "epoch": 0.11787660919174567, "grad_norm": 0.3415735065937042, "learning_rate": 1.983485765971945e-05, "loss": 0.5279, "step": 5558 }, { "epoch": 0.11789781764967869, "grad_norm": 0.330604612827301, "learning_rate": 1.9834797296435165e-05, "loss": 0.607, "step": 5559 }, { "epoch": 0.11791902610761172, "grad_norm": 0.3078664243221283, "learning_rate": 1.983473692221271e-05, "loss": 0.4907, "step": 5560 }, { "epoch": 0.11794023456554474, "grad_norm": 0.3221736252307892, "learning_rate": 1.9834676537052142e-05, "loss": 0.535, "step": 5561 }, { "epoch": 0.11796144302347776, "grad_norm": 0.44262221455574036, "learning_rate": 1.983461614095353e-05, "loss": 0.5021, "step": 5562 }, { "epoch": 0.11798265148141078, "grad_norm": 0.3211243152618408, "learning_rate": 1.983455573391695e-05, "loss": 0.4466, "step": 5563 }, { "epoch": 0.11800385993934381, "grad_norm": 0.3771063983440399, "learning_rate": 1.983449531594246e-05, "loss": 0.5709, "step": 5564 }, { "epoch": 0.11802506839727683, "grad_norm": 0.3181055188179016, "learning_rate": 1.983443488703013e-05, "loss": 0.4806, "step": 5565 }, { "epoch": 0.11804627685520985, "grad_norm": 0.3217218816280365, "learning_rate": 1.9834374447180026e-05, "loss": 0.5077, "step": 5566 }, { "epoch": 0.11806748531314289, "grad_norm": 0.33502131700515747, "learning_rate": 1.9834313996392215e-05, "loss": 0.5396, "step": 5567 }, { "epoch": 0.1180886937710759, "grad_norm": 0.3242606818675995, "learning_rate": 1.9834253534666765e-05, "loss": 0.4535, "step": 5568 }, { "epoch": 0.11810990222900893, "grad_norm": 0.3765430450439453, "learning_rate": 1.9834193062003745e-05, "loss": 0.5498, "step": 5569 }, { "epoch": 0.11813111068694196, "grad_norm": 0.3659304976463318, "learning_rate": 1.9834132578403222e-05, "loss": 0.5813, "step": 5570 }, { "epoch": 0.11815231914487498, "grad_norm": 0.31359103322029114, "learning_rate": 1.983407208386526e-05, "loss": 0.5471, "step": 5571 }, { "epoch": 0.118173527602808, "grad_norm": 0.3400658965110779, "learning_rate": 1.983401157838993e-05, "loss": 0.4798, "step": 5572 }, { "epoch": 0.11819473606074102, "grad_norm": 0.34527379274368286, "learning_rate": 1.9833951061977295e-05, "loss": 0.5688, "step": 5573 }, { "epoch": 0.11821594451867405, "grad_norm": 0.34111782908439636, "learning_rate": 1.9833890534627433e-05, "loss": 0.6029, "step": 5574 }, { "epoch": 0.11823715297660707, "grad_norm": 0.4634585976600647, "learning_rate": 1.9833829996340396e-05, "loss": 0.6196, "step": 5575 }, { "epoch": 0.11825836143454009, "grad_norm": 0.2983562648296356, "learning_rate": 1.983376944711626e-05, "loss": 0.4959, "step": 5576 }, { "epoch": 0.11827956989247312, "grad_norm": 0.3803950548171997, "learning_rate": 1.9833708886955092e-05, "loss": 0.4873, "step": 5577 }, { "epoch": 0.11830077835040614, "grad_norm": 0.7105876207351685, "learning_rate": 1.9833648315856957e-05, "loss": 0.5112, "step": 5578 }, { "epoch": 0.11832198680833916, "grad_norm": 0.3226695954799652, "learning_rate": 1.9833587733821926e-05, "loss": 0.4708, "step": 5579 }, { "epoch": 0.11834319526627218, "grad_norm": 0.33484408259391785, "learning_rate": 1.9833527140850062e-05, "loss": 0.6005, "step": 5580 }, { "epoch": 0.11836440372420522, "grad_norm": 0.2985613942146301, "learning_rate": 1.9833466536941435e-05, "loss": 0.4902, "step": 5581 }, { "epoch": 0.11838561218213824, "grad_norm": 0.328027606010437, "learning_rate": 1.9833405922096115e-05, "loss": 0.5857, "step": 5582 }, { "epoch": 0.11840682064007126, "grad_norm": 0.33208975195884705, "learning_rate": 1.9833345296314163e-05, "loss": 0.5273, "step": 5583 }, { "epoch": 0.11842802909800429, "grad_norm": 0.31622031331062317, "learning_rate": 1.983328465959565e-05, "loss": 0.4695, "step": 5584 }, { "epoch": 0.11844923755593731, "grad_norm": 0.34123995900154114, "learning_rate": 1.983322401194064e-05, "loss": 0.6076, "step": 5585 }, { "epoch": 0.11847044601387033, "grad_norm": 0.3319070339202881, "learning_rate": 1.983316335334921e-05, "loss": 0.5576, "step": 5586 }, { "epoch": 0.11849165447180336, "grad_norm": 0.358316034078598, "learning_rate": 1.9833102683821418e-05, "loss": 0.429, "step": 5587 }, { "epoch": 0.11851286292973638, "grad_norm": 0.33554181456565857, "learning_rate": 1.9833042003357337e-05, "loss": 0.5443, "step": 5588 }, { "epoch": 0.1185340713876694, "grad_norm": 0.331455260515213, "learning_rate": 1.983298131195703e-05, "loss": 0.533, "step": 5589 }, { "epoch": 0.11855527984560242, "grad_norm": 0.3060349225997925, "learning_rate": 1.9832920609620565e-05, "loss": 0.3767, "step": 5590 }, { "epoch": 0.11857648830353545, "grad_norm": 0.31774720549583435, "learning_rate": 1.9832859896348014e-05, "loss": 0.4729, "step": 5591 }, { "epoch": 0.11859769676146847, "grad_norm": 0.3787643313407898, "learning_rate": 1.983279917213944e-05, "loss": 0.5355, "step": 5592 }, { "epoch": 0.1186189052194015, "grad_norm": 0.3381052017211914, "learning_rate": 1.9832738436994914e-05, "loss": 0.602, "step": 5593 }, { "epoch": 0.11864011367733453, "grad_norm": 0.3490326702594757, "learning_rate": 1.98326776909145e-05, "loss": 0.5369, "step": 5594 }, { "epoch": 0.11866132213526755, "grad_norm": 0.3375735580921173, "learning_rate": 1.9832616933898267e-05, "loss": 0.4978, "step": 5595 }, { "epoch": 0.11868253059320057, "grad_norm": 0.2990897595882416, "learning_rate": 1.9832556165946287e-05, "loss": 0.5126, "step": 5596 }, { "epoch": 0.11870373905113359, "grad_norm": 0.37261155247688293, "learning_rate": 1.9832495387058617e-05, "loss": 0.6211, "step": 5597 }, { "epoch": 0.11872494750906662, "grad_norm": 0.3176575005054474, "learning_rate": 1.9832434597235335e-05, "loss": 0.5301, "step": 5598 }, { "epoch": 0.11874615596699964, "grad_norm": 0.4632589817047119, "learning_rate": 1.98323737964765e-05, "loss": 0.6355, "step": 5599 }, { "epoch": 0.11876736442493266, "grad_norm": 0.32513877749443054, "learning_rate": 1.983231298478219e-05, "loss": 0.4698, "step": 5600 }, { "epoch": 0.11878857288286569, "grad_norm": 0.35772809386253357, "learning_rate": 1.983225216215247e-05, "loss": 0.6029, "step": 5601 }, { "epoch": 0.11880978134079871, "grad_norm": 0.3411058187484741, "learning_rate": 1.98321913285874e-05, "loss": 0.5857, "step": 5602 }, { "epoch": 0.11883098979873173, "grad_norm": 0.3683134615421295, "learning_rate": 1.983213048408705e-05, "loss": 0.4861, "step": 5603 }, { "epoch": 0.11885219825666475, "grad_norm": 0.34650948643684387, "learning_rate": 1.983206962865149e-05, "loss": 0.6427, "step": 5604 }, { "epoch": 0.11887340671459778, "grad_norm": 0.39167240262031555, "learning_rate": 1.983200876228079e-05, "loss": 0.5562, "step": 5605 }, { "epoch": 0.1188946151725308, "grad_norm": 0.31546545028686523, "learning_rate": 1.9831947884975013e-05, "loss": 0.4926, "step": 5606 }, { "epoch": 0.11891582363046382, "grad_norm": 0.2847128212451935, "learning_rate": 1.983188699673423e-05, "loss": 0.4596, "step": 5607 }, { "epoch": 0.11893703208839686, "grad_norm": 0.3594275414943695, "learning_rate": 1.983182609755851e-05, "loss": 0.5767, "step": 5608 }, { "epoch": 0.11895824054632988, "grad_norm": 0.40966635942459106, "learning_rate": 1.9831765187447917e-05, "loss": 0.5458, "step": 5609 }, { "epoch": 0.1189794490042629, "grad_norm": 0.31389304995536804, "learning_rate": 1.983170426640252e-05, "loss": 0.432, "step": 5610 }, { "epoch": 0.11900065746219593, "grad_norm": 0.726841926574707, "learning_rate": 1.9831643334422386e-05, "loss": 0.5453, "step": 5611 }, { "epoch": 0.11902186592012895, "grad_norm": 0.39074909687042236, "learning_rate": 1.9831582391507586e-05, "loss": 0.538, "step": 5612 }, { "epoch": 0.11904307437806197, "grad_norm": 0.36282971501350403, "learning_rate": 1.9831521437658182e-05, "loss": 0.5966, "step": 5613 }, { "epoch": 0.11906428283599499, "grad_norm": 0.31864047050476074, "learning_rate": 1.9831460472874248e-05, "loss": 0.6026, "step": 5614 }, { "epoch": 0.11908549129392802, "grad_norm": 0.3280498683452606, "learning_rate": 1.983139949715585e-05, "loss": 0.479, "step": 5615 }, { "epoch": 0.11910669975186104, "grad_norm": 0.3501769006252289, "learning_rate": 1.9831338510503052e-05, "loss": 0.5756, "step": 5616 }, { "epoch": 0.11912790820979406, "grad_norm": 0.3694387972354889, "learning_rate": 1.9831277512915924e-05, "loss": 0.5695, "step": 5617 }, { "epoch": 0.1191491166677271, "grad_norm": 0.4106445908546448, "learning_rate": 1.983121650439454e-05, "loss": 0.5273, "step": 5618 }, { "epoch": 0.11917032512566011, "grad_norm": 0.3011186420917511, "learning_rate": 1.9831155484938957e-05, "loss": 0.523, "step": 5619 }, { "epoch": 0.11919153358359313, "grad_norm": 0.33091026544570923, "learning_rate": 1.9831094454549248e-05, "loss": 0.5329, "step": 5620 }, { "epoch": 0.11921274204152615, "grad_norm": 0.338993102312088, "learning_rate": 1.9831033413225485e-05, "loss": 0.5137, "step": 5621 }, { "epoch": 0.11923395049945919, "grad_norm": 0.3442126512527466, "learning_rate": 1.983097236096773e-05, "loss": 0.5634, "step": 5622 }, { "epoch": 0.11925515895739221, "grad_norm": 0.2966088652610779, "learning_rate": 1.9830911297776052e-05, "loss": 0.4727, "step": 5623 }, { "epoch": 0.11927636741532523, "grad_norm": 0.49221277236938477, "learning_rate": 1.9830850223650522e-05, "loss": 0.4613, "step": 5624 }, { "epoch": 0.11929757587325826, "grad_norm": 0.35501036047935486, "learning_rate": 1.9830789138591205e-05, "loss": 0.5372, "step": 5625 }, { "epoch": 0.11931878433119128, "grad_norm": 0.3339281678199768, "learning_rate": 1.983072804259817e-05, "loss": 0.561, "step": 5626 }, { "epoch": 0.1193399927891243, "grad_norm": 0.3969675898551941, "learning_rate": 1.9830666935671483e-05, "loss": 0.4468, "step": 5627 }, { "epoch": 0.11936120124705733, "grad_norm": 0.30108100175857544, "learning_rate": 1.983060581781122e-05, "loss": 0.4313, "step": 5628 }, { "epoch": 0.11938240970499035, "grad_norm": 0.3134165406227112, "learning_rate": 1.9830544689017436e-05, "loss": 0.4858, "step": 5629 }, { "epoch": 0.11940361816292337, "grad_norm": 0.45165133476257324, "learning_rate": 1.9830483549290203e-05, "loss": 0.5556, "step": 5630 }, { "epoch": 0.11942482662085639, "grad_norm": 0.39250072836875916, "learning_rate": 1.9830422398629598e-05, "loss": 0.5244, "step": 5631 }, { "epoch": 0.11944603507878943, "grad_norm": 0.3245507776737213, "learning_rate": 1.9830361237035678e-05, "loss": 0.4986, "step": 5632 }, { "epoch": 0.11946724353672245, "grad_norm": 0.33239197731018066, "learning_rate": 1.9830300064508517e-05, "loss": 0.5815, "step": 5633 }, { "epoch": 0.11948845199465546, "grad_norm": 0.3434460759162903, "learning_rate": 1.9830238881048184e-05, "loss": 0.5455, "step": 5634 }, { "epoch": 0.1195096604525885, "grad_norm": 0.3184487521648407, "learning_rate": 1.9830177686654742e-05, "loss": 0.521, "step": 5635 }, { "epoch": 0.11953086891052152, "grad_norm": 0.3372950851917267, "learning_rate": 1.9830116481328267e-05, "loss": 0.4821, "step": 5636 }, { "epoch": 0.11955207736845454, "grad_norm": 0.34120768308639526, "learning_rate": 1.983005526506882e-05, "loss": 0.4681, "step": 5637 }, { "epoch": 0.11957328582638756, "grad_norm": 0.30535879731178284, "learning_rate": 1.9829994037876466e-05, "loss": 0.4881, "step": 5638 }, { "epoch": 0.11959449428432059, "grad_norm": 0.3630048334598541, "learning_rate": 1.982993279975128e-05, "loss": 0.5896, "step": 5639 }, { "epoch": 0.11961570274225361, "grad_norm": 0.35234367847442627, "learning_rate": 1.9829871550693326e-05, "loss": 0.5662, "step": 5640 }, { "epoch": 0.11963691120018663, "grad_norm": 0.34782683849334717, "learning_rate": 1.9829810290702682e-05, "loss": 0.5657, "step": 5641 }, { "epoch": 0.11965811965811966, "grad_norm": 0.32826822996139526, "learning_rate": 1.98297490197794e-05, "loss": 0.4366, "step": 5642 }, { "epoch": 0.11967932811605268, "grad_norm": 0.3720936179161072, "learning_rate": 1.9829687737923562e-05, "loss": 0.485, "step": 5643 }, { "epoch": 0.1197005365739857, "grad_norm": 0.3375582993030548, "learning_rate": 1.982962644513523e-05, "loss": 0.5571, "step": 5644 }, { "epoch": 0.11972174503191874, "grad_norm": 0.3690543472766876, "learning_rate": 1.982956514141447e-05, "loss": 0.5092, "step": 5645 }, { "epoch": 0.11974295348985176, "grad_norm": 0.3071865737438202, "learning_rate": 1.9829503826761357e-05, "loss": 0.4557, "step": 5646 }, { "epoch": 0.11976416194778478, "grad_norm": 0.3340297341346741, "learning_rate": 1.982944250117595e-05, "loss": 0.5718, "step": 5647 }, { "epoch": 0.1197853704057178, "grad_norm": 0.31485486030578613, "learning_rate": 1.9829381164658324e-05, "loss": 0.4849, "step": 5648 }, { "epoch": 0.11980657886365083, "grad_norm": 0.3410162925720215, "learning_rate": 1.982931981720855e-05, "loss": 0.5284, "step": 5649 }, { "epoch": 0.11982778732158385, "grad_norm": 0.40703824162483215, "learning_rate": 1.9829258458826687e-05, "loss": 0.5213, "step": 5650 }, { "epoch": 0.11984899577951687, "grad_norm": 0.32695555686950684, "learning_rate": 1.9829197089512814e-05, "loss": 0.5235, "step": 5651 }, { "epoch": 0.1198702042374499, "grad_norm": 0.32337772846221924, "learning_rate": 1.9829135709266988e-05, "loss": 0.6007, "step": 5652 }, { "epoch": 0.11989141269538292, "grad_norm": 0.2876361012458801, "learning_rate": 1.9829074318089283e-05, "loss": 0.4578, "step": 5653 }, { "epoch": 0.11991262115331594, "grad_norm": 0.3782494068145752, "learning_rate": 1.9829012915979768e-05, "loss": 0.5868, "step": 5654 }, { "epoch": 0.11993382961124896, "grad_norm": 0.35394805669784546, "learning_rate": 1.982895150293851e-05, "loss": 0.5623, "step": 5655 }, { "epoch": 0.119955038069182, "grad_norm": 0.3443884551525116, "learning_rate": 1.982889007896558e-05, "loss": 0.5626, "step": 5656 }, { "epoch": 0.11997624652711501, "grad_norm": 0.6104008555412292, "learning_rate": 1.982882864406104e-05, "loss": 0.5461, "step": 5657 }, { "epoch": 0.11999745498504803, "grad_norm": 0.5004672408103943, "learning_rate": 1.9828767198224966e-05, "loss": 0.5286, "step": 5658 }, { "epoch": 0.12001866344298107, "grad_norm": 0.3761432468891144, "learning_rate": 1.9828705741457418e-05, "loss": 0.5274, "step": 5659 }, { "epoch": 0.12003987190091409, "grad_norm": 0.37028276920318604, "learning_rate": 1.982864427375847e-05, "loss": 0.6525, "step": 5660 }, { "epoch": 0.1200610803588471, "grad_norm": 0.36124587059020996, "learning_rate": 1.982858279512819e-05, "loss": 0.4986, "step": 5661 }, { "epoch": 0.12008228881678013, "grad_norm": 0.35341009497642517, "learning_rate": 1.9828521305566647e-05, "loss": 0.5347, "step": 5662 }, { "epoch": 0.12010349727471316, "grad_norm": 0.31192412972450256, "learning_rate": 1.9828459805073906e-05, "loss": 0.5198, "step": 5663 }, { "epoch": 0.12012470573264618, "grad_norm": 0.32072147727012634, "learning_rate": 1.982839829365004e-05, "loss": 0.5854, "step": 5664 }, { "epoch": 0.1201459141905792, "grad_norm": 0.3285421133041382, "learning_rate": 1.982833677129511e-05, "loss": 0.5774, "step": 5665 }, { "epoch": 0.12016712264851223, "grad_norm": 0.31005579233169556, "learning_rate": 1.9828275238009197e-05, "loss": 0.5134, "step": 5666 }, { "epoch": 0.12018833110644525, "grad_norm": 0.45928290486335754, "learning_rate": 1.9828213693792354e-05, "loss": 0.6105, "step": 5667 }, { "epoch": 0.12020953956437827, "grad_norm": 0.3325490951538086, "learning_rate": 1.9828152138644663e-05, "loss": 0.5346, "step": 5668 }, { "epoch": 0.1202307480223113, "grad_norm": 0.3961971402168274, "learning_rate": 1.9828090572566185e-05, "loss": 0.5818, "step": 5669 }, { "epoch": 0.12025195648024432, "grad_norm": 0.32284387946128845, "learning_rate": 1.9828028995556983e-05, "loss": 0.5416, "step": 5670 }, { "epoch": 0.12027316493817734, "grad_norm": 0.3229600489139557, "learning_rate": 1.982796740761714e-05, "loss": 0.5389, "step": 5671 }, { "epoch": 0.12029437339611036, "grad_norm": 0.35070499777793884, "learning_rate": 1.9827905808746717e-05, "loss": 0.5631, "step": 5672 }, { "epoch": 0.1203155818540434, "grad_norm": 0.38202178478240967, "learning_rate": 1.982784419894578e-05, "loss": 0.553, "step": 5673 }, { "epoch": 0.12033679031197642, "grad_norm": 0.8146793842315674, "learning_rate": 1.98277825782144e-05, "loss": 0.5175, "step": 5674 }, { "epoch": 0.12035799876990944, "grad_norm": 0.4065069854259491, "learning_rate": 1.982772094655265e-05, "loss": 0.5457, "step": 5675 }, { "epoch": 0.12037920722784247, "grad_norm": 0.39971762895584106, "learning_rate": 1.9827659303960586e-05, "loss": 0.5211, "step": 5676 }, { "epoch": 0.12040041568577549, "grad_norm": 0.36604875326156616, "learning_rate": 1.982759765043829e-05, "loss": 0.6046, "step": 5677 }, { "epoch": 0.12042162414370851, "grad_norm": 0.3863549530506134, "learning_rate": 1.9827535985985828e-05, "loss": 0.5104, "step": 5678 }, { "epoch": 0.12044283260164153, "grad_norm": 0.3233189582824707, "learning_rate": 1.982747431060326e-05, "loss": 0.5125, "step": 5679 }, { "epoch": 0.12046404105957456, "grad_norm": 0.36286574602127075, "learning_rate": 1.9827412624290664e-05, "loss": 0.5017, "step": 5680 }, { "epoch": 0.12048524951750758, "grad_norm": 0.3130350708961487, "learning_rate": 1.9827350927048103e-05, "loss": 0.5146, "step": 5681 }, { "epoch": 0.1205064579754406, "grad_norm": 0.3500562012195587, "learning_rate": 1.982728921887565e-05, "loss": 0.5624, "step": 5682 }, { "epoch": 0.12052766643337363, "grad_norm": 0.35081976652145386, "learning_rate": 1.982722749977337e-05, "loss": 0.47, "step": 5683 }, { "epoch": 0.12054887489130665, "grad_norm": 0.34127455949783325, "learning_rate": 1.9827165769741333e-05, "loss": 0.5405, "step": 5684 }, { "epoch": 0.12057008334923967, "grad_norm": 0.3367837071418762, "learning_rate": 1.9827104028779608e-05, "loss": 0.4661, "step": 5685 }, { "epoch": 0.1205912918071727, "grad_norm": 0.49708986282348633, "learning_rate": 1.9827042276888264e-05, "loss": 0.5127, "step": 5686 }, { "epoch": 0.12061250026510573, "grad_norm": 0.35586902499198914, "learning_rate": 1.9826980514067366e-05, "loss": 0.4603, "step": 5687 }, { "epoch": 0.12063370872303875, "grad_norm": 0.3603423535823822, "learning_rate": 1.9826918740316988e-05, "loss": 0.5512, "step": 5688 }, { "epoch": 0.12065491718097177, "grad_norm": 0.4344133138656616, "learning_rate": 1.9826856955637192e-05, "loss": 0.4432, "step": 5689 }, { "epoch": 0.1206761256389048, "grad_norm": 0.32326069474220276, "learning_rate": 1.9826795160028055e-05, "loss": 0.5068, "step": 5690 }, { "epoch": 0.12069733409683782, "grad_norm": 0.42599257826805115, "learning_rate": 1.9826733353489646e-05, "loss": 0.495, "step": 5691 }, { "epoch": 0.12071854255477084, "grad_norm": 0.3273950517177582, "learning_rate": 1.982667153602202e-05, "loss": 0.5153, "step": 5692 }, { "epoch": 0.12073975101270387, "grad_norm": 0.36077365279197693, "learning_rate": 1.9826609707625262e-05, "loss": 0.5922, "step": 5693 }, { "epoch": 0.12076095947063689, "grad_norm": 0.41714176535606384, "learning_rate": 1.9826547868299433e-05, "loss": 0.6819, "step": 5694 }, { "epoch": 0.12078216792856991, "grad_norm": 0.28705787658691406, "learning_rate": 1.9826486018044603e-05, "loss": 0.4781, "step": 5695 }, { "epoch": 0.12080337638650293, "grad_norm": 0.2932156026363373, "learning_rate": 1.9826424156860838e-05, "loss": 0.4746, "step": 5696 }, { "epoch": 0.12082458484443596, "grad_norm": 0.4541601538658142, "learning_rate": 1.9826362284748214e-05, "loss": 0.5211, "step": 5697 }, { "epoch": 0.12084579330236898, "grad_norm": 0.3324410915374756, "learning_rate": 1.9826300401706792e-05, "loss": 0.5255, "step": 5698 }, { "epoch": 0.120867001760302, "grad_norm": 0.3445071280002594, "learning_rate": 1.9826238507736645e-05, "loss": 0.6302, "step": 5699 }, { "epoch": 0.12088821021823504, "grad_norm": 0.2967158854007721, "learning_rate": 1.982617660283784e-05, "loss": 0.5214, "step": 5700 }, { "epoch": 0.12090941867616806, "grad_norm": 0.2945900857448578, "learning_rate": 1.9826114687010448e-05, "loss": 0.4627, "step": 5701 }, { "epoch": 0.12093062713410108, "grad_norm": 0.33335080742836, "learning_rate": 1.9826052760254532e-05, "loss": 0.4983, "step": 5702 }, { "epoch": 0.12095183559203411, "grad_norm": 0.4785946011543274, "learning_rate": 1.982599082257017e-05, "loss": 0.5534, "step": 5703 }, { "epoch": 0.12097304404996713, "grad_norm": 0.31929126381874084, "learning_rate": 1.9825928873957427e-05, "loss": 0.5907, "step": 5704 }, { "epoch": 0.12099425250790015, "grad_norm": 0.323211133480072, "learning_rate": 1.982586691441637e-05, "loss": 0.4842, "step": 5705 }, { "epoch": 0.12101546096583317, "grad_norm": 0.3295627236366272, "learning_rate": 1.9825804943947066e-05, "loss": 0.491, "step": 5706 }, { "epoch": 0.1210366694237662, "grad_norm": 0.29201894998550415, "learning_rate": 1.9825742962549593e-05, "loss": 0.52, "step": 5707 }, { "epoch": 0.12105787788169922, "grad_norm": 0.35688087344169617, "learning_rate": 1.982568097022401e-05, "loss": 0.4484, "step": 5708 }, { "epoch": 0.12107908633963224, "grad_norm": 0.32266852259635925, "learning_rate": 1.982561896697039e-05, "loss": 0.5252, "step": 5709 }, { "epoch": 0.12110029479756527, "grad_norm": 0.3327677249908447, "learning_rate": 1.9825556952788804e-05, "loss": 0.5555, "step": 5710 }, { "epoch": 0.1211215032554983, "grad_norm": 0.3178946375846863, "learning_rate": 1.982549492767932e-05, "loss": 0.6429, "step": 5711 }, { "epoch": 0.12114271171343131, "grad_norm": 0.33694565296173096, "learning_rate": 1.9825432891642e-05, "loss": 0.5506, "step": 5712 }, { "epoch": 0.12116392017136433, "grad_norm": 0.2982384264469147, "learning_rate": 1.9825370844676925e-05, "loss": 0.4821, "step": 5713 }, { "epoch": 0.12118512862929737, "grad_norm": 0.3364729583263397, "learning_rate": 1.9825308786784158e-05, "loss": 0.5249, "step": 5714 }, { "epoch": 0.12120633708723039, "grad_norm": 0.3353915214538574, "learning_rate": 1.982524671796376e-05, "loss": 0.5625, "step": 5715 }, { "epoch": 0.1212275455451634, "grad_norm": 0.349630743265152, "learning_rate": 1.9825184638215816e-05, "loss": 0.5493, "step": 5716 }, { "epoch": 0.12124875400309644, "grad_norm": 0.3192254602909088, "learning_rate": 1.9825122547540384e-05, "loss": 0.6404, "step": 5717 }, { "epoch": 0.12126996246102946, "grad_norm": 0.30325108766555786, "learning_rate": 1.982506044593754e-05, "loss": 0.4291, "step": 5718 }, { "epoch": 0.12129117091896248, "grad_norm": 0.32197460532188416, "learning_rate": 1.9824998333407342e-05, "loss": 0.5699, "step": 5719 }, { "epoch": 0.12131237937689551, "grad_norm": 0.3430648744106293, "learning_rate": 1.982493620994987e-05, "loss": 0.5629, "step": 5720 }, { "epoch": 0.12133358783482853, "grad_norm": 0.3589760959148407, "learning_rate": 1.982487407556519e-05, "loss": 0.4782, "step": 5721 }, { "epoch": 0.12135479629276155, "grad_norm": 0.31633421778678894, "learning_rate": 1.9824811930253372e-05, "loss": 0.487, "step": 5722 }, { "epoch": 0.12137600475069457, "grad_norm": 0.3272970914840698, "learning_rate": 1.982474977401448e-05, "loss": 0.5262, "step": 5723 }, { "epoch": 0.1213972132086276, "grad_norm": 0.35430094599723816, "learning_rate": 1.9824687606848588e-05, "loss": 0.5718, "step": 5724 }, { "epoch": 0.12141842166656062, "grad_norm": 0.36160707473754883, "learning_rate": 1.982462542875576e-05, "loss": 0.5068, "step": 5725 }, { "epoch": 0.12143963012449364, "grad_norm": 0.3502708673477173, "learning_rate": 1.9824563239736076e-05, "loss": 0.5292, "step": 5726 }, { "epoch": 0.12146083858242668, "grad_norm": 0.3610079884529114, "learning_rate": 1.9824501039789595e-05, "loss": 0.5989, "step": 5727 }, { "epoch": 0.1214820470403597, "grad_norm": 0.2993060350418091, "learning_rate": 1.9824438828916386e-05, "loss": 0.5322, "step": 5728 }, { "epoch": 0.12150325549829272, "grad_norm": 0.5712020397186279, "learning_rate": 1.9824376607116527e-05, "loss": 0.6281, "step": 5729 }, { "epoch": 0.12152446395622574, "grad_norm": 0.361248254776001, "learning_rate": 1.9824314374390078e-05, "loss": 0.446, "step": 5730 }, { "epoch": 0.12154567241415877, "grad_norm": 0.3607386350631714, "learning_rate": 1.9824252130737114e-05, "loss": 0.5729, "step": 5731 }, { "epoch": 0.12156688087209179, "grad_norm": 0.307655930519104, "learning_rate": 1.9824189876157703e-05, "loss": 0.5496, "step": 5732 }, { "epoch": 0.12158808933002481, "grad_norm": 0.34592634439468384, "learning_rate": 1.9824127610651913e-05, "loss": 0.442, "step": 5733 }, { "epoch": 0.12160929778795784, "grad_norm": 0.3298860788345337, "learning_rate": 1.9824065334219807e-05, "loss": 0.5375, "step": 5734 }, { "epoch": 0.12163050624589086, "grad_norm": 0.3103909194469452, "learning_rate": 1.9824003046861468e-05, "loss": 0.4916, "step": 5735 }, { "epoch": 0.12165171470382388, "grad_norm": 0.32232823967933655, "learning_rate": 1.9823940748576956e-05, "loss": 0.4629, "step": 5736 }, { "epoch": 0.1216729231617569, "grad_norm": 0.3609732389450073, "learning_rate": 1.9823878439366342e-05, "loss": 0.5832, "step": 5737 }, { "epoch": 0.12169413161968994, "grad_norm": 0.3763359487056732, "learning_rate": 1.9823816119229695e-05, "loss": 0.5487, "step": 5738 }, { "epoch": 0.12171534007762295, "grad_norm": 0.28753265738487244, "learning_rate": 1.9823753788167088e-05, "loss": 0.4808, "step": 5739 }, { "epoch": 0.12173654853555597, "grad_norm": 0.3172096312046051, "learning_rate": 1.9823691446178586e-05, "loss": 0.5582, "step": 5740 }, { "epoch": 0.12175775699348901, "grad_norm": 0.35920804738998413, "learning_rate": 1.982362909326426e-05, "loss": 0.5418, "step": 5741 }, { "epoch": 0.12177896545142203, "grad_norm": 0.33822062611579895, "learning_rate": 1.982356672942418e-05, "loss": 0.4983, "step": 5742 }, { "epoch": 0.12180017390935505, "grad_norm": 0.36405470967292786, "learning_rate": 1.9823504354658413e-05, "loss": 0.4576, "step": 5743 }, { "epoch": 0.12182138236728808, "grad_norm": 0.3388778269290924, "learning_rate": 1.982344196896703e-05, "loss": 0.4616, "step": 5744 }, { "epoch": 0.1218425908252211, "grad_norm": 0.33403193950653076, "learning_rate": 1.98233795723501e-05, "loss": 0.554, "step": 5745 }, { "epoch": 0.12186379928315412, "grad_norm": 0.37969547510147095, "learning_rate": 1.982331716480769e-05, "loss": 0.6427, "step": 5746 }, { "epoch": 0.12188500774108714, "grad_norm": 0.33532294631004333, "learning_rate": 1.9823254746339874e-05, "loss": 0.5107, "step": 5747 }, { "epoch": 0.12190621619902017, "grad_norm": 0.33911970257759094, "learning_rate": 1.982319231694672e-05, "loss": 0.5903, "step": 5748 }, { "epoch": 0.12192742465695319, "grad_norm": 0.3046433627605438, "learning_rate": 1.9823129876628298e-05, "loss": 0.5221, "step": 5749 }, { "epoch": 0.12194863311488621, "grad_norm": 0.36136773228645325, "learning_rate": 1.9823067425384672e-05, "loss": 0.5402, "step": 5750 }, { "epoch": 0.12196984157281925, "grad_norm": 0.26423168182373047, "learning_rate": 1.982300496321592e-05, "loss": 0.3784, "step": 5751 }, { "epoch": 0.12199105003075227, "grad_norm": 0.4141056537628174, "learning_rate": 1.9822942490122104e-05, "loss": 0.5099, "step": 5752 }, { "epoch": 0.12201225848868529, "grad_norm": 0.3319272994995117, "learning_rate": 1.98228800061033e-05, "loss": 0.5392, "step": 5753 }, { "epoch": 0.1220334669466183, "grad_norm": 0.3835449516773224, "learning_rate": 1.9822817511159575e-05, "loss": 0.6153, "step": 5754 }, { "epoch": 0.12205467540455134, "grad_norm": 0.3021094799041748, "learning_rate": 1.9822755005290994e-05, "loss": 0.5573, "step": 5755 }, { "epoch": 0.12207588386248436, "grad_norm": 0.30337414145469666, "learning_rate": 1.9822692488497633e-05, "loss": 0.5468, "step": 5756 }, { "epoch": 0.12209709232041738, "grad_norm": 0.33626991510391235, "learning_rate": 1.9822629960779557e-05, "loss": 0.5185, "step": 5757 }, { "epoch": 0.12211830077835041, "grad_norm": 0.31637337803840637, "learning_rate": 1.982256742213684e-05, "loss": 0.5628, "step": 5758 }, { "epoch": 0.12213950923628343, "grad_norm": 0.4106883704662323, "learning_rate": 1.9822504872569547e-05, "loss": 0.4995, "step": 5759 }, { "epoch": 0.12216071769421645, "grad_norm": 0.38867878913879395, "learning_rate": 1.9822442312077746e-05, "loss": 0.5325, "step": 5760 }, { "epoch": 0.12218192615214948, "grad_norm": 0.3399538993835449, "learning_rate": 1.9822379740661514e-05, "loss": 0.544, "step": 5761 }, { "epoch": 0.1222031346100825, "grad_norm": 0.32439807057380676, "learning_rate": 1.9822317158320915e-05, "loss": 0.5298, "step": 5762 }, { "epoch": 0.12222434306801552, "grad_norm": 0.31444108486175537, "learning_rate": 1.982225456505602e-05, "loss": 0.5197, "step": 5763 }, { "epoch": 0.12224555152594854, "grad_norm": 0.3595481216907501, "learning_rate": 1.9822191960866898e-05, "loss": 0.5768, "step": 5764 }, { "epoch": 0.12226675998388158, "grad_norm": 0.312663733959198, "learning_rate": 1.982212934575362e-05, "loss": 0.5087, "step": 5765 }, { "epoch": 0.1222879684418146, "grad_norm": 0.45847487449645996, "learning_rate": 1.982206671971626e-05, "loss": 0.5446, "step": 5766 }, { "epoch": 0.12230917689974762, "grad_norm": 0.3299297094345093, "learning_rate": 1.982200408275488e-05, "loss": 0.521, "step": 5767 }, { "epoch": 0.12233038535768065, "grad_norm": 0.31896916031837463, "learning_rate": 1.982194143486955e-05, "loss": 0.5233, "step": 5768 }, { "epoch": 0.12235159381561367, "grad_norm": 0.3499814569950104, "learning_rate": 1.9821878776060345e-05, "loss": 0.4528, "step": 5769 }, { "epoch": 0.12237280227354669, "grad_norm": 0.43127569556236267, "learning_rate": 1.982181610632733e-05, "loss": 0.483, "step": 5770 }, { "epoch": 0.12239401073147971, "grad_norm": 0.2930319309234619, "learning_rate": 1.9821753425670572e-05, "loss": 0.4518, "step": 5771 }, { "epoch": 0.12241521918941274, "grad_norm": 0.4238460958003998, "learning_rate": 1.9821690734090153e-05, "loss": 0.5802, "step": 5772 }, { "epoch": 0.12243642764734576, "grad_norm": 0.3394433259963989, "learning_rate": 1.982162803158613e-05, "loss": 0.4952, "step": 5773 }, { "epoch": 0.12245763610527878, "grad_norm": 0.3679126799106598, "learning_rate": 1.982156531815858e-05, "loss": 0.6313, "step": 5774 }, { "epoch": 0.12247884456321181, "grad_norm": 0.39477062225341797, "learning_rate": 1.9821502593807567e-05, "loss": 0.52, "step": 5775 }, { "epoch": 0.12250005302114483, "grad_norm": 0.29039838910102844, "learning_rate": 1.9821439858533167e-05, "loss": 0.4688, "step": 5776 }, { "epoch": 0.12252126147907785, "grad_norm": 0.3423283100128174, "learning_rate": 1.982137711233545e-05, "loss": 0.5417, "step": 5777 }, { "epoch": 0.12254246993701089, "grad_norm": 0.34807875752449036, "learning_rate": 1.982131435521448e-05, "loss": 0.5467, "step": 5778 }, { "epoch": 0.1225636783949439, "grad_norm": 0.3306569755077362, "learning_rate": 1.9821251587170326e-05, "loss": 0.5129, "step": 5779 }, { "epoch": 0.12258488685287693, "grad_norm": 0.2968866229057312, "learning_rate": 1.9821188808203066e-05, "loss": 0.5762, "step": 5780 }, { "epoch": 0.12260609531080995, "grad_norm": 0.6363003849983215, "learning_rate": 1.982112601831276e-05, "loss": 0.6148, "step": 5781 }, { "epoch": 0.12262730376874298, "grad_norm": 0.36877718567848206, "learning_rate": 1.9821063217499488e-05, "loss": 0.4106, "step": 5782 }, { "epoch": 0.122648512226676, "grad_norm": 0.3783344328403473, "learning_rate": 1.9821000405763312e-05, "loss": 0.5891, "step": 5783 }, { "epoch": 0.12266972068460902, "grad_norm": 0.32519540190696716, "learning_rate": 1.9820937583104304e-05, "loss": 0.5441, "step": 5784 }, { "epoch": 0.12269092914254205, "grad_norm": 0.3377980589866638, "learning_rate": 1.982087474952254e-05, "loss": 0.5204, "step": 5785 }, { "epoch": 0.12271213760047507, "grad_norm": 0.3334970474243164, "learning_rate": 1.9820811905018077e-05, "loss": 0.5132, "step": 5786 }, { "epoch": 0.12273334605840809, "grad_norm": 0.34543436765670776, "learning_rate": 1.9820749049590996e-05, "loss": 0.5949, "step": 5787 }, { "epoch": 0.12275455451634111, "grad_norm": 0.34844672679901123, "learning_rate": 1.9820686183241365e-05, "loss": 0.6113, "step": 5788 }, { "epoch": 0.12277576297427414, "grad_norm": 0.29051095247268677, "learning_rate": 1.982062330596925e-05, "loss": 0.4482, "step": 5789 }, { "epoch": 0.12279697143220716, "grad_norm": 0.36628472805023193, "learning_rate": 1.9820560417774724e-05, "loss": 0.5346, "step": 5790 }, { "epoch": 0.12281817989014018, "grad_norm": 0.34370216727256775, "learning_rate": 1.9820497518657853e-05, "loss": 0.4988, "step": 5791 }, { "epoch": 0.12283938834807322, "grad_norm": 0.3527291715145111, "learning_rate": 1.9820434608618716e-05, "loss": 0.5966, "step": 5792 }, { "epoch": 0.12286059680600624, "grad_norm": 0.4162350296974182, "learning_rate": 1.982037168765737e-05, "loss": 0.4626, "step": 5793 }, { "epoch": 0.12288180526393926, "grad_norm": 0.32819753885269165, "learning_rate": 1.9820308755773894e-05, "loss": 0.5136, "step": 5794 }, { "epoch": 0.12290301372187229, "grad_norm": 0.32571759819984436, "learning_rate": 1.982024581296836e-05, "loss": 0.4486, "step": 5795 }, { "epoch": 0.12292422217980531, "grad_norm": 0.3125440776348114, "learning_rate": 1.9820182859240828e-05, "loss": 0.5562, "step": 5796 }, { "epoch": 0.12294543063773833, "grad_norm": 0.41246330738067627, "learning_rate": 1.982011989459138e-05, "loss": 0.4544, "step": 5797 }, { "epoch": 0.12296663909567135, "grad_norm": 0.5276615023612976, "learning_rate": 1.9820056919020075e-05, "loss": 0.4752, "step": 5798 }, { "epoch": 0.12298784755360438, "grad_norm": 0.368692547082901, "learning_rate": 1.9819993932526992e-05, "loss": 0.5357, "step": 5799 }, { "epoch": 0.1230090560115374, "grad_norm": 0.3229730427265167, "learning_rate": 1.9819930935112195e-05, "loss": 0.5687, "step": 5800 }, { "epoch": 0.12303026446947042, "grad_norm": 0.357561320066452, "learning_rate": 1.9819867926775752e-05, "loss": 0.5374, "step": 5801 }, { "epoch": 0.12305147292740345, "grad_norm": 0.3617715835571289, "learning_rate": 1.9819804907517744e-05, "loss": 0.5404, "step": 5802 }, { "epoch": 0.12307268138533647, "grad_norm": 0.34167003631591797, "learning_rate": 1.981974187733823e-05, "loss": 0.565, "step": 5803 }, { "epoch": 0.1230938898432695, "grad_norm": 0.3185359835624695, "learning_rate": 1.9819678836237284e-05, "loss": 0.5084, "step": 5804 }, { "epoch": 0.12311509830120251, "grad_norm": 0.33120542764663696, "learning_rate": 1.981961578421498e-05, "loss": 0.4858, "step": 5805 }, { "epoch": 0.12313630675913555, "grad_norm": 0.3204059898853302, "learning_rate": 1.9819552721271385e-05, "loss": 0.4694, "step": 5806 }, { "epoch": 0.12315751521706857, "grad_norm": 0.38890931010246277, "learning_rate": 1.9819489647406566e-05, "loss": 0.6064, "step": 5807 }, { "epoch": 0.12317872367500159, "grad_norm": 0.3823481798171997, "learning_rate": 1.9819426562620595e-05, "loss": 0.5396, "step": 5808 }, { "epoch": 0.12319993213293462, "grad_norm": 0.5298944115638733, "learning_rate": 1.9819363466913547e-05, "loss": 0.4719, "step": 5809 }, { "epoch": 0.12322114059086764, "grad_norm": 0.5059569478034973, "learning_rate": 1.9819300360285486e-05, "loss": 0.5493, "step": 5810 }, { "epoch": 0.12324234904880066, "grad_norm": 0.3165290653705597, "learning_rate": 1.9819237242736484e-05, "loss": 0.5073, "step": 5811 }, { "epoch": 0.12326355750673368, "grad_norm": 0.38957101106643677, "learning_rate": 1.9819174114266612e-05, "loss": 0.5609, "step": 5812 }, { "epoch": 0.12328476596466671, "grad_norm": 0.3514224588871002, "learning_rate": 1.981911097487594e-05, "loss": 0.5371, "step": 5813 }, { "epoch": 0.12330597442259973, "grad_norm": 0.46396714448928833, "learning_rate": 1.9819047824564537e-05, "loss": 0.5076, "step": 5814 }, { "epoch": 0.12332718288053275, "grad_norm": 0.3506139814853668, "learning_rate": 1.9818984663332478e-05, "loss": 0.5231, "step": 5815 }, { "epoch": 0.12334839133846578, "grad_norm": 0.35685452818870544, "learning_rate": 1.9818921491179825e-05, "loss": 0.5629, "step": 5816 }, { "epoch": 0.1233695997963988, "grad_norm": 0.36880484223365784, "learning_rate": 1.9818858308106655e-05, "loss": 0.617, "step": 5817 }, { "epoch": 0.12339080825433182, "grad_norm": 0.3405919373035431, "learning_rate": 1.9818795114113038e-05, "loss": 0.5329, "step": 5818 }, { "epoch": 0.12341201671226486, "grad_norm": 0.3589674234390259, "learning_rate": 1.981873190919904e-05, "loss": 0.5676, "step": 5819 }, { "epoch": 0.12343322517019788, "grad_norm": 0.3656904995441437, "learning_rate": 1.9818668693364734e-05, "loss": 0.4627, "step": 5820 }, { "epoch": 0.1234544336281309, "grad_norm": 0.34776273369789124, "learning_rate": 1.9818605466610187e-05, "loss": 0.5075, "step": 5821 }, { "epoch": 0.12347564208606392, "grad_norm": 0.37201836705207825, "learning_rate": 1.9818542228935477e-05, "loss": 0.6676, "step": 5822 }, { "epoch": 0.12349685054399695, "grad_norm": 0.31936970353126526, "learning_rate": 1.9818478980340666e-05, "loss": 0.5121, "step": 5823 }, { "epoch": 0.12351805900192997, "grad_norm": 0.40321871638298035, "learning_rate": 1.981841572082583e-05, "loss": 0.5618, "step": 5824 }, { "epoch": 0.12353926745986299, "grad_norm": 0.33881446719169617, "learning_rate": 1.9818352450391035e-05, "loss": 0.5785, "step": 5825 }, { "epoch": 0.12356047591779602, "grad_norm": 0.38486695289611816, "learning_rate": 1.981828916903636e-05, "loss": 0.5649, "step": 5826 }, { "epoch": 0.12358168437572904, "grad_norm": 0.3078557848930359, "learning_rate": 1.9818225876761863e-05, "loss": 0.4799, "step": 5827 }, { "epoch": 0.12360289283366206, "grad_norm": 0.3138613700866699, "learning_rate": 1.9818162573567624e-05, "loss": 0.4284, "step": 5828 }, { "epoch": 0.12362410129159508, "grad_norm": 0.4595782160758972, "learning_rate": 1.9818099259453706e-05, "loss": 0.5556, "step": 5829 }, { "epoch": 0.12364530974952812, "grad_norm": 0.3119618892669678, "learning_rate": 1.9818035934420186e-05, "loss": 0.5775, "step": 5830 }, { "epoch": 0.12366651820746113, "grad_norm": 0.34966522455215454, "learning_rate": 1.981797259846713e-05, "loss": 0.5783, "step": 5831 }, { "epoch": 0.12368772666539415, "grad_norm": 0.34057384729385376, "learning_rate": 1.9817909251594613e-05, "loss": 0.5127, "step": 5832 }, { "epoch": 0.12370893512332719, "grad_norm": 0.3269475996494293, "learning_rate": 1.98178458938027e-05, "loss": 0.5269, "step": 5833 }, { "epoch": 0.12373014358126021, "grad_norm": 0.2976049780845642, "learning_rate": 1.9817782525091465e-05, "loss": 0.5685, "step": 5834 }, { "epoch": 0.12375135203919323, "grad_norm": 0.32504376769065857, "learning_rate": 1.981771914546098e-05, "loss": 0.5101, "step": 5835 }, { "epoch": 0.12377256049712626, "grad_norm": 0.33321613073349, "learning_rate": 1.981765575491131e-05, "loss": 0.5781, "step": 5836 }, { "epoch": 0.12379376895505928, "grad_norm": 0.31761956214904785, "learning_rate": 1.981759235344253e-05, "loss": 0.5913, "step": 5837 }, { "epoch": 0.1238149774129923, "grad_norm": 0.37977519631385803, "learning_rate": 1.9817528941054707e-05, "loss": 0.6169, "step": 5838 }, { "epoch": 0.12383618587092532, "grad_norm": 0.3393011689186096, "learning_rate": 1.9817465517747915e-05, "loss": 0.5617, "step": 5839 }, { "epoch": 0.12385739432885835, "grad_norm": 0.35792380571365356, "learning_rate": 1.9817402083522222e-05, "loss": 0.5645, "step": 5840 }, { "epoch": 0.12387860278679137, "grad_norm": 0.33834707736968994, "learning_rate": 1.9817338638377702e-05, "loss": 0.5248, "step": 5841 }, { "epoch": 0.12389981124472439, "grad_norm": 0.3279115855693817, "learning_rate": 1.9817275182314422e-05, "loss": 0.5734, "step": 5842 }, { "epoch": 0.12392101970265743, "grad_norm": 0.3527960479259491, "learning_rate": 1.9817211715332454e-05, "loss": 0.5207, "step": 5843 }, { "epoch": 0.12394222816059045, "grad_norm": 0.31720566749572754, "learning_rate": 1.9817148237431868e-05, "loss": 0.531, "step": 5844 }, { "epoch": 0.12396343661852346, "grad_norm": 0.3437095880508423, "learning_rate": 1.9817084748612733e-05, "loss": 0.5627, "step": 5845 }, { "epoch": 0.12398464507645648, "grad_norm": 0.4412139058113098, "learning_rate": 1.9817021248875127e-05, "loss": 0.5509, "step": 5846 }, { "epoch": 0.12400585353438952, "grad_norm": 0.345790296792984, "learning_rate": 1.981695773821911e-05, "loss": 0.5201, "step": 5847 }, { "epoch": 0.12402706199232254, "grad_norm": 0.35459640622138977, "learning_rate": 1.9816894216644763e-05, "loss": 0.4751, "step": 5848 }, { "epoch": 0.12404827045025556, "grad_norm": 0.3345361351966858, "learning_rate": 1.9816830684152145e-05, "loss": 0.5217, "step": 5849 }, { "epoch": 0.12406947890818859, "grad_norm": 0.30766257643699646, "learning_rate": 1.9816767140741337e-05, "loss": 0.588, "step": 5850 }, { "epoch": 0.12409068736612161, "grad_norm": 0.30653759837150574, "learning_rate": 1.9816703586412406e-05, "loss": 0.5199, "step": 5851 }, { "epoch": 0.12411189582405463, "grad_norm": 0.29709020256996155, "learning_rate": 1.9816640021165424e-05, "loss": 0.513, "step": 5852 }, { "epoch": 0.12413310428198766, "grad_norm": 0.3056514859199524, "learning_rate": 1.981657644500046e-05, "loss": 0.557, "step": 5853 }, { "epoch": 0.12415431273992068, "grad_norm": 0.3045022487640381, "learning_rate": 1.981651285791758e-05, "loss": 0.5375, "step": 5854 }, { "epoch": 0.1241755211978537, "grad_norm": 0.30551108717918396, "learning_rate": 1.9816449259916864e-05, "loss": 0.4593, "step": 5855 }, { "epoch": 0.12419672965578672, "grad_norm": 0.31670135259628296, "learning_rate": 1.9816385650998374e-05, "loss": 0.5702, "step": 5856 }, { "epoch": 0.12421793811371976, "grad_norm": 0.32087716460227966, "learning_rate": 1.9816322031162192e-05, "loss": 0.5726, "step": 5857 }, { "epoch": 0.12423914657165278, "grad_norm": 0.28709909319877625, "learning_rate": 1.981625840040838e-05, "loss": 0.4536, "step": 5858 }, { "epoch": 0.1242603550295858, "grad_norm": 0.35241901874542236, "learning_rate": 1.981619475873701e-05, "loss": 0.6022, "step": 5859 }, { "epoch": 0.12428156348751883, "grad_norm": 0.340069055557251, "learning_rate": 1.981613110614815e-05, "loss": 0.5219, "step": 5860 }, { "epoch": 0.12430277194545185, "grad_norm": 0.3251855969429016, "learning_rate": 1.9816067442641876e-05, "loss": 0.5802, "step": 5861 }, { "epoch": 0.12432398040338487, "grad_norm": 0.3889335095882416, "learning_rate": 1.9816003768218256e-05, "loss": 0.5818, "step": 5862 }, { "epoch": 0.12434518886131789, "grad_norm": 0.30566415190696716, "learning_rate": 1.9815940082877367e-05, "loss": 0.529, "step": 5863 }, { "epoch": 0.12436639731925092, "grad_norm": 0.4015576243400574, "learning_rate": 1.981587638661927e-05, "loss": 0.4744, "step": 5864 }, { "epoch": 0.12438760577718394, "grad_norm": 0.4183906614780426, "learning_rate": 1.9815812679444043e-05, "loss": 0.5947, "step": 5865 }, { "epoch": 0.12440881423511696, "grad_norm": 0.3374214768409729, "learning_rate": 1.981574896135175e-05, "loss": 0.5169, "step": 5866 }, { "epoch": 0.12443002269305, "grad_norm": 0.32099294662475586, "learning_rate": 1.9815685232342475e-05, "loss": 0.5269, "step": 5867 }, { "epoch": 0.12445123115098301, "grad_norm": 0.3255516588687897, "learning_rate": 1.9815621492416273e-05, "loss": 0.4615, "step": 5868 }, { "epoch": 0.12447243960891603, "grad_norm": 0.34120991826057434, "learning_rate": 1.9815557741573225e-05, "loss": 0.5475, "step": 5869 }, { "epoch": 0.12449364806684905, "grad_norm": 0.3784065842628479, "learning_rate": 1.98154939798134e-05, "loss": 0.4613, "step": 5870 }, { "epoch": 0.12451485652478209, "grad_norm": 0.327644020318985, "learning_rate": 1.9815430207136864e-05, "loss": 0.5289, "step": 5871 }, { "epoch": 0.1245360649827151, "grad_norm": 0.3296199142932892, "learning_rate": 1.981536642354369e-05, "loss": 0.5245, "step": 5872 }, { "epoch": 0.12455727344064813, "grad_norm": 0.34953364729881287, "learning_rate": 1.9815302629033957e-05, "loss": 0.613, "step": 5873 }, { "epoch": 0.12457848189858116, "grad_norm": 0.35908326506614685, "learning_rate": 1.9815238823607726e-05, "loss": 0.5554, "step": 5874 }, { "epoch": 0.12459969035651418, "grad_norm": 0.31856805086135864, "learning_rate": 1.9815175007265074e-05, "loss": 0.5529, "step": 5875 }, { "epoch": 0.1246208988144472, "grad_norm": 0.3953181505203247, "learning_rate": 1.9815111180006068e-05, "loss": 0.6024, "step": 5876 }, { "epoch": 0.12464210727238023, "grad_norm": 0.328290194272995, "learning_rate": 1.9815047341830782e-05, "loss": 0.5008, "step": 5877 }, { "epoch": 0.12466331573031325, "grad_norm": 0.28733551502227783, "learning_rate": 1.9814983492739285e-05, "loss": 0.3819, "step": 5878 }, { "epoch": 0.12468452418824627, "grad_norm": 0.3419427275657654, "learning_rate": 1.9814919632731647e-05, "loss": 0.5804, "step": 5879 }, { "epoch": 0.12470573264617929, "grad_norm": 0.3337918221950531, "learning_rate": 1.981485576180794e-05, "loss": 0.491, "step": 5880 }, { "epoch": 0.12472694110411232, "grad_norm": 0.28381988406181335, "learning_rate": 1.981479187996824e-05, "loss": 0.4909, "step": 5881 }, { "epoch": 0.12474814956204534, "grad_norm": 0.34962743520736694, "learning_rate": 1.981472798721261e-05, "loss": 0.4478, "step": 5882 }, { "epoch": 0.12476935801997836, "grad_norm": 0.3151862323284149, "learning_rate": 1.9814664083541125e-05, "loss": 0.5065, "step": 5883 }, { "epoch": 0.1247905664779114, "grad_norm": 0.3296024203300476, "learning_rate": 1.9814600168953855e-05, "loss": 0.5742, "step": 5884 }, { "epoch": 0.12481177493584442, "grad_norm": 0.3180822432041168, "learning_rate": 1.9814536243450876e-05, "loss": 0.5397, "step": 5885 }, { "epoch": 0.12483298339377744, "grad_norm": 0.343700110912323, "learning_rate": 1.9814472307032254e-05, "loss": 0.5426, "step": 5886 }, { "epoch": 0.12485419185171046, "grad_norm": 0.3082927167415619, "learning_rate": 1.981440835969806e-05, "loss": 0.5076, "step": 5887 }, { "epoch": 0.12487540030964349, "grad_norm": 0.3336303234100342, "learning_rate": 1.9814344401448363e-05, "loss": 0.4379, "step": 5888 }, { "epoch": 0.12489660876757651, "grad_norm": 0.2971121668815613, "learning_rate": 1.9814280432283244e-05, "loss": 0.4989, "step": 5889 }, { "epoch": 0.12491781722550953, "grad_norm": 0.4803219735622406, "learning_rate": 1.9814216452202764e-05, "loss": 0.4956, "step": 5890 }, { "epoch": 0.12493902568344256, "grad_norm": 0.32380297780036926, "learning_rate": 1.9814152461206996e-05, "loss": 0.5391, "step": 5891 }, { "epoch": 0.12496023414137558, "grad_norm": 0.7607859969139099, "learning_rate": 1.9814088459296017e-05, "loss": 0.5491, "step": 5892 }, { "epoch": 0.1249814425993086, "grad_norm": 0.3585093915462494, "learning_rate": 1.981402444646989e-05, "loss": 0.469, "step": 5893 }, { "epoch": 0.12500265105724162, "grad_norm": 0.33757343888282776, "learning_rate": 1.9813960422728693e-05, "loss": 0.5521, "step": 5894 }, { "epoch": 0.12502385951517464, "grad_norm": 0.32277849316596985, "learning_rate": 1.9813896388072494e-05, "loss": 0.4438, "step": 5895 }, { "epoch": 0.1250450679731077, "grad_norm": 0.338290274143219, "learning_rate": 1.9813832342501364e-05, "loss": 0.6465, "step": 5896 }, { "epoch": 0.1250662764310407, "grad_norm": 0.5395222902297974, "learning_rate": 1.9813768286015375e-05, "loss": 0.6434, "step": 5897 }, { "epoch": 0.12508748488897373, "grad_norm": 0.3556731641292572, "learning_rate": 1.9813704218614596e-05, "loss": 0.598, "step": 5898 }, { "epoch": 0.12510869334690675, "grad_norm": 0.31241315603256226, "learning_rate": 1.9813640140299104e-05, "loss": 0.5675, "step": 5899 }, { "epoch": 0.12512990180483977, "grad_norm": 0.3321014940738678, "learning_rate": 1.981357605106897e-05, "loss": 0.5797, "step": 5900 }, { "epoch": 0.12515111026277279, "grad_norm": 0.3609551787376404, "learning_rate": 1.9813511950924255e-05, "loss": 0.5566, "step": 5901 }, { "epoch": 0.1251723187207058, "grad_norm": 0.32147228717803955, "learning_rate": 1.981344783986504e-05, "loss": 0.5589, "step": 5902 }, { "epoch": 0.12519352717863885, "grad_norm": 0.3171762228012085, "learning_rate": 1.9813383717891393e-05, "loss": 0.4421, "step": 5903 }, { "epoch": 0.12521473563657187, "grad_norm": 0.38139161467552185, "learning_rate": 1.9813319585003387e-05, "loss": 0.5013, "step": 5904 }, { "epoch": 0.1252359440945049, "grad_norm": 0.3559788763523102, "learning_rate": 1.981325544120109e-05, "loss": 0.5081, "step": 5905 }, { "epoch": 0.1252571525524379, "grad_norm": 0.3311311900615692, "learning_rate": 1.981319128648458e-05, "loss": 0.504, "step": 5906 }, { "epoch": 0.12527836101037093, "grad_norm": 0.3671359121799469, "learning_rate": 1.9813127120853918e-05, "loss": 0.4876, "step": 5907 }, { "epoch": 0.12529956946830395, "grad_norm": 0.28918537497520447, "learning_rate": 1.9813062944309183e-05, "loss": 0.4378, "step": 5908 }, { "epoch": 0.12532077792623697, "grad_norm": 0.36545702815055847, "learning_rate": 1.9812998756850447e-05, "loss": 0.5877, "step": 5909 }, { "epoch": 0.12534198638417002, "grad_norm": 0.3158120810985565, "learning_rate": 1.9812934558477778e-05, "loss": 0.5827, "step": 5910 }, { "epoch": 0.12536319484210304, "grad_norm": 0.35038483142852783, "learning_rate": 1.9812870349191246e-05, "loss": 0.6093, "step": 5911 }, { "epoch": 0.12538440330003606, "grad_norm": 0.3264686167240143, "learning_rate": 1.9812806128990928e-05, "loss": 0.5157, "step": 5912 }, { "epoch": 0.12540561175796908, "grad_norm": 0.3325169086456299, "learning_rate": 1.9812741897876892e-05, "loss": 0.5944, "step": 5913 }, { "epoch": 0.1254268202159021, "grad_norm": 0.4345516562461853, "learning_rate": 1.981267765584921e-05, "loss": 0.5457, "step": 5914 }, { "epoch": 0.12544802867383512, "grad_norm": 0.33775293827056885, "learning_rate": 1.981261340290795e-05, "loss": 0.5337, "step": 5915 }, { "epoch": 0.12546923713176814, "grad_norm": 0.33207032084465027, "learning_rate": 1.981254913905319e-05, "loss": 0.4641, "step": 5916 }, { "epoch": 0.12549044558970118, "grad_norm": 0.3269832730293274, "learning_rate": 1.9812484864285e-05, "loss": 0.4389, "step": 5917 }, { "epoch": 0.1255116540476342, "grad_norm": 0.34364819526672363, "learning_rate": 1.9812420578603444e-05, "loss": 0.5132, "step": 5918 }, { "epoch": 0.12553286250556722, "grad_norm": 0.34659022092819214, "learning_rate": 1.98123562820086e-05, "loss": 0.6857, "step": 5919 }, { "epoch": 0.12555407096350024, "grad_norm": 0.32402876019477844, "learning_rate": 1.9812291974500542e-05, "loss": 0.5348, "step": 5920 }, { "epoch": 0.12557527942143326, "grad_norm": 0.35063332319259644, "learning_rate": 1.9812227656079334e-05, "loss": 0.6406, "step": 5921 }, { "epoch": 0.12559648787936628, "grad_norm": 0.29835742712020874, "learning_rate": 1.981216332674506e-05, "loss": 0.5056, "step": 5922 }, { "epoch": 0.12561769633729933, "grad_norm": 0.3635779321193695, "learning_rate": 1.9812098986497773e-05, "loss": 0.5697, "step": 5923 }, { "epoch": 0.12563890479523235, "grad_norm": 0.3305917978286743, "learning_rate": 1.981203463533756e-05, "loss": 0.5575, "step": 5924 }, { "epoch": 0.12566011325316537, "grad_norm": 0.3089677393436432, "learning_rate": 1.9811970273264485e-05, "loss": 0.5559, "step": 5925 }, { "epoch": 0.1256813217110984, "grad_norm": 0.3431435525417328, "learning_rate": 1.9811905900278624e-05, "loss": 0.5629, "step": 5926 }, { "epoch": 0.1257025301690314, "grad_norm": 0.32685157656669617, "learning_rate": 1.9811841516380044e-05, "loss": 0.5681, "step": 5927 }, { "epoch": 0.12572373862696443, "grad_norm": 0.30985286831855774, "learning_rate": 1.9811777121568824e-05, "loss": 0.5012, "step": 5928 }, { "epoch": 0.12574494708489745, "grad_norm": 0.3486809730529785, "learning_rate": 1.9811712715845025e-05, "loss": 0.5147, "step": 5929 }, { "epoch": 0.1257661555428305, "grad_norm": 0.3281698524951935, "learning_rate": 1.9811648299208727e-05, "loss": 0.5818, "step": 5930 }, { "epoch": 0.1257873640007635, "grad_norm": 0.3204367756843567, "learning_rate": 1.981158387166e-05, "loss": 0.6182, "step": 5931 }, { "epoch": 0.12580857245869653, "grad_norm": 0.3672013282775879, "learning_rate": 1.981151943319891e-05, "loss": 0.5881, "step": 5932 }, { "epoch": 0.12582978091662955, "grad_norm": 0.38541096448898315, "learning_rate": 1.981145498382554e-05, "loss": 0.5475, "step": 5933 }, { "epoch": 0.12585098937456257, "grad_norm": 0.33130332827568054, "learning_rate": 1.981139052353995e-05, "loss": 0.5683, "step": 5934 }, { "epoch": 0.1258721978324956, "grad_norm": 0.6519209742546082, "learning_rate": 1.9811326052342217e-05, "loss": 0.5051, "step": 5935 }, { "epoch": 0.1258934062904286, "grad_norm": 0.3483564555644989, "learning_rate": 1.9811261570232414e-05, "loss": 0.6034, "step": 5936 }, { "epoch": 0.12591461474836166, "grad_norm": 0.3219462037086487, "learning_rate": 1.981119707721061e-05, "loss": 0.4886, "step": 5937 }, { "epoch": 0.12593582320629468, "grad_norm": 0.32058292627334595, "learning_rate": 1.9811132573276878e-05, "loss": 0.5887, "step": 5938 }, { "epoch": 0.1259570316642277, "grad_norm": 0.3187481760978699, "learning_rate": 1.9811068058431294e-05, "loss": 0.5556, "step": 5939 }, { "epoch": 0.12597824012216072, "grad_norm": 0.3150671124458313, "learning_rate": 1.981100353267392e-05, "loss": 0.4647, "step": 5940 }, { "epoch": 0.12599944858009374, "grad_norm": 0.33217883110046387, "learning_rate": 1.9810938996004837e-05, "loss": 0.5187, "step": 5941 }, { "epoch": 0.12602065703802676, "grad_norm": 0.31157180666923523, "learning_rate": 1.981087444842411e-05, "loss": 0.5466, "step": 5942 }, { "epoch": 0.12604186549595978, "grad_norm": 0.306698739528656, "learning_rate": 1.9810809889931813e-05, "loss": 0.5484, "step": 5943 }, { "epoch": 0.12606307395389282, "grad_norm": 0.30879247188568115, "learning_rate": 1.981074532052802e-05, "loss": 0.4473, "step": 5944 }, { "epoch": 0.12608428241182584, "grad_norm": 0.3012017011642456, "learning_rate": 1.98106807402128e-05, "loss": 0.5363, "step": 5945 }, { "epoch": 0.12610549086975886, "grad_norm": 0.3332739770412445, "learning_rate": 1.9810616148986228e-05, "loss": 0.5335, "step": 5946 }, { "epoch": 0.12612669932769188, "grad_norm": 0.3241516053676605, "learning_rate": 1.9810551546848373e-05, "loss": 0.5, "step": 5947 }, { "epoch": 0.1261479077856249, "grad_norm": 0.3340761065483093, "learning_rate": 1.981048693379931e-05, "loss": 0.5136, "step": 5948 }, { "epoch": 0.12616911624355792, "grad_norm": 0.30443695187568665, "learning_rate": 1.9810422309839105e-05, "loss": 0.5056, "step": 5949 }, { "epoch": 0.12619032470149094, "grad_norm": 0.35898780822753906, "learning_rate": 1.9810357674967835e-05, "loss": 0.4925, "step": 5950 }, { "epoch": 0.126211533159424, "grad_norm": 0.539297342300415, "learning_rate": 1.981029302918557e-05, "loss": 0.6259, "step": 5951 }, { "epoch": 0.126232741617357, "grad_norm": 0.3241955637931824, "learning_rate": 1.9810228372492383e-05, "loss": 0.4751, "step": 5952 }, { "epoch": 0.12625395007529003, "grad_norm": 0.3160960376262665, "learning_rate": 1.9810163704888345e-05, "loss": 0.4989, "step": 5953 }, { "epoch": 0.12627515853322305, "grad_norm": 0.3060787320137024, "learning_rate": 1.981009902637353e-05, "loss": 0.4615, "step": 5954 }, { "epoch": 0.12629636699115607, "grad_norm": 0.30818480253219604, "learning_rate": 1.9810034336948004e-05, "loss": 0.4132, "step": 5955 }, { "epoch": 0.1263175754490891, "grad_norm": 0.510349690914154, "learning_rate": 1.9809969636611847e-05, "loss": 0.5371, "step": 5956 }, { "epoch": 0.12633878390702213, "grad_norm": 0.38530200719833374, "learning_rate": 1.9809904925365124e-05, "loss": 0.5223, "step": 5957 }, { "epoch": 0.12635999236495515, "grad_norm": 0.34548312425613403, "learning_rate": 1.980984020320791e-05, "loss": 0.4588, "step": 5958 }, { "epoch": 0.12638120082288817, "grad_norm": 0.3149879574775696, "learning_rate": 1.9809775470140277e-05, "loss": 0.5247, "step": 5959 }, { "epoch": 0.1264024092808212, "grad_norm": 0.27442488074302673, "learning_rate": 1.98097107261623e-05, "loss": 0.5121, "step": 5960 }, { "epoch": 0.1264236177387542, "grad_norm": 0.3176473379135132, "learning_rate": 1.9809645971274047e-05, "loss": 0.5551, "step": 5961 }, { "epoch": 0.12644482619668723, "grad_norm": 0.33726051449775696, "learning_rate": 1.980958120547559e-05, "loss": 0.5889, "step": 5962 }, { "epoch": 0.12646603465462025, "grad_norm": 0.31015390157699585, "learning_rate": 1.9809516428767e-05, "loss": 0.4735, "step": 5963 }, { "epoch": 0.1264872431125533, "grad_norm": 0.3831333816051483, "learning_rate": 1.9809451641148355e-05, "loss": 0.5156, "step": 5964 }, { "epoch": 0.12650845157048632, "grad_norm": 0.3534530699253082, "learning_rate": 1.980938684261972e-05, "loss": 0.5549, "step": 5965 }, { "epoch": 0.12652966002841934, "grad_norm": 0.37197229266166687, "learning_rate": 1.9809322033181174e-05, "loss": 0.5342, "step": 5966 }, { "epoch": 0.12655086848635236, "grad_norm": 0.2945379316806793, "learning_rate": 1.9809257212832782e-05, "loss": 0.4786, "step": 5967 }, { "epoch": 0.12657207694428538, "grad_norm": 0.5832785964012146, "learning_rate": 1.980919238157462e-05, "loss": 0.5214, "step": 5968 }, { "epoch": 0.1265932854022184, "grad_norm": 0.43166467547416687, "learning_rate": 1.980912753940676e-05, "loss": 0.5493, "step": 5969 }, { "epoch": 0.12661449386015142, "grad_norm": 0.35339659452438354, "learning_rate": 1.980906268632927e-05, "loss": 0.5064, "step": 5970 }, { "epoch": 0.12663570231808446, "grad_norm": 0.32931962609291077, "learning_rate": 1.980899782234223e-05, "loss": 0.5163, "step": 5971 }, { "epoch": 0.12665691077601748, "grad_norm": 0.3950553834438324, "learning_rate": 1.9808932947445708e-05, "loss": 0.5682, "step": 5972 }, { "epoch": 0.1266781192339505, "grad_norm": 0.3213031589984894, "learning_rate": 1.9808868061639772e-05, "loss": 0.6302, "step": 5973 }, { "epoch": 0.12669932769188352, "grad_norm": 0.3013339042663574, "learning_rate": 1.9808803164924502e-05, "loss": 0.4375, "step": 5974 }, { "epoch": 0.12672053614981654, "grad_norm": 0.3262981176376343, "learning_rate": 1.9808738257299966e-05, "loss": 0.5199, "step": 5975 }, { "epoch": 0.12674174460774956, "grad_norm": 0.3776504099369049, "learning_rate": 1.9808673338766235e-05, "loss": 0.5927, "step": 5976 }, { "epoch": 0.12676295306568258, "grad_norm": 0.3414778709411621, "learning_rate": 1.9808608409323383e-05, "loss": 0.5409, "step": 5977 }, { "epoch": 0.12678416152361563, "grad_norm": 0.3566276729106903, "learning_rate": 1.980854346897148e-05, "loss": 0.4573, "step": 5978 }, { "epoch": 0.12680536998154865, "grad_norm": 0.35760998725891113, "learning_rate": 1.9808478517710605e-05, "loss": 0.5187, "step": 5979 }, { "epoch": 0.12682657843948167, "grad_norm": 0.36842435598373413, "learning_rate": 1.9808413555540823e-05, "loss": 0.58, "step": 5980 }, { "epoch": 0.1268477868974147, "grad_norm": 0.2999430000782013, "learning_rate": 1.9808348582462206e-05, "loss": 0.5244, "step": 5981 }, { "epoch": 0.1268689953553477, "grad_norm": 0.35202115774154663, "learning_rate": 1.980828359847483e-05, "loss": 0.6028, "step": 5982 }, { "epoch": 0.12689020381328073, "grad_norm": 0.3495957851409912, "learning_rate": 1.980821860357877e-05, "loss": 0.4962, "step": 5983 }, { "epoch": 0.12691141227121375, "grad_norm": 0.3243476450443268, "learning_rate": 1.980815359777409e-05, "loss": 0.5183, "step": 5984 }, { "epoch": 0.1269326207291468, "grad_norm": 0.34618133306503296, "learning_rate": 1.980808858106087e-05, "loss": 0.5523, "step": 5985 }, { "epoch": 0.12695382918707981, "grad_norm": 0.38635993003845215, "learning_rate": 1.9808023553439178e-05, "loss": 0.5375, "step": 5986 }, { "epoch": 0.12697503764501283, "grad_norm": 0.33974000811576843, "learning_rate": 1.9807958514909088e-05, "loss": 0.5573, "step": 5987 }, { "epoch": 0.12699624610294585, "grad_norm": 0.3241097927093506, "learning_rate": 1.9807893465470667e-05, "loss": 0.607, "step": 5988 }, { "epoch": 0.12701745456087887, "grad_norm": 0.3231009542942047, "learning_rate": 1.9807828405124e-05, "loss": 0.5291, "step": 5989 }, { "epoch": 0.1270386630188119, "grad_norm": 0.30951711535453796, "learning_rate": 1.9807763333869145e-05, "loss": 0.4596, "step": 5990 }, { "epoch": 0.1270598714767449, "grad_norm": 0.33285778760910034, "learning_rate": 1.9807698251706182e-05, "loss": 0.5392, "step": 5991 }, { "epoch": 0.12708107993467796, "grad_norm": 0.3274248242378235, "learning_rate": 1.9807633158635187e-05, "loss": 0.4868, "step": 5992 }, { "epoch": 0.12710228839261098, "grad_norm": 0.3889133334159851, "learning_rate": 1.9807568054656223e-05, "loss": 0.4713, "step": 5993 }, { "epoch": 0.127123496850544, "grad_norm": 0.31663769483566284, "learning_rate": 1.980750293976937e-05, "loss": 0.4725, "step": 5994 }, { "epoch": 0.12714470530847702, "grad_norm": 0.3491986095905304, "learning_rate": 1.9807437813974692e-05, "loss": 0.541, "step": 5995 }, { "epoch": 0.12716591376641004, "grad_norm": 0.3555462956428528, "learning_rate": 1.980737267727227e-05, "loss": 0.5427, "step": 5996 }, { "epoch": 0.12718712222434306, "grad_norm": 0.3269186317920685, "learning_rate": 1.9807307529662175e-05, "loss": 0.5188, "step": 5997 }, { "epoch": 0.1272083306822761, "grad_norm": 0.3127172291278839, "learning_rate": 1.9807242371144478e-05, "loss": 0.5216, "step": 5998 }, { "epoch": 0.12722953914020912, "grad_norm": 0.3724561929702759, "learning_rate": 1.980717720171925e-05, "loss": 0.5775, "step": 5999 }, { "epoch": 0.12725074759814214, "grad_norm": 0.33479949831962585, "learning_rate": 1.980711202138656e-05, "loss": 0.5839, "step": 6000 }, { "epoch": 0.12727195605607516, "grad_norm": 0.3624166250228882, "learning_rate": 1.9807046830146493e-05, "loss": 0.5035, "step": 6001 }, { "epoch": 0.12729316451400818, "grad_norm": 0.3841949999332428, "learning_rate": 1.9806981627999107e-05, "loss": 0.5487, "step": 6002 }, { "epoch": 0.1273143729719412, "grad_norm": 0.31612110137939453, "learning_rate": 1.9806916414944485e-05, "loss": 0.4548, "step": 6003 }, { "epoch": 0.12733558142987422, "grad_norm": 0.44557738304138184, "learning_rate": 1.9806851190982698e-05, "loss": 0.5401, "step": 6004 }, { "epoch": 0.12735678988780727, "grad_norm": 0.3433683514595032, "learning_rate": 1.9806785956113813e-05, "loss": 0.5236, "step": 6005 }, { "epoch": 0.1273779983457403, "grad_norm": 0.3345687985420227, "learning_rate": 1.980672071033791e-05, "loss": 0.4372, "step": 6006 }, { "epoch": 0.1273992068036733, "grad_norm": 0.3483676314353943, "learning_rate": 1.980665545365505e-05, "loss": 0.5209, "step": 6007 }, { "epoch": 0.12742041526160633, "grad_norm": 0.33024904131889343, "learning_rate": 1.980659018606532e-05, "loss": 0.586, "step": 6008 }, { "epoch": 0.12744162371953935, "grad_norm": 0.31928950548171997, "learning_rate": 1.980652490756878e-05, "loss": 0.5625, "step": 6009 }, { "epoch": 0.12746283217747237, "grad_norm": 0.3338426947593689, "learning_rate": 1.980645961816551e-05, "loss": 0.4655, "step": 6010 }, { "epoch": 0.1274840406354054, "grad_norm": 0.37396878004074097, "learning_rate": 1.9806394317855582e-05, "loss": 0.6337, "step": 6011 }, { "epoch": 0.12750524909333844, "grad_norm": 0.5093364715576172, "learning_rate": 1.9806329006639068e-05, "loss": 0.5053, "step": 6012 }, { "epoch": 0.12752645755127145, "grad_norm": 0.34477898478507996, "learning_rate": 1.9806263684516035e-05, "loss": 0.5918, "step": 6013 }, { "epoch": 0.12754766600920447, "grad_norm": 0.3641211688518524, "learning_rate": 1.9806198351486565e-05, "loss": 0.6654, "step": 6014 }, { "epoch": 0.1275688744671375, "grad_norm": 0.295102596282959, "learning_rate": 1.9806133007550726e-05, "loss": 0.4654, "step": 6015 }, { "epoch": 0.1275900829250705, "grad_norm": 0.3241845369338989, "learning_rate": 1.9806067652708593e-05, "loss": 0.4656, "step": 6016 }, { "epoch": 0.12761129138300353, "grad_norm": 0.32701951265335083, "learning_rate": 1.9806002286960236e-05, "loss": 0.5017, "step": 6017 }, { "epoch": 0.12763249984093655, "grad_norm": 0.3294045031070709, "learning_rate": 1.9805936910305727e-05, "loss": 0.5071, "step": 6018 }, { "epoch": 0.1276537082988696, "grad_norm": 0.347403883934021, "learning_rate": 1.980587152274514e-05, "loss": 0.5526, "step": 6019 }, { "epoch": 0.12767491675680262, "grad_norm": 0.4019286334514618, "learning_rate": 1.9805806124278547e-05, "loss": 0.5176, "step": 6020 }, { "epoch": 0.12769612521473564, "grad_norm": 0.33018895983695984, "learning_rate": 1.9805740714906025e-05, "loss": 0.4905, "step": 6021 }, { "epoch": 0.12771733367266866, "grad_norm": 0.3237280249595642, "learning_rate": 1.9805675294627638e-05, "loss": 0.5503, "step": 6022 }, { "epoch": 0.12773854213060168, "grad_norm": 0.30213794112205505, "learning_rate": 1.980560986344347e-05, "loss": 0.5226, "step": 6023 }, { "epoch": 0.1277597505885347, "grad_norm": 0.3599641025066376, "learning_rate": 1.9805544421353584e-05, "loss": 0.6099, "step": 6024 }, { "epoch": 0.12778095904646772, "grad_norm": 0.3013005554676056, "learning_rate": 1.980547896835806e-05, "loss": 0.4562, "step": 6025 }, { "epoch": 0.12780216750440077, "grad_norm": 0.34178051352500916, "learning_rate": 1.9805413504456964e-05, "loss": 0.5301, "step": 6026 }, { "epoch": 0.12782337596233379, "grad_norm": 0.34115689992904663, "learning_rate": 1.9805348029650374e-05, "loss": 0.4984, "step": 6027 }, { "epoch": 0.1278445844202668, "grad_norm": 0.30843386054039, "learning_rate": 1.980528254393836e-05, "loss": 0.4956, "step": 6028 }, { "epoch": 0.12786579287819982, "grad_norm": 0.33645451068878174, "learning_rate": 1.9805217047320998e-05, "loss": 0.544, "step": 6029 }, { "epoch": 0.12788700133613284, "grad_norm": 0.3628022372722626, "learning_rate": 1.9805151539798357e-05, "loss": 0.5267, "step": 6030 }, { "epoch": 0.12790820979406586, "grad_norm": 0.3706980049610138, "learning_rate": 1.9805086021370512e-05, "loss": 0.5616, "step": 6031 }, { "epoch": 0.1279294182519989, "grad_norm": 0.31939953565597534, "learning_rate": 1.9805020492037538e-05, "loss": 0.5381, "step": 6032 }, { "epoch": 0.12795062670993193, "grad_norm": 0.30411866307258606, "learning_rate": 1.98049549517995e-05, "loss": 0.4218, "step": 6033 }, { "epoch": 0.12797183516786495, "grad_norm": 0.3268270194530487, "learning_rate": 1.9804889400656478e-05, "loss": 0.6136, "step": 6034 }, { "epoch": 0.12799304362579797, "grad_norm": 0.30565106868743896, "learning_rate": 1.9804823838608547e-05, "loss": 0.5259, "step": 6035 }, { "epoch": 0.128014252083731, "grad_norm": 0.3128798007965088, "learning_rate": 1.9804758265655772e-05, "loss": 0.5346, "step": 6036 }, { "epoch": 0.128035460541664, "grad_norm": 0.3060488700866699, "learning_rate": 1.980469268179823e-05, "loss": 0.4756, "step": 6037 }, { "epoch": 0.12805666899959703, "grad_norm": 0.3590461313724518, "learning_rate": 1.9804627087035997e-05, "loss": 0.4911, "step": 6038 }, { "epoch": 0.12807787745753008, "grad_norm": 0.42766857147216797, "learning_rate": 1.9804561481369142e-05, "loss": 0.546, "step": 6039 }, { "epoch": 0.1280990859154631, "grad_norm": 0.3129141926765442, "learning_rate": 1.9804495864797738e-05, "loss": 0.473, "step": 6040 }, { "epoch": 0.12812029437339612, "grad_norm": 0.4044061005115509, "learning_rate": 1.980443023732186e-05, "loss": 0.6053, "step": 6041 }, { "epoch": 0.12814150283132913, "grad_norm": 0.34203606843948364, "learning_rate": 1.9804364598941582e-05, "loss": 0.5047, "step": 6042 }, { "epoch": 0.12816271128926215, "grad_norm": 0.34571024775505066, "learning_rate": 1.980429894965697e-05, "loss": 0.5214, "step": 6043 }, { "epoch": 0.12818391974719517, "grad_norm": 0.35580477118492126, "learning_rate": 1.9804233289468105e-05, "loss": 0.5881, "step": 6044 }, { "epoch": 0.1282051282051282, "grad_norm": 0.39783281087875366, "learning_rate": 1.9804167618375054e-05, "loss": 0.5331, "step": 6045 }, { "epoch": 0.12822633666306124, "grad_norm": 0.3388017416000366, "learning_rate": 1.9804101936377896e-05, "loss": 0.5511, "step": 6046 }, { "epoch": 0.12824754512099426, "grad_norm": 0.3624337911605835, "learning_rate": 1.9804036243476697e-05, "loss": 0.573, "step": 6047 }, { "epoch": 0.12826875357892728, "grad_norm": 0.40388286113739014, "learning_rate": 1.980397053967154e-05, "loss": 0.6059, "step": 6048 }, { "epoch": 0.1282899620368603, "grad_norm": 0.40527981519699097, "learning_rate": 1.9803904824962487e-05, "loss": 0.4944, "step": 6049 }, { "epoch": 0.12831117049479332, "grad_norm": 0.31790608167648315, "learning_rate": 1.980383909934962e-05, "loss": 0.5086, "step": 6050 }, { "epoch": 0.12833237895272634, "grad_norm": 0.31606632471084595, "learning_rate": 1.9803773362833005e-05, "loss": 0.5429, "step": 6051 }, { "epoch": 0.12835358741065936, "grad_norm": 0.35353416204452515, "learning_rate": 1.980370761541272e-05, "loss": 0.487, "step": 6052 }, { "epoch": 0.1283747958685924, "grad_norm": 0.3230263590812683, "learning_rate": 1.9803641857088838e-05, "loss": 0.5223, "step": 6053 }, { "epoch": 0.12839600432652543, "grad_norm": 0.296956866979599, "learning_rate": 1.9803576087861425e-05, "loss": 0.497, "step": 6054 }, { "epoch": 0.12841721278445845, "grad_norm": 0.3418789803981781, "learning_rate": 1.9803510307730568e-05, "loss": 0.468, "step": 6055 }, { "epoch": 0.12843842124239147, "grad_norm": 0.6382973790168762, "learning_rate": 1.9803444516696326e-05, "loss": 0.4771, "step": 6056 }, { "epoch": 0.12845962970032448, "grad_norm": 0.33058276772499084, "learning_rate": 1.9803378714758782e-05, "loss": 0.4803, "step": 6057 }, { "epoch": 0.1284808381582575, "grad_norm": 0.3252488076686859, "learning_rate": 1.9803312901918002e-05, "loss": 0.5357, "step": 6058 }, { "epoch": 0.12850204661619052, "grad_norm": 0.3400140404701233, "learning_rate": 1.9803247078174066e-05, "loss": 0.5952, "step": 6059 }, { "epoch": 0.12852325507412357, "grad_norm": 0.34913668036460876, "learning_rate": 1.980318124352704e-05, "loss": 0.5256, "step": 6060 }, { "epoch": 0.1285444635320566, "grad_norm": 0.324258953332901, "learning_rate": 1.9803115397977005e-05, "loss": 0.4879, "step": 6061 }, { "epoch": 0.1285656719899896, "grad_norm": 0.2950041592121124, "learning_rate": 1.9803049541524027e-05, "loss": 0.4919, "step": 6062 }, { "epoch": 0.12858688044792263, "grad_norm": 0.3520925045013428, "learning_rate": 1.9802983674168183e-05, "loss": 0.5473, "step": 6063 }, { "epoch": 0.12860808890585565, "grad_norm": 0.319476455450058, "learning_rate": 1.9802917795909545e-05, "loss": 0.5498, "step": 6064 }, { "epoch": 0.12862929736378867, "grad_norm": 4.883192539215088, "learning_rate": 1.980285190674819e-05, "loss": 0.6086, "step": 6065 }, { "epoch": 0.1286505058217217, "grad_norm": 0.4708448350429535, "learning_rate": 1.9802786006684186e-05, "loss": 0.5203, "step": 6066 }, { "epoch": 0.12867171427965474, "grad_norm": 0.38032224774360657, "learning_rate": 1.9802720095717607e-05, "loss": 0.6697, "step": 6067 }, { "epoch": 0.12869292273758776, "grad_norm": 0.3337591886520386, "learning_rate": 1.9802654173848532e-05, "loss": 0.5028, "step": 6068 }, { "epoch": 0.12871413119552078, "grad_norm": 0.3501589596271515, "learning_rate": 1.9802588241077028e-05, "loss": 0.5327, "step": 6069 }, { "epoch": 0.1287353396534538, "grad_norm": 0.34678301215171814, "learning_rate": 1.980252229740317e-05, "loss": 0.5762, "step": 6070 }, { "epoch": 0.12875654811138681, "grad_norm": 0.3839874267578125, "learning_rate": 1.9802456342827033e-05, "loss": 0.5631, "step": 6071 }, { "epoch": 0.12877775656931983, "grad_norm": 0.326934814453125, "learning_rate": 1.9802390377348687e-05, "loss": 0.5175, "step": 6072 }, { "epoch": 0.12879896502725288, "grad_norm": 0.3238871693611145, "learning_rate": 1.980232440096821e-05, "loss": 0.5129, "step": 6073 }, { "epoch": 0.1288201734851859, "grad_norm": 0.31364986300468445, "learning_rate": 1.9802258413685674e-05, "loss": 0.5206, "step": 6074 }, { "epoch": 0.12884138194311892, "grad_norm": 0.3427838385105133, "learning_rate": 1.9802192415501146e-05, "loss": 0.5662, "step": 6075 }, { "epoch": 0.12886259040105194, "grad_norm": 0.34377753734588623, "learning_rate": 1.980212640641471e-05, "loss": 0.5162, "step": 6076 }, { "epoch": 0.12888379885898496, "grad_norm": 0.33074384927749634, "learning_rate": 1.9802060386426434e-05, "loss": 0.5181, "step": 6077 }, { "epoch": 0.12890500731691798, "grad_norm": 0.30357784032821655, "learning_rate": 1.980199435553639e-05, "loss": 0.5536, "step": 6078 }, { "epoch": 0.128926215774851, "grad_norm": 0.29754897952079773, "learning_rate": 1.980192831374465e-05, "loss": 0.4769, "step": 6079 }, { "epoch": 0.12894742423278405, "grad_norm": 0.37794363498687744, "learning_rate": 1.9801862261051296e-05, "loss": 0.5229, "step": 6080 }, { "epoch": 0.12896863269071707, "grad_norm": 0.40126463770866394, "learning_rate": 1.9801796197456394e-05, "loss": 0.5512, "step": 6081 }, { "epoch": 0.1289898411486501, "grad_norm": 0.3834253251552582, "learning_rate": 1.9801730122960018e-05, "loss": 0.5307, "step": 6082 }, { "epoch": 0.1290110496065831, "grad_norm": 0.33882829546928406, "learning_rate": 1.9801664037562244e-05, "loss": 0.5532, "step": 6083 }, { "epoch": 0.12903225806451613, "grad_norm": 0.3305020034313202, "learning_rate": 1.9801597941263146e-05, "loss": 0.5335, "step": 6084 }, { "epoch": 0.12905346652244915, "grad_norm": 0.2820890247821808, "learning_rate": 1.9801531834062794e-05, "loss": 0.4753, "step": 6085 }, { "epoch": 0.12907467498038216, "grad_norm": 0.3355691730976105, "learning_rate": 1.980146571596126e-05, "loss": 0.5583, "step": 6086 }, { "epoch": 0.1290958834383152, "grad_norm": 0.31076958775520325, "learning_rate": 1.9801399586958627e-05, "loss": 0.4974, "step": 6087 }, { "epoch": 0.12911709189624823, "grad_norm": 0.319916695356369, "learning_rate": 1.9801333447054957e-05, "loss": 0.5359, "step": 6088 }, { "epoch": 0.12913830035418125, "grad_norm": 0.3512687385082245, "learning_rate": 1.9801267296250335e-05, "loss": 0.577, "step": 6089 }, { "epoch": 0.12915950881211427, "grad_norm": 0.31039324402809143, "learning_rate": 1.9801201134544826e-05, "loss": 0.564, "step": 6090 }, { "epoch": 0.1291807172700473, "grad_norm": 0.42390990257263184, "learning_rate": 1.9801134961938505e-05, "loss": 0.4075, "step": 6091 }, { "epoch": 0.1292019257279803, "grad_norm": 0.39610618352890015, "learning_rate": 1.9801068778431446e-05, "loss": 0.5902, "step": 6092 }, { "epoch": 0.12922313418591333, "grad_norm": 0.3107106387615204, "learning_rate": 1.9801002584023726e-05, "loss": 0.5281, "step": 6093 }, { "epoch": 0.12924434264384638, "grad_norm": 0.47801175713539124, "learning_rate": 1.9800936378715418e-05, "loss": 0.518, "step": 6094 }, { "epoch": 0.1292655511017794, "grad_norm": 0.38484570384025574, "learning_rate": 1.9800870162506588e-05, "loss": 0.5928, "step": 6095 }, { "epoch": 0.12928675955971242, "grad_norm": 0.3184574544429779, "learning_rate": 1.9800803935397322e-05, "loss": 0.5631, "step": 6096 }, { "epoch": 0.12930796801764544, "grad_norm": 0.3237893581390381, "learning_rate": 1.9800737697387683e-05, "loss": 0.5309, "step": 6097 }, { "epoch": 0.12932917647557846, "grad_norm": 0.3324364721775055, "learning_rate": 1.9800671448477748e-05, "loss": 0.6148, "step": 6098 }, { "epoch": 0.12935038493351148, "grad_norm": 0.2895086705684662, "learning_rate": 1.9800605188667595e-05, "loss": 0.4348, "step": 6099 }, { "epoch": 0.1293715933914445, "grad_norm": 0.3131236433982849, "learning_rate": 1.9800538917957288e-05, "loss": 0.491, "step": 6100 }, { "epoch": 0.12939280184937754, "grad_norm": 0.3594632148742676, "learning_rate": 1.980047263634691e-05, "loss": 0.5373, "step": 6101 }, { "epoch": 0.12941401030731056, "grad_norm": 0.32037660479545593, "learning_rate": 1.9800406343836532e-05, "loss": 0.4825, "step": 6102 }, { "epoch": 0.12943521876524358, "grad_norm": 0.3655133545398712, "learning_rate": 1.9800340040426227e-05, "loss": 0.6918, "step": 6103 }, { "epoch": 0.1294564272231766, "grad_norm": 0.29624173045158386, "learning_rate": 1.980027372611607e-05, "loss": 0.4865, "step": 6104 }, { "epoch": 0.12947763568110962, "grad_norm": 0.34078559279441833, "learning_rate": 1.980020740090613e-05, "loss": 0.51, "step": 6105 }, { "epoch": 0.12949884413904264, "grad_norm": 0.33671557903289795, "learning_rate": 1.9800141064796488e-05, "loss": 0.5546, "step": 6106 }, { "epoch": 0.1295200525969757, "grad_norm": 0.377170592546463, "learning_rate": 1.9800074717787218e-05, "loss": 0.4933, "step": 6107 }, { "epoch": 0.1295412610549087, "grad_norm": 0.3129148483276367, "learning_rate": 1.9800008359878382e-05, "loss": 0.4733, "step": 6108 }, { "epoch": 0.12956246951284173, "grad_norm": 0.3842153251171112, "learning_rate": 1.9799941991070065e-05, "loss": 0.607, "step": 6109 }, { "epoch": 0.12958367797077475, "grad_norm": 0.3798241913318634, "learning_rate": 1.979987561136234e-05, "loss": 0.5603, "step": 6110 }, { "epoch": 0.12960488642870777, "grad_norm": 0.32716330885887146, "learning_rate": 1.9799809220755276e-05, "loss": 0.495, "step": 6111 }, { "epoch": 0.12962609488664079, "grad_norm": 0.3482165038585663, "learning_rate": 1.979974281924895e-05, "loss": 0.5888, "step": 6112 }, { "epoch": 0.1296473033445738, "grad_norm": 0.3163154125213623, "learning_rate": 1.9799676406843434e-05, "loss": 0.5607, "step": 6113 }, { "epoch": 0.12966851180250685, "grad_norm": 0.2758994400501251, "learning_rate": 1.9799609983538802e-05, "loss": 0.4119, "step": 6114 }, { "epoch": 0.12968972026043987, "grad_norm": 0.32473793625831604, "learning_rate": 1.979954354933513e-05, "loss": 0.5048, "step": 6115 }, { "epoch": 0.1297109287183729, "grad_norm": 0.31706517934799194, "learning_rate": 1.9799477104232493e-05, "loss": 0.4798, "step": 6116 }, { "epoch": 0.1297321371763059, "grad_norm": 0.6428342461585999, "learning_rate": 1.979941064823096e-05, "loss": 0.5111, "step": 6117 }, { "epoch": 0.12975334563423893, "grad_norm": 0.35011622309684753, "learning_rate": 1.979934418133061e-05, "loss": 0.5356, "step": 6118 }, { "epoch": 0.12977455409217195, "grad_norm": 0.34924420714378357, "learning_rate": 1.9799277703531514e-05, "loss": 0.532, "step": 6119 }, { "epoch": 0.12979576255010497, "grad_norm": 0.36506232619285583, "learning_rate": 1.9799211214833748e-05, "loss": 0.5598, "step": 6120 }, { "epoch": 0.12981697100803802, "grad_norm": 0.34437206387519836, "learning_rate": 1.979914471523738e-05, "loss": 0.6392, "step": 6121 }, { "epoch": 0.12983817946597104, "grad_norm": 0.47011640667915344, "learning_rate": 1.979907820474249e-05, "loss": 0.4186, "step": 6122 }, { "epoch": 0.12985938792390406, "grad_norm": 0.32512474060058594, "learning_rate": 1.979901168334915e-05, "loss": 0.5518, "step": 6123 }, { "epoch": 0.12988059638183708, "grad_norm": 0.2999889850616455, "learning_rate": 1.9798945151057438e-05, "loss": 0.4996, "step": 6124 }, { "epoch": 0.1299018048397701, "grad_norm": 0.32642337679862976, "learning_rate": 1.979887860786742e-05, "loss": 0.6003, "step": 6125 }, { "epoch": 0.12992301329770312, "grad_norm": 0.3678671717643738, "learning_rate": 1.9798812053779176e-05, "loss": 0.5786, "step": 6126 }, { "epoch": 0.12994422175563614, "grad_norm": 0.3514675796031952, "learning_rate": 1.979874548879278e-05, "loss": 0.5912, "step": 6127 }, { "epoch": 0.12996543021356918, "grad_norm": 0.3527752161026001, "learning_rate": 1.9798678912908303e-05, "loss": 0.5783, "step": 6128 }, { "epoch": 0.1299866386715022, "grad_norm": 0.3470027446746826, "learning_rate": 1.9798612326125818e-05, "loss": 0.5291, "step": 6129 }, { "epoch": 0.13000784712943522, "grad_norm": 0.34748008847236633, "learning_rate": 1.9798545728445404e-05, "loss": 0.5606, "step": 6130 }, { "epoch": 0.13002905558736824, "grad_norm": 0.2933361530303955, "learning_rate": 1.9798479119867132e-05, "loss": 0.5638, "step": 6131 }, { "epoch": 0.13005026404530126, "grad_norm": 0.34216880798339844, "learning_rate": 1.9798412500391076e-05, "loss": 0.572, "step": 6132 }, { "epoch": 0.13007147250323428, "grad_norm": 0.30712711811065674, "learning_rate": 1.979834587001731e-05, "loss": 0.506, "step": 6133 }, { "epoch": 0.1300926809611673, "grad_norm": 0.3992197811603546, "learning_rate": 1.9798279228745912e-05, "loss": 0.51, "step": 6134 }, { "epoch": 0.13011388941910035, "grad_norm": 0.3531166613101959, "learning_rate": 1.9798212576576953e-05, "loss": 0.5107, "step": 6135 }, { "epoch": 0.13013509787703337, "grad_norm": 0.37528613209724426, "learning_rate": 1.9798145913510504e-05, "loss": 0.6092, "step": 6136 }, { "epoch": 0.1301563063349664, "grad_norm": 0.36362072825431824, "learning_rate": 1.9798079239546645e-05, "loss": 0.6149, "step": 6137 }, { "epoch": 0.1301775147928994, "grad_norm": 0.32215964794158936, "learning_rate": 1.9798012554685443e-05, "loss": 0.409, "step": 6138 }, { "epoch": 0.13019872325083243, "grad_norm": 0.2920721471309662, "learning_rate": 1.9797945858926983e-05, "loss": 0.4482, "step": 6139 }, { "epoch": 0.13021993170876545, "grad_norm": 0.34582555294036865, "learning_rate": 1.9797879152271328e-05, "loss": 0.5378, "step": 6140 }, { "epoch": 0.13024114016669847, "grad_norm": 0.36174190044403076, "learning_rate": 1.9797812434718557e-05, "loss": 0.4908, "step": 6141 }, { "epoch": 0.1302623486246315, "grad_norm": 0.35688358545303345, "learning_rate": 1.9797745706268748e-05, "loss": 0.5563, "step": 6142 }, { "epoch": 0.13028355708256453, "grad_norm": 0.3467182517051697, "learning_rate": 1.979767896692197e-05, "loss": 0.3826, "step": 6143 }, { "epoch": 0.13030476554049755, "grad_norm": 0.3302443325519562, "learning_rate": 1.9797612216678293e-05, "loss": 0.5581, "step": 6144 }, { "epoch": 0.13032597399843057, "grad_norm": 0.3172862231731415, "learning_rate": 1.9797545455537803e-05, "loss": 0.5001, "step": 6145 }, { "epoch": 0.1303471824563636, "grad_norm": 0.5562947988510132, "learning_rate": 1.9797478683500566e-05, "loss": 0.5607, "step": 6146 }, { "epoch": 0.1303683909142966, "grad_norm": 0.3136769235134125, "learning_rate": 1.9797411900566657e-05, "loss": 0.4655, "step": 6147 }, { "epoch": 0.13038959937222966, "grad_norm": 0.3952636420726776, "learning_rate": 1.9797345106736157e-05, "loss": 0.4609, "step": 6148 }, { "epoch": 0.13041080783016268, "grad_norm": 0.36034896969795227, "learning_rate": 1.9797278302009127e-05, "loss": 0.5694, "step": 6149 }, { "epoch": 0.1304320162880957, "grad_norm": 0.3758857548236847, "learning_rate": 1.9797211486385655e-05, "loss": 0.5363, "step": 6150 }, { "epoch": 0.13045322474602872, "grad_norm": 0.3066504895687103, "learning_rate": 1.979714465986581e-05, "loss": 0.5075, "step": 6151 }, { "epoch": 0.13047443320396174, "grad_norm": 0.3729819059371948, "learning_rate": 1.9797077822449663e-05, "loss": 0.5177, "step": 6152 }, { "epoch": 0.13049564166189476, "grad_norm": 0.3084888756275177, "learning_rate": 1.979701097413729e-05, "loss": 0.4966, "step": 6153 }, { "epoch": 0.13051685011982778, "grad_norm": 0.35490238666534424, "learning_rate": 1.9796944114928768e-05, "loss": 0.6068, "step": 6154 }, { "epoch": 0.13053805857776082, "grad_norm": 0.3023281693458557, "learning_rate": 1.979687724482417e-05, "loss": 0.4827, "step": 6155 }, { "epoch": 0.13055926703569384, "grad_norm": 0.3451002836227417, "learning_rate": 1.9796810363823572e-05, "loss": 0.5286, "step": 6156 }, { "epoch": 0.13058047549362686, "grad_norm": 0.36035871505737305, "learning_rate": 1.9796743471927043e-05, "loss": 0.4541, "step": 6157 }, { "epoch": 0.13060168395155988, "grad_norm": 0.38556408882141113, "learning_rate": 1.9796676569134662e-05, "loss": 0.5644, "step": 6158 }, { "epoch": 0.1306228924094929, "grad_norm": 0.3254821002483368, "learning_rate": 1.9796609655446503e-05, "loss": 0.4582, "step": 6159 }, { "epoch": 0.13064410086742592, "grad_norm": 0.3949688673019409, "learning_rate": 1.979654273086264e-05, "loss": 0.5029, "step": 6160 }, { "epoch": 0.13066530932535894, "grad_norm": 0.3514719009399414, "learning_rate": 1.979647579538315e-05, "loss": 0.5666, "step": 6161 }, { "epoch": 0.130686517783292, "grad_norm": 0.4496711194515228, "learning_rate": 1.97964088490081e-05, "loss": 0.5788, "step": 6162 }, { "epoch": 0.130707726241225, "grad_norm": 0.34124377369880676, "learning_rate": 1.9796341891737572e-05, "loss": 0.6549, "step": 6163 }, { "epoch": 0.13072893469915803, "grad_norm": 0.3465164005756378, "learning_rate": 1.9796274923571638e-05, "loss": 0.5219, "step": 6164 }, { "epoch": 0.13075014315709105, "grad_norm": 0.32987159490585327, "learning_rate": 1.979620794451037e-05, "loss": 0.5192, "step": 6165 }, { "epoch": 0.13077135161502407, "grad_norm": 0.32216712832450867, "learning_rate": 1.9796140954553844e-05, "loss": 0.4313, "step": 6166 }, { "epoch": 0.1307925600729571, "grad_norm": 0.31234434247016907, "learning_rate": 1.979607395370214e-05, "loss": 0.4698, "step": 6167 }, { "epoch": 0.1308137685308901, "grad_norm": 0.32350000739097595, "learning_rate": 1.9796006941955323e-05, "loss": 0.5087, "step": 6168 }, { "epoch": 0.13083497698882315, "grad_norm": 0.35465487837791443, "learning_rate": 1.9795939919313475e-05, "loss": 0.4605, "step": 6169 }, { "epoch": 0.13085618544675617, "grad_norm": 0.31834930181503296, "learning_rate": 1.9795872885776663e-05, "loss": 0.5419, "step": 6170 }, { "epoch": 0.1308773939046892, "grad_norm": 0.3236314654350281, "learning_rate": 1.9795805841344973e-05, "loss": 0.5045, "step": 6171 }, { "epoch": 0.1308986023626222, "grad_norm": 0.3331316411495209, "learning_rate": 1.979573878601847e-05, "loss": 0.5375, "step": 6172 }, { "epoch": 0.13091981082055523, "grad_norm": 0.5091875195503235, "learning_rate": 1.979567171979723e-05, "loss": 0.483, "step": 6173 }, { "epoch": 0.13094101927848825, "grad_norm": 0.27940067648887634, "learning_rate": 1.9795604642681327e-05, "loss": 0.4441, "step": 6174 }, { "epoch": 0.13096222773642127, "grad_norm": 0.3692651391029358, "learning_rate": 1.9795537554670843e-05, "loss": 0.4906, "step": 6175 }, { "epoch": 0.13098343619435432, "grad_norm": 0.3386799693107605, "learning_rate": 1.9795470455765845e-05, "loss": 0.5268, "step": 6176 }, { "epoch": 0.13100464465228734, "grad_norm": 0.38658347725868225, "learning_rate": 1.9795403345966408e-05, "loss": 0.5371, "step": 6177 }, { "epoch": 0.13102585311022036, "grad_norm": 0.3594132661819458, "learning_rate": 1.979533622527261e-05, "loss": 0.4703, "step": 6178 }, { "epoch": 0.13104706156815338, "grad_norm": 0.3754139840602875, "learning_rate": 1.9795269093684526e-05, "loss": 0.6154, "step": 6179 }, { "epoch": 0.1310682700260864, "grad_norm": 0.3416498005390167, "learning_rate": 1.9795201951202227e-05, "loss": 0.4572, "step": 6180 }, { "epoch": 0.13108947848401942, "grad_norm": 0.34378111362457275, "learning_rate": 1.979513479782579e-05, "loss": 0.5848, "step": 6181 }, { "epoch": 0.13111068694195244, "grad_norm": 0.3086530864238739, "learning_rate": 1.9795067633555287e-05, "loss": 0.5535, "step": 6182 }, { "epoch": 0.13113189539988548, "grad_norm": 0.2868274748325348, "learning_rate": 1.9795000458390795e-05, "loss": 0.512, "step": 6183 }, { "epoch": 0.1311531038578185, "grad_norm": 0.3221675753593445, "learning_rate": 1.979493327233239e-05, "loss": 0.5053, "step": 6184 }, { "epoch": 0.13117431231575152, "grad_norm": 0.302391916513443, "learning_rate": 1.9794866075380148e-05, "loss": 0.5218, "step": 6185 }, { "epoch": 0.13119552077368454, "grad_norm": 0.3629280626773834, "learning_rate": 1.979479886753414e-05, "loss": 0.5474, "step": 6186 }, { "epoch": 0.13121672923161756, "grad_norm": 0.33676066994667053, "learning_rate": 1.9794731648794437e-05, "loss": 0.4884, "step": 6187 }, { "epoch": 0.13123793768955058, "grad_norm": 0.30715906620025635, "learning_rate": 1.9794664419161122e-05, "loss": 0.4968, "step": 6188 }, { "epoch": 0.13125914614748363, "grad_norm": 0.3156845271587372, "learning_rate": 1.979459717863427e-05, "loss": 0.5919, "step": 6189 }, { "epoch": 0.13128035460541665, "grad_norm": 0.3184211850166321, "learning_rate": 1.9794529927213944e-05, "loss": 0.5632, "step": 6190 }, { "epoch": 0.13130156306334967, "grad_norm": 0.31101152300834656, "learning_rate": 1.9794462664900234e-05, "loss": 0.4357, "step": 6191 }, { "epoch": 0.1313227715212827, "grad_norm": 0.356890469789505, "learning_rate": 1.9794395391693203e-05, "loss": 0.6015, "step": 6192 }, { "epoch": 0.1313439799792157, "grad_norm": 0.311395525932312, "learning_rate": 1.979432810759293e-05, "loss": 0.5121, "step": 6193 }, { "epoch": 0.13136518843714873, "grad_norm": 0.32644444704055786, "learning_rate": 1.9794260812599498e-05, "loss": 0.4988, "step": 6194 }, { "epoch": 0.13138639689508175, "grad_norm": 0.28657543659210205, "learning_rate": 1.9794193506712966e-05, "loss": 0.5445, "step": 6195 }, { "epoch": 0.1314076053530148, "grad_norm": 0.3698466122150421, "learning_rate": 1.9794126189933422e-05, "loss": 0.534, "step": 6196 }, { "epoch": 0.13142881381094781, "grad_norm": 0.3099096715450287, "learning_rate": 1.9794058862260935e-05, "loss": 0.4666, "step": 6197 }, { "epoch": 0.13145002226888083, "grad_norm": 0.31629228591918945, "learning_rate": 1.9793991523695578e-05, "loss": 0.5295, "step": 6198 }, { "epoch": 0.13147123072681385, "grad_norm": 0.3139303922653198, "learning_rate": 1.979392417423743e-05, "loss": 0.5343, "step": 6199 }, { "epoch": 0.13149243918474687, "grad_norm": 0.387428879737854, "learning_rate": 1.9793856813886566e-05, "loss": 0.5823, "step": 6200 }, { "epoch": 0.1315136476426799, "grad_norm": 0.30842000246047974, "learning_rate": 1.9793789442643057e-05, "loss": 0.4535, "step": 6201 }, { "epoch": 0.1315348561006129, "grad_norm": 0.3427533507347107, "learning_rate": 1.9793722060506982e-05, "loss": 0.5468, "step": 6202 }, { "epoch": 0.13155606455854596, "grad_norm": 0.32690444588661194, "learning_rate": 1.9793654667478416e-05, "loss": 0.4702, "step": 6203 }, { "epoch": 0.13157727301647898, "grad_norm": 0.40719473361968994, "learning_rate": 1.979358726355743e-05, "loss": 0.462, "step": 6204 }, { "epoch": 0.131598481474412, "grad_norm": 0.37264499068260193, "learning_rate": 1.9793519848744104e-05, "loss": 0.5544, "step": 6205 }, { "epoch": 0.13161968993234502, "grad_norm": 0.43753471970558167, "learning_rate": 1.9793452423038508e-05, "loss": 0.5529, "step": 6206 }, { "epoch": 0.13164089839027804, "grad_norm": 0.33620235323905945, "learning_rate": 1.979338498644072e-05, "loss": 0.539, "step": 6207 }, { "epoch": 0.13166210684821106, "grad_norm": 0.32280656695365906, "learning_rate": 1.9793317538950814e-05, "loss": 0.4529, "step": 6208 }, { "epoch": 0.13168331530614408, "grad_norm": 0.3251165449619293, "learning_rate": 1.9793250080568865e-05, "loss": 0.6144, "step": 6209 }, { "epoch": 0.13170452376407712, "grad_norm": 0.30026718974113464, "learning_rate": 1.9793182611294946e-05, "loss": 0.5127, "step": 6210 }, { "epoch": 0.13172573222201014, "grad_norm": 0.3296383321285248, "learning_rate": 1.979311513112914e-05, "loss": 0.5701, "step": 6211 }, { "epoch": 0.13174694067994316, "grad_norm": 0.30534490942955017, "learning_rate": 1.9793047640071513e-05, "loss": 0.4644, "step": 6212 }, { "epoch": 0.13176814913787618, "grad_norm": 0.2791900634765625, "learning_rate": 1.9792980138122143e-05, "loss": 0.4683, "step": 6213 }, { "epoch": 0.1317893575958092, "grad_norm": 0.31707626581192017, "learning_rate": 1.9792912625281105e-05, "loss": 0.5456, "step": 6214 }, { "epoch": 0.13181056605374222, "grad_norm": 0.35237938165664673, "learning_rate": 1.9792845101548477e-05, "loss": 0.4922, "step": 6215 }, { "epoch": 0.13183177451167524, "grad_norm": 0.31460002064704895, "learning_rate": 1.979277756692433e-05, "loss": 0.5636, "step": 6216 }, { "epoch": 0.1318529829696083, "grad_norm": 0.30304795503616333, "learning_rate": 1.9792710021408746e-05, "loss": 0.5006, "step": 6217 }, { "epoch": 0.1318741914275413, "grad_norm": 0.35598593950271606, "learning_rate": 1.979264246500179e-05, "loss": 0.4825, "step": 6218 }, { "epoch": 0.13189539988547433, "grad_norm": 0.32023024559020996, "learning_rate": 1.9792574897703542e-05, "loss": 0.559, "step": 6219 }, { "epoch": 0.13191660834340735, "grad_norm": 0.2965252101421356, "learning_rate": 1.979250731951408e-05, "loss": 0.5497, "step": 6220 }, { "epoch": 0.13193781680134037, "grad_norm": 0.33134031295776367, "learning_rate": 1.9792439730433472e-05, "loss": 0.4934, "step": 6221 }, { "epoch": 0.1319590252592734, "grad_norm": 0.310332328081131, "learning_rate": 1.97923721304618e-05, "loss": 0.4949, "step": 6222 }, { "epoch": 0.13198023371720644, "grad_norm": 0.3186633586883545, "learning_rate": 1.979230451959914e-05, "loss": 0.5646, "step": 6223 }, { "epoch": 0.13200144217513946, "grad_norm": 0.3322465419769287, "learning_rate": 1.9792236897845558e-05, "loss": 0.5374, "step": 6224 }, { "epoch": 0.13202265063307247, "grad_norm": 0.38711950182914734, "learning_rate": 1.979216926520114e-05, "loss": 0.644, "step": 6225 }, { "epoch": 0.1320438590910055, "grad_norm": 0.32641562819480896, "learning_rate": 1.9792101621665953e-05, "loss": 0.4815, "step": 6226 }, { "epoch": 0.13206506754893851, "grad_norm": 0.3246786594390869, "learning_rate": 1.9792033967240075e-05, "loss": 0.5126, "step": 6227 }, { "epoch": 0.13208627600687153, "grad_norm": 0.33093127608299255, "learning_rate": 1.9791966301923586e-05, "loss": 0.5617, "step": 6228 }, { "epoch": 0.13210748446480455, "grad_norm": 0.3068969249725342, "learning_rate": 1.9791898625716554e-05, "loss": 0.4873, "step": 6229 }, { "epoch": 0.1321286929227376, "grad_norm": 0.34128424525260925, "learning_rate": 1.979183093861906e-05, "loss": 0.4986, "step": 6230 }, { "epoch": 0.13214990138067062, "grad_norm": 0.3811732530593872, "learning_rate": 1.9791763240631176e-05, "loss": 0.5831, "step": 6231 }, { "epoch": 0.13217110983860364, "grad_norm": 0.3275667130947113, "learning_rate": 1.9791695531752978e-05, "loss": 0.5001, "step": 6232 }, { "epoch": 0.13219231829653666, "grad_norm": 0.3454020023345947, "learning_rate": 1.979162781198454e-05, "loss": 0.542, "step": 6233 }, { "epoch": 0.13221352675446968, "grad_norm": 0.3613431453704834, "learning_rate": 1.979156008132594e-05, "loss": 0.4785, "step": 6234 }, { "epoch": 0.1322347352124027, "grad_norm": 0.32295218110084534, "learning_rate": 1.9791492339777248e-05, "loss": 0.4877, "step": 6235 }, { "epoch": 0.13225594367033572, "grad_norm": 0.3852725923061371, "learning_rate": 1.9791424587338552e-05, "loss": 0.5288, "step": 6236 }, { "epoch": 0.13227715212826877, "grad_norm": 0.3132099509239197, "learning_rate": 1.979135682400991e-05, "loss": 0.5282, "step": 6237 }, { "epoch": 0.13229836058620179, "grad_norm": 0.32139816880226135, "learning_rate": 1.979128904979141e-05, "loss": 0.5241, "step": 6238 }, { "epoch": 0.1323195690441348, "grad_norm": 0.30072134733200073, "learning_rate": 1.9791221264683124e-05, "loss": 0.4979, "step": 6239 }, { "epoch": 0.13234077750206782, "grad_norm": 0.3191682994365692, "learning_rate": 1.979115346868512e-05, "loss": 0.465, "step": 6240 }, { "epoch": 0.13236198596000084, "grad_norm": 0.3611156642436981, "learning_rate": 1.9791085661797487e-05, "loss": 0.5066, "step": 6241 }, { "epoch": 0.13238319441793386, "grad_norm": 0.34931033849716187, "learning_rate": 1.9791017844020292e-05, "loss": 0.5341, "step": 6242 }, { "epoch": 0.13240440287586688, "grad_norm": 0.2986331284046173, "learning_rate": 1.9790950015353615e-05, "loss": 0.5015, "step": 6243 }, { "epoch": 0.13242561133379993, "grad_norm": 0.3316824436187744, "learning_rate": 1.9790882175797522e-05, "loss": 0.487, "step": 6244 }, { "epoch": 0.13244681979173295, "grad_norm": 0.34427574276924133, "learning_rate": 1.97908143253521e-05, "loss": 0.5225, "step": 6245 }, { "epoch": 0.13246802824966597, "grad_norm": 0.3250032365322113, "learning_rate": 1.9790746464017418e-05, "loss": 0.5452, "step": 6246 }, { "epoch": 0.132489236707599, "grad_norm": 0.3064810037612915, "learning_rate": 1.979067859179355e-05, "loss": 0.4364, "step": 6247 }, { "epoch": 0.132510445165532, "grad_norm": 0.31255969405174255, "learning_rate": 1.979061070868058e-05, "loss": 0.4952, "step": 6248 }, { "epoch": 0.13253165362346503, "grad_norm": 0.3433445692062378, "learning_rate": 1.9790542814678574e-05, "loss": 0.4776, "step": 6249 }, { "epoch": 0.13255286208139805, "grad_norm": 0.30829918384552, "learning_rate": 1.979047490978761e-05, "loss": 0.5084, "step": 6250 }, { "epoch": 0.1325740705393311, "grad_norm": 0.3054545819759369, "learning_rate": 1.979040699400777e-05, "loss": 0.5372, "step": 6251 }, { "epoch": 0.13259527899726412, "grad_norm": 0.3398788273334503, "learning_rate": 1.979033906733912e-05, "loss": 0.5443, "step": 6252 }, { "epoch": 0.13261648745519714, "grad_norm": 0.3292025029659271, "learning_rate": 1.979027112978174e-05, "loss": 0.4862, "step": 6253 }, { "epoch": 0.13263769591313015, "grad_norm": 0.3566587567329407, "learning_rate": 1.9790203181335707e-05, "loss": 0.5129, "step": 6254 }, { "epoch": 0.13265890437106317, "grad_norm": 0.3226414918899536, "learning_rate": 1.9790135222001096e-05, "loss": 0.5151, "step": 6255 }, { "epoch": 0.1326801128289962, "grad_norm": 0.33834752440452576, "learning_rate": 1.979006725177798e-05, "loss": 0.4492, "step": 6256 }, { "epoch": 0.1327013212869292, "grad_norm": 0.32751303911209106, "learning_rate": 1.9789999270666437e-05, "loss": 0.5597, "step": 6257 }, { "epoch": 0.13272252974486226, "grad_norm": 0.40009185671806335, "learning_rate": 1.978993127866654e-05, "loss": 0.4889, "step": 6258 }, { "epoch": 0.13274373820279528, "grad_norm": 0.32158055901527405, "learning_rate": 1.978986327577837e-05, "loss": 0.4709, "step": 6259 }, { "epoch": 0.1327649466607283, "grad_norm": 0.329428493976593, "learning_rate": 1.9789795262001996e-05, "loss": 0.5071, "step": 6260 }, { "epoch": 0.13278615511866132, "grad_norm": 0.31590375304222107, "learning_rate": 1.97897272373375e-05, "loss": 0.5196, "step": 6261 }, { "epoch": 0.13280736357659434, "grad_norm": 0.35646089911460876, "learning_rate": 1.9789659201784948e-05, "loss": 0.553, "step": 6262 }, { "epoch": 0.13282857203452736, "grad_norm": 0.3553810715675354, "learning_rate": 1.9789591155344426e-05, "loss": 0.5279, "step": 6263 }, { "epoch": 0.1328497804924604, "grad_norm": 0.385061115026474, "learning_rate": 1.9789523098016007e-05, "loss": 0.5283, "step": 6264 }, { "epoch": 0.13287098895039343, "grad_norm": 0.34759220480918884, "learning_rate": 1.9789455029799765e-05, "loss": 0.536, "step": 6265 }, { "epoch": 0.13289219740832645, "grad_norm": 0.34751906991004944, "learning_rate": 1.9789386950695776e-05, "loss": 0.5377, "step": 6266 }, { "epoch": 0.13291340586625947, "grad_norm": 0.44181281328201294, "learning_rate": 1.9789318860704116e-05, "loss": 0.5361, "step": 6267 }, { "epoch": 0.13293461432419248, "grad_norm": 0.3133504390716553, "learning_rate": 1.978925075982486e-05, "loss": 0.5202, "step": 6268 }, { "epoch": 0.1329558227821255, "grad_norm": 0.3241862654685974, "learning_rate": 1.9789182648058085e-05, "loss": 0.512, "step": 6269 }, { "epoch": 0.13297703124005852, "grad_norm": 0.4295468032360077, "learning_rate": 1.9789114525403866e-05, "loss": 0.52, "step": 6270 }, { "epoch": 0.13299823969799157, "grad_norm": 0.3563438057899475, "learning_rate": 1.9789046391862275e-05, "loss": 0.621, "step": 6271 }, { "epoch": 0.1330194481559246, "grad_norm": 0.3161875307559967, "learning_rate": 1.97889782474334e-05, "loss": 0.5929, "step": 6272 }, { "epoch": 0.1330406566138576, "grad_norm": 0.31379973888397217, "learning_rate": 1.9788910092117303e-05, "loss": 0.5343, "step": 6273 }, { "epoch": 0.13306186507179063, "grad_norm": 0.3057572841644287, "learning_rate": 1.9788841925914064e-05, "loss": 0.5332, "step": 6274 }, { "epoch": 0.13308307352972365, "grad_norm": 0.3477347195148468, "learning_rate": 1.9788773748823762e-05, "loss": 0.5987, "step": 6275 }, { "epoch": 0.13310428198765667, "grad_norm": 0.31494927406311035, "learning_rate": 1.978870556084647e-05, "loss": 0.4981, "step": 6276 }, { "epoch": 0.1331254904455897, "grad_norm": 0.4838106632232666, "learning_rate": 1.9788637361982264e-05, "loss": 0.5906, "step": 6277 }, { "epoch": 0.13314669890352274, "grad_norm": 0.32342660427093506, "learning_rate": 1.9788569152231225e-05, "loss": 0.5398, "step": 6278 }, { "epoch": 0.13316790736145576, "grad_norm": 0.2940163016319275, "learning_rate": 1.978850093159342e-05, "loss": 0.4653, "step": 6279 }, { "epoch": 0.13318911581938878, "grad_norm": 0.2889755666255951, "learning_rate": 1.9788432700068932e-05, "loss": 0.4917, "step": 6280 }, { "epoch": 0.1332103242773218, "grad_norm": 0.31006261706352234, "learning_rate": 1.9788364457657834e-05, "loss": 0.5221, "step": 6281 }, { "epoch": 0.13323153273525482, "grad_norm": 0.31922581791877747, "learning_rate": 1.97882962043602e-05, "loss": 0.5081, "step": 6282 }, { "epoch": 0.13325274119318783, "grad_norm": 0.3871995210647583, "learning_rate": 1.978822794017611e-05, "loss": 0.5658, "step": 6283 }, { "epoch": 0.13327394965112085, "grad_norm": 0.33544865250587463, "learning_rate": 1.9788159665105635e-05, "loss": 0.6385, "step": 6284 }, { "epoch": 0.1332951581090539, "grad_norm": 0.3420267701148987, "learning_rate": 1.9788091379148854e-05, "loss": 0.4583, "step": 6285 }, { "epoch": 0.13331636656698692, "grad_norm": 0.3319692015647888, "learning_rate": 1.9788023082305846e-05, "loss": 0.5273, "step": 6286 }, { "epoch": 0.13333757502491994, "grad_norm": 0.3197912573814392, "learning_rate": 1.9787954774576683e-05, "loss": 0.5473, "step": 6287 }, { "epoch": 0.13335878348285296, "grad_norm": 0.3293786644935608, "learning_rate": 1.978788645596144e-05, "loss": 0.5118, "step": 6288 }, { "epoch": 0.13337999194078598, "grad_norm": 0.32493582367897034, "learning_rate": 1.9787818126460196e-05, "loss": 0.5082, "step": 6289 }, { "epoch": 0.133401200398719, "grad_norm": 0.3301178812980652, "learning_rate": 1.9787749786073024e-05, "loss": 0.4719, "step": 6290 }, { "epoch": 0.13342240885665202, "grad_norm": 0.4875868558883667, "learning_rate": 1.9787681434800004e-05, "loss": 0.5386, "step": 6291 }, { "epoch": 0.13344361731458507, "grad_norm": 0.31073713302612305, "learning_rate": 1.978761307264121e-05, "loss": 0.4724, "step": 6292 }, { "epoch": 0.1334648257725181, "grad_norm": 0.3456456661224365, "learning_rate": 1.9787544699596716e-05, "loss": 0.4773, "step": 6293 }, { "epoch": 0.1334860342304511, "grad_norm": 0.30476245284080505, "learning_rate": 1.9787476315666602e-05, "loss": 0.5299, "step": 6294 }, { "epoch": 0.13350724268838413, "grad_norm": 0.5971050262451172, "learning_rate": 1.978740792085094e-05, "loss": 0.46, "step": 6295 }, { "epoch": 0.13352845114631715, "grad_norm": 0.35523056983947754, "learning_rate": 1.978733951514981e-05, "loss": 0.4974, "step": 6296 }, { "epoch": 0.13354965960425016, "grad_norm": 0.3226662874221802, "learning_rate": 1.9787271098563285e-05, "loss": 0.5728, "step": 6297 }, { "epoch": 0.1335708680621832, "grad_norm": 0.3119082450866699, "learning_rate": 1.978720267109144e-05, "loss": 0.478, "step": 6298 }, { "epoch": 0.13359207652011623, "grad_norm": 0.3181594908237457, "learning_rate": 1.9787134232734352e-05, "loss": 0.5023, "step": 6299 }, { "epoch": 0.13361328497804925, "grad_norm": 0.3530328869819641, "learning_rate": 1.9787065783492103e-05, "loss": 0.473, "step": 6300 }, { "epoch": 0.13363449343598227, "grad_norm": 0.2994166314601898, "learning_rate": 1.9786997323364762e-05, "loss": 0.496, "step": 6301 }, { "epoch": 0.1336557018939153, "grad_norm": 0.3312479853630066, "learning_rate": 1.9786928852352407e-05, "loss": 0.5477, "step": 6302 }, { "epoch": 0.1336769103518483, "grad_norm": 0.3297177851200104, "learning_rate": 1.9786860370455117e-05, "loss": 0.5437, "step": 6303 }, { "epoch": 0.13369811880978133, "grad_norm": 0.30671828985214233, "learning_rate": 1.9786791877672965e-05, "loss": 0.5787, "step": 6304 }, { "epoch": 0.13371932726771438, "grad_norm": 0.33812415599823, "learning_rate": 1.9786723374006027e-05, "loss": 0.5631, "step": 6305 }, { "epoch": 0.1337405357256474, "grad_norm": 0.3037593364715576, "learning_rate": 1.9786654859454383e-05, "loss": 0.5035, "step": 6306 }, { "epoch": 0.13376174418358042, "grad_norm": 0.31498464941978455, "learning_rate": 1.97865863340181e-05, "loss": 0.5541, "step": 6307 }, { "epoch": 0.13378295264151344, "grad_norm": 0.3076023757457733, "learning_rate": 1.978651779769727e-05, "loss": 0.5188, "step": 6308 }, { "epoch": 0.13380416109944646, "grad_norm": 0.2957978844642639, "learning_rate": 1.9786449250491954e-05, "loss": 0.4012, "step": 6309 }, { "epoch": 0.13382536955737948, "grad_norm": 0.3050364851951599, "learning_rate": 1.9786380692402234e-05, "loss": 0.4341, "step": 6310 }, { "epoch": 0.1338465780153125, "grad_norm": 0.4042172431945801, "learning_rate": 1.9786312123428188e-05, "loss": 0.5847, "step": 6311 }, { "epoch": 0.13386778647324554, "grad_norm": 0.3685097396373749, "learning_rate": 1.978624354356989e-05, "loss": 0.6146, "step": 6312 }, { "epoch": 0.13388899493117856, "grad_norm": 0.3154434561729431, "learning_rate": 1.9786174952827416e-05, "loss": 0.4755, "step": 6313 }, { "epoch": 0.13391020338911158, "grad_norm": 0.3442998230457306, "learning_rate": 1.9786106351200846e-05, "loss": 0.6039, "step": 6314 }, { "epoch": 0.1339314118470446, "grad_norm": 0.3683169484138489, "learning_rate": 1.978603773869025e-05, "loss": 0.5606, "step": 6315 }, { "epoch": 0.13395262030497762, "grad_norm": 0.33834224939346313, "learning_rate": 1.978596911529571e-05, "loss": 0.4466, "step": 6316 }, { "epoch": 0.13397382876291064, "grad_norm": 0.3177957832813263, "learning_rate": 1.97859004810173e-05, "loss": 0.5415, "step": 6317 }, { "epoch": 0.13399503722084366, "grad_norm": 0.3604778051376343, "learning_rate": 1.9785831835855098e-05, "loss": 0.5873, "step": 6318 }, { "epoch": 0.1340162456787767, "grad_norm": 0.3350578248500824, "learning_rate": 1.9785763179809176e-05, "loss": 0.5385, "step": 6319 }, { "epoch": 0.13403745413670973, "grad_norm": 0.28211039304733276, "learning_rate": 1.9785694512879615e-05, "loss": 0.4456, "step": 6320 }, { "epoch": 0.13405866259464275, "grad_norm": 0.32533586025238037, "learning_rate": 1.978562583506649e-05, "loss": 0.4665, "step": 6321 }, { "epoch": 0.13407987105257577, "grad_norm": 0.3211863040924072, "learning_rate": 1.9785557146369877e-05, "loss": 0.4468, "step": 6322 }, { "epoch": 0.13410107951050879, "grad_norm": 0.31703364849090576, "learning_rate": 1.9785488446789852e-05, "loss": 0.5602, "step": 6323 }, { "epoch": 0.1341222879684418, "grad_norm": 0.5378225445747375, "learning_rate": 1.978541973632649e-05, "loss": 0.5079, "step": 6324 }, { "epoch": 0.13414349642637483, "grad_norm": 0.32327529788017273, "learning_rate": 1.978535101497987e-05, "loss": 0.5428, "step": 6325 }, { "epoch": 0.13416470488430787, "grad_norm": 0.3058258891105652, "learning_rate": 1.978528228275007e-05, "loss": 0.532, "step": 6326 }, { "epoch": 0.1341859133422409, "grad_norm": 0.37295088171958923, "learning_rate": 1.978521353963716e-05, "loss": 0.4982, "step": 6327 }, { "epoch": 0.1342071218001739, "grad_norm": 0.6185670495033264, "learning_rate": 1.9785144785641224e-05, "loss": 0.5175, "step": 6328 }, { "epoch": 0.13422833025810693, "grad_norm": 0.34629085659980774, "learning_rate": 1.9785076020762332e-05, "loss": 0.5879, "step": 6329 }, { "epoch": 0.13424953871603995, "grad_norm": 0.3584023714065552, "learning_rate": 1.9785007245000564e-05, "loss": 0.6017, "step": 6330 }, { "epoch": 0.13427074717397297, "grad_norm": 0.32916781306266785, "learning_rate": 1.9784938458355997e-05, "loss": 0.5137, "step": 6331 }, { "epoch": 0.134291955631906, "grad_norm": 0.39391419291496277, "learning_rate": 1.9784869660828708e-05, "loss": 0.6391, "step": 6332 }, { "epoch": 0.13431316408983904, "grad_norm": 0.35023781657218933, "learning_rate": 1.978480085241877e-05, "loss": 0.507, "step": 6333 }, { "epoch": 0.13433437254777206, "grad_norm": 0.32676681876182556, "learning_rate": 1.978473203312626e-05, "loss": 0.5841, "step": 6334 }, { "epoch": 0.13435558100570508, "grad_norm": 0.32114407420158386, "learning_rate": 1.9784663202951262e-05, "loss": 0.5315, "step": 6335 }, { "epoch": 0.1343767894636381, "grad_norm": 0.3422534763813019, "learning_rate": 1.978459436189384e-05, "loss": 0.5842, "step": 6336 }, { "epoch": 0.13439799792157112, "grad_norm": 0.38233065605163574, "learning_rate": 1.978452550995408e-05, "loss": 0.5566, "step": 6337 }, { "epoch": 0.13441920637950414, "grad_norm": 0.373127818107605, "learning_rate": 1.9784456647132057e-05, "loss": 0.6015, "step": 6338 }, { "epoch": 0.13444041483743718, "grad_norm": 0.3194609582424164, "learning_rate": 1.9784387773427842e-05, "loss": 0.5087, "step": 6339 }, { "epoch": 0.1344616232953702, "grad_norm": 0.3561531901359558, "learning_rate": 1.978431888884152e-05, "loss": 0.5697, "step": 6340 }, { "epoch": 0.13448283175330322, "grad_norm": 0.34245383739471436, "learning_rate": 1.9784249993373163e-05, "loss": 0.5033, "step": 6341 }, { "epoch": 0.13450404021123624, "grad_norm": 0.309120237827301, "learning_rate": 1.9784181087022848e-05, "loss": 0.4792, "step": 6342 }, { "epoch": 0.13452524866916926, "grad_norm": 0.33381760120391846, "learning_rate": 1.978411216979065e-05, "loss": 0.4901, "step": 6343 }, { "epoch": 0.13454645712710228, "grad_norm": 0.33829933404922485, "learning_rate": 1.9784043241676647e-05, "loss": 0.5048, "step": 6344 }, { "epoch": 0.1345676655850353, "grad_norm": 0.3552475869655609, "learning_rate": 1.978397430268092e-05, "loss": 0.5513, "step": 6345 }, { "epoch": 0.13458887404296835, "grad_norm": 0.3102605938911438, "learning_rate": 1.978390535280354e-05, "loss": 0.5146, "step": 6346 }, { "epoch": 0.13461008250090137, "grad_norm": 0.31153491139411926, "learning_rate": 1.9783836392044585e-05, "loss": 0.507, "step": 6347 }, { "epoch": 0.1346312909588344, "grad_norm": 0.30369454622268677, "learning_rate": 1.978376742040413e-05, "loss": 0.4951, "step": 6348 }, { "epoch": 0.1346524994167674, "grad_norm": 0.32757455110549927, "learning_rate": 1.9783698437882254e-05, "loss": 0.5139, "step": 6349 }, { "epoch": 0.13467370787470043, "grad_norm": 0.33144116401672363, "learning_rate": 1.978362944447904e-05, "loss": 0.4535, "step": 6350 }, { "epoch": 0.13469491633263345, "grad_norm": 0.3641398847103119, "learning_rate": 1.978356044019455e-05, "loss": 0.5018, "step": 6351 }, { "epoch": 0.13471612479056647, "grad_norm": 0.35866090655326843, "learning_rate": 1.9783491425028875e-05, "loss": 0.642, "step": 6352 }, { "epoch": 0.1347373332484995, "grad_norm": 0.3293224275112152, "learning_rate": 1.978342239898208e-05, "loss": 0.5243, "step": 6353 }, { "epoch": 0.13475854170643253, "grad_norm": 0.31565603613853455, "learning_rate": 1.9783353362054254e-05, "loss": 0.472, "step": 6354 }, { "epoch": 0.13477975016436555, "grad_norm": 0.3320217430591583, "learning_rate": 1.9783284314245463e-05, "loss": 0.5907, "step": 6355 }, { "epoch": 0.13480095862229857, "grad_norm": 0.3547661006450653, "learning_rate": 1.978321525555579e-05, "loss": 0.5811, "step": 6356 }, { "epoch": 0.1348221670802316, "grad_norm": 0.30257582664489746, "learning_rate": 1.978314618598531e-05, "loss": 0.5024, "step": 6357 }, { "epoch": 0.1348433755381646, "grad_norm": 0.3393680155277252, "learning_rate": 1.9783077105534102e-05, "loss": 0.5435, "step": 6358 }, { "epoch": 0.13486458399609763, "grad_norm": 0.42062899470329285, "learning_rate": 1.9783008014202237e-05, "loss": 0.5453, "step": 6359 }, { "epoch": 0.13488579245403068, "grad_norm": 0.32202351093292236, "learning_rate": 1.9782938911989794e-05, "loss": 0.553, "step": 6360 }, { "epoch": 0.1349070009119637, "grad_norm": 0.3280606269836426, "learning_rate": 1.9782869798896856e-05, "loss": 0.5505, "step": 6361 }, { "epoch": 0.13492820936989672, "grad_norm": 0.44021424651145935, "learning_rate": 1.9782800674923496e-05, "loss": 0.4768, "step": 6362 }, { "epoch": 0.13494941782782974, "grad_norm": 0.42695334553718567, "learning_rate": 1.9782731540069787e-05, "loss": 0.4873, "step": 6363 }, { "epoch": 0.13497062628576276, "grad_norm": 0.38141587376594543, "learning_rate": 1.9782662394335808e-05, "loss": 0.5589, "step": 6364 }, { "epoch": 0.13499183474369578, "grad_norm": 0.31512945890426636, "learning_rate": 1.978259323772164e-05, "loss": 0.5705, "step": 6365 }, { "epoch": 0.1350130432016288, "grad_norm": 0.3736168444156647, "learning_rate": 1.978252407022735e-05, "loss": 0.5312, "step": 6366 }, { "epoch": 0.13503425165956184, "grad_norm": 0.30987057089805603, "learning_rate": 1.9782454891853028e-05, "loss": 0.5364, "step": 6367 }, { "epoch": 0.13505546011749486, "grad_norm": 0.323226660490036, "learning_rate": 1.9782385702598744e-05, "loss": 0.5529, "step": 6368 }, { "epoch": 0.13507666857542788, "grad_norm": 0.31088635325431824, "learning_rate": 1.9782316502464575e-05, "loss": 0.518, "step": 6369 }, { "epoch": 0.1350978770333609, "grad_norm": 0.33419936895370483, "learning_rate": 1.97822472914506e-05, "loss": 0.5977, "step": 6370 }, { "epoch": 0.13511908549129392, "grad_norm": 0.35254600644111633, "learning_rate": 1.9782178069556894e-05, "loss": 0.5183, "step": 6371 }, { "epoch": 0.13514029394922694, "grad_norm": 0.3388831615447998, "learning_rate": 1.978210883678353e-05, "loss": 0.5051, "step": 6372 }, { "epoch": 0.13516150240716, "grad_norm": 0.3119165301322937, "learning_rate": 1.9782039593130594e-05, "loss": 0.5765, "step": 6373 }, { "epoch": 0.135182710865093, "grad_norm": 0.2951212525367737, "learning_rate": 1.9781970338598157e-05, "loss": 0.438, "step": 6374 }, { "epoch": 0.13520391932302603, "grad_norm": 0.32889223098754883, "learning_rate": 1.9781901073186297e-05, "loss": 0.5587, "step": 6375 }, { "epoch": 0.13522512778095905, "grad_norm": 0.33384183049201965, "learning_rate": 1.9781831796895094e-05, "loss": 0.5654, "step": 6376 }, { "epoch": 0.13524633623889207, "grad_norm": 0.35843625664711, "learning_rate": 1.978176250972462e-05, "loss": 0.5937, "step": 6377 }, { "epoch": 0.1352675446968251, "grad_norm": 0.316935271024704, "learning_rate": 1.9781693211674958e-05, "loss": 0.5219, "step": 6378 }, { "epoch": 0.1352887531547581, "grad_norm": 0.32162293791770935, "learning_rate": 1.978162390274618e-05, "loss": 0.4385, "step": 6379 }, { "epoch": 0.13530996161269115, "grad_norm": 0.3013295531272888, "learning_rate": 1.9781554582938365e-05, "loss": 0.4771, "step": 6380 }, { "epoch": 0.13533117007062417, "grad_norm": 0.3516240417957306, "learning_rate": 1.978148525225159e-05, "loss": 0.5073, "step": 6381 }, { "epoch": 0.1353523785285572, "grad_norm": 0.3446676731109619, "learning_rate": 1.9781415910685933e-05, "loss": 0.5988, "step": 6382 }, { "epoch": 0.1353735869864902, "grad_norm": 0.34504175186157227, "learning_rate": 1.9781346558241467e-05, "loss": 0.6069, "step": 6383 }, { "epoch": 0.13539479544442323, "grad_norm": 0.3482387661933899, "learning_rate": 1.9781277194918273e-05, "loss": 0.6126, "step": 6384 }, { "epoch": 0.13541600390235625, "grad_norm": 0.31755727529525757, "learning_rate": 1.9781207820716432e-05, "loss": 0.4843, "step": 6385 }, { "epoch": 0.13543721236028927, "grad_norm": 0.32513493299484253, "learning_rate": 1.9781138435636014e-05, "loss": 0.515, "step": 6386 }, { "epoch": 0.13545842081822232, "grad_norm": 0.3399151563644409, "learning_rate": 1.9781069039677098e-05, "loss": 0.4521, "step": 6387 }, { "epoch": 0.13547962927615534, "grad_norm": 0.3491286039352417, "learning_rate": 1.978099963283976e-05, "loss": 0.568, "step": 6388 }, { "epoch": 0.13550083773408836, "grad_norm": 0.33085277676582336, "learning_rate": 1.9780930215124083e-05, "loss": 0.5303, "step": 6389 }, { "epoch": 0.13552204619202138, "grad_norm": 0.3070320188999176, "learning_rate": 1.978086078653014e-05, "loss": 0.6144, "step": 6390 }, { "epoch": 0.1355432546499544, "grad_norm": 0.30776751041412354, "learning_rate": 1.9780791347058005e-05, "loss": 0.5651, "step": 6391 }, { "epoch": 0.13556446310788742, "grad_norm": 0.311124712228775, "learning_rate": 1.9780721896707763e-05, "loss": 0.5261, "step": 6392 }, { "epoch": 0.13558567156582044, "grad_norm": 0.3241422176361084, "learning_rate": 1.9780652435479483e-05, "loss": 0.5025, "step": 6393 }, { "epoch": 0.13560688002375348, "grad_norm": 0.35428717732429504, "learning_rate": 1.978058296337325e-05, "loss": 0.4528, "step": 6394 }, { "epoch": 0.1356280884816865, "grad_norm": 0.3487105965614319, "learning_rate": 1.9780513480389136e-05, "loss": 0.5592, "step": 6395 }, { "epoch": 0.13564929693961952, "grad_norm": 0.31363511085510254, "learning_rate": 1.9780443986527218e-05, "loss": 0.5029, "step": 6396 }, { "epoch": 0.13567050539755254, "grad_norm": 0.3312996029853821, "learning_rate": 1.978037448178758e-05, "loss": 0.5496, "step": 6397 }, { "epoch": 0.13569171385548556, "grad_norm": 0.3843262791633606, "learning_rate": 1.9780304966170288e-05, "loss": 0.6175, "step": 6398 }, { "epoch": 0.13571292231341858, "grad_norm": 0.3511042594909668, "learning_rate": 1.9780235439675433e-05, "loss": 0.5089, "step": 6399 }, { "epoch": 0.1357341307713516, "grad_norm": 0.3514053225517273, "learning_rate": 1.9780165902303082e-05, "loss": 0.5569, "step": 6400 }, { "epoch": 0.13575533922928465, "grad_norm": 0.3477688431739807, "learning_rate": 1.9780096354053314e-05, "loss": 0.4882, "step": 6401 }, { "epoch": 0.13577654768721767, "grad_norm": 0.31571200489997864, "learning_rate": 1.978002679492621e-05, "loss": 0.4955, "step": 6402 }, { "epoch": 0.1357977561451507, "grad_norm": 0.340614378452301, "learning_rate": 1.9779957224921843e-05, "loss": 0.5795, "step": 6403 }, { "epoch": 0.1358189646030837, "grad_norm": 0.3193766176700592, "learning_rate": 1.9779887644040293e-05, "loss": 0.4717, "step": 6404 }, { "epoch": 0.13584017306101673, "grad_norm": 0.44685015082359314, "learning_rate": 1.9779818052281637e-05, "loss": 0.4312, "step": 6405 }, { "epoch": 0.13586138151894975, "grad_norm": 0.30569398403167725, "learning_rate": 1.9779748449645954e-05, "loss": 0.5264, "step": 6406 }, { "epoch": 0.13588258997688277, "grad_norm": 0.3176659643650055, "learning_rate": 1.9779678836133317e-05, "loss": 0.532, "step": 6407 }, { "epoch": 0.13590379843481581, "grad_norm": 0.3756678104400635, "learning_rate": 1.9779609211743808e-05, "loss": 0.5496, "step": 6408 }, { "epoch": 0.13592500689274883, "grad_norm": 0.34211915731430054, "learning_rate": 1.97795395764775e-05, "loss": 0.4324, "step": 6409 }, { "epoch": 0.13594621535068185, "grad_norm": 0.33899834752082825, "learning_rate": 1.9779469930334478e-05, "loss": 0.5841, "step": 6410 }, { "epoch": 0.13596742380861487, "grad_norm": 0.33873969316482544, "learning_rate": 1.977940027331481e-05, "loss": 0.5617, "step": 6411 }, { "epoch": 0.1359886322665479, "grad_norm": 0.3630305230617523, "learning_rate": 1.9779330605418582e-05, "loss": 0.5421, "step": 6412 }, { "epoch": 0.1360098407244809, "grad_norm": 0.3364561200141907, "learning_rate": 1.9779260926645865e-05, "loss": 0.5408, "step": 6413 }, { "epoch": 0.13603104918241396, "grad_norm": 0.3312736451625824, "learning_rate": 1.9779191236996736e-05, "loss": 0.5965, "step": 6414 }, { "epoch": 0.13605225764034698, "grad_norm": 0.33508533239364624, "learning_rate": 1.977912153647128e-05, "loss": 0.5512, "step": 6415 }, { "epoch": 0.13607346609828, "grad_norm": 0.3631700873374939, "learning_rate": 1.977905182506957e-05, "loss": 0.5197, "step": 6416 }, { "epoch": 0.13609467455621302, "grad_norm": 0.36875444650650024, "learning_rate": 1.9778982102791682e-05, "loss": 0.5234, "step": 6417 }, { "epoch": 0.13611588301414604, "grad_norm": 0.3361959159374237, "learning_rate": 1.9778912369637695e-05, "loss": 0.5199, "step": 6418 }, { "epoch": 0.13613709147207906, "grad_norm": 0.32335156202316284, "learning_rate": 1.977884262560769e-05, "loss": 0.5666, "step": 6419 }, { "epoch": 0.13615829993001208, "grad_norm": 0.3193000257015228, "learning_rate": 1.9778772870701736e-05, "loss": 0.5707, "step": 6420 }, { "epoch": 0.13617950838794513, "grad_norm": 0.31194445490837097, "learning_rate": 1.9778703104919917e-05, "loss": 0.5115, "step": 6421 }, { "epoch": 0.13620071684587814, "grad_norm": 1.2678544521331787, "learning_rate": 1.977863332826231e-05, "loss": 0.6221, "step": 6422 }, { "epoch": 0.13622192530381116, "grad_norm": 0.3331334888935089, "learning_rate": 1.977856354072899e-05, "loss": 0.4528, "step": 6423 }, { "epoch": 0.13624313376174418, "grad_norm": 0.3451862335205078, "learning_rate": 1.9778493742320042e-05, "loss": 0.613, "step": 6424 }, { "epoch": 0.1362643422196772, "grad_norm": 0.32134899497032166, "learning_rate": 1.9778423933035533e-05, "loss": 0.4897, "step": 6425 }, { "epoch": 0.13628555067761022, "grad_norm": 0.3295796513557434, "learning_rate": 1.9778354112875547e-05, "loss": 0.4954, "step": 6426 }, { "epoch": 0.13630675913554324, "grad_norm": 0.3319498896598816, "learning_rate": 1.977828428184016e-05, "loss": 0.4719, "step": 6427 }, { "epoch": 0.1363279675934763, "grad_norm": 0.34441420435905457, "learning_rate": 1.9778214439929453e-05, "loss": 0.4713, "step": 6428 }, { "epoch": 0.1363491760514093, "grad_norm": 0.4115889370441437, "learning_rate": 1.97781445871435e-05, "loss": 0.5297, "step": 6429 }, { "epoch": 0.13637038450934233, "grad_norm": 0.3219560384750366, "learning_rate": 1.977807472348238e-05, "loss": 0.5141, "step": 6430 }, { "epoch": 0.13639159296727535, "grad_norm": 0.3124277591705322, "learning_rate": 1.9778004848946168e-05, "loss": 0.5329, "step": 6431 }, { "epoch": 0.13641280142520837, "grad_norm": 0.3269980847835541, "learning_rate": 1.9777934963534945e-05, "loss": 0.5774, "step": 6432 }, { "epoch": 0.1364340098831414, "grad_norm": 0.3110431432723999, "learning_rate": 1.977786506724879e-05, "loss": 0.5158, "step": 6433 }, { "epoch": 0.1364552183410744, "grad_norm": 0.3384896218776703, "learning_rate": 1.9777795160087772e-05, "loss": 0.5277, "step": 6434 }, { "epoch": 0.13647642679900746, "grad_norm": 0.3895302414894104, "learning_rate": 1.977772524205198e-05, "loss": 0.5263, "step": 6435 }, { "epoch": 0.13649763525694047, "grad_norm": 0.3081737458705902, "learning_rate": 1.9777655313141486e-05, "loss": 0.5254, "step": 6436 }, { "epoch": 0.1365188437148735, "grad_norm": 0.3811683654785156, "learning_rate": 1.9777585373356366e-05, "loss": 0.4983, "step": 6437 }, { "epoch": 0.13654005217280651, "grad_norm": 0.31567156314849854, "learning_rate": 1.9777515422696708e-05, "loss": 0.465, "step": 6438 }, { "epoch": 0.13656126063073953, "grad_norm": 0.32021740078926086, "learning_rate": 1.9777445461162576e-05, "loss": 0.5389, "step": 6439 }, { "epoch": 0.13658246908867255, "grad_norm": 0.32302603125572205, "learning_rate": 1.9777375488754056e-05, "loss": 0.52, "step": 6440 }, { "epoch": 0.13660367754660557, "grad_norm": 0.34735891222953796, "learning_rate": 1.9777305505471223e-05, "loss": 0.5158, "step": 6441 }, { "epoch": 0.13662488600453862, "grad_norm": 0.4092380106449127, "learning_rate": 1.9777235511314156e-05, "loss": 0.5356, "step": 6442 }, { "epoch": 0.13664609446247164, "grad_norm": 0.3343685269355774, "learning_rate": 1.9777165506282935e-05, "loss": 0.537, "step": 6443 }, { "epoch": 0.13666730292040466, "grad_norm": 0.3178403377532959, "learning_rate": 1.9777095490377633e-05, "loss": 0.4911, "step": 6444 }, { "epoch": 0.13668851137833768, "grad_norm": 0.32130300998687744, "learning_rate": 1.977702546359833e-05, "loss": 0.5222, "step": 6445 }, { "epoch": 0.1367097198362707, "grad_norm": 0.3455832600593567, "learning_rate": 1.9776955425945104e-05, "loss": 0.5467, "step": 6446 }, { "epoch": 0.13673092829420372, "grad_norm": 0.34246447682380676, "learning_rate": 1.9776885377418033e-05, "loss": 0.5521, "step": 6447 }, { "epoch": 0.13675213675213677, "grad_norm": 0.36449548602104187, "learning_rate": 1.9776815318017196e-05, "loss": 0.4732, "step": 6448 }, { "epoch": 0.13677334521006979, "grad_norm": 0.32545706629753113, "learning_rate": 1.977674524774267e-05, "loss": 0.512, "step": 6449 }, { "epoch": 0.1367945536680028, "grad_norm": 0.34368306398391724, "learning_rate": 1.9776675166594533e-05, "loss": 0.5143, "step": 6450 }, { "epoch": 0.13681576212593582, "grad_norm": 0.30559349060058594, "learning_rate": 1.9776605074572862e-05, "loss": 0.5095, "step": 6451 }, { "epoch": 0.13683697058386884, "grad_norm": 0.39010950922966003, "learning_rate": 1.9776534971677738e-05, "loss": 0.5621, "step": 6452 }, { "epoch": 0.13685817904180186, "grad_norm": 0.3636895418167114, "learning_rate": 1.9776464857909234e-05, "loss": 0.5398, "step": 6453 }, { "epoch": 0.13687938749973488, "grad_norm": 0.30160918831825256, "learning_rate": 1.9776394733267433e-05, "loss": 0.4726, "step": 6454 }, { "epoch": 0.13690059595766793, "grad_norm": 0.3113860785961151, "learning_rate": 1.977632459775241e-05, "loss": 0.5078, "step": 6455 }, { "epoch": 0.13692180441560095, "grad_norm": 0.33866649866104126, "learning_rate": 1.977625445136424e-05, "loss": 0.4909, "step": 6456 }, { "epoch": 0.13694301287353397, "grad_norm": 0.33543094992637634, "learning_rate": 1.977618429410301e-05, "loss": 0.4823, "step": 6457 }, { "epoch": 0.136964221331467, "grad_norm": 0.3097529411315918, "learning_rate": 1.9776114125968792e-05, "loss": 0.4396, "step": 6458 }, { "epoch": 0.1369854297894, "grad_norm": 0.3790442943572998, "learning_rate": 1.9776043946961662e-05, "loss": 0.6022, "step": 6459 }, { "epoch": 0.13700663824733303, "grad_norm": 0.34044143557548523, "learning_rate": 1.9775973757081702e-05, "loss": 0.5542, "step": 6460 }, { "epoch": 0.13702784670526605, "grad_norm": 0.3367929458618164, "learning_rate": 1.977590355632899e-05, "loss": 0.5148, "step": 6461 }, { "epoch": 0.1370490551631991, "grad_norm": 0.621294379234314, "learning_rate": 1.97758333447036e-05, "loss": 0.515, "step": 6462 }, { "epoch": 0.13707026362113212, "grad_norm": 0.3248210549354553, "learning_rate": 1.9775763122205618e-05, "loss": 0.4705, "step": 6463 }, { "epoch": 0.13709147207906514, "grad_norm": 0.32463428378105164, "learning_rate": 1.9775692888835113e-05, "loss": 0.5453, "step": 6464 }, { "epoch": 0.13711268053699815, "grad_norm": 0.3696709871292114, "learning_rate": 1.9775622644592172e-05, "loss": 0.5244, "step": 6465 }, { "epoch": 0.13713388899493117, "grad_norm": 0.3563998341560364, "learning_rate": 1.9775552389476865e-05, "loss": 0.508, "step": 6466 }, { "epoch": 0.1371550974528642, "grad_norm": 0.327730268239975, "learning_rate": 1.977548212348927e-05, "loss": 0.4544, "step": 6467 }, { "epoch": 0.1371763059107972, "grad_norm": 0.2957471013069153, "learning_rate": 1.9775411846629475e-05, "loss": 0.525, "step": 6468 }, { "epoch": 0.13719751436873026, "grad_norm": 0.3926929831504822, "learning_rate": 1.977534155889755e-05, "loss": 0.6274, "step": 6469 }, { "epoch": 0.13721872282666328, "grad_norm": 0.30190935730934143, "learning_rate": 1.9775271260293575e-05, "loss": 0.5701, "step": 6470 }, { "epoch": 0.1372399312845963, "grad_norm": 0.3131212294101715, "learning_rate": 1.9775200950817628e-05, "loss": 0.4999, "step": 6471 }, { "epoch": 0.13726113974252932, "grad_norm": 0.32789310812950134, "learning_rate": 1.9775130630469786e-05, "loss": 0.5651, "step": 6472 }, { "epoch": 0.13728234820046234, "grad_norm": 0.32634004950523376, "learning_rate": 1.9775060299250127e-05, "loss": 0.5134, "step": 6473 }, { "epoch": 0.13730355665839536, "grad_norm": 0.30273282527923584, "learning_rate": 1.9774989957158736e-05, "loss": 0.5508, "step": 6474 }, { "epoch": 0.13732476511632838, "grad_norm": 0.3641899824142456, "learning_rate": 1.9774919604195684e-05, "loss": 0.4944, "step": 6475 }, { "epoch": 0.13734597357426143, "grad_norm": 0.29949113726615906, "learning_rate": 1.9774849240361052e-05, "loss": 0.4989, "step": 6476 }, { "epoch": 0.13736718203219445, "grad_norm": 0.32185304164886475, "learning_rate": 1.9774778865654915e-05, "loss": 0.526, "step": 6477 }, { "epoch": 0.13738839049012747, "grad_norm": 0.33425045013427734, "learning_rate": 1.9774708480077357e-05, "loss": 0.5955, "step": 6478 }, { "epoch": 0.13740959894806049, "grad_norm": 0.3578861653804779, "learning_rate": 1.977463808362845e-05, "loss": 0.5145, "step": 6479 }, { "epoch": 0.1374308074059935, "grad_norm": 0.32567664980888367, "learning_rate": 1.9774567676308275e-05, "loss": 0.5861, "step": 6480 }, { "epoch": 0.13745201586392652, "grad_norm": 0.3259558379650116, "learning_rate": 1.9774497258116913e-05, "loss": 0.4837, "step": 6481 }, { "epoch": 0.13747322432185954, "grad_norm": 0.32349565625190735, "learning_rate": 1.9774426829054442e-05, "loss": 0.5887, "step": 6482 }, { "epoch": 0.1374944327797926, "grad_norm": 0.3506581783294678, "learning_rate": 1.9774356389120933e-05, "loss": 0.5068, "step": 6483 }, { "epoch": 0.1375156412377256, "grad_norm": 0.3206596374511719, "learning_rate": 1.9774285938316475e-05, "loss": 0.5203, "step": 6484 }, { "epoch": 0.13753684969565863, "grad_norm": 0.31517043709754944, "learning_rate": 1.977421547664114e-05, "loss": 0.5735, "step": 6485 }, { "epoch": 0.13755805815359165, "grad_norm": 0.401269793510437, "learning_rate": 1.9774145004095005e-05, "loss": 0.5055, "step": 6486 }, { "epoch": 0.13757926661152467, "grad_norm": 0.37837284803390503, "learning_rate": 1.9774074520678152e-05, "loss": 0.5722, "step": 6487 }, { "epoch": 0.1376004750694577, "grad_norm": 0.31281280517578125, "learning_rate": 1.977400402639066e-05, "loss": 0.5056, "step": 6488 }, { "epoch": 0.13762168352739074, "grad_norm": 0.3704708516597748, "learning_rate": 1.97739335212326e-05, "loss": 0.5515, "step": 6489 }, { "epoch": 0.13764289198532376, "grad_norm": 0.28105252981185913, "learning_rate": 1.9773863005204065e-05, "loss": 0.5191, "step": 6490 }, { "epoch": 0.13766410044325678, "grad_norm": 0.3927956223487854, "learning_rate": 1.9773792478305118e-05, "loss": 0.5281, "step": 6491 }, { "epoch": 0.1376853089011898, "grad_norm": 0.3694004714488983, "learning_rate": 1.9773721940535845e-05, "loss": 0.606, "step": 6492 }, { "epoch": 0.13770651735912282, "grad_norm": 0.3239995539188385, "learning_rate": 1.9773651391896323e-05, "loss": 0.3876, "step": 6493 }, { "epoch": 0.13772772581705583, "grad_norm": 0.3210374414920807, "learning_rate": 1.9773580832386635e-05, "loss": 0.5582, "step": 6494 }, { "epoch": 0.13774893427498885, "grad_norm": 0.316315621137619, "learning_rate": 1.977351026200685e-05, "loss": 0.518, "step": 6495 }, { "epoch": 0.1377701427329219, "grad_norm": 0.349173367023468, "learning_rate": 1.9773439680757055e-05, "loss": 0.4457, "step": 6496 }, { "epoch": 0.13779135119085492, "grad_norm": 0.3296678066253662, "learning_rate": 1.9773369088637324e-05, "loss": 0.5145, "step": 6497 }, { "epoch": 0.13781255964878794, "grad_norm": 0.33277907967567444, "learning_rate": 1.977329848564774e-05, "loss": 0.5325, "step": 6498 }, { "epoch": 0.13783376810672096, "grad_norm": 0.3985128104686737, "learning_rate": 1.9773227871788372e-05, "loss": 0.7484, "step": 6499 }, { "epoch": 0.13785497656465398, "grad_norm": 0.34943729639053345, "learning_rate": 1.977315724705931e-05, "loss": 0.5465, "step": 6500 }, { "epoch": 0.137876185022587, "grad_norm": 0.34538254141807556, "learning_rate": 1.9773086611460624e-05, "loss": 0.4462, "step": 6501 }, { "epoch": 0.13789739348052002, "grad_norm": 0.32892391085624695, "learning_rate": 1.97730159649924e-05, "loss": 0.4473, "step": 6502 }, { "epoch": 0.13791860193845307, "grad_norm": 0.4236983358860016, "learning_rate": 1.9772945307654708e-05, "loss": 0.5423, "step": 6503 }, { "epoch": 0.1379398103963861, "grad_norm": 0.3255806267261505, "learning_rate": 1.9772874639447632e-05, "loss": 0.5329, "step": 6504 }, { "epoch": 0.1379610188543191, "grad_norm": 0.29922017455101013, "learning_rate": 1.977280396037125e-05, "loss": 0.4879, "step": 6505 }, { "epoch": 0.13798222731225213, "grad_norm": 0.35210639238357544, "learning_rate": 1.977273327042564e-05, "loss": 0.5032, "step": 6506 }, { "epoch": 0.13800343577018515, "grad_norm": 0.36922451853752136, "learning_rate": 1.9772662569610884e-05, "loss": 0.5559, "step": 6507 }, { "epoch": 0.13802464422811817, "grad_norm": 0.3835562765598297, "learning_rate": 1.9772591857927058e-05, "loss": 0.5729, "step": 6508 }, { "epoch": 0.13804585268605118, "grad_norm": 0.3449987471103668, "learning_rate": 1.9772521135374237e-05, "loss": 0.5324, "step": 6509 }, { "epoch": 0.13806706114398423, "grad_norm": 0.43549102544784546, "learning_rate": 1.9772450401952503e-05, "loss": 0.4255, "step": 6510 }, { "epoch": 0.13808826960191725, "grad_norm": 0.35136207938194275, "learning_rate": 1.9772379657661937e-05, "loss": 0.461, "step": 6511 }, { "epoch": 0.13810947805985027, "grad_norm": 0.3186909556388855, "learning_rate": 1.9772308902502613e-05, "loss": 0.4791, "step": 6512 }, { "epoch": 0.1381306865177833, "grad_norm": 0.3517954647541046, "learning_rate": 1.9772238136474613e-05, "loss": 0.581, "step": 6513 }, { "epoch": 0.1381518949757163, "grad_norm": 0.34497660398483276, "learning_rate": 1.9772167359578015e-05, "loss": 0.4969, "step": 6514 }, { "epoch": 0.13817310343364933, "grad_norm": 0.32819241285324097, "learning_rate": 1.9772096571812897e-05, "loss": 0.4989, "step": 6515 }, { "epoch": 0.13819431189158235, "grad_norm": 0.3516151010990143, "learning_rate": 1.9772025773179336e-05, "loss": 0.5426, "step": 6516 }, { "epoch": 0.1382155203495154, "grad_norm": 0.29099902510643005, "learning_rate": 1.9771954963677415e-05, "loss": 0.4531, "step": 6517 }, { "epoch": 0.13823672880744842, "grad_norm": 0.32729315757751465, "learning_rate": 1.977188414330721e-05, "loss": 0.5066, "step": 6518 }, { "epoch": 0.13825793726538144, "grad_norm": 0.3073849380016327, "learning_rate": 1.9771813312068803e-05, "loss": 0.4834, "step": 6519 }, { "epoch": 0.13827914572331446, "grad_norm": 0.32699957489967346, "learning_rate": 1.9771742469962266e-05, "loss": 0.6394, "step": 6520 }, { "epoch": 0.13830035418124748, "grad_norm": 0.3252829909324646, "learning_rate": 1.9771671616987682e-05, "loss": 0.4499, "step": 6521 }, { "epoch": 0.1383215626391805, "grad_norm": 0.34348487854003906, "learning_rate": 1.977160075314513e-05, "loss": 0.6377, "step": 6522 }, { "epoch": 0.13834277109711354, "grad_norm": 0.3342905342578888, "learning_rate": 1.9771529878434693e-05, "loss": 0.4934, "step": 6523 }, { "epoch": 0.13836397955504656, "grad_norm": 0.35036560893058777, "learning_rate": 1.977145899285644e-05, "loss": 0.5777, "step": 6524 }, { "epoch": 0.13838518801297958, "grad_norm": 0.3320276141166687, "learning_rate": 1.9771388096410458e-05, "loss": 0.4558, "step": 6525 }, { "epoch": 0.1384063964709126, "grad_norm": 0.32209810614585876, "learning_rate": 1.977131718909682e-05, "loss": 0.5328, "step": 6526 }, { "epoch": 0.13842760492884562, "grad_norm": 0.3253316581249237, "learning_rate": 1.977124627091561e-05, "loss": 0.494, "step": 6527 }, { "epoch": 0.13844881338677864, "grad_norm": 0.37209513783454895, "learning_rate": 1.977117534186691e-05, "loss": 0.688, "step": 6528 }, { "epoch": 0.13847002184471166, "grad_norm": 0.3351329565048218, "learning_rate": 1.9771104401950783e-05, "loss": 0.5931, "step": 6529 }, { "epoch": 0.1384912303026447, "grad_norm": 0.3187977075576782, "learning_rate": 1.9771033451167327e-05, "loss": 0.5425, "step": 6530 }, { "epoch": 0.13851243876057773, "grad_norm": 0.32696014642715454, "learning_rate": 1.977096248951661e-05, "loss": 0.505, "step": 6531 }, { "epoch": 0.13853364721851075, "grad_norm": 0.3225260078907013, "learning_rate": 1.9770891516998712e-05, "loss": 0.5018, "step": 6532 }, { "epoch": 0.13855485567644377, "grad_norm": 0.33257922530174255, "learning_rate": 1.9770820533613716e-05, "loss": 0.5293, "step": 6533 }, { "epoch": 0.1385760641343768, "grad_norm": 0.36477139592170715, "learning_rate": 1.9770749539361698e-05, "loss": 0.5374, "step": 6534 }, { "epoch": 0.1385972725923098, "grad_norm": 0.29074230790138245, "learning_rate": 1.9770678534242735e-05, "loss": 0.4795, "step": 6535 }, { "epoch": 0.13861848105024283, "grad_norm": 0.3418310880661011, "learning_rate": 1.9770607518256908e-05, "loss": 0.4697, "step": 6536 }, { "epoch": 0.13863968950817587, "grad_norm": 0.2993663251399994, "learning_rate": 1.9770536491404296e-05, "loss": 0.5371, "step": 6537 }, { "epoch": 0.1386608979661089, "grad_norm": 0.33054119348526, "learning_rate": 1.9770465453684983e-05, "loss": 0.5364, "step": 6538 }, { "epoch": 0.1386821064240419, "grad_norm": 0.3038027584552765, "learning_rate": 1.977039440509904e-05, "loss": 0.5511, "step": 6539 }, { "epoch": 0.13870331488197493, "grad_norm": 0.32717883586883545, "learning_rate": 1.977032334564655e-05, "loss": 0.5114, "step": 6540 }, { "epoch": 0.13872452333990795, "grad_norm": 0.33483195304870605, "learning_rate": 1.9770252275327587e-05, "loss": 0.5794, "step": 6541 }, { "epoch": 0.13874573179784097, "grad_norm": 0.3413184583187103, "learning_rate": 1.9770181194142236e-05, "loss": 0.5736, "step": 6542 }, { "epoch": 0.138766940255774, "grad_norm": 0.3731805086135864, "learning_rate": 1.9770110102090577e-05, "loss": 0.5207, "step": 6543 }, { "epoch": 0.13878814871370704, "grad_norm": 0.3711572587490082, "learning_rate": 1.9770038999172688e-05, "loss": 0.5755, "step": 6544 }, { "epoch": 0.13880935717164006, "grad_norm": 0.34224429726600647, "learning_rate": 1.9769967885388644e-05, "loss": 0.5235, "step": 6545 }, { "epoch": 0.13883056562957308, "grad_norm": 0.35150763392448425, "learning_rate": 1.9769896760738522e-05, "loss": 0.5149, "step": 6546 }, { "epoch": 0.1388517740875061, "grad_norm": 0.3035375773906708, "learning_rate": 1.976982562522241e-05, "loss": 0.5283, "step": 6547 }, { "epoch": 0.13887298254543912, "grad_norm": 0.37795668840408325, "learning_rate": 1.9769754478840382e-05, "loss": 0.6092, "step": 6548 }, { "epoch": 0.13889419100337214, "grad_norm": 0.3598251938819885, "learning_rate": 1.976968332159252e-05, "loss": 0.5883, "step": 6549 }, { "epoch": 0.13891539946130516, "grad_norm": 0.3646174967288971, "learning_rate": 1.97696121534789e-05, "loss": 0.4732, "step": 6550 }, { "epoch": 0.1389366079192382, "grad_norm": 0.3248865604400635, "learning_rate": 1.9769540974499602e-05, "loss": 0.4128, "step": 6551 }, { "epoch": 0.13895781637717122, "grad_norm": 0.3232952952384949, "learning_rate": 1.976946978465471e-05, "loss": 0.484, "step": 6552 }, { "epoch": 0.13897902483510424, "grad_norm": 0.3368457555770874, "learning_rate": 1.9769398583944292e-05, "loss": 0.5136, "step": 6553 }, { "epoch": 0.13900023329303726, "grad_norm": 0.41973623633384705, "learning_rate": 1.9769327372368434e-05, "loss": 0.494, "step": 6554 }, { "epoch": 0.13902144175097028, "grad_norm": 0.4052709639072418, "learning_rate": 1.9769256149927217e-05, "loss": 0.5857, "step": 6555 }, { "epoch": 0.1390426502089033, "grad_norm": 0.3241598606109619, "learning_rate": 1.9769184916620722e-05, "loss": 0.5907, "step": 6556 }, { "epoch": 0.13906385866683632, "grad_norm": 0.30973032116889954, "learning_rate": 1.9769113672449017e-05, "loss": 0.4793, "step": 6557 }, { "epoch": 0.13908506712476937, "grad_norm": 0.35893312096595764, "learning_rate": 1.9769042417412193e-05, "loss": 0.5811, "step": 6558 }, { "epoch": 0.1391062755827024, "grad_norm": 0.34465354681015015, "learning_rate": 1.9768971151510326e-05, "loss": 0.5514, "step": 6559 }, { "epoch": 0.1391274840406354, "grad_norm": 0.3316480219364166, "learning_rate": 1.976889987474349e-05, "loss": 0.5791, "step": 6560 }, { "epoch": 0.13914869249856843, "grad_norm": 0.34186887741088867, "learning_rate": 1.9768828587111773e-05, "loss": 0.5767, "step": 6561 }, { "epoch": 0.13916990095650145, "grad_norm": 0.3360084891319275, "learning_rate": 1.9768757288615244e-05, "loss": 0.5903, "step": 6562 }, { "epoch": 0.13919110941443447, "grad_norm": 0.3469945192337036, "learning_rate": 1.9768685979253992e-05, "loss": 0.6184, "step": 6563 }, { "epoch": 0.1392123178723675, "grad_norm": 0.3278781771659851, "learning_rate": 1.976861465902809e-05, "loss": 0.4653, "step": 6564 }, { "epoch": 0.13923352633030053, "grad_norm": 0.40336906909942627, "learning_rate": 1.9768543327937625e-05, "loss": 0.5291, "step": 6565 }, { "epoch": 0.13925473478823355, "grad_norm": 0.34579914808273315, "learning_rate": 1.9768471985982668e-05, "loss": 0.6153, "step": 6566 }, { "epoch": 0.13927594324616657, "grad_norm": 0.36721113324165344, "learning_rate": 1.97684006331633e-05, "loss": 0.5588, "step": 6567 }, { "epoch": 0.1392971517040996, "grad_norm": 0.33196255564689636, "learning_rate": 1.97683292694796e-05, "loss": 0.5255, "step": 6568 }, { "epoch": 0.1393183601620326, "grad_norm": 0.31934794783592224, "learning_rate": 1.976825789493165e-05, "loss": 0.5734, "step": 6569 }, { "epoch": 0.13933956861996563, "grad_norm": 0.4409990608692169, "learning_rate": 1.976818650951953e-05, "loss": 0.5779, "step": 6570 }, { "epoch": 0.13936077707789868, "grad_norm": 0.2983035445213318, "learning_rate": 1.9768115113243317e-05, "loss": 0.4832, "step": 6571 }, { "epoch": 0.1393819855358317, "grad_norm": 0.5186615586280823, "learning_rate": 1.9768043706103093e-05, "loss": 0.5343, "step": 6572 }, { "epoch": 0.13940319399376472, "grad_norm": 0.4157814383506775, "learning_rate": 1.9767972288098932e-05, "loss": 0.5457, "step": 6573 }, { "epoch": 0.13942440245169774, "grad_norm": 0.3361794948577881, "learning_rate": 1.976790085923092e-05, "loss": 0.6691, "step": 6574 }, { "epoch": 0.13944561090963076, "grad_norm": 0.3179982602596283, "learning_rate": 1.9767829419499135e-05, "loss": 0.4798, "step": 6575 }, { "epoch": 0.13946681936756378, "grad_norm": 0.3381878137588501, "learning_rate": 1.9767757968903654e-05, "loss": 0.4638, "step": 6576 }, { "epoch": 0.1394880278254968, "grad_norm": 0.4279809594154358, "learning_rate": 1.9767686507444556e-05, "loss": 0.6633, "step": 6577 }, { "epoch": 0.13950923628342984, "grad_norm": 0.3303023874759674, "learning_rate": 1.976761503512192e-05, "loss": 0.51, "step": 6578 }, { "epoch": 0.13953044474136286, "grad_norm": 0.2877829670906067, "learning_rate": 1.976754355193583e-05, "loss": 0.4345, "step": 6579 }, { "epoch": 0.13955165319929588, "grad_norm": 0.5650593042373657, "learning_rate": 1.9767472057886364e-05, "loss": 0.5714, "step": 6580 }, { "epoch": 0.1395728616572289, "grad_norm": 0.3156544268131256, "learning_rate": 1.9767400552973598e-05, "loss": 0.5135, "step": 6581 }, { "epoch": 0.13959407011516192, "grad_norm": 0.36168941855430603, "learning_rate": 1.9767329037197617e-05, "loss": 0.6025, "step": 6582 }, { "epoch": 0.13961527857309494, "grad_norm": 0.4493236839771271, "learning_rate": 1.9767257510558493e-05, "loss": 0.5352, "step": 6583 }, { "epoch": 0.13963648703102796, "grad_norm": 0.33525919914245605, "learning_rate": 1.9767185973056313e-05, "loss": 0.6191, "step": 6584 }, { "epoch": 0.139657695488961, "grad_norm": 0.3087519705295563, "learning_rate": 1.9767114424691153e-05, "loss": 0.489, "step": 6585 }, { "epoch": 0.13967890394689403, "grad_norm": 0.3648194968700409, "learning_rate": 1.9767042865463094e-05, "loss": 0.5677, "step": 6586 }, { "epoch": 0.13970011240482705, "grad_norm": 0.3376066982746124, "learning_rate": 1.9766971295372213e-05, "loss": 0.5719, "step": 6587 }, { "epoch": 0.13972132086276007, "grad_norm": 0.3218159079551697, "learning_rate": 1.9766899714418592e-05, "loss": 0.4818, "step": 6588 }, { "epoch": 0.1397425293206931, "grad_norm": 0.32796937227249146, "learning_rate": 1.9766828122602312e-05, "loss": 0.5056, "step": 6589 }, { "epoch": 0.1397637377786261, "grad_norm": 0.49438583850860596, "learning_rate": 1.9766756519923448e-05, "loss": 0.4511, "step": 6590 }, { "epoch": 0.13978494623655913, "grad_norm": 0.3129476308822632, "learning_rate": 1.9766684906382084e-05, "loss": 0.4812, "step": 6591 }, { "epoch": 0.13980615469449217, "grad_norm": 0.29830145835876465, "learning_rate": 1.97666132819783e-05, "loss": 0.4884, "step": 6592 }, { "epoch": 0.1398273631524252, "grad_norm": 0.33781698346138, "learning_rate": 1.9766541646712166e-05, "loss": 0.5118, "step": 6593 }, { "epoch": 0.1398485716103582, "grad_norm": 0.35340872406959534, "learning_rate": 1.9766470000583774e-05, "loss": 0.4929, "step": 6594 }, { "epoch": 0.13986978006829123, "grad_norm": 0.39999300241470337, "learning_rate": 1.97663983435932e-05, "loss": 0.4664, "step": 6595 }, { "epoch": 0.13989098852622425, "grad_norm": 0.3267422914505005, "learning_rate": 1.9766326675740522e-05, "loss": 0.5697, "step": 6596 }, { "epoch": 0.13991219698415727, "grad_norm": 0.3523600399494171, "learning_rate": 1.976625499702582e-05, "loss": 0.6092, "step": 6597 }, { "epoch": 0.1399334054420903, "grad_norm": 0.3314726948738098, "learning_rate": 1.9766183307449173e-05, "loss": 0.5022, "step": 6598 }, { "epoch": 0.13995461390002334, "grad_norm": 0.33951878547668457, "learning_rate": 1.976611160701066e-05, "loss": 0.4946, "step": 6599 }, { "epoch": 0.13997582235795636, "grad_norm": 0.3887704312801361, "learning_rate": 1.9766039895710366e-05, "loss": 0.5111, "step": 6600 }, { "epoch": 0.13999703081588938, "grad_norm": 0.312712699174881, "learning_rate": 1.976596817354837e-05, "loss": 0.4879, "step": 6601 }, { "epoch": 0.1400182392738224, "grad_norm": 0.36174654960632324, "learning_rate": 1.9765896440524742e-05, "loss": 0.5027, "step": 6602 }, { "epoch": 0.14003944773175542, "grad_norm": 0.3407568335533142, "learning_rate": 1.9765824696639573e-05, "loss": 0.506, "step": 6603 }, { "epoch": 0.14006065618968844, "grad_norm": 0.30031758546829224, "learning_rate": 1.9765752941892938e-05, "loss": 0.4761, "step": 6604 }, { "epoch": 0.14008186464762148, "grad_norm": 0.35259759426116943, "learning_rate": 1.9765681176284918e-05, "loss": 0.4622, "step": 6605 }, { "epoch": 0.1401030731055545, "grad_norm": 0.3166150152683258, "learning_rate": 1.976560939981559e-05, "loss": 0.5301, "step": 6606 }, { "epoch": 0.14012428156348752, "grad_norm": 0.32040420174598694, "learning_rate": 1.976553761248504e-05, "loss": 0.525, "step": 6607 }, { "epoch": 0.14014549002142054, "grad_norm": 0.3497636914253235, "learning_rate": 1.976546581429334e-05, "loss": 0.5278, "step": 6608 }, { "epoch": 0.14016669847935356, "grad_norm": 0.31352943181991577, "learning_rate": 1.9765394005240575e-05, "loss": 0.5428, "step": 6609 }, { "epoch": 0.14018790693728658, "grad_norm": 0.3789610266685486, "learning_rate": 1.9765322185326827e-05, "loss": 0.5098, "step": 6610 }, { "epoch": 0.1402091153952196, "grad_norm": 0.30909958481788635, "learning_rate": 1.9765250354552168e-05, "loss": 0.4455, "step": 6611 }, { "epoch": 0.14023032385315265, "grad_norm": 0.3471221923828125, "learning_rate": 1.9765178512916683e-05, "loss": 0.5685, "step": 6612 }, { "epoch": 0.14025153231108567, "grad_norm": 0.3246045410633087, "learning_rate": 1.976510666042045e-05, "loss": 0.5503, "step": 6613 }, { "epoch": 0.1402727407690187, "grad_norm": 0.3778909146785736, "learning_rate": 1.9765034797063554e-05, "loss": 0.6333, "step": 6614 }, { "epoch": 0.1402939492269517, "grad_norm": 0.33276844024658203, "learning_rate": 1.976496292284607e-05, "loss": 0.5659, "step": 6615 }, { "epoch": 0.14031515768488473, "grad_norm": 0.3314593732357025, "learning_rate": 1.9764891037768077e-05, "loss": 0.5463, "step": 6616 }, { "epoch": 0.14033636614281775, "grad_norm": 0.3351655900478363, "learning_rate": 1.976481914182966e-05, "loss": 0.5996, "step": 6617 }, { "epoch": 0.14035757460075077, "grad_norm": 0.3470619022846222, "learning_rate": 1.9764747235030893e-05, "loss": 0.5472, "step": 6618 }, { "epoch": 0.14037878305868381, "grad_norm": 0.3140762746334076, "learning_rate": 1.976467531737186e-05, "loss": 0.5065, "step": 6619 }, { "epoch": 0.14039999151661683, "grad_norm": 0.4893466830253601, "learning_rate": 1.9764603388852638e-05, "loss": 0.5209, "step": 6620 }, { "epoch": 0.14042119997454985, "grad_norm": 0.3621346950531006, "learning_rate": 1.9764531449473316e-05, "loss": 0.5912, "step": 6621 }, { "epoch": 0.14044240843248287, "grad_norm": 0.33097150921821594, "learning_rate": 1.976445949923396e-05, "loss": 0.5516, "step": 6622 }, { "epoch": 0.1404636168904159, "grad_norm": 0.32336437702178955, "learning_rate": 1.976438753813466e-05, "loss": 0.5592, "step": 6623 }, { "epoch": 0.1404848253483489, "grad_norm": 0.33779212832450867, "learning_rate": 1.9764315566175488e-05, "loss": 0.5508, "step": 6624 }, { "epoch": 0.14050603380628193, "grad_norm": 0.33000561594963074, "learning_rate": 1.9764243583356535e-05, "loss": 0.4819, "step": 6625 }, { "epoch": 0.14052724226421498, "grad_norm": 0.29952993988990784, "learning_rate": 1.9764171589677873e-05, "loss": 0.494, "step": 6626 }, { "epoch": 0.140548450722148, "grad_norm": 0.32499170303344727, "learning_rate": 1.9764099585139586e-05, "loss": 0.5557, "step": 6627 }, { "epoch": 0.14056965918008102, "grad_norm": 0.3208351731300354, "learning_rate": 1.976402756974175e-05, "loss": 0.5071, "step": 6628 }, { "epoch": 0.14059086763801404, "grad_norm": 0.33847877383232117, "learning_rate": 1.9763955543484446e-05, "loss": 0.52, "step": 6629 }, { "epoch": 0.14061207609594706, "grad_norm": 0.30457600951194763, "learning_rate": 1.9763883506367757e-05, "loss": 0.4685, "step": 6630 }, { "epoch": 0.14063328455388008, "grad_norm": 0.35863032937049866, "learning_rate": 1.9763811458391763e-05, "loss": 0.5533, "step": 6631 }, { "epoch": 0.1406544930118131, "grad_norm": 0.377610981464386, "learning_rate": 1.9763739399556543e-05, "loss": 0.5222, "step": 6632 }, { "epoch": 0.14067570146974614, "grad_norm": 0.33981096744537354, "learning_rate": 1.9763667329862177e-05, "loss": 0.5268, "step": 6633 }, { "epoch": 0.14069690992767916, "grad_norm": 0.35491257905960083, "learning_rate": 1.9763595249308744e-05, "loss": 0.5987, "step": 6634 }, { "epoch": 0.14071811838561218, "grad_norm": 0.30853819847106934, "learning_rate": 1.9763523157896324e-05, "loss": 0.4863, "step": 6635 }, { "epoch": 0.1407393268435452, "grad_norm": 0.3397642970085144, "learning_rate": 1.9763451055625e-05, "loss": 0.5184, "step": 6636 }, { "epoch": 0.14076053530147822, "grad_norm": 0.31718501448631287, "learning_rate": 1.976337894249485e-05, "loss": 0.4204, "step": 6637 }, { "epoch": 0.14078174375941124, "grad_norm": 0.3551066815853119, "learning_rate": 1.9763306818505952e-05, "loss": 0.4832, "step": 6638 }, { "epoch": 0.1408029522173443, "grad_norm": 0.4991002678871155, "learning_rate": 1.976323468365839e-05, "loss": 0.5506, "step": 6639 }, { "epoch": 0.1408241606752773, "grad_norm": 0.5985103249549866, "learning_rate": 1.9763162537952248e-05, "loss": 0.4779, "step": 6640 }, { "epoch": 0.14084536913321033, "grad_norm": 0.32619404792785645, "learning_rate": 1.9763090381387596e-05, "loss": 0.5617, "step": 6641 }, { "epoch": 0.14086657759114335, "grad_norm": 0.33578431606292725, "learning_rate": 1.9763018213964523e-05, "loss": 0.5393, "step": 6642 }, { "epoch": 0.14088778604907637, "grad_norm": 0.3227806091308594, "learning_rate": 1.9762946035683105e-05, "loss": 0.4478, "step": 6643 }, { "epoch": 0.1409089945070094, "grad_norm": 0.32868096232414246, "learning_rate": 1.9762873846543423e-05, "loss": 0.4955, "step": 6644 }, { "epoch": 0.1409302029649424, "grad_norm": 0.3110634982585907, "learning_rate": 1.9762801646545557e-05, "loss": 0.4821, "step": 6645 }, { "epoch": 0.14095141142287546, "grad_norm": 0.3601089417934418, "learning_rate": 1.976272943568959e-05, "loss": 0.5137, "step": 6646 }, { "epoch": 0.14097261988080848, "grad_norm": 0.4154081642627716, "learning_rate": 1.97626572139756e-05, "loss": 0.4498, "step": 6647 }, { "epoch": 0.1409938283387415, "grad_norm": 0.3310070335865021, "learning_rate": 1.9762584981403665e-05, "loss": 0.4582, "step": 6648 }, { "epoch": 0.14101503679667451, "grad_norm": 0.3799281418323517, "learning_rate": 1.9762512737973866e-05, "loss": 0.5034, "step": 6649 }, { "epoch": 0.14103624525460753, "grad_norm": 0.3328115940093994, "learning_rate": 1.976244048368629e-05, "loss": 0.4968, "step": 6650 }, { "epoch": 0.14105745371254055, "grad_norm": 0.33513981103897095, "learning_rate": 1.976236821854101e-05, "loss": 0.5829, "step": 6651 }, { "epoch": 0.14107866217047357, "grad_norm": 0.3460007905960083, "learning_rate": 1.976229594253811e-05, "loss": 0.5646, "step": 6652 }, { "epoch": 0.14109987062840662, "grad_norm": 0.40176019072532654, "learning_rate": 1.976222365567767e-05, "loss": 0.5338, "step": 6653 }, { "epoch": 0.14112107908633964, "grad_norm": 0.31340375542640686, "learning_rate": 1.9762151357959767e-05, "loss": 0.5195, "step": 6654 }, { "epoch": 0.14114228754427266, "grad_norm": 0.5092438459396362, "learning_rate": 1.9762079049384485e-05, "loss": 0.477, "step": 6655 }, { "epoch": 0.14116349600220568, "grad_norm": 0.3939444422721863, "learning_rate": 1.9762006729951904e-05, "loss": 0.5103, "step": 6656 }, { "epoch": 0.1411847044601387, "grad_norm": 0.34593284130096436, "learning_rate": 1.9761934399662102e-05, "loss": 0.4674, "step": 6657 }, { "epoch": 0.14120591291807172, "grad_norm": 0.30683648586273193, "learning_rate": 1.976186205851516e-05, "loss": 0.5372, "step": 6658 }, { "epoch": 0.14122712137600474, "grad_norm": 0.30559927225112915, "learning_rate": 1.9761789706511165e-05, "loss": 0.5532, "step": 6659 }, { "epoch": 0.14124832983393779, "grad_norm": 0.3683421015739441, "learning_rate": 1.9761717343650192e-05, "loss": 0.4967, "step": 6660 }, { "epoch": 0.1412695382918708, "grad_norm": 0.3383431136608124, "learning_rate": 1.976164496993232e-05, "loss": 0.4558, "step": 6661 }, { "epoch": 0.14129074674980382, "grad_norm": 0.3610707223415375, "learning_rate": 1.976157258535763e-05, "loss": 0.5458, "step": 6662 }, { "epoch": 0.14131195520773684, "grad_norm": 0.3203815817832947, "learning_rate": 1.9761500189926207e-05, "loss": 0.6027, "step": 6663 }, { "epoch": 0.14133316366566986, "grad_norm": 0.37453046441078186, "learning_rate": 1.9761427783638126e-05, "loss": 0.6133, "step": 6664 }, { "epoch": 0.14135437212360288, "grad_norm": 0.4194514751434326, "learning_rate": 1.9761355366493467e-05, "loss": 0.5227, "step": 6665 }, { "epoch": 0.1413755805815359, "grad_norm": 0.2814328372478485, "learning_rate": 1.976128293849232e-05, "loss": 0.4817, "step": 6666 }, { "epoch": 0.14139678903946895, "grad_norm": 0.3303377032279968, "learning_rate": 1.9761210499634754e-05, "loss": 0.5891, "step": 6667 }, { "epoch": 0.14141799749740197, "grad_norm": 0.3246908187866211, "learning_rate": 1.9761138049920853e-05, "loss": 0.5278, "step": 6668 }, { "epoch": 0.141439205955335, "grad_norm": 0.32858139276504517, "learning_rate": 1.9761065589350702e-05, "loss": 0.5523, "step": 6669 }, { "epoch": 0.141460414413268, "grad_norm": 0.3194211721420288, "learning_rate": 1.9760993117924378e-05, "loss": 0.5021, "step": 6670 }, { "epoch": 0.14148162287120103, "grad_norm": 0.3620372712612152, "learning_rate": 1.9760920635641964e-05, "loss": 0.5151, "step": 6671 }, { "epoch": 0.14150283132913405, "grad_norm": 0.35575345158576965, "learning_rate": 1.9760848142503536e-05, "loss": 0.578, "step": 6672 }, { "epoch": 0.14152403978706707, "grad_norm": 0.3488157093524933, "learning_rate": 1.976077563850918e-05, "loss": 0.5171, "step": 6673 }, { "epoch": 0.14154524824500012, "grad_norm": 0.3420921266078949, "learning_rate": 1.9760703123658974e-05, "loss": 0.494, "step": 6674 }, { "epoch": 0.14156645670293314, "grad_norm": 0.30637863278388977, "learning_rate": 1.9760630597953e-05, "loss": 0.4813, "step": 6675 }, { "epoch": 0.14158766516086616, "grad_norm": 0.3057595193386078, "learning_rate": 1.9760558061391336e-05, "loss": 0.5438, "step": 6676 }, { "epoch": 0.14160887361879917, "grad_norm": 0.36409491300582886, "learning_rate": 1.976048551397406e-05, "loss": 0.5039, "step": 6677 }, { "epoch": 0.1416300820767322, "grad_norm": 0.31476065516471863, "learning_rate": 1.976041295570126e-05, "loss": 0.6063, "step": 6678 }, { "epoch": 0.14165129053466521, "grad_norm": 0.31931769847869873, "learning_rate": 1.9760340386573014e-05, "loss": 0.4975, "step": 6679 }, { "epoch": 0.14167249899259826, "grad_norm": 0.33098945021629333, "learning_rate": 1.9760267806589405e-05, "loss": 0.5662, "step": 6680 }, { "epoch": 0.14169370745053128, "grad_norm": 0.3509715497493744, "learning_rate": 1.9760195215750507e-05, "loss": 0.5344, "step": 6681 }, { "epoch": 0.1417149159084643, "grad_norm": 0.33446604013442993, "learning_rate": 1.9760122614056404e-05, "loss": 0.5409, "step": 6682 }, { "epoch": 0.14173612436639732, "grad_norm": 0.3224527835845947, "learning_rate": 1.976005000150718e-05, "loss": 0.5438, "step": 6683 }, { "epoch": 0.14175733282433034, "grad_norm": 0.3565681576728821, "learning_rate": 1.975997737810291e-05, "loss": 0.5958, "step": 6684 }, { "epoch": 0.14177854128226336, "grad_norm": 0.9522379040718079, "learning_rate": 1.975990474384368e-05, "loss": 0.5998, "step": 6685 }, { "epoch": 0.14179974974019638, "grad_norm": 0.3210737705230713, "learning_rate": 1.9759832098729572e-05, "loss": 0.4503, "step": 6686 }, { "epoch": 0.14182095819812943, "grad_norm": 0.3081170618534088, "learning_rate": 1.975975944276066e-05, "loss": 0.5515, "step": 6687 }, { "epoch": 0.14184216665606245, "grad_norm": 0.42968299984931946, "learning_rate": 1.975968677593703e-05, "loss": 0.5237, "step": 6688 }, { "epoch": 0.14186337511399547, "grad_norm": 0.3304022550582886, "learning_rate": 1.975961409825876e-05, "loss": 0.5608, "step": 6689 }, { "epoch": 0.14188458357192849, "grad_norm": 0.31550684571266174, "learning_rate": 1.9759541409725932e-05, "loss": 0.5099, "step": 6690 }, { "epoch": 0.1419057920298615, "grad_norm": 0.32126450538635254, "learning_rate": 1.9759468710338626e-05, "loss": 0.598, "step": 6691 }, { "epoch": 0.14192700048779452, "grad_norm": 0.3467634618282318, "learning_rate": 1.9759396000096923e-05, "loss": 0.5009, "step": 6692 }, { "epoch": 0.14194820894572754, "grad_norm": 0.3141586482524872, "learning_rate": 1.9759323279000905e-05, "loss": 0.6202, "step": 6693 }, { "epoch": 0.1419694174036606, "grad_norm": 0.3660701513290405, "learning_rate": 1.9759250547050654e-05, "loss": 0.5909, "step": 6694 }, { "epoch": 0.1419906258615936, "grad_norm": 0.35340890288352966, "learning_rate": 1.975917780424625e-05, "loss": 0.5467, "step": 6695 }, { "epoch": 0.14201183431952663, "grad_norm": 0.3002515435218811, "learning_rate": 1.975910505058777e-05, "loss": 0.5049, "step": 6696 }, { "epoch": 0.14203304277745965, "grad_norm": 0.35562118887901306, "learning_rate": 1.97590322860753e-05, "loss": 0.4896, "step": 6697 }, { "epoch": 0.14205425123539267, "grad_norm": 0.3442736566066742, "learning_rate": 1.975895951070892e-05, "loss": 0.5338, "step": 6698 }, { "epoch": 0.1420754596933257, "grad_norm": 0.3296003043651581, "learning_rate": 1.975888672448871e-05, "loss": 0.5034, "step": 6699 }, { "epoch": 0.1420966681512587, "grad_norm": 0.4361097812652588, "learning_rate": 1.9758813927414748e-05, "loss": 0.5548, "step": 6700 }, { "epoch": 0.14211787660919176, "grad_norm": 0.3458167314529419, "learning_rate": 1.9758741119487123e-05, "loss": 0.5319, "step": 6701 }, { "epoch": 0.14213908506712478, "grad_norm": 0.4614258110523224, "learning_rate": 1.9758668300705905e-05, "loss": 0.499, "step": 6702 }, { "epoch": 0.1421602935250578, "grad_norm": 0.38808712363243103, "learning_rate": 1.9758595471071187e-05, "loss": 0.4877, "step": 6703 }, { "epoch": 0.14218150198299082, "grad_norm": 0.32338884472846985, "learning_rate": 1.975852263058304e-05, "loss": 0.5163, "step": 6704 }, { "epoch": 0.14220271044092384, "grad_norm": 0.32555052638053894, "learning_rate": 1.9758449779241547e-05, "loss": 0.5763, "step": 6705 }, { "epoch": 0.14222391889885685, "grad_norm": 0.3831503093242645, "learning_rate": 1.9758376917046795e-05, "loss": 0.4873, "step": 6706 }, { "epoch": 0.14224512735678987, "grad_norm": 0.3362683951854706, "learning_rate": 1.975830404399886e-05, "loss": 0.5527, "step": 6707 }, { "epoch": 0.14226633581472292, "grad_norm": 0.34234321117401123, "learning_rate": 1.9758231160097824e-05, "loss": 0.565, "step": 6708 }, { "epoch": 0.14228754427265594, "grad_norm": 0.34952014684677124, "learning_rate": 1.9758158265343765e-05, "loss": 0.4737, "step": 6709 }, { "epoch": 0.14230875273058896, "grad_norm": 0.30346986651420593, "learning_rate": 1.9758085359736772e-05, "loss": 0.532, "step": 6710 }, { "epoch": 0.14232996118852198, "grad_norm": 0.3107157349586487, "learning_rate": 1.9758012443276918e-05, "loss": 0.4892, "step": 6711 }, { "epoch": 0.142351169646455, "grad_norm": 0.36666879057884216, "learning_rate": 1.9757939515964288e-05, "loss": 0.503, "step": 6712 }, { "epoch": 0.14237237810438802, "grad_norm": 0.30581048130989075, "learning_rate": 1.9757866577798964e-05, "loss": 0.447, "step": 6713 }, { "epoch": 0.14239358656232107, "grad_norm": 1.0280333757400513, "learning_rate": 1.9757793628781024e-05, "loss": 0.5282, "step": 6714 }, { "epoch": 0.1424147950202541, "grad_norm": 0.2948739528656006, "learning_rate": 1.9757720668910552e-05, "loss": 0.5602, "step": 6715 }, { "epoch": 0.1424360034781871, "grad_norm": 0.3210456967353821, "learning_rate": 1.9757647698187626e-05, "loss": 0.5122, "step": 6716 }, { "epoch": 0.14245721193612013, "grad_norm": 0.35013890266418457, "learning_rate": 1.975757471661233e-05, "loss": 0.5897, "step": 6717 }, { "epoch": 0.14247842039405315, "grad_norm": 0.3447574973106384, "learning_rate": 1.9757501724184745e-05, "loss": 0.5596, "step": 6718 }, { "epoch": 0.14249962885198617, "grad_norm": 0.3400421738624573, "learning_rate": 1.975742872090495e-05, "loss": 0.6098, "step": 6719 }, { "epoch": 0.14252083730991918, "grad_norm": 0.36818063259124756, "learning_rate": 1.9757355706773028e-05, "loss": 0.5518, "step": 6720 }, { "epoch": 0.14254204576785223, "grad_norm": 0.3851020336151123, "learning_rate": 1.975728268178906e-05, "loss": 0.6047, "step": 6721 }, { "epoch": 0.14256325422578525, "grad_norm": 0.3982507586479187, "learning_rate": 1.9757209645953125e-05, "loss": 0.5219, "step": 6722 }, { "epoch": 0.14258446268371827, "grad_norm": 0.36075738072395325, "learning_rate": 1.9757136599265306e-05, "loss": 0.6004, "step": 6723 }, { "epoch": 0.1426056711416513, "grad_norm": 0.3712412416934967, "learning_rate": 1.975706354172569e-05, "loss": 0.4568, "step": 6724 }, { "epoch": 0.1426268795995843, "grad_norm": 0.30385324358940125, "learning_rate": 1.9756990473334347e-05, "loss": 0.4515, "step": 6725 }, { "epoch": 0.14264808805751733, "grad_norm": 0.3087419271469116, "learning_rate": 1.9756917394091366e-05, "loss": 0.5437, "step": 6726 }, { "epoch": 0.14266929651545035, "grad_norm": 0.313082754611969, "learning_rate": 1.9756844303996825e-05, "loss": 0.4694, "step": 6727 }, { "epoch": 0.1426905049733834, "grad_norm": 0.32141590118408203, "learning_rate": 1.9756771203050807e-05, "loss": 0.5095, "step": 6728 }, { "epoch": 0.14271171343131642, "grad_norm": 0.3308684825897217, "learning_rate": 1.9756698091253395e-05, "loss": 0.5451, "step": 6729 }, { "epoch": 0.14273292188924944, "grad_norm": 0.3098679184913635, "learning_rate": 1.9756624968604664e-05, "loss": 0.5184, "step": 6730 }, { "epoch": 0.14275413034718246, "grad_norm": 0.36172330379486084, "learning_rate": 1.9756551835104703e-05, "loss": 0.5575, "step": 6731 }, { "epoch": 0.14277533880511548, "grad_norm": 0.3125039041042328, "learning_rate": 1.975647869075359e-05, "loss": 0.5435, "step": 6732 }, { "epoch": 0.1427965472630485, "grad_norm": 0.36662817001342773, "learning_rate": 1.9756405535551404e-05, "loss": 0.6111, "step": 6733 }, { "epoch": 0.14281775572098152, "grad_norm": 0.3209191858768463, "learning_rate": 1.975633236949823e-05, "loss": 0.5689, "step": 6734 }, { "epoch": 0.14283896417891456, "grad_norm": 0.3198107182979584, "learning_rate": 1.975625919259415e-05, "loss": 0.5332, "step": 6735 }, { "epoch": 0.14286017263684758, "grad_norm": 0.35126084089279175, "learning_rate": 1.975618600483924e-05, "loss": 0.4833, "step": 6736 }, { "epoch": 0.1428813810947806, "grad_norm": 0.3477507531642914, "learning_rate": 1.9756112806233584e-05, "loss": 0.4724, "step": 6737 }, { "epoch": 0.14290258955271362, "grad_norm": 0.3632937967777252, "learning_rate": 1.9756039596777265e-05, "loss": 0.5355, "step": 6738 }, { "epoch": 0.14292379801064664, "grad_norm": 0.42949748039245605, "learning_rate": 1.9755966376470365e-05, "loss": 0.5403, "step": 6739 }, { "epoch": 0.14294500646857966, "grad_norm": 0.33791613578796387, "learning_rate": 1.9755893145312963e-05, "loss": 0.5417, "step": 6740 }, { "epoch": 0.14296621492651268, "grad_norm": 0.3208973705768585, "learning_rate": 1.975581990330514e-05, "loss": 0.6017, "step": 6741 }, { "epoch": 0.14298742338444573, "grad_norm": 0.35814738273620605, "learning_rate": 1.9755746650446982e-05, "loss": 0.585, "step": 6742 }, { "epoch": 0.14300863184237875, "grad_norm": 0.41159650683403015, "learning_rate": 1.9755673386738565e-05, "loss": 0.6198, "step": 6743 }, { "epoch": 0.14302984030031177, "grad_norm": 0.3255379796028137, "learning_rate": 1.9755600112179974e-05, "loss": 0.5784, "step": 6744 }, { "epoch": 0.1430510487582448, "grad_norm": 0.33462706208229065, "learning_rate": 1.9755526826771288e-05, "loss": 0.5354, "step": 6745 }, { "epoch": 0.1430722572161778, "grad_norm": 0.29116350412368774, "learning_rate": 1.975545353051259e-05, "loss": 0.4881, "step": 6746 }, { "epoch": 0.14309346567411083, "grad_norm": 0.3493495583534241, "learning_rate": 1.975538022340396e-05, "loss": 0.4837, "step": 6747 }, { "epoch": 0.14311467413204385, "grad_norm": 0.3416072428226471, "learning_rate": 1.9755306905445485e-05, "loss": 0.6053, "step": 6748 }, { "epoch": 0.1431358825899769, "grad_norm": 0.34099051356315613, "learning_rate": 1.975523357663724e-05, "loss": 0.4559, "step": 6749 }, { "epoch": 0.1431570910479099, "grad_norm": 0.40007641911506653, "learning_rate": 1.9755160236979308e-05, "loss": 0.4689, "step": 6750 }, { "epoch": 0.14317829950584293, "grad_norm": 0.3601808547973633, "learning_rate": 1.9755086886471772e-05, "loss": 0.5471, "step": 6751 }, { "epoch": 0.14319950796377595, "grad_norm": 0.3209770619869232, "learning_rate": 1.9755013525114713e-05, "loss": 0.5878, "step": 6752 }, { "epoch": 0.14322071642170897, "grad_norm": 0.3128022253513336, "learning_rate": 1.9754940152908216e-05, "loss": 0.5781, "step": 6753 }, { "epoch": 0.143241924879642, "grad_norm": 0.30601370334625244, "learning_rate": 1.9754866769852355e-05, "loss": 0.5084, "step": 6754 }, { "epoch": 0.14326313333757504, "grad_norm": 0.34422817826271057, "learning_rate": 1.9754793375947215e-05, "loss": 0.5052, "step": 6755 }, { "epoch": 0.14328434179550806, "grad_norm": 0.3360820412635803, "learning_rate": 1.9754719971192884e-05, "loss": 0.5712, "step": 6756 }, { "epoch": 0.14330555025344108, "grad_norm": 0.324905663728714, "learning_rate": 1.9754646555589433e-05, "loss": 0.4617, "step": 6757 }, { "epoch": 0.1433267587113741, "grad_norm": 0.4214479923248291, "learning_rate": 1.9754573129136954e-05, "loss": 0.5311, "step": 6758 }, { "epoch": 0.14334796716930712, "grad_norm": 0.3904370665550232, "learning_rate": 1.975449969183552e-05, "loss": 0.5246, "step": 6759 }, { "epoch": 0.14336917562724014, "grad_norm": 0.306625097990036, "learning_rate": 1.9754426243685215e-05, "loss": 0.5068, "step": 6760 }, { "epoch": 0.14339038408517316, "grad_norm": 0.31964707374572754, "learning_rate": 1.9754352784686125e-05, "loss": 0.5301, "step": 6761 }, { "epoch": 0.1434115925431062, "grad_norm": 0.3427765667438507, "learning_rate": 1.9754279314838325e-05, "loss": 0.5406, "step": 6762 }, { "epoch": 0.14343280100103922, "grad_norm": 0.3568299114704132, "learning_rate": 1.9754205834141907e-05, "loss": 0.4967, "step": 6763 }, { "epoch": 0.14345400945897224, "grad_norm": 0.2976829707622528, "learning_rate": 1.975413234259694e-05, "loss": 0.5469, "step": 6764 }, { "epoch": 0.14347521791690526, "grad_norm": 0.476115882396698, "learning_rate": 1.9754058840203513e-05, "loss": 0.504, "step": 6765 }, { "epoch": 0.14349642637483828, "grad_norm": 0.34593209624290466, "learning_rate": 1.9753985326961707e-05, "loss": 0.6046, "step": 6766 }, { "epoch": 0.1435176348327713, "grad_norm": 0.31223148107528687, "learning_rate": 1.97539118028716e-05, "loss": 0.597, "step": 6767 }, { "epoch": 0.14353884329070432, "grad_norm": 0.3069845139980316, "learning_rate": 1.9753838267933283e-05, "loss": 0.545, "step": 6768 }, { "epoch": 0.14356005174863737, "grad_norm": 0.3329567611217499, "learning_rate": 1.9753764722146828e-05, "loss": 0.4596, "step": 6769 }, { "epoch": 0.1435812602065704, "grad_norm": 0.33103659749031067, "learning_rate": 1.975369116551232e-05, "loss": 0.6146, "step": 6770 }, { "epoch": 0.1436024686645034, "grad_norm": 0.3434281647205353, "learning_rate": 1.9753617598029845e-05, "loss": 0.551, "step": 6771 }, { "epoch": 0.14362367712243643, "grad_norm": 0.32191959023475647, "learning_rate": 1.975354401969948e-05, "loss": 0.4528, "step": 6772 }, { "epoch": 0.14364488558036945, "grad_norm": 0.4516148865222931, "learning_rate": 1.9753470430521305e-05, "loss": 0.4335, "step": 6773 }, { "epoch": 0.14366609403830247, "grad_norm": 0.3718413710594177, "learning_rate": 1.9753396830495407e-05, "loss": 0.511, "step": 6774 }, { "epoch": 0.14368730249623549, "grad_norm": 0.30933713912963867, "learning_rate": 1.9753323219621866e-05, "loss": 0.5027, "step": 6775 }, { "epoch": 0.14370851095416853, "grad_norm": 0.3397534489631653, "learning_rate": 1.9753249597900764e-05, "loss": 0.5181, "step": 6776 }, { "epoch": 0.14372971941210155, "grad_norm": 0.28839829564094543, "learning_rate": 1.9753175965332182e-05, "loss": 0.5058, "step": 6777 }, { "epoch": 0.14375092787003457, "grad_norm": 0.4774223268032074, "learning_rate": 1.97531023219162e-05, "loss": 0.5335, "step": 6778 }, { "epoch": 0.1437721363279676, "grad_norm": 0.3772013783454895, "learning_rate": 1.975302866765291e-05, "loss": 0.5204, "step": 6779 }, { "epoch": 0.1437933447859006, "grad_norm": 0.3363814353942871, "learning_rate": 1.975295500254238e-05, "loss": 0.5167, "step": 6780 }, { "epoch": 0.14381455324383363, "grad_norm": 0.3122124969959259, "learning_rate": 1.97528813265847e-05, "loss": 0.4623, "step": 6781 }, { "epoch": 0.14383576170176665, "grad_norm": 0.329127699136734, "learning_rate": 1.9752807639779947e-05, "loss": 0.5258, "step": 6782 }, { "epoch": 0.1438569701596997, "grad_norm": 0.33131247758865356, "learning_rate": 1.975273394212821e-05, "loss": 0.5105, "step": 6783 }, { "epoch": 0.14387817861763272, "grad_norm": 0.30753111839294434, "learning_rate": 1.9752660233629565e-05, "loss": 0.5559, "step": 6784 }, { "epoch": 0.14389938707556574, "grad_norm": 0.36644843220710754, "learning_rate": 1.9752586514284097e-05, "loss": 0.5237, "step": 6785 }, { "epoch": 0.14392059553349876, "grad_norm": 0.39632755517959595, "learning_rate": 1.9752512784091884e-05, "loss": 0.561, "step": 6786 }, { "epoch": 0.14394180399143178, "grad_norm": 0.3216540217399597, "learning_rate": 1.9752439043053012e-05, "loss": 0.4802, "step": 6787 }, { "epoch": 0.1439630124493648, "grad_norm": 0.3295232057571411, "learning_rate": 1.9752365291167567e-05, "loss": 0.5781, "step": 6788 }, { "epoch": 0.14398422090729784, "grad_norm": 0.4198627769947052, "learning_rate": 1.9752291528435622e-05, "loss": 0.5947, "step": 6789 }, { "epoch": 0.14400542936523086, "grad_norm": 0.3697710633277893, "learning_rate": 1.9752217754857264e-05, "loss": 0.5423, "step": 6790 }, { "epoch": 0.14402663782316388, "grad_norm": 0.35587766766548157, "learning_rate": 1.9752143970432574e-05, "loss": 0.5787, "step": 6791 }, { "epoch": 0.1440478462810969, "grad_norm": 0.3153628706932068, "learning_rate": 1.9752070175161634e-05, "loss": 0.5774, "step": 6792 }, { "epoch": 0.14406905473902992, "grad_norm": 0.3371224105358124, "learning_rate": 1.9751996369044522e-05, "loss": 0.4961, "step": 6793 }, { "epoch": 0.14409026319696294, "grad_norm": 0.3379753530025482, "learning_rate": 1.9751922552081333e-05, "loss": 0.4939, "step": 6794 }, { "epoch": 0.14411147165489596, "grad_norm": 0.3541620671749115, "learning_rate": 1.9751848724272134e-05, "loss": 0.5275, "step": 6795 }, { "epoch": 0.144132680112829, "grad_norm": 0.3798801898956299, "learning_rate": 1.9751774885617014e-05, "loss": 0.5499, "step": 6796 }, { "epoch": 0.14415388857076203, "grad_norm": 0.3756880462169647, "learning_rate": 1.9751701036116057e-05, "loss": 0.5381, "step": 6797 }, { "epoch": 0.14417509702869505, "grad_norm": 0.4242284297943115, "learning_rate": 1.9751627175769345e-05, "loss": 0.4567, "step": 6798 }, { "epoch": 0.14419630548662807, "grad_norm": 0.3197422921657562, "learning_rate": 1.9751553304576955e-05, "loss": 0.5274, "step": 6799 }, { "epoch": 0.1442175139445611, "grad_norm": 0.3413996994495392, "learning_rate": 1.9751479422538972e-05, "loss": 0.5014, "step": 6800 }, { "epoch": 0.1442387224024941, "grad_norm": 0.40620365738868713, "learning_rate": 1.9751405529655478e-05, "loss": 0.5867, "step": 6801 }, { "epoch": 0.14425993086042713, "grad_norm": 0.356980562210083, "learning_rate": 1.9751331625926557e-05, "loss": 0.5686, "step": 6802 }, { "epoch": 0.14428113931836017, "grad_norm": 0.3464182913303375, "learning_rate": 1.9751257711352287e-05, "loss": 0.5429, "step": 6803 }, { "epoch": 0.1443023477762932, "grad_norm": 0.34128862619400024, "learning_rate": 1.9751183785932753e-05, "loss": 0.4962, "step": 6804 }, { "epoch": 0.1443235562342262, "grad_norm": 0.3052786588668823, "learning_rate": 1.975110984966804e-05, "loss": 0.488, "step": 6805 }, { "epoch": 0.14434476469215923, "grad_norm": 0.36851584911346436, "learning_rate": 1.9751035902558224e-05, "loss": 0.5568, "step": 6806 }, { "epoch": 0.14436597315009225, "grad_norm": 0.35035085678100586, "learning_rate": 1.9750961944603392e-05, "loss": 0.5186, "step": 6807 }, { "epoch": 0.14438718160802527, "grad_norm": 0.3549915850162506, "learning_rate": 1.9750887975803628e-05, "loss": 0.5575, "step": 6808 }, { "epoch": 0.1444083900659583, "grad_norm": 0.3199600875377655, "learning_rate": 1.975081399615901e-05, "loss": 0.5069, "step": 6809 }, { "epoch": 0.14442959852389134, "grad_norm": 0.31037983298301697, "learning_rate": 1.975074000566962e-05, "loss": 0.487, "step": 6810 }, { "epoch": 0.14445080698182436, "grad_norm": 0.2886905372142792, "learning_rate": 1.975066600433554e-05, "loss": 0.4453, "step": 6811 }, { "epoch": 0.14447201543975738, "grad_norm": 0.35834476351737976, "learning_rate": 1.9750591992156853e-05, "loss": 0.5881, "step": 6812 }, { "epoch": 0.1444932238976904, "grad_norm": 0.3371366858482361, "learning_rate": 1.9750517969133644e-05, "loss": 0.5943, "step": 6813 }, { "epoch": 0.14451443235562342, "grad_norm": 0.36133384704589844, "learning_rate": 1.9750443935265995e-05, "loss": 0.5472, "step": 6814 }, { "epoch": 0.14453564081355644, "grad_norm": 0.2913220226764679, "learning_rate": 1.975036989055399e-05, "loss": 0.447, "step": 6815 }, { "epoch": 0.14455684927148946, "grad_norm": 0.33616816997528076, "learning_rate": 1.9750295834997703e-05, "loss": 0.4769, "step": 6816 }, { "epoch": 0.1445780577294225, "grad_norm": 0.30806782841682434, "learning_rate": 1.9750221768597222e-05, "loss": 0.4598, "step": 6817 }, { "epoch": 0.14459926618735552, "grad_norm": 0.30039653182029724, "learning_rate": 1.975014769135263e-05, "loss": 0.4859, "step": 6818 }, { "epoch": 0.14462047464528854, "grad_norm": 0.3699229061603546, "learning_rate": 1.975007360326401e-05, "loss": 0.5278, "step": 6819 }, { "epoch": 0.14464168310322156, "grad_norm": 0.3517981171607971, "learning_rate": 1.974999950433144e-05, "loss": 0.4795, "step": 6820 }, { "epoch": 0.14466289156115458, "grad_norm": 0.4190092980861664, "learning_rate": 1.9749925394555008e-05, "loss": 0.5617, "step": 6821 }, { "epoch": 0.1446841000190876, "grad_norm": 0.33783096075057983, "learning_rate": 1.9749851273934793e-05, "loss": 0.4997, "step": 6822 }, { "epoch": 0.14470530847702062, "grad_norm": 0.45527249574661255, "learning_rate": 1.974977714247088e-05, "loss": 0.6148, "step": 6823 }, { "epoch": 0.14472651693495367, "grad_norm": 0.34862929582595825, "learning_rate": 1.9749703000163345e-05, "loss": 0.5195, "step": 6824 }, { "epoch": 0.1447477253928867, "grad_norm": 0.30895593762397766, "learning_rate": 1.9749628847012278e-05, "loss": 0.5462, "step": 6825 }, { "epoch": 0.1447689338508197, "grad_norm": 0.3031478524208069, "learning_rate": 1.974955468301776e-05, "loss": 0.5328, "step": 6826 }, { "epoch": 0.14479014230875273, "grad_norm": 0.32254138588905334, "learning_rate": 1.9749480508179866e-05, "loss": 0.4975, "step": 6827 }, { "epoch": 0.14481135076668575, "grad_norm": 0.3718762695789337, "learning_rate": 1.9749406322498688e-05, "loss": 0.5373, "step": 6828 }, { "epoch": 0.14483255922461877, "grad_norm": 0.31537261605262756, "learning_rate": 1.9749332125974306e-05, "loss": 0.5506, "step": 6829 }, { "epoch": 0.14485376768255182, "grad_norm": 0.3338125944137573, "learning_rate": 1.97492579186068e-05, "loss": 0.5254, "step": 6830 }, { "epoch": 0.14487497614048483, "grad_norm": 0.3883122503757477, "learning_rate": 1.9749183700396254e-05, "loss": 0.5588, "step": 6831 }, { "epoch": 0.14489618459841785, "grad_norm": 0.4301868975162506, "learning_rate": 1.974910947134275e-05, "loss": 0.5636, "step": 6832 }, { "epoch": 0.14491739305635087, "grad_norm": 0.4586760103702545, "learning_rate": 1.9749035231446375e-05, "loss": 0.562, "step": 6833 }, { "epoch": 0.1449386015142839, "grad_norm": 0.2934337556362152, "learning_rate": 1.9748960980707205e-05, "loss": 0.4802, "step": 6834 }, { "epoch": 0.1449598099722169, "grad_norm": 0.35384371876716614, "learning_rate": 1.9748886719125326e-05, "loss": 0.5376, "step": 6835 }, { "epoch": 0.14498101843014993, "grad_norm": 0.3234405815601349, "learning_rate": 1.9748812446700818e-05, "loss": 0.4752, "step": 6836 }, { "epoch": 0.14500222688808298, "grad_norm": 0.29665476083755493, "learning_rate": 1.9748738163433767e-05, "loss": 0.4872, "step": 6837 }, { "epoch": 0.145023435346016, "grad_norm": 0.31759753823280334, "learning_rate": 1.9748663869324254e-05, "loss": 0.4255, "step": 6838 }, { "epoch": 0.14504464380394902, "grad_norm": 0.35121825337409973, "learning_rate": 1.974858956437236e-05, "loss": 0.6195, "step": 6839 }, { "epoch": 0.14506585226188204, "grad_norm": 0.37197479605674744, "learning_rate": 1.9748515248578173e-05, "loss": 0.543, "step": 6840 }, { "epoch": 0.14508706071981506, "grad_norm": 0.3579099178314209, "learning_rate": 1.9748440921941768e-05, "loss": 0.5065, "step": 6841 }, { "epoch": 0.14510826917774808, "grad_norm": 0.3394242227077484, "learning_rate": 1.9748366584463236e-05, "loss": 0.6326, "step": 6842 }, { "epoch": 0.1451294776356811, "grad_norm": 0.30948513746261597, "learning_rate": 1.9748292236142652e-05, "loss": 0.5066, "step": 6843 }, { "epoch": 0.14515068609361415, "grad_norm": 0.33421677350997925, "learning_rate": 1.9748217876980104e-05, "loss": 0.5513, "step": 6844 }, { "epoch": 0.14517189455154716, "grad_norm": 0.31418612599372864, "learning_rate": 1.974814350697567e-05, "loss": 0.4657, "step": 6845 }, { "epoch": 0.14519310300948018, "grad_norm": 0.3065241575241089, "learning_rate": 1.974806912612944e-05, "loss": 0.4468, "step": 6846 }, { "epoch": 0.1452143114674132, "grad_norm": 0.3369990885257721, "learning_rate": 1.974799473444149e-05, "loss": 0.5008, "step": 6847 }, { "epoch": 0.14523551992534622, "grad_norm": 0.45722830295562744, "learning_rate": 1.9747920331911903e-05, "loss": 0.5789, "step": 6848 }, { "epoch": 0.14525672838327924, "grad_norm": 0.320203959941864, "learning_rate": 1.9747845918540764e-05, "loss": 0.5111, "step": 6849 }, { "epoch": 0.14527793684121226, "grad_norm": 0.3300848603248596, "learning_rate": 1.9747771494328155e-05, "loss": 0.4859, "step": 6850 }, { "epoch": 0.1452991452991453, "grad_norm": 0.32693320512771606, "learning_rate": 1.9747697059274165e-05, "loss": 0.485, "step": 6851 }, { "epoch": 0.14532035375707833, "grad_norm": 0.3016309142112732, "learning_rate": 1.9747622613378865e-05, "loss": 0.5605, "step": 6852 }, { "epoch": 0.14534156221501135, "grad_norm": 0.32643142342567444, "learning_rate": 1.9747548156642343e-05, "loss": 0.5468, "step": 6853 }, { "epoch": 0.14536277067294437, "grad_norm": 0.42588403820991516, "learning_rate": 1.974747368906469e-05, "loss": 0.5444, "step": 6854 }, { "epoch": 0.1453839791308774, "grad_norm": 0.3595166802406311, "learning_rate": 1.9747399210645975e-05, "loss": 0.5658, "step": 6855 }, { "epoch": 0.1454051875888104, "grad_norm": 0.35404953360557556, "learning_rate": 1.9747324721386288e-05, "loss": 0.506, "step": 6856 }, { "epoch": 0.14542639604674343, "grad_norm": 0.3405422568321228, "learning_rate": 1.9747250221285714e-05, "loss": 0.5425, "step": 6857 }, { "epoch": 0.14544760450467648, "grad_norm": 0.3288126587867737, "learning_rate": 1.9747175710344328e-05, "loss": 0.4896, "step": 6858 }, { "epoch": 0.1454688129626095, "grad_norm": 0.3493885099887848, "learning_rate": 1.9747101188562225e-05, "loss": 0.582, "step": 6859 }, { "epoch": 0.14549002142054251, "grad_norm": 0.32647138833999634, "learning_rate": 1.9747026655939475e-05, "loss": 0.5039, "step": 6860 }, { "epoch": 0.14551122987847553, "grad_norm": 0.383466899394989, "learning_rate": 1.9746952112476168e-05, "loss": 0.5781, "step": 6861 }, { "epoch": 0.14553243833640855, "grad_norm": 0.30945688486099243, "learning_rate": 1.9746877558172387e-05, "loss": 0.4732, "step": 6862 }, { "epoch": 0.14555364679434157, "grad_norm": 0.305850625038147, "learning_rate": 1.974680299302821e-05, "loss": 0.6103, "step": 6863 }, { "epoch": 0.14557485525227462, "grad_norm": 0.3342943489551544, "learning_rate": 1.9746728417043724e-05, "loss": 0.5594, "step": 6864 }, { "epoch": 0.14559606371020764, "grad_norm": 0.3030388057231903, "learning_rate": 1.9746653830219015e-05, "loss": 0.5202, "step": 6865 }, { "epoch": 0.14561727216814066, "grad_norm": 0.38855117559432983, "learning_rate": 1.974657923255416e-05, "loss": 0.4992, "step": 6866 }, { "epoch": 0.14563848062607368, "grad_norm": 0.33112284541130066, "learning_rate": 1.9746504624049246e-05, "loss": 0.5625, "step": 6867 }, { "epoch": 0.1456596890840067, "grad_norm": 0.32755225896835327, "learning_rate": 1.9746430004704353e-05, "loss": 0.5605, "step": 6868 }, { "epoch": 0.14568089754193972, "grad_norm": 0.38206812739372253, "learning_rate": 1.9746355374519566e-05, "loss": 0.5104, "step": 6869 }, { "epoch": 0.14570210599987274, "grad_norm": 0.30602359771728516, "learning_rate": 1.9746280733494963e-05, "loss": 0.5366, "step": 6870 }, { "epoch": 0.14572331445780579, "grad_norm": 0.36852192878723145, "learning_rate": 1.9746206081630636e-05, "loss": 0.5108, "step": 6871 }, { "epoch": 0.1457445229157388, "grad_norm": 0.34013959765434265, "learning_rate": 1.974613141892666e-05, "loss": 0.5451, "step": 6872 }, { "epoch": 0.14576573137367183, "grad_norm": 0.3141433894634247, "learning_rate": 1.9746056745383126e-05, "loss": 0.5189, "step": 6873 }, { "epoch": 0.14578693983160484, "grad_norm": 0.5396631360054016, "learning_rate": 1.974598206100011e-05, "loss": 0.4182, "step": 6874 }, { "epoch": 0.14580814828953786, "grad_norm": 0.3590454161167145, "learning_rate": 1.9745907365777696e-05, "loss": 0.5203, "step": 6875 }, { "epoch": 0.14582935674747088, "grad_norm": 0.37811291217803955, "learning_rate": 1.974583265971597e-05, "loss": 0.5872, "step": 6876 }, { "epoch": 0.1458505652054039, "grad_norm": 0.3319397270679474, "learning_rate": 1.974575794281501e-05, "loss": 0.5139, "step": 6877 }, { "epoch": 0.14587177366333695, "grad_norm": 0.2960353195667267, "learning_rate": 1.9745683215074906e-05, "loss": 0.4342, "step": 6878 }, { "epoch": 0.14589298212126997, "grad_norm": 0.3151487112045288, "learning_rate": 1.9745608476495736e-05, "loss": 0.4398, "step": 6879 }, { "epoch": 0.145914190579203, "grad_norm": 0.3135541081428528, "learning_rate": 1.9745533727077588e-05, "loss": 0.546, "step": 6880 }, { "epoch": 0.145935399037136, "grad_norm": 0.30358296632766724, "learning_rate": 1.974545896682054e-05, "loss": 0.4559, "step": 6881 }, { "epoch": 0.14595660749506903, "grad_norm": 0.3349842429161072, "learning_rate": 1.9745384195724678e-05, "loss": 0.4632, "step": 6882 }, { "epoch": 0.14597781595300205, "grad_norm": 0.3400260806083679, "learning_rate": 1.9745309413790082e-05, "loss": 0.5431, "step": 6883 }, { "epoch": 0.14599902441093507, "grad_norm": 0.34440675377845764, "learning_rate": 1.9745234621016844e-05, "loss": 0.5339, "step": 6884 }, { "epoch": 0.14602023286886812, "grad_norm": 0.3489144742488861, "learning_rate": 1.9745159817405032e-05, "loss": 0.5262, "step": 6885 }, { "epoch": 0.14604144132680114, "grad_norm": 0.49976325035095215, "learning_rate": 1.9745085002954743e-05, "loss": 0.542, "step": 6886 }, { "epoch": 0.14606264978473416, "grad_norm": 0.30430400371551514, "learning_rate": 1.9745010177666054e-05, "loss": 0.497, "step": 6887 }, { "epoch": 0.14608385824266718, "grad_norm": 0.30845510959625244, "learning_rate": 1.974493534153905e-05, "loss": 0.5139, "step": 6888 }, { "epoch": 0.1461050667006002, "grad_norm": 0.3694647252559662, "learning_rate": 1.9744860494573812e-05, "loss": 0.5149, "step": 6889 }, { "epoch": 0.14612627515853321, "grad_norm": 0.325464129447937, "learning_rate": 1.9744785636770425e-05, "loss": 0.3908, "step": 6890 }, { "epoch": 0.14614748361646623, "grad_norm": 0.33383798599243164, "learning_rate": 1.9744710768128974e-05, "loss": 0.5055, "step": 6891 }, { "epoch": 0.14616869207439928, "grad_norm": 0.3344821333885193, "learning_rate": 1.974463588864954e-05, "loss": 0.5467, "step": 6892 }, { "epoch": 0.1461899005323323, "grad_norm": 0.3276851177215576, "learning_rate": 1.9744560998332206e-05, "loss": 0.4987, "step": 6893 }, { "epoch": 0.14621110899026532, "grad_norm": 0.2986738085746765, "learning_rate": 1.9744486097177056e-05, "loss": 0.525, "step": 6894 }, { "epoch": 0.14623231744819834, "grad_norm": 0.3217712342739105, "learning_rate": 1.974441118518417e-05, "loss": 0.6185, "step": 6895 }, { "epoch": 0.14625352590613136, "grad_norm": 0.3373154401779175, "learning_rate": 1.9744336262353638e-05, "loss": 0.4567, "step": 6896 }, { "epoch": 0.14627473436406438, "grad_norm": 0.32563474774360657, "learning_rate": 1.9744261328685538e-05, "loss": 0.4861, "step": 6897 }, { "epoch": 0.1462959428219974, "grad_norm": 0.3377024531364441, "learning_rate": 1.974418638417996e-05, "loss": 0.5239, "step": 6898 }, { "epoch": 0.14631715127993045, "grad_norm": 0.33648747205734253, "learning_rate": 1.974411142883698e-05, "loss": 0.5794, "step": 6899 }, { "epoch": 0.14633835973786347, "grad_norm": 0.3366807699203491, "learning_rate": 1.9744036462656678e-05, "loss": 0.6049, "step": 6900 }, { "epoch": 0.14635956819579649, "grad_norm": 0.3277961313724518, "learning_rate": 1.974396148563915e-05, "loss": 0.6094, "step": 6901 }, { "epoch": 0.1463807766537295, "grad_norm": 0.33537378907203674, "learning_rate": 1.9743886497784472e-05, "loss": 0.5506, "step": 6902 }, { "epoch": 0.14640198511166252, "grad_norm": 0.5857880115509033, "learning_rate": 1.9743811499092726e-05, "loss": 0.5199, "step": 6903 }, { "epoch": 0.14642319356959554, "grad_norm": 0.3594510555267334, "learning_rate": 1.9743736489563997e-05, "loss": 0.51, "step": 6904 }, { "epoch": 0.1464444020275286, "grad_norm": 0.3171103596687317, "learning_rate": 1.9743661469198372e-05, "loss": 0.4949, "step": 6905 }, { "epoch": 0.1464656104854616, "grad_norm": 0.3717913329601288, "learning_rate": 1.974358643799593e-05, "loss": 0.4419, "step": 6906 }, { "epoch": 0.14648681894339463, "grad_norm": 0.3303418755531311, "learning_rate": 1.9743511395956757e-05, "loss": 0.4905, "step": 6907 }, { "epoch": 0.14650802740132765, "grad_norm": 0.3107629418373108, "learning_rate": 1.9743436343080934e-05, "loss": 0.5587, "step": 6908 }, { "epoch": 0.14652923585926067, "grad_norm": 0.3440854251384735, "learning_rate": 1.9743361279368544e-05, "loss": 0.5011, "step": 6909 }, { "epoch": 0.1465504443171937, "grad_norm": 0.30019184947013855, "learning_rate": 1.9743286204819674e-05, "loss": 0.4668, "step": 6910 }, { "epoch": 0.1465716527751267, "grad_norm": 0.3779541850090027, "learning_rate": 1.9743211119434408e-05, "loss": 0.6226, "step": 6911 }, { "epoch": 0.14659286123305976, "grad_norm": 0.3689761459827423, "learning_rate": 1.9743136023212822e-05, "loss": 0.6412, "step": 6912 }, { "epoch": 0.14661406969099278, "grad_norm": 0.3182789981365204, "learning_rate": 1.974306091615501e-05, "loss": 0.4609, "step": 6913 }, { "epoch": 0.1466352781489258, "grad_norm": 0.31405240297317505, "learning_rate": 1.9742985798261047e-05, "loss": 0.4789, "step": 6914 }, { "epoch": 0.14665648660685882, "grad_norm": 0.3497753143310547, "learning_rate": 1.9742910669531018e-05, "loss": 0.4536, "step": 6915 }, { "epoch": 0.14667769506479184, "grad_norm": 0.3069697916507721, "learning_rate": 1.9742835529965014e-05, "loss": 0.4816, "step": 6916 }, { "epoch": 0.14669890352272486, "grad_norm": 0.3072974681854248, "learning_rate": 1.9742760379563108e-05, "loss": 0.4428, "step": 6917 }, { "epoch": 0.14672011198065787, "grad_norm": 0.31156688928604126, "learning_rate": 1.9742685218325394e-05, "loss": 0.5127, "step": 6918 }, { "epoch": 0.14674132043859092, "grad_norm": 0.4641406536102295, "learning_rate": 1.9742610046251943e-05, "loss": 0.5464, "step": 6919 }, { "epoch": 0.14676252889652394, "grad_norm": 0.34139111638069153, "learning_rate": 1.974253486334285e-05, "loss": 0.5217, "step": 6920 }, { "epoch": 0.14678373735445696, "grad_norm": 0.3463275134563446, "learning_rate": 1.9742459669598194e-05, "loss": 0.5255, "step": 6921 }, { "epoch": 0.14680494581238998, "grad_norm": 0.2940859794616699, "learning_rate": 1.974238446501806e-05, "loss": 0.4561, "step": 6922 }, { "epoch": 0.146826154270323, "grad_norm": 0.33820831775665283, "learning_rate": 1.974230924960253e-05, "loss": 0.5201, "step": 6923 }, { "epoch": 0.14684736272825602, "grad_norm": 0.30393052101135254, "learning_rate": 1.9742234023351687e-05, "loss": 0.5374, "step": 6924 }, { "epoch": 0.14686857118618904, "grad_norm": 0.3466443717479706, "learning_rate": 1.974215878626562e-05, "loss": 0.5143, "step": 6925 }, { "epoch": 0.1468897796441221, "grad_norm": 0.32523995637893677, "learning_rate": 1.9742083538344402e-05, "loss": 0.4662, "step": 6926 }, { "epoch": 0.1469109881020551, "grad_norm": 0.3354303538799286, "learning_rate": 1.9742008279588127e-05, "loss": 0.5298, "step": 6927 }, { "epoch": 0.14693219655998813, "grad_norm": 0.39324432611465454, "learning_rate": 1.9741933009996874e-05, "loss": 0.4499, "step": 6928 }, { "epoch": 0.14695340501792115, "grad_norm": 0.49494051933288574, "learning_rate": 1.9741857729570732e-05, "loss": 0.5272, "step": 6929 }, { "epoch": 0.14697461347585417, "grad_norm": 0.3169724941253662, "learning_rate": 1.9741782438309774e-05, "loss": 0.528, "step": 6930 }, { "epoch": 0.14699582193378719, "grad_norm": 0.32649746537208557, "learning_rate": 1.9741707136214093e-05, "loss": 0.5647, "step": 6931 }, { "epoch": 0.1470170303917202, "grad_norm": 0.3344084918498993, "learning_rate": 1.974163182328377e-05, "loss": 0.5055, "step": 6932 }, { "epoch": 0.14703823884965325, "grad_norm": 0.370240181684494, "learning_rate": 1.974155649951889e-05, "loss": 0.57, "step": 6933 }, { "epoch": 0.14705944730758627, "grad_norm": 0.3691026270389557, "learning_rate": 1.9741481164919533e-05, "loss": 0.6257, "step": 6934 }, { "epoch": 0.1470806557655193, "grad_norm": 0.35446834564208984, "learning_rate": 1.9741405819485787e-05, "loss": 0.4945, "step": 6935 }, { "epoch": 0.1471018642234523, "grad_norm": 0.2901232838630676, "learning_rate": 1.9741330463217733e-05, "loss": 0.4823, "step": 6936 }, { "epoch": 0.14712307268138533, "grad_norm": 0.31932854652404785, "learning_rate": 1.9741255096115455e-05, "loss": 0.4352, "step": 6937 }, { "epoch": 0.14714428113931835, "grad_norm": 0.3960351347923279, "learning_rate": 1.974117971817904e-05, "loss": 0.4766, "step": 6938 }, { "epoch": 0.1471654895972514, "grad_norm": 0.3481695353984833, "learning_rate": 1.974110432940857e-05, "loss": 0.5229, "step": 6939 }, { "epoch": 0.14718669805518442, "grad_norm": 0.3451584577560425, "learning_rate": 1.9741028929804125e-05, "loss": 0.4845, "step": 6940 }, { "epoch": 0.14720790651311744, "grad_norm": 0.37129461765289307, "learning_rate": 1.9740953519365795e-05, "loss": 0.4923, "step": 6941 }, { "epoch": 0.14722911497105046, "grad_norm": 0.36454838514328003, "learning_rate": 1.974087809809366e-05, "loss": 0.5547, "step": 6942 }, { "epoch": 0.14725032342898348, "grad_norm": 0.3346073031425476, "learning_rate": 1.9740802665987805e-05, "loss": 0.5448, "step": 6943 }, { "epoch": 0.1472715318869165, "grad_norm": 0.5214165449142456, "learning_rate": 1.9740727223048318e-05, "loss": 0.5778, "step": 6944 }, { "epoch": 0.14729274034484952, "grad_norm": 0.3078988194465637, "learning_rate": 1.9740651769275273e-05, "loss": 0.5231, "step": 6945 }, { "epoch": 0.14731394880278256, "grad_norm": 0.37248602509498596, "learning_rate": 1.9740576304668762e-05, "loss": 0.6175, "step": 6946 }, { "epoch": 0.14733515726071558, "grad_norm": 0.3070460259914398, "learning_rate": 1.9740500829228865e-05, "loss": 0.5645, "step": 6947 }, { "epoch": 0.1473563657186486, "grad_norm": 0.29722845554351807, "learning_rate": 1.9740425342955668e-05, "loss": 0.4963, "step": 6948 }, { "epoch": 0.14737757417658162, "grad_norm": 0.33206281065940857, "learning_rate": 1.9740349845849258e-05, "loss": 0.4145, "step": 6949 }, { "epoch": 0.14739878263451464, "grad_norm": 0.2853332757949829, "learning_rate": 1.9740274337909713e-05, "loss": 0.4756, "step": 6950 }, { "epoch": 0.14741999109244766, "grad_norm": 0.3214767575263977, "learning_rate": 1.974019881913712e-05, "loss": 0.5978, "step": 6951 }, { "epoch": 0.14744119955038068, "grad_norm": 0.29460904002189636, "learning_rate": 1.9740123289531563e-05, "loss": 0.4355, "step": 6952 }, { "epoch": 0.14746240800831373, "grad_norm": 0.32918858528137207, "learning_rate": 1.9740047749093122e-05, "loss": 0.5132, "step": 6953 }, { "epoch": 0.14748361646624675, "grad_norm": 0.3229239583015442, "learning_rate": 1.973997219782189e-05, "loss": 0.6531, "step": 6954 }, { "epoch": 0.14750482492417977, "grad_norm": 0.414808064699173, "learning_rate": 1.9739896635717942e-05, "loss": 0.4751, "step": 6955 }, { "epoch": 0.1475260333821128, "grad_norm": 0.43023476004600525, "learning_rate": 1.9739821062781365e-05, "loss": 0.5917, "step": 6956 }, { "epoch": 0.1475472418400458, "grad_norm": 0.5284488201141357, "learning_rate": 1.9739745479012246e-05, "loss": 0.517, "step": 6957 }, { "epoch": 0.14756845029797883, "grad_norm": 0.42897653579711914, "learning_rate": 1.9739669884410663e-05, "loss": 0.583, "step": 6958 }, { "epoch": 0.14758965875591185, "grad_norm": 0.34467563033103943, "learning_rate": 1.9739594278976708e-05, "loss": 0.6219, "step": 6959 }, { "epoch": 0.1476108672138449, "grad_norm": 0.3213485777378082, "learning_rate": 1.973951866271046e-05, "loss": 0.5222, "step": 6960 }, { "epoch": 0.1476320756717779, "grad_norm": 0.42253249883651733, "learning_rate": 1.9739443035612003e-05, "loss": 0.5397, "step": 6961 }, { "epoch": 0.14765328412971093, "grad_norm": 0.32167041301727295, "learning_rate": 1.9739367397681422e-05, "loss": 0.497, "step": 6962 }, { "epoch": 0.14767449258764395, "grad_norm": 0.3547438085079193, "learning_rate": 1.97392917489188e-05, "loss": 0.5292, "step": 6963 }, { "epoch": 0.14769570104557697, "grad_norm": 0.3253835439682007, "learning_rate": 1.9739216089324225e-05, "loss": 0.5022, "step": 6964 }, { "epoch": 0.14771690950351, "grad_norm": 0.3239671289920807, "learning_rate": 1.9739140418897777e-05, "loss": 0.4832, "step": 6965 }, { "epoch": 0.147738117961443, "grad_norm": 0.3155825436115265, "learning_rate": 1.973906473763954e-05, "loss": 0.5064, "step": 6966 }, { "epoch": 0.14775932641937606, "grad_norm": 0.35630056262016296, "learning_rate": 1.9738989045549602e-05, "loss": 0.5236, "step": 6967 }, { "epoch": 0.14778053487730908, "grad_norm": 0.31565070152282715, "learning_rate": 1.9738913342628046e-05, "loss": 0.5551, "step": 6968 }, { "epoch": 0.1478017433352421, "grad_norm": 0.3083237409591675, "learning_rate": 1.9738837628874952e-05, "loss": 0.5285, "step": 6969 }, { "epoch": 0.14782295179317512, "grad_norm": 0.3103879988193512, "learning_rate": 1.9738761904290407e-05, "loss": 0.4596, "step": 6970 }, { "epoch": 0.14784416025110814, "grad_norm": 0.37745341658592224, "learning_rate": 1.97386861688745e-05, "loss": 0.6223, "step": 6971 }, { "epoch": 0.14786536870904116, "grad_norm": 0.37172433733940125, "learning_rate": 1.973861042262731e-05, "loss": 0.6013, "step": 6972 }, { "epoch": 0.14788657716697418, "grad_norm": 0.33882930874824524, "learning_rate": 1.9738534665548922e-05, "loss": 0.5403, "step": 6973 }, { "epoch": 0.14790778562490722, "grad_norm": 0.4105833172798157, "learning_rate": 1.9738458897639418e-05, "loss": 0.5765, "step": 6974 }, { "epoch": 0.14792899408284024, "grad_norm": 0.7077440619468689, "learning_rate": 1.9738383118898886e-05, "loss": 0.4726, "step": 6975 }, { "epoch": 0.14795020254077326, "grad_norm": 0.332832932472229, "learning_rate": 1.973830732932741e-05, "loss": 0.5443, "step": 6976 }, { "epoch": 0.14797141099870628, "grad_norm": 0.3470858037471771, "learning_rate": 1.973823152892507e-05, "loss": 0.5035, "step": 6977 }, { "epoch": 0.1479926194566393, "grad_norm": 0.34099292755126953, "learning_rate": 1.9738155717691955e-05, "loss": 0.4805, "step": 6978 }, { "epoch": 0.14801382791457232, "grad_norm": 0.3175806701183319, "learning_rate": 1.9738079895628148e-05, "loss": 0.5877, "step": 6979 }, { "epoch": 0.14803503637250537, "grad_norm": 0.323757141828537, "learning_rate": 1.9738004062733734e-05, "loss": 0.5238, "step": 6980 }, { "epoch": 0.1480562448304384, "grad_norm": 0.3684273362159729, "learning_rate": 1.9737928219008794e-05, "loss": 0.5655, "step": 6981 }, { "epoch": 0.1480774532883714, "grad_norm": 0.3539048135280609, "learning_rate": 1.9737852364453417e-05, "loss": 0.586, "step": 6982 }, { "epoch": 0.14809866174630443, "grad_norm": 0.39404454827308655, "learning_rate": 1.9737776499067686e-05, "loss": 0.5787, "step": 6983 }, { "epoch": 0.14811987020423745, "grad_norm": 0.35742032527923584, "learning_rate": 1.9737700622851684e-05, "loss": 0.4763, "step": 6984 }, { "epoch": 0.14814107866217047, "grad_norm": 0.36749669909477234, "learning_rate": 1.9737624735805495e-05, "loss": 0.5134, "step": 6985 }, { "epoch": 0.1481622871201035, "grad_norm": 0.30393508076667786, "learning_rate": 1.9737548837929203e-05, "loss": 0.5966, "step": 6986 }, { "epoch": 0.14818349557803653, "grad_norm": 0.33624014258384705, "learning_rate": 1.9737472929222897e-05, "loss": 0.4555, "step": 6987 }, { "epoch": 0.14820470403596955, "grad_norm": 0.35439762473106384, "learning_rate": 1.9737397009686655e-05, "loss": 0.6297, "step": 6988 }, { "epoch": 0.14822591249390257, "grad_norm": 0.6534976959228516, "learning_rate": 1.9737321079320565e-05, "loss": 0.5176, "step": 6989 }, { "epoch": 0.1482471209518356, "grad_norm": 0.3172553479671478, "learning_rate": 1.9737245138124712e-05, "loss": 0.5587, "step": 6990 }, { "epoch": 0.1482683294097686, "grad_norm": 0.31612786650657654, "learning_rate": 1.973716918609918e-05, "loss": 0.5107, "step": 6991 }, { "epoch": 0.14828953786770163, "grad_norm": 0.40603527426719666, "learning_rate": 1.9737093223244056e-05, "loss": 0.5557, "step": 6992 }, { "epoch": 0.14831074632563465, "grad_norm": 0.3316144645214081, "learning_rate": 1.9737017249559417e-05, "loss": 0.4812, "step": 6993 }, { "epoch": 0.1483319547835677, "grad_norm": 0.31535419821739197, "learning_rate": 1.973694126504535e-05, "loss": 0.564, "step": 6994 }, { "epoch": 0.14835316324150072, "grad_norm": 0.3466285467147827, "learning_rate": 1.9736865269701946e-05, "loss": 0.602, "step": 6995 }, { "epoch": 0.14837437169943374, "grad_norm": 0.32508283853530884, "learning_rate": 1.9736789263529284e-05, "loss": 0.5132, "step": 6996 }, { "epoch": 0.14839558015736676, "grad_norm": 0.33330869674682617, "learning_rate": 1.973671324652745e-05, "loss": 0.6372, "step": 6997 }, { "epoch": 0.14841678861529978, "grad_norm": 0.3009128272533417, "learning_rate": 1.973663721869653e-05, "loss": 0.5175, "step": 6998 }, { "epoch": 0.1484379970732328, "grad_norm": 0.3113533556461334, "learning_rate": 1.9736561180036602e-05, "loss": 0.6298, "step": 6999 }, { "epoch": 0.14845920553116582, "grad_norm": 0.3451727628707886, "learning_rate": 1.9736485130547756e-05, "loss": 0.5406, "step": 7000 }, { "epoch": 0.14848041398909886, "grad_norm": 0.3671184182167053, "learning_rate": 1.9736409070230078e-05, "loss": 0.517, "step": 7001 }, { "epoch": 0.14850162244703188, "grad_norm": 0.3418494462966919, "learning_rate": 1.973633299908365e-05, "loss": 0.4874, "step": 7002 }, { "epoch": 0.1485228309049649, "grad_norm": 0.36104878783226013, "learning_rate": 1.9736256917108555e-05, "loss": 0.5588, "step": 7003 }, { "epoch": 0.14854403936289792, "grad_norm": 0.35624200105667114, "learning_rate": 1.9736180824304886e-05, "loss": 0.5672, "step": 7004 }, { "epoch": 0.14856524782083094, "grad_norm": 0.3196251392364502, "learning_rate": 1.9736104720672714e-05, "loss": 0.3709, "step": 7005 }, { "epoch": 0.14858645627876396, "grad_norm": 0.30785372853279114, "learning_rate": 1.973602860621213e-05, "loss": 0.54, "step": 7006 }, { "epoch": 0.14860766473669698, "grad_norm": 0.3405420780181885, "learning_rate": 1.9735952480923228e-05, "loss": 0.461, "step": 7007 }, { "epoch": 0.14862887319463003, "grad_norm": 0.3314864933490753, "learning_rate": 1.973587634480608e-05, "loss": 0.6389, "step": 7008 }, { "epoch": 0.14865008165256305, "grad_norm": 0.3067796230316162, "learning_rate": 1.973580019786077e-05, "loss": 0.5002, "step": 7009 }, { "epoch": 0.14867129011049607, "grad_norm": 0.3413553237915039, "learning_rate": 1.9735724040087393e-05, "loss": 0.5175, "step": 7010 }, { "epoch": 0.1486924985684291, "grad_norm": 0.47998714447021484, "learning_rate": 1.973564787148603e-05, "loss": 0.5012, "step": 7011 }, { "epoch": 0.1487137070263621, "grad_norm": 0.33387255668640137, "learning_rate": 1.973557169205676e-05, "loss": 0.557, "step": 7012 }, { "epoch": 0.14873491548429513, "grad_norm": 0.32594382762908936, "learning_rate": 1.9735495501799673e-05, "loss": 0.51, "step": 7013 }, { "epoch": 0.14875612394222815, "grad_norm": 0.3429100215435028, "learning_rate": 1.973541930071485e-05, "loss": 0.5218, "step": 7014 }, { "epoch": 0.1487773324001612, "grad_norm": 0.28092557191848755, "learning_rate": 1.973534308880238e-05, "loss": 0.4759, "step": 7015 }, { "epoch": 0.1487985408580942, "grad_norm": 0.3288383483886719, "learning_rate": 1.973526686606235e-05, "loss": 0.4953, "step": 7016 }, { "epoch": 0.14881974931602723, "grad_norm": 0.33381685614585876, "learning_rate": 1.9735190632494836e-05, "loss": 0.5983, "step": 7017 }, { "epoch": 0.14884095777396025, "grad_norm": 0.3124115467071533, "learning_rate": 1.9735114388099927e-05, "loss": 0.5017, "step": 7018 }, { "epoch": 0.14886216623189327, "grad_norm": 0.46717000007629395, "learning_rate": 1.973503813287771e-05, "loss": 0.5692, "step": 7019 }, { "epoch": 0.1488833746898263, "grad_norm": 0.3933444321155548, "learning_rate": 1.973496186682827e-05, "loss": 0.4676, "step": 7020 }, { "epoch": 0.14890458314775934, "grad_norm": 0.3185195028781891, "learning_rate": 1.9734885589951687e-05, "loss": 0.463, "step": 7021 }, { "epoch": 0.14892579160569236, "grad_norm": 0.387712299823761, "learning_rate": 1.973480930224805e-05, "loss": 0.5398, "step": 7022 }, { "epoch": 0.14894700006362538, "grad_norm": 0.32808178663253784, "learning_rate": 1.9734733003717444e-05, "loss": 0.4967, "step": 7023 }, { "epoch": 0.1489682085215584, "grad_norm": 0.39349764585494995, "learning_rate": 1.9734656694359953e-05, "loss": 0.5247, "step": 7024 }, { "epoch": 0.14898941697949142, "grad_norm": 0.30920112133026123, "learning_rate": 1.9734580374175657e-05, "loss": 0.5598, "step": 7025 }, { "epoch": 0.14901062543742444, "grad_norm": 0.33388206362724304, "learning_rate": 1.973450404316465e-05, "loss": 0.5923, "step": 7026 }, { "epoch": 0.14903183389535746, "grad_norm": 0.31089383363723755, "learning_rate": 1.973442770132701e-05, "loss": 0.4642, "step": 7027 }, { "epoch": 0.1490530423532905, "grad_norm": 0.30965495109558105, "learning_rate": 1.9734351348662825e-05, "loss": 0.4537, "step": 7028 }, { "epoch": 0.14907425081122352, "grad_norm": 0.32203957438468933, "learning_rate": 1.9734274985172173e-05, "loss": 0.5605, "step": 7029 }, { "epoch": 0.14909545926915654, "grad_norm": 0.4001864492893219, "learning_rate": 1.9734198610855153e-05, "loss": 0.5034, "step": 7030 }, { "epoch": 0.14911666772708956, "grad_norm": 0.3124341666698456, "learning_rate": 1.973412222571184e-05, "loss": 0.5467, "step": 7031 }, { "epoch": 0.14913787618502258, "grad_norm": 0.34253013134002686, "learning_rate": 1.973404582974232e-05, "loss": 0.5539, "step": 7032 }, { "epoch": 0.1491590846429556, "grad_norm": 0.3529592454433441, "learning_rate": 1.973396942294668e-05, "loss": 0.4595, "step": 7033 }, { "epoch": 0.14918029310088862, "grad_norm": 0.42313051223754883, "learning_rate": 1.9733893005325e-05, "loss": 0.6683, "step": 7034 }, { "epoch": 0.14920150155882167, "grad_norm": 0.3298081159591675, "learning_rate": 1.973381657687737e-05, "loss": 0.5124, "step": 7035 }, { "epoch": 0.1492227100167547, "grad_norm": 0.31831908226013184, "learning_rate": 1.9733740137603877e-05, "loss": 0.6355, "step": 7036 }, { "epoch": 0.1492439184746877, "grad_norm": 0.3797280490398407, "learning_rate": 1.97336636875046e-05, "loss": 0.549, "step": 7037 }, { "epoch": 0.14926512693262073, "grad_norm": 0.2978169023990631, "learning_rate": 1.9733587226579627e-05, "loss": 0.4351, "step": 7038 }, { "epoch": 0.14928633539055375, "grad_norm": 0.32749149203300476, "learning_rate": 1.9733510754829044e-05, "loss": 0.4941, "step": 7039 }, { "epoch": 0.14930754384848677, "grad_norm": 0.3982941508293152, "learning_rate": 1.9733434272252933e-05, "loss": 0.6035, "step": 7040 }, { "epoch": 0.1493287523064198, "grad_norm": 0.33403322100639343, "learning_rate": 1.973335777885138e-05, "loss": 0.5434, "step": 7041 }, { "epoch": 0.14934996076435283, "grad_norm": 0.3515329360961914, "learning_rate": 1.9733281274624478e-05, "loss": 0.5358, "step": 7042 }, { "epoch": 0.14937116922228585, "grad_norm": 0.3086894154548645, "learning_rate": 1.97332047595723e-05, "loss": 0.5889, "step": 7043 }, { "epoch": 0.14939237768021887, "grad_norm": 0.3145686089992523, "learning_rate": 1.9733128233694936e-05, "loss": 0.4593, "step": 7044 }, { "epoch": 0.1494135861381519, "grad_norm": 0.29212790727615356, "learning_rate": 1.973305169699247e-05, "loss": 0.4715, "step": 7045 }, { "epoch": 0.1494347945960849, "grad_norm": 0.3393513262271881, "learning_rate": 1.973297514946499e-05, "loss": 0.4654, "step": 7046 }, { "epoch": 0.14945600305401793, "grad_norm": 0.3987804651260376, "learning_rate": 1.973289859111258e-05, "loss": 0.5331, "step": 7047 }, { "epoch": 0.14947721151195095, "grad_norm": 0.41818365454673767, "learning_rate": 1.9732822021935326e-05, "loss": 0.5259, "step": 7048 }, { "epoch": 0.149498419969884, "grad_norm": 0.3231777548789978, "learning_rate": 1.973274544193331e-05, "loss": 0.4834, "step": 7049 }, { "epoch": 0.14951962842781702, "grad_norm": 0.3209832012653351, "learning_rate": 1.973266885110662e-05, "loss": 0.6002, "step": 7050 }, { "epoch": 0.14954083688575004, "grad_norm": 0.4051654636859894, "learning_rate": 1.973259224945534e-05, "loss": 0.4753, "step": 7051 }, { "epoch": 0.14956204534368306, "grad_norm": 0.3127768933773041, "learning_rate": 1.9732515636979553e-05, "loss": 0.523, "step": 7052 }, { "epoch": 0.14958325380161608, "grad_norm": 0.3330953121185303, "learning_rate": 1.9732439013679353e-05, "loss": 0.5158, "step": 7053 }, { "epoch": 0.1496044622595491, "grad_norm": 0.3292379677295685, "learning_rate": 1.9732362379554814e-05, "loss": 0.5795, "step": 7054 }, { "epoch": 0.14962567071748215, "grad_norm": 0.2998238503932953, "learning_rate": 1.9732285734606023e-05, "loss": 0.4385, "step": 7055 }, { "epoch": 0.14964687917541517, "grad_norm": 0.307910293340683, "learning_rate": 1.9732209078833077e-05, "loss": 0.5079, "step": 7056 }, { "epoch": 0.14966808763334818, "grad_norm": 0.3708224892616272, "learning_rate": 1.9732132412236046e-05, "loss": 0.4768, "step": 7057 }, { "epoch": 0.1496892960912812, "grad_norm": 0.34323662519454956, "learning_rate": 1.9732055734815025e-05, "loss": 0.569, "step": 7058 }, { "epoch": 0.14971050454921422, "grad_norm": 0.32346728444099426, "learning_rate": 1.9731979046570094e-05, "loss": 0.5511, "step": 7059 }, { "epoch": 0.14973171300714724, "grad_norm": 0.345077782869339, "learning_rate": 1.973190234750134e-05, "loss": 0.5377, "step": 7060 }, { "epoch": 0.14975292146508026, "grad_norm": 0.34707242250442505, "learning_rate": 1.9731825637608853e-05, "loss": 0.5017, "step": 7061 }, { "epoch": 0.1497741299230133, "grad_norm": 0.37677785754203796, "learning_rate": 1.973174891689271e-05, "loss": 0.4933, "step": 7062 }, { "epoch": 0.14979533838094633, "grad_norm": 0.4512099623680115, "learning_rate": 1.9731672185353e-05, "loss": 0.5327, "step": 7063 }, { "epoch": 0.14981654683887935, "grad_norm": 0.3045765459537506, "learning_rate": 1.973159544298981e-05, "loss": 0.5129, "step": 7064 }, { "epoch": 0.14983775529681237, "grad_norm": 0.34684115648269653, "learning_rate": 1.9731518689803227e-05, "loss": 0.5491, "step": 7065 }, { "epoch": 0.1498589637547454, "grad_norm": 0.3246382772922516, "learning_rate": 1.973144192579333e-05, "loss": 0.538, "step": 7066 }, { "epoch": 0.1498801722126784, "grad_norm": 0.31693097949028015, "learning_rate": 1.9731365150960207e-05, "loss": 0.4566, "step": 7067 }, { "epoch": 0.14990138067061143, "grad_norm": 0.3143620193004608, "learning_rate": 1.973128836530395e-05, "loss": 0.4625, "step": 7068 }, { "epoch": 0.14992258912854448, "grad_norm": 0.39438363909721375, "learning_rate": 1.9731211568824632e-05, "loss": 0.5588, "step": 7069 }, { "epoch": 0.1499437975864775, "grad_norm": 0.35826393961906433, "learning_rate": 1.9731134761522347e-05, "loss": 0.5368, "step": 7070 }, { "epoch": 0.14996500604441051, "grad_norm": 0.3377465605735779, "learning_rate": 1.973105794339718e-05, "loss": 0.5687, "step": 7071 }, { "epoch": 0.14998621450234353, "grad_norm": 0.3592316210269928, "learning_rate": 1.9730981114449214e-05, "loss": 0.5994, "step": 7072 }, { "epoch": 0.15000742296027655, "grad_norm": 0.3197788894176483, "learning_rate": 1.9730904274678536e-05, "loss": 0.5701, "step": 7073 }, { "epoch": 0.15002863141820957, "grad_norm": 0.35786017775535583, "learning_rate": 1.973082742408523e-05, "loss": 0.5587, "step": 7074 }, { "epoch": 0.1500498398761426, "grad_norm": 0.3654501438140869, "learning_rate": 1.973075056266938e-05, "loss": 0.5696, "step": 7075 }, { "epoch": 0.15007104833407564, "grad_norm": 0.33392930030822754, "learning_rate": 1.9730673690431077e-05, "loss": 0.5854, "step": 7076 }, { "epoch": 0.15009225679200866, "grad_norm": 0.3173042833805084, "learning_rate": 1.9730596807370402e-05, "loss": 0.4793, "step": 7077 }, { "epoch": 0.15011346524994168, "grad_norm": 0.33716559410095215, "learning_rate": 1.9730519913487445e-05, "loss": 0.4957, "step": 7078 }, { "epoch": 0.1501346737078747, "grad_norm": 0.36636608839035034, "learning_rate": 1.9730443008782286e-05, "loss": 0.4829, "step": 7079 }, { "epoch": 0.15015588216580772, "grad_norm": 0.3322452902793884, "learning_rate": 1.9730366093255007e-05, "loss": 0.5805, "step": 7080 }, { "epoch": 0.15017709062374074, "grad_norm": 0.30617082118988037, "learning_rate": 1.9730289166905707e-05, "loss": 0.4577, "step": 7081 }, { "epoch": 0.15019829908167376, "grad_norm": 0.3503202795982361, "learning_rate": 1.9730212229734464e-05, "loss": 0.5071, "step": 7082 }, { "epoch": 0.1502195075396068, "grad_norm": 0.35270723700523376, "learning_rate": 1.9730135281741358e-05, "loss": 0.6726, "step": 7083 }, { "epoch": 0.15024071599753983, "grad_norm": 0.312973290681839, "learning_rate": 1.9730058322926484e-05, "loss": 0.546, "step": 7084 }, { "epoch": 0.15026192445547285, "grad_norm": 0.3284173905849457, "learning_rate": 1.972998135328992e-05, "loss": 0.4774, "step": 7085 }, { "epoch": 0.15028313291340586, "grad_norm": 0.33570924401283264, "learning_rate": 1.972990437283176e-05, "loss": 0.5524, "step": 7086 }, { "epoch": 0.15030434137133888, "grad_norm": 0.30225124955177307, "learning_rate": 1.9729827381552082e-05, "loss": 0.5236, "step": 7087 }, { "epoch": 0.1503255498292719, "grad_norm": 0.3191435933113098, "learning_rate": 1.972975037945098e-05, "loss": 0.5201, "step": 7088 }, { "epoch": 0.15034675828720492, "grad_norm": 0.3013460040092468, "learning_rate": 1.9729673366528525e-05, "loss": 0.525, "step": 7089 }, { "epoch": 0.15036796674513797, "grad_norm": 0.34188491106033325, "learning_rate": 1.972959634278482e-05, "loss": 0.5045, "step": 7090 }, { "epoch": 0.150389175203071, "grad_norm": 0.32174962759017944, "learning_rate": 1.9729519308219935e-05, "loss": 0.5003, "step": 7091 }, { "epoch": 0.150410383661004, "grad_norm": 0.3298852741718292, "learning_rate": 1.9729442262833967e-05, "loss": 0.5623, "step": 7092 }, { "epoch": 0.15043159211893703, "grad_norm": 0.37660834193229675, "learning_rate": 1.9729365206627e-05, "loss": 0.5143, "step": 7093 }, { "epoch": 0.15045280057687005, "grad_norm": 0.36423197388648987, "learning_rate": 1.9729288139599115e-05, "loss": 0.5363, "step": 7094 }, { "epoch": 0.15047400903480307, "grad_norm": 0.2983816862106323, "learning_rate": 1.97292110617504e-05, "loss": 0.4979, "step": 7095 }, { "epoch": 0.15049521749273612, "grad_norm": 0.37548744678497314, "learning_rate": 1.972913397308094e-05, "loss": 0.6739, "step": 7096 }, { "epoch": 0.15051642595066914, "grad_norm": 0.3136516511440277, "learning_rate": 1.9729056873590825e-05, "loss": 0.4602, "step": 7097 }, { "epoch": 0.15053763440860216, "grad_norm": 0.33895957469940186, "learning_rate": 1.9728979763280135e-05, "loss": 0.5223, "step": 7098 }, { "epoch": 0.15055884286653518, "grad_norm": 0.33967986702919006, "learning_rate": 1.9728902642148958e-05, "loss": 0.5328, "step": 7099 }, { "epoch": 0.1505800513244682, "grad_norm": 0.3669241666793823, "learning_rate": 1.9728825510197382e-05, "loss": 0.5282, "step": 7100 }, { "epoch": 0.15060125978240121, "grad_norm": 0.33741146326065063, "learning_rate": 1.972874836742549e-05, "loss": 0.5574, "step": 7101 }, { "epoch": 0.15062246824033423, "grad_norm": 0.3484894037246704, "learning_rate": 1.972867121383337e-05, "loss": 0.6367, "step": 7102 }, { "epoch": 0.15064367669826728, "grad_norm": 0.37934526801109314, "learning_rate": 1.9728594049421103e-05, "loss": 0.5571, "step": 7103 }, { "epoch": 0.1506648851562003, "grad_norm": 0.3325307369232178, "learning_rate": 1.972851687418878e-05, "loss": 0.533, "step": 7104 }, { "epoch": 0.15068609361413332, "grad_norm": 0.3036656975746155, "learning_rate": 1.9728439688136484e-05, "loss": 0.4634, "step": 7105 }, { "epoch": 0.15070730207206634, "grad_norm": 0.30973029136657715, "learning_rate": 1.9728362491264305e-05, "loss": 0.4957, "step": 7106 }, { "epoch": 0.15072851052999936, "grad_norm": 0.36588600277900696, "learning_rate": 1.972828528357232e-05, "loss": 0.5686, "step": 7107 }, { "epoch": 0.15074971898793238, "grad_norm": 0.3539406359195709, "learning_rate": 1.9728208065060625e-05, "loss": 0.5341, "step": 7108 }, { "epoch": 0.1507709274458654, "grad_norm": 0.31784775853157043, "learning_rate": 1.97281308357293e-05, "loss": 0.467, "step": 7109 }, { "epoch": 0.15079213590379845, "grad_norm": 0.30585646629333496, "learning_rate": 1.9728053595578433e-05, "loss": 0.5043, "step": 7110 }, { "epoch": 0.15081334436173147, "grad_norm": 0.573807954788208, "learning_rate": 1.972797634460811e-05, "loss": 0.5027, "step": 7111 }, { "epoch": 0.15083455281966449, "grad_norm": 0.34290120005607605, "learning_rate": 1.9727899082818415e-05, "loss": 0.5276, "step": 7112 }, { "epoch": 0.1508557612775975, "grad_norm": 0.3672618865966797, "learning_rate": 1.972782181020944e-05, "loss": 0.6035, "step": 7113 }, { "epoch": 0.15087696973553053, "grad_norm": 0.3061952590942383, "learning_rate": 1.972774452678126e-05, "loss": 0.4617, "step": 7114 }, { "epoch": 0.15089817819346354, "grad_norm": 0.34375038743019104, "learning_rate": 1.972766723253397e-05, "loss": 0.5355, "step": 7115 }, { "epoch": 0.15091938665139656, "grad_norm": 0.32147300243377686, "learning_rate": 1.972758992746765e-05, "loss": 0.5141, "step": 7116 }, { "epoch": 0.1509405951093296, "grad_norm": 0.36102133989334106, "learning_rate": 1.972751261158239e-05, "loss": 0.577, "step": 7117 }, { "epoch": 0.15096180356726263, "grad_norm": 0.34164610505104065, "learning_rate": 1.9727435284878277e-05, "loss": 0.4961, "step": 7118 }, { "epoch": 0.15098301202519565, "grad_norm": 0.3696339726448059, "learning_rate": 1.9727357947355393e-05, "loss": 0.5028, "step": 7119 }, { "epoch": 0.15100422048312867, "grad_norm": 0.316423624753952, "learning_rate": 1.9727280599013828e-05, "loss": 0.5131, "step": 7120 }, { "epoch": 0.1510254289410617, "grad_norm": 0.32871678471565247, "learning_rate": 1.9727203239853664e-05, "loss": 0.602, "step": 7121 }, { "epoch": 0.1510466373989947, "grad_norm": 0.33770954608917236, "learning_rate": 1.9727125869874988e-05, "loss": 0.6086, "step": 7122 }, { "epoch": 0.15106784585692773, "grad_norm": 0.32131335139274597, "learning_rate": 1.9727048489077888e-05, "loss": 0.5306, "step": 7123 }, { "epoch": 0.15108905431486078, "grad_norm": 0.3182728588581085, "learning_rate": 1.9726971097462454e-05, "loss": 0.5269, "step": 7124 }, { "epoch": 0.1511102627727938, "grad_norm": 0.3287563920021057, "learning_rate": 1.972689369502876e-05, "loss": 0.5662, "step": 7125 }, { "epoch": 0.15113147123072682, "grad_norm": 0.3262871503829956, "learning_rate": 1.9726816281776907e-05, "loss": 0.5541, "step": 7126 }, { "epoch": 0.15115267968865984, "grad_norm": 0.3174647092819214, "learning_rate": 1.9726738857706968e-05, "loss": 0.5008, "step": 7127 }, { "epoch": 0.15117388814659286, "grad_norm": 0.33598729968070984, "learning_rate": 1.9726661422819036e-05, "loss": 0.5462, "step": 7128 }, { "epoch": 0.15119509660452587, "grad_norm": 0.34141868352890015, "learning_rate": 1.9726583977113195e-05, "loss": 0.5213, "step": 7129 }, { "epoch": 0.15121630506245892, "grad_norm": 0.3192191421985626, "learning_rate": 1.972650652058953e-05, "loss": 0.5008, "step": 7130 }, { "epoch": 0.15123751352039194, "grad_norm": 0.3284054696559906, "learning_rate": 1.972642905324813e-05, "loss": 0.4461, "step": 7131 }, { "epoch": 0.15125872197832496, "grad_norm": 0.3298414349555969, "learning_rate": 1.9726351575089083e-05, "loss": 0.4889, "step": 7132 }, { "epoch": 0.15127993043625798, "grad_norm": 0.3270578980445862, "learning_rate": 1.972627408611247e-05, "loss": 0.567, "step": 7133 }, { "epoch": 0.151301138894191, "grad_norm": 0.3439449667930603, "learning_rate": 1.972619658631838e-05, "loss": 0.5514, "step": 7134 }, { "epoch": 0.15132234735212402, "grad_norm": 0.3312893807888031, "learning_rate": 1.97261190757069e-05, "loss": 0.4984, "step": 7135 }, { "epoch": 0.15134355581005704, "grad_norm": 0.36144089698791504, "learning_rate": 1.9726041554278113e-05, "loss": 0.5519, "step": 7136 }, { "epoch": 0.1513647642679901, "grad_norm": 0.3335871994495392, "learning_rate": 1.972596402203211e-05, "loss": 0.5296, "step": 7137 }, { "epoch": 0.1513859727259231, "grad_norm": 0.32802069187164307, "learning_rate": 1.9725886478968968e-05, "loss": 0.5751, "step": 7138 }, { "epoch": 0.15140718118385613, "grad_norm": 0.3285602927207947, "learning_rate": 1.9725808925088784e-05, "loss": 0.5067, "step": 7139 }, { "epoch": 0.15142838964178915, "grad_norm": 0.35762014985084534, "learning_rate": 1.972573136039164e-05, "loss": 0.611, "step": 7140 }, { "epoch": 0.15144959809972217, "grad_norm": 0.41684389114379883, "learning_rate": 1.972565378487762e-05, "loss": 0.5333, "step": 7141 }, { "epoch": 0.15147080655765519, "grad_norm": 0.3391675055027008, "learning_rate": 1.9725576198546814e-05, "loss": 0.5285, "step": 7142 }, { "epoch": 0.1514920150155882, "grad_norm": 0.3823765218257904, "learning_rate": 1.9725498601399305e-05, "loss": 0.61, "step": 7143 }, { "epoch": 0.15151322347352125, "grad_norm": 0.3170467019081116, "learning_rate": 1.9725420993435183e-05, "loss": 0.4753, "step": 7144 }, { "epoch": 0.15153443193145427, "grad_norm": 0.3284062147140503, "learning_rate": 1.972534337465453e-05, "loss": 0.4965, "step": 7145 }, { "epoch": 0.1515556403893873, "grad_norm": 0.3302502930164337, "learning_rate": 1.9725265745057438e-05, "loss": 0.5274, "step": 7146 }, { "epoch": 0.1515768488473203, "grad_norm": 0.43728503584861755, "learning_rate": 1.9725188104643988e-05, "loss": 0.4856, "step": 7147 }, { "epoch": 0.15159805730525333, "grad_norm": 0.35289862751960754, "learning_rate": 1.9725110453414266e-05, "loss": 0.5744, "step": 7148 }, { "epoch": 0.15161926576318635, "grad_norm": 0.329375296831131, "learning_rate": 1.9725032791368367e-05, "loss": 0.5348, "step": 7149 }, { "epoch": 0.15164047422111937, "grad_norm": 0.3287900984287262, "learning_rate": 1.9724955118506366e-05, "loss": 0.5254, "step": 7150 }, { "epoch": 0.15166168267905242, "grad_norm": 0.31714576482772827, "learning_rate": 1.9724877434828357e-05, "loss": 0.456, "step": 7151 }, { "epoch": 0.15168289113698544, "grad_norm": 0.31862521171569824, "learning_rate": 1.9724799740334418e-05, "loss": 0.5413, "step": 7152 }, { "epoch": 0.15170409959491846, "grad_norm": 0.3597536087036133, "learning_rate": 1.9724722035024648e-05, "loss": 0.5413, "step": 7153 }, { "epoch": 0.15172530805285148, "grad_norm": 0.8961549997329712, "learning_rate": 1.972464431889912e-05, "loss": 0.5548, "step": 7154 }, { "epoch": 0.1517465165107845, "grad_norm": 0.32068178057670593, "learning_rate": 1.9724566591957935e-05, "loss": 0.5102, "step": 7155 }, { "epoch": 0.15176772496871752, "grad_norm": 0.2993648946285248, "learning_rate": 1.9724488854201165e-05, "loss": 0.58, "step": 7156 }, { "epoch": 0.15178893342665054, "grad_norm": 0.3944433331489563, "learning_rate": 1.9724411105628905e-05, "loss": 0.4681, "step": 7157 }, { "epoch": 0.15181014188458358, "grad_norm": 0.276589035987854, "learning_rate": 1.972433334624124e-05, "loss": 0.4856, "step": 7158 }, { "epoch": 0.1518313503425166, "grad_norm": 0.2902848720550537, "learning_rate": 1.9724255576038258e-05, "loss": 0.5169, "step": 7159 }, { "epoch": 0.15185255880044962, "grad_norm": 0.3411172926425934, "learning_rate": 1.9724177795020042e-05, "loss": 0.5015, "step": 7160 }, { "epoch": 0.15187376725838264, "grad_norm": 0.30499717593193054, "learning_rate": 1.9724100003186677e-05, "loss": 0.5288, "step": 7161 }, { "epoch": 0.15189497571631566, "grad_norm": 0.3417057693004608, "learning_rate": 1.9724022200538255e-05, "loss": 0.602, "step": 7162 }, { "epoch": 0.15191618417424868, "grad_norm": 0.3398955464363098, "learning_rate": 1.972394438707486e-05, "loss": 0.4663, "step": 7163 }, { "epoch": 0.1519373926321817, "grad_norm": 0.3133576810359955, "learning_rate": 1.9723866562796578e-05, "loss": 0.5307, "step": 7164 }, { "epoch": 0.15195860109011475, "grad_norm": 0.31989142298698425, "learning_rate": 1.9723788727703494e-05, "loss": 0.5059, "step": 7165 }, { "epoch": 0.15197980954804777, "grad_norm": 0.32416874170303345, "learning_rate": 1.9723710881795697e-05, "loss": 0.4584, "step": 7166 }, { "epoch": 0.1520010180059808, "grad_norm": 0.34212300181388855, "learning_rate": 1.972363302507328e-05, "loss": 0.593, "step": 7167 }, { "epoch": 0.1520222264639138, "grad_norm": 0.3662222623825073, "learning_rate": 1.9723555157536316e-05, "loss": 0.6067, "step": 7168 }, { "epoch": 0.15204343492184683, "grad_norm": 0.3900753855705261, "learning_rate": 1.97234772791849e-05, "loss": 0.494, "step": 7169 }, { "epoch": 0.15206464337977985, "grad_norm": 0.45628440380096436, "learning_rate": 1.9723399390019118e-05, "loss": 0.5606, "step": 7170 }, { "epoch": 0.1520858518377129, "grad_norm": 0.3558356761932373, "learning_rate": 1.9723321490039055e-05, "loss": 0.6044, "step": 7171 }, { "epoch": 0.1521070602956459, "grad_norm": 0.35219576954841614, "learning_rate": 1.97232435792448e-05, "loss": 0.5797, "step": 7172 }, { "epoch": 0.15212826875357893, "grad_norm": 0.34277936816215515, "learning_rate": 1.9723165657636433e-05, "loss": 0.645, "step": 7173 }, { "epoch": 0.15214947721151195, "grad_norm": 0.29030391573905945, "learning_rate": 1.9723087725214053e-05, "loss": 0.4968, "step": 7174 }, { "epoch": 0.15217068566944497, "grad_norm": 0.35198017954826355, "learning_rate": 1.9723009781977732e-05, "loss": 0.4959, "step": 7175 }, { "epoch": 0.152191894127378, "grad_norm": 0.3517872989177704, "learning_rate": 1.972293182792757e-05, "loss": 0.5355, "step": 7176 }, { "epoch": 0.152213102585311, "grad_norm": 0.5595842003822327, "learning_rate": 1.972285386306364e-05, "loss": 0.5425, "step": 7177 }, { "epoch": 0.15223431104324406, "grad_norm": 0.3208792507648468, "learning_rate": 1.9722775887386043e-05, "loss": 0.5076, "step": 7178 }, { "epoch": 0.15225551950117708, "grad_norm": 0.3340685963630676, "learning_rate": 1.972269790089486e-05, "loss": 0.5276, "step": 7179 }, { "epoch": 0.1522767279591101, "grad_norm": 0.3217921555042267, "learning_rate": 1.9722619903590172e-05, "loss": 0.5117, "step": 7180 }, { "epoch": 0.15229793641704312, "grad_norm": 0.33627429604530334, "learning_rate": 1.9722541895472073e-05, "loss": 0.6153, "step": 7181 }, { "epoch": 0.15231914487497614, "grad_norm": 0.31470444798469543, "learning_rate": 1.9722463876540646e-05, "loss": 0.5683, "step": 7182 }, { "epoch": 0.15234035333290916, "grad_norm": 0.3381914794445038, "learning_rate": 1.9722385846795983e-05, "loss": 0.6277, "step": 7183 }, { "epoch": 0.15236156179084218, "grad_norm": 0.3361406624317169, "learning_rate": 1.9722307806238165e-05, "loss": 0.4821, "step": 7184 }, { "epoch": 0.15238277024877522, "grad_norm": 0.3261617124080658, "learning_rate": 1.972222975486728e-05, "loss": 0.4909, "step": 7185 }, { "epoch": 0.15240397870670824, "grad_norm": 0.35688915848731995, "learning_rate": 1.9722151692683417e-05, "loss": 0.5563, "step": 7186 }, { "epoch": 0.15242518716464126, "grad_norm": 0.3800486922264099, "learning_rate": 1.9722073619686656e-05, "loss": 0.5491, "step": 7187 }, { "epoch": 0.15244639562257428, "grad_norm": 0.39889055490493774, "learning_rate": 1.9721995535877093e-05, "loss": 0.5959, "step": 7188 }, { "epoch": 0.1524676040805073, "grad_norm": 0.3249931037425995, "learning_rate": 1.9721917441254812e-05, "loss": 0.5087, "step": 7189 }, { "epoch": 0.15248881253844032, "grad_norm": 0.36297449469566345, "learning_rate": 1.9721839335819898e-05, "loss": 0.5307, "step": 7190 }, { "epoch": 0.15251002099637334, "grad_norm": 0.3492041826248169, "learning_rate": 1.9721761219572436e-05, "loss": 0.5184, "step": 7191 }, { "epoch": 0.1525312294543064, "grad_norm": 0.3495144546031952, "learning_rate": 1.9721683092512523e-05, "loss": 0.5406, "step": 7192 }, { "epoch": 0.1525524379122394, "grad_norm": 0.31716933846473694, "learning_rate": 1.9721604954640232e-05, "loss": 0.4797, "step": 7193 }, { "epoch": 0.15257364637017243, "grad_norm": 0.30950435996055603, "learning_rate": 1.9721526805955656e-05, "loss": 0.5382, "step": 7194 }, { "epoch": 0.15259485482810545, "grad_norm": 0.3130140006542206, "learning_rate": 1.9721448646458885e-05, "loss": 0.4783, "step": 7195 }, { "epoch": 0.15261606328603847, "grad_norm": 0.3154192566871643, "learning_rate": 1.9721370476150005e-05, "loss": 0.5487, "step": 7196 }, { "epoch": 0.1526372717439715, "grad_norm": 0.4052237570285797, "learning_rate": 1.97212922950291e-05, "loss": 0.4703, "step": 7197 }, { "epoch": 0.1526584802019045, "grad_norm": 0.3460741937160492, "learning_rate": 1.9721214103096254e-05, "loss": 0.5635, "step": 7198 }, { "epoch": 0.15267968865983755, "grad_norm": 0.3103505074977875, "learning_rate": 1.9721135900351563e-05, "loss": 0.4939, "step": 7199 }, { "epoch": 0.15270089711777057, "grad_norm": 0.417378693819046, "learning_rate": 1.972105768679511e-05, "loss": 0.5554, "step": 7200 }, { "epoch": 0.1527221055757036, "grad_norm": 0.3259357213973999, "learning_rate": 1.9720979462426974e-05, "loss": 0.5127, "step": 7201 }, { "epoch": 0.1527433140336366, "grad_norm": 0.298814982175827, "learning_rate": 1.9720901227247256e-05, "loss": 0.5303, "step": 7202 }, { "epoch": 0.15276452249156963, "grad_norm": 0.3713344931602478, "learning_rate": 1.9720822981256034e-05, "loss": 0.5443, "step": 7203 }, { "epoch": 0.15278573094950265, "grad_norm": 0.310269832611084, "learning_rate": 1.9720744724453395e-05, "loss": 0.514, "step": 7204 }, { "epoch": 0.1528069394074357, "grad_norm": 0.3214024305343628, "learning_rate": 1.9720666456839434e-05, "loss": 0.5518, "step": 7205 }, { "epoch": 0.15282814786536872, "grad_norm": 0.36668577790260315, "learning_rate": 1.9720588178414225e-05, "loss": 0.5715, "step": 7206 }, { "epoch": 0.15284935632330174, "grad_norm": 0.3188350796699524, "learning_rate": 1.9720509889177864e-05, "loss": 0.5317, "step": 7207 }, { "epoch": 0.15287056478123476, "grad_norm": 0.37276673316955566, "learning_rate": 1.972043158913044e-05, "loss": 0.6552, "step": 7208 }, { "epoch": 0.15289177323916778, "grad_norm": 0.5893368124961853, "learning_rate": 1.9720353278272034e-05, "loss": 0.4881, "step": 7209 }, { "epoch": 0.1529129816971008, "grad_norm": 0.33689337968826294, "learning_rate": 1.9720274956602738e-05, "loss": 0.4859, "step": 7210 }, { "epoch": 0.15293419015503382, "grad_norm": 0.34371164441108704, "learning_rate": 1.972019662412263e-05, "loss": 0.5736, "step": 7211 }, { "epoch": 0.15295539861296686, "grad_norm": 0.2860965132713318, "learning_rate": 1.972011828083181e-05, "loss": 0.4567, "step": 7212 }, { "epoch": 0.15297660707089988, "grad_norm": 0.3609355092048645, "learning_rate": 1.972003992673036e-05, "loss": 0.5256, "step": 7213 }, { "epoch": 0.1529978155288329, "grad_norm": 0.3498959541320801, "learning_rate": 1.971996156181836e-05, "loss": 0.5523, "step": 7214 }, { "epoch": 0.15301902398676592, "grad_norm": 0.3791692852973938, "learning_rate": 1.9719883186095907e-05, "loss": 0.5482, "step": 7215 }, { "epoch": 0.15304023244469894, "grad_norm": 0.32743313908576965, "learning_rate": 1.9719804799563084e-05, "loss": 0.5879, "step": 7216 }, { "epoch": 0.15306144090263196, "grad_norm": 0.30195629596710205, "learning_rate": 1.971972640221998e-05, "loss": 0.4888, "step": 7217 }, { "epoch": 0.15308264936056498, "grad_norm": 0.3241075873374939, "learning_rate": 1.9719647994066674e-05, "loss": 0.5322, "step": 7218 }, { "epoch": 0.15310385781849803, "grad_norm": 0.33477193117141724, "learning_rate": 1.9719569575103267e-05, "loss": 0.546, "step": 7219 }, { "epoch": 0.15312506627643105, "grad_norm": 0.3831222653388977, "learning_rate": 1.9719491145329836e-05, "loss": 0.5033, "step": 7220 }, { "epoch": 0.15314627473436407, "grad_norm": 0.3306438624858856, "learning_rate": 1.9719412704746475e-05, "loss": 0.5779, "step": 7221 }, { "epoch": 0.1531674831922971, "grad_norm": 0.30825474858283997, "learning_rate": 1.9719334253353263e-05, "loss": 0.5057, "step": 7222 }, { "epoch": 0.1531886916502301, "grad_norm": 0.3434178829193115, "learning_rate": 1.9719255791150297e-05, "loss": 0.4957, "step": 7223 }, { "epoch": 0.15320990010816313, "grad_norm": 0.3636077344417572, "learning_rate": 1.9719177318137653e-05, "loss": 0.4984, "step": 7224 }, { "epoch": 0.15323110856609615, "grad_norm": 0.33648020029067993, "learning_rate": 1.971909883431543e-05, "loss": 0.6119, "step": 7225 }, { "epoch": 0.1532523170240292, "grad_norm": 0.3708789050579071, "learning_rate": 1.9719020339683705e-05, "loss": 0.5076, "step": 7226 }, { "epoch": 0.15327352548196221, "grad_norm": 0.3008768856525421, "learning_rate": 1.971894183424257e-05, "loss": 0.5408, "step": 7227 }, { "epoch": 0.15329473393989523, "grad_norm": 0.31983840465545654, "learning_rate": 1.9718863317992117e-05, "loss": 0.5469, "step": 7228 }, { "epoch": 0.15331594239782825, "grad_norm": 0.34247976541519165, "learning_rate": 1.9718784790932425e-05, "loss": 0.5868, "step": 7229 }, { "epoch": 0.15333715085576127, "grad_norm": 0.3226839005947113, "learning_rate": 1.971870625306359e-05, "loss": 0.5269, "step": 7230 }, { "epoch": 0.1533583593136943, "grad_norm": 0.37176090478897095, "learning_rate": 1.971862770438569e-05, "loss": 0.5299, "step": 7231 }, { "epoch": 0.1533795677716273, "grad_norm": 0.3520998954772949, "learning_rate": 1.9718549144898814e-05, "loss": 0.5622, "step": 7232 }, { "epoch": 0.15340077622956036, "grad_norm": 0.3171077072620392, "learning_rate": 1.9718470574603054e-05, "loss": 0.5463, "step": 7233 }, { "epoch": 0.15342198468749338, "grad_norm": 0.43179991841316223, "learning_rate": 1.97183919934985e-05, "loss": 0.4819, "step": 7234 }, { "epoch": 0.1534431931454264, "grad_norm": 0.29448431730270386, "learning_rate": 1.9718313401585228e-05, "loss": 0.5779, "step": 7235 }, { "epoch": 0.15346440160335942, "grad_norm": 0.3239784836769104, "learning_rate": 1.9718234798863336e-05, "loss": 0.5641, "step": 7236 }, { "epoch": 0.15348561006129244, "grad_norm": 0.3196874260902405, "learning_rate": 1.971815618533291e-05, "loss": 0.5422, "step": 7237 }, { "epoch": 0.15350681851922546, "grad_norm": 0.3524302542209625, "learning_rate": 1.971807756099403e-05, "loss": 0.5865, "step": 7238 }, { "epoch": 0.15352802697715848, "grad_norm": 0.37376609444618225, "learning_rate": 1.9717998925846793e-05, "loss": 0.5971, "step": 7239 }, { "epoch": 0.15354923543509152, "grad_norm": 0.3228122591972351, "learning_rate": 1.9717920279891278e-05, "loss": 0.5286, "step": 7240 }, { "epoch": 0.15357044389302454, "grad_norm": 0.30938616394996643, "learning_rate": 1.9717841623127582e-05, "loss": 0.4851, "step": 7241 }, { "epoch": 0.15359165235095756, "grad_norm": 0.48471999168395996, "learning_rate": 1.9717762955555783e-05, "loss": 0.5011, "step": 7242 }, { "epoch": 0.15361286080889058, "grad_norm": 0.3434571325778961, "learning_rate": 1.9717684277175973e-05, "loss": 0.5335, "step": 7243 }, { "epoch": 0.1536340692668236, "grad_norm": 0.3240949511528015, "learning_rate": 1.971760558798824e-05, "loss": 0.4727, "step": 7244 }, { "epoch": 0.15365527772475662, "grad_norm": 0.4356628358364105, "learning_rate": 1.9717526887992665e-05, "loss": 0.6644, "step": 7245 }, { "epoch": 0.15367648618268967, "grad_norm": 0.33720508217811584, "learning_rate": 1.971744817718935e-05, "loss": 0.5191, "step": 7246 }, { "epoch": 0.1536976946406227, "grad_norm": 0.3316243290901184, "learning_rate": 1.9717369455578365e-05, "loss": 0.5387, "step": 7247 }, { "epoch": 0.1537189030985557, "grad_norm": 0.3175158202648163, "learning_rate": 1.971729072315981e-05, "loss": 0.5507, "step": 7248 }, { "epoch": 0.15374011155648873, "grad_norm": 0.3143053948879242, "learning_rate": 1.971721197993377e-05, "loss": 0.5352, "step": 7249 }, { "epoch": 0.15376132001442175, "grad_norm": 0.32487064599990845, "learning_rate": 1.971713322590033e-05, "loss": 0.501, "step": 7250 }, { "epoch": 0.15378252847235477, "grad_norm": 0.3564794957637787, "learning_rate": 1.971705446105958e-05, "loss": 0.4802, "step": 7251 }, { "epoch": 0.1538037369302878, "grad_norm": 0.4177689850330353, "learning_rate": 1.9716975685411606e-05, "loss": 0.5656, "step": 7252 }, { "epoch": 0.15382494538822084, "grad_norm": 0.40241920948028564, "learning_rate": 1.9716896898956494e-05, "loss": 0.5648, "step": 7253 }, { "epoch": 0.15384615384615385, "grad_norm": 0.2996871769428253, "learning_rate": 1.971681810169434e-05, "loss": 0.5553, "step": 7254 }, { "epoch": 0.15386736230408687, "grad_norm": 0.34895017743110657, "learning_rate": 1.971673929362522e-05, "loss": 0.5226, "step": 7255 }, { "epoch": 0.1538885707620199, "grad_norm": 0.3227180540561676, "learning_rate": 1.9716660474749222e-05, "loss": 0.5311, "step": 7256 }, { "epoch": 0.1539097792199529, "grad_norm": 0.3460502624511719, "learning_rate": 1.9716581645066447e-05, "loss": 0.5655, "step": 7257 }, { "epoch": 0.15393098767788593, "grad_norm": 0.3441792130470276, "learning_rate": 1.971650280457697e-05, "loss": 0.5547, "step": 7258 }, { "epoch": 0.15395219613581895, "grad_norm": 0.33846890926361084, "learning_rate": 1.9716423953280884e-05, "loss": 0.5153, "step": 7259 }, { "epoch": 0.153973404593752, "grad_norm": 0.34560611844062805, "learning_rate": 1.9716345091178275e-05, "loss": 0.5164, "step": 7260 }, { "epoch": 0.15399461305168502, "grad_norm": 0.3317185640335083, "learning_rate": 1.9716266218269233e-05, "loss": 0.5278, "step": 7261 }, { "epoch": 0.15401582150961804, "grad_norm": 0.33871012926101685, "learning_rate": 1.9716187334553844e-05, "loss": 0.619, "step": 7262 }, { "epoch": 0.15403702996755106, "grad_norm": 0.34579306840896606, "learning_rate": 1.9716108440032194e-05, "loss": 0.4987, "step": 7263 }, { "epoch": 0.15405823842548408, "grad_norm": 0.3330840766429901, "learning_rate": 1.9716029534704376e-05, "loss": 0.5601, "step": 7264 }, { "epoch": 0.1540794468834171, "grad_norm": 0.6284958124160767, "learning_rate": 1.971595061857047e-05, "loss": 0.5384, "step": 7265 }, { "epoch": 0.15410065534135012, "grad_norm": 0.367503821849823, "learning_rate": 1.9715871691630573e-05, "loss": 0.521, "step": 7266 }, { "epoch": 0.15412186379928317, "grad_norm": 0.3955592215061188, "learning_rate": 1.9715792753884765e-05, "loss": 0.5409, "step": 7267 }, { "epoch": 0.15414307225721618, "grad_norm": 0.300946980714798, "learning_rate": 1.9715713805333136e-05, "loss": 0.5373, "step": 7268 }, { "epoch": 0.1541642807151492, "grad_norm": 0.40840864181518555, "learning_rate": 1.9715634845975777e-05, "loss": 0.5297, "step": 7269 }, { "epoch": 0.15418548917308222, "grad_norm": 0.357509583234787, "learning_rate": 1.971555587581277e-05, "loss": 0.579, "step": 7270 }, { "epoch": 0.15420669763101524, "grad_norm": 0.3261517584323883, "learning_rate": 1.971547689484421e-05, "loss": 0.5137, "step": 7271 }, { "epoch": 0.15422790608894826, "grad_norm": 0.3307749927043915, "learning_rate": 1.9715397903070177e-05, "loss": 0.5487, "step": 7272 }, { "epoch": 0.15424911454688128, "grad_norm": 0.4675317406654358, "learning_rate": 1.9715318900490765e-05, "loss": 0.6007, "step": 7273 }, { "epoch": 0.15427032300481433, "grad_norm": 0.30514195561408997, "learning_rate": 1.971523988710606e-05, "loss": 0.5241, "step": 7274 }, { "epoch": 0.15429153146274735, "grad_norm": 0.31245648860931396, "learning_rate": 1.971516086291615e-05, "loss": 0.4767, "step": 7275 }, { "epoch": 0.15431273992068037, "grad_norm": 0.3189011812210083, "learning_rate": 1.971508182792112e-05, "loss": 0.5334, "step": 7276 }, { "epoch": 0.1543339483786134, "grad_norm": 0.3474385440349579, "learning_rate": 1.971500278212106e-05, "loss": 0.5417, "step": 7277 }, { "epoch": 0.1543551568365464, "grad_norm": 0.3526565134525299, "learning_rate": 1.9714923725516065e-05, "loss": 0.5398, "step": 7278 }, { "epoch": 0.15437636529447943, "grad_norm": 0.32989969849586487, "learning_rate": 1.9714844658106213e-05, "loss": 0.5354, "step": 7279 }, { "epoch": 0.15439757375241248, "grad_norm": 0.3564082682132721, "learning_rate": 1.971476557989159e-05, "loss": 0.5443, "step": 7280 }, { "epoch": 0.1544187822103455, "grad_norm": 0.36178410053253174, "learning_rate": 1.971468649087229e-05, "loss": 0.4786, "step": 7281 }, { "epoch": 0.15443999066827852, "grad_norm": 0.3398599922657013, "learning_rate": 1.9714607391048405e-05, "loss": 0.5452, "step": 7282 }, { "epoch": 0.15446119912621153, "grad_norm": 0.35531190037727356, "learning_rate": 1.9714528280420017e-05, "loss": 0.5551, "step": 7283 }, { "epoch": 0.15448240758414455, "grad_norm": 0.4487341344356537, "learning_rate": 1.9714449158987213e-05, "loss": 0.4709, "step": 7284 }, { "epoch": 0.15450361604207757, "grad_norm": 0.28780055046081543, "learning_rate": 1.9714370026750087e-05, "loss": 0.4825, "step": 7285 }, { "epoch": 0.1545248245000106, "grad_norm": 0.32296475768089294, "learning_rate": 1.9714290883708716e-05, "loss": 0.5036, "step": 7286 }, { "epoch": 0.15454603295794364, "grad_norm": 0.3208151161670685, "learning_rate": 1.9714211729863203e-05, "loss": 0.5331, "step": 7287 }, { "epoch": 0.15456724141587666, "grad_norm": 0.3383985757827759, "learning_rate": 1.9714132565213623e-05, "loss": 0.5653, "step": 7288 }, { "epoch": 0.15458844987380968, "grad_norm": 0.3141504228115082, "learning_rate": 1.971405338976007e-05, "loss": 0.5304, "step": 7289 }, { "epoch": 0.1546096583317427, "grad_norm": 0.2958756387233734, "learning_rate": 1.9713974203502634e-05, "loss": 0.5114, "step": 7290 }, { "epoch": 0.15463086678967572, "grad_norm": 0.7341009378433228, "learning_rate": 1.9713895006441395e-05, "loss": 0.4995, "step": 7291 }, { "epoch": 0.15465207524760874, "grad_norm": 0.3597865104675293, "learning_rate": 1.971381579857645e-05, "loss": 0.4804, "step": 7292 }, { "epoch": 0.15467328370554176, "grad_norm": 0.3623715341091156, "learning_rate": 1.9713736579907883e-05, "loss": 0.6035, "step": 7293 }, { "epoch": 0.1546944921634748, "grad_norm": 0.3062964081764221, "learning_rate": 1.9713657350435782e-05, "loss": 0.557, "step": 7294 }, { "epoch": 0.15471570062140783, "grad_norm": 0.3319087326526642, "learning_rate": 1.9713578110160237e-05, "loss": 0.5567, "step": 7295 }, { "epoch": 0.15473690907934085, "grad_norm": 0.3417074382305145, "learning_rate": 1.971349885908133e-05, "loss": 0.5431, "step": 7296 }, { "epoch": 0.15475811753727386, "grad_norm": 0.36039939522743225, "learning_rate": 1.9713419597199162e-05, "loss": 0.6222, "step": 7297 }, { "epoch": 0.15477932599520688, "grad_norm": 0.3255428373813629, "learning_rate": 1.971334032451381e-05, "loss": 0.5175, "step": 7298 }, { "epoch": 0.1548005344531399, "grad_norm": 0.34114086627960205, "learning_rate": 1.9713261041025363e-05, "loss": 0.5534, "step": 7299 }, { "epoch": 0.15482174291107292, "grad_norm": 0.33112531900405884, "learning_rate": 1.971318174673391e-05, "loss": 0.5881, "step": 7300 }, { "epoch": 0.15484295136900597, "grad_norm": 0.33278918266296387, "learning_rate": 1.9713102441639545e-05, "loss": 0.5444, "step": 7301 }, { "epoch": 0.154864159826939, "grad_norm": 0.3535918891429901, "learning_rate": 1.971302312574235e-05, "loss": 0.5548, "step": 7302 }, { "epoch": 0.154885368284872, "grad_norm": 0.3190385103225708, "learning_rate": 1.9712943799042415e-05, "loss": 0.5426, "step": 7303 }, { "epoch": 0.15490657674280503, "grad_norm": 0.3517399728298187, "learning_rate": 1.971286446153983e-05, "loss": 0.5185, "step": 7304 }, { "epoch": 0.15492778520073805, "grad_norm": 0.4012773633003235, "learning_rate": 1.971278511323468e-05, "loss": 0.5816, "step": 7305 }, { "epoch": 0.15494899365867107, "grad_norm": 0.4070347249507904, "learning_rate": 1.9712705754127052e-05, "loss": 0.4401, "step": 7306 }, { "epoch": 0.1549702021166041, "grad_norm": 0.31215280294418335, "learning_rate": 1.971262638421704e-05, "loss": 0.4916, "step": 7307 }, { "epoch": 0.15499141057453714, "grad_norm": 0.332275927066803, "learning_rate": 1.971254700350473e-05, "loss": 0.4805, "step": 7308 }, { "epoch": 0.15501261903247016, "grad_norm": 0.3092542290687561, "learning_rate": 1.9712467611990206e-05, "loss": 0.4865, "step": 7309 }, { "epoch": 0.15503382749040318, "grad_norm": 0.41042792797088623, "learning_rate": 1.9712388209673565e-05, "loss": 0.5532, "step": 7310 }, { "epoch": 0.1550550359483362, "grad_norm": 0.3157746493816376, "learning_rate": 1.971230879655489e-05, "loss": 0.497, "step": 7311 }, { "epoch": 0.15507624440626921, "grad_norm": 0.449204683303833, "learning_rate": 1.9712229372634263e-05, "loss": 0.5493, "step": 7312 }, { "epoch": 0.15509745286420223, "grad_norm": 0.31713250279426575, "learning_rate": 1.9712149937911784e-05, "loss": 0.5028, "step": 7313 }, { "epoch": 0.15511866132213525, "grad_norm": 0.3436022996902466, "learning_rate": 1.9712070492387535e-05, "loss": 0.5067, "step": 7314 }, { "epoch": 0.1551398697800683, "grad_norm": 0.3254148066043854, "learning_rate": 1.9711991036061607e-05, "loss": 0.5277, "step": 7315 }, { "epoch": 0.15516107823800132, "grad_norm": 0.42264407873153687, "learning_rate": 1.9711911568934087e-05, "loss": 0.5087, "step": 7316 }, { "epoch": 0.15518228669593434, "grad_norm": 0.3503553569316864, "learning_rate": 1.971183209100506e-05, "loss": 0.4566, "step": 7317 }, { "epoch": 0.15520349515386736, "grad_norm": 0.3413141667842865, "learning_rate": 1.971175260227462e-05, "loss": 0.5285, "step": 7318 }, { "epoch": 0.15522470361180038, "grad_norm": 0.33139705657958984, "learning_rate": 1.9711673102742853e-05, "loss": 0.4841, "step": 7319 }, { "epoch": 0.1552459120697334, "grad_norm": 0.3216284513473511, "learning_rate": 1.9711593592409846e-05, "loss": 0.5278, "step": 7320 }, { "epoch": 0.15526712052766645, "grad_norm": 0.31255394220352173, "learning_rate": 1.9711514071275692e-05, "loss": 0.4659, "step": 7321 }, { "epoch": 0.15528832898559947, "grad_norm": 0.36923497915267944, "learning_rate": 1.9711434539340476e-05, "loss": 0.5587, "step": 7322 }, { "epoch": 0.15530953744353249, "grad_norm": 0.34654179215431213, "learning_rate": 1.9711354996604285e-05, "loss": 0.4733, "step": 7323 }, { "epoch": 0.1553307459014655, "grad_norm": 0.33211567997932434, "learning_rate": 1.971127544306721e-05, "loss": 0.4867, "step": 7324 }, { "epoch": 0.15535195435939853, "grad_norm": 2.0224483013153076, "learning_rate": 1.971119587872934e-05, "loss": 0.6027, "step": 7325 }, { "epoch": 0.15537316281733154, "grad_norm": 0.31270715594291687, "learning_rate": 1.971111630359076e-05, "loss": 0.5105, "step": 7326 }, { "epoch": 0.15539437127526456, "grad_norm": 0.3659677803516388, "learning_rate": 1.9711036717651565e-05, "loss": 0.5622, "step": 7327 }, { "epoch": 0.1554155797331976, "grad_norm": 0.3444335162639618, "learning_rate": 1.9710957120911833e-05, "loss": 0.5408, "step": 7328 }, { "epoch": 0.15543678819113063, "grad_norm": 0.3696707487106323, "learning_rate": 1.971087751337166e-05, "loss": 0.5474, "step": 7329 }, { "epoch": 0.15545799664906365, "grad_norm": 0.3331206738948822, "learning_rate": 1.9710797895031136e-05, "loss": 0.4928, "step": 7330 }, { "epoch": 0.15547920510699667, "grad_norm": 0.38856983184814453, "learning_rate": 1.971071826589035e-05, "loss": 0.5932, "step": 7331 }, { "epoch": 0.1555004135649297, "grad_norm": 0.4566076993942261, "learning_rate": 1.9710638625949384e-05, "loss": 0.5466, "step": 7332 }, { "epoch": 0.1555216220228627, "grad_norm": 0.3600362241268158, "learning_rate": 1.971055897520833e-05, "loss": 0.57, "step": 7333 }, { "epoch": 0.15554283048079573, "grad_norm": 0.3544316291809082, "learning_rate": 1.9710479313667274e-05, "loss": 0.575, "step": 7334 }, { "epoch": 0.15556403893872878, "grad_norm": 0.3260807991027832, "learning_rate": 1.971039964132631e-05, "loss": 0.5898, "step": 7335 }, { "epoch": 0.1555852473966618, "grad_norm": 0.3442724645137787, "learning_rate": 1.9710319958185525e-05, "loss": 0.4671, "step": 7336 }, { "epoch": 0.15560645585459482, "grad_norm": 0.3232212960720062, "learning_rate": 1.9710240264245005e-05, "loss": 0.5162, "step": 7337 }, { "epoch": 0.15562766431252784, "grad_norm": 0.2974421977996826, "learning_rate": 1.9710160559504842e-05, "loss": 0.4656, "step": 7338 }, { "epoch": 0.15564887277046086, "grad_norm": 0.3117537498474121, "learning_rate": 1.971008084396512e-05, "loss": 0.5132, "step": 7339 }, { "epoch": 0.15567008122839388, "grad_norm": 0.34049010276794434, "learning_rate": 1.9710001117625933e-05, "loss": 0.6197, "step": 7340 }, { "epoch": 0.1556912896863269, "grad_norm": 0.29361969232559204, "learning_rate": 1.9709921380487366e-05, "loss": 0.5828, "step": 7341 }, { "epoch": 0.15571249814425994, "grad_norm": 0.33619940280914307, "learning_rate": 1.970984163254951e-05, "loss": 0.4976, "step": 7342 }, { "epoch": 0.15573370660219296, "grad_norm": 0.2860155999660492, "learning_rate": 1.970976187381245e-05, "loss": 0.5444, "step": 7343 }, { "epoch": 0.15575491506012598, "grad_norm": 0.35251009464263916, "learning_rate": 1.9709682104276277e-05, "loss": 0.5802, "step": 7344 }, { "epoch": 0.155776123518059, "grad_norm": 0.32984450459480286, "learning_rate": 1.9709602323941083e-05, "loss": 0.4291, "step": 7345 }, { "epoch": 0.15579733197599202, "grad_norm": 0.3363215923309326, "learning_rate": 1.970952253280695e-05, "loss": 0.5066, "step": 7346 }, { "epoch": 0.15581854043392504, "grad_norm": 0.34149378538131714, "learning_rate": 1.9709442730873972e-05, "loss": 0.4927, "step": 7347 }, { "epoch": 0.15583974889185806, "grad_norm": 0.48294976353645325, "learning_rate": 1.9709362918142235e-05, "loss": 0.3984, "step": 7348 }, { "epoch": 0.1558609573497911, "grad_norm": 0.3292936384677887, "learning_rate": 1.9709283094611833e-05, "loss": 0.5691, "step": 7349 }, { "epoch": 0.15588216580772413, "grad_norm": 0.33184701204299927, "learning_rate": 1.970920326028285e-05, "loss": 0.5432, "step": 7350 }, { "epoch": 0.15590337426565715, "grad_norm": 0.3748520016670227, "learning_rate": 1.970912341515537e-05, "loss": 0.5579, "step": 7351 }, { "epoch": 0.15592458272359017, "grad_norm": 0.3411487340927124, "learning_rate": 1.9709043559229493e-05, "loss": 0.5179, "step": 7352 }, { "epoch": 0.15594579118152319, "grad_norm": 0.34856218099594116, "learning_rate": 1.97089636925053e-05, "loss": 0.4777, "step": 7353 }, { "epoch": 0.1559669996394562, "grad_norm": 0.3330496847629547, "learning_rate": 1.970888381498288e-05, "loss": 0.4602, "step": 7354 }, { "epoch": 0.15598820809738925, "grad_norm": 0.3141564726829529, "learning_rate": 1.9708803926662327e-05, "loss": 0.4496, "step": 7355 }, { "epoch": 0.15600941655532227, "grad_norm": 0.3986707031726837, "learning_rate": 1.9708724027543725e-05, "loss": 0.5321, "step": 7356 }, { "epoch": 0.1560306250132553, "grad_norm": 0.30447518825531006, "learning_rate": 1.9708644117627164e-05, "loss": 0.4849, "step": 7357 }, { "epoch": 0.1560518334711883, "grad_norm": 0.3375810384750366, "learning_rate": 1.9708564196912735e-05, "loss": 0.5426, "step": 7358 }, { "epoch": 0.15607304192912133, "grad_norm": 0.3923947215080261, "learning_rate": 1.9708484265400524e-05, "loss": 0.5803, "step": 7359 }, { "epoch": 0.15609425038705435, "grad_norm": 0.2947390079498291, "learning_rate": 1.970840432309062e-05, "loss": 0.5281, "step": 7360 }, { "epoch": 0.15611545884498737, "grad_norm": 0.34817036986351013, "learning_rate": 1.9708324369983115e-05, "loss": 0.5997, "step": 7361 }, { "epoch": 0.15613666730292042, "grad_norm": 0.3412787914276123, "learning_rate": 1.9708244406078097e-05, "loss": 0.4998, "step": 7362 }, { "epoch": 0.15615787576085344, "grad_norm": 0.3306634724140167, "learning_rate": 1.9708164431375652e-05, "loss": 0.495, "step": 7363 }, { "epoch": 0.15617908421878646, "grad_norm": 0.4168315529823303, "learning_rate": 1.970808444587587e-05, "loss": 0.5604, "step": 7364 }, { "epoch": 0.15620029267671948, "grad_norm": 0.30424049496650696, "learning_rate": 1.970800444957884e-05, "loss": 0.4839, "step": 7365 }, { "epoch": 0.1562215011346525, "grad_norm": 0.34948745369911194, "learning_rate": 1.9707924442484655e-05, "loss": 0.5837, "step": 7366 }, { "epoch": 0.15624270959258552, "grad_norm": 0.3696560859680176, "learning_rate": 1.97078444245934e-05, "loss": 0.5538, "step": 7367 }, { "epoch": 0.15626391805051854, "grad_norm": 0.37985190749168396, "learning_rate": 1.9707764395905164e-05, "loss": 0.4827, "step": 7368 }, { "epoch": 0.15628512650845158, "grad_norm": 0.3343673348426819, "learning_rate": 1.9707684356420037e-05, "loss": 0.4596, "step": 7369 }, { "epoch": 0.1563063349663846, "grad_norm": 0.3304770588874817, "learning_rate": 1.970760430613811e-05, "loss": 0.523, "step": 7370 }, { "epoch": 0.15632754342431762, "grad_norm": 0.3218335807323456, "learning_rate": 1.9707524245059465e-05, "loss": 0.5304, "step": 7371 }, { "epoch": 0.15634875188225064, "grad_norm": 0.5614614486694336, "learning_rate": 1.97074441731842e-05, "loss": 0.5583, "step": 7372 }, { "epoch": 0.15636996034018366, "grad_norm": 0.31124359369277954, "learning_rate": 1.9707364090512398e-05, "loss": 0.4872, "step": 7373 }, { "epoch": 0.15639116879811668, "grad_norm": 0.3405967652797699, "learning_rate": 1.970728399704415e-05, "loss": 0.496, "step": 7374 }, { "epoch": 0.1564123772560497, "grad_norm": 0.5288668274879456, "learning_rate": 1.9707203892779545e-05, "loss": 0.5208, "step": 7375 }, { "epoch": 0.15643358571398275, "grad_norm": 0.33989572525024414, "learning_rate": 1.9707123777718675e-05, "loss": 0.4929, "step": 7376 }, { "epoch": 0.15645479417191577, "grad_norm": 0.5229209661483765, "learning_rate": 1.9707043651861625e-05, "loss": 0.5152, "step": 7377 }, { "epoch": 0.1564760026298488, "grad_norm": 0.44468799233436584, "learning_rate": 1.9706963515208483e-05, "loss": 0.5827, "step": 7378 }, { "epoch": 0.1564972110877818, "grad_norm": 0.3438912034034729, "learning_rate": 1.9706883367759344e-05, "loss": 0.511, "step": 7379 }, { "epoch": 0.15651841954571483, "grad_norm": 0.330142080783844, "learning_rate": 1.9706803209514288e-05, "loss": 0.5467, "step": 7380 }, { "epoch": 0.15653962800364785, "grad_norm": 0.3383365571498871, "learning_rate": 1.9706723040473415e-05, "loss": 0.5034, "step": 7381 }, { "epoch": 0.15656083646158087, "grad_norm": 0.3399238586425781, "learning_rate": 1.9706642860636808e-05, "loss": 0.5372, "step": 7382 }, { "epoch": 0.1565820449195139, "grad_norm": 0.3855666220188141, "learning_rate": 1.9706562670004553e-05, "loss": 0.5047, "step": 7383 }, { "epoch": 0.15660325337744693, "grad_norm": 0.3797509968280792, "learning_rate": 1.970648246857675e-05, "loss": 0.4611, "step": 7384 }, { "epoch": 0.15662446183537995, "grad_norm": 0.32935672998428345, "learning_rate": 1.9706402256353475e-05, "loss": 0.5433, "step": 7385 }, { "epoch": 0.15664567029331297, "grad_norm": 0.3282531201839447, "learning_rate": 1.9706322033334827e-05, "loss": 0.5184, "step": 7386 }, { "epoch": 0.156666878751246, "grad_norm": 0.33318427205085754, "learning_rate": 1.9706241799520894e-05, "loss": 0.5552, "step": 7387 }, { "epoch": 0.156688087209179, "grad_norm": 0.32530373334884644, "learning_rate": 1.970616155491176e-05, "loss": 0.5842, "step": 7388 }, { "epoch": 0.15670929566711203, "grad_norm": 0.35885465145111084, "learning_rate": 1.970608129950752e-05, "loss": 0.4639, "step": 7389 }, { "epoch": 0.15673050412504508, "grad_norm": 0.3272004723548889, "learning_rate": 1.9706001033308256e-05, "loss": 0.5208, "step": 7390 }, { "epoch": 0.1567517125829781, "grad_norm": 0.37975114583969116, "learning_rate": 1.9705920756314067e-05, "loss": 0.6466, "step": 7391 }, { "epoch": 0.15677292104091112, "grad_norm": 0.302998423576355, "learning_rate": 1.9705840468525036e-05, "loss": 0.4351, "step": 7392 }, { "epoch": 0.15679412949884414, "grad_norm": 0.32166776061058044, "learning_rate": 1.970576016994125e-05, "loss": 0.4576, "step": 7393 }, { "epoch": 0.15681533795677716, "grad_norm": 0.34258997440338135, "learning_rate": 1.9705679860562807e-05, "loss": 0.5341, "step": 7394 }, { "epoch": 0.15683654641471018, "grad_norm": 0.32013750076293945, "learning_rate": 1.970559954038979e-05, "loss": 0.425, "step": 7395 }, { "epoch": 0.15685775487264322, "grad_norm": 0.3166458308696747, "learning_rate": 1.9705519209422288e-05, "loss": 0.5664, "step": 7396 }, { "epoch": 0.15687896333057624, "grad_norm": 0.30239182710647583, "learning_rate": 1.970543886766039e-05, "loss": 0.4947, "step": 7397 }, { "epoch": 0.15690017178850926, "grad_norm": 0.5955783128738403, "learning_rate": 1.9705358515104192e-05, "loss": 0.5707, "step": 7398 }, { "epoch": 0.15692138024644228, "grad_norm": 0.38542866706848145, "learning_rate": 1.9705278151753778e-05, "loss": 0.5822, "step": 7399 }, { "epoch": 0.1569425887043753, "grad_norm": 0.35812729597091675, "learning_rate": 1.9705197777609233e-05, "loss": 0.5758, "step": 7400 }, { "epoch": 0.15696379716230832, "grad_norm": 0.35339757800102234, "learning_rate": 1.9705117392670656e-05, "loss": 0.4903, "step": 7401 }, { "epoch": 0.15698500562024134, "grad_norm": 0.333144873380661, "learning_rate": 1.9705036996938128e-05, "loss": 0.4882, "step": 7402 }, { "epoch": 0.1570062140781744, "grad_norm": 0.5544474720954895, "learning_rate": 1.9704956590411744e-05, "loss": 0.4858, "step": 7403 }, { "epoch": 0.1570274225361074, "grad_norm": 0.3632897734642029, "learning_rate": 1.9704876173091595e-05, "loss": 0.5212, "step": 7404 }, { "epoch": 0.15704863099404043, "grad_norm": 0.3916466236114502, "learning_rate": 1.9704795744977764e-05, "loss": 0.4981, "step": 7405 }, { "epoch": 0.15706983945197345, "grad_norm": 0.31431815028190613, "learning_rate": 1.9704715306070343e-05, "loss": 0.4555, "step": 7406 }, { "epoch": 0.15709104790990647, "grad_norm": 0.3365709185600281, "learning_rate": 1.970463485636942e-05, "loss": 0.5444, "step": 7407 }, { "epoch": 0.1571122563678395, "grad_norm": 0.3395942151546478, "learning_rate": 1.970455439587509e-05, "loss": 0.5554, "step": 7408 }, { "epoch": 0.1571334648257725, "grad_norm": 0.3657262921333313, "learning_rate": 1.9704473924587437e-05, "loss": 0.4822, "step": 7409 }, { "epoch": 0.15715467328370555, "grad_norm": 0.370055615901947, "learning_rate": 1.9704393442506554e-05, "loss": 0.6293, "step": 7410 }, { "epoch": 0.15717588174163857, "grad_norm": 0.3523937165737152, "learning_rate": 1.9704312949632527e-05, "loss": 0.5245, "step": 7411 }, { "epoch": 0.1571970901995716, "grad_norm": 0.33041468262672424, "learning_rate": 1.9704232445965446e-05, "loss": 0.5247, "step": 7412 }, { "epoch": 0.1572182986575046, "grad_norm": 0.4346459209918976, "learning_rate": 1.9704151931505408e-05, "loss": 0.4973, "step": 7413 }, { "epoch": 0.15723950711543763, "grad_norm": 0.3753754794597626, "learning_rate": 1.9704071406252493e-05, "loss": 0.5458, "step": 7414 }, { "epoch": 0.15726071557337065, "grad_norm": 0.3736540675163269, "learning_rate": 1.9703990870206792e-05, "loss": 0.5454, "step": 7415 }, { "epoch": 0.15728192403130367, "grad_norm": 0.30046817660331726, "learning_rate": 1.97039103233684e-05, "loss": 0.4086, "step": 7416 }, { "epoch": 0.15730313248923672, "grad_norm": 0.3325607478618622, "learning_rate": 1.97038297657374e-05, "loss": 0.578, "step": 7417 }, { "epoch": 0.15732434094716974, "grad_norm": 0.29699525237083435, "learning_rate": 1.970374919731389e-05, "loss": 0.5037, "step": 7418 }, { "epoch": 0.15734554940510276, "grad_norm": 0.3722735643386841, "learning_rate": 1.970366861809795e-05, "loss": 0.6161, "step": 7419 }, { "epoch": 0.15736675786303578, "grad_norm": 0.33971142768859863, "learning_rate": 1.9703588028089675e-05, "loss": 0.4995, "step": 7420 }, { "epoch": 0.1573879663209688, "grad_norm": 0.3530440032482147, "learning_rate": 1.9703507427289153e-05, "loss": 0.5876, "step": 7421 }, { "epoch": 0.15740917477890182, "grad_norm": 0.4730372726917267, "learning_rate": 1.9703426815696475e-05, "loss": 0.4588, "step": 7422 }, { "epoch": 0.15743038323683484, "grad_norm": 0.3408415615558624, "learning_rate": 1.970334619331173e-05, "loss": 0.5176, "step": 7423 }, { "epoch": 0.15745159169476788, "grad_norm": 0.36029213666915894, "learning_rate": 1.970326556013501e-05, "loss": 0.5919, "step": 7424 }, { "epoch": 0.1574728001527009, "grad_norm": 0.32861554622650146, "learning_rate": 1.97031849161664e-05, "loss": 0.5148, "step": 7425 }, { "epoch": 0.15749400861063392, "grad_norm": 0.31728875637054443, "learning_rate": 1.970310426140599e-05, "loss": 0.5795, "step": 7426 }, { "epoch": 0.15751521706856694, "grad_norm": 0.3560570776462555, "learning_rate": 1.9703023595853876e-05, "loss": 0.5527, "step": 7427 }, { "epoch": 0.15753642552649996, "grad_norm": 0.313997745513916, "learning_rate": 1.970294291951014e-05, "loss": 0.5333, "step": 7428 }, { "epoch": 0.15755763398443298, "grad_norm": 0.3284395635128021, "learning_rate": 1.9702862232374877e-05, "loss": 0.5494, "step": 7429 }, { "epoch": 0.15757884244236603, "grad_norm": 0.3199906647205353, "learning_rate": 1.9702781534448176e-05, "loss": 0.5326, "step": 7430 }, { "epoch": 0.15760005090029905, "grad_norm": 0.306755930185318, "learning_rate": 1.9702700825730123e-05, "loss": 0.5149, "step": 7431 }, { "epoch": 0.15762125935823207, "grad_norm": 0.3107220232486725, "learning_rate": 1.970262010622081e-05, "loss": 0.5929, "step": 7432 }, { "epoch": 0.1576424678161651, "grad_norm": 0.3158741891384125, "learning_rate": 1.970253937592033e-05, "loss": 0.5552, "step": 7433 }, { "epoch": 0.1576636762740981, "grad_norm": 0.32984980940818787, "learning_rate": 1.970245863482877e-05, "loss": 0.5239, "step": 7434 }, { "epoch": 0.15768488473203113, "grad_norm": 0.30227130651474, "learning_rate": 1.9702377882946216e-05, "loss": 0.5216, "step": 7435 }, { "epoch": 0.15770609318996415, "grad_norm": 0.34688130021095276, "learning_rate": 1.9702297120272767e-05, "loss": 0.526, "step": 7436 }, { "epoch": 0.1577273016478972, "grad_norm": 0.31769153475761414, "learning_rate": 1.9702216346808505e-05, "loss": 0.5264, "step": 7437 }, { "epoch": 0.15774851010583021, "grad_norm": 0.32687604427337646, "learning_rate": 1.970213556255352e-05, "loss": 0.5147, "step": 7438 }, { "epoch": 0.15776971856376323, "grad_norm": 0.638216495513916, "learning_rate": 1.9702054767507907e-05, "loss": 0.5791, "step": 7439 }, { "epoch": 0.15779092702169625, "grad_norm": 0.3525645434856415, "learning_rate": 1.9701973961671753e-05, "loss": 0.5083, "step": 7440 }, { "epoch": 0.15781213547962927, "grad_norm": 0.3382490575313568, "learning_rate": 1.9701893145045145e-05, "loss": 0.5925, "step": 7441 }, { "epoch": 0.1578333439375623, "grad_norm": 0.36845293641090393, "learning_rate": 1.9701812317628178e-05, "loss": 0.452, "step": 7442 }, { "epoch": 0.1578545523954953, "grad_norm": 0.34682080149650574, "learning_rate": 1.970173147942094e-05, "loss": 0.481, "step": 7443 }, { "epoch": 0.15787576085342836, "grad_norm": 0.3833986520767212, "learning_rate": 1.970165063042352e-05, "loss": 0.573, "step": 7444 }, { "epoch": 0.15789696931136138, "grad_norm": 0.3128008246421814, "learning_rate": 1.9701569770636006e-05, "loss": 0.5404, "step": 7445 }, { "epoch": 0.1579181777692944, "grad_norm": 0.4195975959300995, "learning_rate": 1.9701488900058495e-05, "loss": 0.5542, "step": 7446 }, { "epoch": 0.15793938622722742, "grad_norm": 0.44449079036712646, "learning_rate": 1.9701408018691072e-05, "loss": 0.5097, "step": 7447 }, { "epoch": 0.15796059468516044, "grad_norm": 0.37428978085517883, "learning_rate": 1.970132712653382e-05, "loss": 0.5927, "step": 7448 }, { "epoch": 0.15798180314309346, "grad_norm": 0.3198349177837372, "learning_rate": 1.9701246223586845e-05, "loss": 0.4555, "step": 7449 }, { "epoch": 0.15800301160102648, "grad_norm": 0.4153670370578766, "learning_rate": 1.9701165309850223e-05, "loss": 0.6037, "step": 7450 }, { "epoch": 0.15802422005895952, "grad_norm": 0.31828317046165466, "learning_rate": 1.970108438532405e-05, "loss": 0.5123, "step": 7451 }, { "epoch": 0.15804542851689254, "grad_norm": 0.3283945918083191, "learning_rate": 1.9701003450008416e-05, "loss": 0.5356, "step": 7452 }, { "epoch": 0.15806663697482556, "grad_norm": 0.31543076038360596, "learning_rate": 1.9700922503903414e-05, "loss": 0.4793, "step": 7453 }, { "epoch": 0.15808784543275858, "grad_norm": 0.3265003263950348, "learning_rate": 1.9700841547009125e-05, "loss": 0.4911, "step": 7454 }, { "epoch": 0.1581090538906916, "grad_norm": 0.3205181360244751, "learning_rate": 1.9700760579325643e-05, "loss": 0.4912, "step": 7455 }, { "epoch": 0.15813026234862462, "grad_norm": 0.36827588081359863, "learning_rate": 1.9700679600853062e-05, "loss": 0.5254, "step": 7456 }, { "epoch": 0.15815147080655764, "grad_norm": 0.3277740180492401, "learning_rate": 1.9700598611591473e-05, "loss": 0.5567, "step": 7457 }, { "epoch": 0.1581726792644907, "grad_norm": 0.4486832320690155, "learning_rate": 1.9700517611540958e-05, "loss": 0.4817, "step": 7458 }, { "epoch": 0.1581938877224237, "grad_norm": 0.32357802987098694, "learning_rate": 1.9700436600701614e-05, "loss": 0.4839, "step": 7459 }, { "epoch": 0.15821509618035673, "grad_norm": 0.3109445869922638, "learning_rate": 1.9700355579073528e-05, "loss": 0.4887, "step": 7460 }, { "epoch": 0.15823630463828975, "grad_norm": 0.3407083749771118, "learning_rate": 1.970027454665679e-05, "loss": 0.448, "step": 7461 }, { "epoch": 0.15825751309622277, "grad_norm": 0.3077017068862915, "learning_rate": 1.9700193503451487e-05, "loss": 0.5095, "step": 7462 }, { "epoch": 0.1582787215541558, "grad_norm": 0.35540318489074707, "learning_rate": 1.970011244945772e-05, "loss": 0.6117, "step": 7463 }, { "epoch": 0.1582999300120888, "grad_norm": 0.33337637782096863, "learning_rate": 1.9700031384675567e-05, "loss": 0.521, "step": 7464 }, { "epoch": 0.15832113847002185, "grad_norm": 0.33017992973327637, "learning_rate": 1.9699950309105126e-05, "loss": 0.47, "step": 7465 }, { "epoch": 0.15834234692795487, "grad_norm": 0.44075486063957214, "learning_rate": 1.9699869222746483e-05, "loss": 0.5624, "step": 7466 }, { "epoch": 0.1583635553858879, "grad_norm": 0.5982236862182617, "learning_rate": 1.9699788125599734e-05, "loss": 0.4399, "step": 7467 }, { "epoch": 0.1583847638438209, "grad_norm": 0.32958975434303284, "learning_rate": 1.9699707017664963e-05, "loss": 0.4279, "step": 7468 }, { "epoch": 0.15840597230175393, "grad_norm": 0.3176177442073822, "learning_rate": 1.969962589894226e-05, "loss": 0.4952, "step": 7469 }, { "epoch": 0.15842718075968695, "grad_norm": 0.3216746747493744, "learning_rate": 1.969954476943172e-05, "loss": 0.499, "step": 7470 }, { "epoch": 0.15844838921762, "grad_norm": 0.3445594012737274, "learning_rate": 1.9699463629133426e-05, "loss": 0.51, "step": 7471 }, { "epoch": 0.15846959767555302, "grad_norm": 0.32064566016197205, "learning_rate": 1.9699382478047476e-05, "loss": 0.4859, "step": 7472 }, { "epoch": 0.15849080613348604, "grad_norm": 0.39517566561698914, "learning_rate": 1.9699301316173956e-05, "loss": 0.5031, "step": 7473 }, { "epoch": 0.15851201459141906, "grad_norm": 0.3339828550815582, "learning_rate": 1.969922014351296e-05, "loss": 0.4669, "step": 7474 }, { "epoch": 0.15853322304935208, "grad_norm": 0.36239734292030334, "learning_rate": 1.969913896006457e-05, "loss": 0.416, "step": 7475 }, { "epoch": 0.1585544315072851, "grad_norm": 0.3643595278263092, "learning_rate": 1.969905776582889e-05, "loss": 0.4941, "step": 7476 }, { "epoch": 0.15857563996521812, "grad_norm": 0.3503589928150177, "learning_rate": 1.9698976560805996e-05, "loss": 0.5698, "step": 7477 }, { "epoch": 0.15859684842315117, "grad_norm": 0.3785288333892822, "learning_rate": 1.9698895344995986e-05, "loss": 0.6164, "step": 7478 }, { "epoch": 0.15861805688108419, "grad_norm": 0.2983333468437195, "learning_rate": 1.9698814118398947e-05, "loss": 0.4806, "step": 7479 }, { "epoch": 0.1586392653390172, "grad_norm": 0.3644614517688751, "learning_rate": 1.9698732881014977e-05, "loss": 0.6647, "step": 7480 }, { "epoch": 0.15866047379695022, "grad_norm": 0.3002853989601135, "learning_rate": 1.9698651632844154e-05, "loss": 0.5315, "step": 7481 }, { "epoch": 0.15868168225488324, "grad_norm": 0.32319945096969604, "learning_rate": 1.969857037388658e-05, "loss": 0.5539, "step": 7482 }, { "epoch": 0.15870289071281626, "grad_norm": 0.33236613869667053, "learning_rate": 1.9698489104142336e-05, "loss": 0.4312, "step": 7483 }, { "epoch": 0.15872409917074928, "grad_norm": 0.31082695722579956, "learning_rate": 1.969840782361152e-05, "loss": 0.527, "step": 7484 }, { "epoch": 0.15874530762868233, "grad_norm": 0.35586193203926086, "learning_rate": 1.969832653229422e-05, "loss": 0.4803, "step": 7485 }, { "epoch": 0.15876651608661535, "grad_norm": 0.30036628246307373, "learning_rate": 1.969824523019052e-05, "loss": 0.4791, "step": 7486 }, { "epoch": 0.15878772454454837, "grad_norm": 0.34362131357192993, "learning_rate": 1.969816391730052e-05, "loss": 0.573, "step": 7487 }, { "epoch": 0.1588089330024814, "grad_norm": 0.3367610573768616, "learning_rate": 1.969808259362431e-05, "loss": 0.5647, "step": 7488 }, { "epoch": 0.1588301414604144, "grad_norm": 0.34603753685951233, "learning_rate": 1.969800125916197e-05, "loss": 0.4952, "step": 7489 }, { "epoch": 0.15885134991834743, "grad_norm": 0.32232844829559326, "learning_rate": 1.96979199139136e-05, "loss": 0.568, "step": 7490 }, { "epoch": 0.15887255837628045, "grad_norm": 0.3354809880256653, "learning_rate": 1.9697838557879288e-05, "loss": 0.5519, "step": 7491 }, { "epoch": 0.1588937668342135, "grad_norm": 0.33047521114349365, "learning_rate": 1.9697757191059122e-05, "loss": 0.5585, "step": 7492 }, { "epoch": 0.15891497529214652, "grad_norm": 0.3218998312950134, "learning_rate": 1.9697675813453198e-05, "loss": 0.47, "step": 7493 }, { "epoch": 0.15893618375007953, "grad_norm": 0.3460397720336914, "learning_rate": 1.96975944250616e-05, "loss": 0.5159, "step": 7494 }, { "epoch": 0.15895739220801255, "grad_norm": 0.32202988862991333, "learning_rate": 1.9697513025884424e-05, "loss": 0.4645, "step": 7495 }, { "epoch": 0.15897860066594557, "grad_norm": 0.29496461153030396, "learning_rate": 1.9697431615921756e-05, "loss": 0.5071, "step": 7496 }, { "epoch": 0.1589998091238786, "grad_norm": 0.3521040380001068, "learning_rate": 1.969735019517369e-05, "loss": 0.5765, "step": 7497 }, { "epoch": 0.1590210175818116, "grad_norm": 0.3082714080810547, "learning_rate": 1.9697268763640315e-05, "loss": 0.526, "step": 7498 }, { "epoch": 0.15904222603974466, "grad_norm": 0.3318830132484436, "learning_rate": 1.969718732132172e-05, "loss": 0.5731, "step": 7499 }, { "epoch": 0.15906343449767768, "grad_norm": 0.3147427439689636, "learning_rate": 1.9697105868218002e-05, "loss": 0.5131, "step": 7500 }, { "epoch": 0.1590846429556107, "grad_norm": 0.39475756883621216, "learning_rate": 1.9697024404329246e-05, "loss": 0.5641, "step": 7501 }, { "epoch": 0.15910585141354372, "grad_norm": 0.4097055494785309, "learning_rate": 1.9696942929655542e-05, "loss": 0.6244, "step": 7502 }, { "epoch": 0.15912705987147674, "grad_norm": 0.33562496304512024, "learning_rate": 1.9696861444196984e-05, "loss": 0.5236, "step": 7503 }, { "epoch": 0.15914826832940976, "grad_norm": 0.35745057463645935, "learning_rate": 1.969677994795366e-05, "loss": 0.5142, "step": 7504 }, { "epoch": 0.15916947678734278, "grad_norm": 0.39466142654418945, "learning_rate": 1.9696698440925662e-05, "loss": 0.5857, "step": 7505 }, { "epoch": 0.15919068524527583, "grad_norm": 0.7457078099250793, "learning_rate": 1.9696616923113078e-05, "loss": 0.4175, "step": 7506 }, { "epoch": 0.15921189370320885, "grad_norm": 0.3490907549858093, "learning_rate": 1.9696535394516006e-05, "loss": 0.5587, "step": 7507 }, { "epoch": 0.15923310216114187, "grad_norm": 0.30801430344581604, "learning_rate": 1.9696453855134528e-05, "loss": 0.5234, "step": 7508 }, { "epoch": 0.15925431061907488, "grad_norm": 0.31199973821640015, "learning_rate": 1.9696372304968738e-05, "loss": 0.5483, "step": 7509 }, { "epoch": 0.1592755190770079, "grad_norm": 0.30663490295410156, "learning_rate": 1.9696290744018727e-05, "loss": 0.5167, "step": 7510 }, { "epoch": 0.15929672753494092, "grad_norm": 0.45531272888183594, "learning_rate": 1.9696209172284585e-05, "loss": 0.556, "step": 7511 }, { "epoch": 0.15931793599287397, "grad_norm": 0.32603681087493896, "learning_rate": 1.9696127589766405e-05, "loss": 0.4798, "step": 7512 }, { "epoch": 0.159339144450807, "grad_norm": 0.3371431231498718, "learning_rate": 1.9696045996464276e-05, "loss": 0.4315, "step": 7513 }, { "epoch": 0.15936035290874, "grad_norm": 0.31487444043159485, "learning_rate": 1.969596439237829e-05, "loss": 0.5408, "step": 7514 }, { "epoch": 0.15938156136667303, "grad_norm": 0.2958737015724182, "learning_rate": 1.9695882777508533e-05, "loss": 0.4793, "step": 7515 }, { "epoch": 0.15940276982460605, "grad_norm": 0.3150826096534729, "learning_rate": 1.9695801151855103e-05, "loss": 0.4509, "step": 7516 }, { "epoch": 0.15942397828253907, "grad_norm": 0.3630463778972626, "learning_rate": 1.9695719515418085e-05, "loss": 0.606, "step": 7517 }, { "epoch": 0.1594451867404721, "grad_norm": 0.30137357115745544, "learning_rate": 1.9695637868197573e-05, "loss": 0.4771, "step": 7518 }, { "epoch": 0.15946639519840514, "grad_norm": 0.33937180042266846, "learning_rate": 1.9695556210193656e-05, "loss": 0.5616, "step": 7519 }, { "epoch": 0.15948760365633816, "grad_norm": 0.47027623653411865, "learning_rate": 1.9695474541406425e-05, "loss": 0.4864, "step": 7520 }, { "epoch": 0.15950881211427118, "grad_norm": 0.39692285656929016, "learning_rate": 1.9695392861835976e-05, "loss": 0.5368, "step": 7521 }, { "epoch": 0.1595300205722042, "grad_norm": 0.3174249827861786, "learning_rate": 1.969531117148239e-05, "loss": 0.5427, "step": 7522 }, { "epoch": 0.15955122903013721, "grad_norm": 0.32726791501045227, "learning_rate": 1.9695229470345764e-05, "loss": 0.502, "step": 7523 }, { "epoch": 0.15957243748807023, "grad_norm": 0.33465588092803955, "learning_rate": 1.969514775842619e-05, "loss": 0.5129, "step": 7524 }, { "epoch": 0.15959364594600325, "grad_norm": 0.3111109733581543, "learning_rate": 1.9695066035723758e-05, "loss": 0.4472, "step": 7525 }, { "epoch": 0.1596148544039363, "grad_norm": 0.34407860040664673, "learning_rate": 1.9694984302238554e-05, "loss": 0.5238, "step": 7526 }, { "epoch": 0.15963606286186932, "grad_norm": 0.32398292422294617, "learning_rate": 1.9694902557970674e-05, "loss": 0.5544, "step": 7527 }, { "epoch": 0.15965727131980234, "grad_norm": 0.33559650182724, "learning_rate": 1.9694820802920208e-05, "loss": 0.5087, "step": 7528 }, { "epoch": 0.15967847977773536, "grad_norm": 0.3319176137447357, "learning_rate": 1.9694739037087246e-05, "loss": 0.4058, "step": 7529 }, { "epoch": 0.15969968823566838, "grad_norm": 1.088135004043579, "learning_rate": 1.969465726047188e-05, "loss": 0.5884, "step": 7530 }, { "epoch": 0.1597208966936014, "grad_norm": 0.36187732219696045, "learning_rate": 1.96945754730742e-05, "loss": 0.5995, "step": 7531 }, { "epoch": 0.15974210515153442, "grad_norm": 0.3344414234161377, "learning_rate": 1.9694493674894297e-05, "loss": 0.4793, "step": 7532 }, { "epoch": 0.15976331360946747, "grad_norm": 0.29883140325546265, "learning_rate": 1.9694411865932267e-05, "loss": 0.5215, "step": 7533 }, { "epoch": 0.1597845220674005, "grad_norm": 0.3613136410713196, "learning_rate": 1.969433004618819e-05, "loss": 0.4757, "step": 7534 }, { "epoch": 0.1598057305253335, "grad_norm": 0.33697086572647095, "learning_rate": 1.9694248215662168e-05, "loss": 0.5947, "step": 7535 }, { "epoch": 0.15982693898326653, "grad_norm": 0.32411330938339233, "learning_rate": 1.9694166374354284e-05, "loss": 0.5554, "step": 7536 }, { "epoch": 0.15984814744119955, "grad_norm": 0.30748772621154785, "learning_rate": 1.9694084522264633e-05, "loss": 0.4874, "step": 7537 }, { "epoch": 0.15986935589913256, "grad_norm": 0.31655579805374146, "learning_rate": 1.9694002659393306e-05, "loss": 0.4998, "step": 7538 }, { "epoch": 0.15989056435706558, "grad_norm": 0.36600494384765625, "learning_rate": 1.9693920785740396e-05, "loss": 0.5384, "step": 7539 }, { "epoch": 0.15991177281499863, "grad_norm": 0.6689257621765137, "learning_rate": 1.9693838901305985e-05, "loss": 0.5338, "step": 7540 }, { "epoch": 0.15993298127293165, "grad_norm": 0.2967382073402405, "learning_rate": 1.9693757006090174e-05, "loss": 0.5088, "step": 7541 }, { "epoch": 0.15995418973086467, "grad_norm": 0.39538469910621643, "learning_rate": 1.9693675100093052e-05, "loss": 0.5698, "step": 7542 }, { "epoch": 0.1599753981887977, "grad_norm": 0.32505708932876587, "learning_rate": 1.969359318331471e-05, "loss": 0.5176, "step": 7543 }, { "epoch": 0.1599966066467307, "grad_norm": 0.34523895382881165, "learning_rate": 1.9693511255755236e-05, "loss": 0.5514, "step": 7544 }, { "epoch": 0.16001781510466373, "grad_norm": 0.3209712505340576, "learning_rate": 1.969342931741472e-05, "loss": 0.4931, "step": 7545 }, { "epoch": 0.16003902356259678, "grad_norm": 0.49407580494880676, "learning_rate": 1.969334736829326e-05, "loss": 0.4988, "step": 7546 }, { "epoch": 0.1600602320205298, "grad_norm": 0.32037556171417236, "learning_rate": 1.9693265408390943e-05, "loss": 0.4507, "step": 7547 }, { "epoch": 0.16008144047846282, "grad_norm": 0.3041698634624481, "learning_rate": 1.969318343770786e-05, "loss": 0.541, "step": 7548 }, { "epoch": 0.16010264893639584, "grad_norm": 0.31764447689056396, "learning_rate": 1.96931014562441e-05, "loss": 0.5126, "step": 7549 }, { "epoch": 0.16012385739432886, "grad_norm": 0.35140135884284973, "learning_rate": 1.969301946399976e-05, "loss": 0.6076, "step": 7550 }, { "epoch": 0.16014506585226188, "grad_norm": 0.31951525807380676, "learning_rate": 1.9692937460974927e-05, "loss": 0.5359, "step": 7551 }, { "epoch": 0.1601662743101949, "grad_norm": 0.3256804347038269, "learning_rate": 1.9692855447169692e-05, "loss": 0.5206, "step": 7552 }, { "epoch": 0.16018748276812794, "grad_norm": 0.3280188739299774, "learning_rate": 1.969277342258415e-05, "loss": 0.5412, "step": 7553 }, { "epoch": 0.16020869122606096, "grad_norm": 0.2937008738517761, "learning_rate": 1.969269138721839e-05, "loss": 0.458, "step": 7554 }, { "epoch": 0.16022989968399398, "grad_norm": 0.3830616772174835, "learning_rate": 1.9692609341072497e-05, "loss": 0.5493, "step": 7555 }, { "epoch": 0.160251108141927, "grad_norm": 0.35102832317352295, "learning_rate": 1.9692527284146572e-05, "loss": 0.4781, "step": 7556 }, { "epoch": 0.16027231659986002, "grad_norm": 0.3228001296520233, "learning_rate": 1.9692445216440704e-05, "loss": 0.4725, "step": 7557 }, { "epoch": 0.16029352505779304, "grad_norm": 0.3571508824825287, "learning_rate": 1.969236313795498e-05, "loss": 0.5133, "step": 7558 }, { "epoch": 0.16031473351572606, "grad_norm": 0.34742018580436707, "learning_rate": 1.9692281048689496e-05, "loss": 0.4787, "step": 7559 }, { "epoch": 0.1603359419736591, "grad_norm": 0.31696465611457825, "learning_rate": 1.969219894864434e-05, "loss": 0.4931, "step": 7560 }, { "epoch": 0.16035715043159213, "grad_norm": 0.6658770442008972, "learning_rate": 1.9692116837819605e-05, "loss": 0.4797, "step": 7561 }, { "epoch": 0.16037835888952515, "grad_norm": 0.36850762367248535, "learning_rate": 1.969203471621538e-05, "loss": 0.5184, "step": 7562 }, { "epoch": 0.16039956734745817, "grad_norm": 0.3395819664001465, "learning_rate": 1.969195258383176e-05, "loss": 0.5343, "step": 7563 }, { "epoch": 0.16042077580539119, "grad_norm": 0.2962873876094818, "learning_rate": 1.9691870440668835e-05, "loss": 0.5329, "step": 7564 }, { "epoch": 0.1604419842633242, "grad_norm": 0.3269408643245697, "learning_rate": 1.9691788286726694e-05, "loss": 0.4537, "step": 7565 }, { "epoch": 0.16046319272125723, "grad_norm": 0.3034103810787201, "learning_rate": 1.969170612200543e-05, "loss": 0.4738, "step": 7566 }, { "epoch": 0.16048440117919027, "grad_norm": 0.31587839126586914, "learning_rate": 1.9691623946505135e-05, "loss": 0.3991, "step": 7567 }, { "epoch": 0.1605056096371233, "grad_norm": 0.36528483033180237, "learning_rate": 1.9691541760225902e-05, "loss": 0.5953, "step": 7568 }, { "epoch": 0.1605268180950563, "grad_norm": 0.3303271234035492, "learning_rate": 1.969145956316782e-05, "loss": 0.4985, "step": 7569 }, { "epoch": 0.16054802655298933, "grad_norm": 0.30891457200050354, "learning_rate": 1.969137735533098e-05, "loss": 0.4152, "step": 7570 }, { "epoch": 0.16056923501092235, "grad_norm": 0.34133651852607727, "learning_rate": 1.9691295136715473e-05, "loss": 0.5098, "step": 7571 }, { "epoch": 0.16059044346885537, "grad_norm": 0.33067598938941956, "learning_rate": 1.9691212907321396e-05, "loss": 0.5304, "step": 7572 }, { "epoch": 0.1606116519267884, "grad_norm": 0.30895110964775085, "learning_rate": 1.9691130667148832e-05, "loss": 0.5152, "step": 7573 }, { "epoch": 0.16063286038472144, "grad_norm": 0.3864249289035797, "learning_rate": 1.9691048416197878e-05, "loss": 0.6003, "step": 7574 }, { "epoch": 0.16065406884265446, "grad_norm": 0.3176128566265106, "learning_rate": 1.9690966154468623e-05, "loss": 0.4642, "step": 7575 }, { "epoch": 0.16067527730058748, "grad_norm": 0.4151540994644165, "learning_rate": 1.9690883881961166e-05, "loss": 0.5735, "step": 7576 }, { "epoch": 0.1606964857585205, "grad_norm": 0.2952152192592621, "learning_rate": 1.9690801598675583e-05, "loss": 0.4496, "step": 7577 }, { "epoch": 0.16071769421645352, "grad_norm": 0.3233847916126251, "learning_rate": 1.969071930461198e-05, "loss": 0.52, "step": 7578 }, { "epoch": 0.16073890267438654, "grad_norm": 0.3362017273902893, "learning_rate": 1.9690636999770443e-05, "loss": 0.6031, "step": 7579 }, { "epoch": 0.16076011113231956, "grad_norm": 0.36675360798835754, "learning_rate": 1.969055468415106e-05, "loss": 0.5501, "step": 7580 }, { "epoch": 0.1607813195902526, "grad_norm": 0.33728262782096863, "learning_rate": 1.969047235775393e-05, "loss": 0.5849, "step": 7581 }, { "epoch": 0.16080252804818562, "grad_norm": 0.3375820517539978, "learning_rate": 1.969039002057914e-05, "loss": 0.4483, "step": 7582 }, { "epoch": 0.16082373650611864, "grad_norm": 0.3431186079978943, "learning_rate": 1.9690307672626782e-05, "loss": 0.6176, "step": 7583 }, { "epoch": 0.16084494496405166, "grad_norm": 0.38947245478630066, "learning_rate": 1.9690225313896948e-05, "loss": 0.5881, "step": 7584 }, { "epoch": 0.16086615342198468, "grad_norm": 0.45997294783592224, "learning_rate": 1.969014294438973e-05, "loss": 0.503, "step": 7585 }, { "epoch": 0.1608873618799177, "grad_norm": 0.3223167359828949, "learning_rate": 1.969006056410522e-05, "loss": 0.4682, "step": 7586 }, { "epoch": 0.16090857033785075, "grad_norm": 0.30119654536247253, "learning_rate": 1.9689978173043508e-05, "loss": 0.4727, "step": 7587 }, { "epoch": 0.16092977879578377, "grad_norm": 0.2979087829589844, "learning_rate": 1.9689895771204687e-05, "loss": 0.5348, "step": 7588 }, { "epoch": 0.1609509872537168, "grad_norm": 0.31926658749580383, "learning_rate": 1.9689813358588845e-05, "loss": 0.5143, "step": 7589 }, { "epoch": 0.1609721957116498, "grad_norm": 0.3194887340068817, "learning_rate": 1.9689730935196078e-05, "loss": 0.5773, "step": 7590 }, { "epoch": 0.16099340416958283, "grad_norm": 0.32842907309532166, "learning_rate": 1.9689648501026477e-05, "loss": 0.5546, "step": 7591 }, { "epoch": 0.16101461262751585, "grad_norm": 0.43803495168685913, "learning_rate": 1.9689566056080136e-05, "loss": 0.4835, "step": 7592 }, { "epoch": 0.16103582108544887, "grad_norm": 0.3715745508670807, "learning_rate": 1.968948360035714e-05, "loss": 0.5683, "step": 7593 }, { "epoch": 0.1610570295433819, "grad_norm": 0.32541269063949585, "learning_rate": 1.9689401133857587e-05, "loss": 0.5755, "step": 7594 }, { "epoch": 0.16107823800131493, "grad_norm": 0.34545573592185974, "learning_rate": 1.9689318656581564e-05, "loss": 0.4682, "step": 7595 }, { "epoch": 0.16109944645924795, "grad_norm": 0.31834229826927185, "learning_rate": 1.9689236168529168e-05, "loss": 0.4682, "step": 7596 }, { "epoch": 0.16112065491718097, "grad_norm": 0.35557469725608826, "learning_rate": 1.9689153669700483e-05, "loss": 0.4862, "step": 7597 }, { "epoch": 0.161141863375114, "grad_norm": 0.3080303966999054, "learning_rate": 1.968907116009561e-05, "loss": 0.4918, "step": 7598 }, { "epoch": 0.161163071833047, "grad_norm": 0.3288802206516266, "learning_rate": 1.968898863971463e-05, "loss": 0.5475, "step": 7599 }, { "epoch": 0.16118428029098003, "grad_norm": 0.30210092663764954, "learning_rate": 1.9688906108557647e-05, "loss": 0.513, "step": 7600 }, { "epoch": 0.16120548874891308, "grad_norm": 0.3283959627151489, "learning_rate": 1.9688823566624747e-05, "loss": 0.5051, "step": 7601 }, { "epoch": 0.1612266972068461, "grad_norm": 0.32887646555900574, "learning_rate": 1.968874101391602e-05, "loss": 0.5294, "step": 7602 }, { "epoch": 0.16124790566477912, "grad_norm": 0.4046865999698639, "learning_rate": 1.968865845043156e-05, "loss": 0.57, "step": 7603 }, { "epoch": 0.16126911412271214, "grad_norm": 0.3667803704738617, "learning_rate": 1.9688575876171455e-05, "loss": 0.5563, "step": 7604 }, { "epoch": 0.16129032258064516, "grad_norm": 0.32568034529685974, "learning_rate": 1.9688493291135806e-05, "loss": 0.5346, "step": 7605 }, { "epoch": 0.16131153103857818, "grad_norm": 0.35675516724586487, "learning_rate": 1.9688410695324695e-05, "loss": 0.5919, "step": 7606 }, { "epoch": 0.1613327394965112, "grad_norm": 0.3387904763221741, "learning_rate": 1.9688328088738217e-05, "loss": 0.5698, "step": 7607 }, { "epoch": 0.16135394795444424, "grad_norm": 0.3471513092517853, "learning_rate": 1.9688245471376466e-05, "loss": 0.6044, "step": 7608 }, { "epoch": 0.16137515641237726, "grad_norm": 0.34501340985298157, "learning_rate": 1.9688162843239532e-05, "loss": 0.5218, "step": 7609 }, { "epoch": 0.16139636487031028, "grad_norm": 0.34636247158050537, "learning_rate": 1.968808020432751e-05, "loss": 0.4402, "step": 7610 }, { "epoch": 0.1614175733282433, "grad_norm": 0.34518659114837646, "learning_rate": 1.9687997554640484e-05, "loss": 0.5336, "step": 7611 }, { "epoch": 0.16143878178617632, "grad_norm": 0.39353522658348083, "learning_rate": 1.9687914894178557e-05, "loss": 0.5589, "step": 7612 }, { "epoch": 0.16145999024410934, "grad_norm": 0.31452617049217224, "learning_rate": 1.968783222294181e-05, "loss": 0.4794, "step": 7613 }, { "epoch": 0.16148119870204236, "grad_norm": 0.3323087990283966, "learning_rate": 1.9687749540930346e-05, "loss": 0.444, "step": 7614 }, { "epoch": 0.1615024071599754, "grad_norm": 0.31381580233573914, "learning_rate": 1.9687666848144246e-05, "loss": 0.458, "step": 7615 }, { "epoch": 0.16152361561790843, "grad_norm": 0.3426262140274048, "learning_rate": 1.968758414458361e-05, "loss": 0.5417, "step": 7616 }, { "epoch": 0.16154482407584145, "grad_norm": 0.41546565294265747, "learning_rate": 1.9687501430248526e-05, "loss": 0.5047, "step": 7617 }, { "epoch": 0.16156603253377447, "grad_norm": 0.3176825940608978, "learning_rate": 1.968741870513909e-05, "loss": 0.5571, "step": 7618 }, { "epoch": 0.1615872409917075, "grad_norm": 0.3520953059196472, "learning_rate": 1.9687335969255386e-05, "loss": 0.4642, "step": 7619 }, { "epoch": 0.1616084494496405, "grad_norm": 0.3091583549976349, "learning_rate": 1.9687253222597513e-05, "loss": 0.511, "step": 7620 }, { "epoch": 0.16162965790757355, "grad_norm": 0.3441636562347412, "learning_rate": 1.9687170465165563e-05, "loss": 0.5623, "step": 7621 }, { "epoch": 0.16165086636550657, "grad_norm": 0.3030284345149994, "learning_rate": 1.9687087696959625e-05, "loss": 0.4701, "step": 7622 }, { "epoch": 0.1616720748234396, "grad_norm": 0.39827579259872437, "learning_rate": 1.968700491797979e-05, "loss": 0.493, "step": 7623 }, { "epoch": 0.1616932832813726, "grad_norm": 0.3800097107887268, "learning_rate": 1.9686922128226155e-05, "loss": 0.593, "step": 7624 }, { "epoch": 0.16171449173930563, "grad_norm": 0.28791746497154236, "learning_rate": 1.968683932769881e-05, "loss": 0.5086, "step": 7625 }, { "epoch": 0.16173570019723865, "grad_norm": 0.35743042826652527, "learning_rate": 1.9686756516397843e-05, "loss": 0.5443, "step": 7626 }, { "epoch": 0.16175690865517167, "grad_norm": 0.31965887546539307, "learning_rate": 1.9686673694323354e-05, "loss": 0.5514, "step": 7627 }, { "epoch": 0.16177811711310472, "grad_norm": 0.3282545804977417, "learning_rate": 1.9686590861475426e-05, "loss": 0.5654, "step": 7628 }, { "epoch": 0.16179932557103774, "grad_norm": 0.42698967456817627, "learning_rate": 1.9686508017854157e-05, "loss": 0.5745, "step": 7629 }, { "epoch": 0.16182053402897076, "grad_norm": 0.3343559503555298, "learning_rate": 1.968642516345964e-05, "loss": 0.5098, "step": 7630 }, { "epoch": 0.16184174248690378, "grad_norm": 0.3491191565990448, "learning_rate": 1.9686342298291966e-05, "loss": 0.5068, "step": 7631 }, { "epoch": 0.1618629509448368, "grad_norm": 0.2864345610141754, "learning_rate": 1.9686259422351223e-05, "loss": 0.5067, "step": 7632 }, { "epoch": 0.16188415940276982, "grad_norm": 0.34762632846832275, "learning_rate": 1.968617653563751e-05, "loss": 0.5675, "step": 7633 }, { "epoch": 0.16190536786070284, "grad_norm": 0.35407403111457825, "learning_rate": 1.9686093638150908e-05, "loss": 0.4416, "step": 7634 }, { "epoch": 0.16192657631863588, "grad_norm": 0.32910051941871643, "learning_rate": 1.9686010729891525e-05, "loss": 0.5783, "step": 7635 }, { "epoch": 0.1619477847765689, "grad_norm": 0.33216366171836853, "learning_rate": 1.968592781085944e-05, "loss": 0.5688, "step": 7636 }, { "epoch": 0.16196899323450192, "grad_norm": 0.3521578013896942, "learning_rate": 1.9685844881054755e-05, "loss": 0.6095, "step": 7637 }, { "epoch": 0.16199020169243494, "grad_norm": 0.31009379029273987, "learning_rate": 1.9685761940477555e-05, "loss": 0.503, "step": 7638 }, { "epoch": 0.16201141015036796, "grad_norm": 0.3903088867664337, "learning_rate": 1.9685678989127935e-05, "loss": 0.4483, "step": 7639 }, { "epoch": 0.16203261860830098, "grad_norm": 0.3498995006084442, "learning_rate": 1.9685596027005984e-05, "loss": 0.5345, "step": 7640 }, { "epoch": 0.162053827066234, "grad_norm": 0.30424249172210693, "learning_rate": 1.96855130541118e-05, "loss": 0.5149, "step": 7641 }, { "epoch": 0.16207503552416705, "grad_norm": 0.33844295144081116, "learning_rate": 1.9685430070445472e-05, "loss": 0.5044, "step": 7642 }, { "epoch": 0.16209624398210007, "grad_norm": 0.3188997507095337, "learning_rate": 1.9685347076007093e-05, "loss": 0.4938, "step": 7643 }, { "epoch": 0.1621174524400331, "grad_norm": 0.33985480666160583, "learning_rate": 1.9685264070796756e-05, "loss": 0.4941, "step": 7644 }, { "epoch": 0.1621386608979661, "grad_norm": 0.3412010073661804, "learning_rate": 1.9685181054814552e-05, "loss": 0.4944, "step": 7645 }, { "epoch": 0.16215986935589913, "grad_norm": 0.3399169445037842, "learning_rate": 1.9685098028060575e-05, "loss": 0.4872, "step": 7646 }, { "epoch": 0.16218107781383215, "grad_norm": 0.31717780232429504, "learning_rate": 1.9685014990534915e-05, "loss": 0.5056, "step": 7647 }, { "epoch": 0.16220228627176517, "grad_norm": 0.30430489778518677, "learning_rate": 1.968493194223766e-05, "loss": 0.4829, "step": 7648 }, { "epoch": 0.16222349472969821, "grad_norm": 1.0776571035385132, "learning_rate": 1.9684848883168914e-05, "loss": 0.5601, "step": 7649 }, { "epoch": 0.16224470318763123, "grad_norm": 0.32542482018470764, "learning_rate": 1.9684765813328764e-05, "loss": 0.5367, "step": 7650 }, { "epoch": 0.16226591164556425, "grad_norm": 0.3133644759654999, "learning_rate": 1.9684682732717298e-05, "loss": 0.4328, "step": 7651 }, { "epoch": 0.16228712010349727, "grad_norm": 0.3249257206916809, "learning_rate": 1.9684599641334613e-05, "loss": 0.5714, "step": 7652 }, { "epoch": 0.1623083285614303, "grad_norm": 0.34818145632743835, "learning_rate": 1.9684516539180803e-05, "loss": 0.6186, "step": 7653 }, { "epoch": 0.1623295370193633, "grad_norm": 0.34186989068984985, "learning_rate": 1.9684433426255955e-05, "loss": 0.6017, "step": 7654 }, { "epoch": 0.16235074547729633, "grad_norm": 0.3501235842704773, "learning_rate": 1.9684350302560166e-05, "loss": 0.5453, "step": 7655 }, { "epoch": 0.16237195393522938, "grad_norm": 0.3610374629497528, "learning_rate": 1.9684267168093526e-05, "loss": 0.5571, "step": 7656 }, { "epoch": 0.1623931623931624, "grad_norm": 0.3212788999080658, "learning_rate": 1.9684184022856127e-05, "loss": 0.4222, "step": 7657 }, { "epoch": 0.16241437085109542, "grad_norm": 0.3467318117618561, "learning_rate": 1.9684100866848067e-05, "loss": 0.5094, "step": 7658 }, { "epoch": 0.16243557930902844, "grad_norm": 0.2943150997161865, "learning_rate": 1.9684017700069427e-05, "loss": 0.4696, "step": 7659 }, { "epoch": 0.16245678776696146, "grad_norm": 0.3247172236442566, "learning_rate": 1.9683934522520313e-05, "loss": 0.5129, "step": 7660 }, { "epoch": 0.16247799622489448, "grad_norm": 0.3341711163520813, "learning_rate": 1.968385133420081e-05, "loss": 0.5241, "step": 7661 }, { "epoch": 0.16249920468282752, "grad_norm": 0.34125301241874695, "learning_rate": 1.9683768135111007e-05, "loss": 0.5676, "step": 7662 }, { "epoch": 0.16252041314076054, "grad_norm": 0.34173327684402466, "learning_rate": 1.9683684925251004e-05, "loss": 0.5294, "step": 7663 }, { "epoch": 0.16254162159869356, "grad_norm": 0.38232114911079407, "learning_rate": 1.9683601704620893e-05, "loss": 0.4822, "step": 7664 }, { "epoch": 0.16256283005662658, "grad_norm": 0.35569334030151367, "learning_rate": 1.968351847322076e-05, "loss": 0.527, "step": 7665 }, { "epoch": 0.1625840385145596, "grad_norm": 0.315836101770401, "learning_rate": 1.9683435231050704e-05, "loss": 0.473, "step": 7666 }, { "epoch": 0.16260524697249262, "grad_norm": 0.3914496600627899, "learning_rate": 1.968335197811082e-05, "loss": 0.5822, "step": 7667 }, { "epoch": 0.16262645543042564, "grad_norm": 0.35534700751304626, "learning_rate": 1.9683268714401192e-05, "loss": 0.5192, "step": 7668 }, { "epoch": 0.1626476638883587, "grad_norm": 0.3795223534107208, "learning_rate": 1.9683185439921913e-05, "loss": 0.4391, "step": 7669 }, { "epoch": 0.1626688723462917, "grad_norm": 0.3269432485103607, "learning_rate": 1.9683102154673085e-05, "loss": 0.5329, "step": 7670 }, { "epoch": 0.16269008080422473, "grad_norm": 0.322625994682312, "learning_rate": 1.9683018858654792e-05, "loss": 0.5293, "step": 7671 }, { "epoch": 0.16271128926215775, "grad_norm": 0.3599817156791687, "learning_rate": 1.9682935551867132e-05, "loss": 0.5981, "step": 7672 }, { "epoch": 0.16273249772009077, "grad_norm": 0.30823564529418945, "learning_rate": 1.968285223431019e-05, "loss": 0.4119, "step": 7673 }, { "epoch": 0.1627537061780238, "grad_norm": 0.3167249262332916, "learning_rate": 1.968276890598407e-05, "loss": 0.4891, "step": 7674 }, { "epoch": 0.1627749146359568, "grad_norm": 0.32107535004615784, "learning_rate": 1.9682685566888854e-05, "loss": 0.599, "step": 7675 }, { "epoch": 0.16279612309388986, "grad_norm": 0.6275511384010315, "learning_rate": 1.9682602217024643e-05, "loss": 0.5636, "step": 7676 }, { "epoch": 0.16281733155182287, "grad_norm": 0.327497661113739, "learning_rate": 1.9682518856391522e-05, "loss": 0.5553, "step": 7677 }, { "epoch": 0.1628385400097559, "grad_norm": 0.3181867301464081, "learning_rate": 1.968243548498959e-05, "loss": 0.5108, "step": 7678 }, { "epoch": 0.16285974846768891, "grad_norm": 0.32944199442863464, "learning_rate": 1.968235210281894e-05, "loss": 0.5409, "step": 7679 }, { "epoch": 0.16288095692562193, "grad_norm": 0.3185918927192688, "learning_rate": 1.9682268709879657e-05, "loss": 0.4643, "step": 7680 }, { "epoch": 0.16290216538355495, "grad_norm": 0.3490845263004303, "learning_rate": 1.9682185306171838e-05, "loss": 0.4843, "step": 7681 }, { "epoch": 0.16292337384148797, "grad_norm": 0.38871070742607117, "learning_rate": 1.9682101891695584e-05, "loss": 0.5425, "step": 7682 }, { "epoch": 0.16294458229942102, "grad_norm": 0.3372574746608734, "learning_rate": 1.9682018466450973e-05, "loss": 0.5371, "step": 7683 }, { "epoch": 0.16296579075735404, "grad_norm": 0.3684970438480377, "learning_rate": 1.968193503043811e-05, "loss": 0.5799, "step": 7684 }, { "epoch": 0.16298699921528706, "grad_norm": 0.5088722705841064, "learning_rate": 1.968185158365708e-05, "loss": 0.5187, "step": 7685 }, { "epoch": 0.16300820767322008, "grad_norm": 0.4740995466709137, "learning_rate": 1.968176812610798e-05, "loss": 0.4511, "step": 7686 }, { "epoch": 0.1630294161311531, "grad_norm": 0.34214460849761963, "learning_rate": 1.9681684657790904e-05, "loss": 0.5893, "step": 7687 }, { "epoch": 0.16305062458908612, "grad_norm": 0.3258626163005829, "learning_rate": 1.9681601178705938e-05, "loss": 0.5216, "step": 7688 }, { "epoch": 0.16307183304701914, "grad_norm": 0.3053811192512512, "learning_rate": 1.968151768885318e-05, "loss": 0.5151, "step": 7689 }, { "epoch": 0.16309304150495219, "grad_norm": 0.3267476260662079, "learning_rate": 1.9681434188232724e-05, "loss": 0.5255, "step": 7690 }, { "epoch": 0.1631142499628852, "grad_norm": 0.3474469482898712, "learning_rate": 1.968135067684466e-05, "loss": 0.4977, "step": 7691 }, { "epoch": 0.16313545842081822, "grad_norm": 0.33765465021133423, "learning_rate": 1.9681267154689084e-05, "loss": 0.475, "step": 7692 }, { "epoch": 0.16315666687875124, "grad_norm": 0.3224729299545288, "learning_rate": 1.9681183621766084e-05, "loss": 0.5504, "step": 7693 }, { "epoch": 0.16317787533668426, "grad_norm": 0.3247957229614258, "learning_rate": 1.9681100078075757e-05, "loss": 0.4743, "step": 7694 }, { "epoch": 0.16319908379461728, "grad_norm": 0.31639790534973145, "learning_rate": 1.9681016523618194e-05, "loss": 0.5656, "step": 7695 }, { "epoch": 0.16322029225255033, "grad_norm": 0.3404028117656708, "learning_rate": 1.9680932958393488e-05, "loss": 0.4716, "step": 7696 }, { "epoch": 0.16324150071048335, "grad_norm": 0.34502437710762024, "learning_rate": 1.9680849382401735e-05, "loss": 0.5648, "step": 7697 }, { "epoch": 0.16326270916841637, "grad_norm": 0.30584290623664856, "learning_rate": 1.9680765795643025e-05, "loss": 0.4738, "step": 7698 }, { "epoch": 0.1632839176263494, "grad_norm": 0.3530735671520233, "learning_rate": 1.9680682198117446e-05, "loss": 0.5062, "step": 7699 }, { "epoch": 0.1633051260842824, "grad_norm": 0.33303025364875793, "learning_rate": 1.96805985898251e-05, "loss": 0.5756, "step": 7700 }, { "epoch": 0.16332633454221543, "grad_norm": 0.3820837736129761, "learning_rate": 1.968051497076608e-05, "loss": 0.5499, "step": 7701 }, { "epoch": 0.16334754300014845, "grad_norm": 0.35214707255363464, "learning_rate": 1.9680431340940472e-05, "loss": 0.5494, "step": 7702 }, { "epoch": 0.1633687514580815, "grad_norm": 0.3415480852127075, "learning_rate": 1.9680347700348373e-05, "loss": 0.5087, "step": 7703 }, { "epoch": 0.16338995991601452, "grad_norm": 0.3505895435810089, "learning_rate": 1.9680264048989875e-05, "loss": 0.5737, "step": 7704 }, { "epoch": 0.16341116837394754, "grad_norm": 0.3166581988334656, "learning_rate": 1.968018038686507e-05, "loss": 0.5054, "step": 7705 }, { "epoch": 0.16343237683188055, "grad_norm": 0.32930222153663635, "learning_rate": 1.9680096713974053e-05, "loss": 0.5545, "step": 7706 }, { "epoch": 0.16345358528981357, "grad_norm": 0.316261351108551, "learning_rate": 1.9680013030316915e-05, "loss": 0.4856, "step": 7707 }, { "epoch": 0.1634747937477466, "grad_norm": 0.3270593285560608, "learning_rate": 1.9679929335893755e-05, "loss": 0.5097, "step": 7708 }, { "epoch": 0.1634960022056796, "grad_norm": 0.3724086284637451, "learning_rate": 1.967984563070466e-05, "loss": 0.5409, "step": 7709 }, { "epoch": 0.16351721066361266, "grad_norm": 0.31039878726005554, "learning_rate": 1.9679761914749726e-05, "loss": 0.5117, "step": 7710 }, { "epoch": 0.16353841912154568, "grad_norm": 0.36922940611839294, "learning_rate": 1.967967818802904e-05, "loss": 0.6093, "step": 7711 }, { "epoch": 0.1635596275794787, "grad_norm": 0.30291640758514404, "learning_rate": 1.9679594450542704e-05, "loss": 0.4448, "step": 7712 }, { "epoch": 0.16358083603741172, "grad_norm": 0.3147680163383484, "learning_rate": 1.9679510702290806e-05, "loss": 0.5132, "step": 7713 }, { "epoch": 0.16360204449534474, "grad_norm": 0.3429095447063446, "learning_rate": 1.9679426943273442e-05, "loss": 0.5063, "step": 7714 }, { "epoch": 0.16362325295327776, "grad_norm": 0.32391658425331116, "learning_rate": 1.9679343173490702e-05, "loss": 0.594, "step": 7715 }, { "epoch": 0.16364446141121078, "grad_norm": 0.37607839703559875, "learning_rate": 1.967925939294268e-05, "loss": 0.5392, "step": 7716 }, { "epoch": 0.16366566986914383, "grad_norm": 0.3337768316268921, "learning_rate": 1.967917560162947e-05, "loss": 0.5554, "step": 7717 }, { "epoch": 0.16368687832707685, "grad_norm": 0.3515574038028717, "learning_rate": 1.9679091799551165e-05, "loss": 0.5546, "step": 7718 }, { "epoch": 0.16370808678500987, "grad_norm": 0.3178495168685913, "learning_rate": 1.967900798670786e-05, "loss": 0.4928, "step": 7719 }, { "epoch": 0.16372929524294288, "grad_norm": 0.3430626094341278, "learning_rate": 1.9678924163099644e-05, "loss": 0.5552, "step": 7720 }, { "epoch": 0.1637505037008759, "grad_norm": 0.3596774935722351, "learning_rate": 1.9678840328726614e-05, "loss": 0.4903, "step": 7721 }, { "epoch": 0.16377171215880892, "grad_norm": 0.35134053230285645, "learning_rate": 1.9678756483588862e-05, "loss": 0.6091, "step": 7722 }, { "epoch": 0.16379292061674194, "grad_norm": 0.3175915777683258, "learning_rate": 1.967867262768648e-05, "loss": 0.4837, "step": 7723 }, { "epoch": 0.163814129074675, "grad_norm": 0.3661501407623291, "learning_rate": 1.9678588761019562e-05, "loss": 0.6051, "step": 7724 }, { "epoch": 0.163835337532608, "grad_norm": 0.29657280445098877, "learning_rate": 1.9678504883588205e-05, "loss": 0.4062, "step": 7725 }, { "epoch": 0.16385654599054103, "grad_norm": 0.4174933135509491, "learning_rate": 1.9678420995392495e-05, "loss": 0.5601, "step": 7726 }, { "epoch": 0.16387775444847405, "grad_norm": 0.3352338671684265, "learning_rate": 1.967833709643253e-05, "loss": 0.5472, "step": 7727 }, { "epoch": 0.16389896290640707, "grad_norm": 0.3620757758617401, "learning_rate": 1.9678253186708405e-05, "loss": 0.4498, "step": 7728 }, { "epoch": 0.1639201713643401, "grad_norm": 0.3173031508922577, "learning_rate": 1.9678169266220208e-05, "loss": 0.5209, "step": 7729 }, { "epoch": 0.1639413798222731, "grad_norm": 0.3568289279937744, "learning_rate": 1.967808533496804e-05, "loss": 0.5884, "step": 7730 }, { "epoch": 0.16396258828020616, "grad_norm": 0.33254456520080566, "learning_rate": 1.967800139295198e-05, "loss": 0.4961, "step": 7731 }, { "epoch": 0.16398379673813918, "grad_norm": 0.29243001341819763, "learning_rate": 1.967791744017214e-05, "loss": 0.5348, "step": 7732 }, { "epoch": 0.1640050051960722, "grad_norm": 0.3013089895248413, "learning_rate": 1.9677833476628604e-05, "loss": 0.525, "step": 7733 }, { "epoch": 0.16402621365400522, "grad_norm": 0.3199976682662964, "learning_rate": 1.9677749502321458e-05, "loss": 0.4914, "step": 7734 }, { "epoch": 0.16404742211193823, "grad_norm": 0.40270206332206726, "learning_rate": 1.967766551725081e-05, "loss": 0.6053, "step": 7735 }, { "epoch": 0.16406863056987125, "grad_norm": 0.31197309494018555, "learning_rate": 1.9677581521416746e-05, "loss": 0.5615, "step": 7736 }, { "epoch": 0.1640898390278043, "grad_norm": 0.32493582367897034, "learning_rate": 1.967749751481936e-05, "loss": 0.5227, "step": 7737 }, { "epoch": 0.16411104748573732, "grad_norm": 0.30458757281303406, "learning_rate": 1.967741349745874e-05, "loss": 0.5058, "step": 7738 }, { "epoch": 0.16413225594367034, "grad_norm": 0.34097737073898315, "learning_rate": 1.967732946933499e-05, "loss": 0.5208, "step": 7739 }, { "epoch": 0.16415346440160336, "grad_norm": 0.3539108335971832, "learning_rate": 1.9677245430448197e-05, "loss": 0.5835, "step": 7740 }, { "epoch": 0.16417467285953638, "grad_norm": 0.34091687202453613, "learning_rate": 1.9677161380798456e-05, "loss": 0.4857, "step": 7741 }, { "epoch": 0.1641958813174694, "grad_norm": 0.31510841846466064, "learning_rate": 1.9677077320385857e-05, "loss": 0.5411, "step": 7742 }, { "epoch": 0.16421708977540242, "grad_norm": 0.3393811881542206, "learning_rate": 1.9676993249210502e-05, "loss": 0.5682, "step": 7743 }, { "epoch": 0.16423829823333547, "grad_norm": 0.4101240634918213, "learning_rate": 1.9676909167272476e-05, "loss": 0.6053, "step": 7744 }, { "epoch": 0.1642595066912685, "grad_norm": 0.34358471632003784, "learning_rate": 1.9676825074571875e-05, "loss": 0.5751, "step": 7745 }, { "epoch": 0.1642807151492015, "grad_norm": 0.39003103971481323, "learning_rate": 1.9676740971108797e-05, "loss": 0.5264, "step": 7746 }, { "epoch": 0.16430192360713453, "grad_norm": 0.3396044969558716, "learning_rate": 1.9676656856883328e-05, "loss": 0.6115, "step": 7747 }, { "epoch": 0.16432313206506755, "grad_norm": 0.3504602909088135, "learning_rate": 1.9676572731895563e-05, "loss": 0.5184, "step": 7748 }, { "epoch": 0.16434434052300056, "grad_norm": 0.3534454107284546, "learning_rate": 1.9676488596145604e-05, "loss": 0.5077, "step": 7749 }, { "epoch": 0.16436554898093358, "grad_norm": 0.3432430028915405, "learning_rate": 1.9676404449633536e-05, "loss": 0.5519, "step": 7750 }, { "epoch": 0.16438675743886663, "grad_norm": 0.31518498063087463, "learning_rate": 1.9676320292359454e-05, "loss": 0.5781, "step": 7751 }, { "epoch": 0.16440796589679965, "grad_norm": 0.37138810753822327, "learning_rate": 1.9676236124323456e-05, "loss": 0.5017, "step": 7752 }, { "epoch": 0.16442917435473267, "grad_norm": 0.3230573832988739, "learning_rate": 1.967615194552563e-05, "loss": 0.517, "step": 7753 }, { "epoch": 0.1644503828126657, "grad_norm": 0.3074510395526886, "learning_rate": 1.967606775596607e-05, "loss": 0.4847, "step": 7754 }, { "epoch": 0.1644715912705987, "grad_norm": 0.32415103912353516, "learning_rate": 1.967598355564487e-05, "loss": 0.566, "step": 7755 }, { "epoch": 0.16449279972853173, "grad_norm": 0.3179681599140167, "learning_rate": 1.967589934456213e-05, "loss": 0.4348, "step": 7756 }, { "epoch": 0.16451400818646475, "grad_norm": 0.315679669380188, "learning_rate": 1.9675815122717937e-05, "loss": 0.5579, "step": 7757 }, { "epoch": 0.1645352166443978, "grad_norm": 0.35652634501457214, "learning_rate": 1.9675730890112386e-05, "loss": 0.5217, "step": 7758 }, { "epoch": 0.16455642510233082, "grad_norm": 0.3510356545448303, "learning_rate": 1.967564664674557e-05, "loss": 0.5801, "step": 7759 }, { "epoch": 0.16457763356026384, "grad_norm": 0.40039142966270447, "learning_rate": 1.9675562392617587e-05, "loss": 0.5401, "step": 7760 }, { "epoch": 0.16459884201819686, "grad_norm": 0.3799007534980774, "learning_rate": 1.9675478127728528e-05, "loss": 0.5098, "step": 7761 }, { "epoch": 0.16462005047612988, "grad_norm": 0.3445313572883606, "learning_rate": 1.9675393852078484e-05, "loss": 0.4322, "step": 7762 }, { "epoch": 0.1646412589340629, "grad_norm": 0.34615471959114075, "learning_rate": 1.967530956566755e-05, "loss": 0.5076, "step": 7763 }, { "epoch": 0.16466246739199591, "grad_norm": 0.34085145592689514, "learning_rate": 1.9675225268495823e-05, "loss": 0.5179, "step": 7764 }, { "epoch": 0.16468367584992896, "grad_norm": 0.36232414841651917, "learning_rate": 1.9675140960563396e-05, "loss": 0.502, "step": 7765 }, { "epoch": 0.16470488430786198, "grad_norm": 0.3387924134731293, "learning_rate": 1.9675056641870356e-05, "loss": 0.5304, "step": 7766 }, { "epoch": 0.164726092765795, "grad_norm": 0.33050084114074707, "learning_rate": 1.9674972312416804e-05, "loss": 0.588, "step": 7767 }, { "epoch": 0.16474730122372802, "grad_norm": 0.31431126594543457, "learning_rate": 1.9674887972202832e-05, "loss": 0.5493, "step": 7768 }, { "epoch": 0.16476850968166104, "grad_norm": 0.29465070366859436, "learning_rate": 1.9674803621228538e-05, "loss": 0.4533, "step": 7769 }, { "epoch": 0.16478971813959406, "grad_norm": 0.32896605134010315, "learning_rate": 1.9674719259494007e-05, "loss": 0.5218, "step": 7770 }, { "epoch": 0.1648109265975271, "grad_norm": 0.32741880416870117, "learning_rate": 1.967463488699934e-05, "loss": 0.585, "step": 7771 }, { "epoch": 0.16483213505546013, "grad_norm": 0.3485998511314392, "learning_rate": 1.9674550503744622e-05, "loss": 0.5402, "step": 7772 }, { "epoch": 0.16485334351339315, "grad_norm": 0.3003000020980835, "learning_rate": 1.9674466109729957e-05, "loss": 0.4717, "step": 7773 }, { "epoch": 0.16487455197132617, "grad_norm": 0.31251493096351624, "learning_rate": 1.9674381704955436e-05, "loss": 0.6437, "step": 7774 }, { "epoch": 0.16489576042925919, "grad_norm": 0.41210049390792847, "learning_rate": 1.967429728942115e-05, "loss": 0.5496, "step": 7775 }, { "epoch": 0.1649169688871922, "grad_norm": 0.38660866022109985, "learning_rate": 1.9674212863127198e-05, "loss": 0.5642, "step": 7776 }, { "epoch": 0.16493817734512523, "grad_norm": 0.31320807337760925, "learning_rate": 1.9674128426073668e-05, "loss": 0.487, "step": 7777 }, { "epoch": 0.16495938580305827, "grad_norm": 0.3397260308265686, "learning_rate": 1.9674043978260654e-05, "loss": 0.5032, "step": 7778 }, { "epoch": 0.1649805942609913, "grad_norm": 0.32656195759773254, "learning_rate": 1.9673959519688255e-05, "loss": 0.5641, "step": 7779 }, { "epoch": 0.1650018027189243, "grad_norm": 0.34560292959213257, "learning_rate": 1.9673875050356562e-05, "loss": 0.597, "step": 7780 }, { "epoch": 0.16502301117685733, "grad_norm": 0.30997031927108765, "learning_rate": 1.967379057026567e-05, "loss": 0.5207, "step": 7781 }, { "epoch": 0.16504421963479035, "grad_norm": 0.3166740834712982, "learning_rate": 1.9673706079415672e-05, "loss": 0.4654, "step": 7782 }, { "epoch": 0.16506542809272337, "grad_norm": 0.3034175634384155, "learning_rate": 1.967362157780666e-05, "loss": 0.5224, "step": 7783 }, { "epoch": 0.1650866365506564, "grad_norm": 0.3087008595466614, "learning_rate": 1.967353706543873e-05, "loss": 0.5271, "step": 7784 }, { "epoch": 0.16510784500858944, "grad_norm": 0.3289221227169037, "learning_rate": 1.967345254231198e-05, "loss": 0.5554, "step": 7785 }, { "epoch": 0.16512905346652246, "grad_norm": 0.3132789433002472, "learning_rate": 1.9673368008426497e-05, "loss": 0.5043, "step": 7786 }, { "epoch": 0.16515026192445548, "grad_norm": 0.3987828493118286, "learning_rate": 1.967328346378238e-05, "loss": 0.4812, "step": 7787 }, { "epoch": 0.1651714703823885, "grad_norm": 0.525779128074646, "learning_rate": 1.967319890837972e-05, "loss": 0.5716, "step": 7788 }, { "epoch": 0.16519267884032152, "grad_norm": 0.3255690932273865, "learning_rate": 1.9673114342218612e-05, "loss": 0.5461, "step": 7789 }, { "epoch": 0.16521388729825454, "grad_norm": 0.32379090785980225, "learning_rate": 1.967302976529915e-05, "loss": 0.4515, "step": 7790 }, { "epoch": 0.16523509575618756, "grad_norm": 0.3256257474422455, "learning_rate": 1.967294517762143e-05, "loss": 0.5207, "step": 7791 }, { "epoch": 0.1652563042141206, "grad_norm": 0.374159574508667, "learning_rate": 1.967286057918554e-05, "loss": 0.5858, "step": 7792 }, { "epoch": 0.16527751267205362, "grad_norm": 0.31511351466178894, "learning_rate": 1.967277596999158e-05, "loss": 0.5193, "step": 7793 }, { "epoch": 0.16529872112998664, "grad_norm": 0.3340921998023987, "learning_rate": 1.9672691350039646e-05, "loss": 0.5009, "step": 7794 }, { "epoch": 0.16531992958791966, "grad_norm": 0.33313366770744324, "learning_rate": 1.9672606719329827e-05, "loss": 0.4577, "step": 7795 }, { "epoch": 0.16534113804585268, "grad_norm": 0.5166072249412537, "learning_rate": 1.9672522077862218e-05, "loss": 0.6395, "step": 7796 }, { "epoch": 0.1653623465037857, "grad_norm": 0.35043004155158997, "learning_rate": 1.9672437425636915e-05, "loss": 0.4262, "step": 7797 }, { "epoch": 0.16538355496171872, "grad_norm": 0.3962863087654114, "learning_rate": 1.9672352762654012e-05, "loss": 0.6328, "step": 7798 }, { "epoch": 0.16540476341965177, "grad_norm": 0.28938040137290955, "learning_rate": 1.96722680889136e-05, "loss": 0.4823, "step": 7799 }, { "epoch": 0.1654259718775848, "grad_norm": 0.3327440917491913, "learning_rate": 1.9672183404415777e-05, "loss": 0.54, "step": 7800 }, { "epoch": 0.1654471803355178, "grad_norm": 0.3635352551937103, "learning_rate": 1.9672098709160638e-05, "loss": 0.5867, "step": 7801 }, { "epoch": 0.16546838879345083, "grad_norm": 0.3166603147983551, "learning_rate": 1.967201400314827e-05, "loss": 0.5354, "step": 7802 }, { "epoch": 0.16548959725138385, "grad_norm": 0.3034187853336334, "learning_rate": 1.9671929286378776e-05, "loss": 0.4804, "step": 7803 }, { "epoch": 0.16551080570931687, "grad_norm": 0.29139411449432373, "learning_rate": 1.9671844558852244e-05, "loss": 0.4257, "step": 7804 }, { "epoch": 0.16553201416724989, "grad_norm": 0.34056079387664795, "learning_rate": 1.967175982056877e-05, "loss": 0.4777, "step": 7805 }, { "epoch": 0.16555322262518293, "grad_norm": 0.34313085675239563, "learning_rate": 1.967167507152845e-05, "loss": 0.4772, "step": 7806 }, { "epoch": 0.16557443108311595, "grad_norm": 0.33192726969718933, "learning_rate": 1.9671590311731376e-05, "loss": 0.5147, "step": 7807 }, { "epoch": 0.16559563954104897, "grad_norm": 0.33573606610298157, "learning_rate": 1.9671505541177644e-05, "loss": 0.5383, "step": 7808 }, { "epoch": 0.165616847998982, "grad_norm": 0.3223132789134979, "learning_rate": 1.967142075986735e-05, "loss": 0.5044, "step": 7809 }, { "epoch": 0.165638056456915, "grad_norm": 0.35964423418045044, "learning_rate": 1.967133596780058e-05, "loss": 0.5955, "step": 7810 }, { "epoch": 0.16565926491484803, "grad_norm": 0.44298413395881653, "learning_rate": 1.967125116497744e-05, "loss": 0.5307, "step": 7811 }, { "epoch": 0.16568047337278108, "grad_norm": 0.2992032468318939, "learning_rate": 1.9671166351398014e-05, "loss": 0.5084, "step": 7812 }, { "epoch": 0.1657016818307141, "grad_norm": 0.36467814445495605, "learning_rate": 1.9671081527062403e-05, "loss": 0.559, "step": 7813 }, { "epoch": 0.16572289028864712, "grad_norm": 0.36365875601768494, "learning_rate": 1.96709966919707e-05, "loss": 0.507, "step": 7814 }, { "epoch": 0.16574409874658014, "grad_norm": 0.3322356343269348, "learning_rate": 1.9670911846123e-05, "loss": 0.5613, "step": 7815 }, { "epoch": 0.16576530720451316, "grad_norm": 0.30940720438957214, "learning_rate": 1.9670826989519392e-05, "loss": 0.5368, "step": 7816 }, { "epoch": 0.16578651566244618, "grad_norm": 0.32412827014923096, "learning_rate": 1.9670742122159972e-05, "loss": 0.5213, "step": 7817 }, { "epoch": 0.1658077241203792, "grad_norm": 0.347732812166214, "learning_rate": 1.9670657244044843e-05, "loss": 0.5575, "step": 7818 }, { "epoch": 0.16582893257831224, "grad_norm": 0.31791895627975464, "learning_rate": 1.967057235517409e-05, "loss": 0.4936, "step": 7819 }, { "epoch": 0.16585014103624526, "grad_norm": 0.33102160692214966, "learning_rate": 1.967048745554781e-05, "loss": 0.5378, "step": 7820 }, { "epoch": 0.16587134949417828, "grad_norm": 0.5103563070297241, "learning_rate": 1.9670402545166098e-05, "loss": 0.5137, "step": 7821 }, { "epoch": 0.1658925579521113, "grad_norm": 0.36977916955947876, "learning_rate": 1.967031762402905e-05, "loss": 0.5613, "step": 7822 }, { "epoch": 0.16591376641004432, "grad_norm": 0.3624485433101654, "learning_rate": 1.9670232692136758e-05, "loss": 0.5561, "step": 7823 }, { "epoch": 0.16593497486797734, "grad_norm": 0.3077828586101532, "learning_rate": 1.9670147749489316e-05, "loss": 0.511, "step": 7824 }, { "epoch": 0.16595618332591036, "grad_norm": 0.320119708776474, "learning_rate": 1.967006279608682e-05, "loss": 0.4823, "step": 7825 }, { "epoch": 0.1659773917838434, "grad_norm": 0.31612545251846313, "learning_rate": 1.9669977831929364e-05, "loss": 0.4982, "step": 7826 }, { "epoch": 0.16599860024177643, "grad_norm": 0.3262411057949066, "learning_rate": 1.9669892857017044e-05, "loss": 0.5079, "step": 7827 }, { "epoch": 0.16601980869970945, "grad_norm": 0.3304290175437927, "learning_rate": 1.966980787134995e-05, "loss": 0.5574, "step": 7828 }, { "epoch": 0.16604101715764247, "grad_norm": 0.3451993465423584, "learning_rate": 1.9669722874928184e-05, "loss": 0.5671, "step": 7829 }, { "epoch": 0.1660622256155755, "grad_norm": 0.3139393925666809, "learning_rate": 1.9669637867751834e-05, "loss": 0.455, "step": 7830 }, { "epoch": 0.1660834340735085, "grad_norm": 0.335738867521286, "learning_rate": 1.9669552849820998e-05, "loss": 0.4743, "step": 7831 }, { "epoch": 0.16610464253144153, "grad_norm": 0.41472283005714417, "learning_rate": 1.9669467821135767e-05, "loss": 0.4826, "step": 7832 }, { "epoch": 0.16612585098937457, "grad_norm": 0.32081127166748047, "learning_rate": 1.9669382781696237e-05, "loss": 0.5563, "step": 7833 }, { "epoch": 0.1661470594473076, "grad_norm": 0.32636043429374695, "learning_rate": 1.966929773150251e-05, "loss": 0.524, "step": 7834 }, { "epoch": 0.1661682679052406, "grad_norm": 0.5687659978866577, "learning_rate": 1.966921267055467e-05, "loss": 0.4612, "step": 7835 }, { "epoch": 0.16618947636317363, "grad_norm": 0.34401416778564453, "learning_rate": 1.9669127598852814e-05, "loss": 0.5245, "step": 7836 }, { "epoch": 0.16621068482110665, "grad_norm": 0.3261828124523163, "learning_rate": 1.966904251639704e-05, "loss": 0.5162, "step": 7837 }, { "epoch": 0.16623189327903967, "grad_norm": 0.32885000109672546, "learning_rate": 1.9668957423187442e-05, "loss": 0.552, "step": 7838 }, { "epoch": 0.1662531017369727, "grad_norm": 0.31675004959106445, "learning_rate": 1.966887231922411e-05, "loss": 0.4958, "step": 7839 }, { "epoch": 0.16627431019490574, "grad_norm": 0.31872642040252686, "learning_rate": 1.9668787204507142e-05, "loss": 0.4667, "step": 7840 }, { "epoch": 0.16629551865283876, "grad_norm": 0.3457120954990387, "learning_rate": 1.9668702079036637e-05, "loss": 0.5432, "step": 7841 }, { "epoch": 0.16631672711077178, "grad_norm": 0.3240779638290405, "learning_rate": 1.9668616942812682e-05, "loss": 0.5541, "step": 7842 }, { "epoch": 0.1663379355687048, "grad_norm": 0.3064408600330353, "learning_rate": 1.966853179583538e-05, "loss": 0.508, "step": 7843 }, { "epoch": 0.16635914402663782, "grad_norm": 0.3322322368621826, "learning_rate": 1.9668446638104816e-05, "loss": 0.5172, "step": 7844 }, { "epoch": 0.16638035248457084, "grad_norm": 0.3226476311683655, "learning_rate": 1.966836146962109e-05, "loss": 0.5715, "step": 7845 }, { "epoch": 0.16640156094250388, "grad_norm": 0.36779117584228516, "learning_rate": 1.96682762903843e-05, "loss": 0.5704, "step": 7846 }, { "epoch": 0.1664227694004369, "grad_norm": 0.3124273121356964, "learning_rate": 1.9668191100394537e-05, "loss": 0.5071, "step": 7847 }, { "epoch": 0.16644397785836992, "grad_norm": 0.38068702816963196, "learning_rate": 1.9668105899651894e-05, "loss": 0.4708, "step": 7848 }, { "epoch": 0.16646518631630294, "grad_norm": 0.33883780241012573, "learning_rate": 1.9668020688156465e-05, "loss": 0.5331, "step": 7849 }, { "epoch": 0.16648639477423596, "grad_norm": 0.32748883962631226, "learning_rate": 1.966793546590835e-05, "loss": 0.5904, "step": 7850 }, { "epoch": 0.16650760323216898, "grad_norm": 0.3118728995323181, "learning_rate": 1.966785023290764e-05, "loss": 0.5497, "step": 7851 }, { "epoch": 0.166528811690102, "grad_norm": 0.45552441477775574, "learning_rate": 1.9667764989154434e-05, "loss": 0.5918, "step": 7852 }, { "epoch": 0.16655002014803505, "grad_norm": 0.3111667335033417, "learning_rate": 1.966767973464882e-05, "loss": 0.5034, "step": 7853 }, { "epoch": 0.16657122860596807, "grad_norm": 0.43830230832099915, "learning_rate": 1.9667594469390902e-05, "loss": 0.5122, "step": 7854 }, { "epoch": 0.1665924370639011, "grad_norm": 0.32514145970344543, "learning_rate": 1.966750919338076e-05, "loss": 0.428, "step": 7855 }, { "epoch": 0.1666136455218341, "grad_norm": 0.3127553164958954, "learning_rate": 1.9667423906618507e-05, "loss": 0.4864, "step": 7856 }, { "epoch": 0.16663485397976713, "grad_norm": 0.3057302236557007, "learning_rate": 1.9667338609104226e-05, "loss": 0.4857, "step": 7857 }, { "epoch": 0.16665606243770015, "grad_norm": 0.3357883095741272, "learning_rate": 1.9667253300838017e-05, "loss": 0.4686, "step": 7858 }, { "epoch": 0.16667727089563317, "grad_norm": 0.3391973674297333, "learning_rate": 1.966716798181997e-05, "loss": 0.5582, "step": 7859 }, { "epoch": 0.16669847935356621, "grad_norm": 0.30668532848358154, "learning_rate": 1.9667082652050182e-05, "loss": 0.4895, "step": 7860 }, { "epoch": 0.16671968781149923, "grad_norm": 0.3351266384124756, "learning_rate": 1.9666997311528752e-05, "loss": 0.5618, "step": 7861 }, { "epoch": 0.16674089626943225, "grad_norm": 0.3291281461715698, "learning_rate": 1.966691196025577e-05, "loss": 0.5057, "step": 7862 }, { "epoch": 0.16676210472736527, "grad_norm": 0.3274019658565521, "learning_rate": 1.966682659823133e-05, "loss": 0.4737, "step": 7863 }, { "epoch": 0.1667833131852983, "grad_norm": 0.34515634179115295, "learning_rate": 1.9666741225455534e-05, "loss": 0.5577, "step": 7864 }, { "epoch": 0.1668045216432313, "grad_norm": 0.3436712920665741, "learning_rate": 1.966665584192847e-05, "loss": 0.4594, "step": 7865 }, { "epoch": 0.16682573010116433, "grad_norm": 0.3498069941997528, "learning_rate": 1.9666570447650235e-05, "loss": 0.5306, "step": 7866 }, { "epoch": 0.16684693855909738, "grad_norm": 0.3058972656726837, "learning_rate": 1.9666485042620924e-05, "loss": 0.5526, "step": 7867 }, { "epoch": 0.1668681470170304, "grad_norm": 0.36110448837280273, "learning_rate": 1.9666399626840637e-05, "loss": 0.5709, "step": 7868 }, { "epoch": 0.16688935547496342, "grad_norm": 0.4116155803203583, "learning_rate": 1.966631420030946e-05, "loss": 0.5114, "step": 7869 }, { "epoch": 0.16691056393289644, "grad_norm": 0.3142271041870117, "learning_rate": 1.9666228763027494e-05, "loss": 0.5684, "step": 7870 }, { "epoch": 0.16693177239082946, "grad_norm": 0.36521559953689575, "learning_rate": 1.966614331499483e-05, "loss": 0.6306, "step": 7871 }, { "epoch": 0.16695298084876248, "grad_norm": 0.3206324875354767, "learning_rate": 1.9666057856211566e-05, "loss": 0.495, "step": 7872 }, { "epoch": 0.1669741893066955, "grad_norm": 0.3505188822746277, "learning_rate": 1.9665972386677796e-05, "loss": 0.506, "step": 7873 }, { "epoch": 0.16699539776462854, "grad_norm": 0.36557185649871826, "learning_rate": 1.9665886906393615e-05, "loss": 0.4887, "step": 7874 }, { "epoch": 0.16701660622256156, "grad_norm": 0.3669770658016205, "learning_rate": 1.966580141535912e-05, "loss": 0.4826, "step": 7875 }, { "epoch": 0.16703781468049458, "grad_norm": 0.3068099617958069, "learning_rate": 1.9665715913574407e-05, "loss": 0.5418, "step": 7876 }, { "epoch": 0.1670590231384276, "grad_norm": 0.3461228609085083, "learning_rate": 1.9665630401039566e-05, "loss": 0.5245, "step": 7877 }, { "epoch": 0.16708023159636062, "grad_norm": 0.7621294856071472, "learning_rate": 1.9665544877754694e-05, "loss": 0.5605, "step": 7878 }, { "epoch": 0.16710144005429364, "grad_norm": 0.3312544524669647, "learning_rate": 1.966545934371989e-05, "loss": 0.5687, "step": 7879 }, { "epoch": 0.16712264851222666, "grad_norm": 0.30980774760246277, "learning_rate": 1.9665373798935244e-05, "loss": 0.5755, "step": 7880 }, { "epoch": 0.1671438569701597, "grad_norm": 0.345680832862854, "learning_rate": 1.9665288243400853e-05, "loss": 0.526, "step": 7881 }, { "epoch": 0.16716506542809273, "grad_norm": 0.4056127071380615, "learning_rate": 1.966520267711681e-05, "loss": 0.5958, "step": 7882 }, { "epoch": 0.16718627388602575, "grad_norm": 0.3467404246330261, "learning_rate": 1.9665117100083217e-05, "loss": 0.5378, "step": 7883 }, { "epoch": 0.16720748234395877, "grad_norm": 0.37417247891426086, "learning_rate": 1.9665031512300167e-05, "loss": 0.5299, "step": 7884 }, { "epoch": 0.1672286908018918, "grad_norm": 0.33955857157707214, "learning_rate": 1.9664945913767744e-05, "loss": 0.5755, "step": 7885 }, { "epoch": 0.1672498992598248, "grad_norm": 0.33340102434158325, "learning_rate": 1.966486030448606e-05, "loss": 0.5716, "step": 7886 }, { "epoch": 0.16727110771775786, "grad_norm": 0.33637353777885437, "learning_rate": 1.96647746844552e-05, "loss": 0.5655, "step": 7887 }, { "epoch": 0.16729231617569087, "grad_norm": 0.3024848401546478, "learning_rate": 1.966468905367526e-05, "loss": 0.4617, "step": 7888 }, { "epoch": 0.1673135246336239, "grad_norm": 0.3259713053703308, "learning_rate": 1.9664603412146337e-05, "loss": 0.4954, "step": 7889 }, { "epoch": 0.16733473309155691, "grad_norm": 0.4231249988079071, "learning_rate": 1.9664517759868528e-05, "loss": 0.5107, "step": 7890 }, { "epoch": 0.16735594154948993, "grad_norm": 0.33391159772872925, "learning_rate": 1.9664432096841922e-05, "loss": 0.5447, "step": 7891 }, { "epoch": 0.16737715000742295, "grad_norm": 0.2975420355796814, "learning_rate": 1.966434642306662e-05, "loss": 0.4507, "step": 7892 }, { "epoch": 0.16739835846535597, "grad_norm": 0.3640885651111603, "learning_rate": 1.9664260738542722e-05, "loss": 0.5955, "step": 7893 }, { "epoch": 0.16741956692328902, "grad_norm": 0.3787681758403778, "learning_rate": 1.966417504327031e-05, "loss": 0.4758, "step": 7894 }, { "epoch": 0.16744077538122204, "grad_norm": 0.2877613604068756, "learning_rate": 1.9664089337249492e-05, "loss": 0.5056, "step": 7895 }, { "epoch": 0.16746198383915506, "grad_norm": 0.3229256272315979, "learning_rate": 1.9664003620480353e-05, "loss": 0.5035, "step": 7896 }, { "epoch": 0.16748319229708808, "grad_norm": 0.36646783351898193, "learning_rate": 1.9663917892962997e-05, "loss": 0.5017, "step": 7897 }, { "epoch": 0.1675044007550211, "grad_norm": 0.35563158988952637, "learning_rate": 1.9663832154697514e-05, "loss": 0.4804, "step": 7898 }, { "epoch": 0.16752560921295412, "grad_norm": 0.360807865858078, "learning_rate": 1.9663746405684e-05, "loss": 0.5529, "step": 7899 }, { "epoch": 0.16754681767088714, "grad_norm": 0.3472036123275757, "learning_rate": 1.966366064592255e-05, "loss": 0.4515, "step": 7900 }, { "epoch": 0.16756802612882019, "grad_norm": 0.3380372226238251, "learning_rate": 1.9663574875413265e-05, "loss": 0.5669, "step": 7901 }, { "epoch": 0.1675892345867532, "grad_norm": 0.3771321177482605, "learning_rate": 1.9663489094156232e-05, "loss": 0.6078, "step": 7902 }, { "epoch": 0.16761044304468622, "grad_norm": 0.37620019912719727, "learning_rate": 1.966340330215155e-05, "loss": 0.5098, "step": 7903 }, { "epoch": 0.16763165150261924, "grad_norm": 0.3153112232685089, "learning_rate": 1.9663317499399318e-05, "loss": 0.5627, "step": 7904 }, { "epoch": 0.16765285996055226, "grad_norm": 0.34155136346817017, "learning_rate": 1.9663231685899627e-05, "loss": 0.5642, "step": 7905 }, { "epoch": 0.16767406841848528, "grad_norm": 0.5596669316291809, "learning_rate": 1.966314586165257e-05, "loss": 0.6124, "step": 7906 }, { "epoch": 0.1676952768764183, "grad_norm": 0.34521228075027466, "learning_rate": 1.9663060026658253e-05, "loss": 0.5534, "step": 7907 }, { "epoch": 0.16771648533435135, "grad_norm": 0.3285190761089325, "learning_rate": 1.966297418091676e-05, "loss": 0.5141, "step": 7908 }, { "epoch": 0.16773769379228437, "grad_norm": 0.3877219557762146, "learning_rate": 1.9662888324428193e-05, "loss": 0.5015, "step": 7909 }, { "epoch": 0.1677589022502174, "grad_norm": 0.35547366738319397, "learning_rate": 1.9662802457192643e-05, "loss": 0.546, "step": 7910 }, { "epoch": 0.1677801107081504, "grad_norm": 0.3152852952480316, "learning_rate": 1.966271657921021e-05, "loss": 0.5468, "step": 7911 }, { "epoch": 0.16780131916608343, "grad_norm": 0.3504212498664856, "learning_rate": 1.9662630690480987e-05, "loss": 0.5214, "step": 7912 }, { "epoch": 0.16782252762401645, "grad_norm": 0.35766729712486267, "learning_rate": 1.9662544791005073e-05, "loss": 0.58, "step": 7913 }, { "epoch": 0.16784373608194947, "grad_norm": 0.32651275396347046, "learning_rate": 1.9662458880782557e-05, "loss": 0.5442, "step": 7914 }, { "epoch": 0.16786494453988252, "grad_norm": 0.3228720426559448, "learning_rate": 1.9662372959813538e-05, "loss": 0.5169, "step": 7915 }, { "epoch": 0.16788615299781554, "grad_norm": 0.38524433970451355, "learning_rate": 1.9662287028098113e-05, "loss": 0.5067, "step": 7916 }, { "epoch": 0.16790736145574855, "grad_norm": 0.30730482935905457, "learning_rate": 1.9662201085636375e-05, "loss": 0.5472, "step": 7917 }, { "epoch": 0.16792856991368157, "grad_norm": 0.3485843539237976, "learning_rate": 1.9662115132428425e-05, "loss": 0.4577, "step": 7918 }, { "epoch": 0.1679497783716146, "grad_norm": 0.3200901448726654, "learning_rate": 1.966202916847435e-05, "loss": 0.513, "step": 7919 }, { "epoch": 0.1679709868295476, "grad_norm": 0.3327781558036804, "learning_rate": 1.966194319377425e-05, "loss": 0.5334, "step": 7920 }, { "epoch": 0.16799219528748063, "grad_norm": 0.3112074136734009, "learning_rate": 1.9661857208328222e-05, "loss": 0.4454, "step": 7921 }, { "epoch": 0.16801340374541368, "grad_norm": 0.3208441138267517, "learning_rate": 1.9661771212136362e-05, "loss": 0.5296, "step": 7922 }, { "epoch": 0.1680346122033467, "grad_norm": 0.3574432134628296, "learning_rate": 1.966168520519876e-05, "loss": 0.5572, "step": 7923 }, { "epoch": 0.16805582066127972, "grad_norm": 0.3287939131259918, "learning_rate": 1.9661599187515523e-05, "loss": 0.5707, "step": 7924 }, { "epoch": 0.16807702911921274, "grad_norm": 0.3508046269416809, "learning_rate": 1.966151315908673e-05, "loss": 0.5569, "step": 7925 }, { "epoch": 0.16809823757714576, "grad_norm": 0.3598640561103821, "learning_rate": 1.9661427119912494e-05, "loss": 0.5882, "step": 7926 }, { "epoch": 0.16811944603507878, "grad_norm": 0.3280905783176422, "learning_rate": 1.9661341069992897e-05, "loss": 0.527, "step": 7927 }, { "epoch": 0.16814065449301183, "grad_norm": 0.3539598882198334, "learning_rate": 1.9661255009328043e-05, "loss": 0.5676, "step": 7928 }, { "epoch": 0.16816186295094485, "grad_norm": 0.32871609926223755, "learning_rate": 1.9661168937918024e-05, "loss": 0.5931, "step": 7929 }, { "epoch": 0.16818307140887787, "grad_norm": 0.33690595626831055, "learning_rate": 1.9661082855762938e-05, "loss": 0.5773, "step": 7930 }, { "epoch": 0.16820427986681089, "grad_norm": 0.3530431389808655, "learning_rate": 1.966099676286288e-05, "loss": 0.4815, "step": 7931 }, { "epoch": 0.1682254883247439, "grad_norm": 0.32689163088798523, "learning_rate": 1.9660910659217944e-05, "loss": 0.5994, "step": 7932 }, { "epoch": 0.16824669678267692, "grad_norm": 0.3204191327095032, "learning_rate": 1.9660824544828226e-05, "loss": 0.4993, "step": 7933 }, { "epoch": 0.16826790524060994, "grad_norm": 0.35125845670700073, "learning_rate": 1.9660738419693824e-05, "loss": 0.5708, "step": 7934 }, { "epoch": 0.168289113698543, "grad_norm": 0.3397361934185028, "learning_rate": 1.9660652283814837e-05, "loss": 0.5491, "step": 7935 }, { "epoch": 0.168310322156476, "grad_norm": 0.37140825390815735, "learning_rate": 1.966056613719135e-05, "loss": 0.5343, "step": 7936 }, { "epoch": 0.16833153061440903, "grad_norm": 0.36754247546195984, "learning_rate": 1.9660479979823466e-05, "loss": 0.6052, "step": 7937 }, { "epoch": 0.16835273907234205, "grad_norm": 0.33903440833091736, "learning_rate": 1.9660393811711284e-05, "loss": 0.5438, "step": 7938 }, { "epoch": 0.16837394753027507, "grad_norm": 0.3538813591003418, "learning_rate": 1.9660307632854894e-05, "loss": 0.5331, "step": 7939 }, { "epoch": 0.1683951559882081, "grad_norm": 0.30743587017059326, "learning_rate": 1.9660221443254394e-05, "loss": 0.4742, "step": 7940 }, { "epoch": 0.1684163644461411, "grad_norm": 0.3443131744861603, "learning_rate": 1.966013524290988e-05, "loss": 0.5678, "step": 7941 }, { "epoch": 0.16843757290407416, "grad_norm": 0.334526389837265, "learning_rate": 1.9660049031821448e-05, "loss": 0.5692, "step": 7942 }, { "epoch": 0.16845878136200718, "grad_norm": 0.33382976055145264, "learning_rate": 1.965996280998919e-05, "loss": 0.5466, "step": 7943 }, { "epoch": 0.1684799898199402, "grad_norm": 0.46485787630081177, "learning_rate": 1.9659876577413208e-05, "loss": 0.4721, "step": 7944 }, { "epoch": 0.16850119827787322, "grad_norm": 0.3749081492424011, "learning_rate": 1.9659790334093593e-05, "loss": 0.5149, "step": 7945 }, { "epoch": 0.16852240673580623, "grad_norm": 0.33451882004737854, "learning_rate": 1.9659704080030446e-05, "loss": 0.5175, "step": 7946 }, { "epoch": 0.16854361519373925, "grad_norm": 0.3272165358066559, "learning_rate": 1.965961781522386e-05, "loss": 0.5093, "step": 7947 }, { "epoch": 0.16856482365167227, "grad_norm": 0.3582291901111603, "learning_rate": 1.965953153967393e-05, "loss": 0.4743, "step": 7948 }, { "epoch": 0.16858603210960532, "grad_norm": 1.5659332275390625, "learning_rate": 1.965944525338075e-05, "loss": 0.4235, "step": 7949 }, { "epoch": 0.16860724056753834, "grad_norm": 0.46749255061149597, "learning_rate": 1.9659358956344425e-05, "loss": 0.5308, "step": 7950 }, { "epoch": 0.16862844902547136, "grad_norm": 0.34001174569129944, "learning_rate": 1.9659272648565037e-05, "loss": 0.5099, "step": 7951 }, { "epoch": 0.16864965748340438, "grad_norm": 0.31489336490631104, "learning_rate": 1.9659186330042693e-05, "loss": 0.484, "step": 7952 }, { "epoch": 0.1686708659413374, "grad_norm": 0.35782694816589355, "learning_rate": 1.965910000077749e-05, "loss": 0.5802, "step": 7953 }, { "epoch": 0.16869207439927042, "grad_norm": 0.3259136378765106, "learning_rate": 1.9659013660769516e-05, "loss": 0.5452, "step": 7954 }, { "epoch": 0.16871328285720344, "grad_norm": 0.3405255675315857, "learning_rate": 1.9658927310018873e-05, "loss": 0.5364, "step": 7955 }, { "epoch": 0.1687344913151365, "grad_norm": 0.5106521844863892, "learning_rate": 1.9658840948525656e-05, "loss": 0.517, "step": 7956 }, { "epoch": 0.1687556997730695, "grad_norm": 0.3292810320854187, "learning_rate": 1.9658754576289955e-05, "loss": 0.4912, "step": 7957 }, { "epoch": 0.16877690823100253, "grad_norm": 0.36342695355415344, "learning_rate": 1.9658668193311876e-05, "loss": 0.4826, "step": 7958 }, { "epoch": 0.16879811668893555, "grad_norm": 0.3329876661300659, "learning_rate": 1.9658581799591506e-05, "loss": 0.5355, "step": 7959 }, { "epoch": 0.16881932514686857, "grad_norm": 0.3623568117618561, "learning_rate": 1.965849539512895e-05, "loss": 0.4572, "step": 7960 }, { "epoch": 0.16884053360480158, "grad_norm": 0.35392066836357117, "learning_rate": 1.9658408979924295e-05, "loss": 0.5906, "step": 7961 }, { "epoch": 0.16886174206273463, "grad_norm": 0.2816896140575409, "learning_rate": 1.965832255397764e-05, "loss": 0.4523, "step": 7962 }, { "epoch": 0.16888295052066765, "grad_norm": 0.33446574211120605, "learning_rate": 1.965823611728909e-05, "loss": 0.4843, "step": 7963 }, { "epoch": 0.16890415897860067, "grad_norm": 0.3155224919319153, "learning_rate": 1.965814966985873e-05, "loss": 0.5627, "step": 7964 }, { "epoch": 0.1689253674365337, "grad_norm": 0.35794854164123535, "learning_rate": 1.965806321168666e-05, "loss": 0.5763, "step": 7965 }, { "epoch": 0.1689465758944667, "grad_norm": 0.3218311369419098, "learning_rate": 1.9657976742772975e-05, "loss": 0.5472, "step": 7966 }, { "epoch": 0.16896778435239973, "grad_norm": 0.39659959077835083, "learning_rate": 1.965789026311777e-05, "loss": 0.5438, "step": 7967 }, { "epoch": 0.16898899281033275, "grad_norm": 0.30083537101745605, "learning_rate": 1.9657803772721146e-05, "loss": 0.47, "step": 7968 }, { "epoch": 0.1690102012682658, "grad_norm": 0.3459186255931854, "learning_rate": 1.9657717271583198e-05, "loss": 0.6954, "step": 7969 }, { "epoch": 0.16903140972619882, "grad_norm": 0.3286084532737732, "learning_rate": 1.9657630759704015e-05, "loss": 0.5813, "step": 7970 }, { "epoch": 0.16905261818413184, "grad_norm": 0.3461834192276001, "learning_rate": 1.9657544237083702e-05, "loss": 0.537, "step": 7971 }, { "epoch": 0.16907382664206486, "grad_norm": 0.33790940046310425, "learning_rate": 1.9657457703722357e-05, "loss": 0.5824, "step": 7972 }, { "epoch": 0.16909503509999788, "grad_norm": 0.3222126066684723, "learning_rate": 1.9657371159620066e-05, "loss": 0.4663, "step": 7973 }, { "epoch": 0.1691162435579309, "grad_norm": 0.33839869499206543, "learning_rate": 1.9657284604776935e-05, "loss": 0.5328, "step": 7974 }, { "epoch": 0.16913745201586391, "grad_norm": 0.3433498442173004, "learning_rate": 1.965719803919305e-05, "loss": 0.5729, "step": 7975 }, { "epoch": 0.16915866047379696, "grad_norm": 0.28495171666145325, "learning_rate": 1.965711146286852e-05, "loss": 0.4128, "step": 7976 }, { "epoch": 0.16917986893172998, "grad_norm": 0.37939634919166565, "learning_rate": 1.9657024875803428e-05, "loss": 0.543, "step": 7977 }, { "epoch": 0.169201077389663, "grad_norm": 0.3213672339916229, "learning_rate": 1.965693827799788e-05, "loss": 0.4296, "step": 7978 }, { "epoch": 0.16922228584759602, "grad_norm": 0.317315936088562, "learning_rate": 1.9656851669451972e-05, "loss": 0.5111, "step": 7979 }, { "epoch": 0.16924349430552904, "grad_norm": 0.3279615640640259, "learning_rate": 1.9656765050165793e-05, "loss": 0.6208, "step": 7980 }, { "epoch": 0.16926470276346206, "grad_norm": 0.29269304871559143, "learning_rate": 1.9656678420139442e-05, "loss": 0.4142, "step": 7981 }, { "epoch": 0.16928591122139508, "grad_norm": 0.32901811599731445, "learning_rate": 1.9656591779373022e-05, "loss": 0.509, "step": 7982 }, { "epoch": 0.16930711967932813, "grad_norm": 0.3780806362628937, "learning_rate": 1.9656505127866624e-05, "loss": 0.5281, "step": 7983 }, { "epoch": 0.16932832813726115, "grad_norm": 0.31843334436416626, "learning_rate": 1.9656418465620343e-05, "loss": 0.5696, "step": 7984 }, { "epoch": 0.16934953659519417, "grad_norm": 0.32641366124153137, "learning_rate": 1.9656331792634277e-05, "loss": 0.4816, "step": 7985 }, { "epoch": 0.1693707450531272, "grad_norm": 0.3228071928024292, "learning_rate": 1.9656245108908525e-05, "loss": 0.4808, "step": 7986 }, { "epoch": 0.1693919535110602, "grad_norm": 0.3290194272994995, "learning_rate": 1.965615841444318e-05, "loss": 0.562, "step": 7987 }, { "epoch": 0.16941316196899323, "grad_norm": 0.2998923063278198, "learning_rate": 1.965607170923834e-05, "loss": 0.5337, "step": 7988 }, { "epoch": 0.16943437042692625, "grad_norm": 0.41583022475242615, "learning_rate": 1.96559849932941e-05, "loss": 0.4657, "step": 7989 }, { "epoch": 0.1694555788848593, "grad_norm": 0.34363144636154175, "learning_rate": 1.965589826661056e-05, "loss": 0.5279, "step": 7990 }, { "epoch": 0.1694767873427923, "grad_norm": 0.37644001841545105, "learning_rate": 1.9655811529187807e-05, "loss": 0.5175, "step": 7991 }, { "epoch": 0.16949799580072533, "grad_norm": 0.3499612510204315, "learning_rate": 1.9655724781025948e-05, "loss": 0.5484, "step": 7992 }, { "epoch": 0.16951920425865835, "grad_norm": 0.42708736658096313, "learning_rate": 1.965563802212508e-05, "loss": 0.5287, "step": 7993 }, { "epoch": 0.16954041271659137, "grad_norm": 0.35587164759635925, "learning_rate": 1.965555125248529e-05, "loss": 0.6349, "step": 7994 }, { "epoch": 0.1695616211745244, "grad_norm": 0.3421044945716858, "learning_rate": 1.965546447210668e-05, "loss": 0.6006, "step": 7995 }, { "epoch": 0.1695828296324574, "grad_norm": 0.30191367864608765, "learning_rate": 1.9655377680989348e-05, "loss": 0.4559, "step": 7996 }, { "epoch": 0.16960403809039046, "grad_norm": 0.5334346294403076, "learning_rate": 1.965529087913339e-05, "loss": 0.5421, "step": 7997 }, { "epoch": 0.16962524654832348, "grad_norm": 0.34415173530578613, "learning_rate": 1.96552040665389e-05, "loss": 0.5384, "step": 7998 }, { "epoch": 0.1696464550062565, "grad_norm": 0.3312399387359619, "learning_rate": 1.9655117243205976e-05, "loss": 0.5384, "step": 7999 }, { "epoch": 0.16966766346418952, "grad_norm": 0.31540438532829285, "learning_rate": 1.9655030409134714e-05, "loss": 0.5676, "step": 8000 }, { "epoch": 0.16968887192212254, "grad_norm": 0.40258803963661194, "learning_rate": 1.965494356432521e-05, "loss": 0.593, "step": 8001 }, { "epoch": 0.16971008038005556, "grad_norm": 0.33363842964172363, "learning_rate": 1.9654856708777563e-05, "loss": 0.6327, "step": 8002 }, { "epoch": 0.1697312888379886, "grad_norm": 0.3226083219051361, "learning_rate": 1.965476984249187e-05, "loss": 0.5421, "step": 8003 }, { "epoch": 0.16975249729592162, "grad_norm": 0.3294890224933624, "learning_rate": 1.9654682965468222e-05, "loss": 0.5676, "step": 8004 }, { "epoch": 0.16977370575385464, "grad_norm": 0.41401877999305725, "learning_rate": 1.9654596077706724e-05, "loss": 0.4955, "step": 8005 }, { "epoch": 0.16979491421178766, "grad_norm": 0.3059181571006775, "learning_rate": 1.9654509179207466e-05, "loss": 0.483, "step": 8006 }, { "epoch": 0.16981612266972068, "grad_norm": 0.42236506938934326, "learning_rate": 1.9654422269970545e-05, "loss": 0.5494, "step": 8007 }, { "epoch": 0.1698373311276537, "grad_norm": 0.3672642111778259, "learning_rate": 1.9654335349996062e-05, "loss": 0.5448, "step": 8008 }, { "epoch": 0.16985853958558672, "grad_norm": 0.3087850511074066, "learning_rate": 1.965424841928411e-05, "loss": 0.5398, "step": 8009 }, { "epoch": 0.16987974804351977, "grad_norm": 0.30624741315841675, "learning_rate": 1.9654161477834788e-05, "loss": 0.5039, "step": 8010 }, { "epoch": 0.1699009565014528, "grad_norm": 0.32917720079421997, "learning_rate": 1.965407452564819e-05, "loss": 0.5687, "step": 8011 }, { "epoch": 0.1699221649593858, "grad_norm": 0.33372944593429565, "learning_rate": 1.9653987562724414e-05, "loss": 0.4682, "step": 8012 }, { "epoch": 0.16994337341731883, "grad_norm": 0.30097484588623047, "learning_rate": 1.9653900589063556e-05, "loss": 0.5269, "step": 8013 }, { "epoch": 0.16996458187525185, "grad_norm": 0.9366502165794373, "learning_rate": 1.965381360466572e-05, "loss": 0.5541, "step": 8014 }, { "epoch": 0.16998579033318487, "grad_norm": 0.34118589758872986, "learning_rate": 1.9653726609530986e-05, "loss": 0.5361, "step": 8015 }, { "epoch": 0.17000699879111789, "grad_norm": 0.39572426676750183, "learning_rate": 1.9653639603659468e-05, "loss": 0.5623, "step": 8016 }, { "epoch": 0.17002820724905093, "grad_norm": 0.3512907922267914, "learning_rate": 1.9653552587051254e-05, "loss": 0.5532, "step": 8017 }, { "epoch": 0.17004941570698395, "grad_norm": 0.41068145632743835, "learning_rate": 1.9653465559706444e-05, "loss": 0.5534, "step": 8018 }, { "epoch": 0.17007062416491697, "grad_norm": 0.34640228748321533, "learning_rate": 1.965337852162513e-05, "loss": 0.4969, "step": 8019 }, { "epoch": 0.17009183262285, "grad_norm": 0.35620078444480896, "learning_rate": 1.9653291472807417e-05, "loss": 0.6099, "step": 8020 }, { "epoch": 0.170113041080783, "grad_norm": 0.3325735628604889, "learning_rate": 1.9653204413253397e-05, "loss": 0.5145, "step": 8021 }, { "epoch": 0.17013424953871603, "grad_norm": 0.33951547741889954, "learning_rate": 1.9653117342963162e-05, "loss": 0.5202, "step": 8022 }, { "epoch": 0.17015545799664905, "grad_norm": 0.3114028871059418, "learning_rate": 1.9653030261936816e-05, "loss": 0.5304, "step": 8023 }, { "epoch": 0.1701766664545821, "grad_norm": 0.3977419137954712, "learning_rate": 1.9652943170174456e-05, "loss": 0.5702, "step": 8024 }, { "epoch": 0.17019787491251512, "grad_norm": 0.3634893596172333, "learning_rate": 1.9652856067676173e-05, "loss": 0.4812, "step": 8025 }, { "epoch": 0.17021908337044814, "grad_norm": 0.3099672198295593, "learning_rate": 1.965276895444207e-05, "loss": 0.4808, "step": 8026 }, { "epoch": 0.17024029182838116, "grad_norm": 0.31606125831604004, "learning_rate": 1.965268183047224e-05, "loss": 0.5713, "step": 8027 }, { "epoch": 0.17026150028631418, "grad_norm": 0.368155837059021, "learning_rate": 1.9652594695766784e-05, "loss": 0.6153, "step": 8028 }, { "epoch": 0.1702827087442472, "grad_norm": 0.31589609384536743, "learning_rate": 1.965250755032579e-05, "loss": 0.658, "step": 8029 }, { "epoch": 0.17030391720218022, "grad_norm": 0.32645174860954285, "learning_rate": 1.9652420394149363e-05, "loss": 0.5528, "step": 8030 }, { "epoch": 0.17032512566011326, "grad_norm": 0.3356727063655853, "learning_rate": 1.96523332272376e-05, "loss": 0.6366, "step": 8031 }, { "epoch": 0.17034633411804628, "grad_norm": 0.3572651147842407, "learning_rate": 1.9652246049590595e-05, "loss": 0.4952, "step": 8032 }, { "epoch": 0.1703675425759793, "grad_norm": 0.6211830973625183, "learning_rate": 1.9652158861208447e-05, "loss": 0.6634, "step": 8033 }, { "epoch": 0.17038875103391232, "grad_norm": 0.3024573028087616, "learning_rate": 1.965207166209125e-05, "loss": 0.4557, "step": 8034 }, { "epoch": 0.17040995949184534, "grad_norm": 0.2836581766605377, "learning_rate": 1.9651984452239104e-05, "loss": 0.4706, "step": 8035 }, { "epoch": 0.17043116794977836, "grad_norm": 0.34388571977615356, "learning_rate": 1.9651897231652102e-05, "loss": 0.5843, "step": 8036 }, { "epoch": 0.1704523764077114, "grad_norm": 0.3558659851551056, "learning_rate": 1.9651810000330347e-05, "loss": 0.5637, "step": 8037 }, { "epoch": 0.17047358486564443, "grad_norm": 0.32749927043914795, "learning_rate": 1.965172275827393e-05, "loss": 0.4052, "step": 8038 }, { "epoch": 0.17049479332357745, "grad_norm": 0.3341008722782135, "learning_rate": 1.9651635505482957e-05, "loss": 0.579, "step": 8039 }, { "epoch": 0.17051600178151047, "grad_norm": 0.48030033707618713, "learning_rate": 1.965154824195751e-05, "loss": 0.54, "step": 8040 }, { "epoch": 0.1705372102394435, "grad_norm": 0.3219803273677826, "learning_rate": 1.96514609676977e-05, "loss": 0.5744, "step": 8041 }, { "epoch": 0.1705584186973765, "grad_norm": 0.299111932516098, "learning_rate": 1.9651373682703618e-05, "loss": 0.4942, "step": 8042 }, { "epoch": 0.17057962715530953, "grad_norm": 0.30895742774009705, "learning_rate": 1.9651286386975365e-05, "loss": 0.5024, "step": 8043 }, { "epoch": 0.17060083561324257, "grad_norm": 0.4209241271018982, "learning_rate": 1.965119908051303e-05, "loss": 0.5294, "step": 8044 }, { "epoch": 0.1706220440711756, "grad_norm": 0.31595849990844727, "learning_rate": 1.9651111763316716e-05, "loss": 0.4905, "step": 8045 }, { "epoch": 0.1706432525291086, "grad_norm": 0.38295769691467285, "learning_rate": 1.965102443538652e-05, "loss": 0.5199, "step": 8046 }, { "epoch": 0.17066446098704163, "grad_norm": 0.3434109389781952, "learning_rate": 1.965093709672254e-05, "loss": 0.4778, "step": 8047 }, { "epoch": 0.17068566944497465, "grad_norm": 0.3410186767578125, "learning_rate": 1.9650849747324873e-05, "loss": 0.6125, "step": 8048 }, { "epoch": 0.17070687790290767, "grad_norm": 0.35809874534606934, "learning_rate": 1.965076238719361e-05, "loss": 0.5791, "step": 8049 }, { "epoch": 0.1707280863608407, "grad_norm": 0.3697841763496399, "learning_rate": 1.9650675016328854e-05, "loss": 0.6277, "step": 8050 }, { "epoch": 0.17074929481877374, "grad_norm": 0.3337717354297638, "learning_rate": 1.9650587634730704e-05, "loss": 0.4862, "step": 8051 }, { "epoch": 0.17077050327670676, "grad_norm": 0.32279086112976074, "learning_rate": 1.9650500242399255e-05, "loss": 0.4963, "step": 8052 }, { "epoch": 0.17079171173463978, "grad_norm": 0.3152889013290405, "learning_rate": 1.9650412839334597e-05, "loss": 0.4827, "step": 8053 }, { "epoch": 0.1708129201925728, "grad_norm": 0.38291454315185547, "learning_rate": 1.9650325425536843e-05, "loss": 0.4549, "step": 8054 }, { "epoch": 0.17083412865050582, "grad_norm": 0.33441826701164246, "learning_rate": 1.9650238001006073e-05, "loss": 0.5844, "step": 8055 }, { "epoch": 0.17085533710843884, "grad_norm": 0.34595122933387756, "learning_rate": 1.9650150565742393e-05, "loss": 0.5631, "step": 8056 }, { "epoch": 0.17087654556637186, "grad_norm": 0.3115863800048828, "learning_rate": 1.9650063119745904e-05, "loss": 0.5721, "step": 8057 }, { "epoch": 0.1708977540243049, "grad_norm": 0.3346084654331207, "learning_rate": 1.9649975663016694e-05, "loss": 0.5432, "step": 8058 }, { "epoch": 0.17091896248223792, "grad_norm": 0.3448732793331146, "learning_rate": 1.964988819555487e-05, "loss": 0.4589, "step": 8059 }, { "epoch": 0.17094017094017094, "grad_norm": 0.3147747218608856, "learning_rate": 1.964980071736052e-05, "loss": 0.5053, "step": 8060 }, { "epoch": 0.17096137939810396, "grad_norm": 0.3339157700538635, "learning_rate": 1.9649713228433745e-05, "loss": 0.5871, "step": 8061 }, { "epoch": 0.17098258785603698, "grad_norm": 0.3828202188014984, "learning_rate": 1.9649625728774646e-05, "loss": 0.4918, "step": 8062 }, { "epoch": 0.17100379631397, "grad_norm": 0.4093123972415924, "learning_rate": 1.9649538218383315e-05, "loss": 0.5755, "step": 8063 }, { "epoch": 0.17102500477190302, "grad_norm": 0.31306901574134827, "learning_rate": 1.9649450697259852e-05, "loss": 0.5527, "step": 8064 }, { "epoch": 0.17104621322983607, "grad_norm": 0.334306538105011, "learning_rate": 1.9649363165404353e-05, "loss": 0.495, "step": 8065 }, { "epoch": 0.1710674216877691, "grad_norm": 0.42612746357917786, "learning_rate": 1.9649275622816916e-05, "loss": 0.5226, "step": 8066 }, { "epoch": 0.1710886301457021, "grad_norm": 0.4660470485687256, "learning_rate": 1.964918806949764e-05, "loss": 0.5562, "step": 8067 }, { "epoch": 0.17110983860363513, "grad_norm": 0.33472204208374023, "learning_rate": 1.964910050544662e-05, "loss": 0.4959, "step": 8068 }, { "epoch": 0.17113104706156815, "grad_norm": 0.3179815411567688, "learning_rate": 1.9649012930663956e-05, "loss": 0.4995, "step": 8069 }, { "epoch": 0.17115225551950117, "grad_norm": 0.3220504820346832, "learning_rate": 1.964892534514974e-05, "loss": 0.4756, "step": 8070 }, { "epoch": 0.1711734639774342, "grad_norm": 0.338979572057724, "learning_rate": 1.9648837748904074e-05, "loss": 0.4443, "step": 8071 }, { "epoch": 0.17119467243536723, "grad_norm": 0.3378220200538635, "learning_rate": 1.9648750141927056e-05, "loss": 0.5467, "step": 8072 }, { "epoch": 0.17121588089330025, "grad_norm": 0.305645227432251, "learning_rate": 1.9648662524218784e-05, "loss": 0.5168, "step": 8073 }, { "epoch": 0.17123708935123327, "grad_norm": 0.3398289680480957, "learning_rate": 1.964857489577935e-05, "loss": 0.4899, "step": 8074 }, { "epoch": 0.1712582978091663, "grad_norm": 0.29786866903305054, "learning_rate": 1.9648487256608853e-05, "loss": 0.5262, "step": 8075 }, { "epoch": 0.1712795062670993, "grad_norm": 0.3066675364971161, "learning_rate": 1.9648399606707396e-05, "loss": 0.5242, "step": 8076 }, { "epoch": 0.17130071472503233, "grad_norm": 0.336257666349411, "learning_rate": 1.9648311946075073e-05, "loss": 0.5266, "step": 8077 }, { "epoch": 0.17132192318296538, "grad_norm": 0.331073522567749, "learning_rate": 1.9648224274711977e-05, "loss": 0.5034, "step": 8078 }, { "epoch": 0.1713431316408984, "grad_norm": 0.34381866455078125, "learning_rate": 1.9648136592618213e-05, "loss": 0.6109, "step": 8079 }, { "epoch": 0.17136434009883142, "grad_norm": 0.5363833904266357, "learning_rate": 1.9648048899793878e-05, "loss": 0.5847, "step": 8080 }, { "epoch": 0.17138554855676444, "grad_norm": 0.3527214825153351, "learning_rate": 1.9647961196239064e-05, "loss": 0.4771, "step": 8081 }, { "epoch": 0.17140675701469746, "grad_norm": 0.32044994831085205, "learning_rate": 1.9647873481953868e-05, "loss": 0.5001, "step": 8082 }, { "epoch": 0.17142796547263048, "grad_norm": 0.32899877429008484, "learning_rate": 1.9647785756938394e-05, "loss": 0.5053, "step": 8083 }, { "epoch": 0.1714491739305635, "grad_norm": 0.37115857005119324, "learning_rate": 1.9647698021192737e-05, "loss": 0.5238, "step": 8084 }, { "epoch": 0.17147038238849655, "grad_norm": 0.7636674046516418, "learning_rate": 1.9647610274716995e-05, "loss": 0.5094, "step": 8085 }, { "epoch": 0.17149159084642956, "grad_norm": 0.3260020613670349, "learning_rate": 1.964752251751126e-05, "loss": 0.5236, "step": 8086 }, { "epoch": 0.17151279930436258, "grad_norm": 0.3240368664264679, "learning_rate": 1.964743474957564e-05, "loss": 0.4922, "step": 8087 }, { "epoch": 0.1715340077622956, "grad_norm": 0.3293939232826233, "learning_rate": 1.9647346970910225e-05, "loss": 0.5069, "step": 8088 }, { "epoch": 0.17155521622022862, "grad_norm": 0.3228308856487274, "learning_rate": 1.9647259181515114e-05, "loss": 0.5343, "step": 8089 }, { "epoch": 0.17157642467816164, "grad_norm": 0.31192752718925476, "learning_rate": 1.9647171381390403e-05, "loss": 0.5499, "step": 8090 }, { "epoch": 0.17159763313609466, "grad_norm": 0.31279635429382324, "learning_rate": 1.9647083570536193e-05, "loss": 0.549, "step": 8091 }, { "epoch": 0.1716188415940277, "grad_norm": 0.3428346514701843, "learning_rate": 1.9646995748952583e-05, "loss": 0.4935, "step": 8092 }, { "epoch": 0.17164005005196073, "grad_norm": 0.3265431821346283, "learning_rate": 1.9646907916639664e-05, "loss": 0.5137, "step": 8093 }, { "epoch": 0.17166125850989375, "grad_norm": 0.3938644230365753, "learning_rate": 1.9646820073597542e-05, "loss": 0.6135, "step": 8094 }, { "epoch": 0.17168246696782677, "grad_norm": 0.31749770045280457, "learning_rate": 1.964673221982631e-05, "loss": 0.4947, "step": 8095 }, { "epoch": 0.1717036754257598, "grad_norm": 0.30425935983657837, "learning_rate": 1.9646644355326065e-05, "loss": 0.509, "step": 8096 }, { "epoch": 0.1717248838836928, "grad_norm": 0.30286335945129395, "learning_rate": 1.9646556480096905e-05, "loss": 0.5183, "step": 8097 }, { "epoch": 0.17174609234162583, "grad_norm": 0.3216659724712372, "learning_rate": 1.964646859413893e-05, "loss": 0.5423, "step": 8098 }, { "epoch": 0.17176730079955888, "grad_norm": 0.3415985107421875, "learning_rate": 1.9646380697452235e-05, "loss": 0.5382, "step": 8099 }, { "epoch": 0.1717885092574919, "grad_norm": 0.3495566248893738, "learning_rate": 1.964629279003692e-05, "loss": 0.5798, "step": 8100 }, { "epoch": 0.17180971771542491, "grad_norm": 0.4560559391975403, "learning_rate": 1.9646204871893082e-05, "loss": 0.4776, "step": 8101 }, { "epoch": 0.17183092617335793, "grad_norm": 0.3232910931110382, "learning_rate": 1.964611694302082e-05, "loss": 0.5211, "step": 8102 }, { "epoch": 0.17185213463129095, "grad_norm": 0.42233383655548096, "learning_rate": 1.964602900342023e-05, "loss": 0.4736, "step": 8103 }, { "epoch": 0.17187334308922397, "grad_norm": 0.32516539096832275, "learning_rate": 1.9645941053091412e-05, "loss": 0.5107, "step": 8104 }, { "epoch": 0.171894551547157, "grad_norm": 0.3339052200317383, "learning_rate": 1.9645853092034456e-05, "loss": 0.5592, "step": 8105 }, { "epoch": 0.17191576000509004, "grad_norm": 0.44511938095092773, "learning_rate": 1.964576512024947e-05, "loss": 0.5968, "step": 8106 }, { "epoch": 0.17193696846302306, "grad_norm": 0.3480050563812256, "learning_rate": 1.9645677137736547e-05, "loss": 0.5116, "step": 8107 }, { "epoch": 0.17195817692095608, "grad_norm": 0.37448564171791077, "learning_rate": 1.9645589144495788e-05, "loss": 0.5235, "step": 8108 }, { "epoch": 0.1719793853788891, "grad_norm": 0.31926682591438293, "learning_rate": 1.964550114052729e-05, "loss": 0.5088, "step": 8109 }, { "epoch": 0.17200059383682212, "grad_norm": 0.30769312381744385, "learning_rate": 1.9645413125831144e-05, "loss": 0.4598, "step": 8110 }, { "epoch": 0.17202180229475514, "grad_norm": 0.32630661129951477, "learning_rate": 1.9645325100407456e-05, "loss": 0.4228, "step": 8111 }, { "epoch": 0.17204301075268819, "grad_norm": 0.3167860805988312, "learning_rate": 1.9645237064256323e-05, "loss": 0.4544, "step": 8112 }, { "epoch": 0.1720642192106212, "grad_norm": 0.3433659076690674, "learning_rate": 1.9645149017377836e-05, "loss": 0.5477, "step": 8113 }, { "epoch": 0.17208542766855423, "grad_norm": 0.33829301595687866, "learning_rate": 1.96450609597721e-05, "loss": 0.5328, "step": 8114 }, { "epoch": 0.17210663612648724, "grad_norm": 0.44439542293548584, "learning_rate": 1.9644972891439215e-05, "loss": 0.5509, "step": 8115 }, { "epoch": 0.17212784458442026, "grad_norm": 0.339639812707901, "learning_rate": 1.9644884812379273e-05, "loss": 0.5039, "step": 8116 }, { "epoch": 0.17214905304235328, "grad_norm": 0.3212050795555115, "learning_rate": 1.9644796722592373e-05, "loss": 0.539, "step": 8117 }, { "epoch": 0.1721702615002863, "grad_norm": 0.3558911681175232, "learning_rate": 1.9644708622078612e-05, "loss": 0.596, "step": 8118 }, { "epoch": 0.17219146995821935, "grad_norm": 0.29585564136505127, "learning_rate": 1.964462051083809e-05, "loss": 0.5342, "step": 8119 }, { "epoch": 0.17221267841615237, "grad_norm": 0.33740895986557007, "learning_rate": 1.9644532388870907e-05, "loss": 0.5039, "step": 8120 }, { "epoch": 0.1722338868740854, "grad_norm": 0.36962270736694336, "learning_rate": 1.9644444256177158e-05, "loss": 0.5069, "step": 8121 }, { "epoch": 0.1722550953320184, "grad_norm": 0.32642433047294617, "learning_rate": 1.9644356112756942e-05, "loss": 0.5371, "step": 8122 }, { "epoch": 0.17227630378995143, "grad_norm": 0.3099856674671173, "learning_rate": 1.964426795861036e-05, "loss": 0.4768, "step": 8123 }, { "epoch": 0.17229751224788445, "grad_norm": 0.3767665922641754, "learning_rate": 1.9644179793737502e-05, "loss": 0.5275, "step": 8124 }, { "epoch": 0.17231872070581747, "grad_norm": 0.33221960067749023, "learning_rate": 1.964409161813847e-05, "loss": 0.5164, "step": 8125 }, { "epoch": 0.17233992916375052, "grad_norm": 0.29544663429260254, "learning_rate": 1.964400343181337e-05, "loss": 0.4775, "step": 8126 }, { "epoch": 0.17236113762168354, "grad_norm": 0.363265722990036, "learning_rate": 1.964391523476229e-05, "loss": 0.4933, "step": 8127 }, { "epoch": 0.17238234607961656, "grad_norm": 0.33066612482070923, "learning_rate": 1.9643827026985328e-05, "loss": 0.527, "step": 8128 }, { "epoch": 0.17240355453754957, "grad_norm": 0.3407335579395294, "learning_rate": 1.964373880848259e-05, "loss": 0.5631, "step": 8129 }, { "epoch": 0.1724247629954826, "grad_norm": 0.34560275077819824, "learning_rate": 1.9643650579254167e-05, "loss": 0.5609, "step": 8130 }, { "epoch": 0.17244597145341561, "grad_norm": 0.4021657109260559, "learning_rate": 1.964356233930016e-05, "loss": 0.4132, "step": 8131 }, { "epoch": 0.17246717991134863, "grad_norm": 0.3353722393512726, "learning_rate": 1.9643474088620668e-05, "loss": 0.4893, "step": 8132 }, { "epoch": 0.17248838836928168, "grad_norm": 0.49997565150260925, "learning_rate": 1.9643385827215786e-05, "loss": 0.6334, "step": 8133 }, { "epoch": 0.1725095968272147, "grad_norm": 0.3064442276954651, "learning_rate": 1.9643297555085614e-05, "loss": 0.432, "step": 8134 }, { "epoch": 0.17253080528514772, "grad_norm": 0.2990984320640564, "learning_rate": 1.964320927223025e-05, "loss": 0.3802, "step": 8135 }, { "epoch": 0.17255201374308074, "grad_norm": 0.3470008969306946, "learning_rate": 1.9643120978649795e-05, "loss": 0.5492, "step": 8136 }, { "epoch": 0.17257322220101376, "grad_norm": 0.3496241867542267, "learning_rate": 1.9643032674344345e-05, "loss": 0.524, "step": 8137 }, { "epoch": 0.17259443065894678, "grad_norm": 0.38480937480926514, "learning_rate": 1.9642944359313994e-05, "loss": 0.5628, "step": 8138 }, { "epoch": 0.1726156391168798, "grad_norm": 0.33163681626319885, "learning_rate": 1.9642856033558847e-05, "loss": 0.4152, "step": 8139 }, { "epoch": 0.17263684757481285, "grad_norm": 0.30735236406326294, "learning_rate": 1.9642767697079e-05, "loss": 0.4845, "step": 8140 }, { "epoch": 0.17265805603274587, "grad_norm": 0.3055994510650635, "learning_rate": 1.964267934987455e-05, "loss": 0.4762, "step": 8141 }, { "epoch": 0.17267926449067889, "grad_norm": 0.3384915888309479, "learning_rate": 1.964259099194559e-05, "loss": 0.5601, "step": 8142 }, { "epoch": 0.1727004729486119, "grad_norm": 0.3341870903968811, "learning_rate": 1.964250262329223e-05, "loss": 0.4717, "step": 8143 }, { "epoch": 0.17272168140654492, "grad_norm": 0.3131243884563446, "learning_rate": 1.9642414243914557e-05, "loss": 0.478, "step": 8144 }, { "epoch": 0.17274288986447794, "grad_norm": 0.38154685497283936, "learning_rate": 1.9642325853812682e-05, "loss": 0.5051, "step": 8145 }, { "epoch": 0.17276409832241096, "grad_norm": 0.325064092874527, "learning_rate": 1.9642237452986692e-05, "loss": 0.5856, "step": 8146 }, { "epoch": 0.172785306780344, "grad_norm": 0.32365682721138, "learning_rate": 1.9642149041436693e-05, "loss": 0.4685, "step": 8147 }, { "epoch": 0.17280651523827703, "grad_norm": 0.3525024354457855, "learning_rate": 1.9642060619162772e-05, "loss": 0.4822, "step": 8148 }, { "epoch": 0.17282772369621005, "grad_norm": 0.3143569231033325, "learning_rate": 1.964197218616504e-05, "loss": 0.5327, "step": 8149 }, { "epoch": 0.17284893215414307, "grad_norm": 0.3346574008464813, "learning_rate": 1.964188374244359e-05, "loss": 0.4932, "step": 8150 }, { "epoch": 0.1728701406120761, "grad_norm": 0.34499263763427734, "learning_rate": 1.9641795287998522e-05, "loss": 0.561, "step": 8151 }, { "epoch": 0.1728913490700091, "grad_norm": 0.34621232748031616, "learning_rate": 1.964170682282993e-05, "loss": 0.4948, "step": 8152 }, { "epoch": 0.17291255752794216, "grad_norm": 0.33515894412994385, "learning_rate": 1.9641618346937917e-05, "loss": 0.4939, "step": 8153 }, { "epoch": 0.17293376598587518, "grad_norm": 0.3561004102230072, "learning_rate": 1.964152986032258e-05, "loss": 0.5427, "step": 8154 }, { "epoch": 0.1729549744438082, "grad_norm": 0.31957659125328064, "learning_rate": 1.9641441362984016e-05, "loss": 0.5809, "step": 8155 }, { "epoch": 0.17297618290174122, "grad_norm": 0.29576802253723145, "learning_rate": 1.9641352854922324e-05, "loss": 0.4003, "step": 8156 }, { "epoch": 0.17299739135967424, "grad_norm": 0.32362350821495056, "learning_rate": 1.9641264336137608e-05, "loss": 0.4482, "step": 8157 }, { "epoch": 0.17301859981760725, "grad_norm": 0.3426903188228607, "learning_rate": 1.9641175806629956e-05, "loss": 0.4696, "step": 8158 }, { "epoch": 0.17303980827554027, "grad_norm": 0.35487890243530273, "learning_rate": 1.9641087266399473e-05, "loss": 0.4856, "step": 8159 }, { "epoch": 0.17306101673347332, "grad_norm": 0.46164608001708984, "learning_rate": 1.964099871544626e-05, "loss": 0.5504, "step": 8160 }, { "epoch": 0.17308222519140634, "grad_norm": 0.32324346899986267, "learning_rate": 1.964091015377041e-05, "loss": 0.5542, "step": 8161 }, { "epoch": 0.17310343364933936, "grad_norm": 0.33176374435424805, "learning_rate": 1.964082158137202e-05, "loss": 0.5388, "step": 8162 }, { "epoch": 0.17312464210727238, "grad_norm": 0.31731387972831726, "learning_rate": 1.96407329982512e-05, "loss": 0.5627, "step": 8163 }, { "epoch": 0.1731458505652054, "grad_norm": 0.35818374156951904, "learning_rate": 1.964064440440803e-05, "loss": 0.5303, "step": 8164 }, { "epoch": 0.17316705902313842, "grad_norm": 0.3201504051685333, "learning_rate": 1.9640555799842627e-05, "loss": 0.4347, "step": 8165 }, { "epoch": 0.17318826748107144, "grad_norm": 0.3255631625652313, "learning_rate": 1.9640467184555078e-05, "loss": 0.5369, "step": 8166 }, { "epoch": 0.1732094759390045, "grad_norm": 0.30758222937583923, "learning_rate": 1.9640378558545488e-05, "loss": 0.5372, "step": 8167 }, { "epoch": 0.1732306843969375, "grad_norm": 0.3451884090900421, "learning_rate": 1.964028992181395e-05, "loss": 0.6366, "step": 8168 }, { "epoch": 0.17325189285487053, "grad_norm": 0.3833731710910797, "learning_rate": 1.9640201274360566e-05, "loss": 0.5615, "step": 8169 }, { "epoch": 0.17327310131280355, "grad_norm": 0.29772692918777466, "learning_rate": 1.9640112616185435e-05, "loss": 0.6093, "step": 8170 }, { "epoch": 0.17329430977073657, "grad_norm": 0.29829442501068115, "learning_rate": 1.9640023947288656e-05, "loss": 0.4406, "step": 8171 }, { "epoch": 0.17331551822866959, "grad_norm": 0.33009108901023865, "learning_rate": 1.9639935267670323e-05, "loss": 0.4948, "step": 8172 }, { "epoch": 0.1733367266866026, "grad_norm": 0.3329661786556244, "learning_rate": 1.963984657733054e-05, "loss": 0.5508, "step": 8173 }, { "epoch": 0.17335793514453565, "grad_norm": 0.34920695424079895, "learning_rate": 1.9639757876269398e-05, "loss": 0.5691, "step": 8174 }, { "epoch": 0.17337914360246867, "grad_norm": 0.2906491458415985, "learning_rate": 1.9639669164487008e-05, "loss": 0.516, "step": 8175 }, { "epoch": 0.1734003520604017, "grad_norm": 0.40256214141845703, "learning_rate": 1.9639580441983458e-05, "loss": 0.5357, "step": 8176 }, { "epoch": 0.1734215605183347, "grad_norm": 0.32979652285575867, "learning_rate": 1.963949170875885e-05, "loss": 0.5441, "step": 8177 }, { "epoch": 0.17344276897626773, "grad_norm": 0.317818284034729, "learning_rate": 1.9639402964813284e-05, "loss": 0.5537, "step": 8178 }, { "epoch": 0.17346397743420075, "grad_norm": 0.32018113136291504, "learning_rate": 1.963931421014686e-05, "loss": 0.507, "step": 8179 }, { "epoch": 0.17348518589213377, "grad_norm": 0.34397631883621216, "learning_rate": 1.9639225444759672e-05, "loss": 0.5459, "step": 8180 }, { "epoch": 0.17350639435006682, "grad_norm": 0.32979992032051086, "learning_rate": 1.963913666865182e-05, "loss": 0.5276, "step": 8181 }, { "epoch": 0.17352760280799984, "grad_norm": 0.29419466853141785, "learning_rate": 1.9639047881823407e-05, "loss": 0.4668, "step": 8182 }, { "epoch": 0.17354881126593286, "grad_norm": 0.32480376958847046, "learning_rate": 1.9638959084274527e-05, "loss": 0.5212, "step": 8183 }, { "epoch": 0.17357001972386588, "grad_norm": 0.2845105230808258, "learning_rate": 1.963887027600528e-05, "loss": 0.5535, "step": 8184 }, { "epoch": 0.1735912281817989, "grad_norm": 0.4311197102069855, "learning_rate": 1.9638781457015767e-05, "loss": 0.5972, "step": 8185 }, { "epoch": 0.17361243663973192, "grad_norm": 0.3300466239452362, "learning_rate": 1.9638692627306086e-05, "loss": 0.5574, "step": 8186 }, { "epoch": 0.17363364509766496, "grad_norm": 0.3388728201389313, "learning_rate": 1.963860378687633e-05, "loss": 0.4919, "step": 8187 }, { "epoch": 0.17365485355559798, "grad_norm": 0.32558730244636536, "learning_rate": 1.9638514935726607e-05, "loss": 0.5676, "step": 8188 }, { "epoch": 0.173676062013531, "grad_norm": 0.2974023222923279, "learning_rate": 1.963842607385701e-05, "loss": 0.4699, "step": 8189 }, { "epoch": 0.17369727047146402, "grad_norm": 0.4094794690608978, "learning_rate": 1.9638337201267638e-05, "loss": 0.5577, "step": 8190 }, { "epoch": 0.17371847892939704, "grad_norm": 0.3145158886909485, "learning_rate": 1.9638248317958593e-05, "loss": 0.5111, "step": 8191 }, { "epoch": 0.17373968738733006, "grad_norm": 0.29589900374412537, "learning_rate": 1.963815942392997e-05, "loss": 0.5014, "step": 8192 }, { "epoch": 0.17376089584526308, "grad_norm": 0.3502073287963867, "learning_rate": 1.9638070519181868e-05, "loss": 0.6282, "step": 8193 }, { "epoch": 0.17378210430319613, "grad_norm": 0.32199403643608093, "learning_rate": 1.963798160371439e-05, "loss": 0.5341, "step": 8194 }, { "epoch": 0.17380331276112915, "grad_norm": 0.3162390887737274, "learning_rate": 1.9637892677527633e-05, "loss": 0.5391, "step": 8195 }, { "epoch": 0.17382452121906217, "grad_norm": 0.3565497100353241, "learning_rate": 1.9637803740621697e-05, "loss": 0.6131, "step": 8196 }, { "epoch": 0.1738457296769952, "grad_norm": 0.34680232405662537, "learning_rate": 1.9637714792996682e-05, "loss": 0.4557, "step": 8197 }, { "epoch": 0.1738669381349282, "grad_norm": 0.33628562092781067, "learning_rate": 1.9637625834652678e-05, "loss": 0.5561, "step": 8198 }, { "epoch": 0.17388814659286123, "grad_norm": 0.5157576203346252, "learning_rate": 1.9637536865589794e-05, "loss": 0.4376, "step": 8199 }, { "epoch": 0.17390935505079425, "grad_norm": 0.32268065214157104, "learning_rate": 1.963744788580812e-05, "loss": 0.5269, "step": 8200 }, { "epoch": 0.1739305635087273, "grad_norm": 0.37646543979644775, "learning_rate": 1.9637358895307765e-05, "loss": 0.5051, "step": 8201 }, { "epoch": 0.1739517719666603, "grad_norm": 0.4664739966392517, "learning_rate": 1.9637269894088824e-05, "loss": 0.555, "step": 8202 }, { "epoch": 0.17397298042459333, "grad_norm": 0.30226200819015503, "learning_rate": 1.963718088215139e-05, "loss": 0.4352, "step": 8203 }, { "epoch": 0.17399418888252635, "grad_norm": 0.307486355304718, "learning_rate": 1.9637091859495574e-05, "loss": 0.5512, "step": 8204 }, { "epoch": 0.17401539734045937, "grad_norm": 0.3497768044471741, "learning_rate": 1.9637002826121463e-05, "loss": 0.5572, "step": 8205 }, { "epoch": 0.1740366057983924, "grad_norm": 0.3154637813568115, "learning_rate": 1.9636913782029163e-05, "loss": 0.5454, "step": 8206 }, { "epoch": 0.1740578142563254, "grad_norm": 0.3363175094127655, "learning_rate": 1.9636824727218772e-05, "loss": 0.5061, "step": 8207 }, { "epoch": 0.17407902271425846, "grad_norm": 0.4160710573196411, "learning_rate": 1.9636735661690385e-05, "loss": 0.532, "step": 8208 }, { "epoch": 0.17410023117219148, "grad_norm": 0.3294384181499481, "learning_rate": 1.9636646585444108e-05, "loss": 0.5565, "step": 8209 }, { "epoch": 0.1741214396301245, "grad_norm": 0.3265491724014282, "learning_rate": 1.9636557498480035e-05, "loss": 0.4931, "step": 8210 }, { "epoch": 0.17414264808805752, "grad_norm": 0.3269492983818054, "learning_rate": 1.9636468400798264e-05, "loss": 0.4469, "step": 8211 }, { "epoch": 0.17416385654599054, "grad_norm": 0.3495899438858032, "learning_rate": 1.96363792923989e-05, "loss": 0.5627, "step": 8212 }, { "epoch": 0.17418506500392356, "grad_norm": 0.31943902373313904, "learning_rate": 1.963629017328204e-05, "loss": 0.5063, "step": 8213 }, { "epoch": 0.17420627346185658, "grad_norm": 0.36721426248550415, "learning_rate": 1.963620104344778e-05, "loss": 0.464, "step": 8214 }, { "epoch": 0.17422748191978962, "grad_norm": 0.3172854781150818, "learning_rate": 1.9636111902896217e-05, "loss": 0.4716, "step": 8215 }, { "epoch": 0.17424869037772264, "grad_norm": 0.31721553206443787, "learning_rate": 1.963602275162746e-05, "loss": 0.5679, "step": 8216 }, { "epoch": 0.17426989883565566, "grad_norm": 0.33673518896102905, "learning_rate": 1.96359335896416e-05, "loss": 0.5233, "step": 8217 }, { "epoch": 0.17429110729358868, "grad_norm": 0.35324183106422424, "learning_rate": 1.9635844416938737e-05, "loss": 0.5149, "step": 8218 }, { "epoch": 0.1743123157515217, "grad_norm": 0.32585233449935913, "learning_rate": 1.963575523351897e-05, "loss": 0.5828, "step": 8219 }, { "epoch": 0.17433352420945472, "grad_norm": 0.33186182379722595, "learning_rate": 1.9635666039382403e-05, "loss": 0.5328, "step": 8220 }, { "epoch": 0.17435473266738774, "grad_norm": 0.35955771803855896, "learning_rate": 1.963557683452913e-05, "loss": 0.5785, "step": 8221 }, { "epoch": 0.1743759411253208, "grad_norm": 0.3941442370414734, "learning_rate": 1.9635487618959253e-05, "loss": 0.5817, "step": 8222 }, { "epoch": 0.1743971495832538, "grad_norm": 0.3108111023902893, "learning_rate": 1.9635398392672873e-05, "loss": 0.5018, "step": 8223 }, { "epoch": 0.17441835804118683, "grad_norm": 0.328611820936203, "learning_rate": 1.9635309155670083e-05, "loss": 0.4736, "step": 8224 }, { "epoch": 0.17443956649911985, "grad_norm": 0.29898902773857117, "learning_rate": 1.9635219907950988e-05, "loss": 0.504, "step": 8225 }, { "epoch": 0.17446077495705287, "grad_norm": 0.3703526556491852, "learning_rate": 1.9635130649515685e-05, "loss": 0.5204, "step": 8226 }, { "epoch": 0.1744819834149859, "grad_norm": 0.3533759117126465, "learning_rate": 1.963504138036427e-05, "loss": 0.4931, "step": 8227 }, { "epoch": 0.17450319187291893, "grad_norm": 0.32274332642555237, "learning_rate": 1.963495210049685e-05, "loss": 0.5463, "step": 8228 }, { "epoch": 0.17452440033085195, "grad_norm": 0.3373994529247284, "learning_rate": 1.9634862809913517e-05, "loss": 0.5483, "step": 8229 }, { "epoch": 0.17454560878878497, "grad_norm": 0.32556861639022827, "learning_rate": 1.9634773508614374e-05, "loss": 0.5301, "step": 8230 }, { "epoch": 0.174566817246718, "grad_norm": 0.35027363896369934, "learning_rate": 1.963468419659952e-05, "loss": 0.498, "step": 8231 }, { "epoch": 0.174588025704651, "grad_norm": 0.37482455372810364, "learning_rate": 1.9634594873869055e-05, "loss": 0.5731, "step": 8232 }, { "epoch": 0.17460923416258403, "grad_norm": 0.37357383966445923, "learning_rate": 1.9634505540423077e-05, "loss": 0.5361, "step": 8233 }, { "epoch": 0.17463044262051705, "grad_norm": 0.36427628993988037, "learning_rate": 1.9634416196261688e-05, "loss": 0.549, "step": 8234 }, { "epoch": 0.1746516510784501, "grad_norm": 0.3321743905544281, "learning_rate": 1.963432684138498e-05, "loss": 0.5274, "step": 8235 }, { "epoch": 0.17467285953638312, "grad_norm": 0.3222603499889374, "learning_rate": 1.9634237475793057e-05, "loss": 0.5661, "step": 8236 }, { "epoch": 0.17469406799431614, "grad_norm": 0.3416273295879364, "learning_rate": 1.9634148099486022e-05, "loss": 0.5273, "step": 8237 }, { "epoch": 0.17471527645224916, "grad_norm": 0.3330436646938324, "learning_rate": 1.9634058712463973e-05, "loss": 0.5418, "step": 8238 }, { "epoch": 0.17473648491018218, "grad_norm": 0.35178571939468384, "learning_rate": 1.9633969314727005e-05, "loss": 0.5351, "step": 8239 }, { "epoch": 0.1747576933681152, "grad_norm": 0.30838605761528015, "learning_rate": 1.963387990627522e-05, "loss": 0.5637, "step": 8240 }, { "epoch": 0.17477890182604822, "grad_norm": 0.3298596441745758, "learning_rate": 1.9633790487108717e-05, "loss": 0.5575, "step": 8241 }, { "epoch": 0.17480011028398126, "grad_norm": 0.34405580163002014, "learning_rate": 1.9633701057227593e-05, "loss": 0.4523, "step": 8242 }, { "epoch": 0.17482131874191428, "grad_norm": 0.40171173214912415, "learning_rate": 1.9633611616631957e-05, "loss": 0.6095, "step": 8243 }, { "epoch": 0.1748425271998473, "grad_norm": 0.35675737261772156, "learning_rate": 1.9633522165321896e-05, "loss": 0.5203, "step": 8244 }, { "epoch": 0.17486373565778032, "grad_norm": 0.3388776481151581, "learning_rate": 1.9633432703297523e-05, "loss": 0.53, "step": 8245 }, { "epoch": 0.17488494411571334, "grad_norm": 0.3388253152370453, "learning_rate": 1.963334323055892e-05, "loss": 0.503, "step": 8246 }, { "epoch": 0.17490615257364636, "grad_norm": 0.34927114844322205, "learning_rate": 1.9633253747106202e-05, "loss": 0.5802, "step": 8247 }, { "epoch": 0.17492736103157938, "grad_norm": 0.3581748902797699, "learning_rate": 1.9633164252939463e-05, "loss": 0.4748, "step": 8248 }, { "epoch": 0.17494856948951243, "grad_norm": 0.3552666902542114, "learning_rate": 1.96330747480588e-05, "loss": 0.5638, "step": 8249 }, { "epoch": 0.17496977794744545, "grad_norm": 0.34331125020980835, "learning_rate": 1.9632985232464315e-05, "loss": 0.4874, "step": 8250 }, { "epoch": 0.17499098640537847, "grad_norm": 0.35634905099868774, "learning_rate": 1.963289570615611e-05, "loss": 0.583, "step": 8251 }, { "epoch": 0.1750121948633115, "grad_norm": 0.3363933861255646, "learning_rate": 1.963280616913428e-05, "loss": 0.5335, "step": 8252 }, { "epoch": 0.1750334033212445, "grad_norm": 0.3408825397491455, "learning_rate": 1.963271662139893e-05, "loss": 0.6282, "step": 8253 }, { "epoch": 0.17505461177917753, "grad_norm": 0.3279193043708801, "learning_rate": 1.9632627062950153e-05, "loss": 0.4627, "step": 8254 }, { "epoch": 0.17507582023711055, "grad_norm": 0.36682984232902527, "learning_rate": 1.963253749378805e-05, "loss": 0.5318, "step": 8255 }, { "epoch": 0.1750970286950436, "grad_norm": 0.31407177448272705, "learning_rate": 1.9632447913912728e-05, "loss": 0.5235, "step": 8256 }, { "epoch": 0.1751182371529766, "grad_norm": 0.3947368264198303, "learning_rate": 1.9632358323324277e-05, "loss": 0.5337, "step": 8257 }, { "epoch": 0.17513944561090963, "grad_norm": 0.33406946063041687, "learning_rate": 1.96322687220228e-05, "loss": 0.4323, "step": 8258 }, { "epoch": 0.17516065406884265, "grad_norm": 0.31138524413108826, "learning_rate": 1.96321791100084e-05, "loss": 0.4114, "step": 8259 }, { "epoch": 0.17518186252677567, "grad_norm": 0.32801178097724915, "learning_rate": 1.9632089487281173e-05, "loss": 0.4623, "step": 8260 }, { "epoch": 0.1752030709847087, "grad_norm": 0.33348461985588074, "learning_rate": 1.9631999853841224e-05, "loss": 0.5257, "step": 8261 }, { "epoch": 0.17522427944264174, "grad_norm": 0.35552430152893066, "learning_rate": 1.9631910209688642e-05, "loss": 0.507, "step": 8262 }, { "epoch": 0.17524548790057476, "grad_norm": 0.39696887135505676, "learning_rate": 1.9631820554823537e-05, "loss": 0.5004, "step": 8263 }, { "epoch": 0.17526669635850778, "grad_norm": 0.313462495803833, "learning_rate": 1.9631730889246002e-05, "loss": 0.5701, "step": 8264 }, { "epoch": 0.1752879048164408, "grad_norm": 0.8418727517127991, "learning_rate": 1.9631641212956144e-05, "loss": 0.4742, "step": 8265 }, { "epoch": 0.17530911327437382, "grad_norm": 0.34698963165283203, "learning_rate": 1.9631551525954053e-05, "loss": 0.5722, "step": 8266 }, { "epoch": 0.17533032173230684, "grad_norm": 0.3472754955291748, "learning_rate": 1.9631461828239837e-05, "loss": 0.5732, "step": 8267 }, { "epoch": 0.17535153019023986, "grad_norm": 0.33263060450553894, "learning_rate": 1.9631372119813593e-05, "loss": 0.5942, "step": 8268 }, { "epoch": 0.1753727386481729, "grad_norm": 0.3267647325992584, "learning_rate": 1.963128240067542e-05, "loss": 0.5098, "step": 8269 }, { "epoch": 0.17539394710610592, "grad_norm": 0.30222275853157043, "learning_rate": 1.963119267082542e-05, "loss": 0.5757, "step": 8270 }, { "epoch": 0.17541515556403894, "grad_norm": 0.3400629162788391, "learning_rate": 1.963110293026369e-05, "loss": 0.5065, "step": 8271 }, { "epoch": 0.17543636402197196, "grad_norm": 0.2980835437774658, "learning_rate": 1.963101317899033e-05, "loss": 0.4783, "step": 8272 }, { "epoch": 0.17545757247990498, "grad_norm": 0.3157082200050354, "learning_rate": 1.963092341700544e-05, "loss": 0.517, "step": 8273 }, { "epoch": 0.175478780937838, "grad_norm": 0.3424088954925537, "learning_rate": 1.9630833644309125e-05, "loss": 0.5055, "step": 8274 }, { "epoch": 0.17549998939577102, "grad_norm": 0.3315250873565674, "learning_rate": 1.9630743860901475e-05, "loss": 0.5828, "step": 8275 }, { "epoch": 0.17552119785370407, "grad_norm": 0.3014235198497772, "learning_rate": 1.9630654066782597e-05, "loss": 0.5865, "step": 8276 }, { "epoch": 0.1755424063116371, "grad_norm": 0.339037150144577, "learning_rate": 1.9630564261952592e-05, "loss": 0.5501, "step": 8277 }, { "epoch": 0.1755636147695701, "grad_norm": 0.3287106454372406, "learning_rate": 1.9630474446411554e-05, "loss": 0.5575, "step": 8278 }, { "epoch": 0.17558482322750313, "grad_norm": 0.4629420042037964, "learning_rate": 1.9630384620159588e-05, "loss": 0.4334, "step": 8279 }, { "epoch": 0.17560603168543615, "grad_norm": 0.4035005271434784, "learning_rate": 1.9630294783196788e-05, "loss": 0.5084, "step": 8280 }, { "epoch": 0.17562724014336917, "grad_norm": 0.3419942855834961, "learning_rate": 1.9630204935523263e-05, "loss": 0.5625, "step": 8281 }, { "epoch": 0.1756484486013022, "grad_norm": 0.3057268261909485, "learning_rate": 1.9630115077139106e-05, "loss": 0.5017, "step": 8282 }, { "epoch": 0.17566965705923523, "grad_norm": 0.379067987203598, "learning_rate": 1.963002520804442e-05, "loss": 0.5916, "step": 8283 }, { "epoch": 0.17569086551716825, "grad_norm": 0.33353984355926514, "learning_rate": 1.96299353282393e-05, "loss": 0.5317, "step": 8284 }, { "epoch": 0.17571207397510127, "grad_norm": 0.34974679350852966, "learning_rate": 1.962984543772385e-05, "loss": 0.5821, "step": 8285 }, { "epoch": 0.1757332824330343, "grad_norm": 0.3247528374195099, "learning_rate": 1.9629755536498173e-05, "loss": 0.5352, "step": 8286 }, { "epoch": 0.1757544908909673, "grad_norm": 0.40007469058036804, "learning_rate": 1.9629665624562363e-05, "loss": 0.5056, "step": 8287 }, { "epoch": 0.17577569934890033, "grad_norm": 0.40651825070381165, "learning_rate": 1.9629575701916523e-05, "loss": 0.5159, "step": 8288 }, { "epoch": 0.17579690780683335, "grad_norm": 0.3371775150299072, "learning_rate": 1.9629485768560753e-05, "loss": 0.5357, "step": 8289 }, { "epoch": 0.1758181162647664, "grad_norm": 0.3306824266910553, "learning_rate": 1.962939582449515e-05, "loss": 0.5897, "step": 8290 }, { "epoch": 0.17583932472269942, "grad_norm": 0.6343250870704651, "learning_rate": 1.962930586971982e-05, "loss": 0.491, "step": 8291 }, { "epoch": 0.17586053318063244, "grad_norm": 0.33803290128707886, "learning_rate": 1.9629215904234857e-05, "loss": 0.4651, "step": 8292 }, { "epoch": 0.17588174163856546, "grad_norm": 0.4015531837940216, "learning_rate": 1.9629125928040366e-05, "loss": 0.5643, "step": 8293 }, { "epoch": 0.17590295009649848, "grad_norm": 0.3225039541721344, "learning_rate": 1.9629035941136445e-05, "loss": 0.4491, "step": 8294 }, { "epoch": 0.1759241585544315, "grad_norm": 0.3528294563293457, "learning_rate": 1.962894594352319e-05, "loss": 0.5535, "step": 8295 }, { "epoch": 0.17594536701236452, "grad_norm": 0.3362683355808258, "learning_rate": 1.962885593520071e-05, "loss": 0.4348, "step": 8296 }, { "epoch": 0.17596657547029756, "grad_norm": 0.32433679699897766, "learning_rate": 1.96287659161691e-05, "loss": 0.5524, "step": 8297 }, { "epoch": 0.17598778392823058, "grad_norm": 0.35375356674194336, "learning_rate": 1.9628675886428455e-05, "loss": 0.5538, "step": 8298 }, { "epoch": 0.1760089923861636, "grad_norm": 0.40161067247390747, "learning_rate": 1.9628585845978882e-05, "loss": 0.627, "step": 8299 }, { "epoch": 0.17603020084409662, "grad_norm": 0.3988371789455414, "learning_rate": 1.962849579482048e-05, "loss": 0.4157, "step": 8300 }, { "epoch": 0.17605140930202964, "grad_norm": 0.32724031805992126, "learning_rate": 1.9628405732953354e-05, "loss": 0.595, "step": 8301 }, { "epoch": 0.17607261775996266, "grad_norm": 0.3432060182094574, "learning_rate": 1.9628315660377593e-05, "loss": 0.5437, "step": 8302 }, { "epoch": 0.1760938262178957, "grad_norm": 0.29499903321266174, "learning_rate": 1.9628225577093306e-05, "loss": 0.5149, "step": 8303 }, { "epoch": 0.17611503467582873, "grad_norm": 0.318276584148407, "learning_rate": 1.9628135483100585e-05, "loss": 0.5504, "step": 8304 }, { "epoch": 0.17613624313376175, "grad_norm": 0.3009433448314667, "learning_rate": 1.9628045378399537e-05, "loss": 0.5146, "step": 8305 }, { "epoch": 0.17615745159169477, "grad_norm": 0.2920837998390198, "learning_rate": 1.9627955262990265e-05, "loss": 0.4679, "step": 8306 }, { "epoch": 0.1761786600496278, "grad_norm": 0.34995150566101074, "learning_rate": 1.962786513687286e-05, "loss": 0.5484, "step": 8307 }, { "epoch": 0.1761998685075608, "grad_norm": 0.2935539186000824, "learning_rate": 1.962777500004743e-05, "loss": 0.4365, "step": 8308 }, { "epoch": 0.17622107696549383, "grad_norm": 0.35761627554893494, "learning_rate": 1.9627684852514072e-05, "loss": 0.6502, "step": 8309 }, { "epoch": 0.17624228542342688, "grad_norm": 0.30323976278305054, "learning_rate": 1.9627594694272888e-05, "loss": 0.5516, "step": 8310 }, { "epoch": 0.1762634938813599, "grad_norm": 0.3274824321269989, "learning_rate": 1.962750452532397e-05, "loss": 0.4717, "step": 8311 }, { "epoch": 0.17628470233929291, "grad_norm": 0.3736690878868103, "learning_rate": 1.9627414345667433e-05, "loss": 0.5083, "step": 8312 }, { "epoch": 0.17630591079722593, "grad_norm": 0.34743499755859375, "learning_rate": 1.9627324155303365e-05, "loss": 0.5854, "step": 8313 }, { "epoch": 0.17632711925515895, "grad_norm": 0.3900822103023529, "learning_rate": 1.9627233954231873e-05, "loss": 0.5034, "step": 8314 }, { "epoch": 0.17634832771309197, "grad_norm": 0.30938616394996643, "learning_rate": 1.9627143742453057e-05, "loss": 0.4448, "step": 8315 }, { "epoch": 0.176369536171025, "grad_norm": 0.35792243480682373, "learning_rate": 1.962705351996701e-05, "loss": 0.5255, "step": 8316 }, { "epoch": 0.17639074462895804, "grad_norm": 0.29344385862350464, "learning_rate": 1.962696328677384e-05, "loss": 0.5894, "step": 8317 }, { "epoch": 0.17641195308689106, "grad_norm": 0.3080078065395355, "learning_rate": 1.9626873042873646e-05, "loss": 0.5623, "step": 8318 }, { "epoch": 0.17643316154482408, "grad_norm": 0.36445125937461853, "learning_rate": 1.9626782788266524e-05, "loss": 0.5332, "step": 8319 }, { "epoch": 0.1764543700027571, "grad_norm": 0.2785823345184326, "learning_rate": 1.9626692522952583e-05, "loss": 0.4518, "step": 8320 }, { "epoch": 0.17647557846069012, "grad_norm": 0.33102837204933167, "learning_rate": 1.9626602246931915e-05, "loss": 0.5228, "step": 8321 }, { "epoch": 0.17649678691862314, "grad_norm": 0.3103219270706177, "learning_rate": 1.9626511960204622e-05, "loss": 0.5176, "step": 8322 }, { "epoch": 0.17651799537655616, "grad_norm": 0.3633461892604828, "learning_rate": 1.962642166277081e-05, "loss": 0.4874, "step": 8323 }, { "epoch": 0.1765392038344892, "grad_norm": 0.4447450339794159, "learning_rate": 1.9626331354630574e-05, "loss": 0.5715, "step": 8324 }, { "epoch": 0.17656041229242223, "grad_norm": 0.3985694646835327, "learning_rate": 1.9626241035784016e-05, "loss": 0.5326, "step": 8325 }, { "epoch": 0.17658162075035524, "grad_norm": 0.36423346400260925, "learning_rate": 1.9626150706231235e-05, "loss": 0.6285, "step": 8326 }, { "epoch": 0.17660282920828826, "grad_norm": 0.3196031153202057, "learning_rate": 1.9626060365972334e-05, "loss": 0.4876, "step": 8327 }, { "epoch": 0.17662403766622128, "grad_norm": 0.3883531391620636, "learning_rate": 1.962597001500741e-05, "loss": 0.5753, "step": 8328 }, { "epoch": 0.1766452461241543, "grad_norm": 0.31137996912002563, "learning_rate": 1.962587965333657e-05, "loss": 0.5262, "step": 8329 }, { "epoch": 0.17666645458208732, "grad_norm": 0.36404165625572205, "learning_rate": 1.9625789280959908e-05, "loss": 0.5219, "step": 8330 }, { "epoch": 0.17668766304002037, "grad_norm": 0.4333660304546356, "learning_rate": 1.962569889787753e-05, "loss": 0.4627, "step": 8331 }, { "epoch": 0.1767088714979534, "grad_norm": 0.3025231957435608, "learning_rate": 1.9625608504089526e-05, "loss": 0.5317, "step": 8332 }, { "epoch": 0.1767300799558864, "grad_norm": 0.3184448778629303, "learning_rate": 1.9625518099596007e-05, "loss": 0.5601, "step": 8333 }, { "epoch": 0.17675128841381943, "grad_norm": 0.35806018114089966, "learning_rate": 1.9625427684397072e-05, "loss": 0.466, "step": 8334 }, { "epoch": 0.17677249687175245, "grad_norm": 0.31118401885032654, "learning_rate": 1.962533725849282e-05, "loss": 0.5019, "step": 8335 }, { "epoch": 0.17679370532968547, "grad_norm": 0.40070977807044983, "learning_rate": 1.962524682188335e-05, "loss": 0.5881, "step": 8336 }, { "epoch": 0.1768149137876185, "grad_norm": 0.4101950526237488, "learning_rate": 1.9625156374568767e-05, "loss": 0.5663, "step": 8337 }, { "epoch": 0.17683612224555154, "grad_norm": 0.3869422674179077, "learning_rate": 1.9625065916549165e-05, "loss": 0.5603, "step": 8338 }, { "epoch": 0.17685733070348456, "grad_norm": 0.3346238434314728, "learning_rate": 1.962497544782465e-05, "loss": 0.5789, "step": 8339 }, { "epoch": 0.17687853916141758, "grad_norm": 0.3415355980396271, "learning_rate": 1.9624884968395325e-05, "loss": 0.5786, "step": 8340 }, { "epoch": 0.1768997476193506, "grad_norm": 0.3699812889099121, "learning_rate": 1.962479447826128e-05, "loss": 0.481, "step": 8341 }, { "epoch": 0.17692095607728361, "grad_norm": 0.37204429507255554, "learning_rate": 1.9624703977422624e-05, "loss": 0.5437, "step": 8342 }, { "epoch": 0.17694216453521663, "grad_norm": 0.3513600826263428, "learning_rate": 1.9624613465879455e-05, "loss": 0.5315, "step": 8343 }, { "epoch": 0.17696337299314968, "grad_norm": 0.31833598017692566, "learning_rate": 1.9624522943631876e-05, "loss": 0.5345, "step": 8344 }, { "epoch": 0.1769845814510827, "grad_norm": 0.3102538287639618, "learning_rate": 1.9624432410679986e-05, "loss": 0.5294, "step": 8345 }, { "epoch": 0.17700578990901572, "grad_norm": 0.3063432276248932, "learning_rate": 1.9624341867023887e-05, "loss": 0.6077, "step": 8346 }, { "epoch": 0.17702699836694874, "grad_norm": 0.37109652161598206, "learning_rate": 1.9624251312663676e-05, "loss": 0.5513, "step": 8347 }, { "epoch": 0.17704820682488176, "grad_norm": 0.353082537651062, "learning_rate": 1.962416074759946e-05, "loss": 0.5705, "step": 8348 }, { "epoch": 0.17706941528281478, "grad_norm": 0.6343327760696411, "learning_rate": 1.9624070171831335e-05, "loss": 0.579, "step": 8349 }, { "epoch": 0.1770906237407478, "grad_norm": 0.35920530557632446, "learning_rate": 1.96239795853594e-05, "loss": 0.5481, "step": 8350 }, { "epoch": 0.17711183219868085, "grad_norm": 0.30920007824897766, "learning_rate": 1.9623888988183762e-05, "loss": 0.4399, "step": 8351 }, { "epoch": 0.17713304065661387, "grad_norm": 0.3419862389564514, "learning_rate": 1.9623798380304516e-05, "loss": 0.6241, "step": 8352 }, { "epoch": 0.17715424911454689, "grad_norm": 0.3986258804798126, "learning_rate": 1.9623707761721763e-05, "loss": 0.5407, "step": 8353 }, { "epoch": 0.1771754575724799, "grad_norm": 0.47048938274383545, "learning_rate": 1.962361713243561e-05, "loss": 0.49, "step": 8354 }, { "epoch": 0.17719666603041292, "grad_norm": 0.33224251866340637, "learning_rate": 1.9623526492446152e-05, "loss": 0.511, "step": 8355 }, { "epoch": 0.17721787448834594, "grad_norm": 0.31644219160079956, "learning_rate": 1.962343584175349e-05, "loss": 0.4215, "step": 8356 }, { "epoch": 0.17723908294627896, "grad_norm": 0.32123440504074097, "learning_rate": 1.9623345180357728e-05, "loss": 0.4703, "step": 8357 }, { "epoch": 0.177260291404212, "grad_norm": 0.3216693103313446, "learning_rate": 1.9623254508258965e-05, "loss": 0.6174, "step": 8358 }, { "epoch": 0.17728149986214503, "grad_norm": 0.316609650850296, "learning_rate": 1.9623163825457305e-05, "loss": 0.5003, "step": 8359 }, { "epoch": 0.17730270832007805, "grad_norm": 0.32799193263053894, "learning_rate": 1.962307313195284e-05, "loss": 0.5137, "step": 8360 }, { "epoch": 0.17732391677801107, "grad_norm": 0.30538371205329895, "learning_rate": 1.9622982427745678e-05, "loss": 0.5202, "step": 8361 }, { "epoch": 0.1773451252359441, "grad_norm": 0.37892335653305054, "learning_rate": 1.9622891712835922e-05, "loss": 0.47, "step": 8362 }, { "epoch": 0.1773663336938771, "grad_norm": 0.3130708634853363, "learning_rate": 1.9622800987223667e-05, "loss": 0.5178, "step": 8363 }, { "epoch": 0.17738754215181013, "grad_norm": 0.35260680317878723, "learning_rate": 1.9622710250909013e-05, "loss": 0.5682, "step": 8364 }, { "epoch": 0.17740875060974318, "grad_norm": 0.32477301359176636, "learning_rate": 1.962261950389207e-05, "loss": 0.5471, "step": 8365 }, { "epoch": 0.1774299590676762, "grad_norm": 0.5731520652770996, "learning_rate": 1.962252874617293e-05, "loss": 0.5502, "step": 8366 }, { "epoch": 0.17745116752560922, "grad_norm": 0.35702577233314514, "learning_rate": 1.96224379777517e-05, "loss": 0.5477, "step": 8367 }, { "epoch": 0.17747237598354224, "grad_norm": 0.3249843716621399, "learning_rate": 1.9622347198628476e-05, "loss": 0.5319, "step": 8368 }, { "epoch": 0.17749358444147526, "grad_norm": 0.3224327564239502, "learning_rate": 1.962225640880336e-05, "loss": 0.5174, "step": 8369 }, { "epoch": 0.17751479289940827, "grad_norm": 0.3219025731086731, "learning_rate": 1.9622165608276454e-05, "loss": 0.4616, "step": 8370 }, { "epoch": 0.1775360013573413, "grad_norm": 0.31892627477645874, "learning_rate": 1.962207479704786e-05, "loss": 0.4307, "step": 8371 }, { "epoch": 0.17755720981527434, "grad_norm": 0.32647624611854553, "learning_rate": 1.962198397511768e-05, "loss": 0.5056, "step": 8372 }, { "epoch": 0.17757841827320736, "grad_norm": 0.38755106925964355, "learning_rate": 1.962189314248601e-05, "loss": 0.4715, "step": 8373 }, { "epoch": 0.17759962673114038, "grad_norm": 0.3223007619380951, "learning_rate": 1.9621802299152953e-05, "loss": 0.4557, "step": 8374 }, { "epoch": 0.1776208351890734, "grad_norm": 0.3179668188095093, "learning_rate": 1.9621711445118615e-05, "loss": 0.6227, "step": 8375 }, { "epoch": 0.17764204364700642, "grad_norm": 0.3541063070297241, "learning_rate": 1.9621620580383093e-05, "loss": 0.5043, "step": 8376 }, { "epoch": 0.17766325210493944, "grad_norm": 0.3232802450656891, "learning_rate": 1.9621529704946485e-05, "loss": 0.5213, "step": 8377 }, { "epoch": 0.1776844605628725, "grad_norm": 0.321336954832077, "learning_rate": 1.9621438818808897e-05, "loss": 0.5875, "step": 8378 }, { "epoch": 0.1777056690208055, "grad_norm": 0.3435264527797699, "learning_rate": 1.962134792197043e-05, "loss": 0.5164, "step": 8379 }, { "epoch": 0.17772687747873853, "grad_norm": 0.3384063243865967, "learning_rate": 1.962125701443118e-05, "loss": 0.6026, "step": 8380 }, { "epoch": 0.17774808593667155, "grad_norm": 0.3286692798137665, "learning_rate": 1.962116609619125e-05, "loss": 0.5323, "step": 8381 }, { "epoch": 0.17776929439460457, "grad_norm": 0.3260402977466583, "learning_rate": 1.962107516725075e-05, "loss": 0.57, "step": 8382 }, { "epoch": 0.17779050285253759, "grad_norm": 0.33201175928115845, "learning_rate": 1.9620984227609767e-05, "loss": 0.6018, "step": 8383 }, { "epoch": 0.1778117113104706, "grad_norm": 0.34280914068222046, "learning_rate": 1.962089327726841e-05, "loss": 0.456, "step": 8384 }, { "epoch": 0.17783291976840365, "grad_norm": 0.3695451319217682, "learning_rate": 1.9620802316226784e-05, "loss": 0.5541, "step": 8385 }, { "epoch": 0.17785412822633667, "grad_norm": 0.3877107799053192, "learning_rate": 1.9620711344484983e-05, "loss": 0.5465, "step": 8386 }, { "epoch": 0.1778753366842697, "grad_norm": 0.36772677302360535, "learning_rate": 1.962062036204311e-05, "loss": 0.5149, "step": 8387 }, { "epoch": 0.1778965451422027, "grad_norm": 0.3275330662727356, "learning_rate": 1.9620529368901262e-05, "loss": 0.5017, "step": 8388 }, { "epoch": 0.17791775360013573, "grad_norm": 0.4670145511627197, "learning_rate": 1.962043836505955e-05, "loss": 0.5458, "step": 8389 }, { "epoch": 0.17793896205806875, "grad_norm": 0.3795657455921173, "learning_rate": 1.9620347350518067e-05, "loss": 0.5337, "step": 8390 }, { "epoch": 0.17796017051600177, "grad_norm": 0.35984006524086, "learning_rate": 1.9620256325276917e-05, "loss": 0.5831, "step": 8391 }, { "epoch": 0.17798137897393482, "grad_norm": 0.32714954018592834, "learning_rate": 1.9620165289336202e-05, "loss": 0.5146, "step": 8392 }, { "epoch": 0.17800258743186784, "grad_norm": 0.3089023232460022, "learning_rate": 1.9620074242696028e-05, "loss": 0.5004, "step": 8393 }, { "epoch": 0.17802379588980086, "grad_norm": 0.31040138006210327, "learning_rate": 1.9619983185356486e-05, "loss": 0.4892, "step": 8394 }, { "epoch": 0.17804500434773388, "grad_norm": 0.35365089774131775, "learning_rate": 1.961989211731768e-05, "loss": 0.5762, "step": 8395 }, { "epoch": 0.1780662128056669, "grad_norm": 0.3209899365901947, "learning_rate": 1.9619801038579722e-05, "loss": 0.57, "step": 8396 }, { "epoch": 0.17808742126359992, "grad_norm": 0.36358052492141724, "learning_rate": 1.9619709949142697e-05, "loss": 0.514, "step": 8397 }, { "epoch": 0.17810862972153294, "grad_norm": 0.3470495343208313, "learning_rate": 1.9619618849006717e-05, "loss": 0.5855, "step": 8398 }, { "epoch": 0.17812983817946598, "grad_norm": 0.3255265951156616, "learning_rate": 1.961952773817188e-05, "loss": 0.476, "step": 8399 }, { "epoch": 0.178151046637399, "grad_norm": 0.38647720217704773, "learning_rate": 1.961943661663829e-05, "loss": 0.5956, "step": 8400 }, { "epoch": 0.17817225509533202, "grad_norm": 0.3129788339138031, "learning_rate": 1.9619345484406044e-05, "loss": 0.542, "step": 8401 }, { "epoch": 0.17819346355326504, "grad_norm": 0.32483407855033875, "learning_rate": 1.9619254341475245e-05, "loss": 0.5828, "step": 8402 }, { "epoch": 0.17821467201119806, "grad_norm": 0.31784430146217346, "learning_rate": 1.9619163187845998e-05, "loss": 0.5793, "step": 8403 }, { "epoch": 0.17823588046913108, "grad_norm": 0.3290441930294037, "learning_rate": 1.96190720235184e-05, "loss": 0.5249, "step": 8404 }, { "epoch": 0.1782570889270641, "grad_norm": 0.3476908206939697, "learning_rate": 1.961898084849255e-05, "loss": 0.507, "step": 8405 }, { "epoch": 0.17827829738499715, "grad_norm": 0.3583669066429138, "learning_rate": 1.9618889662768556e-05, "loss": 0.4592, "step": 8406 }, { "epoch": 0.17829950584293017, "grad_norm": 0.345133900642395, "learning_rate": 1.9618798466346516e-05, "loss": 0.5467, "step": 8407 }, { "epoch": 0.1783207143008632, "grad_norm": 0.46433913707733154, "learning_rate": 1.961870725922653e-05, "loss": 0.5072, "step": 8408 }, { "epoch": 0.1783419227587962, "grad_norm": 0.333599328994751, "learning_rate": 1.9618616041408706e-05, "loss": 0.4571, "step": 8409 }, { "epoch": 0.17836313121672923, "grad_norm": 0.3634617328643799, "learning_rate": 1.961852481289314e-05, "loss": 0.6122, "step": 8410 }, { "epoch": 0.17838433967466225, "grad_norm": 0.3509231507778168, "learning_rate": 1.961843357367993e-05, "loss": 0.5504, "step": 8411 }, { "epoch": 0.17840554813259527, "grad_norm": 0.3265168070793152, "learning_rate": 1.9618342323769182e-05, "loss": 0.5158, "step": 8412 }, { "epoch": 0.1784267565905283, "grad_norm": 0.31583964824676514, "learning_rate": 1.9618251063161e-05, "loss": 0.4916, "step": 8413 }, { "epoch": 0.17844796504846133, "grad_norm": 0.3074156939983368, "learning_rate": 1.9618159791855485e-05, "loss": 0.5733, "step": 8414 }, { "epoch": 0.17846917350639435, "grad_norm": 0.3768123388290405, "learning_rate": 1.9618068509852733e-05, "loss": 0.5382, "step": 8415 }, { "epoch": 0.17849038196432737, "grad_norm": 0.3549468219280243, "learning_rate": 1.961797721715285e-05, "loss": 0.5291, "step": 8416 }, { "epoch": 0.1785115904222604, "grad_norm": 0.32660239934921265, "learning_rate": 1.9617885913755934e-05, "loss": 0.5051, "step": 8417 }, { "epoch": 0.1785327988801934, "grad_norm": 0.33290889859199524, "learning_rate": 1.961779459966209e-05, "loss": 0.5799, "step": 8418 }, { "epoch": 0.17855400733812646, "grad_norm": 0.31069841980934143, "learning_rate": 1.961770327487142e-05, "loss": 0.494, "step": 8419 }, { "epoch": 0.17857521579605948, "grad_norm": 0.3533162474632263, "learning_rate": 1.961761193938402e-05, "loss": 0.4955, "step": 8420 }, { "epoch": 0.1785964242539925, "grad_norm": 0.4106931984424591, "learning_rate": 1.96175205932e-05, "loss": 0.5764, "step": 8421 }, { "epoch": 0.17861763271192552, "grad_norm": 0.3191203773021698, "learning_rate": 1.9617429236319454e-05, "loss": 0.5463, "step": 8422 }, { "epoch": 0.17863884116985854, "grad_norm": 0.33286866545677185, "learning_rate": 1.9617337868742487e-05, "loss": 0.5305, "step": 8423 }, { "epoch": 0.17866004962779156, "grad_norm": 0.2988380193710327, "learning_rate": 1.9617246490469205e-05, "loss": 0.5828, "step": 8424 }, { "epoch": 0.17868125808572458, "grad_norm": 0.37228238582611084, "learning_rate": 1.96171551014997e-05, "loss": 0.5278, "step": 8425 }, { "epoch": 0.17870246654365762, "grad_norm": 0.3243049681186676, "learning_rate": 1.961706370183408e-05, "loss": 0.5624, "step": 8426 }, { "epoch": 0.17872367500159064, "grad_norm": 0.34406086802482605, "learning_rate": 1.9616972291472445e-05, "loss": 0.5693, "step": 8427 }, { "epoch": 0.17874488345952366, "grad_norm": 0.3097560703754425, "learning_rate": 1.96168808704149e-05, "loss": 0.481, "step": 8428 }, { "epoch": 0.17876609191745668, "grad_norm": 0.35096195340156555, "learning_rate": 1.961678943866154e-05, "loss": 0.5131, "step": 8429 }, { "epoch": 0.1787873003753897, "grad_norm": 0.31403031945228577, "learning_rate": 1.961669799621247e-05, "loss": 0.5263, "step": 8430 }, { "epoch": 0.17880850883332272, "grad_norm": 0.3451429605484009, "learning_rate": 1.9616606543067792e-05, "loss": 0.5546, "step": 8431 }, { "epoch": 0.17882971729125574, "grad_norm": 0.36006706953048706, "learning_rate": 1.961651507922761e-05, "loss": 0.5335, "step": 8432 }, { "epoch": 0.1788509257491888, "grad_norm": 0.30948230624198914, "learning_rate": 1.9616423604692024e-05, "loss": 0.5743, "step": 8433 }, { "epoch": 0.1788721342071218, "grad_norm": 0.32375308871269226, "learning_rate": 1.9616332119461134e-05, "loss": 0.5424, "step": 8434 }, { "epoch": 0.17889334266505483, "grad_norm": 0.3911967873573303, "learning_rate": 1.9616240623535044e-05, "loss": 0.5169, "step": 8435 }, { "epoch": 0.17891455112298785, "grad_norm": 0.45496633648872375, "learning_rate": 1.961614911691385e-05, "loss": 0.4681, "step": 8436 }, { "epoch": 0.17893575958092087, "grad_norm": 0.35535067319869995, "learning_rate": 1.9616057599597665e-05, "loss": 0.5223, "step": 8437 }, { "epoch": 0.1789569680388539, "grad_norm": 0.44084957242012024, "learning_rate": 1.961596607158658e-05, "loss": 0.6128, "step": 8438 }, { "epoch": 0.1789781764967869, "grad_norm": 0.34173455834388733, "learning_rate": 1.96158745328807e-05, "loss": 0.5723, "step": 8439 }, { "epoch": 0.17899938495471995, "grad_norm": 0.3297421932220459, "learning_rate": 1.9615782983480133e-05, "loss": 0.5039, "step": 8440 }, { "epoch": 0.17902059341265297, "grad_norm": 0.38685333728790283, "learning_rate": 1.961569142338497e-05, "loss": 0.5345, "step": 8441 }, { "epoch": 0.179041801870586, "grad_norm": 0.3288699686527252, "learning_rate": 1.961559985259532e-05, "loss": 0.5533, "step": 8442 }, { "epoch": 0.179063010328519, "grad_norm": 0.3558124303817749, "learning_rate": 1.9615508271111286e-05, "loss": 0.4812, "step": 8443 }, { "epoch": 0.17908421878645203, "grad_norm": 0.3182375133037567, "learning_rate": 1.961541667893297e-05, "loss": 0.5199, "step": 8444 }, { "epoch": 0.17910542724438505, "grad_norm": 0.3270491361618042, "learning_rate": 1.9615325076060462e-05, "loss": 0.5349, "step": 8445 }, { "epoch": 0.17912663570231807, "grad_norm": 0.38547948002815247, "learning_rate": 1.9615233462493882e-05, "loss": 0.5636, "step": 8446 }, { "epoch": 0.17914784416025112, "grad_norm": 0.44430795311927795, "learning_rate": 1.961514183823332e-05, "loss": 0.5007, "step": 8447 }, { "epoch": 0.17916905261818414, "grad_norm": 0.30913224816322327, "learning_rate": 1.961505020327888e-05, "loss": 0.5597, "step": 8448 }, { "epoch": 0.17919026107611716, "grad_norm": 0.3060133755207062, "learning_rate": 1.9614958557630662e-05, "loss": 0.513, "step": 8449 }, { "epoch": 0.17921146953405018, "grad_norm": 0.31940871477127075, "learning_rate": 1.9614866901288773e-05, "loss": 0.4658, "step": 8450 }, { "epoch": 0.1792326779919832, "grad_norm": 0.33290165662765503, "learning_rate": 1.9614775234253313e-05, "loss": 0.4995, "step": 8451 }, { "epoch": 0.17925388644991622, "grad_norm": 0.36917203664779663, "learning_rate": 1.9614683556524384e-05, "loss": 0.5487, "step": 8452 }, { "epoch": 0.17927509490784926, "grad_norm": 0.3003862500190735, "learning_rate": 1.9614591868102084e-05, "loss": 0.4676, "step": 8453 }, { "epoch": 0.17929630336578228, "grad_norm": 0.35243844985961914, "learning_rate": 1.9614500168986522e-05, "loss": 0.5212, "step": 8454 }, { "epoch": 0.1793175118237153, "grad_norm": 0.3827001750469208, "learning_rate": 1.9614408459177796e-05, "loss": 0.5732, "step": 8455 }, { "epoch": 0.17933872028164832, "grad_norm": 0.3345598876476288, "learning_rate": 1.9614316738676007e-05, "loss": 0.5658, "step": 8456 }, { "epoch": 0.17935992873958134, "grad_norm": 0.41981980204582214, "learning_rate": 1.961422500748126e-05, "loss": 0.5813, "step": 8457 }, { "epoch": 0.17938113719751436, "grad_norm": 0.30860769748687744, "learning_rate": 1.9614133265593655e-05, "loss": 0.4211, "step": 8458 }, { "epoch": 0.17940234565544738, "grad_norm": 0.33759382367134094, "learning_rate": 1.9614041513013295e-05, "loss": 0.4935, "step": 8459 }, { "epoch": 0.17942355411338043, "grad_norm": 0.3467066287994385, "learning_rate": 1.961394974974028e-05, "loss": 0.533, "step": 8460 }, { "epoch": 0.17944476257131345, "grad_norm": 0.3124599754810333, "learning_rate": 1.9613857975774712e-05, "loss": 0.5262, "step": 8461 }, { "epoch": 0.17946597102924647, "grad_norm": 0.3610069751739502, "learning_rate": 1.96137661911167e-05, "loss": 0.5777, "step": 8462 }, { "epoch": 0.1794871794871795, "grad_norm": 0.34139183163642883, "learning_rate": 1.9613674395766336e-05, "loss": 0.5379, "step": 8463 }, { "epoch": 0.1795083879451125, "grad_norm": 0.29721036553382874, "learning_rate": 1.961358258972373e-05, "loss": 0.5164, "step": 8464 }, { "epoch": 0.17952959640304553, "grad_norm": 1.4790520668029785, "learning_rate": 1.961349077298898e-05, "loss": 0.54, "step": 8465 }, { "epoch": 0.17955080486097855, "grad_norm": 0.37265345454216003, "learning_rate": 1.9613398945562187e-05, "loss": 0.5166, "step": 8466 }, { "epoch": 0.1795720133189116, "grad_norm": 0.3277627229690552, "learning_rate": 1.961330710744346e-05, "loss": 0.5528, "step": 8467 }, { "epoch": 0.1795932217768446, "grad_norm": 0.36247333884239197, "learning_rate": 1.9613215258632893e-05, "loss": 0.5706, "step": 8468 }, { "epoch": 0.17961443023477763, "grad_norm": 0.6302181482315063, "learning_rate": 1.961312339913059e-05, "loss": 0.6201, "step": 8469 }, { "epoch": 0.17963563869271065, "grad_norm": 0.32061177492141724, "learning_rate": 1.9613031528936658e-05, "loss": 0.4996, "step": 8470 }, { "epoch": 0.17965684715064367, "grad_norm": 0.3418075740337372, "learning_rate": 1.9612939648051193e-05, "loss": 0.5263, "step": 8471 }, { "epoch": 0.1796780556085767, "grad_norm": 0.32491379976272583, "learning_rate": 1.9612847756474302e-05, "loss": 0.5081, "step": 8472 }, { "epoch": 0.1796992640665097, "grad_norm": 0.3209324777126312, "learning_rate": 1.9612755854206083e-05, "loss": 0.5256, "step": 8473 }, { "epoch": 0.17972047252444276, "grad_norm": 0.36452460289001465, "learning_rate": 1.9612663941246645e-05, "loss": 0.5259, "step": 8474 }, { "epoch": 0.17974168098237578, "grad_norm": 0.4302886426448822, "learning_rate": 1.9612572017596082e-05, "loss": 0.5974, "step": 8475 }, { "epoch": 0.1797628894403088, "grad_norm": 0.3019102215766907, "learning_rate": 1.96124800832545e-05, "loss": 0.545, "step": 8476 }, { "epoch": 0.17978409789824182, "grad_norm": 0.35175201296806335, "learning_rate": 1.9612388138222e-05, "loss": 0.6073, "step": 8477 }, { "epoch": 0.17980530635617484, "grad_norm": 0.43321841955184937, "learning_rate": 1.9612296182498687e-05, "loss": 0.5825, "step": 8478 }, { "epoch": 0.17982651481410786, "grad_norm": 0.3153776228427887, "learning_rate": 1.9612204216084664e-05, "loss": 0.5152, "step": 8479 }, { "epoch": 0.17984772327204088, "grad_norm": 0.3043138086795807, "learning_rate": 1.9612112238980027e-05, "loss": 0.5085, "step": 8480 }, { "epoch": 0.17986893172997392, "grad_norm": 0.3674757480621338, "learning_rate": 1.9612020251184886e-05, "loss": 0.4165, "step": 8481 }, { "epoch": 0.17989014018790694, "grad_norm": 0.3275773525238037, "learning_rate": 1.9611928252699335e-05, "loss": 0.5663, "step": 8482 }, { "epoch": 0.17991134864583996, "grad_norm": 0.323520302772522, "learning_rate": 1.9611836243523486e-05, "loss": 0.4847, "step": 8483 }, { "epoch": 0.17993255710377298, "grad_norm": 0.2963329553604126, "learning_rate": 1.961174422365743e-05, "loss": 0.5367, "step": 8484 }, { "epoch": 0.179953765561706, "grad_norm": 0.32258978486061096, "learning_rate": 1.9611652193101282e-05, "loss": 0.522, "step": 8485 }, { "epoch": 0.17997497401963902, "grad_norm": 0.30371469259262085, "learning_rate": 1.9611560151855136e-05, "loss": 0.523, "step": 8486 }, { "epoch": 0.17999618247757204, "grad_norm": 0.3945295214653015, "learning_rate": 1.9611468099919096e-05, "loss": 0.541, "step": 8487 }, { "epoch": 0.1800173909355051, "grad_norm": 0.29995864629745483, "learning_rate": 1.9611376037293262e-05, "loss": 0.5201, "step": 8488 }, { "epoch": 0.1800385993934381, "grad_norm": 0.3282872140407562, "learning_rate": 1.9611283963977745e-05, "loss": 0.5333, "step": 8489 }, { "epoch": 0.18005980785137113, "grad_norm": 0.3758113384246826, "learning_rate": 1.9611191879972634e-05, "loss": 0.5235, "step": 8490 }, { "epoch": 0.18008101630930415, "grad_norm": 0.32265183329582214, "learning_rate": 1.961109978527804e-05, "loss": 0.4694, "step": 8491 }, { "epoch": 0.18010222476723717, "grad_norm": 0.3405489921569824, "learning_rate": 1.961100767989407e-05, "loss": 0.4319, "step": 8492 }, { "epoch": 0.1801234332251702, "grad_norm": 0.3401108682155609, "learning_rate": 1.961091556382082e-05, "loss": 0.5548, "step": 8493 }, { "epoch": 0.18014464168310323, "grad_norm": 0.31472399830818176, "learning_rate": 1.9610823437058387e-05, "loss": 0.4901, "step": 8494 }, { "epoch": 0.18016585014103625, "grad_norm": 0.35140612721443176, "learning_rate": 1.9610731299606887e-05, "loss": 0.4805, "step": 8495 }, { "epoch": 0.18018705859896927, "grad_norm": 0.3568122684955597, "learning_rate": 1.961063915146641e-05, "loss": 0.6206, "step": 8496 }, { "epoch": 0.1802082670569023, "grad_norm": 0.694268524646759, "learning_rate": 1.9610546992637064e-05, "loss": 0.5468, "step": 8497 }, { "epoch": 0.1802294755148353, "grad_norm": 0.2927243411540985, "learning_rate": 1.9610454823118953e-05, "loss": 0.5568, "step": 8498 }, { "epoch": 0.18025068397276833, "grad_norm": 0.3227093815803528, "learning_rate": 1.961036264291218e-05, "loss": 0.5856, "step": 8499 }, { "epoch": 0.18027189243070135, "grad_norm": 0.3563172221183777, "learning_rate": 1.961027045201684e-05, "loss": 0.5168, "step": 8500 }, { "epoch": 0.1802931008886344, "grad_norm": 0.5066132545471191, "learning_rate": 1.9610178250433043e-05, "loss": 0.6367, "step": 8501 }, { "epoch": 0.18031430934656742, "grad_norm": 0.42134276032447815, "learning_rate": 1.961008603816089e-05, "loss": 0.5917, "step": 8502 }, { "epoch": 0.18033551780450044, "grad_norm": 0.31897297501564026, "learning_rate": 1.9609993815200485e-05, "loss": 0.538, "step": 8503 }, { "epoch": 0.18035672626243346, "grad_norm": 0.3098803758621216, "learning_rate": 1.9609901581551923e-05, "loss": 0.4741, "step": 8504 }, { "epoch": 0.18037793472036648, "grad_norm": 0.330880343914032, "learning_rate": 1.9609809337215317e-05, "loss": 0.4734, "step": 8505 }, { "epoch": 0.1803991431782995, "grad_norm": 0.31845027208328247, "learning_rate": 1.9609717082190757e-05, "loss": 0.5112, "step": 8506 }, { "epoch": 0.18042035163623252, "grad_norm": 0.3365340828895569, "learning_rate": 1.960962481647836e-05, "loss": 0.4925, "step": 8507 }, { "epoch": 0.18044156009416557, "grad_norm": 0.349312424659729, "learning_rate": 1.960953254007822e-05, "loss": 0.5246, "step": 8508 }, { "epoch": 0.18046276855209858, "grad_norm": 0.4067673087120056, "learning_rate": 1.960944025299044e-05, "loss": 0.5446, "step": 8509 }, { "epoch": 0.1804839770100316, "grad_norm": 4.8801469802856445, "learning_rate": 1.9609347955215125e-05, "loss": 0.5129, "step": 8510 }, { "epoch": 0.18050518546796462, "grad_norm": 0.33800122141838074, "learning_rate": 1.960925564675238e-05, "loss": 0.539, "step": 8511 }, { "epoch": 0.18052639392589764, "grad_norm": 0.36317718029022217, "learning_rate": 1.96091633276023e-05, "loss": 0.5797, "step": 8512 }, { "epoch": 0.18054760238383066, "grad_norm": 0.3321249186992645, "learning_rate": 1.9609070997764994e-05, "loss": 0.4649, "step": 8513 }, { "epoch": 0.18056881084176368, "grad_norm": 0.32350027561187744, "learning_rate": 1.960897865724056e-05, "loss": 0.5158, "step": 8514 }, { "epoch": 0.18059001929969673, "grad_norm": 0.35719794034957886, "learning_rate": 1.9608886306029107e-05, "loss": 0.4362, "step": 8515 }, { "epoch": 0.18061122775762975, "grad_norm": 0.3364737629890442, "learning_rate": 1.960879394413073e-05, "loss": 0.5501, "step": 8516 }, { "epoch": 0.18063243621556277, "grad_norm": 0.33473992347717285, "learning_rate": 1.9608701571545537e-05, "loss": 0.4649, "step": 8517 }, { "epoch": 0.1806536446734958, "grad_norm": 0.7760273218154907, "learning_rate": 1.9608609188273634e-05, "loss": 0.52, "step": 8518 }, { "epoch": 0.1806748531314288, "grad_norm": 0.4477660059928894, "learning_rate": 1.9608516794315115e-05, "loss": 0.5187, "step": 8519 }, { "epoch": 0.18069606158936183, "grad_norm": 0.3290015161037445, "learning_rate": 1.960842438967009e-05, "loss": 0.5142, "step": 8520 }, { "epoch": 0.18071727004729485, "grad_norm": 0.35561177134513855, "learning_rate": 1.9608331974338653e-05, "loss": 0.5202, "step": 8521 }, { "epoch": 0.1807384785052279, "grad_norm": 0.3253765106201172, "learning_rate": 1.960823954832092e-05, "loss": 0.5477, "step": 8522 }, { "epoch": 0.18075968696316091, "grad_norm": 0.32156607508659363, "learning_rate": 1.9608147111616982e-05, "loss": 0.5357, "step": 8523 }, { "epoch": 0.18078089542109393, "grad_norm": 0.32632023096084595, "learning_rate": 1.960805466422695e-05, "loss": 0.5889, "step": 8524 }, { "epoch": 0.18080210387902695, "grad_norm": 0.3152877688407898, "learning_rate": 1.9607962206150918e-05, "loss": 0.5532, "step": 8525 }, { "epoch": 0.18082331233695997, "grad_norm": 0.3155134916305542, "learning_rate": 1.9607869737389e-05, "loss": 0.4533, "step": 8526 }, { "epoch": 0.180844520794893, "grad_norm": 0.3177003860473633, "learning_rate": 1.9607777257941288e-05, "loss": 0.5548, "step": 8527 }, { "epoch": 0.18086572925282604, "grad_norm": 0.3185825049877167, "learning_rate": 1.960768476780789e-05, "loss": 0.5036, "step": 8528 }, { "epoch": 0.18088693771075906, "grad_norm": 0.33360812067985535, "learning_rate": 1.9607592266988908e-05, "loss": 0.456, "step": 8529 }, { "epoch": 0.18090814616869208, "grad_norm": 0.3748854696750641, "learning_rate": 1.9607499755484446e-05, "loss": 0.5294, "step": 8530 }, { "epoch": 0.1809293546266251, "grad_norm": 0.33740681409835815, "learning_rate": 1.960740723329461e-05, "loss": 0.5013, "step": 8531 }, { "epoch": 0.18095056308455812, "grad_norm": 0.3510362207889557, "learning_rate": 1.9607314700419495e-05, "loss": 0.5587, "step": 8532 }, { "epoch": 0.18097177154249114, "grad_norm": 0.31657132506370544, "learning_rate": 1.960722215685921e-05, "loss": 0.4742, "step": 8533 }, { "epoch": 0.18099298000042416, "grad_norm": 0.33645811676979065, "learning_rate": 1.9607129602613852e-05, "loss": 0.5719, "step": 8534 }, { "epoch": 0.1810141884583572, "grad_norm": 0.3374197483062744, "learning_rate": 1.9607037037683532e-05, "loss": 0.5702, "step": 8535 }, { "epoch": 0.18103539691629023, "grad_norm": 0.31569933891296387, "learning_rate": 1.960694446206835e-05, "loss": 0.4602, "step": 8536 }, { "epoch": 0.18105660537422325, "grad_norm": 0.3125697672367096, "learning_rate": 1.9606851875768404e-05, "loss": 0.5366, "step": 8537 }, { "epoch": 0.18107781383215626, "grad_norm": 0.3293406665325165, "learning_rate": 1.9606759278783803e-05, "loss": 0.568, "step": 8538 }, { "epoch": 0.18109902229008928, "grad_norm": 0.36271360516548157, "learning_rate": 1.9606666671114646e-05, "loss": 0.5517, "step": 8539 }, { "epoch": 0.1811202307480223, "grad_norm": 0.3410978317260742, "learning_rate": 1.9606574052761037e-05, "loss": 0.4914, "step": 8540 }, { "epoch": 0.18114143920595532, "grad_norm": 0.32913175225257874, "learning_rate": 1.9606481423723083e-05, "loss": 0.5629, "step": 8541 }, { "epoch": 0.18116264766388837, "grad_norm": 0.33213573694229126, "learning_rate": 1.9606388784000882e-05, "loss": 0.4791, "step": 8542 }, { "epoch": 0.1811838561218214, "grad_norm": 0.33401820063591003, "learning_rate": 1.960629613359454e-05, "loss": 0.4693, "step": 8543 }, { "epoch": 0.1812050645797544, "grad_norm": 0.30133286118507385, "learning_rate": 1.960620347250416e-05, "loss": 0.4934, "step": 8544 }, { "epoch": 0.18122627303768743, "grad_norm": 0.3120463192462921, "learning_rate": 1.960611080072984e-05, "loss": 0.5264, "step": 8545 }, { "epoch": 0.18124748149562045, "grad_norm": 0.3825959265232086, "learning_rate": 1.9606018118271692e-05, "loss": 0.5741, "step": 8546 }, { "epoch": 0.18126868995355347, "grad_norm": 0.33126136660575867, "learning_rate": 1.960592542512981e-05, "loss": 0.5208, "step": 8547 }, { "epoch": 0.1812898984114865, "grad_norm": 0.3577519953250885, "learning_rate": 1.96058327213043e-05, "loss": 0.5017, "step": 8548 }, { "epoch": 0.18131110686941954, "grad_norm": 0.30619189143180847, "learning_rate": 1.960574000679527e-05, "loss": 0.5033, "step": 8549 }, { "epoch": 0.18133231532735256, "grad_norm": 0.3707554340362549, "learning_rate": 1.960564728160282e-05, "loss": 0.462, "step": 8550 }, { "epoch": 0.18135352378528558, "grad_norm": 0.40850117802619934, "learning_rate": 1.9605554545727053e-05, "loss": 0.6156, "step": 8551 }, { "epoch": 0.1813747322432186, "grad_norm": 0.33774706721305847, "learning_rate": 1.9605461799168068e-05, "loss": 0.5062, "step": 8552 }, { "epoch": 0.18139594070115161, "grad_norm": 0.34459438920021057, "learning_rate": 1.960536904192597e-05, "loss": 0.4816, "step": 8553 }, { "epoch": 0.18141714915908463, "grad_norm": 0.34992265701293945, "learning_rate": 1.9605276274000872e-05, "loss": 0.5742, "step": 8554 }, { "epoch": 0.18143835761701765, "grad_norm": 0.31367063522338867, "learning_rate": 1.9605183495392864e-05, "loss": 0.529, "step": 8555 }, { "epoch": 0.1814595660749507, "grad_norm": 0.31549784541130066, "learning_rate": 1.9605090706102056e-05, "loss": 0.6144, "step": 8556 }, { "epoch": 0.18148077453288372, "grad_norm": 0.3219181299209595, "learning_rate": 1.9604997906128543e-05, "loss": 0.5584, "step": 8557 }, { "epoch": 0.18150198299081674, "grad_norm": 0.36220553517341614, "learning_rate": 1.960490509547244e-05, "loss": 0.5439, "step": 8558 }, { "epoch": 0.18152319144874976, "grad_norm": 0.3712880611419678, "learning_rate": 1.960481227413385e-05, "loss": 0.5365, "step": 8559 }, { "epoch": 0.18154439990668278, "grad_norm": 0.30136582255363464, "learning_rate": 1.9604719442112866e-05, "loss": 0.4744, "step": 8560 }, { "epoch": 0.1815656083646158, "grad_norm": 0.3297170102596283, "learning_rate": 1.9604626599409594e-05, "loss": 0.4881, "step": 8561 }, { "epoch": 0.18158681682254882, "grad_norm": 0.3315819203853607, "learning_rate": 1.9604533746024145e-05, "loss": 0.5355, "step": 8562 }, { "epoch": 0.18160802528048187, "grad_norm": 0.3943294882774353, "learning_rate": 1.9604440881956615e-05, "loss": 0.5187, "step": 8563 }, { "epoch": 0.18162923373841489, "grad_norm": 0.3425477147102356, "learning_rate": 1.9604348007207108e-05, "loss": 0.5504, "step": 8564 }, { "epoch": 0.1816504421963479, "grad_norm": 0.37539142370224, "learning_rate": 1.960425512177573e-05, "loss": 0.5663, "step": 8565 }, { "epoch": 0.18167165065428093, "grad_norm": 0.3648891746997833, "learning_rate": 1.960416222566258e-05, "loss": 0.4663, "step": 8566 }, { "epoch": 0.18169285911221394, "grad_norm": 0.3325117230415344, "learning_rate": 1.9604069318867768e-05, "loss": 0.5888, "step": 8567 }, { "epoch": 0.18171406757014696, "grad_norm": 0.3286101818084717, "learning_rate": 1.9603976401391392e-05, "loss": 0.5039, "step": 8568 }, { "epoch": 0.18173527602808, "grad_norm": 0.28696826100349426, "learning_rate": 1.9603883473233554e-05, "loss": 0.3971, "step": 8569 }, { "epoch": 0.18175648448601303, "grad_norm": 0.49946826696395874, "learning_rate": 1.9603790534394367e-05, "loss": 0.5806, "step": 8570 }, { "epoch": 0.18177769294394605, "grad_norm": 0.3425370752811432, "learning_rate": 1.960369758487392e-05, "loss": 0.4892, "step": 8571 }, { "epoch": 0.18179890140187907, "grad_norm": 0.3328791856765747, "learning_rate": 1.9603604624672332e-05, "loss": 0.5002, "step": 8572 }, { "epoch": 0.1818201098598121, "grad_norm": 0.30059173703193665, "learning_rate": 1.960351165378969e-05, "loss": 0.4417, "step": 8573 }, { "epoch": 0.1818413183177451, "grad_norm": 0.30696895718574524, "learning_rate": 1.9603418672226115e-05, "loss": 0.4652, "step": 8574 }, { "epoch": 0.18186252677567813, "grad_norm": 0.5399624109268188, "learning_rate": 1.9603325679981693e-05, "loss": 0.5264, "step": 8575 }, { "epoch": 0.18188373523361118, "grad_norm": 0.3807968199253082, "learning_rate": 1.960323267705654e-05, "loss": 0.6001, "step": 8576 }, { "epoch": 0.1819049436915442, "grad_norm": 0.3748618960380554, "learning_rate": 1.9603139663450755e-05, "loss": 0.4817, "step": 8577 }, { "epoch": 0.18192615214947722, "grad_norm": 0.4570655822753906, "learning_rate": 1.960304663916444e-05, "loss": 0.5551, "step": 8578 }, { "epoch": 0.18194736060741024, "grad_norm": 0.33267003297805786, "learning_rate": 1.96029536041977e-05, "loss": 0.4739, "step": 8579 }, { "epoch": 0.18196856906534326, "grad_norm": 0.3379777669906616, "learning_rate": 1.9602860558550638e-05, "loss": 0.5456, "step": 8580 }, { "epoch": 0.18198977752327627, "grad_norm": 0.33296915888786316, "learning_rate": 1.960276750222336e-05, "loss": 0.5131, "step": 8581 }, { "epoch": 0.1820109859812093, "grad_norm": 0.33857262134552, "learning_rate": 1.9602674435215964e-05, "loss": 0.6311, "step": 8582 }, { "epoch": 0.18203219443914234, "grad_norm": 0.3274456560611725, "learning_rate": 1.9602581357528557e-05, "loss": 0.5872, "step": 8583 }, { "epoch": 0.18205340289707536, "grad_norm": 0.37758296728134155, "learning_rate": 1.9602488269161248e-05, "loss": 0.5711, "step": 8584 }, { "epoch": 0.18207461135500838, "grad_norm": 0.3063410818576813, "learning_rate": 1.960239517011413e-05, "loss": 0.5191, "step": 8585 }, { "epoch": 0.1820958198129414, "grad_norm": 0.3096190094947815, "learning_rate": 1.9602302060387312e-05, "loss": 0.4857, "step": 8586 }, { "epoch": 0.18211702827087442, "grad_norm": 0.3532133400440216, "learning_rate": 1.9602208939980897e-05, "loss": 0.4485, "step": 8587 }, { "epoch": 0.18213823672880744, "grad_norm": 0.32019302248954773, "learning_rate": 1.960211580889499e-05, "loss": 0.4537, "step": 8588 }, { "epoch": 0.18215944518674046, "grad_norm": 0.5326231122016907, "learning_rate": 1.960202266712969e-05, "loss": 0.499, "step": 8589 }, { "epoch": 0.1821806536446735, "grad_norm": 0.31469300389289856, "learning_rate": 1.960192951468511e-05, "loss": 0.5622, "step": 8590 }, { "epoch": 0.18220186210260653, "grad_norm": 0.3169667720794678, "learning_rate": 1.9601836351561344e-05, "loss": 0.4449, "step": 8591 }, { "epoch": 0.18222307056053955, "grad_norm": 0.33176594972610474, "learning_rate": 1.9601743177758497e-05, "loss": 0.5852, "step": 8592 }, { "epoch": 0.18224427901847257, "grad_norm": 0.3645356297492981, "learning_rate": 1.9601649993276678e-05, "loss": 0.6305, "step": 8593 }, { "epoch": 0.18226548747640559, "grad_norm": 0.31317654252052307, "learning_rate": 1.9601556798115987e-05, "loss": 0.5103, "step": 8594 }, { "epoch": 0.1822866959343386, "grad_norm": 0.4448886513710022, "learning_rate": 1.9601463592276524e-05, "loss": 0.4892, "step": 8595 }, { "epoch": 0.18230790439227162, "grad_norm": 0.36189231276512146, "learning_rate": 1.96013703757584e-05, "loss": 0.5086, "step": 8596 }, { "epoch": 0.18232911285020467, "grad_norm": 0.3375801146030426, "learning_rate": 1.9601277148561714e-05, "loss": 0.6178, "step": 8597 }, { "epoch": 0.1823503213081377, "grad_norm": 0.3501357436180115, "learning_rate": 1.9601183910686573e-05, "loss": 0.5399, "step": 8598 }, { "epoch": 0.1823715297660707, "grad_norm": 0.33388808369636536, "learning_rate": 1.9601090662133075e-05, "loss": 0.5358, "step": 8599 }, { "epoch": 0.18239273822400373, "grad_norm": 0.4055389165878296, "learning_rate": 1.960099740290133e-05, "loss": 0.5188, "step": 8600 }, { "epoch": 0.18241394668193675, "grad_norm": 0.30144253373146057, "learning_rate": 1.9600904132991442e-05, "loss": 0.5046, "step": 8601 }, { "epoch": 0.18243515513986977, "grad_norm": 0.3464416563510895, "learning_rate": 1.9600810852403507e-05, "loss": 0.5801, "step": 8602 }, { "epoch": 0.18245636359780282, "grad_norm": 0.3570939004421234, "learning_rate": 1.9600717561137633e-05, "loss": 0.521, "step": 8603 }, { "epoch": 0.18247757205573584, "grad_norm": 0.28005167841911316, "learning_rate": 1.9600624259193927e-05, "loss": 0.5552, "step": 8604 }, { "epoch": 0.18249878051366886, "grad_norm": 0.36181706190109253, "learning_rate": 1.960053094657249e-05, "loss": 0.5887, "step": 8605 }, { "epoch": 0.18251998897160188, "grad_norm": 0.3039344251155853, "learning_rate": 1.9600437623273427e-05, "loss": 0.5212, "step": 8606 }, { "epoch": 0.1825411974295349, "grad_norm": 0.32106518745422363, "learning_rate": 1.960034428929684e-05, "loss": 0.5065, "step": 8607 }, { "epoch": 0.18256240588746792, "grad_norm": 0.3508255183696747, "learning_rate": 1.960025094464283e-05, "loss": 0.5055, "step": 8608 }, { "epoch": 0.18258361434540094, "grad_norm": 0.3301902115345001, "learning_rate": 1.960015758931151e-05, "loss": 0.5329, "step": 8609 }, { "epoch": 0.18260482280333398, "grad_norm": 0.3365696370601654, "learning_rate": 1.960006422330297e-05, "loss": 0.5814, "step": 8610 }, { "epoch": 0.182626031261267, "grad_norm": 0.3475232422351837, "learning_rate": 1.959997084661733e-05, "loss": 0.5329, "step": 8611 }, { "epoch": 0.18264723971920002, "grad_norm": 0.32256367802619934, "learning_rate": 1.959987745925468e-05, "loss": 0.4744, "step": 8612 }, { "epoch": 0.18266844817713304, "grad_norm": 0.40317612886428833, "learning_rate": 1.9599784061215133e-05, "loss": 0.5839, "step": 8613 }, { "epoch": 0.18268965663506606, "grad_norm": 0.31888461112976074, "learning_rate": 1.959969065249879e-05, "loss": 0.4866, "step": 8614 }, { "epoch": 0.18271086509299908, "grad_norm": 0.3648757338523865, "learning_rate": 1.9599597233105754e-05, "loss": 0.4987, "step": 8615 }, { "epoch": 0.1827320735509321, "grad_norm": 0.3328002095222473, "learning_rate": 1.9599503803036127e-05, "loss": 0.4877, "step": 8616 }, { "epoch": 0.18275328200886515, "grad_norm": 0.3071383237838745, "learning_rate": 1.959941036229002e-05, "loss": 0.4409, "step": 8617 }, { "epoch": 0.18277449046679817, "grad_norm": 0.3503030240535736, "learning_rate": 1.9599316910867526e-05, "loss": 0.5631, "step": 8618 }, { "epoch": 0.1827956989247312, "grad_norm": 0.33286169171333313, "learning_rate": 1.9599223448768762e-05, "loss": 0.5031, "step": 8619 }, { "epoch": 0.1828169073826642, "grad_norm": 0.32459744811058044, "learning_rate": 1.959912997599382e-05, "loss": 0.5743, "step": 8620 }, { "epoch": 0.18283811584059723, "grad_norm": 0.32251980900764465, "learning_rate": 1.959903649254281e-05, "loss": 0.4629, "step": 8621 }, { "epoch": 0.18285932429853025, "grad_norm": 0.3694128692150116, "learning_rate": 1.9598942998415836e-05, "loss": 0.4888, "step": 8622 }, { "epoch": 0.18288053275646327, "grad_norm": 0.33689287304878235, "learning_rate": 1.9598849493613e-05, "loss": 0.5011, "step": 8623 }, { "epoch": 0.1829017412143963, "grad_norm": 0.31956690549850464, "learning_rate": 1.9598755978134406e-05, "loss": 0.5661, "step": 8624 }, { "epoch": 0.18292294967232933, "grad_norm": 0.34570685029029846, "learning_rate": 1.959866245198016e-05, "loss": 0.5119, "step": 8625 }, { "epoch": 0.18294415813026235, "grad_norm": 0.4350047707557678, "learning_rate": 1.959856891515037e-05, "loss": 0.5035, "step": 8626 }, { "epoch": 0.18296536658819537, "grad_norm": 0.35612598061561584, "learning_rate": 1.959847536764513e-05, "loss": 0.5128, "step": 8627 }, { "epoch": 0.1829865750461284, "grad_norm": 0.33276721835136414, "learning_rate": 1.959838180946455e-05, "loss": 0.5071, "step": 8628 }, { "epoch": 0.1830077835040614, "grad_norm": 0.3264450430870056, "learning_rate": 1.959828824060873e-05, "loss": 0.5122, "step": 8629 }, { "epoch": 0.18302899196199443, "grad_norm": 0.3280104398727417, "learning_rate": 1.959819466107778e-05, "loss": 0.5333, "step": 8630 }, { "epoch": 0.18305020041992748, "grad_norm": 0.5128837823867798, "learning_rate": 1.9598101070871803e-05, "loss": 0.5361, "step": 8631 }, { "epoch": 0.1830714088778605, "grad_norm": 0.35396745800971985, "learning_rate": 1.9598007469990898e-05, "loss": 0.591, "step": 8632 }, { "epoch": 0.18309261733579352, "grad_norm": 0.34995484352111816, "learning_rate": 1.9597913858435173e-05, "loss": 0.483, "step": 8633 }, { "epoch": 0.18311382579372654, "grad_norm": 0.3414352536201477, "learning_rate": 1.9597820236204735e-05, "loss": 0.6041, "step": 8634 }, { "epoch": 0.18313503425165956, "grad_norm": 0.3470432162284851, "learning_rate": 1.959772660329968e-05, "loss": 0.4787, "step": 8635 }, { "epoch": 0.18315624270959258, "grad_norm": 0.34133797883987427, "learning_rate": 1.959763295972012e-05, "loss": 0.5636, "step": 8636 }, { "epoch": 0.1831774511675256, "grad_norm": 0.39661261439323425, "learning_rate": 1.9597539305466156e-05, "loss": 0.6184, "step": 8637 }, { "epoch": 0.18319865962545864, "grad_norm": 0.3347340226173401, "learning_rate": 1.9597445640537888e-05, "loss": 0.5266, "step": 8638 }, { "epoch": 0.18321986808339166, "grad_norm": 0.33054184913635254, "learning_rate": 1.959735196493543e-05, "loss": 0.4742, "step": 8639 }, { "epoch": 0.18324107654132468, "grad_norm": 0.32157137989997864, "learning_rate": 1.9597258278658876e-05, "loss": 0.478, "step": 8640 }, { "epoch": 0.1832622849992577, "grad_norm": 0.33715179562568665, "learning_rate": 1.9597164581708338e-05, "loss": 0.5357, "step": 8641 }, { "epoch": 0.18328349345719072, "grad_norm": 0.38492798805236816, "learning_rate": 1.9597070874083917e-05, "loss": 0.5076, "step": 8642 }, { "epoch": 0.18330470191512374, "grad_norm": 0.31533122062683105, "learning_rate": 1.9596977155785716e-05, "loss": 0.6063, "step": 8643 }, { "epoch": 0.1833259103730568, "grad_norm": 0.3452679514884949, "learning_rate": 1.959688342681384e-05, "loss": 0.6296, "step": 8644 }, { "epoch": 0.1833471188309898, "grad_norm": 0.32772403955459595, "learning_rate": 1.9596789687168392e-05, "loss": 0.4533, "step": 8645 }, { "epoch": 0.18336832728892283, "grad_norm": 0.3683689534664154, "learning_rate": 1.9596695936849483e-05, "loss": 0.5477, "step": 8646 }, { "epoch": 0.18338953574685585, "grad_norm": 0.3520316481590271, "learning_rate": 1.9596602175857206e-05, "loss": 0.5526, "step": 8647 }, { "epoch": 0.18341074420478887, "grad_norm": 0.34276652336120605, "learning_rate": 1.9596508404191678e-05, "loss": 0.5342, "step": 8648 }, { "epoch": 0.1834319526627219, "grad_norm": 0.3704020380973816, "learning_rate": 1.959641462185299e-05, "loss": 0.4484, "step": 8649 }, { "epoch": 0.1834531611206549, "grad_norm": 0.29866257309913635, "learning_rate": 1.9596320828841257e-05, "loss": 0.4039, "step": 8650 }, { "epoch": 0.18347436957858795, "grad_norm": 0.34743309020996094, "learning_rate": 1.9596227025156578e-05, "loss": 0.5566, "step": 8651 }, { "epoch": 0.18349557803652097, "grad_norm": 0.38233649730682373, "learning_rate": 1.959613321079906e-05, "loss": 0.5571, "step": 8652 }, { "epoch": 0.183516786494454, "grad_norm": 0.32727885246276855, "learning_rate": 1.9596039385768803e-05, "loss": 0.5649, "step": 8653 }, { "epoch": 0.183537994952387, "grad_norm": 0.34466972947120667, "learning_rate": 1.9595945550065916e-05, "loss": 0.4784, "step": 8654 }, { "epoch": 0.18355920341032003, "grad_norm": 0.32020363211631775, "learning_rate": 1.9595851703690506e-05, "loss": 0.4792, "step": 8655 }, { "epoch": 0.18358041186825305, "grad_norm": 0.47975632548332214, "learning_rate": 1.9595757846642667e-05, "loss": 0.5056, "step": 8656 }, { "epoch": 0.18360162032618607, "grad_norm": 0.3188686668872833, "learning_rate": 1.959566397892251e-05, "loss": 0.4804, "step": 8657 }, { "epoch": 0.18362282878411912, "grad_norm": 0.36593496799468994, "learning_rate": 1.9595570100530143e-05, "loss": 0.5229, "step": 8658 }, { "epoch": 0.18364403724205214, "grad_norm": 0.3688802719116211, "learning_rate": 1.9595476211465665e-05, "loss": 0.5429, "step": 8659 }, { "epoch": 0.18366524569998516, "grad_norm": 0.31564095616340637, "learning_rate": 1.959538231172918e-05, "loss": 0.4725, "step": 8660 }, { "epoch": 0.18368645415791818, "grad_norm": 0.3312613368034363, "learning_rate": 1.9595288401320794e-05, "loss": 0.5069, "step": 8661 }, { "epoch": 0.1837076626158512, "grad_norm": 0.32806187868118286, "learning_rate": 1.9595194480240612e-05, "loss": 0.4307, "step": 8662 }, { "epoch": 0.18372887107378422, "grad_norm": 0.3318927586078644, "learning_rate": 1.959510054848874e-05, "loss": 0.5337, "step": 8663 }, { "epoch": 0.18375007953171724, "grad_norm": 0.34489497542381287, "learning_rate": 1.959500660606528e-05, "loss": 0.5205, "step": 8664 }, { "epoch": 0.18377128798965028, "grad_norm": 0.3189534842967987, "learning_rate": 1.9594912652970334e-05, "loss": 0.5459, "step": 8665 }, { "epoch": 0.1837924964475833, "grad_norm": 0.35391512513160706, "learning_rate": 1.9594818689204014e-05, "loss": 0.5264, "step": 8666 }, { "epoch": 0.18381370490551632, "grad_norm": 0.31954941153526306, "learning_rate": 1.9594724714766414e-05, "loss": 0.5455, "step": 8667 }, { "epoch": 0.18383491336344934, "grad_norm": 0.3266739845275879, "learning_rate": 1.959463072965765e-05, "loss": 0.529, "step": 8668 }, { "epoch": 0.18385612182138236, "grad_norm": 0.32171639800071716, "learning_rate": 1.959453673387782e-05, "loss": 0.5357, "step": 8669 }, { "epoch": 0.18387733027931538, "grad_norm": 0.3265921473503113, "learning_rate": 1.9594442727427028e-05, "loss": 0.5226, "step": 8670 }, { "epoch": 0.1838985387372484, "grad_norm": 0.294996976852417, "learning_rate": 1.959434871030538e-05, "loss": 0.4848, "step": 8671 }, { "epoch": 0.18391974719518145, "grad_norm": 0.320233553647995, "learning_rate": 1.9594254682512983e-05, "loss": 0.5207, "step": 8672 }, { "epoch": 0.18394095565311447, "grad_norm": 0.33303365111351013, "learning_rate": 1.9594160644049935e-05, "loss": 0.5468, "step": 8673 }, { "epoch": 0.1839621641110475, "grad_norm": 0.3159131407737732, "learning_rate": 1.9594066594916348e-05, "loss": 0.5091, "step": 8674 }, { "epoch": 0.1839833725689805, "grad_norm": 0.3213924169540405, "learning_rate": 1.9593972535112325e-05, "loss": 0.5492, "step": 8675 }, { "epoch": 0.18400458102691353, "grad_norm": 0.3599291741847992, "learning_rate": 1.9593878464637967e-05, "loss": 0.4612, "step": 8676 }, { "epoch": 0.18402578948484655, "grad_norm": 0.3656013309955597, "learning_rate": 1.959378438349338e-05, "loss": 0.4558, "step": 8677 }, { "epoch": 0.1840469979427796, "grad_norm": 0.3323820233345032, "learning_rate": 1.959369029167867e-05, "loss": 0.4975, "step": 8678 }, { "epoch": 0.18406820640071261, "grad_norm": 0.3352849781513214, "learning_rate": 1.959359618919394e-05, "loss": 0.5847, "step": 8679 }, { "epoch": 0.18408941485864563, "grad_norm": 0.4120081067085266, "learning_rate": 1.95935020760393e-05, "loss": 0.5225, "step": 8680 }, { "epoch": 0.18411062331657865, "grad_norm": 0.3324977159500122, "learning_rate": 1.9593407952214846e-05, "loss": 0.4365, "step": 8681 }, { "epoch": 0.18413183177451167, "grad_norm": 0.30899521708488464, "learning_rate": 1.959331381772069e-05, "loss": 0.5036, "step": 8682 }, { "epoch": 0.1841530402324447, "grad_norm": 0.4471619725227356, "learning_rate": 1.9593219672556933e-05, "loss": 0.4231, "step": 8683 }, { "epoch": 0.1841742486903777, "grad_norm": 0.4067367613315582, "learning_rate": 1.9593125516723676e-05, "loss": 0.5989, "step": 8684 }, { "epoch": 0.18419545714831076, "grad_norm": 0.3706192076206207, "learning_rate": 1.9593031350221034e-05, "loss": 0.4434, "step": 8685 }, { "epoch": 0.18421666560624378, "grad_norm": 0.44608283042907715, "learning_rate": 1.9592937173049103e-05, "loss": 0.5277, "step": 8686 }, { "epoch": 0.1842378740641768, "grad_norm": 0.3082942068576813, "learning_rate": 1.959284298520799e-05, "loss": 0.5082, "step": 8687 }, { "epoch": 0.18425908252210982, "grad_norm": 0.32334181666374207, "learning_rate": 1.9592748786697802e-05, "loss": 0.5229, "step": 8688 }, { "epoch": 0.18428029098004284, "grad_norm": 0.322595477104187, "learning_rate": 1.959265457751864e-05, "loss": 0.5345, "step": 8689 }, { "epoch": 0.18430149943797586, "grad_norm": 0.3242131769657135, "learning_rate": 1.9592560357670612e-05, "loss": 0.5032, "step": 8690 }, { "epoch": 0.18432270789590888, "grad_norm": 0.3295897841453552, "learning_rate": 1.9592466127153824e-05, "loss": 0.574, "step": 8691 }, { "epoch": 0.18434391635384192, "grad_norm": 0.42765703797340393, "learning_rate": 1.9592371885968378e-05, "loss": 0.5167, "step": 8692 }, { "epoch": 0.18436512481177494, "grad_norm": 0.3411886394023895, "learning_rate": 1.9592277634114376e-05, "loss": 0.5228, "step": 8693 }, { "epoch": 0.18438633326970796, "grad_norm": 0.33159664273262024, "learning_rate": 1.959218337159193e-05, "loss": 0.5219, "step": 8694 }, { "epoch": 0.18440754172764098, "grad_norm": 0.349740207195282, "learning_rate": 1.959208909840114e-05, "loss": 0.5398, "step": 8695 }, { "epoch": 0.184428750185574, "grad_norm": 0.3150680363178253, "learning_rate": 1.9591994814542107e-05, "loss": 0.4626, "step": 8696 }, { "epoch": 0.18444995864350702, "grad_norm": 0.3662862777709961, "learning_rate": 1.9591900520014947e-05, "loss": 0.4962, "step": 8697 }, { "epoch": 0.18447116710144004, "grad_norm": 0.3132604658603668, "learning_rate": 1.9591806214819757e-05, "loss": 0.4698, "step": 8698 }, { "epoch": 0.1844923755593731, "grad_norm": 0.3093734681606293, "learning_rate": 1.9591711898956644e-05, "loss": 0.4996, "step": 8699 }, { "epoch": 0.1845135840173061, "grad_norm": 0.333416223526001, "learning_rate": 1.959161757242571e-05, "loss": 0.5581, "step": 8700 }, { "epoch": 0.18453479247523913, "grad_norm": 0.31539592146873474, "learning_rate": 1.959152323522706e-05, "loss": 0.4179, "step": 8701 }, { "epoch": 0.18455600093317215, "grad_norm": 0.45931729674339294, "learning_rate": 1.9591428887360806e-05, "loss": 0.6205, "step": 8702 }, { "epoch": 0.18457720939110517, "grad_norm": 0.30854344367980957, "learning_rate": 1.959133452882705e-05, "loss": 0.4792, "step": 8703 }, { "epoch": 0.1845984178490382, "grad_norm": 0.341627299785614, "learning_rate": 1.959124015962589e-05, "loss": 0.5325, "step": 8704 }, { "epoch": 0.1846196263069712, "grad_norm": 0.3627355992794037, "learning_rate": 1.9591145779757436e-05, "loss": 0.5508, "step": 8705 }, { "epoch": 0.18464083476490425, "grad_norm": 0.320165753364563, "learning_rate": 1.9591051389221792e-05, "loss": 0.5494, "step": 8706 }, { "epoch": 0.18466204322283727, "grad_norm": 0.3538552224636078, "learning_rate": 1.959095698801907e-05, "loss": 0.6028, "step": 8707 }, { "epoch": 0.1846832516807703, "grad_norm": 0.35288187861442566, "learning_rate": 1.9590862576149366e-05, "loss": 0.4976, "step": 8708 }, { "epoch": 0.1847044601387033, "grad_norm": 1.9364646673202515, "learning_rate": 1.9590768153612787e-05, "loss": 0.5869, "step": 8709 }, { "epoch": 0.18472566859663633, "grad_norm": 0.3263159692287445, "learning_rate": 1.9590673720409438e-05, "loss": 0.4835, "step": 8710 }, { "epoch": 0.18474687705456935, "grad_norm": 0.323286771774292, "learning_rate": 1.9590579276539426e-05, "loss": 0.5503, "step": 8711 }, { "epoch": 0.18476808551250237, "grad_norm": 0.32073989510536194, "learning_rate": 1.9590484822002857e-05, "loss": 0.5517, "step": 8712 }, { "epoch": 0.18478929397043542, "grad_norm": 0.3308587670326233, "learning_rate": 1.9590390356799833e-05, "loss": 0.4963, "step": 8713 }, { "epoch": 0.18481050242836844, "grad_norm": 0.30675965547561646, "learning_rate": 1.9590295880930458e-05, "loss": 0.4233, "step": 8714 }, { "epoch": 0.18483171088630146, "grad_norm": 0.49564749002456665, "learning_rate": 1.9590201394394844e-05, "loss": 0.4655, "step": 8715 }, { "epoch": 0.18485291934423448, "grad_norm": 0.32749471068382263, "learning_rate": 1.9590106897193087e-05, "loss": 0.5341, "step": 8716 }, { "epoch": 0.1848741278021675, "grad_norm": 0.3185475766658783, "learning_rate": 1.9590012389325297e-05, "loss": 0.5224, "step": 8717 }, { "epoch": 0.18489533626010052, "grad_norm": 0.32059043645858765, "learning_rate": 1.9589917870791578e-05, "loss": 0.5127, "step": 8718 }, { "epoch": 0.18491654471803357, "grad_norm": 0.29415956139564514, "learning_rate": 1.958982334159204e-05, "loss": 0.4757, "step": 8719 }, { "epoch": 0.18493775317596658, "grad_norm": 0.35261139273643494, "learning_rate": 1.958972880172678e-05, "loss": 0.4817, "step": 8720 }, { "epoch": 0.1849589616338996, "grad_norm": 0.3158293664455414, "learning_rate": 1.9589634251195907e-05, "loss": 0.558, "step": 8721 }, { "epoch": 0.18498017009183262, "grad_norm": 0.3276177942752838, "learning_rate": 1.9589539689999527e-05, "loss": 0.5587, "step": 8722 }, { "epoch": 0.18500137854976564, "grad_norm": 0.4370858371257782, "learning_rate": 1.9589445118137744e-05, "loss": 0.61, "step": 8723 }, { "epoch": 0.18502258700769866, "grad_norm": 0.3260515034198761, "learning_rate": 1.958935053561066e-05, "loss": 0.5363, "step": 8724 }, { "epoch": 0.18504379546563168, "grad_norm": 0.3434469997882843, "learning_rate": 1.958925594241839e-05, "loss": 0.611, "step": 8725 }, { "epoch": 0.18506500392356473, "grad_norm": 0.4306989312171936, "learning_rate": 1.9589161338561033e-05, "loss": 0.6467, "step": 8726 }, { "epoch": 0.18508621238149775, "grad_norm": 0.35869625210762024, "learning_rate": 1.958906672403869e-05, "loss": 0.5164, "step": 8727 }, { "epoch": 0.18510742083943077, "grad_norm": 0.3866337537765503, "learning_rate": 1.958897209885147e-05, "loss": 0.5296, "step": 8728 }, { "epoch": 0.1851286292973638, "grad_norm": 0.3253227770328522, "learning_rate": 1.958887746299948e-05, "loss": 0.5388, "step": 8729 }, { "epoch": 0.1851498377552968, "grad_norm": 0.3228372633457184, "learning_rate": 1.9588782816482822e-05, "loss": 0.4976, "step": 8730 }, { "epoch": 0.18517104621322983, "grad_norm": 0.3256101906299591, "learning_rate": 1.9588688159301607e-05, "loss": 0.4247, "step": 8731 }, { "epoch": 0.18519225467116285, "grad_norm": 0.3256104588508606, "learning_rate": 1.9588593491455934e-05, "loss": 0.4827, "step": 8732 }, { "epoch": 0.1852134631290959, "grad_norm": 0.30288663506507874, "learning_rate": 1.958849881294591e-05, "loss": 0.5024, "step": 8733 }, { "epoch": 0.18523467158702892, "grad_norm": 0.35561442375183105, "learning_rate": 1.958840412377164e-05, "loss": 0.5031, "step": 8734 }, { "epoch": 0.18525588004496193, "grad_norm": 0.35993286967277527, "learning_rate": 1.9588309423933232e-05, "loss": 0.5841, "step": 8735 }, { "epoch": 0.18527708850289495, "grad_norm": 0.3359600007534027, "learning_rate": 1.9588214713430788e-05, "loss": 0.4822, "step": 8736 }, { "epoch": 0.18529829696082797, "grad_norm": 0.34168052673339844, "learning_rate": 1.958811999226442e-05, "loss": 0.5359, "step": 8737 }, { "epoch": 0.185319505418761, "grad_norm": 0.34193095564842224, "learning_rate": 1.9588025260434223e-05, "loss": 0.5315, "step": 8738 }, { "epoch": 0.185340713876694, "grad_norm": 0.3213980495929718, "learning_rate": 1.958793051794031e-05, "loss": 0.5274, "step": 8739 }, { "epoch": 0.18536192233462706, "grad_norm": 0.4282354712486267, "learning_rate": 1.958783576478278e-05, "loss": 0.4571, "step": 8740 }, { "epoch": 0.18538313079256008, "grad_norm": 0.30296754837036133, "learning_rate": 1.9587741000961746e-05, "loss": 0.4843, "step": 8741 }, { "epoch": 0.1854043392504931, "grad_norm": 0.4780537188053131, "learning_rate": 1.958764622647731e-05, "loss": 0.5769, "step": 8742 }, { "epoch": 0.18542554770842612, "grad_norm": 0.3184758722782135, "learning_rate": 1.9587551441329578e-05, "loss": 0.5069, "step": 8743 }, { "epoch": 0.18544675616635914, "grad_norm": 0.32453879714012146, "learning_rate": 1.9587456645518654e-05, "loss": 0.5076, "step": 8744 }, { "epoch": 0.18546796462429216, "grad_norm": 0.35479769110679626, "learning_rate": 1.958736183904464e-05, "loss": 0.5238, "step": 8745 }, { "epoch": 0.18548917308222518, "grad_norm": 0.30713438987731934, "learning_rate": 1.9587267021907648e-05, "loss": 0.5379, "step": 8746 }, { "epoch": 0.18551038154015823, "grad_norm": 0.347409188747406, "learning_rate": 1.9587172194107782e-05, "loss": 0.4861, "step": 8747 }, { "epoch": 0.18553158999809125, "grad_norm": 0.33033043146133423, "learning_rate": 1.9587077355645145e-05, "loss": 0.5293, "step": 8748 }, { "epoch": 0.18555279845602426, "grad_norm": 0.36508065462112427, "learning_rate": 1.9586982506519842e-05, "loss": 0.5224, "step": 8749 }, { "epoch": 0.18557400691395728, "grad_norm": 0.34582260251045227, "learning_rate": 1.9586887646731985e-05, "loss": 0.6552, "step": 8750 }, { "epoch": 0.1855952153718903, "grad_norm": 0.33064013719558716, "learning_rate": 1.9586792776281673e-05, "loss": 0.5133, "step": 8751 }, { "epoch": 0.18561642382982332, "grad_norm": 0.6389004588127136, "learning_rate": 1.9586697895169015e-05, "loss": 0.6484, "step": 8752 }, { "epoch": 0.18563763228775634, "grad_norm": 0.3118845224380493, "learning_rate": 1.958660300339411e-05, "loss": 0.4998, "step": 8753 }, { "epoch": 0.1856588407456894, "grad_norm": 0.34548962116241455, "learning_rate": 1.9586508100957072e-05, "loss": 0.3764, "step": 8754 }, { "epoch": 0.1856800492036224, "grad_norm": 0.29282015562057495, "learning_rate": 1.9586413187858e-05, "loss": 0.4436, "step": 8755 }, { "epoch": 0.18570125766155543, "grad_norm": 0.3607330918312073, "learning_rate": 1.9586318264097007e-05, "loss": 0.5774, "step": 8756 }, { "epoch": 0.18572246611948845, "grad_norm": 0.31005924940109253, "learning_rate": 1.9586223329674193e-05, "loss": 0.4875, "step": 8757 }, { "epoch": 0.18574367457742147, "grad_norm": 0.3496728241443634, "learning_rate": 1.9586128384589664e-05, "loss": 0.4736, "step": 8758 }, { "epoch": 0.1857648830353545, "grad_norm": 0.3958985209465027, "learning_rate": 1.9586033428843524e-05, "loss": 0.5943, "step": 8759 }, { "epoch": 0.18578609149328754, "grad_norm": 0.31371235847473145, "learning_rate": 1.958593846243588e-05, "loss": 0.5686, "step": 8760 }, { "epoch": 0.18580729995122056, "grad_norm": 0.30878669023513794, "learning_rate": 1.9585843485366842e-05, "loss": 0.491, "step": 8761 }, { "epoch": 0.18582850840915358, "grad_norm": 0.331825852394104, "learning_rate": 1.958574849763651e-05, "loss": 0.5942, "step": 8762 }, { "epoch": 0.1858497168670866, "grad_norm": 0.35782867670059204, "learning_rate": 1.9585653499244996e-05, "loss": 0.4856, "step": 8763 }, { "epoch": 0.18587092532501961, "grad_norm": 0.33211809396743774, "learning_rate": 1.95855584901924e-05, "loss": 0.5348, "step": 8764 }, { "epoch": 0.18589213378295263, "grad_norm": 0.38298657536506653, "learning_rate": 1.9585463470478826e-05, "loss": 0.6534, "step": 8765 }, { "epoch": 0.18591334224088565, "grad_norm": 0.4117191731929779, "learning_rate": 1.9585368440104384e-05, "loss": 0.629, "step": 8766 }, { "epoch": 0.1859345506988187, "grad_norm": 0.3293907642364502, "learning_rate": 1.958527339906918e-05, "loss": 0.5394, "step": 8767 }, { "epoch": 0.18595575915675172, "grad_norm": 0.3224831223487854, "learning_rate": 1.9585178347373318e-05, "loss": 0.5197, "step": 8768 }, { "epoch": 0.18597696761468474, "grad_norm": 0.3132375478744507, "learning_rate": 1.95850832850169e-05, "loss": 0.4613, "step": 8769 }, { "epoch": 0.18599817607261776, "grad_norm": 0.3093658983707428, "learning_rate": 1.958498821200004e-05, "loss": 0.4435, "step": 8770 }, { "epoch": 0.18601938453055078, "grad_norm": 0.3372679352760315, "learning_rate": 1.958489312832284e-05, "loss": 0.4789, "step": 8771 }, { "epoch": 0.1860405929884838, "grad_norm": 0.3520706295967102, "learning_rate": 1.9584798033985405e-05, "loss": 0.4954, "step": 8772 }, { "epoch": 0.18606180144641682, "grad_norm": 0.3233819305896759, "learning_rate": 1.9584702928987838e-05, "loss": 0.5515, "step": 8773 }, { "epoch": 0.18608300990434987, "grad_norm": 0.38037484884262085, "learning_rate": 1.9584607813330246e-05, "loss": 0.5229, "step": 8774 }, { "epoch": 0.18610421836228289, "grad_norm": 0.32215869426727295, "learning_rate": 1.958451268701274e-05, "loss": 0.5166, "step": 8775 }, { "epoch": 0.1861254268202159, "grad_norm": 0.36748799681663513, "learning_rate": 1.958441755003542e-05, "loss": 0.5189, "step": 8776 }, { "epoch": 0.18614663527814893, "grad_norm": 0.28659701347351074, "learning_rate": 1.95843224023984e-05, "loss": 0.4801, "step": 8777 }, { "epoch": 0.18616784373608194, "grad_norm": 0.36416077613830566, "learning_rate": 1.9584227244101775e-05, "loss": 0.5371, "step": 8778 }, { "epoch": 0.18618905219401496, "grad_norm": 0.38054534792900085, "learning_rate": 1.9584132075145655e-05, "loss": 0.5706, "step": 8779 }, { "epoch": 0.18621026065194798, "grad_norm": 0.30471181869506836, "learning_rate": 1.9584036895530147e-05, "loss": 0.4884, "step": 8780 }, { "epoch": 0.18623146910988103, "grad_norm": 0.632310688495636, "learning_rate": 1.958394170525536e-05, "loss": 0.5047, "step": 8781 }, { "epoch": 0.18625267756781405, "grad_norm": 0.35510894656181335, "learning_rate": 1.958384650432139e-05, "loss": 0.4811, "step": 8782 }, { "epoch": 0.18627388602574707, "grad_norm": 0.3271509110927582, "learning_rate": 1.9583751292728356e-05, "loss": 0.5405, "step": 8783 }, { "epoch": 0.1862950944836801, "grad_norm": 0.32777222990989685, "learning_rate": 1.958365607047635e-05, "loss": 0.5981, "step": 8784 }, { "epoch": 0.1863163029416131, "grad_norm": 0.3514721691608429, "learning_rate": 1.958356083756549e-05, "loss": 0.5765, "step": 8785 }, { "epoch": 0.18633751139954613, "grad_norm": 0.3563404977321625, "learning_rate": 1.9583465593995875e-05, "loss": 0.4725, "step": 8786 }, { "epoch": 0.18635871985747915, "grad_norm": 0.33291906118392944, "learning_rate": 1.9583370339767613e-05, "loss": 0.4852, "step": 8787 }, { "epoch": 0.1863799283154122, "grad_norm": 0.34597185254096985, "learning_rate": 1.9583275074880814e-05, "loss": 0.5461, "step": 8788 }, { "epoch": 0.18640113677334522, "grad_norm": 0.40302345156669617, "learning_rate": 1.958317979933557e-05, "loss": 0.581, "step": 8789 }, { "epoch": 0.18642234523127824, "grad_norm": 0.32657483220100403, "learning_rate": 1.9583084513132006e-05, "loss": 0.5019, "step": 8790 }, { "epoch": 0.18644355368921126, "grad_norm": 0.33133819699287415, "learning_rate": 1.9582989216270214e-05, "loss": 0.5578, "step": 8791 }, { "epoch": 0.18646476214714428, "grad_norm": 0.34604793787002563, "learning_rate": 1.9582893908750305e-05, "loss": 0.5698, "step": 8792 }, { "epoch": 0.1864859706050773, "grad_norm": 0.299020379781723, "learning_rate": 1.9582798590572383e-05, "loss": 0.5076, "step": 8793 }, { "epoch": 0.18650717906301034, "grad_norm": 0.32476142048835754, "learning_rate": 1.958270326173656e-05, "loss": 0.411, "step": 8794 }, { "epoch": 0.18652838752094336, "grad_norm": 0.36077821254730225, "learning_rate": 1.9582607922242938e-05, "loss": 0.5079, "step": 8795 }, { "epoch": 0.18654959597887638, "grad_norm": 0.3334204852581024, "learning_rate": 1.958251257209162e-05, "loss": 0.533, "step": 8796 }, { "epoch": 0.1865708044368094, "grad_norm": 0.4124809801578522, "learning_rate": 1.9582417211282716e-05, "loss": 0.4153, "step": 8797 }, { "epoch": 0.18659201289474242, "grad_norm": 0.32996878027915955, "learning_rate": 1.958232183981633e-05, "loss": 0.4904, "step": 8798 }, { "epoch": 0.18661322135267544, "grad_norm": 0.3563759922981262, "learning_rate": 1.9582226457692567e-05, "loss": 0.5277, "step": 8799 }, { "epoch": 0.18663442981060846, "grad_norm": 0.42330214381217957, "learning_rate": 1.9582131064911538e-05, "loss": 0.5645, "step": 8800 }, { "epoch": 0.1866556382685415, "grad_norm": 0.33655136823654175, "learning_rate": 1.9582035661473347e-05, "loss": 0.5869, "step": 8801 }, { "epoch": 0.18667684672647453, "grad_norm": 0.34622833132743835, "learning_rate": 1.9581940247378096e-05, "loss": 0.5211, "step": 8802 }, { "epoch": 0.18669805518440755, "grad_norm": 0.5601972341537476, "learning_rate": 1.9581844822625896e-05, "loss": 0.59, "step": 8803 }, { "epoch": 0.18671926364234057, "grad_norm": 0.317704439163208, "learning_rate": 1.958174938721685e-05, "loss": 0.5397, "step": 8804 }, { "epoch": 0.18674047210027359, "grad_norm": 0.3968132734298706, "learning_rate": 1.9581653941151068e-05, "loss": 0.5163, "step": 8805 }, { "epoch": 0.1867616805582066, "grad_norm": 0.2987583577632904, "learning_rate": 1.958155848442865e-05, "loss": 0.5314, "step": 8806 }, { "epoch": 0.18678288901613962, "grad_norm": 0.3296227753162384, "learning_rate": 1.958146301704971e-05, "loss": 0.5058, "step": 8807 }, { "epoch": 0.18680409747407267, "grad_norm": 0.3163793385028839, "learning_rate": 1.9581367539014347e-05, "loss": 0.5716, "step": 8808 }, { "epoch": 0.1868253059320057, "grad_norm": 0.309840589761734, "learning_rate": 1.958127205032267e-05, "loss": 0.5164, "step": 8809 }, { "epoch": 0.1868465143899387, "grad_norm": 0.3152933716773987, "learning_rate": 1.958117655097479e-05, "loss": 0.5517, "step": 8810 }, { "epoch": 0.18686772284787173, "grad_norm": 0.383853942155838, "learning_rate": 1.9581081040970803e-05, "loss": 0.4206, "step": 8811 }, { "epoch": 0.18688893130580475, "grad_norm": 0.3455245792865753, "learning_rate": 1.9580985520310825e-05, "loss": 0.4872, "step": 8812 }, { "epoch": 0.18691013976373777, "grad_norm": 0.314655065536499, "learning_rate": 1.9580889988994955e-05, "loss": 0.4349, "step": 8813 }, { "epoch": 0.1869313482216708, "grad_norm": 0.40649881958961487, "learning_rate": 1.9580794447023302e-05, "loss": 0.5685, "step": 8814 }, { "epoch": 0.18695255667960384, "grad_norm": 0.431064248085022, "learning_rate": 1.958069889439598e-05, "loss": 0.477, "step": 8815 }, { "epoch": 0.18697376513753686, "grad_norm": 0.3363734185695648, "learning_rate": 1.958060333111308e-05, "loss": 0.5781, "step": 8816 }, { "epoch": 0.18699497359546988, "grad_norm": 0.3020452857017517, "learning_rate": 1.9580507757174718e-05, "loss": 0.5182, "step": 8817 }, { "epoch": 0.1870161820534029, "grad_norm": 0.3263368308544159, "learning_rate": 1.9580412172581e-05, "loss": 0.4462, "step": 8818 }, { "epoch": 0.18703739051133592, "grad_norm": 0.4440929889678955, "learning_rate": 1.958031657733203e-05, "loss": 0.5908, "step": 8819 }, { "epoch": 0.18705859896926894, "grad_norm": 0.3988659679889679, "learning_rate": 1.9580220971427915e-05, "loss": 0.542, "step": 8820 }, { "epoch": 0.18707980742720196, "grad_norm": 0.2950785756111145, "learning_rate": 1.958012535486876e-05, "loss": 0.4715, "step": 8821 }, { "epoch": 0.187101015885135, "grad_norm": 0.32014063000679016, "learning_rate": 1.9580029727654673e-05, "loss": 0.4581, "step": 8822 }, { "epoch": 0.18712222434306802, "grad_norm": 0.34477168321609497, "learning_rate": 1.9579934089785765e-05, "loss": 0.5077, "step": 8823 }, { "epoch": 0.18714343280100104, "grad_norm": 0.3752533197402954, "learning_rate": 1.9579838441262135e-05, "loss": 0.5883, "step": 8824 }, { "epoch": 0.18716464125893406, "grad_norm": 0.35108035802841187, "learning_rate": 1.957974278208389e-05, "loss": 0.5204, "step": 8825 }, { "epoch": 0.18718584971686708, "grad_norm": 0.369659960269928, "learning_rate": 1.957964711225114e-05, "loss": 0.5776, "step": 8826 }, { "epoch": 0.1872070581748001, "grad_norm": 0.32104718685150146, "learning_rate": 1.9579551431763987e-05, "loss": 0.5272, "step": 8827 }, { "epoch": 0.18722826663273312, "grad_norm": 0.3306577503681183, "learning_rate": 1.9579455740622544e-05, "loss": 0.5611, "step": 8828 }, { "epoch": 0.18724947509066617, "grad_norm": 0.3603897988796234, "learning_rate": 1.957936003882691e-05, "loss": 0.5378, "step": 8829 }, { "epoch": 0.1872706835485992, "grad_norm": 0.31941789388656616, "learning_rate": 1.9579264326377196e-05, "loss": 0.5984, "step": 8830 }, { "epoch": 0.1872918920065322, "grad_norm": 0.3377186059951782, "learning_rate": 1.9579168603273507e-05, "loss": 0.5549, "step": 8831 }, { "epoch": 0.18731310046446523, "grad_norm": 0.31970998644828796, "learning_rate": 1.9579072869515953e-05, "loss": 0.5994, "step": 8832 }, { "epoch": 0.18733430892239825, "grad_norm": 0.3264966905117035, "learning_rate": 1.9578977125104636e-05, "loss": 0.4919, "step": 8833 }, { "epoch": 0.18735551738033127, "grad_norm": 0.3630320131778717, "learning_rate": 1.9578881370039663e-05, "loss": 0.5811, "step": 8834 }, { "epoch": 0.1873767258382643, "grad_norm": 0.32413744926452637, "learning_rate": 1.9578785604321144e-05, "loss": 0.5349, "step": 8835 }, { "epoch": 0.18739793429619733, "grad_norm": 0.31746402382850647, "learning_rate": 1.957868982794918e-05, "loss": 0.5244, "step": 8836 }, { "epoch": 0.18741914275413035, "grad_norm": 0.32085686922073364, "learning_rate": 1.9578594040923878e-05, "loss": 0.4907, "step": 8837 }, { "epoch": 0.18744035121206337, "grad_norm": 0.31523397564888, "learning_rate": 1.957849824324535e-05, "loss": 0.5069, "step": 8838 }, { "epoch": 0.1874615596699964, "grad_norm": 0.35805097222328186, "learning_rate": 1.95784024349137e-05, "loss": 0.5923, "step": 8839 }, { "epoch": 0.1874827681279294, "grad_norm": 0.33981195092201233, "learning_rate": 1.9578306615929032e-05, "loss": 0.4951, "step": 8840 }, { "epoch": 0.18750397658586243, "grad_norm": 0.3163127899169922, "learning_rate": 1.9578210786291455e-05, "loss": 0.5829, "step": 8841 }, { "epoch": 0.18752518504379548, "grad_norm": 0.3282144069671631, "learning_rate": 1.957811494600108e-05, "loss": 0.6152, "step": 8842 }, { "epoch": 0.1875463935017285, "grad_norm": 0.35414668917655945, "learning_rate": 1.9578019095058e-05, "loss": 0.4315, "step": 8843 }, { "epoch": 0.18756760195966152, "grad_norm": 0.3609924912452698, "learning_rate": 1.957792323346234e-05, "loss": 0.5869, "step": 8844 }, { "epoch": 0.18758881041759454, "grad_norm": 0.39833158254623413, "learning_rate": 1.957782736121419e-05, "loss": 0.5791, "step": 8845 }, { "epoch": 0.18761001887552756, "grad_norm": 0.3277871608734131, "learning_rate": 1.9577731478313666e-05, "loss": 0.5516, "step": 8846 }, { "epoch": 0.18763122733346058, "grad_norm": 0.3198601305484772, "learning_rate": 1.9577635584760873e-05, "loss": 0.4808, "step": 8847 }, { "epoch": 0.1876524357913936, "grad_norm": 0.33093395829200745, "learning_rate": 1.9577539680555916e-05, "loss": 0.5354, "step": 8848 }, { "epoch": 0.18767364424932664, "grad_norm": 0.3200930058956146, "learning_rate": 1.95774437656989e-05, "loss": 0.5082, "step": 8849 }, { "epoch": 0.18769485270725966, "grad_norm": 0.34577375650405884, "learning_rate": 1.957734784018994e-05, "loss": 0.5897, "step": 8850 }, { "epoch": 0.18771606116519268, "grad_norm": 0.3316195607185364, "learning_rate": 1.957725190402913e-05, "loss": 0.6449, "step": 8851 }, { "epoch": 0.1877372696231257, "grad_norm": 0.29392150044441223, "learning_rate": 1.957715595721659e-05, "loss": 0.5507, "step": 8852 }, { "epoch": 0.18775847808105872, "grad_norm": 0.3158722221851349, "learning_rate": 1.9577059999752416e-05, "loss": 0.5038, "step": 8853 }, { "epoch": 0.18777968653899174, "grad_norm": 0.35718846321105957, "learning_rate": 1.9576964031636722e-05, "loss": 0.539, "step": 8854 }, { "epoch": 0.18780089499692476, "grad_norm": 0.2949049472808838, "learning_rate": 1.9576868052869613e-05, "loss": 0.4468, "step": 8855 }, { "epoch": 0.1878221034548578, "grad_norm": 0.41542497277259827, "learning_rate": 1.957677206345119e-05, "loss": 0.5747, "step": 8856 }, { "epoch": 0.18784331191279083, "grad_norm": 0.31850844621658325, "learning_rate": 1.9576676063381564e-05, "loss": 0.5097, "step": 8857 }, { "epoch": 0.18786452037072385, "grad_norm": 0.33052584528923035, "learning_rate": 1.9576580052660845e-05, "loss": 0.4627, "step": 8858 }, { "epoch": 0.18788572882865687, "grad_norm": 0.3171238601207733, "learning_rate": 1.9576484031289137e-05, "loss": 0.5213, "step": 8859 }, { "epoch": 0.1879069372865899, "grad_norm": 0.4721755087375641, "learning_rate": 1.9576387999266546e-05, "loss": 0.5406, "step": 8860 }, { "epoch": 0.1879281457445229, "grad_norm": 0.3074197471141815, "learning_rate": 1.957629195659318e-05, "loss": 0.4422, "step": 8861 }, { "epoch": 0.18794935420245593, "grad_norm": 0.31975361704826355, "learning_rate": 1.9576195903269145e-05, "loss": 0.5869, "step": 8862 }, { "epoch": 0.18797056266038897, "grad_norm": 0.3270023763179779, "learning_rate": 1.9576099839294547e-05, "loss": 0.6041, "step": 8863 }, { "epoch": 0.187991771118322, "grad_norm": 0.369590699672699, "learning_rate": 1.9576003764669495e-05, "loss": 0.5305, "step": 8864 }, { "epoch": 0.188012979576255, "grad_norm": 0.3359103798866272, "learning_rate": 1.9575907679394094e-05, "loss": 0.433, "step": 8865 }, { "epoch": 0.18803418803418803, "grad_norm": 0.3428969383239746, "learning_rate": 1.957581158346845e-05, "loss": 0.5694, "step": 8866 }, { "epoch": 0.18805539649212105, "grad_norm": 0.3232840597629547, "learning_rate": 1.9575715476892676e-05, "loss": 0.5101, "step": 8867 }, { "epoch": 0.18807660495005407, "grad_norm": 0.33164680004119873, "learning_rate": 1.957561935966687e-05, "loss": 0.5843, "step": 8868 }, { "epoch": 0.18809781340798712, "grad_norm": 0.30145063996315, "learning_rate": 1.9575523231791148e-05, "loss": 0.4613, "step": 8869 }, { "epoch": 0.18811902186592014, "grad_norm": 0.33379822969436646, "learning_rate": 1.9575427093265608e-05, "loss": 0.5573, "step": 8870 }, { "epoch": 0.18814023032385316, "grad_norm": 0.3176060616970062, "learning_rate": 1.9575330944090364e-05, "loss": 0.546, "step": 8871 }, { "epoch": 0.18816143878178618, "grad_norm": 0.4093368947505951, "learning_rate": 1.9575234784265517e-05, "loss": 0.4888, "step": 8872 }, { "epoch": 0.1881826472397192, "grad_norm": 0.38237935304641724, "learning_rate": 1.957513861379118e-05, "loss": 0.4838, "step": 8873 }, { "epoch": 0.18820385569765222, "grad_norm": 0.3468689024448395, "learning_rate": 1.9575042432667458e-05, "loss": 0.631, "step": 8874 }, { "epoch": 0.18822506415558524, "grad_norm": 0.33634260296821594, "learning_rate": 1.9574946240894453e-05, "loss": 0.5031, "step": 8875 }, { "epoch": 0.18824627261351828, "grad_norm": 0.343345046043396, "learning_rate": 1.9574850038472278e-05, "loss": 0.5478, "step": 8876 }, { "epoch": 0.1882674810714513, "grad_norm": 0.3095874786376953, "learning_rate": 1.957475382540104e-05, "loss": 0.5313, "step": 8877 }, { "epoch": 0.18828868952938432, "grad_norm": 0.3689745366573334, "learning_rate": 1.957465760168084e-05, "loss": 0.5963, "step": 8878 }, { "epoch": 0.18830989798731734, "grad_norm": 0.3005843460559845, "learning_rate": 1.957456136731179e-05, "loss": 0.4655, "step": 8879 }, { "epoch": 0.18833110644525036, "grad_norm": 0.3945053815841675, "learning_rate": 1.9574465122293998e-05, "loss": 0.5234, "step": 8880 }, { "epoch": 0.18835231490318338, "grad_norm": 0.3318767249584198, "learning_rate": 1.9574368866627566e-05, "loss": 0.4897, "step": 8881 }, { "epoch": 0.1883735233611164, "grad_norm": 0.3981970548629761, "learning_rate": 1.9574272600312606e-05, "loss": 0.5113, "step": 8882 }, { "epoch": 0.18839473181904945, "grad_norm": 0.3161270022392273, "learning_rate": 1.9574176323349224e-05, "loss": 0.4921, "step": 8883 }, { "epoch": 0.18841594027698247, "grad_norm": 0.3774503767490387, "learning_rate": 1.9574080035737528e-05, "loss": 0.514, "step": 8884 }, { "epoch": 0.1884371487349155, "grad_norm": 0.3777361810207367, "learning_rate": 1.957398373747762e-05, "loss": 0.4885, "step": 8885 }, { "epoch": 0.1884583571928485, "grad_norm": 0.3354484438896179, "learning_rate": 1.957388742856961e-05, "loss": 0.499, "step": 8886 }, { "epoch": 0.18847956565078153, "grad_norm": 0.33425813913345337, "learning_rate": 1.9573791109013605e-05, "loss": 0.6047, "step": 8887 }, { "epoch": 0.18850077410871455, "grad_norm": 0.3432060480117798, "learning_rate": 1.9573694778809718e-05, "loss": 0.506, "step": 8888 }, { "epoch": 0.18852198256664757, "grad_norm": 0.33271241188049316, "learning_rate": 1.957359843795805e-05, "loss": 0.5801, "step": 8889 }, { "epoch": 0.18854319102458061, "grad_norm": 0.30621102452278137, "learning_rate": 1.9573502086458703e-05, "loss": 0.4736, "step": 8890 }, { "epoch": 0.18856439948251363, "grad_norm": 0.42311644554138184, "learning_rate": 1.9573405724311793e-05, "loss": 0.6279, "step": 8891 }, { "epoch": 0.18858560794044665, "grad_norm": 0.3657148480415344, "learning_rate": 1.9573309351517425e-05, "loss": 0.5779, "step": 8892 }, { "epoch": 0.18860681639837967, "grad_norm": 0.32389509677886963, "learning_rate": 1.9573212968075704e-05, "loss": 0.5219, "step": 8893 }, { "epoch": 0.1886280248563127, "grad_norm": 0.4470888376235962, "learning_rate": 1.957311657398674e-05, "loss": 0.5604, "step": 8894 }, { "epoch": 0.1886492333142457, "grad_norm": 0.32244089245796204, "learning_rate": 1.957302016925064e-05, "loss": 0.5255, "step": 8895 }, { "epoch": 0.18867044177217873, "grad_norm": 0.3397682309150696, "learning_rate": 1.957292375386751e-05, "loss": 0.5865, "step": 8896 }, { "epoch": 0.18869165023011178, "grad_norm": 0.349551796913147, "learning_rate": 1.9572827327837455e-05, "loss": 0.511, "step": 8897 }, { "epoch": 0.1887128586880448, "grad_norm": 0.33814379572868347, "learning_rate": 1.957273089116059e-05, "loss": 0.5289, "step": 8898 }, { "epoch": 0.18873406714597782, "grad_norm": 0.29774796962738037, "learning_rate": 1.9572634443837014e-05, "loss": 0.4547, "step": 8899 }, { "epoch": 0.18875527560391084, "grad_norm": 0.3688832223415375, "learning_rate": 1.9572537985866832e-05, "loss": 0.5555, "step": 8900 }, { "epoch": 0.18877648406184386, "grad_norm": 0.3310639262199402, "learning_rate": 1.9572441517250164e-05, "loss": 0.4523, "step": 8901 }, { "epoch": 0.18879769251977688, "grad_norm": 0.3455469608306885, "learning_rate": 1.9572345037987106e-05, "loss": 0.5323, "step": 8902 }, { "epoch": 0.1888189009777099, "grad_norm": 0.31122735142707825, "learning_rate": 1.9572248548077768e-05, "loss": 0.5199, "step": 8903 }, { "epoch": 0.18884010943564294, "grad_norm": 0.34043189883232117, "learning_rate": 1.957215204752226e-05, "loss": 0.4589, "step": 8904 }, { "epoch": 0.18886131789357596, "grad_norm": 0.3587851822376251, "learning_rate": 1.9572055536320688e-05, "loss": 0.5481, "step": 8905 }, { "epoch": 0.18888252635150898, "grad_norm": 0.3768869936466217, "learning_rate": 1.9571959014473157e-05, "loss": 0.6547, "step": 8906 }, { "epoch": 0.188903734809442, "grad_norm": 0.30038589239120483, "learning_rate": 1.9571862481979776e-05, "loss": 0.5005, "step": 8907 }, { "epoch": 0.18892494326737502, "grad_norm": 0.35719650983810425, "learning_rate": 1.9571765938840655e-05, "loss": 0.5031, "step": 8908 }, { "epoch": 0.18894615172530804, "grad_norm": 0.37822526693344116, "learning_rate": 1.95716693850559e-05, "loss": 0.4345, "step": 8909 }, { "epoch": 0.1889673601832411, "grad_norm": 0.32933464646339417, "learning_rate": 1.9571572820625615e-05, "loss": 0.5373, "step": 8910 }, { "epoch": 0.1889885686411741, "grad_norm": 0.32161352038383484, "learning_rate": 1.957147624554991e-05, "loss": 0.4937, "step": 8911 }, { "epoch": 0.18900977709910713, "grad_norm": 0.38503319025039673, "learning_rate": 1.9571379659828894e-05, "loss": 0.5137, "step": 8912 }, { "epoch": 0.18903098555704015, "grad_norm": 0.3621874153614044, "learning_rate": 1.957128306346267e-05, "loss": 0.6343, "step": 8913 }, { "epoch": 0.18905219401497317, "grad_norm": 0.2967824339866638, "learning_rate": 1.9571186456451352e-05, "loss": 0.4101, "step": 8914 }, { "epoch": 0.1890734024729062, "grad_norm": 0.33125540614128113, "learning_rate": 1.957108983879504e-05, "loss": 0.4935, "step": 8915 }, { "epoch": 0.1890946109308392, "grad_norm": 0.30856984853744507, "learning_rate": 1.9570993210493844e-05, "loss": 0.5433, "step": 8916 }, { "epoch": 0.18911581938877225, "grad_norm": 0.3227551281452179, "learning_rate": 1.9570896571547877e-05, "loss": 0.4922, "step": 8917 }, { "epoch": 0.18913702784670527, "grad_norm": 0.3330567181110382, "learning_rate": 1.957079992195724e-05, "loss": 0.4742, "step": 8918 }, { "epoch": 0.1891582363046383, "grad_norm": 0.3263451159000397, "learning_rate": 1.957070326172204e-05, "loss": 0.5115, "step": 8919 }, { "epoch": 0.1891794447625713, "grad_norm": 0.33352017402648926, "learning_rate": 1.9570606590842393e-05, "loss": 0.6073, "step": 8920 }, { "epoch": 0.18920065322050433, "grad_norm": 0.31245607137680054, "learning_rate": 1.9570509909318397e-05, "loss": 0.5736, "step": 8921 }, { "epoch": 0.18922186167843735, "grad_norm": 0.31422650814056396, "learning_rate": 1.9570413217150158e-05, "loss": 0.5748, "step": 8922 }, { "epoch": 0.18924307013637037, "grad_norm": 0.32881274819374084, "learning_rate": 1.9570316514337798e-05, "loss": 0.437, "step": 8923 }, { "epoch": 0.18926427859430342, "grad_norm": 0.3470235764980316, "learning_rate": 1.9570219800881408e-05, "loss": 0.4693, "step": 8924 }, { "epoch": 0.18928548705223644, "grad_norm": 0.2891085147857666, "learning_rate": 1.9570123076781104e-05, "loss": 0.4995, "step": 8925 }, { "epoch": 0.18930669551016946, "grad_norm": 0.3580932915210724, "learning_rate": 1.9570026342036995e-05, "loss": 0.5267, "step": 8926 }, { "epoch": 0.18932790396810248, "grad_norm": 0.33853453397750854, "learning_rate": 1.9569929596649184e-05, "loss": 0.627, "step": 8927 }, { "epoch": 0.1893491124260355, "grad_norm": 0.34516140818595886, "learning_rate": 1.956983284061778e-05, "loss": 0.5672, "step": 8928 }, { "epoch": 0.18937032088396852, "grad_norm": 0.3561398684978485, "learning_rate": 1.956973607394289e-05, "loss": 0.4941, "step": 8929 }, { "epoch": 0.18939152934190154, "grad_norm": 0.3676573634147644, "learning_rate": 1.9569639296624626e-05, "loss": 0.466, "step": 8930 }, { "epoch": 0.18941273779983459, "grad_norm": 0.3237667381763458, "learning_rate": 1.956954250866309e-05, "loss": 0.468, "step": 8931 }, { "epoch": 0.1894339462577676, "grad_norm": 0.34694311022758484, "learning_rate": 1.956944571005839e-05, "loss": 0.5936, "step": 8932 }, { "epoch": 0.18945515471570062, "grad_norm": 0.339595764875412, "learning_rate": 1.956934890081064e-05, "loss": 0.5636, "step": 8933 }, { "epoch": 0.18947636317363364, "grad_norm": 0.31934255361557007, "learning_rate": 1.9569252080919942e-05, "loss": 0.5275, "step": 8934 }, { "epoch": 0.18949757163156666, "grad_norm": 0.30033648014068604, "learning_rate": 1.9569155250386408e-05, "loss": 0.5255, "step": 8935 }, { "epoch": 0.18951878008949968, "grad_norm": 0.2954360842704773, "learning_rate": 1.956905840921014e-05, "loss": 0.4996, "step": 8936 }, { "epoch": 0.1895399885474327, "grad_norm": 0.3210143744945526, "learning_rate": 1.9568961557391242e-05, "loss": 0.5217, "step": 8937 }, { "epoch": 0.18956119700536575, "grad_norm": 0.342870831489563, "learning_rate": 1.9568864694929838e-05, "loss": 0.5271, "step": 8938 }, { "epoch": 0.18958240546329877, "grad_norm": 0.3202785551548004, "learning_rate": 1.9568767821826017e-05, "loss": 0.5373, "step": 8939 }, { "epoch": 0.1896036139212318, "grad_norm": 0.3421732783317566, "learning_rate": 1.9568670938079905e-05, "loss": 0.5137, "step": 8940 }, { "epoch": 0.1896248223791648, "grad_norm": 0.34510788321495056, "learning_rate": 1.956857404369159e-05, "loss": 0.536, "step": 8941 }, { "epoch": 0.18964603083709783, "grad_norm": 0.3705981969833374, "learning_rate": 1.95684771386612e-05, "loss": 0.4952, "step": 8942 }, { "epoch": 0.18966723929503085, "grad_norm": 0.31214290857315063, "learning_rate": 1.9568380222988826e-05, "loss": 0.5183, "step": 8943 }, { "epoch": 0.1896884477529639, "grad_norm": 0.289897620677948, "learning_rate": 1.9568283296674585e-05, "loss": 0.4402, "step": 8944 }, { "epoch": 0.18970965621089692, "grad_norm": 0.2980995774269104, "learning_rate": 1.956818635971858e-05, "loss": 0.451, "step": 8945 }, { "epoch": 0.18973086466882993, "grad_norm": 0.3147372901439667, "learning_rate": 1.9568089412120926e-05, "loss": 0.5655, "step": 8946 }, { "epoch": 0.18975207312676295, "grad_norm": 0.3730298578739166, "learning_rate": 1.9567992453881725e-05, "loss": 0.5544, "step": 8947 }, { "epoch": 0.18977328158469597, "grad_norm": 0.3303830027580261, "learning_rate": 1.9567895485001083e-05, "loss": 0.5273, "step": 8948 }, { "epoch": 0.189794490042629, "grad_norm": 0.3166223466396332, "learning_rate": 1.956779850547911e-05, "loss": 0.5173, "step": 8949 }, { "epoch": 0.189815698500562, "grad_norm": 0.35044872760772705, "learning_rate": 1.9567701515315917e-05, "loss": 0.4901, "step": 8950 }, { "epoch": 0.18983690695849506, "grad_norm": 0.2880913317203522, "learning_rate": 1.956760451451161e-05, "loss": 0.4003, "step": 8951 }, { "epoch": 0.18985811541642808, "grad_norm": 0.34600406885147095, "learning_rate": 1.9567507503066296e-05, "loss": 0.4607, "step": 8952 }, { "epoch": 0.1898793238743611, "grad_norm": 0.3332274556159973, "learning_rate": 1.9567410480980082e-05, "loss": 0.558, "step": 8953 }, { "epoch": 0.18990053233229412, "grad_norm": 0.3292350172996521, "learning_rate": 1.956731344825308e-05, "loss": 0.4858, "step": 8954 }, { "epoch": 0.18992174079022714, "grad_norm": 0.36656463146209717, "learning_rate": 1.9567216404885393e-05, "loss": 0.5756, "step": 8955 }, { "epoch": 0.18994294924816016, "grad_norm": 0.33388060331344604, "learning_rate": 1.956711935087713e-05, "loss": 0.566, "step": 8956 }, { "epoch": 0.18996415770609318, "grad_norm": 0.39147472381591797, "learning_rate": 1.9567022286228402e-05, "loss": 0.5724, "step": 8957 }, { "epoch": 0.18998536616402623, "grad_norm": 0.30896255373954773, "learning_rate": 1.9566925210939314e-05, "loss": 0.57, "step": 8958 }, { "epoch": 0.19000657462195925, "grad_norm": 0.3241516053676605, "learning_rate": 1.9566828125009975e-05, "loss": 0.5325, "step": 8959 }, { "epoch": 0.19002778307989227, "grad_norm": 0.36884960532188416, "learning_rate": 1.956673102844049e-05, "loss": 0.5236, "step": 8960 }, { "epoch": 0.19004899153782528, "grad_norm": 0.29651564359664917, "learning_rate": 1.9566633921230974e-05, "loss": 0.4752, "step": 8961 }, { "epoch": 0.1900701999957583, "grad_norm": 0.3337703347206116, "learning_rate": 1.9566536803381533e-05, "loss": 0.5808, "step": 8962 }, { "epoch": 0.19009140845369132, "grad_norm": 0.34385013580322266, "learning_rate": 1.9566439674892267e-05, "loss": 0.5588, "step": 8963 }, { "epoch": 0.19011261691162434, "grad_norm": 0.36841732263565063, "learning_rate": 1.9566342535763292e-05, "loss": 0.4784, "step": 8964 }, { "epoch": 0.1901338253695574, "grad_norm": 0.34844133257865906, "learning_rate": 1.9566245385994715e-05, "loss": 0.5534, "step": 8965 }, { "epoch": 0.1901550338274904, "grad_norm": 0.4023967683315277, "learning_rate": 1.9566148225586643e-05, "loss": 0.5553, "step": 8966 }, { "epoch": 0.19017624228542343, "grad_norm": 0.3628746271133423, "learning_rate": 1.9566051054539182e-05, "loss": 0.6077, "step": 8967 }, { "epoch": 0.19019745074335645, "grad_norm": 0.35209089517593384, "learning_rate": 1.9565953872852446e-05, "loss": 0.5137, "step": 8968 }, { "epoch": 0.19021865920128947, "grad_norm": 0.33496513962745667, "learning_rate": 1.9565856680526532e-05, "loss": 0.3888, "step": 8969 }, { "epoch": 0.1902398676592225, "grad_norm": 0.33403831720352173, "learning_rate": 1.9565759477561563e-05, "loss": 0.5784, "step": 8970 }, { "epoch": 0.1902610761171555, "grad_norm": 0.41706013679504395, "learning_rate": 1.9565662263957636e-05, "loss": 0.5321, "step": 8971 }, { "epoch": 0.19028228457508856, "grad_norm": 0.3330562710762024, "learning_rate": 1.9565565039714863e-05, "loss": 0.541, "step": 8972 }, { "epoch": 0.19030349303302158, "grad_norm": 0.4465872049331665, "learning_rate": 1.956546780483335e-05, "loss": 0.6327, "step": 8973 }, { "epoch": 0.1903247014909546, "grad_norm": 0.3766725957393646, "learning_rate": 1.956537055931321e-05, "loss": 0.4544, "step": 8974 }, { "epoch": 0.19034590994888761, "grad_norm": 0.35740402340888977, "learning_rate": 1.9565273303154545e-05, "loss": 0.5258, "step": 8975 }, { "epoch": 0.19036711840682063, "grad_norm": 0.28754231333732605, "learning_rate": 1.956517603635747e-05, "loss": 0.4793, "step": 8976 }, { "epoch": 0.19038832686475365, "grad_norm": 0.42526543140411377, "learning_rate": 1.9565078758922086e-05, "loss": 0.5166, "step": 8977 }, { "epoch": 0.19040953532268667, "grad_norm": 0.330384761095047, "learning_rate": 1.9564981470848507e-05, "loss": 0.5408, "step": 8978 }, { "epoch": 0.19043074378061972, "grad_norm": 0.47710514068603516, "learning_rate": 1.9564884172136837e-05, "loss": 0.5977, "step": 8979 }, { "epoch": 0.19045195223855274, "grad_norm": 0.3260405957698822, "learning_rate": 1.9564786862787185e-05, "loss": 0.486, "step": 8980 }, { "epoch": 0.19047316069648576, "grad_norm": 0.3485283851623535, "learning_rate": 1.956468954279966e-05, "loss": 0.5225, "step": 8981 }, { "epoch": 0.19049436915441878, "grad_norm": 0.40631335973739624, "learning_rate": 1.956459221217437e-05, "loss": 0.5452, "step": 8982 }, { "epoch": 0.1905155776123518, "grad_norm": 0.3821653127670288, "learning_rate": 1.9564494870911425e-05, "loss": 0.4757, "step": 8983 }, { "epoch": 0.19053678607028482, "grad_norm": 0.34321796894073486, "learning_rate": 1.956439751901093e-05, "loss": 0.5291, "step": 8984 }, { "epoch": 0.19055799452821787, "grad_norm": 0.3311236798763275, "learning_rate": 1.9564300156473e-05, "loss": 0.4919, "step": 8985 }, { "epoch": 0.1905792029861509, "grad_norm": 0.33694136142730713, "learning_rate": 1.9564202783297737e-05, "loss": 0.5432, "step": 8986 }, { "epoch": 0.1906004114440839, "grad_norm": 0.35420650243759155, "learning_rate": 1.9564105399485246e-05, "loss": 0.5469, "step": 8987 }, { "epoch": 0.19062161990201693, "grad_norm": 0.36110952496528625, "learning_rate": 1.9564008005035644e-05, "loss": 0.4938, "step": 8988 }, { "epoch": 0.19064282835994995, "grad_norm": 0.35575222969055176, "learning_rate": 1.9563910599949037e-05, "loss": 0.4622, "step": 8989 }, { "epoch": 0.19066403681788296, "grad_norm": 0.44255587458610535, "learning_rate": 1.956381318422553e-05, "loss": 0.5183, "step": 8990 }, { "epoch": 0.19068524527581598, "grad_norm": 0.29314059019088745, "learning_rate": 1.9563715757865228e-05, "loss": 0.4459, "step": 8991 }, { "epoch": 0.19070645373374903, "grad_norm": 0.3103480041027069, "learning_rate": 1.9563618320868253e-05, "loss": 0.4985, "step": 8992 }, { "epoch": 0.19072766219168205, "grad_norm": 0.3478839099407196, "learning_rate": 1.9563520873234697e-05, "loss": 0.5292, "step": 8993 }, { "epoch": 0.19074887064961507, "grad_norm": 0.3688022196292877, "learning_rate": 1.9563423414964683e-05, "loss": 0.552, "step": 8994 }, { "epoch": 0.1907700791075481, "grad_norm": 0.321682870388031, "learning_rate": 1.956332594605831e-05, "loss": 0.4932, "step": 8995 }, { "epoch": 0.1907912875654811, "grad_norm": 0.317007839679718, "learning_rate": 1.9563228466515687e-05, "loss": 0.5682, "step": 8996 }, { "epoch": 0.19081249602341413, "grad_norm": 0.3020656406879425, "learning_rate": 1.9563130976336926e-05, "loss": 0.5215, "step": 8997 }, { "epoch": 0.19083370448134715, "grad_norm": 0.3134438991546631, "learning_rate": 1.9563033475522134e-05, "loss": 0.5674, "step": 8998 }, { "epoch": 0.1908549129392802, "grad_norm": 0.32932689785957336, "learning_rate": 1.9562935964071418e-05, "loss": 0.5707, "step": 8999 }, { "epoch": 0.19087612139721322, "grad_norm": 0.3333834409713745, "learning_rate": 1.9562838441984888e-05, "loss": 0.5716, "step": 9000 }, { "epoch": 0.19089732985514624, "grad_norm": 0.32966792583465576, "learning_rate": 1.9562740909262653e-05, "loss": 0.6068, "step": 9001 }, { "epoch": 0.19091853831307926, "grad_norm": 0.34366995096206665, "learning_rate": 1.9562643365904822e-05, "loss": 0.5303, "step": 9002 }, { "epoch": 0.19093974677101228, "grad_norm": 0.3120494782924652, "learning_rate": 1.95625458119115e-05, "loss": 0.497, "step": 9003 }, { "epoch": 0.1909609552289453, "grad_norm": 0.3435837924480438, "learning_rate": 1.9562448247282798e-05, "loss": 0.54, "step": 9004 }, { "epoch": 0.19098216368687831, "grad_norm": 0.3402247130870819, "learning_rate": 1.9562350672018824e-05, "loss": 0.5088, "step": 9005 }, { "epoch": 0.19100337214481136, "grad_norm": 0.36568018794059753, "learning_rate": 1.9562253086119685e-05, "loss": 0.5519, "step": 9006 }, { "epoch": 0.19102458060274438, "grad_norm": 0.3160358667373657, "learning_rate": 1.956215548958549e-05, "loss": 0.5073, "step": 9007 }, { "epoch": 0.1910457890606774, "grad_norm": 0.3394257724285126, "learning_rate": 1.9562057882416355e-05, "loss": 0.517, "step": 9008 }, { "epoch": 0.19106699751861042, "grad_norm": 0.3267126679420471, "learning_rate": 1.9561960264612382e-05, "loss": 0.5578, "step": 9009 }, { "epoch": 0.19108820597654344, "grad_norm": 0.40340930223464966, "learning_rate": 1.9561862636173673e-05, "loss": 0.6077, "step": 9010 }, { "epoch": 0.19110941443447646, "grad_norm": 0.33791428804397583, "learning_rate": 1.956176499710035e-05, "loss": 0.5068, "step": 9011 }, { "epoch": 0.19113062289240948, "grad_norm": 0.38815271854400635, "learning_rate": 1.956166734739251e-05, "loss": 0.5315, "step": 9012 }, { "epoch": 0.19115183135034253, "grad_norm": 0.3358430564403534, "learning_rate": 1.956156968705027e-05, "loss": 0.4873, "step": 9013 }, { "epoch": 0.19117303980827555, "grad_norm": 0.3302117884159088, "learning_rate": 1.9561472016073733e-05, "loss": 0.5371, "step": 9014 }, { "epoch": 0.19119424826620857, "grad_norm": 0.2934514582157135, "learning_rate": 1.9561374334463012e-05, "loss": 0.4855, "step": 9015 }, { "epoch": 0.19121545672414159, "grad_norm": 0.29427796602249146, "learning_rate": 1.9561276642218212e-05, "loss": 0.4372, "step": 9016 }, { "epoch": 0.1912366651820746, "grad_norm": 0.33034762740135193, "learning_rate": 1.9561178939339445e-05, "loss": 0.5402, "step": 9017 }, { "epoch": 0.19125787364000763, "grad_norm": 0.3039841949939728, "learning_rate": 1.9561081225826817e-05, "loss": 0.4668, "step": 9018 }, { "epoch": 0.19127908209794067, "grad_norm": 0.35414940118789673, "learning_rate": 1.9560983501680435e-05, "loss": 0.4417, "step": 9019 }, { "epoch": 0.1913002905558737, "grad_norm": 0.3317181169986725, "learning_rate": 1.9560885766900415e-05, "loss": 0.472, "step": 9020 }, { "epoch": 0.1913214990138067, "grad_norm": 0.3338148593902588, "learning_rate": 1.9560788021486856e-05, "loss": 0.549, "step": 9021 }, { "epoch": 0.19134270747173973, "grad_norm": 0.8593029975891113, "learning_rate": 1.9560690265439875e-05, "loss": 0.63, "step": 9022 }, { "epoch": 0.19136391592967275, "grad_norm": 0.350900262594223, "learning_rate": 1.9560592498759578e-05, "loss": 0.5528, "step": 9023 }, { "epoch": 0.19138512438760577, "grad_norm": 0.33812659978866577, "learning_rate": 1.956049472144607e-05, "loss": 0.5737, "step": 9024 }, { "epoch": 0.1914063328455388, "grad_norm": 0.48419588804244995, "learning_rate": 1.9560396933499462e-05, "loss": 0.6413, "step": 9025 }, { "epoch": 0.19142754130347184, "grad_norm": 0.747696578502655, "learning_rate": 1.9560299134919867e-05, "loss": 0.5137, "step": 9026 }, { "epoch": 0.19144874976140486, "grad_norm": 0.35937076807022095, "learning_rate": 1.956020132570739e-05, "loss": 0.5609, "step": 9027 }, { "epoch": 0.19146995821933788, "grad_norm": 0.35983648896217346, "learning_rate": 1.956010350586214e-05, "loss": 0.533, "step": 9028 }, { "epoch": 0.1914911666772709, "grad_norm": 0.32703086733818054, "learning_rate": 1.9560005675384224e-05, "loss": 0.506, "step": 9029 }, { "epoch": 0.19151237513520392, "grad_norm": 0.3612675666809082, "learning_rate": 1.9559907834273756e-05, "loss": 0.5469, "step": 9030 }, { "epoch": 0.19153358359313694, "grad_norm": 0.3227074444293976, "learning_rate": 1.9559809982530836e-05, "loss": 0.6204, "step": 9031 }, { "epoch": 0.19155479205106996, "grad_norm": 0.3279741108417511, "learning_rate": 1.9559712120155582e-05, "loss": 0.4956, "step": 9032 }, { "epoch": 0.191576000509003, "grad_norm": 0.34795883297920227, "learning_rate": 1.95596142471481e-05, "loss": 0.5944, "step": 9033 }, { "epoch": 0.19159720896693602, "grad_norm": 0.347489595413208, "learning_rate": 1.9559516363508497e-05, "loss": 0.5738, "step": 9034 }, { "epoch": 0.19161841742486904, "grad_norm": 0.3031734824180603, "learning_rate": 1.9559418469236883e-05, "loss": 0.4833, "step": 9035 }, { "epoch": 0.19163962588280206, "grad_norm": 0.3181517720222473, "learning_rate": 1.9559320564333366e-05, "loss": 0.5365, "step": 9036 }, { "epoch": 0.19166083434073508, "grad_norm": 0.3804231286048889, "learning_rate": 1.9559222648798057e-05, "loss": 0.5197, "step": 9037 }, { "epoch": 0.1916820427986681, "grad_norm": 0.310634046792984, "learning_rate": 1.9559124722631064e-05, "loss": 0.5161, "step": 9038 }, { "epoch": 0.19170325125660112, "grad_norm": 0.3099343776702881, "learning_rate": 1.9559026785832494e-05, "loss": 0.5483, "step": 9039 }, { "epoch": 0.19172445971453417, "grad_norm": 0.37883105874061584, "learning_rate": 1.955892883840246e-05, "loss": 0.5598, "step": 9040 }, { "epoch": 0.1917456681724672, "grad_norm": 0.35247188806533813, "learning_rate": 1.9558830880341063e-05, "loss": 0.5978, "step": 9041 }, { "epoch": 0.1917668766304002, "grad_norm": 0.3241996765136719, "learning_rate": 1.9558732911648422e-05, "loss": 0.5145, "step": 9042 }, { "epoch": 0.19178808508833323, "grad_norm": 0.3069225251674652, "learning_rate": 1.9558634932324644e-05, "loss": 0.541, "step": 9043 }, { "epoch": 0.19180929354626625, "grad_norm": 0.3294859528541565, "learning_rate": 1.955853694236983e-05, "loss": 0.4893, "step": 9044 }, { "epoch": 0.19183050200419927, "grad_norm": 0.34686121344566345, "learning_rate": 1.9558438941784095e-05, "loss": 0.5197, "step": 9045 }, { "epoch": 0.19185171046213229, "grad_norm": 0.3025037944316864, "learning_rate": 1.9558340930567548e-05, "loss": 0.4364, "step": 9046 }, { "epoch": 0.19187291892006533, "grad_norm": 0.3196095824241638, "learning_rate": 1.9558242908720297e-05, "loss": 0.5422, "step": 9047 }, { "epoch": 0.19189412737799835, "grad_norm": 0.314594566822052, "learning_rate": 1.9558144876242454e-05, "loss": 0.5162, "step": 9048 }, { "epoch": 0.19191533583593137, "grad_norm": 0.36951884627342224, "learning_rate": 1.955804683313412e-05, "loss": 0.4393, "step": 9049 }, { "epoch": 0.1919365442938644, "grad_norm": 0.3269248306751251, "learning_rate": 1.9557948779395414e-05, "loss": 0.4921, "step": 9050 }, { "epoch": 0.1919577527517974, "grad_norm": 0.3587474822998047, "learning_rate": 1.9557850715026435e-05, "loss": 0.5481, "step": 9051 }, { "epoch": 0.19197896120973043, "grad_norm": 0.31838592886924744, "learning_rate": 1.9557752640027304e-05, "loss": 0.4829, "step": 9052 }, { "epoch": 0.19200016966766345, "grad_norm": 0.34387069940567017, "learning_rate": 1.955765455439812e-05, "loss": 0.4486, "step": 9053 }, { "epoch": 0.1920213781255965, "grad_norm": 0.34014150500297546, "learning_rate": 1.9557556458138996e-05, "loss": 0.4877, "step": 9054 }, { "epoch": 0.19204258658352952, "grad_norm": 0.29535961151123047, "learning_rate": 1.955745835125004e-05, "loss": 0.476, "step": 9055 }, { "epoch": 0.19206379504146254, "grad_norm": 0.33402183651924133, "learning_rate": 1.9557360233731363e-05, "loss": 0.5008, "step": 9056 }, { "epoch": 0.19208500349939556, "grad_norm": 0.4590485692024231, "learning_rate": 1.9557262105583073e-05, "loss": 0.46, "step": 9057 }, { "epoch": 0.19210621195732858, "grad_norm": 0.3728232681751251, "learning_rate": 1.9557163966805278e-05, "loss": 0.5237, "step": 9058 }, { "epoch": 0.1921274204152616, "grad_norm": 0.30471473932266235, "learning_rate": 1.955706581739809e-05, "loss": 0.4183, "step": 9059 }, { "epoch": 0.19214862887319464, "grad_norm": 0.3351106643676758, "learning_rate": 1.9556967657361615e-05, "loss": 0.6062, "step": 9060 }, { "epoch": 0.19216983733112766, "grad_norm": 0.35251668095588684, "learning_rate": 1.9556869486695964e-05, "loss": 0.4905, "step": 9061 }, { "epoch": 0.19219104578906068, "grad_norm": 0.3117122948169708, "learning_rate": 1.955677130540125e-05, "loss": 0.5047, "step": 9062 }, { "epoch": 0.1922122542469937, "grad_norm": 0.352236270904541, "learning_rate": 1.955667311347757e-05, "loss": 0.5279, "step": 9063 }, { "epoch": 0.19223346270492672, "grad_norm": 0.32480937242507935, "learning_rate": 1.9556574910925046e-05, "loss": 0.4652, "step": 9064 }, { "epoch": 0.19225467116285974, "grad_norm": 0.3173174560070038, "learning_rate": 1.955647669774378e-05, "loss": 0.5046, "step": 9065 }, { "epoch": 0.19227587962079276, "grad_norm": 0.34138163924217224, "learning_rate": 1.9556378473933885e-05, "loss": 0.4746, "step": 9066 }, { "epoch": 0.1922970880787258, "grad_norm": 0.33478066325187683, "learning_rate": 1.955628023949547e-05, "loss": 0.5516, "step": 9067 }, { "epoch": 0.19231829653665883, "grad_norm": 0.3002221882343292, "learning_rate": 1.955618199442864e-05, "loss": 0.5239, "step": 9068 }, { "epoch": 0.19233950499459185, "grad_norm": 0.37196680903434753, "learning_rate": 1.955608373873351e-05, "loss": 0.4975, "step": 9069 }, { "epoch": 0.19236071345252487, "grad_norm": 0.35669052600860596, "learning_rate": 1.9555985472410186e-05, "loss": 0.5129, "step": 9070 }, { "epoch": 0.1923819219104579, "grad_norm": 0.3419059216976166, "learning_rate": 1.955588719545878e-05, "loss": 0.5059, "step": 9071 }, { "epoch": 0.1924031303683909, "grad_norm": 0.30897271633148193, "learning_rate": 1.9555788907879397e-05, "loss": 0.5133, "step": 9072 }, { "epoch": 0.19242433882632393, "grad_norm": 0.3391414284706116, "learning_rate": 1.955569060967215e-05, "loss": 0.5329, "step": 9073 }, { "epoch": 0.19244554728425697, "grad_norm": 0.33092713356018066, "learning_rate": 1.9555592300837146e-05, "loss": 0.4765, "step": 9074 }, { "epoch": 0.19246675574219, "grad_norm": 0.365436851978302, "learning_rate": 1.9555493981374494e-05, "loss": 0.5012, "step": 9075 }, { "epoch": 0.192487964200123, "grad_norm": 0.3600910007953644, "learning_rate": 1.9555395651284306e-05, "loss": 0.5128, "step": 9076 }, { "epoch": 0.19250917265805603, "grad_norm": 0.3386285603046417, "learning_rate": 1.955529731056669e-05, "loss": 0.5212, "step": 9077 }, { "epoch": 0.19253038111598905, "grad_norm": 0.2983817160129547, "learning_rate": 1.9555198959221756e-05, "loss": 0.5446, "step": 9078 }, { "epoch": 0.19255158957392207, "grad_norm": 0.29952317476272583, "learning_rate": 1.955510059724961e-05, "loss": 0.5054, "step": 9079 }, { "epoch": 0.1925727980318551, "grad_norm": 0.3233781158924103, "learning_rate": 1.9555002224650367e-05, "loss": 0.5852, "step": 9080 }, { "epoch": 0.19259400648978814, "grad_norm": 0.30791234970092773, "learning_rate": 1.9554903841424132e-05, "loss": 0.5199, "step": 9081 }, { "epoch": 0.19261521494772116, "grad_norm": 0.338989794254303, "learning_rate": 1.9554805447571015e-05, "loss": 0.5145, "step": 9082 }, { "epoch": 0.19263642340565418, "grad_norm": 0.3297017812728882, "learning_rate": 1.955470704309113e-05, "loss": 0.5409, "step": 9083 }, { "epoch": 0.1926576318635872, "grad_norm": 0.3256508409976959, "learning_rate": 1.955460862798458e-05, "loss": 0.5735, "step": 9084 }, { "epoch": 0.19267884032152022, "grad_norm": 0.3355054557323456, "learning_rate": 1.955451020225148e-05, "loss": 0.497, "step": 9085 }, { "epoch": 0.19270004877945324, "grad_norm": 0.3193248212337494, "learning_rate": 1.955441176589193e-05, "loss": 0.4846, "step": 9086 }, { "epoch": 0.19272125723738626, "grad_norm": 0.3416776955127716, "learning_rate": 1.9554313318906054e-05, "loss": 0.5623, "step": 9087 }, { "epoch": 0.1927424656953193, "grad_norm": 0.32561370730400085, "learning_rate": 1.955421486129395e-05, "loss": 0.4948, "step": 9088 }, { "epoch": 0.19276367415325232, "grad_norm": 0.29683494567871094, "learning_rate": 1.9554116393055732e-05, "loss": 0.5526, "step": 9089 }, { "epoch": 0.19278488261118534, "grad_norm": 0.29381322860717773, "learning_rate": 1.955401791419151e-05, "loss": 0.5438, "step": 9090 }, { "epoch": 0.19280609106911836, "grad_norm": 0.3272048830986023, "learning_rate": 1.955391942470139e-05, "loss": 0.4672, "step": 9091 }, { "epoch": 0.19282729952705138, "grad_norm": 0.3359290361404419, "learning_rate": 1.9553820924585482e-05, "loss": 0.5028, "step": 9092 }, { "epoch": 0.1928485079849844, "grad_norm": 0.3363441824913025, "learning_rate": 1.9553722413843902e-05, "loss": 0.4741, "step": 9093 }, { "epoch": 0.19286971644291745, "grad_norm": 0.5579574108123779, "learning_rate": 1.955362389247675e-05, "loss": 0.5888, "step": 9094 }, { "epoch": 0.19289092490085047, "grad_norm": 0.3495161831378937, "learning_rate": 1.9553525360484148e-05, "loss": 0.4963, "step": 9095 }, { "epoch": 0.1929121333587835, "grad_norm": 0.46969810128211975, "learning_rate": 1.9553426817866194e-05, "loss": 0.6386, "step": 9096 }, { "epoch": 0.1929333418167165, "grad_norm": 0.38312530517578125, "learning_rate": 1.9553328264623e-05, "loss": 0.5281, "step": 9097 }, { "epoch": 0.19295455027464953, "grad_norm": 0.6100209951400757, "learning_rate": 1.9553229700754678e-05, "loss": 0.5453, "step": 9098 }, { "epoch": 0.19297575873258255, "grad_norm": 0.30800408124923706, "learning_rate": 1.955313112626134e-05, "loss": 0.5273, "step": 9099 }, { "epoch": 0.19299696719051557, "grad_norm": 0.3277183771133423, "learning_rate": 1.9553032541143087e-05, "loss": 0.5587, "step": 9100 }, { "epoch": 0.19301817564844861, "grad_norm": 0.3977847695350647, "learning_rate": 1.9552933945400036e-05, "loss": 0.4072, "step": 9101 }, { "epoch": 0.19303938410638163, "grad_norm": 0.2962535321712494, "learning_rate": 1.9552835339032295e-05, "loss": 0.4229, "step": 9102 }, { "epoch": 0.19306059256431465, "grad_norm": 0.3274942934513092, "learning_rate": 1.9552736722039977e-05, "loss": 0.5047, "step": 9103 }, { "epoch": 0.19308180102224767, "grad_norm": 0.3703705668449402, "learning_rate": 1.9552638094423188e-05, "loss": 0.482, "step": 9104 }, { "epoch": 0.1931030094801807, "grad_norm": 0.32145556807518005, "learning_rate": 1.9552539456182035e-05, "loss": 0.5231, "step": 9105 }, { "epoch": 0.1931242179381137, "grad_norm": 0.30020052194595337, "learning_rate": 1.9552440807316628e-05, "loss": 0.494, "step": 9106 }, { "epoch": 0.19314542639604673, "grad_norm": 0.3630784749984741, "learning_rate": 1.9552342147827084e-05, "loss": 0.531, "step": 9107 }, { "epoch": 0.19316663485397978, "grad_norm": 0.34909722208976746, "learning_rate": 1.9552243477713507e-05, "loss": 0.5613, "step": 9108 }, { "epoch": 0.1931878433119128, "grad_norm": 0.4202093482017517, "learning_rate": 1.9552144796976006e-05, "loss": 0.5037, "step": 9109 }, { "epoch": 0.19320905176984582, "grad_norm": 0.3452287018299103, "learning_rate": 1.9552046105614696e-05, "loss": 0.6141, "step": 9110 }, { "epoch": 0.19323026022777884, "grad_norm": 0.3234134316444397, "learning_rate": 1.9551947403629682e-05, "loss": 0.5448, "step": 9111 }, { "epoch": 0.19325146868571186, "grad_norm": 0.3051866888999939, "learning_rate": 1.9551848691021075e-05, "loss": 0.5577, "step": 9112 }, { "epoch": 0.19327267714364488, "grad_norm": 0.3198871910572052, "learning_rate": 1.955174996778898e-05, "loss": 0.5997, "step": 9113 }, { "epoch": 0.1932938856015779, "grad_norm": 0.3280767500400543, "learning_rate": 1.9551651233933516e-05, "loss": 0.5073, "step": 9114 }, { "epoch": 0.19331509405951094, "grad_norm": 0.3432201147079468, "learning_rate": 1.955155248945479e-05, "loss": 0.5212, "step": 9115 }, { "epoch": 0.19333630251744396, "grad_norm": 0.30986982583999634, "learning_rate": 1.955145373435291e-05, "loss": 0.4579, "step": 9116 }, { "epoch": 0.19335751097537698, "grad_norm": 0.34380415081977844, "learning_rate": 1.9551354968627984e-05, "loss": 0.583, "step": 9117 }, { "epoch": 0.19337871943331, "grad_norm": 0.4031250476837158, "learning_rate": 1.9551256192280125e-05, "loss": 0.476, "step": 9118 }, { "epoch": 0.19339992789124302, "grad_norm": 0.3270024061203003, "learning_rate": 1.955115740530944e-05, "loss": 0.5239, "step": 9119 }, { "epoch": 0.19342113634917604, "grad_norm": 0.33560794591903687, "learning_rate": 1.9551058607716045e-05, "loss": 0.5082, "step": 9120 }, { "epoch": 0.19344234480710906, "grad_norm": 0.3192598819732666, "learning_rate": 1.955095979950004e-05, "loss": 0.468, "step": 9121 }, { "epoch": 0.1934635532650421, "grad_norm": 0.32954075932502747, "learning_rate": 1.9550860980661546e-05, "loss": 0.4689, "step": 9122 }, { "epoch": 0.19348476172297513, "grad_norm": 0.3389740288257599, "learning_rate": 1.9550762151200665e-05, "loss": 0.5332, "step": 9123 }, { "epoch": 0.19350597018090815, "grad_norm": 0.3399777412414551, "learning_rate": 1.9550663311117507e-05, "loss": 0.5283, "step": 9124 }, { "epoch": 0.19352717863884117, "grad_norm": 0.3603091239929199, "learning_rate": 1.9550564460412186e-05, "loss": 0.5564, "step": 9125 }, { "epoch": 0.1935483870967742, "grad_norm": 0.393595427274704, "learning_rate": 1.955046559908481e-05, "loss": 0.5425, "step": 9126 }, { "epoch": 0.1935695955547072, "grad_norm": 0.33910509943962097, "learning_rate": 1.955036672713549e-05, "loss": 0.503, "step": 9127 }, { "epoch": 0.19359080401264023, "grad_norm": 0.36085715889930725, "learning_rate": 1.9550267844564335e-05, "loss": 0.5234, "step": 9128 }, { "epoch": 0.19361201247057327, "grad_norm": 0.293710857629776, "learning_rate": 1.9550168951371455e-05, "loss": 0.4908, "step": 9129 }, { "epoch": 0.1936332209285063, "grad_norm": 0.3545270264148712, "learning_rate": 1.955007004755696e-05, "loss": 0.5961, "step": 9130 }, { "epoch": 0.19365442938643931, "grad_norm": 0.3327273726463318, "learning_rate": 1.954997113312096e-05, "loss": 0.6092, "step": 9131 }, { "epoch": 0.19367563784437233, "grad_norm": 0.38901662826538086, "learning_rate": 1.9549872208063565e-05, "loss": 0.4675, "step": 9132 }, { "epoch": 0.19369684630230535, "grad_norm": 0.3264743983745575, "learning_rate": 1.9549773272384884e-05, "loss": 0.5742, "step": 9133 }, { "epoch": 0.19371805476023837, "grad_norm": 0.2979828119277954, "learning_rate": 1.9549674326085027e-05, "loss": 0.4382, "step": 9134 }, { "epoch": 0.19373926321817142, "grad_norm": 0.31623533368110657, "learning_rate": 1.954957536916411e-05, "loss": 0.5267, "step": 9135 }, { "epoch": 0.19376047167610444, "grad_norm": 0.31291675567626953, "learning_rate": 1.9549476401622235e-05, "loss": 0.5626, "step": 9136 }, { "epoch": 0.19378168013403746, "grad_norm": 0.31111857295036316, "learning_rate": 1.9549377423459516e-05, "loss": 0.4912, "step": 9137 }, { "epoch": 0.19380288859197048, "grad_norm": 0.332344114780426, "learning_rate": 1.954927843467606e-05, "loss": 0.4934, "step": 9138 }, { "epoch": 0.1938240970499035, "grad_norm": 0.7485111951828003, "learning_rate": 1.9549179435271985e-05, "loss": 0.5812, "step": 9139 }, { "epoch": 0.19384530550783652, "grad_norm": 0.4158782362937927, "learning_rate": 1.954908042524739e-05, "loss": 0.6143, "step": 9140 }, { "epoch": 0.19386651396576954, "grad_norm": 0.30707651376724243, "learning_rate": 1.9548981404602394e-05, "loss": 0.5062, "step": 9141 }, { "epoch": 0.19388772242370259, "grad_norm": 0.31304043531417847, "learning_rate": 1.9548882373337103e-05, "loss": 0.5268, "step": 9142 }, { "epoch": 0.1939089308816356, "grad_norm": 0.32277238368988037, "learning_rate": 1.9548783331451628e-05, "loss": 0.5079, "step": 9143 }, { "epoch": 0.19393013933956862, "grad_norm": 0.3480767607688904, "learning_rate": 1.9548684278946077e-05, "loss": 0.522, "step": 9144 }, { "epoch": 0.19395134779750164, "grad_norm": 0.31653234362602234, "learning_rate": 1.9548585215820566e-05, "loss": 0.5234, "step": 9145 }, { "epoch": 0.19397255625543466, "grad_norm": 0.36283931136131287, "learning_rate": 1.9548486142075197e-05, "loss": 0.5909, "step": 9146 }, { "epoch": 0.19399376471336768, "grad_norm": 0.30284640192985535, "learning_rate": 1.954838705771009e-05, "loss": 0.4802, "step": 9147 }, { "epoch": 0.1940149731713007, "grad_norm": 0.3586302399635315, "learning_rate": 1.9548287962725347e-05, "loss": 0.5052, "step": 9148 }, { "epoch": 0.19403618162923375, "grad_norm": 0.34486132860183716, "learning_rate": 1.9548188857121083e-05, "loss": 0.4802, "step": 9149 }, { "epoch": 0.19405739008716677, "grad_norm": 0.36186647415161133, "learning_rate": 1.95480897408974e-05, "loss": 0.5979, "step": 9150 }, { "epoch": 0.1940785985450998, "grad_norm": 0.3490132689476013, "learning_rate": 1.954799061405442e-05, "loss": 0.578, "step": 9151 }, { "epoch": 0.1940998070030328, "grad_norm": 0.3221866190433502, "learning_rate": 1.9547891476592248e-05, "loss": 0.4205, "step": 9152 }, { "epoch": 0.19412101546096583, "grad_norm": 0.3873756229877472, "learning_rate": 1.9547792328510993e-05, "loss": 0.5639, "step": 9153 }, { "epoch": 0.19414222391889885, "grad_norm": 0.3550685942173004, "learning_rate": 1.954769316981077e-05, "loss": 0.5756, "step": 9154 }, { "epoch": 0.19416343237683187, "grad_norm": 0.34095701575279236, "learning_rate": 1.9547594000491678e-05, "loss": 0.517, "step": 9155 }, { "epoch": 0.19418464083476492, "grad_norm": 0.3334227204322815, "learning_rate": 1.954749482055384e-05, "loss": 0.5368, "step": 9156 }, { "epoch": 0.19420584929269794, "grad_norm": 0.3096643090248108, "learning_rate": 1.9547395629997358e-05, "loss": 0.4307, "step": 9157 }, { "epoch": 0.19422705775063095, "grad_norm": 0.3279954195022583, "learning_rate": 1.954729642882235e-05, "loss": 0.6578, "step": 9158 }, { "epoch": 0.19424826620856397, "grad_norm": 0.31968826055526733, "learning_rate": 1.9547197217028916e-05, "loss": 0.4502, "step": 9159 }, { "epoch": 0.194269474666497, "grad_norm": 0.42849406599998474, "learning_rate": 1.9547097994617176e-05, "loss": 0.5054, "step": 9160 }, { "epoch": 0.19429068312443, "grad_norm": 0.5309965014457703, "learning_rate": 1.9546998761587237e-05, "loss": 0.5536, "step": 9161 }, { "epoch": 0.19431189158236303, "grad_norm": 0.32209256291389465, "learning_rate": 1.954689951793921e-05, "loss": 0.4855, "step": 9162 }, { "epoch": 0.19433310004029608, "grad_norm": 0.35093972086906433, "learning_rate": 1.9546800263673202e-05, "loss": 0.4854, "step": 9163 }, { "epoch": 0.1943543084982291, "grad_norm": 0.31621241569519043, "learning_rate": 1.9546700998789325e-05, "loss": 0.4998, "step": 9164 }, { "epoch": 0.19437551695616212, "grad_norm": 0.3141343295574188, "learning_rate": 1.954660172328769e-05, "loss": 0.4671, "step": 9165 }, { "epoch": 0.19439672541409514, "grad_norm": 0.32235950231552124, "learning_rate": 1.954650243716841e-05, "loss": 0.5393, "step": 9166 }, { "epoch": 0.19441793387202816, "grad_norm": 0.30937033891677856, "learning_rate": 1.9546403140431593e-05, "loss": 0.4728, "step": 9167 }, { "epoch": 0.19443914232996118, "grad_norm": 0.3390180766582489, "learning_rate": 1.9546303833077345e-05, "loss": 0.4751, "step": 9168 }, { "epoch": 0.1944603507878942, "grad_norm": 0.31943508982658386, "learning_rate": 1.9546204515105784e-05, "loss": 0.4764, "step": 9169 }, { "epoch": 0.19448155924582725, "grad_norm": 0.685204029083252, "learning_rate": 1.9546105186517017e-05, "loss": 0.4873, "step": 9170 }, { "epoch": 0.19450276770376027, "grad_norm": 0.4070281982421875, "learning_rate": 1.9546005847311153e-05, "loss": 0.5884, "step": 9171 }, { "epoch": 0.19452397616169328, "grad_norm": 0.3586117923259735, "learning_rate": 1.954590649748831e-05, "loss": 0.5436, "step": 9172 }, { "epoch": 0.1945451846196263, "grad_norm": 0.366201788187027, "learning_rate": 1.9545807137048587e-05, "loss": 0.6083, "step": 9173 }, { "epoch": 0.19456639307755932, "grad_norm": 0.4146416485309601, "learning_rate": 1.9545707765992102e-05, "loss": 0.5657, "step": 9174 }, { "epoch": 0.19458760153549234, "grad_norm": 0.33811357617378235, "learning_rate": 1.9545608384318963e-05, "loss": 0.4462, "step": 9175 }, { "epoch": 0.1946088099934254, "grad_norm": 0.5341445207595825, "learning_rate": 1.954550899202928e-05, "loss": 0.4919, "step": 9176 }, { "epoch": 0.1946300184513584, "grad_norm": 0.37673601508140564, "learning_rate": 1.9545409589123167e-05, "loss": 0.5386, "step": 9177 }, { "epoch": 0.19465122690929143, "grad_norm": 0.350841760635376, "learning_rate": 1.9545310175600733e-05, "loss": 0.5197, "step": 9178 }, { "epoch": 0.19467243536722445, "grad_norm": 0.35693174600601196, "learning_rate": 1.9545210751462085e-05, "loss": 0.5404, "step": 9179 }, { "epoch": 0.19469364382515747, "grad_norm": 0.3569504916667938, "learning_rate": 1.954511131670734e-05, "loss": 0.6472, "step": 9180 }, { "epoch": 0.1947148522830905, "grad_norm": 0.27659285068511963, "learning_rate": 1.9545011871336603e-05, "loss": 0.4416, "step": 9181 }, { "epoch": 0.1947360607410235, "grad_norm": 0.3165196478366852, "learning_rate": 1.9544912415349988e-05, "loss": 0.4585, "step": 9182 }, { "epoch": 0.19475726919895656, "grad_norm": 0.3197905719280243, "learning_rate": 1.9544812948747604e-05, "loss": 0.462, "step": 9183 }, { "epoch": 0.19477847765688958, "grad_norm": 0.34318435192108154, "learning_rate": 1.9544713471529562e-05, "loss": 0.5614, "step": 9184 }, { "epoch": 0.1947996861148226, "grad_norm": 0.3493315577507019, "learning_rate": 1.9544613983695972e-05, "loss": 0.5588, "step": 9185 }, { "epoch": 0.19482089457275562, "grad_norm": 0.35455450415611267, "learning_rate": 1.9544514485246945e-05, "loss": 0.4705, "step": 9186 }, { "epoch": 0.19484210303068863, "grad_norm": 0.3799602687358856, "learning_rate": 1.9544414976182592e-05, "loss": 0.5339, "step": 9187 }, { "epoch": 0.19486331148862165, "grad_norm": 0.30023258924484253, "learning_rate": 1.9544315456503026e-05, "loss": 0.4763, "step": 9188 }, { "epoch": 0.19488451994655467, "grad_norm": 0.39062848687171936, "learning_rate": 1.9544215926208352e-05, "loss": 0.5381, "step": 9189 }, { "epoch": 0.19490572840448772, "grad_norm": 0.33725330233573914, "learning_rate": 1.9544116385298684e-05, "loss": 0.5881, "step": 9190 }, { "epoch": 0.19492693686242074, "grad_norm": 0.4049285352230072, "learning_rate": 1.9544016833774132e-05, "loss": 0.5685, "step": 9191 }, { "epoch": 0.19494814532035376, "grad_norm": 0.3405046761035919, "learning_rate": 1.954391727163481e-05, "loss": 0.5469, "step": 9192 }, { "epoch": 0.19496935377828678, "grad_norm": 0.31558260321617126, "learning_rate": 1.9543817698880827e-05, "loss": 0.5407, "step": 9193 }, { "epoch": 0.1949905622362198, "grad_norm": 0.3275965750217438, "learning_rate": 1.9543718115512292e-05, "loss": 0.48, "step": 9194 }, { "epoch": 0.19501177069415282, "grad_norm": 0.32963550090789795, "learning_rate": 1.9543618521529315e-05, "loss": 0.5387, "step": 9195 }, { "epoch": 0.19503297915208584, "grad_norm": 0.4235794246196747, "learning_rate": 1.954351891693201e-05, "loss": 0.483, "step": 9196 }, { "epoch": 0.1950541876100189, "grad_norm": 0.37997105717658997, "learning_rate": 1.9543419301720483e-05, "loss": 0.6004, "step": 9197 }, { "epoch": 0.1950753960679519, "grad_norm": 0.3148883879184723, "learning_rate": 1.954331967589485e-05, "loss": 0.4442, "step": 9198 }, { "epoch": 0.19509660452588493, "grad_norm": 0.35493335127830505, "learning_rate": 1.9543220039455222e-05, "loss": 0.5408, "step": 9199 }, { "epoch": 0.19511781298381795, "grad_norm": 0.3240819573402405, "learning_rate": 1.9543120392401705e-05, "loss": 0.4935, "step": 9200 }, { "epoch": 0.19513902144175096, "grad_norm": 0.29778289794921875, "learning_rate": 1.9543020734734414e-05, "loss": 0.4933, "step": 9201 }, { "epoch": 0.19516022989968398, "grad_norm": 0.3272469639778137, "learning_rate": 1.9542921066453458e-05, "loss": 0.5685, "step": 9202 }, { "epoch": 0.195181438357617, "grad_norm": 0.3184017837047577, "learning_rate": 1.9542821387558945e-05, "loss": 0.4905, "step": 9203 }, { "epoch": 0.19520264681555005, "grad_norm": 0.39318153262138367, "learning_rate": 1.954272169805099e-05, "loss": 0.4891, "step": 9204 }, { "epoch": 0.19522385527348307, "grad_norm": 0.42199835181236267, "learning_rate": 1.9542621997929708e-05, "loss": 0.5455, "step": 9205 }, { "epoch": 0.1952450637314161, "grad_norm": 0.34658029675483704, "learning_rate": 1.95425222871952e-05, "loss": 0.5336, "step": 9206 }, { "epoch": 0.1952662721893491, "grad_norm": 0.35298702120780945, "learning_rate": 1.9542422565847582e-05, "loss": 0.4743, "step": 9207 }, { "epoch": 0.19528748064728213, "grad_norm": 0.36885932087898254, "learning_rate": 1.9542322833886963e-05, "loss": 0.6359, "step": 9208 }, { "epoch": 0.19530868910521515, "grad_norm": 0.38699260354042053, "learning_rate": 1.954222309131346e-05, "loss": 0.5531, "step": 9209 }, { "epoch": 0.1953298975631482, "grad_norm": 0.37011969089508057, "learning_rate": 1.9542123338127177e-05, "loss": 0.6103, "step": 9210 }, { "epoch": 0.19535110602108122, "grad_norm": 0.29757753014564514, "learning_rate": 1.9542023574328227e-05, "loss": 0.5253, "step": 9211 }, { "epoch": 0.19537231447901424, "grad_norm": 0.474342405796051, "learning_rate": 1.954192379991672e-05, "loss": 0.5426, "step": 9212 }, { "epoch": 0.19539352293694726, "grad_norm": 0.30996018648147583, "learning_rate": 1.954182401489277e-05, "loss": 0.4337, "step": 9213 }, { "epoch": 0.19541473139488028, "grad_norm": 0.34200114011764526, "learning_rate": 1.9541724219256486e-05, "loss": 0.6331, "step": 9214 }, { "epoch": 0.1954359398528133, "grad_norm": 0.35100042819976807, "learning_rate": 1.954162441300798e-05, "loss": 0.5242, "step": 9215 }, { "epoch": 0.19545714831074631, "grad_norm": 0.3624305725097656, "learning_rate": 1.954152459614736e-05, "loss": 0.6456, "step": 9216 }, { "epoch": 0.19547835676867936, "grad_norm": 0.3423305153846741, "learning_rate": 1.954142476867474e-05, "loss": 0.5067, "step": 9217 }, { "epoch": 0.19549956522661238, "grad_norm": 0.30557215213775635, "learning_rate": 1.954132493059023e-05, "loss": 0.4768, "step": 9218 }, { "epoch": 0.1955207736845454, "grad_norm": 0.3178982436656952, "learning_rate": 1.954122508189394e-05, "loss": 0.5484, "step": 9219 }, { "epoch": 0.19554198214247842, "grad_norm": 0.30590468645095825, "learning_rate": 1.9541125222585988e-05, "loss": 0.5401, "step": 9220 }, { "epoch": 0.19556319060041144, "grad_norm": 0.31333065032958984, "learning_rate": 1.9541025352666474e-05, "loss": 0.5052, "step": 9221 }, { "epoch": 0.19558439905834446, "grad_norm": 0.3682478070259094, "learning_rate": 1.9540925472135517e-05, "loss": 0.6298, "step": 9222 }, { "epoch": 0.19560560751627748, "grad_norm": 0.35329470038414, "learning_rate": 1.9540825580993227e-05, "loss": 0.507, "step": 9223 }, { "epoch": 0.19562681597421053, "grad_norm": 0.33509159088134766, "learning_rate": 1.954072567923971e-05, "loss": 0.5404, "step": 9224 }, { "epoch": 0.19564802443214355, "grad_norm": 0.3175010681152344, "learning_rate": 1.954062576687508e-05, "loss": 0.3777, "step": 9225 }, { "epoch": 0.19566923289007657, "grad_norm": 0.3259255588054657, "learning_rate": 1.9540525843899453e-05, "loss": 0.5388, "step": 9226 }, { "epoch": 0.19569044134800959, "grad_norm": 0.3450017273426056, "learning_rate": 1.9540425910312934e-05, "loss": 0.6074, "step": 9227 }, { "epoch": 0.1957116498059426, "grad_norm": 0.35608971118927, "learning_rate": 1.9540325966115637e-05, "loss": 0.4915, "step": 9228 }, { "epoch": 0.19573285826387563, "grad_norm": 0.32604604959487915, "learning_rate": 1.9540226011307674e-05, "loss": 0.5173, "step": 9229 }, { "epoch": 0.19575406672180864, "grad_norm": 0.3205234110355377, "learning_rate": 1.9540126045889152e-05, "loss": 0.5569, "step": 9230 }, { "epoch": 0.1957752751797417, "grad_norm": 0.4130273759365082, "learning_rate": 1.9540026069860185e-05, "loss": 0.5157, "step": 9231 }, { "epoch": 0.1957964836376747, "grad_norm": 0.3108254671096802, "learning_rate": 1.9539926083220882e-05, "loss": 0.5139, "step": 9232 }, { "epoch": 0.19581769209560773, "grad_norm": 0.3459242284297943, "learning_rate": 1.9539826085971357e-05, "loss": 0.5634, "step": 9233 }, { "epoch": 0.19583890055354075, "grad_norm": 0.3565155267715454, "learning_rate": 1.953972607811172e-05, "loss": 0.5465, "step": 9234 }, { "epoch": 0.19586010901147377, "grad_norm": 0.3245510756969452, "learning_rate": 1.9539626059642086e-05, "loss": 0.5903, "step": 9235 }, { "epoch": 0.1958813174694068, "grad_norm": 0.33700740337371826, "learning_rate": 1.953952603056256e-05, "loss": 0.478, "step": 9236 }, { "epoch": 0.1959025259273398, "grad_norm": 0.6330617070198059, "learning_rate": 1.9539425990873256e-05, "loss": 0.4027, "step": 9237 }, { "epoch": 0.19592373438527286, "grad_norm": 0.3348838984966278, "learning_rate": 1.9539325940574288e-05, "loss": 0.4995, "step": 9238 }, { "epoch": 0.19594494284320588, "grad_norm": 0.4190053939819336, "learning_rate": 1.953922587966576e-05, "loss": 0.5548, "step": 9239 }, { "epoch": 0.1959661513011389, "grad_norm": 0.3389776349067688, "learning_rate": 1.953912580814779e-05, "loss": 0.5305, "step": 9240 }, { "epoch": 0.19598735975907192, "grad_norm": 0.3115772306919098, "learning_rate": 1.953902572602049e-05, "loss": 0.4843, "step": 9241 }, { "epoch": 0.19600856821700494, "grad_norm": 0.320385217666626, "learning_rate": 1.9538925633283965e-05, "loss": 0.535, "step": 9242 }, { "epoch": 0.19602977667493796, "grad_norm": 0.3153727948665619, "learning_rate": 1.953882552993833e-05, "loss": 0.5227, "step": 9243 }, { "epoch": 0.19605098513287098, "grad_norm": 0.36015117168426514, "learning_rate": 1.9538725415983696e-05, "loss": 0.5521, "step": 9244 }, { "epoch": 0.19607219359080402, "grad_norm": 0.3084399998188019, "learning_rate": 1.9538625291420176e-05, "loss": 0.4474, "step": 9245 }, { "epoch": 0.19609340204873704, "grad_norm": 0.3002050518989563, "learning_rate": 1.953852515624788e-05, "loss": 0.5354, "step": 9246 }, { "epoch": 0.19611461050667006, "grad_norm": 0.4090595245361328, "learning_rate": 1.9538425010466916e-05, "loss": 0.6186, "step": 9247 }, { "epoch": 0.19613581896460308, "grad_norm": 0.3066784739494324, "learning_rate": 1.9538324854077403e-05, "loss": 0.3708, "step": 9248 }, { "epoch": 0.1961570274225361, "grad_norm": 0.3376876413822174, "learning_rate": 1.9538224687079446e-05, "loss": 0.5064, "step": 9249 }, { "epoch": 0.19617823588046912, "grad_norm": 0.33173292875289917, "learning_rate": 1.9538124509473158e-05, "loss": 0.5174, "step": 9250 }, { "epoch": 0.19619944433840217, "grad_norm": 0.32970690727233887, "learning_rate": 1.9538024321258652e-05, "loss": 0.573, "step": 9251 }, { "epoch": 0.1962206527963352, "grad_norm": 0.3073074221611023, "learning_rate": 1.9537924122436035e-05, "loss": 0.4701, "step": 9252 }, { "epoch": 0.1962418612542682, "grad_norm": 0.34321731328964233, "learning_rate": 1.9537823913005424e-05, "loss": 0.5415, "step": 9253 }, { "epoch": 0.19626306971220123, "grad_norm": 0.3348880410194397, "learning_rate": 1.9537723692966928e-05, "loss": 0.4291, "step": 9254 }, { "epoch": 0.19628427817013425, "grad_norm": 0.388239324092865, "learning_rate": 1.9537623462320656e-05, "loss": 0.5455, "step": 9255 }, { "epoch": 0.19630548662806727, "grad_norm": 0.3304884433746338, "learning_rate": 1.953752322106673e-05, "loss": 0.492, "step": 9256 }, { "epoch": 0.19632669508600029, "grad_norm": 0.3277091383934021, "learning_rate": 1.9537422969205243e-05, "loss": 0.5029, "step": 9257 }, { "epoch": 0.19634790354393333, "grad_norm": 0.39290350675582886, "learning_rate": 1.9537322706736322e-05, "loss": 0.4492, "step": 9258 }, { "epoch": 0.19636911200186635, "grad_norm": 0.348626971244812, "learning_rate": 1.9537222433660076e-05, "loss": 0.5282, "step": 9259 }, { "epoch": 0.19639032045979937, "grad_norm": 0.3481038212776184, "learning_rate": 1.953712214997661e-05, "loss": 0.4597, "step": 9260 }, { "epoch": 0.1964115289177324, "grad_norm": 0.33119329810142517, "learning_rate": 1.953702185568604e-05, "loss": 0.5247, "step": 9261 }, { "epoch": 0.1964327373756654, "grad_norm": 0.32771363854408264, "learning_rate": 1.953692155078848e-05, "loss": 0.507, "step": 9262 }, { "epoch": 0.19645394583359843, "grad_norm": 0.3631216585636139, "learning_rate": 1.9536821235284038e-05, "loss": 0.5427, "step": 9263 }, { "epoch": 0.19647515429153145, "grad_norm": 0.3055051267147064, "learning_rate": 1.9536720909172823e-05, "loss": 0.4544, "step": 9264 }, { "epoch": 0.1964963627494645, "grad_norm": 0.2883816957473755, "learning_rate": 1.9536620572454952e-05, "loss": 0.4826, "step": 9265 }, { "epoch": 0.19651757120739752, "grad_norm": 0.31390413641929626, "learning_rate": 1.9536520225130535e-05, "loss": 0.5311, "step": 9266 }, { "epoch": 0.19653877966533054, "grad_norm": 0.31240084767341614, "learning_rate": 1.9536419867199684e-05, "loss": 0.5076, "step": 9267 }, { "epoch": 0.19655998812326356, "grad_norm": 0.315926730632782, "learning_rate": 1.953631949866251e-05, "loss": 0.4667, "step": 9268 }, { "epoch": 0.19658119658119658, "grad_norm": 0.3363768458366394, "learning_rate": 1.953621911951912e-05, "loss": 0.6091, "step": 9269 }, { "epoch": 0.1966024050391296, "grad_norm": 0.29887211322784424, "learning_rate": 1.953611872976963e-05, "loss": 0.5102, "step": 9270 }, { "epoch": 0.19662361349706262, "grad_norm": 0.32804471254348755, "learning_rate": 1.9536018329414156e-05, "loss": 0.5521, "step": 9271 }, { "epoch": 0.19664482195499566, "grad_norm": 0.5015822649002075, "learning_rate": 1.9535917918452804e-05, "loss": 0.5406, "step": 9272 }, { "epoch": 0.19666603041292868, "grad_norm": 0.3573596477508545, "learning_rate": 1.9535817496885685e-05, "loss": 0.433, "step": 9273 }, { "epoch": 0.1966872388708617, "grad_norm": 0.3851258456707001, "learning_rate": 1.9535717064712917e-05, "loss": 0.4963, "step": 9274 }, { "epoch": 0.19670844732879472, "grad_norm": 0.3465943932533264, "learning_rate": 1.9535616621934602e-05, "loss": 0.5253, "step": 9275 }, { "epoch": 0.19672965578672774, "grad_norm": 0.30662575364112854, "learning_rate": 1.953551616855086e-05, "loss": 0.5694, "step": 9276 }, { "epoch": 0.19675086424466076, "grad_norm": 0.3629958927631378, "learning_rate": 1.95354157045618e-05, "loss": 0.4664, "step": 9277 }, { "epoch": 0.19677207270259378, "grad_norm": 0.338768869638443, "learning_rate": 1.953531522996753e-05, "loss": 0.4652, "step": 9278 }, { "epoch": 0.19679328116052683, "grad_norm": 0.364597350358963, "learning_rate": 1.953521474476817e-05, "loss": 0.5815, "step": 9279 }, { "epoch": 0.19681448961845985, "grad_norm": 0.3555184304714203, "learning_rate": 1.9535114248963824e-05, "loss": 0.5174, "step": 9280 }, { "epoch": 0.19683569807639287, "grad_norm": 0.39780139923095703, "learning_rate": 1.953501374255461e-05, "loss": 0.521, "step": 9281 }, { "epoch": 0.1968569065343259, "grad_norm": 0.31596678495407104, "learning_rate": 1.953491322554063e-05, "loss": 0.579, "step": 9282 }, { "epoch": 0.1968781149922589, "grad_norm": 0.3369101881980896, "learning_rate": 1.9534812697922007e-05, "loss": 0.5638, "step": 9283 }, { "epoch": 0.19689932345019193, "grad_norm": 0.8845121264457703, "learning_rate": 1.9534712159698847e-05, "loss": 0.5249, "step": 9284 }, { "epoch": 0.19692053190812497, "grad_norm": 0.31390416622161865, "learning_rate": 1.9534611610871262e-05, "loss": 0.5037, "step": 9285 }, { "epoch": 0.196941740366058, "grad_norm": 0.34377458691596985, "learning_rate": 1.9534511051439366e-05, "loss": 0.532, "step": 9286 }, { "epoch": 0.196962948823991, "grad_norm": 0.6360540390014648, "learning_rate": 1.9534410481403272e-05, "loss": 0.5419, "step": 9287 }, { "epoch": 0.19698415728192403, "grad_norm": 0.3494529724121094, "learning_rate": 1.9534309900763088e-05, "loss": 0.5028, "step": 9288 }, { "epoch": 0.19700536573985705, "grad_norm": 0.4167480766773224, "learning_rate": 1.9534209309518925e-05, "loss": 0.5042, "step": 9289 }, { "epoch": 0.19702657419779007, "grad_norm": 0.3423067629337311, "learning_rate": 1.95341087076709e-05, "loss": 0.5304, "step": 9290 }, { "epoch": 0.1970477826557231, "grad_norm": 0.3342851996421814, "learning_rate": 1.9534008095219122e-05, "loss": 0.5258, "step": 9291 }, { "epoch": 0.19706899111365614, "grad_norm": 0.3397236466407776, "learning_rate": 1.95339074721637e-05, "loss": 0.5132, "step": 9292 }, { "epoch": 0.19709019957158916, "grad_norm": 0.3282329738140106, "learning_rate": 1.953380683850475e-05, "loss": 0.5164, "step": 9293 }, { "epoch": 0.19711140802952218, "grad_norm": 0.35316380858421326, "learning_rate": 1.9533706194242387e-05, "loss": 0.4793, "step": 9294 }, { "epoch": 0.1971326164874552, "grad_norm": 0.3429722487926483, "learning_rate": 1.9533605539376716e-05, "loss": 0.5273, "step": 9295 }, { "epoch": 0.19715382494538822, "grad_norm": 0.353567898273468, "learning_rate": 1.9533504873907852e-05, "loss": 0.5459, "step": 9296 }, { "epoch": 0.19717503340332124, "grad_norm": 0.3283314108848572, "learning_rate": 1.9533404197835905e-05, "loss": 0.539, "step": 9297 }, { "epoch": 0.19719624186125426, "grad_norm": 0.31032341718673706, "learning_rate": 1.953330351116099e-05, "loss": 0.4574, "step": 9298 }, { "epoch": 0.1972174503191873, "grad_norm": 0.4348984956741333, "learning_rate": 1.9533202813883215e-05, "loss": 0.5507, "step": 9299 }, { "epoch": 0.19723865877712032, "grad_norm": 0.3183552920818329, "learning_rate": 1.95331021060027e-05, "loss": 0.4519, "step": 9300 }, { "epoch": 0.19725986723505334, "grad_norm": 0.3330278992652893, "learning_rate": 1.9533001387519546e-05, "loss": 0.542, "step": 9301 }, { "epoch": 0.19728107569298636, "grad_norm": 0.3417006731033325, "learning_rate": 1.9532900658433876e-05, "loss": 0.5585, "step": 9302 }, { "epoch": 0.19730228415091938, "grad_norm": 0.3823412358760834, "learning_rate": 1.9532799918745793e-05, "loss": 0.5297, "step": 9303 }, { "epoch": 0.1973234926088524, "grad_norm": 0.3305908739566803, "learning_rate": 1.9532699168455413e-05, "loss": 0.4531, "step": 9304 }, { "epoch": 0.19734470106678542, "grad_norm": 0.34233325719833374, "learning_rate": 1.953259840756285e-05, "loss": 0.6071, "step": 9305 }, { "epoch": 0.19736590952471847, "grad_norm": 0.33111175894737244, "learning_rate": 1.953249763606821e-05, "loss": 0.5493, "step": 9306 }, { "epoch": 0.1973871179826515, "grad_norm": 0.31085678935050964, "learning_rate": 1.9532396853971613e-05, "loss": 0.4847, "step": 9307 }, { "epoch": 0.1974083264405845, "grad_norm": 0.30208247900009155, "learning_rate": 1.9532296061273166e-05, "loss": 0.4626, "step": 9308 }, { "epoch": 0.19742953489851753, "grad_norm": 0.36534881591796875, "learning_rate": 1.953219525797298e-05, "loss": 0.5407, "step": 9309 }, { "epoch": 0.19745074335645055, "grad_norm": 0.39232316613197327, "learning_rate": 1.9532094444071174e-05, "loss": 0.4947, "step": 9310 }, { "epoch": 0.19747195181438357, "grad_norm": 0.3885939121246338, "learning_rate": 1.953199361956785e-05, "loss": 0.5372, "step": 9311 }, { "epoch": 0.1974931602723166, "grad_norm": 0.3461821377277374, "learning_rate": 1.9531892784463128e-05, "loss": 0.534, "step": 9312 }, { "epoch": 0.19751436873024963, "grad_norm": 0.3035323917865753, "learning_rate": 1.9531791938757115e-05, "loss": 0.4658, "step": 9313 }, { "epoch": 0.19753557718818265, "grad_norm": 0.3453359007835388, "learning_rate": 1.953169108244993e-05, "loss": 0.4466, "step": 9314 }, { "epoch": 0.19755678564611567, "grad_norm": 0.31865230202674866, "learning_rate": 1.953159021554168e-05, "loss": 0.5797, "step": 9315 }, { "epoch": 0.1975779941040487, "grad_norm": 0.4283677637577057, "learning_rate": 1.953148933803248e-05, "loss": 0.5132, "step": 9316 }, { "epoch": 0.1975992025619817, "grad_norm": 0.31412753462791443, "learning_rate": 1.9531388449922436e-05, "loss": 0.478, "step": 9317 }, { "epoch": 0.19762041101991473, "grad_norm": 0.30685245990753174, "learning_rate": 1.9531287551211666e-05, "loss": 0.6028, "step": 9318 }, { "epoch": 0.19764161947784775, "grad_norm": 0.3184311091899872, "learning_rate": 1.953118664190028e-05, "loss": 0.4673, "step": 9319 }, { "epoch": 0.1976628279357808, "grad_norm": 0.33936944603919983, "learning_rate": 1.9531085721988393e-05, "loss": 0.4587, "step": 9320 }, { "epoch": 0.19768403639371382, "grad_norm": 0.4116035997867584, "learning_rate": 1.9530984791476113e-05, "loss": 0.5603, "step": 9321 }, { "epoch": 0.19770524485164684, "grad_norm": 0.3365705609321594, "learning_rate": 1.9530883850363555e-05, "loss": 0.5963, "step": 9322 }, { "epoch": 0.19772645330957986, "grad_norm": 0.37916791439056396, "learning_rate": 1.9530782898650834e-05, "loss": 0.5904, "step": 9323 }, { "epoch": 0.19774766176751288, "grad_norm": 0.3061438500881195, "learning_rate": 1.9530681936338057e-05, "loss": 0.5068, "step": 9324 }, { "epoch": 0.1977688702254459, "grad_norm": 0.33994829654693604, "learning_rate": 1.9530580963425338e-05, "loss": 0.5712, "step": 9325 }, { "epoch": 0.19779007868337894, "grad_norm": 0.33848509192466736, "learning_rate": 1.9530479979912787e-05, "loss": 0.5298, "step": 9326 }, { "epoch": 0.19781128714131196, "grad_norm": 0.31875577569007874, "learning_rate": 1.9530378985800524e-05, "loss": 0.556, "step": 9327 }, { "epoch": 0.19783249559924498, "grad_norm": 0.3073900640010834, "learning_rate": 1.9530277981088653e-05, "loss": 0.499, "step": 9328 }, { "epoch": 0.197853704057178, "grad_norm": 0.3265160322189331, "learning_rate": 1.953017696577729e-05, "loss": 0.465, "step": 9329 }, { "epoch": 0.19787491251511102, "grad_norm": 0.3376273810863495, "learning_rate": 1.9530075939866545e-05, "loss": 0.616, "step": 9330 }, { "epoch": 0.19789612097304404, "grad_norm": 0.3307163119316101, "learning_rate": 1.9529974903356535e-05, "loss": 0.5943, "step": 9331 }, { "epoch": 0.19791732943097706, "grad_norm": 0.36083224415779114, "learning_rate": 1.9529873856247372e-05, "loss": 0.5832, "step": 9332 }, { "epoch": 0.1979385378889101, "grad_norm": 0.42591264843940735, "learning_rate": 1.9529772798539163e-05, "loss": 0.5848, "step": 9333 }, { "epoch": 0.19795974634684313, "grad_norm": 0.3291211426258087, "learning_rate": 1.952967173023202e-05, "loss": 0.5863, "step": 9334 }, { "epoch": 0.19798095480477615, "grad_norm": 0.27937620878219604, "learning_rate": 1.9529570651326068e-05, "loss": 0.4955, "step": 9335 }, { "epoch": 0.19800216326270917, "grad_norm": 0.38045769929885864, "learning_rate": 1.9529469561821404e-05, "loss": 0.4849, "step": 9336 }, { "epoch": 0.1980233717206422, "grad_norm": 0.3225944936275482, "learning_rate": 1.9529368461718146e-05, "loss": 0.567, "step": 9337 }, { "epoch": 0.1980445801785752, "grad_norm": 0.3810681402683258, "learning_rate": 1.9529267351016412e-05, "loss": 0.513, "step": 9338 }, { "epoch": 0.19806578863650823, "grad_norm": 0.3999873995780945, "learning_rate": 1.9529166229716304e-05, "loss": 0.6093, "step": 9339 }, { "epoch": 0.19808699709444128, "grad_norm": 0.32338547706604004, "learning_rate": 1.9529065097817947e-05, "loss": 0.5211, "step": 9340 }, { "epoch": 0.1981082055523743, "grad_norm": 0.30136433243751526, "learning_rate": 1.9528963955321442e-05, "loss": 0.5042, "step": 9341 }, { "epoch": 0.19812941401030731, "grad_norm": 0.34330812096595764, "learning_rate": 1.9528862802226905e-05, "loss": 0.542, "step": 9342 }, { "epoch": 0.19815062246824033, "grad_norm": 0.42919859290122986, "learning_rate": 1.952876163853445e-05, "loss": 0.4747, "step": 9343 }, { "epoch": 0.19817183092617335, "grad_norm": 0.33263739943504333, "learning_rate": 1.952866046424419e-05, "loss": 0.4695, "step": 9344 }, { "epoch": 0.19819303938410637, "grad_norm": 0.41393718123435974, "learning_rate": 1.952855927935624e-05, "loss": 0.4766, "step": 9345 }, { "epoch": 0.1982142478420394, "grad_norm": 0.3256368935108185, "learning_rate": 1.95284580838707e-05, "loss": 0.5129, "step": 9346 }, { "epoch": 0.19823545629997244, "grad_norm": 0.35864174365997314, "learning_rate": 1.95283568777877e-05, "loss": 0.5604, "step": 9347 }, { "epoch": 0.19825666475790546, "grad_norm": 0.33894988894462585, "learning_rate": 1.9528255661107343e-05, "loss": 0.4862, "step": 9348 }, { "epoch": 0.19827787321583848, "grad_norm": 0.3309881091117859, "learning_rate": 1.952815443382974e-05, "loss": 0.4563, "step": 9349 }, { "epoch": 0.1982990816737715, "grad_norm": 0.3371272385120392, "learning_rate": 1.952805319595501e-05, "loss": 0.5758, "step": 9350 }, { "epoch": 0.19832029013170452, "grad_norm": 0.3221634328365326, "learning_rate": 1.952795194748326e-05, "loss": 0.511, "step": 9351 }, { "epoch": 0.19834149858963754, "grad_norm": 0.35648900270462036, "learning_rate": 1.9527850688414606e-05, "loss": 0.5483, "step": 9352 }, { "epoch": 0.19836270704757056, "grad_norm": 0.32672378420829773, "learning_rate": 1.952774941874916e-05, "loss": 0.5601, "step": 9353 }, { "epoch": 0.1983839155055036, "grad_norm": 0.3308665454387665, "learning_rate": 1.952764813848703e-05, "loss": 0.5339, "step": 9354 }, { "epoch": 0.19840512396343662, "grad_norm": 0.3565937578678131, "learning_rate": 1.9527546847628335e-05, "loss": 0.4994, "step": 9355 }, { "epoch": 0.19842633242136964, "grad_norm": 0.3793688714504242, "learning_rate": 1.9527445546173182e-05, "loss": 0.5262, "step": 9356 }, { "epoch": 0.19844754087930266, "grad_norm": 0.3463164269924164, "learning_rate": 1.952734423412169e-05, "loss": 0.5382, "step": 9357 }, { "epoch": 0.19846874933723568, "grad_norm": 0.3392223119735718, "learning_rate": 1.952724291147397e-05, "loss": 0.4746, "step": 9358 }, { "epoch": 0.1984899577951687, "grad_norm": 0.39173686504364014, "learning_rate": 1.9527141578230128e-05, "loss": 0.4684, "step": 9359 }, { "epoch": 0.19851116625310175, "grad_norm": 0.32012462615966797, "learning_rate": 1.9527040234390287e-05, "loss": 0.4536, "step": 9360 }, { "epoch": 0.19853237471103477, "grad_norm": 0.3098694086074829, "learning_rate": 1.952693887995455e-05, "loss": 0.4758, "step": 9361 }, { "epoch": 0.1985535831689678, "grad_norm": 0.3074169456958771, "learning_rate": 1.952683751492304e-05, "loss": 0.4943, "step": 9362 }, { "epoch": 0.1985747916269008, "grad_norm": 0.3131004571914673, "learning_rate": 1.9526736139295858e-05, "loss": 0.5328, "step": 9363 }, { "epoch": 0.19859600008483383, "grad_norm": 0.32630497217178345, "learning_rate": 1.9526634753073127e-05, "loss": 0.5305, "step": 9364 }, { "epoch": 0.19861720854276685, "grad_norm": 0.31325945258140564, "learning_rate": 1.9526533356254957e-05, "loss": 0.5149, "step": 9365 }, { "epoch": 0.19863841700069987, "grad_norm": 0.33094489574432373, "learning_rate": 1.9526431948841457e-05, "loss": 0.5151, "step": 9366 }, { "epoch": 0.19865962545863292, "grad_norm": 0.3192109167575836, "learning_rate": 1.952633053083274e-05, "loss": 0.455, "step": 9367 }, { "epoch": 0.19868083391656594, "grad_norm": 0.37281912565231323, "learning_rate": 1.952622910222892e-05, "loss": 0.5323, "step": 9368 }, { "epoch": 0.19870204237449896, "grad_norm": 0.388369083404541, "learning_rate": 1.9526127663030116e-05, "loss": 0.4265, "step": 9369 }, { "epoch": 0.19872325083243197, "grad_norm": 0.327891081571579, "learning_rate": 1.952602621323643e-05, "loss": 0.5158, "step": 9370 }, { "epoch": 0.198744459290365, "grad_norm": 0.5225048661231995, "learning_rate": 1.9525924752847987e-05, "loss": 0.4547, "step": 9371 }, { "epoch": 0.198765667748298, "grad_norm": 0.3274812698364258, "learning_rate": 1.952582328186489e-05, "loss": 0.5437, "step": 9372 }, { "epoch": 0.19878687620623103, "grad_norm": 0.3294725716114044, "learning_rate": 1.9525721800287255e-05, "loss": 0.5772, "step": 9373 }, { "epoch": 0.19880808466416408, "grad_norm": 0.33566102385520935, "learning_rate": 1.9525620308115193e-05, "loss": 0.4788, "step": 9374 }, { "epoch": 0.1988292931220971, "grad_norm": 0.34067633748054504, "learning_rate": 1.9525518805348823e-05, "loss": 0.5101, "step": 9375 }, { "epoch": 0.19885050158003012, "grad_norm": 0.3458077907562256, "learning_rate": 1.952541729198825e-05, "loss": 0.4673, "step": 9376 }, { "epoch": 0.19887171003796314, "grad_norm": 0.31321588158607483, "learning_rate": 1.952531576803359e-05, "loss": 0.5033, "step": 9377 }, { "epoch": 0.19889291849589616, "grad_norm": 0.29997801780700684, "learning_rate": 1.9525214233484963e-05, "loss": 0.5304, "step": 9378 }, { "epoch": 0.19891412695382918, "grad_norm": 0.7201460003852844, "learning_rate": 1.952511268834247e-05, "loss": 0.5026, "step": 9379 }, { "epoch": 0.1989353354117622, "grad_norm": 0.3001025915145874, "learning_rate": 1.952501113260623e-05, "loss": 0.4822, "step": 9380 }, { "epoch": 0.19895654386969525, "grad_norm": 0.3108092248439789, "learning_rate": 1.9524909566276357e-05, "loss": 0.4891, "step": 9381 }, { "epoch": 0.19897775232762827, "grad_norm": 0.3383752107620239, "learning_rate": 1.9524807989352957e-05, "loss": 0.521, "step": 9382 }, { "epoch": 0.19899896078556129, "grad_norm": 0.40481775999069214, "learning_rate": 1.9524706401836156e-05, "loss": 0.4941, "step": 9383 }, { "epoch": 0.1990201692434943, "grad_norm": 0.43664219975471497, "learning_rate": 1.9524604803726054e-05, "loss": 0.4843, "step": 9384 }, { "epoch": 0.19904137770142732, "grad_norm": 0.554943859577179, "learning_rate": 1.952450319502277e-05, "loss": 0.5438, "step": 9385 }, { "epoch": 0.19906258615936034, "grad_norm": 0.35335302352905273, "learning_rate": 1.952440157572642e-05, "loss": 0.5895, "step": 9386 }, { "epoch": 0.19908379461729336, "grad_norm": 0.33142808079719543, "learning_rate": 1.952429994583711e-05, "loss": 0.5114, "step": 9387 }, { "epoch": 0.1991050030752264, "grad_norm": 0.3221542537212372, "learning_rate": 1.9524198305354957e-05, "loss": 0.5199, "step": 9388 }, { "epoch": 0.19912621153315943, "grad_norm": 0.3843468129634857, "learning_rate": 1.9524096654280074e-05, "loss": 0.4808, "step": 9389 }, { "epoch": 0.19914741999109245, "grad_norm": 0.383724182844162, "learning_rate": 1.9523994992612573e-05, "loss": 0.4527, "step": 9390 }, { "epoch": 0.19916862844902547, "grad_norm": 0.3048425316810608, "learning_rate": 1.9523893320352565e-05, "loss": 0.5283, "step": 9391 }, { "epoch": 0.1991898369069585, "grad_norm": 0.3349921405315399, "learning_rate": 1.952379163750017e-05, "loss": 0.4894, "step": 9392 }, { "epoch": 0.1992110453648915, "grad_norm": 0.33344903588294983, "learning_rate": 1.9523689944055497e-05, "loss": 0.5307, "step": 9393 }, { "epoch": 0.19923225382282453, "grad_norm": 0.34601524472236633, "learning_rate": 1.9523588240018655e-05, "loss": 0.5949, "step": 9394 }, { "epoch": 0.19925346228075758, "grad_norm": 0.3718462586402893, "learning_rate": 1.9523486525389762e-05, "loss": 0.5963, "step": 9395 }, { "epoch": 0.1992746707386906, "grad_norm": 0.3630438446998596, "learning_rate": 1.9523384800168928e-05, "loss": 0.5581, "step": 9396 }, { "epoch": 0.19929587919662362, "grad_norm": 0.3112136721611023, "learning_rate": 1.9523283064356273e-05, "loss": 0.4959, "step": 9397 }, { "epoch": 0.19931708765455664, "grad_norm": 0.35546383261680603, "learning_rate": 1.95231813179519e-05, "loss": 0.5911, "step": 9398 }, { "epoch": 0.19933829611248965, "grad_norm": 0.34315788745880127, "learning_rate": 1.952307956095593e-05, "loss": 0.5224, "step": 9399 }, { "epoch": 0.19935950457042267, "grad_norm": 0.3170499801635742, "learning_rate": 1.9522977793368473e-05, "loss": 0.4489, "step": 9400 }, { "epoch": 0.19938071302835572, "grad_norm": 0.36546170711517334, "learning_rate": 1.9522876015189645e-05, "loss": 0.5767, "step": 9401 }, { "epoch": 0.19940192148628874, "grad_norm": 0.3075625002384186, "learning_rate": 1.9522774226419555e-05, "loss": 0.47, "step": 9402 }, { "epoch": 0.19942312994422176, "grad_norm": 0.31567490100860596, "learning_rate": 1.952267242705832e-05, "loss": 0.4234, "step": 9403 }, { "epoch": 0.19944433840215478, "grad_norm": 0.4196387231349945, "learning_rate": 1.952257061710605e-05, "loss": 0.5447, "step": 9404 }, { "epoch": 0.1994655468600878, "grad_norm": 0.2985115647315979, "learning_rate": 1.952246879656286e-05, "loss": 0.4834, "step": 9405 }, { "epoch": 0.19948675531802082, "grad_norm": 0.3430221974849701, "learning_rate": 1.9522366965428863e-05, "loss": 0.4799, "step": 9406 }, { "epoch": 0.19950796377595384, "grad_norm": 0.29105380177497864, "learning_rate": 1.9522265123704173e-05, "loss": 0.5189, "step": 9407 }, { "epoch": 0.1995291722338869, "grad_norm": 0.30216920375823975, "learning_rate": 1.9522163271388902e-05, "loss": 0.5062, "step": 9408 }, { "epoch": 0.1995503806918199, "grad_norm": 0.3291025459766388, "learning_rate": 1.952206140848316e-05, "loss": 0.4863, "step": 9409 }, { "epoch": 0.19957158914975293, "grad_norm": 0.45161673426628113, "learning_rate": 1.9521959534987068e-05, "loss": 0.5238, "step": 9410 }, { "epoch": 0.19959279760768595, "grad_norm": 0.3513898551464081, "learning_rate": 1.9521857650900736e-05, "loss": 0.526, "step": 9411 }, { "epoch": 0.19961400606561897, "grad_norm": 0.315778523683548, "learning_rate": 1.9521755756224277e-05, "loss": 0.5413, "step": 9412 }, { "epoch": 0.19963521452355198, "grad_norm": 0.40636932849884033, "learning_rate": 1.95216538509578e-05, "loss": 0.5752, "step": 9413 }, { "epoch": 0.199656422981485, "grad_norm": 0.3340834975242615, "learning_rate": 1.9521551935101425e-05, "loss": 0.5755, "step": 9414 }, { "epoch": 0.19967763143941805, "grad_norm": 0.3240133225917816, "learning_rate": 1.9521450008655263e-05, "loss": 0.6155, "step": 9415 }, { "epoch": 0.19969883989735107, "grad_norm": 0.33133968710899353, "learning_rate": 1.9521348071619424e-05, "loss": 0.4822, "step": 9416 }, { "epoch": 0.1997200483552841, "grad_norm": 0.3308211863040924, "learning_rate": 1.9521246123994027e-05, "loss": 0.542, "step": 9417 }, { "epoch": 0.1997412568132171, "grad_norm": 0.33365240693092346, "learning_rate": 1.9521144165779183e-05, "loss": 0.4979, "step": 9418 }, { "epoch": 0.19976246527115013, "grad_norm": 0.3905285596847534, "learning_rate": 1.9521042196975008e-05, "loss": 0.5833, "step": 9419 }, { "epoch": 0.19978367372908315, "grad_norm": 0.3539716601371765, "learning_rate": 1.9520940217581606e-05, "loss": 0.4743, "step": 9420 }, { "epoch": 0.19980488218701617, "grad_norm": 0.30470308661460876, "learning_rate": 1.95208382275991e-05, "loss": 0.4859, "step": 9421 }, { "epoch": 0.19982609064494922, "grad_norm": 0.33721813559532166, "learning_rate": 1.95207362270276e-05, "loss": 0.5467, "step": 9422 }, { "epoch": 0.19984729910288224, "grad_norm": 0.3229307532310486, "learning_rate": 1.952063421586722e-05, "loss": 0.5786, "step": 9423 }, { "epoch": 0.19986850756081526, "grad_norm": 0.3762274980545044, "learning_rate": 1.9520532194118072e-05, "loss": 0.4936, "step": 9424 }, { "epoch": 0.19988971601874828, "grad_norm": 0.3360294699668884, "learning_rate": 1.9520430161780277e-05, "loss": 0.5596, "step": 9425 }, { "epoch": 0.1999109244766813, "grad_norm": 0.33093008399009705, "learning_rate": 1.952032811885394e-05, "loss": 0.4541, "step": 9426 }, { "epoch": 0.19993213293461432, "grad_norm": 0.321037232875824, "learning_rate": 1.952022606533917e-05, "loss": 0.5052, "step": 9427 }, { "epoch": 0.19995334139254733, "grad_norm": 0.34021973609924316, "learning_rate": 1.9520124001236094e-05, "loss": 0.512, "step": 9428 }, { "epoch": 0.19997454985048038, "grad_norm": 0.3876040279865265, "learning_rate": 1.9520021926544818e-05, "loss": 0.516, "step": 9429 }, { "epoch": 0.1999957583084134, "grad_norm": 0.33703920245170593, "learning_rate": 1.9519919841265454e-05, "loss": 0.5272, "step": 9430 }, { "epoch": 0.20001696676634642, "grad_norm": 0.3339388966560364, "learning_rate": 1.9519817745398117e-05, "loss": 0.5058, "step": 9431 }, { "epoch": 0.20003817522427944, "grad_norm": 0.4448543190956116, "learning_rate": 1.9519715638942925e-05, "loss": 0.611, "step": 9432 }, { "epoch": 0.20005938368221246, "grad_norm": 0.4220781922340393, "learning_rate": 1.9519613521899985e-05, "loss": 0.5155, "step": 9433 }, { "epoch": 0.20008059214014548, "grad_norm": 0.3441442549228668, "learning_rate": 1.9519511394269414e-05, "loss": 0.5029, "step": 9434 }, { "epoch": 0.20010180059807853, "grad_norm": 0.3751620054244995, "learning_rate": 1.9519409256051327e-05, "loss": 0.4612, "step": 9435 }, { "epoch": 0.20012300905601155, "grad_norm": 0.36704084277153015, "learning_rate": 1.9519307107245834e-05, "loss": 0.6018, "step": 9436 }, { "epoch": 0.20014421751394457, "grad_norm": 0.364158034324646, "learning_rate": 1.951920494785305e-05, "loss": 0.4641, "step": 9437 }, { "epoch": 0.2001654259718776, "grad_norm": 0.2992940843105316, "learning_rate": 1.951910277787309e-05, "loss": 0.4854, "step": 9438 }, { "epoch": 0.2001866344298106, "grad_norm": 0.3909836709499359, "learning_rate": 1.951900059730607e-05, "loss": 0.5056, "step": 9439 }, { "epoch": 0.20020784288774363, "grad_norm": 0.3463674783706665, "learning_rate": 1.9518898406152093e-05, "loss": 0.5228, "step": 9440 }, { "epoch": 0.20022905134567665, "grad_norm": 0.3487260341644287, "learning_rate": 1.9518796204411282e-05, "loss": 0.5864, "step": 9441 }, { "epoch": 0.2002502598036097, "grad_norm": 0.307620644569397, "learning_rate": 1.9518693992083752e-05, "loss": 0.4928, "step": 9442 }, { "epoch": 0.2002714682615427, "grad_norm": 0.46059873700141907, "learning_rate": 1.951859176916961e-05, "loss": 0.5848, "step": 9443 }, { "epoch": 0.20029267671947573, "grad_norm": 0.3267240524291992, "learning_rate": 1.9518489535668975e-05, "loss": 0.5184, "step": 9444 }, { "epoch": 0.20031388517740875, "grad_norm": 0.318239688873291, "learning_rate": 1.9518387291581958e-05, "loss": 0.5687, "step": 9445 }, { "epoch": 0.20033509363534177, "grad_norm": 0.3097754418849945, "learning_rate": 1.9518285036908676e-05, "loss": 0.5674, "step": 9446 }, { "epoch": 0.2003563020932748, "grad_norm": 0.43725821375846863, "learning_rate": 1.9518182771649234e-05, "loss": 0.5083, "step": 9447 }, { "epoch": 0.2003775105512078, "grad_norm": 0.43767276406288147, "learning_rate": 1.9518080495803757e-05, "loss": 0.5009, "step": 9448 }, { "epoch": 0.20039871900914086, "grad_norm": 0.33385491371154785, "learning_rate": 1.9517978209372353e-05, "loss": 0.5721, "step": 9449 }, { "epoch": 0.20041992746707388, "grad_norm": 0.33990514278411865, "learning_rate": 1.9517875912355135e-05, "loss": 0.48, "step": 9450 }, { "epoch": 0.2004411359250069, "grad_norm": 0.4041895270347595, "learning_rate": 1.9517773604752218e-05, "loss": 0.5886, "step": 9451 }, { "epoch": 0.20046234438293992, "grad_norm": 0.315527081489563, "learning_rate": 1.9517671286563716e-05, "loss": 0.5289, "step": 9452 }, { "epoch": 0.20048355284087294, "grad_norm": 0.4974469840526581, "learning_rate": 1.9517568957789742e-05, "loss": 0.5256, "step": 9453 }, { "epoch": 0.20050476129880596, "grad_norm": 0.4014093577861786, "learning_rate": 1.951746661843041e-05, "loss": 0.5687, "step": 9454 }, { "epoch": 0.20052596975673898, "grad_norm": 0.3552967607975006, "learning_rate": 1.9517364268485838e-05, "loss": 0.5068, "step": 9455 }, { "epoch": 0.20054717821467202, "grad_norm": 0.3217734098434448, "learning_rate": 1.951726190795613e-05, "loss": 0.5793, "step": 9456 }, { "epoch": 0.20056838667260504, "grad_norm": 0.30651766061782837, "learning_rate": 1.9517159536841413e-05, "loss": 0.5484, "step": 9457 }, { "epoch": 0.20058959513053806, "grad_norm": 0.3193749189376831, "learning_rate": 1.9517057155141788e-05, "loss": 0.4955, "step": 9458 }, { "epoch": 0.20061080358847108, "grad_norm": 0.3521183431148529, "learning_rate": 1.9516954762857375e-05, "loss": 0.5609, "step": 9459 }, { "epoch": 0.2006320120464041, "grad_norm": 0.31379982829093933, "learning_rate": 1.9516852359988293e-05, "loss": 0.4954, "step": 9460 }, { "epoch": 0.20065322050433712, "grad_norm": 0.3692609667778015, "learning_rate": 1.951674994653465e-05, "loss": 0.4803, "step": 9461 }, { "epoch": 0.20067442896227014, "grad_norm": 0.3786241114139557, "learning_rate": 1.9516647522496553e-05, "loss": 0.4882, "step": 9462 }, { "epoch": 0.2006956374202032, "grad_norm": 0.3731289207935333, "learning_rate": 1.9516545087874127e-05, "loss": 0.5781, "step": 9463 }, { "epoch": 0.2007168458781362, "grad_norm": 0.3186799883842468, "learning_rate": 1.9516442642667485e-05, "loss": 0.6341, "step": 9464 }, { "epoch": 0.20073805433606923, "grad_norm": 0.3202093541622162, "learning_rate": 1.9516340186876736e-05, "loss": 0.4851, "step": 9465 }, { "epoch": 0.20075926279400225, "grad_norm": 0.3119167387485504, "learning_rate": 1.9516237720501996e-05, "loss": 0.4643, "step": 9466 }, { "epoch": 0.20078047125193527, "grad_norm": 0.31099817156791687, "learning_rate": 1.951613524354338e-05, "loss": 0.4328, "step": 9467 }, { "epoch": 0.20080167970986829, "grad_norm": 0.3274736702442169, "learning_rate": 1.9516032756001004e-05, "loss": 0.5479, "step": 9468 }, { "epoch": 0.2008228881678013, "grad_norm": 0.3599313497543335, "learning_rate": 1.9515930257874972e-05, "loss": 0.5256, "step": 9469 }, { "epoch": 0.20084409662573435, "grad_norm": 0.38884323835372925, "learning_rate": 1.951582774916541e-05, "loss": 0.567, "step": 9470 }, { "epoch": 0.20086530508366737, "grad_norm": 0.35776281356811523, "learning_rate": 1.9515725229872425e-05, "loss": 0.5251, "step": 9471 }, { "epoch": 0.2008865135416004, "grad_norm": 0.3682478964328766, "learning_rate": 1.9515622699996132e-05, "loss": 0.5495, "step": 9472 }, { "epoch": 0.2009077219995334, "grad_norm": 0.30623435974121094, "learning_rate": 1.9515520159536648e-05, "loss": 0.4808, "step": 9473 }, { "epoch": 0.20092893045746643, "grad_norm": 0.3344179391860962, "learning_rate": 1.9515417608494086e-05, "loss": 0.5334, "step": 9474 }, { "epoch": 0.20095013891539945, "grad_norm": 0.34751272201538086, "learning_rate": 1.9515315046868556e-05, "loss": 0.4645, "step": 9475 }, { "epoch": 0.2009713473733325, "grad_norm": 0.33155909180641174, "learning_rate": 1.951521247466018e-05, "loss": 0.5165, "step": 9476 }, { "epoch": 0.20099255583126552, "grad_norm": 0.3409701883792877, "learning_rate": 1.9515109891869064e-05, "loss": 0.4751, "step": 9477 }, { "epoch": 0.20101376428919854, "grad_norm": 0.3251573145389557, "learning_rate": 1.9515007298495327e-05, "loss": 0.4941, "step": 9478 }, { "epoch": 0.20103497274713156, "grad_norm": 0.3407169580459595, "learning_rate": 1.951490469453908e-05, "loss": 0.5112, "step": 9479 }, { "epoch": 0.20105618120506458, "grad_norm": 0.33557286858558655, "learning_rate": 1.951480208000044e-05, "loss": 0.516, "step": 9480 }, { "epoch": 0.2010773896629976, "grad_norm": 0.41383740305900574, "learning_rate": 1.9514699454879517e-05, "loss": 0.6002, "step": 9481 }, { "epoch": 0.20109859812093062, "grad_norm": 0.35548898577690125, "learning_rate": 1.9514596819176433e-05, "loss": 0.5592, "step": 9482 }, { "epoch": 0.20111980657886366, "grad_norm": 0.3192096948623657, "learning_rate": 1.951449417289129e-05, "loss": 0.5926, "step": 9483 }, { "epoch": 0.20114101503679668, "grad_norm": 0.4729078412055969, "learning_rate": 1.9514391516024218e-05, "loss": 0.4043, "step": 9484 }, { "epoch": 0.2011622234947297, "grad_norm": 0.3455940783023834, "learning_rate": 1.9514288848575314e-05, "loss": 0.5862, "step": 9485 }, { "epoch": 0.20118343195266272, "grad_norm": 0.3403078317642212, "learning_rate": 1.9514186170544706e-05, "loss": 0.5356, "step": 9486 }, { "epoch": 0.20120464041059574, "grad_norm": 0.3728860318660736, "learning_rate": 1.95140834819325e-05, "loss": 0.4913, "step": 9487 }, { "epoch": 0.20122584886852876, "grad_norm": 0.3237648904323578, "learning_rate": 1.9513980782738815e-05, "loss": 0.5396, "step": 9488 }, { "epoch": 0.20124705732646178, "grad_norm": 0.3395325243473053, "learning_rate": 1.951387807296376e-05, "loss": 0.5603, "step": 9489 }, { "epoch": 0.20126826578439483, "grad_norm": 0.33043187856674194, "learning_rate": 1.9513775352607457e-05, "loss": 0.5672, "step": 9490 }, { "epoch": 0.20128947424232785, "grad_norm": 0.5963116884231567, "learning_rate": 1.9513672621670015e-05, "loss": 0.4878, "step": 9491 }, { "epoch": 0.20131068270026087, "grad_norm": 0.3373081684112549, "learning_rate": 1.9513569880151547e-05, "loss": 0.4542, "step": 9492 }, { "epoch": 0.2013318911581939, "grad_norm": 0.3567318618297577, "learning_rate": 1.951346712805217e-05, "loss": 0.4911, "step": 9493 }, { "epoch": 0.2013530996161269, "grad_norm": 0.3488997519016266, "learning_rate": 1.9513364365371995e-05, "loss": 0.4786, "step": 9494 }, { "epoch": 0.20137430807405993, "grad_norm": 0.3043217658996582, "learning_rate": 1.951326159211114e-05, "loss": 0.5316, "step": 9495 }, { "epoch": 0.20139551653199295, "grad_norm": 0.4115515947341919, "learning_rate": 1.951315880826972e-05, "loss": 0.7014, "step": 9496 }, { "epoch": 0.201416724989926, "grad_norm": 0.3088059425354004, "learning_rate": 1.9513056013847848e-05, "loss": 0.4207, "step": 9497 }, { "epoch": 0.201437933447859, "grad_norm": 0.3618362247943878, "learning_rate": 1.9512953208845635e-05, "loss": 0.5486, "step": 9498 }, { "epoch": 0.20145914190579203, "grad_norm": 0.3461819887161255, "learning_rate": 1.9512850393263202e-05, "loss": 0.4633, "step": 9499 }, { "epoch": 0.20148035036372505, "grad_norm": 0.333396315574646, "learning_rate": 1.9512747567100656e-05, "loss": 0.5047, "step": 9500 }, { "epoch": 0.20150155882165807, "grad_norm": 0.4764624238014221, "learning_rate": 1.9512644730358113e-05, "loss": 0.4934, "step": 9501 }, { "epoch": 0.2015227672795911, "grad_norm": 0.3130545914173126, "learning_rate": 1.9512541883035695e-05, "loss": 0.5057, "step": 9502 }, { "epoch": 0.2015439757375241, "grad_norm": 0.33117154240608215, "learning_rate": 1.9512439025133504e-05, "loss": 0.5658, "step": 9503 }, { "epoch": 0.20156518419545716, "grad_norm": 0.3383903205394745, "learning_rate": 1.9512336156651667e-05, "loss": 0.5463, "step": 9504 }, { "epoch": 0.20158639265339018, "grad_norm": 0.30713123083114624, "learning_rate": 1.9512233277590288e-05, "loss": 0.4264, "step": 9505 }, { "epoch": 0.2016076011113232, "grad_norm": 0.3458022177219391, "learning_rate": 1.9512130387949487e-05, "loss": 0.5593, "step": 9506 }, { "epoch": 0.20162880956925622, "grad_norm": 0.34779953956604004, "learning_rate": 1.951202748772938e-05, "loss": 0.5583, "step": 9507 }, { "epoch": 0.20165001802718924, "grad_norm": 0.33765867352485657, "learning_rate": 1.9511924576930076e-05, "loss": 0.4767, "step": 9508 }, { "epoch": 0.20167122648512226, "grad_norm": 0.46687451004981995, "learning_rate": 1.951182165555169e-05, "loss": 0.5427, "step": 9509 }, { "epoch": 0.2016924349430553, "grad_norm": 0.29974454641342163, "learning_rate": 1.9511718723594342e-05, "loss": 0.4986, "step": 9510 }, { "epoch": 0.20171364340098832, "grad_norm": 0.3219775855541229, "learning_rate": 1.951161578105814e-05, "loss": 0.5261, "step": 9511 }, { "epoch": 0.20173485185892134, "grad_norm": 0.3612029254436493, "learning_rate": 1.9511512827943204e-05, "loss": 0.5232, "step": 9512 }, { "epoch": 0.20175606031685436, "grad_norm": 0.3018159866333008, "learning_rate": 1.9511409864249646e-05, "loss": 0.4594, "step": 9513 }, { "epoch": 0.20177726877478738, "grad_norm": 0.38692808151245117, "learning_rate": 1.9511306889977583e-05, "loss": 0.5652, "step": 9514 }, { "epoch": 0.2017984772327204, "grad_norm": 0.39388859272003174, "learning_rate": 1.951120390512712e-05, "loss": 0.5635, "step": 9515 }, { "epoch": 0.20181968569065342, "grad_norm": 0.33478614687919617, "learning_rate": 1.9511100909698383e-05, "loss": 0.5352, "step": 9516 }, { "epoch": 0.20184089414858647, "grad_norm": 0.4299944043159485, "learning_rate": 1.9510997903691484e-05, "loss": 0.6013, "step": 9517 }, { "epoch": 0.2018621026065195, "grad_norm": 0.3314966857433319, "learning_rate": 1.9510894887106533e-05, "loss": 0.5321, "step": 9518 }, { "epoch": 0.2018833110644525, "grad_norm": 0.32619592547416687, "learning_rate": 1.951079185994365e-05, "loss": 0.5217, "step": 9519 }, { "epoch": 0.20190451952238553, "grad_norm": 0.330782949924469, "learning_rate": 1.9510688822202945e-05, "loss": 0.5033, "step": 9520 }, { "epoch": 0.20192572798031855, "grad_norm": 0.31893429160118103, "learning_rate": 1.9510585773884535e-05, "loss": 0.4925, "step": 9521 }, { "epoch": 0.20194693643825157, "grad_norm": 0.33351871371269226, "learning_rate": 1.9510482714988534e-05, "loss": 0.4688, "step": 9522 }, { "epoch": 0.2019681448961846, "grad_norm": 0.35960206389427185, "learning_rate": 1.9510379645515056e-05, "loss": 0.5673, "step": 9523 }, { "epoch": 0.20198935335411763, "grad_norm": 0.3118928372859955, "learning_rate": 1.9510276565464216e-05, "loss": 0.5603, "step": 9524 }, { "epoch": 0.20201056181205065, "grad_norm": 0.3128701150417328, "learning_rate": 1.951017347483613e-05, "loss": 0.4677, "step": 9525 }, { "epoch": 0.20203177026998367, "grad_norm": 0.36516067385673523, "learning_rate": 1.951007037363091e-05, "loss": 0.5735, "step": 9526 }, { "epoch": 0.2020529787279167, "grad_norm": 0.32055842876434326, "learning_rate": 1.9509967261848676e-05, "loss": 0.6002, "step": 9527 }, { "epoch": 0.2020741871858497, "grad_norm": 0.36089619994163513, "learning_rate": 1.9509864139489537e-05, "loss": 0.5553, "step": 9528 }, { "epoch": 0.20209539564378273, "grad_norm": 0.3148450255393982, "learning_rate": 1.950976100655361e-05, "loss": 0.4039, "step": 9529 }, { "epoch": 0.20211660410171575, "grad_norm": 0.3119492530822754, "learning_rate": 1.9509657863041006e-05, "loss": 0.5392, "step": 9530 }, { "epoch": 0.2021378125596488, "grad_norm": 0.36603620648384094, "learning_rate": 1.9509554708951846e-05, "loss": 0.548, "step": 9531 }, { "epoch": 0.20215902101758182, "grad_norm": 0.40791383385658264, "learning_rate": 1.9509451544286244e-05, "loss": 0.5689, "step": 9532 }, { "epoch": 0.20218022947551484, "grad_norm": 0.35038453340530396, "learning_rate": 1.950934836904431e-05, "loss": 0.4871, "step": 9533 }, { "epoch": 0.20220143793344786, "grad_norm": 0.32880765199661255, "learning_rate": 1.950924518322616e-05, "loss": 0.5755, "step": 9534 }, { "epoch": 0.20222264639138088, "grad_norm": 0.3360961079597473, "learning_rate": 1.9509141986831915e-05, "loss": 0.5257, "step": 9535 }, { "epoch": 0.2022438548493139, "grad_norm": 0.34806859493255615, "learning_rate": 1.9509038779861682e-05, "loss": 0.5197, "step": 9536 }, { "epoch": 0.20226506330724692, "grad_norm": 0.35202184319496155, "learning_rate": 1.950893556231558e-05, "loss": 0.509, "step": 9537 }, { "epoch": 0.20228627176517996, "grad_norm": 0.3069812059402466, "learning_rate": 1.950883233419372e-05, "loss": 0.5886, "step": 9538 }, { "epoch": 0.20230748022311298, "grad_norm": 0.3520941734313965, "learning_rate": 1.950872909549622e-05, "loss": 0.4858, "step": 9539 }, { "epoch": 0.202328688681046, "grad_norm": 0.32270196080207825, "learning_rate": 1.9508625846223194e-05, "loss": 0.4945, "step": 9540 }, { "epoch": 0.20234989713897902, "grad_norm": 0.3420138359069824, "learning_rate": 1.9508522586374758e-05, "loss": 0.4711, "step": 9541 }, { "epoch": 0.20237110559691204, "grad_norm": 0.3476825952529907, "learning_rate": 1.9508419315951028e-05, "loss": 0.476, "step": 9542 }, { "epoch": 0.20239231405484506, "grad_norm": 0.31806278228759766, "learning_rate": 1.950831603495211e-05, "loss": 0.4956, "step": 9543 }, { "epoch": 0.20241352251277808, "grad_norm": 0.33580464124679565, "learning_rate": 1.950821274337813e-05, "loss": 0.5586, "step": 9544 }, { "epoch": 0.20243473097071113, "grad_norm": 0.37832093238830566, "learning_rate": 1.9508109441229198e-05, "loss": 0.4879, "step": 9545 }, { "epoch": 0.20245593942864415, "grad_norm": 0.40376660227775574, "learning_rate": 1.950800612850543e-05, "loss": 0.5746, "step": 9546 }, { "epoch": 0.20247714788657717, "grad_norm": 0.32221436500549316, "learning_rate": 1.950790280520694e-05, "loss": 0.5116, "step": 9547 }, { "epoch": 0.2024983563445102, "grad_norm": 0.34107503294944763, "learning_rate": 1.9507799471333842e-05, "loss": 0.5333, "step": 9548 }, { "epoch": 0.2025195648024432, "grad_norm": 0.3376481831073761, "learning_rate": 1.9507696126886252e-05, "loss": 0.5047, "step": 9549 }, { "epoch": 0.20254077326037623, "grad_norm": 0.33069995045661926, "learning_rate": 1.9507592771864287e-05, "loss": 0.5132, "step": 9550 }, { "epoch": 0.20256198171830928, "grad_norm": 0.4265955686569214, "learning_rate": 1.950748940626806e-05, "loss": 0.5385, "step": 9551 }, { "epoch": 0.2025831901762423, "grad_norm": 0.36609867215156555, "learning_rate": 1.950738603009768e-05, "loss": 0.5586, "step": 9552 }, { "epoch": 0.20260439863417531, "grad_norm": 0.4798339307308197, "learning_rate": 1.9507282643353273e-05, "loss": 0.5761, "step": 9553 }, { "epoch": 0.20262560709210833, "grad_norm": 0.3597734868526459, "learning_rate": 1.9507179246034947e-05, "loss": 0.5226, "step": 9554 }, { "epoch": 0.20264681555004135, "grad_norm": 0.33812275528907776, "learning_rate": 1.9507075838142818e-05, "loss": 0.511, "step": 9555 }, { "epoch": 0.20266802400797437, "grad_norm": 0.32204607129096985, "learning_rate": 1.9506972419677007e-05, "loss": 0.5356, "step": 9556 }, { "epoch": 0.2026892324659074, "grad_norm": 0.38447338342666626, "learning_rate": 1.950686899063762e-05, "loss": 0.4426, "step": 9557 }, { "epoch": 0.20271044092384044, "grad_norm": 0.36778247356414795, "learning_rate": 1.9506765551024777e-05, "loss": 0.5838, "step": 9558 }, { "epoch": 0.20273164938177346, "grad_norm": 0.32660895586013794, "learning_rate": 1.9506662100838588e-05, "loss": 0.4823, "step": 9559 }, { "epoch": 0.20275285783970648, "grad_norm": 0.3532625436782837, "learning_rate": 1.9506558640079175e-05, "loss": 0.4959, "step": 9560 }, { "epoch": 0.2027740662976395, "grad_norm": 0.3489510715007782, "learning_rate": 1.9506455168746653e-05, "loss": 0.5335, "step": 9561 }, { "epoch": 0.20279527475557252, "grad_norm": 0.34763798117637634, "learning_rate": 1.950635168684113e-05, "loss": 0.529, "step": 9562 }, { "epoch": 0.20281648321350554, "grad_norm": 0.3403957188129425, "learning_rate": 1.9506248194362726e-05, "loss": 0.487, "step": 9563 }, { "epoch": 0.20283769167143856, "grad_norm": 0.33956021070480347, "learning_rate": 1.9506144691311557e-05, "loss": 0.5296, "step": 9564 }, { "epoch": 0.2028589001293716, "grad_norm": 0.3332923948764801, "learning_rate": 1.9506041177687737e-05, "loss": 0.5256, "step": 9565 }, { "epoch": 0.20288010858730463, "grad_norm": 0.37786009907722473, "learning_rate": 1.9505937653491377e-05, "loss": 0.4629, "step": 9566 }, { "epoch": 0.20290131704523764, "grad_norm": 0.3499179482460022, "learning_rate": 1.95058341187226e-05, "loss": 0.5576, "step": 9567 }, { "epoch": 0.20292252550317066, "grad_norm": 0.37377503514289856, "learning_rate": 1.9505730573381512e-05, "loss": 0.4736, "step": 9568 }, { "epoch": 0.20294373396110368, "grad_norm": 0.37808454036712646, "learning_rate": 1.9505627017468237e-05, "loss": 0.4633, "step": 9569 }, { "epoch": 0.2029649424190367, "grad_norm": 0.3429412543773651, "learning_rate": 1.9505523450982883e-05, "loss": 0.5144, "step": 9570 }, { "epoch": 0.20298615087696972, "grad_norm": 0.3277018070220947, "learning_rate": 1.950541987392557e-05, "loss": 0.4879, "step": 9571 }, { "epoch": 0.20300735933490277, "grad_norm": 0.3408442437648773, "learning_rate": 1.9505316286296413e-05, "loss": 0.5399, "step": 9572 }, { "epoch": 0.2030285677928358, "grad_norm": 0.3413129448890686, "learning_rate": 1.9505212688095526e-05, "loss": 0.4897, "step": 9573 }, { "epoch": 0.2030497762507688, "grad_norm": 0.4140574336051941, "learning_rate": 1.9505109079323025e-05, "loss": 0.4912, "step": 9574 }, { "epoch": 0.20307098470870183, "grad_norm": 0.3314419686794281, "learning_rate": 1.950500545997902e-05, "loss": 0.536, "step": 9575 }, { "epoch": 0.20309219316663485, "grad_norm": 0.30296093225479126, "learning_rate": 1.950490183006363e-05, "loss": 0.4358, "step": 9576 }, { "epoch": 0.20311340162456787, "grad_norm": 0.3120151162147522, "learning_rate": 1.9504798189576976e-05, "loss": 0.4882, "step": 9577 }, { "epoch": 0.2031346100825009, "grad_norm": 0.3071749806404114, "learning_rate": 1.9504694538519164e-05, "loss": 0.5592, "step": 9578 }, { "epoch": 0.20315581854043394, "grad_norm": 0.3077525496482849, "learning_rate": 1.9504590876890315e-05, "loss": 0.4785, "step": 9579 }, { "epoch": 0.20317702699836696, "grad_norm": 0.3185173571109772, "learning_rate": 1.950448720469054e-05, "loss": 0.5021, "step": 9580 }, { "epoch": 0.20319823545629997, "grad_norm": 0.33855554461479187, "learning_rate": 1.950438352191996e-05, "loss": 0.5493, "step": 9581 }, { "epoch": 0.203219443914233, "grad_norm": 0.42786142230033875, "learning_rate": 1.9504279828578686e-05, "loss": 0.4743, "step": 9582 }, { "epoch": 0.20324065237216601, "grad_norm": 0.31384581327438354, "learning_rate": 1.9504176124666837e-05, "loss": 0.4944, "step": 9583 }, { "epoch": 0.20326186083009903, "grad_norm": 0.3315731883049011, "learning_rate": 1.9504072410184523e-05, "loss": 0.5213, "step": 9584 }, { "epoch": 0.20328306928803205, "grad_norm": 0.3256186842918396, "learning_rate": 1.9503968685131863e-05, "loss": 0.5247, "step": 9585 }, { "epoch": 0.2033042777459651, "grad_norm": 0.3172297179698944, "learning_rate": 1.950386494950897e-05, "loss": 0.5115, "step": 9586 }, { "epoch": 0.20332548620389812, "grad_norm": 0.330201119184494, "learning_rate": 1.9503761203315962e-05, "loss": 0.4744, "step": 9587 }, { "epoch": 0.20334669466183114, "grad_norm": 0.3474386930465698, "learning_rate": 1.9503657446552954e-05, "loss": 0.5095, "step": 9588 }, { "epoch": 0.20336790311976416, "grad_norm": 0.30554476380348206, "learning_rate": 1.950355367922006e-05, "loss": 0.5208, "step": 9589 }, { "epoch": 0.20338911157769718, "grad_norm": 0.3854447603225708, "learning_rate": 1.9503449901317396e-05, "loss": 0.5095, "step": 9590 }, { "epoch": 0.2034103200356302, "grad_norm": 0.3763882517814636, "learning_rate": 1.950334611284508e-05, "loss": 0.4706, "step": 9591 }, { "epoch": 0.20343152849356325, "grad_norm": 0.3538320064544678, "learning_rate": 1.9503242313803225e-05, "loss": 0.5233, "step": 9592 }, { "epoch": 0.20345273695149627, "grad_norm": 0.33163201808929443, "learning_rate": 1.9503138504191944e-05, "loss": 0.4298, "step": 9593 }, { "epoch": 0.20347394540942929, "grad_norm": 0.3036808967590332, "learning_rate": 1.9503034684011352e-05, "loss": 0.5099, "step": 9594 }, { "epoch": 0.2034951538673623, "grad_norm": 0.3928235173225403, "learning_rate": 1.950293085326157e-05, "loss": 0.5651, "step": 9595 }, { "epoch": 0.20351636232529532, "grad_norm": 0.3410847783088684, "learning_rate": 1.9502827011942717e-05, "loss": 0.5498, "step": 9596 }, { "epoch": 0.20353757078322834, "grad_norm": 0.3141317069530487, "learning_rate": 1.9502723160054893e-05, "loss": 0.484, "step": 9597 }, { "epoch": 0.20355877924116136, "grad_norm": 0.34966254234313965, "learning_rate": 1.950261929759823e-05, "loss": 0.5224, "step": 9598 }, { "epoch": 0.2035799876990944, "grad_norm": 0.5428673624992371, "learning_rate": 1.950251542457283e-05, "loss": 0.5038, "step": 9599 }, { "epoch": 0.20360119615702743, "grad_norm": 0.3272596001625061, "learning_rate": 1.950241154097882e-05, "loss": 0.4967, "step": 9600 }, { "epoch": 0.20362240461496045, "grad_norm": 0.3662976324558258, "learning_rate": 1.950230764681631e-05, "loss": 0.5087, "step": 9601 }, { "epoch": 0.20364361307289347, "grad_norm": 0.298665314912796, "learning_rate": 1.9502203742085415e-05, "loss": 0.4571, "step": 9602 }, { "epoch": 0.2036648215308265, "grad_norm": 0.3340919315814972, "learning_rate": 1.950209982678625e-05, "loss": 0.5343, "step": 9603 }, { "epoch": 0.2036860299887595, "grad_norm": 0.317611426115036, "learning_rate": 1.9501995900918934e-05, "loss": 0.5778, "step": 9604 }, { "epoch": 0.20370723844669253, "grad_norm": 0.2984684109687805, "learning_rate": 1.9501891964483582e-05, "loss": 0.4695, "step": 9605 }, { "epoch": 0.20372844690462558, "grad_norm": 0.42349669337272644, "learning_rate": 1.9501788017480304e-05, "loss": 0.5636, "step": 9606 }, { "epoch": 0.2037496553625586, "grad_norm": 0.34566110372543335, "learning_rate": 1.9501684059909224e-05, "loss": 0.526, "step": 9607 }, { "epoch": 0.20377086382049162, "grad_norm": 0.34989234805107117, "learning_rate": 1.950158009177045e-05, "loss": 0.5031, "step": 9608 }, { "epoch": 0.20379207227842464, "grad_norm": 0.30625802278518677, "learning_rate": 1.9501476113064103e-05, "loss": 0.513, "step": 9609 }, { "epoch": 0.20381328073635765, "grad_norm": 0.32979387044906616, "learning_rate": 1.95013721237903e-05, "loss": 0.5021, "step": 9610 }, { "epoch": 0.20383448919429067, "grad_norm": 0.3311024010181427, "learning_rate": 1.9501268123949145e-05, "loss": 0.6011, "step": 9611 }, { "epoch": 0.2038556976522237, "grad_norm": 0.33800753951072693, "learning_rate": 1.950116411354077e-05, "loss": 0.4459, "step": 9612 }, { "epoch": 0.20387690611015674, "grad_norm": 0.33390912413597107, "learning_rate": 1.950106009256528e-05, "loss": 0.4424, "step": 9613 }, { "epoch": 0.20389811456808976, "grad_norm": 0.36901718378067017, "learning_rate": 1.9500956061022793e-05, "loss": 0.6069, "step": 9614 }, { "epoch": 0.20391932302602278, "grad_norm": 0.33339083194732666, "learning_rate": 1.9500852018913426e-05, "loss": 0.5009, "step": 9615 }, { "epoch": 0.2039405314839558, "grad_norm": 0.43634122610092163, "learning_rate": 1.9500747966237293e-05, "loss": 0.4482, "step": 9616 }, { "epoch": 0.20396173994188882, "grad_norm": 0.3477972149848938, "learning_rate": 1.9500643902994515e-05, "loss": 0.4487, "step": 9617 }, { "epoch": 0.20398294839982184, "grad_norm": 0.35980328917503357, "learning_rate": 1.9500539829185198e-05, "loss": 0.4966, "step": 9618 }, { "epoch": 0.20400415685775486, "grad_norm": 0.33146634697914124, "learning_rate": 1.9500435744809463e-05, "loss": 0.5332, "step": 9619 }, { "epoch": 0.2040253653156879, "grad_norm": 0.3362760543823242, "learning_rate": 1.950033164986743e-05, "loss": 0.5602, "step": 9620 }, { "epoch": 0.20404657377362093, "grad_norm": 0.3046261668205261, "learning_rate": 1.9500227544359206e-05, "loss": 0.4822, "step": 9621 }, { "epoch": 0.20406778223155395, "grad_norm": 0.3285072445869446, "learning_rate": 1.9500123428284916e-05, "loss": 0.446, "step": 9622 }, { "epoch": 0.20408899068948697, "grad_norm": 0.3713652789592743, "learning_rate": 1.9500019301644667e-05, "loss": 0.5033, "step": 9623 }, { "epoch": 0.20411019914741999, "grad_norm": 0.3156273066997528, "learning_rate": 1.9499915164438582e-05, "loss": 0.498, "step": 9624 }, { "epoch": 0.204131407605353, "grad_norm": 0.3980913758277893, "learning_rate": 1.9499811016666775e-05, "loss": 0.482, "step": 9625 }, { "epoch": 0.20415261606328605, "grad_norm": 0.3462944030761719, "learning_rate": 1.949970685832936e-05, "loss": 0.5012, "step": 9626 }, { "epoch": 0.20417382452121907, "grad_norm": 0.3292682468891144, "learning_rate": 1.9499602689426453e-05, "loss": 0.4585, "step": 9627 }, { "epoch": 0.2041950329791521, "grad_norm": 0.31952235102653503, "learning_rate": 1.9499498509958168e-05, "loss": 0.5112, "step": 9628 }, { "epoch": 0.2042162414370851, "grad_norm": 0.672997236251831, "learning_rate": 1.9499394319924624e-05, "loss": 0.4693, "step": 9629 }, { "epoch": 0.20423744989501813, "grad_norm": 0.32941552996635437, "learning_rate": 1.9499290119325937e-05, "loss": 0.5235, "step": 9630 }, { "epoch": 0.20425865835295115, "grad_norm": 0.4336995482444763, "learning_rate": 1.9499185908162224e-05, "loss": 0.5204, "step": 9631 }, { "epoch": 0.20427986681088417, "grad_norm": 0.3215439021587372, "learning_rate": 1.9499081686433595e-05, "loss": 0.5816, "step": 9632 }, { "epoch": 0.20430107526881722, "grad_norm": 0.362906813621521, "learning_rate": 1.9498977454140172e-05, "loss": 0.5245, "step": 9633 }, { "epoch": 0.20432228372675024, "grad_norm": 0.3219072222709656, "learning_rate": 1.9498873211282072e-05, "loss": 0.4953, "step": 9634 }, { "epoch": 0.20434349218468326, "grad_norm": 0.42159032821655273, "learning_rate": 1.9498768957859405e-05, "loss": 0.503, "step": 9635 }, { "epoch": 0.20436470064261628, "grad_norm": 0.4028785824775696, "learning_rate": 1.949866469387229e-05, "loss": 0.5267, "step": 9636 }, { "epoch": 0.2043859091005493, "grad_norm": 0.31047746539115906, "learning_rate": 1.949856041932084e-05, "loss": 0.4579, "step": 9637 }, { "epoch": 0.20440711755848232, "grad_norm": 0.32613810896873474, "learning_rate": 1.9498456134205173e-05, "loss": 0.5472, "step": 9638 }, { "epoch": 0.20442832601641533, "grad_norm": 0.34772664308547974, "learning_rate": 1.949835183852541e-05, "loss": 0.5503, "step": 9639 }, { "epoch": 0.20444953447434838, "grad_norm": 0.3356119692325592, "learning_rate": 1.949824753228166e-05, "loss": 0.4975, "step": 9640 }, { "epoch": 0.2044707429322814, "grad_norm": 0.3452235162258148, "learning_rate": 1.9498143215474045e-05, "loss": 0.4569, "step": 9641 }, { "epoch": 0.20449195139021442, "grad_norm": 0.3506946265697479, "learning_rate": 1.9498038888102674e-05, "loss": 0.6056, "step": 9642 }, { "epoch": 0.20451315984814744, "grad_norm": 0.32371383905410767, "learning_rate": 1.9497934550167667e-05, "loss": 0.508, "step": 9643 }, { "epoch": 0.20453436830608046, "grad_norm": 0.3363853394985199, "learning_rate": 1.949783020166914e-05, "loss": 0.5724, "step": 9644 }, { "epoch": 0.20455557676401348, "grad_norm": 0.3416900336742401, "learning_rate": 1.949772584260721e-05, "loss": 0.4737, "step": 9645 }, { "epoch": 0.2045767852219465, "grad_norm": 0.3521105945110321, "learning_rate": 1.9497621472981994e-05, "loss": 0.5739, "step": 9646 }, { "epoch": 0.20459799367987955, "grad_norm": 0.38824349641799927, "learning_rate": 1.9497517092793602e-05, "loss": 0.5035, "step": 9647 }, { "epoch": 0.20461920213781257, "grad_norm": 0.3652758300304413, "learning_rate": 1.9497412702042154e-05, "loss": 0.4929, "step": 9648 }, { "epoch": 0.2046404105957456, "grad_norm": 0.30846133828163147, "learning_rate": 1.9497308300727768e-05, "loss": 0.5564, "step": 9649 }, { "epoch": 0.2046616190536786, "grad_norm": 0.31828364729881287, "learning_rate": 1.9497203888850556e-05, "loss": 0.4592, "step": 9650 }, { "epoch": 0.20468282751161163, "grad_norm": 0.3539882004261017, "learning_rate": 1.949709946641064e-05, "loss": 0.482, "step": 9651 }, { "epoch": 0.20470403596954465, "grad_norm": 0.33211749792099, "learning_rate": 1.9496995033408128e-05, "loss": 0.549, "step": 9652 }, { "epoch": 0.20472524442747767, "grad_norm": 0.33142176270484924, "learning_rate": 1.9496890589843142e-05, "loss": 0.5049, "step": 9653 }, { "epoch": 0.2047464528854107, "grad_norm": 0.4739517867565155, "learning_rate": 1.9496786135715798e-05, "loss": 0.4668, "step": 9654 }, { "epoch": 0.20476766134334373, "grad_norm": 0.31387126445770264, "learning_rate": 1.949668167102621e-05, "loss": 0.5929, "step": 9655 }, { "epoch": 0.20478886980127675, "grad_norm": 0.3487508296966553, "learning_rate": 1.9496577195774494e-05, "loss": 0.5531, "step": 9656 }, { "epoch": 0.20481007825920977, "grad_norm": 0.3080988824367523, "learning_rate": 1.949647270996077e-05, "loss": 0.4802, "step": 9657 }, { "epoch": 0.2048312867171428, "grad_norm": 0.3217259645462036, "learning_rate": 1.949636821358515e-05, "loss": 0.4744, "step": 9658 }, { "epoch": 0.2048524951750758, "grad_norm": 0.33210641145706177, "learning_rate": 1.9496263706647753e-05, "loss": 0.5092, "step": 9659 }, { "epoch": 0.20487370363300883, "grad_norm": 0.31750136613845825, "learning_rate": 1.9496159189148694e-05, "loss": 0.4974, "step": 9660 }, { "epoch": 0.20489491209094188, "grad_norm": 0.3463842570781708, "learning_rate": 1.949605466108809e-05, "loss": 0.5184, "step": 9661 }, { "epoch": 0.2049161205488749, "grad_norm": 0.33462437987327576, "learning_rate": 1.949595012246605e-05, "loss": 0.5399, "step": 9662 }, { "epoch": 0.20493732900680792, "grad_norm": 0.3434200584888458, "learning_rate": 1.9495845573282707e-05, "loss": 0.4384, "step": 9663 }, { "epoch": 0.20495853746474094, "grad_norm": 0.31454747915267944, "learning_rate": 1.9495741013538157e-05, "loss": 0.5517, "step": 9664 }, { "epoch": 0.20497974592267396, "grad_norm": 0.35322126746177673, "learning_rate": 1.9495636443232533e-05, "loss": 0.6215, "step": 9665 }, { "epoch": 0.20500095438060698, "grad_norm": 0.3398328423500061, "learning_rate": 1.9495531862365942e-05, "loss": 0.5084, "step": 9666 }, { "epoch": 0.20502216283854002, "grad_norm": 0.3368193507194519, "learning_rate": 1.9495427270938505e-05, "loss": 0.4433, "step": 9667 }, { "epoch": 0.20504337129647304, "grad_norm": 0.3500564396381378, "learning_rate": 1.9495322668950335e-05, "loss": 0.587, "step": 9668 }, { "epoch": 0.20506457975440606, "grad_norm": 0.3554368019104004, "learning_rate": 1.949521805640155e-05, "loss": 0.5164, "step": 9669 }, { "epoch": 0.20508578821233908, "grad_norm": 0.4099459946155548, "learning_rate": 1.9495113433292265e-05, "loss": 0.5537, "step": 9670 }, { "epoch": 0.2051069966702721, "grad_norm": 0.33308708667755127, "learning_rate": 1.9495008799622596e-05, "loss": 0.5441, "step": 9671 }, { "epoch": 0.20512820512820512, "grad_norm": 0.31576088070869446, "learning_rate": 1.9494904155392663e-05, "loss": 0.5852, "step": 9672 }, { "epoch": 0.20514941358613814, "grad_norm": 0.3467605710029602, "learning_rate": 1.9494799500602583e-05, "loss": 0.5601, "step": 9673 }, { "epoch": 0.2051706220440712, "grad_norm": 0.31896305084228516, "learning_rate": 1.9494694835252462e-05, "loss": 0.5326, "step": 9674 }, { "epoch": 0.2051918305020042, "grad_norm": 0.29540643095970154, "learning_rate": 1.949459015934243e-05, "loss": 0.4731, "step": 9675 }, { "epoch": 0.20521303895993723, "grad_norm": 0.31840211153030396, "learning_rate": 1.9494485472872596e-05, "loss": 0.447, "step": 9676 }, { "epoch": 0.20523424741787025, "grad_norm": 0.3423663377761841, "learning_rate": 1.9494380775843077e-05, "loss": 0.5976, "step": 9677 }, { "epoch": 0.20525545587580327, "grad_norm": 0.3274984359741211, "learning_rate": 1.949427606825399e-05, "loss": 0.4937, "step": 9678 }, { "epoch": 0.2052766643337363, "grad_norm": 0.3421446979045868, "learning_rate": 1.949417135010545e-05, "loss": 0.5911, "step": 9679 }, { "epoch": 0.2052978727916693, "grad_norm": 0.3388066291809082, "learning_rate": 1.9494066621397577e-05, "loss": 0.5579, "step": 9680 }, { "epoch": 0.20531908124960235, "grad_norm": 0.29622673988342285, "learning_rate": 1.949396188213049e-05, "loss": 0.5469, "step": 9681 }, { "epoch": 0.20534028970753537, "grad_norm": 0.33087781071662903, "learning_rate": 1.9493857132304295e-05, "loss": 0.497, "step": 9682 }, { "epoch": 0.2053614981654684, "grad_norm": 0.3760436773300171, "learning_rate": 1.9493752371919114e-05, "loss": 0.45, "step": 9683 }, { "epoch": 0.2053827066234014, "grad_norm": 0.33011236786842346, "learning_rate": 1.9493647600975068e-05, "loss": 0.4743, "step": 9684 }, { "epoch": 0.20540391508133443, "grad_norm": 0.31227290630340576, "learning_rate": 1.9493542819472268e-05, "loss": 0.4856, "step": 9685 }, { "epoch": 0.20542512353926745, "grad_norm": 0.3102584183216095, "learning_rate": 1.9493438027410833e-05, "loss": 0.5288, "step": 9686 }, { "epoch": 0.20544633199720047, "grad_norm": 0.5947972536087036, "learning_rate": 1.9493333224790874e-05, "loss": 0.507, "step": 9687 }, { "epoch": 0.20546754045513352, "grad_norm": 0.3427417576313019, "learning_rate": 1.9493228411612518e-05, "loss": 0.5192, "step": 9688 }, { "epoch": 0.20548874891306654, "grad_norm": 0.3344586491584778, "learning_rate": 1.9493123587875873e-05, "loss": 0.5366, "step": 9689 }, { "epoch": 0.20550995737099956, "grad_norm": 0.33291611075401306, "learning_rate": 1.949301875358106e-05, "loss": 0.5755, "step": 9690 }, { "epoch": 0.20553116582893258, "grad_norm": 0.36532270908355713, "learning_rate": 1.949291390872819e-05, "loss": 0.5531, "step": 9691 }, { "epoch": 0.2055523742868656, "grad_norm": 0.46316421031951904, "learning_rate": 1.949280905331739e-05, "loss": 0.5213, "step": 9692 }, { "epoch": 0.20557358274479862, "grad_norm": 0.3609742820262909, "learning_rate": 1.9492704187348765e-05, "loss": 0.5741, "step": 9693 }, { "epoch": 0.20559479120273164, "grad_norm": 0.43453124165534973, "learning_rate": 1.949259931082244e-05, "loss": 0.5337, "step": 9694 }, { "epoch": 0.20561599966066468, "grad_norm": 0.31473854184150696, "learning_rate": 1.949249442373853e-05, "loss": 0.484, "step": 9695 }, { "epoch": 0.2056372081185977, "grad_norm": 0.31787440180778503, "learning_rate": 1.9492389526097147e-05, "loss": 0.5779, "step": 9696 }, { "epoch": 0.20565841657653072, "grad_norm": 0.34872791171073914, "learning_rate": 1.9492284617898408e-05, "loss": 0.6115, "step": 9697 }, { "epoch": 0.20567962503446374, "grad_norm": 0.3243468701839447, "learning_rate": 1.9492179699142437e-05, "loss": 0.4441, "step": 9698 }, { "epoch": 0.20570083349239676, "grad_norm": 0.3427657186985016, "learning_rate": 1.9492074769829345e-05, "loss": 0.5734, "step": 9699 }, { "epoch": 0.20572204195032978, "grad_norm": 0.3255818784236908, "learning_rate": 1.949196982995925e-05, "loss": 0.4977, "step": 9700 }, { "epoch": 0.20574325040826283, "grad_norm": 0.33903539180755615, "learning_rate": 1.949186487953227e-05, "loss": 0.4788, "step": 9701 }, { "epoch": 0.20576445886619585, "grad_norm": 0.33068013191223145, "learning_rate": 1.949175991854852e-05, "loss": 0.4809, "step": 9702 }, { "epoch": 0.20578566732412887, "grad_norm": 0.34447038173675537, "learning_rate": 1.9491654947008113e-05, "loss": 0.5366, "step": 9703 }, { "epoch": 0.2058068757820619, "grad_norm": 0.3453676402568817, "learning_rate": 1.9491549964911174e-05, "loss": 0.5116, "step": 9704 }, { "epoch": 0.2058280842399949, "grad_norm": 0.3192175626754761, "learning_rate": 1.9491444972257815e-05, "loss": 0.4557, "step": 9705 }, { "epoch": 0.20584929269792793, "grad_norm": 0.3107147812843323, "learning_rate": 1.9491339969048156e-05, "loss": 0.4948, "step": 9706 }, { "epoch": 0.20587050115586095, "grad_norm": 0.3372787535190582, "learning_rate": 1.9491234955282305e-05, "loss": 0.5269, "step": 9707 }, { "epoch": 0.205891709613794, "grad_norm": 0.37449392676353455, "learning_rate": 1.949112993096039e-05, "loss": 0.4308, "step": 9708 }, { "epoch": 0.205912918071727, "grad_norm": 0.3059205114841461, "learning_rate": 1.9491024896082517e-05, "loss": 0.435, "step": 9709 }, { "epoch": 0.20593412652966003, "grad_norm": 0.3700677752494812, "learning_rate": 1.9490919850648814e-05, "loss": 0.4814, "step": 9710 }, { "epoch": 0.20595533498759305, "grad_norm": 0.6697940826416016, "learning_rate": 1.949081479465939e-05, "loss": 0.534, "step": 9711 }, { "epoch": 0.20597654344552607, "grad_norm": 0.40152186155319214, "learning_rate": 1.9490709728114367e-05, "loss": 0.5733, "step": 9712 }, { "epoch": 0.2059977519034591, "grad_norm": 0.3727615475654602, "learning_rate": 1.9490604651013857e-05, "loss": 0.5225, "step": 9713 }, { "epoch": 0.2060189603613921, "grad_norm": 0.3328246474266052, "learning_rate": 1.9490499563357978e-05, "loss": 0.5163, "step": 9714 }, { "epoch": 0.20604016881932516, "grad_norm": 0.3191778361797333, "learning_rate": 1.9490394465146848e-05, "loss": 0.5207, "step": 9715 }, { "epoch": 0.20606137727725818, "grad_norm": 0.3148355185985565, "learning_rate": 1.9490289356380587e-05, "loss": 0.4797, "step": 9716 }, { "epoch": 0.2060825857351912, "grad_norm": 0.3337508738040924, "learning_rate": 1.9490184237059306e-05, "loss": 0.5146, "step": 9717 }, { "epoch": 0.20610379419312422, "grad_norm": 0.38221603631973267, "learning_rate": 1.9490079107183123e-05, "loss": 0.5813, "step": 9718 }, { "epoch": 0.20612500265105724, "grad_norm": 0.3679008185863495, "learning_rate": 1.9489973966752158e-05, "loss": 0.5586, "step": 9719 }, { "epoch": 0.20614621110899026, "grad_norm": 0.3318862318992615, "learning_rate": 1.9489868815766525e-05, "loss": 0.5736, "step": 9720 }, { "epoch": 0.20616741956692328, "grad_norm": 0.3400398790836334, "learning_rate": 1.9489763654226347e-05, "loss": 0.5, "step": 9721 }, { "epoch": 0.20618862802485632, "grad_norm": 0.3578912317752838, "learning_rate": 1.948965848213173e-05, "loss": 0.5231, "step": 9722 }, { "epoch": 0.20620983648278934, "grad_norm": 0.3494109511375427, "learning_rate": 1.9489553299482802e-05, "loss": 0.5656, "step": 9723 }, { "epoch": 0.20623104494072236, "grad_norm": 0.3358054459095001, "learning_rate": 1.948944810627967e-05, "loss": 0.5331, "step": 9724 }, { "epoch": 0.20625225339865538, "grad_norm": 0.322860985994339, "learning_rate": 1.9489342902522463e-05, "loss": 0.5182, "step": 9725 }, { "epoch": 0.2062734618565884, "grad_norm": 0.3273603022098541, "learning_rate": 1.948923768821129e-05, "loss": 0.4997, "step": 9726 }, { "epoch": 0.20629467031452142, "grad_norm": 0.36459407210350037, "learning_rate": 1.9489132463346264e-05, "loss": 0.5568, "step": 9727 }, { "epoch": 0.20631587877245444, "grad_norm": 0.33897775411605835, "learning_rate": 1.948902722792751e-05, "loss": 0.5379, "step": 9728 }, { "epoch": 0.2063370872303875, "grad_norm": 0.3252461850643158, "learning_rate": 1.9488921981955145e-05, "loss": 0.4873, "step": 9729 }, { "epoch": 0.2063582956883205, "grad_norm": 0.32636624574661255, "learning_rate": 1.948881672542928e-05, "loss": 0.5992, "step": 9730 }, { "epoch": 0.20637950414625353, "grad_norm": 0.32898667454719543, "learning_rate": 1.9488711458350038e-05, "loss": 0.5792, "step": 9731 }, { "epoch": 0.20640071260418655, "grad_norm": 0.32935062050819397, "learning_rate": 1.9488606180717534e-05, "loss": 0.5675, "step": 9732 }, { "epoch": 0.20642192106211957, "grad_norm": 0.37468600273132324, "learning_rate": 1.9488500892531883e-05, "loss": 0.5834, "step": 9733 }, { "epoch": 0.2064431295200526, "grad_norm": 0.33398282527923584, "learning_rate": 1.9488395593793204e-05, "loss": 0.4837, "step": 9734 }, { "epoch": 0.2064643379779856, "grad_norm": 0.36836791038513184, "learning_rate": 1.9488290284501613e-05, "loss": 0.5355, "step": 9735 }, { "epoch": 0.20648554643591865, "grad_norm": 0.37153705954551697, "learning_rate": 1.9488184964657228e-05, "loss": 0.5738, "step": 9736 }, { "epoch": 0.20650675489385167, "grad_norm": 0.3671020567417145, "learning_rate": 1.9488079634260165e-05, "loss": 0.6149, "step": 9737 }, { "epoch": 0.2065279633517847, "grad_norm": 0.33234903216362, "learning_rate": 1.9487974293310545e-05, "loss": 0.5523, "step": 9738 }, { "epoch": 0.2065491718097177, "grad_norm": 0.321664959192276, "learning_rate": 1.9487868941808483e-05, "loss": 0.5421, "step": 9739 }, { "epoch": 0.20657038026765073, "grad_norm": 0.39953532814979553, "learning_rate": 1.948776357975409e-05, "loss": 0.5098, "step": 9740 }, { "epoch": 0.20659158872558375, "grad_norm": 0.332243412733078, "learning_rate": 1.9487658207147496e-05, "loss": 0.5055, "step": 9741 }, { "epoch": 0.2066127971835168, "grad_norm": 0.3771342635154724, "learning_rate": 1.9487552823988808e-05, "loss": 0.4864, "step": 9742 }, { "epoch": 0.20663400564144982, "grad_norm": 0.31455132365226746, "learning_rate": 1.9487447430278146e-05, "loss": 0.5199, "step": 9743 }, { "epoch": 0.20665521409938284, "grad_norm": 0.31298667192459106, "learning_rate": 1.9487342026015628e-05, "loss": 0.5042, "step": 9744 }, { "epoch": 0.20667642255731586, "grad_norm": 0.32947713136672974, "learning_rate": 1.9487236611201367e-05, "loss": 0.4813, "step": 9745 }, { "epoch": 0.20669763101524888, "grad_norm": 0.27947089076042175, "learning_rate": 1.9487131185835488e-05, "loss": 0.4421, "step": 9746 }, { "epoch": 0.2067188394731819, "grad_norm": 0.33017852902412415, "learning_rate": 1.9487025749918102e-05, "loss": 0.5348, "step": 9747 }, { "epoch": 0.20674004793111492, "grad_norm": 0.3401745855808258, "learning_rate": 1.948692030344933e-05, "loss": 0.5558, "step": 9748 }, { "epoch": 0.20676125638904796, "grad_norm": 0.31693392992019653, "learning_rate": 1.9486814846429287e-05, "loss": 0.5984, "step": 9749 }, { "epoch": 0.20678246484698098, "grad_norm": 0.2953781187534332, "learning_rate": 1.948670937885809e-05, "loss": 0.4354, "step": 9750 }, { "epoch": 0.206803673304914, "grad_norm": 0.28694307804107666, "learning_rate": 1.9486603900735858e-05, "loss": 0.5164, "step": 9751 }, { "epoch": 0.20682488176284702, "grad_norm": 0.3402114808559418, "learning_rate": 1.948649841206271e-05, "loss": 0.5306, "step": 9752 }, { "epoch": 0.20684609022078004, "grad_norm": 0.2963560223579407, "learning_rate": 1.948639291283876e-05, "loss": 0.5215, "step": 9753 }, { "epoch": 0.20686729867871306, "grad_norm": 0.37692221999168396, "learning_rate": 1.9486287403064122e-05, "loss": 0.5509, "step": 9754 }, { "epoch": 0.20688850713664608, "grad_norm": 0.3230181336402893, "learning_rate": 1.9486181882738918e-05, "loss": 0.5364, "step": 9755 }, { "epoch": 0.20690971559457913, "grad_norm": 0.35902097821235657, "learning_rate": 1.948607635186327e-05, "loss": 0.5528, "step": 9756 }, { "epoch": 0.20693092405251215, "grad_norm": 0.3127518594264984, "learning_rate": 1.9485970810437286e-05, "loss": 0.5477, "step": 9757 }, { "epoch": 0.20695213251044517, "grad_norm": 0.3729197382926941, "learning_rate": 1.9485865258461093e-05, "loss": 0.5088, "step": 9758 }, { "epoch": 0.2069733409683782, "grad_norm": 0.6475042104721069, "learning_rate": 1.94857596959348e-05, "loss": 0.5216, "step": 9759 }, { "epoch": 0.2069945494263112, "grad_norm": 0.4537280797958374, "learning_rate": 1.9485654122858524e-05, "loss": 0.5333, "step": 9760 }, { "epoch": 0.20701575788424423, "grad_norm": 0.4026433229446411, "learning_rate": 1.948554853923239e-05, "loss": 0.5478, "step": 9761 }, { "epoch": 0.20703696634217725, "grad_norm": 0.32895681262016296, "learning_rate": 1.948544294505651e-05, "loss": 0.4743, "step": 9762 }, { "epoch": 0.2070581748001103, "grad_norm": 0.3714873492717743, "learning_rate": 1.9485337340331e-05, "loss": 0.5029, "step": 9763 }, { "epoch": 0.20707938325804331, "grad_norm": 0.3331719934940338, "learning_rate": 1.948523172505598e-05, "loss": 0.6007, "step": 9764 }, { "epoch": 0.20710059171597633, "grad_norm": 0.3547605872154236, "learning_rate": 1.9485126099231575e-05, "loss": 0.4127, "step": 9765 }, { "epoch": 0.20712180017390935, "grad_norm": 0.32851535081863403, "learning_rate": 1.948502046285789e-05, "loss": 0.5796, "step": 9766 }, { "epoch": 0.20714300863184237, "grad_norm": 0.33113977313041687, "learning_rate": 1.9484914815935046e-05, "loss": 0.5083, "step": 9767 }, { "epoch": 0.2071642170897754, "grad_norm": 0.3503640294075012, "learning_rate": 1.9484809158463166e-05, "loss": 0.5515, "step": 9768 }, { "epoch": 0.2071854255477084, "grad_norm": 0.29560282826423645, "learning_rate": 1.948470349044236e-05, "loss": 0.5007, "step": 9769 }, { "epoch": 0.20720663400564146, "grad_norm": 0.3377980887889862, "learning_rate": 1.9484597811872753e-05, "loss": 0.5548, "step": 9770 }, { "epoch": 0.20722784246357448, "grad_norm": 0.31610623002052307, "learning_rate": 1.948449212275446e-05, "loss": 0.5851, "step": 9771 }, { "epoch": 0.2072490509215075, "grad_norm": 0.33157360553741455, "learning_rate": 1.948438642308759e-05, "loss": 0.6304, "step": 9772 }, { "epoch": 0.20727025937944052, "grad_norm": 0.5877414345741272, "learning_rate": 1.9484280712872274e-05, "loss": 0.5507, "step": 9773 }, { "epoch": 0.20729146783737354, "grad_norm": 0.3357025980949402, "learning_rate": 1.948417499210862e-05, "loss": 0.4907, "step": 9774 }, { "epoch": 0.20731267629530656, "grad_norm": 0.38939565420150757, "learning_rate": 1.9484069260796752e-05, "loss": 0.646, "step": 9775 }, { "epoch": 0.2073338847532396, "grad_norm": 0.34526363015174866, "learning_rate": 1.9483963518936782e-05, "loss": 0.5358, "step": 9776 }, { "epoch": 0.20735509321117263, "grad_norm": 0.3560614287853241, "learning_rate": 1.9483857766528833e-05, "loss": 0.4543, "step": 9777 }, { "epoch": 0.20737630166910564, "grad_norm": 0.33962276577949524, "learning_rate": 1.948375200357302e-05, "loss": 0.5016, "step": 9778 }, { "epoch": 0.20739751012703866, "grad_norm": 0.4048258364200592, "learning_rate": 1.9483646230069456e-05, "loss": 0.512, "step": 9779 }, { "epoch": 0.20741871858497168, "grad_norm": 0.3765302002429962, "learning_rate": 1.9483540446018267e-05, "loss": 0.6166, "step": 9780 }, { "epoch": 0.2074399270429047, "grad_norm": 0.3339782655239105, "learning_rate": 1.9483434651419562e-05, "loss": 0.4885, "step": 9781 }, { "epoch": 0.20746113550083772, "grad_norm": 0.41906142234802246, "learning_rate": 1.9483328846273467e-05, "loss": 0.533, "step": 9782 }, { "epoch": 0.20748234395877077, "grad_norm": 0.3131132423877716, "learning_rate": 1.9483223030580095e-05, "loss": 0.4712, "step": 9783 }, { "epoch": 0.2075035524167038, "grad_norm": 0.3808681070804596, "learning_rate": 1.9483117204339566e-05, "loss": 0.5211, "step": 9784 }, { "epoch": 0.2075247608746368, "grad_norm": 0.3832640051841736, "learning_rate": 1.9483011367551997e-05, "loss": 0.5326, "step": 9785 }, { "epoch": 0.20754596933256983, "grad_norm": 0.30487534403800964, "learning_rate": 1.9482905520217502e-05, "loss": 0.4571, "step": 9786 }, { "epoch": 0.20756717779050285, "grad_norm": 0.3515581488609314, "learning_rate": 1.9482799662336203e-05, "loss": 0.5887, "step": 9787 }, { "epoch": 0.20758838624843587, "grad_norm": 0.32329005002975464, "learning_rate": 1.948269379390822e-05, "loss": 0.4415, "step": 9788 }, { "epoch": 0.2076095947063689, "grad_norm": 0.2986803948879242, "learning_rate": 1.9482587914933663e-05, "loss": 0.5388, "step": 9789 }, { "epoch": 0.20763080316430194, "grad_norm": 0.3268169164657593, "learning_rate": 1.9482482025412658e-05, "loss": 0.4494, "step": 9790 }, { "epoch": 0.20765201162223496, "grad_norm": 0.33956918120384216, "learning_rate": 1.9482376125345314e-05, "loss": 0.5133, "step": 9791 }, { "epoch": 0.20767322008016798, "grad_norm": 0.322596937417984, "learning_rate": 1.9482270214731756e-05, "loss": 0.4984, "step": 9792 }, { "epoch": 0.207694428538101, "grad_norm": 0.3425803482532501, "learning_rate": 1.9482164293572098e-05, "loss": 0.5583, "step": 9793 }, { "epoch": 0.20771563699603401, "grad_norm": 0.336758017539978, "learning_rate": 1.9482058361866464e-05, "loss": 0.4768, "step": 9794 }, { "epoch": 0.20773684545396703, "grad_norm": 0.2949553430080414, "learning_rate": 1.948195241961496e-05, "loss": 0.4527, "step": 9795 }, { "epoch": 0.20775805391190005, "grad_norm": 0.3256467580795288, "learning_rate": 1.948184646681772e-05, "loss": 0.5532, "step": 9796 }, { "epoch": 0.2077792623698331, "grad_norm": 0.35065239667892456, "learning_rate": 1.9481740503474845e-05, "loss": 0.5624, "step": 9797 }, { "epoch": 0.20780047082776612, "grad_norm": 0.331562876701355, "learning_rate": 1.9481634529586465e-05, "loss": 0.4905, "step": 9798 }, { "epoch": 0.20782167928569914, "grad_norm": 0.3866788148880005, "learning_rate": 1.9481528545152688e-05, "loss": 0.5129, "step": 9799 }, { "epoch": 0.20784288774363216, "grad_norm": 0.30806049704551697, "learning_rate": 1.9481422550173642e-05, "loss": 0.4614, "step": 9800 }, { "epoch": 0.20786409620156518, "grad_norm": 0.3562133014202118, "learning_rate": 1.948131654464944e-05, "loss": 0.608, "step": 9801 }, { "epoch": 0.2078853046594982, "grad_norm": 0.36231592297554016, "learning_rate": 1.9481210528580197e-05, "loss": 0.4859, "step": 9802 }, { "epoch": 0.20790651311743122, "grad_norm": 0.30602124333381653, "learning_rate": 1.9481104501966036e-05, "loss": 0.5303, "step": 9803 }, { "epoch": 0.20792772157536427, "grad_norm": 0.3270730674266815, "learning_rate": 1.9480998464807077e-05, "loss": 0.5122, "step": 9804 }, { "epoch": 0.20794893003329729, "grad_norm": 0.32457852363586426, "learning_rate": 1.9480892417103428e-05, "loss": 0.4946, "step": 9805 }, { "epoch": 0.2079701384912303, "grad_norm": 0.4639424681663513, "learning_rate": 1.9480786358855215e-05, "loss": 0.5281, "step": 9806 }, { "epoch": 0.20799134694916332, "grad_norm": 0.32488811016082764, "learning_rate": 1.9480680290062553e-05, "loss": 0.528, "step": 9807 }, { "epoch": 0.20801255540709634, "grad_norm": 0.3462659418582916, "learning_rate": 1.948057421072556e-05, "loss": 0.5067, "step": 9808 }, { "epoch": 0.20803376386502936, "grad_norm": 0.3246771991252899, "learning_rate": 1.9480468120844354e-05, "loss": 0.5311, "step": 9809 }, { "epoch": 0.20805497232296238, "grad_norm": 0.3506946563720703, "learning_rate": 1.9480362020419057e-05, "loss": 0.5273, "step": 9810 }, { "epoch": 0.20807618078089543, "grad_norm": 0.4088980555534363, "learning_rate": 1.9480255909449783e-05, "loss": 0.5051, "step": 9811 }, { "epoch": 0.20809738923882845, "grad_norm": 0.389499694108963, "learning_rate": 1.9480149787936652e-05, "loss": 0.5669, "step": 9812 }, { "epoch": 0.20811859769676147, "grad_norm": 0.31444329023361206, "learning_rate": 1.948004365587978e-05, "loss": 0.5207, "step": 9813 }, { "epoch": 0.2081398061546945, "grad_norm": 0.33695292472839355, "learning_rate": 1.9479937513279283e-05, "loss": 0.4714, "step": 9814 }, { "epoch": 0.2081610146126275, "grad_norm": 0.2957266867160797, "learning_rate": 1.9479831360135283e-05, "loss": 0.458, "step": 9815 }, { "epoch": 0.20818222307056053, "grad_norm": 0.3374098241329193, "learning_rate": 1.9479725196447897e-05, "loss": 0.4987, "step": 9816 }, { "epoch": 0.20820343152849358, "grad_norm": 0.2766788601875305, "learning_rate": 1.9479619022217245e-05, "loss": 0.4451, "step": 9817 }, { "epoch": 0.2082246399864266, "grad_norm": 0.3540465831756592, "learning_rate": 1.947951283744344e-05, "loss": 0.5691, "step": 9818 }, { "epoch": 0.20824584844435962, "grad_norm": 0.36483749747276306, "learning_rate": 1.9479406642126606e-05, "loss": 0.5531, "step": 9819 }, { "epoch": 0.20826705690229264, "grad_norm": 0.31927910447120667, "learning_rate": 1.947930043626686e-05, "loss": 0.5299, "step": 9820 }, { "epoch": 0.20828826536022566, "grad_norm": 0.3348275125026703, "learning_rate": 1.9479194219864313e-05, "loss": 0.4509, "step": 9821 }, { "epoch": 0.20830947381815867, "grad_norm": 0.3460718095302582, "learning_rate": 1.947908799291909e-05, "loss": 0.5123, "step": 9822 }, { "epoch": 0.2083306822760917, "grad_norm": 0.31133294105529785, "learning_rate": 1.947898175543131e-05, "loss": 0.5595, "step": 9823 }, { "epoch": 0.20835189073402474, "grad_norm": 0.34489676356315613, "learning_rate": 1.9478875507401086e-05, "loss": 0.6077, "step": 9824 }, { "epoch": 0.20837309919195776, "grad_norm": 0.3293651342391968, "learning_rate": 1.947876924882854e-05, "loss": 0.5266, "step": 9825 }, { "epoch": 0.20839430764989078, "grad_norm": 0.35194873809814453, "learning_rate": 1.947866297971379e-05, "loss": 0.5482, "step": 9826 }, { "epoch": 0.2084155161078238, "grad_norm": 0.35439208149909973, "learning_rate": 1.9478556700056956e-05, "loss": 0.5728, "step": 9827 }, { "epoch": 0.20843672456575682, "grad_norm": 0.3146975636482239, "learning_rate": 1.947845040985815e-05, "loss": 0.5422, "step": 9828 }, { "epoch": 0.20845793302368984, "grad_norm": 0.3051590621471405, "learning_rate": 1.9478344109117495e-05, "loss": 0.5156, "step": 9829 }, { "epoch": 0.20847914148162286, "grad_norm": 0.33421987295150757, "learning_rate": 1.9478237797835106e-05, "loss": 0.5328, "step": 9830 }, { "epoch": 0.2085003499395559, "grad_norm": 0.3432319462299347, "learning_rate": 1.9478131476011107e-05, "loss": 0.5405, "step": 9831 }, { "epoch": 0.20852155839748893, "grad_norm": 0.3160233497619629, "learning_rate": 1.9478025143645608e-05, "loss": 0.5112, "step": 9832 }, { "epoch": 0.20854276685542195, "grad_norm": 0.36737900972366333, "learning_rate": 1.9477918800738735e-05, "loss": 0.492, "step": 9833 }, { "epoch": 0.20856397531335497, "grad_norm": 0.31310588121414185, "learning_rate": 1.9477812447290603e-05, "loss": 0.5189, "step": 9834 }, { "epoch": 0.20858518377128799, "grad_norm": 0.31289324164390564, "learning_rate": 1.9477706083301328e-05, "loss": 0.542, "step": 9835 }, { "epoch": 0.208606392229221, "grad_norm": 0.3098519444465637, "learning_rate": 1.9477599708771033e-05, "loss": 0.5307, "step": 9836 }, { "epoch": 0.20862760068715402, "grad_norm": 0.3319796919822693, "learning_rate": 1.9477493323699833e-05, "loss": 0.4568, "step": 9837 }, { "epoch": 0.20864880914508707, "grad_norm": 0.3265455961227417, "learning_rate": 1.9477386928087843e-05, "loss": 0.4843, "step": 9838 }, { "epoch": 0.2086700176030201, "grad_norm": 0.2749060094356537, "learning_rate": 1.9477280521935193e-05, "loss": 0.4835, "step": 9839 }, { "epoch": 0.2086912260609531, "grad_norm": 0.3565599322319031, "learning_rate": 1.9477174105241993e-05, "loss": 0.5572, "step": 9840 }, { "epoch": 0.20871243451888613, "grad_norm": 0.48522502183914185, "learning_rate": 1.9477067678008356e-05, "loss": 0.5699, "step": 9841 }, { "epoch": 0.20873364297681915, "grad_norm": 0.34752652049064636, "learning_rate": 1.947696124023441e-05, "loss": 0.4669, "step": 9842 }, { "epoch": 0.20875485143475217, "grad_norm": 0.3975980877876282, "learning_rate": 1.947685479192027e-05, "loss": 0.5448, "step": 9843 }, { "epoch": 0.2087760598926852, "grad_norm": 0.3066483438014984, "learning_rate": 1.9476748333066058e-05, "loss": 0.4429, "step": 9844 }, { "epoch": 0.20879726835061824, "grad_norm": 0.3364622890949249, "learning_rate": 1.9476641863671884e-05, "loss": 0.4567, "step": 9845 }, { "epoch": 0.20881847680855126, "grad_norm": 0.3404618799686432, "learning_rate": 1.9476535383737873e-05, "loss": 0.6135, "step": 9846 }, { "epoch": 0.20883968526648428, "grad_norm": 0.36950090527534485, "learning_rate": 1.9476428893264143e-05, "loss": 0.5447, "step": 9847 }, { "epoch": 0.2088608937244173, "grad_norm": 0.36218249797821045, "learning_rate": 1.947632239225081e-05, "loss": 0.496, "step": 9848 }, { "epoch": 0.20888210218235032, "grad_norm": 0.3523678779602051, "learning_rate": 1.9476215880697997e-05, "loss": 0.4865, "step": 9849 }, { "epoch": 0.20890331064028334, "grad_norm": 0.3369922935962677, "learning_rate": 1.9476109358605814e-05, "loss": 0.4528, "step": 9850 }, { "epoch": 0.20892451909821638, "grad_norm": 0.33088815212249756, "learning_rate": 1.9476002825974383e-05, "loss": 0.4871, "step": 9851 }, { "epoch": 0.2089457275561494, "grad_norm": 0.3160775899887085, "learning_rate": 1.9475896282803827e-05, "loss": 0.5061, "step": 9852 }, { "epoch": 0.20896693601408242, "grad_norm": 0.3375539779663086, "learning_rate": 1.9475789729094262e-05, "loss": 0.646, "step": 9853 }, { "epoch": 0.20898814447201544, "grad_norm": 0.3548169434070587, "learning_rate": 1.9475683164845806e-05, "loss": 0.5841, "step": 9854 }, { "epoch": 0.20900935292994846, "grad_norm": 0.453222393989563, "learning_rate": 1.947557659005858e-05, "loss": 0.5114, "step": 9855 }, { "epoch": 0.20903056138788148, "grad_norm": 0.3489002585411072, "learning_rate": 1.9475470004732696e-05, "loss": 0.4824, "step": 9856 }, { "epoch": 0.2090517698458145, "grad_norm": 0.3432440161705017, "learning_rate": 1.9475363408868278e-05, "loss": 0.5874, "step": 9857 }, { "epoch": 0.20907297830374755, "grad_norm": 0.3064439296722412, "learning_rate": 1.947525680246544e-05, "loss": 0.5133, "step": 9858 }, { "epoch": 0.20909418676168057, "grad_norm": 0.34850189089775085, "learning_rate": 1.9475150185524308e-05, "loss": 0.5309, "step": 9859 }, { "epoch": 0.2091153952196136, "grad_norm": 0.36485061049461365, "learning_rate": 1.9475043558044994e-05, "loss": 0.6325, "step": 9860 }, { "epoch": 0.2091366036775466, "grad_norm": 0.30884087085723877, "learning_rate": 1.947493692002762e-05, "loss": 0.4409, "step": 9861 }, { "epoch": 0.20915781213547963, "grad_norm": 0.7605577111244202, "learning_rate": 1.9474830271472302e-05, "loss": 0.5794, "step": 9862 }, { "epoch": 0.20917902059341265, "grad_norm": 0.31645309925079346, "learning_rate": 1.947472361237916e-05, "loss": 0.5314, "step": 9863 }, { "epoch": 0.20920022905134567, "grad_norm": 0.6312916278839111, "learning_rate": 1.9474616942748316e-05, "loss": 0.4388, "step": 9864 }, { "epoch": 0.2092214375092787, "grad_norm": 0.4478392004966736, "learning_rate": 1.9474510262579884e-05, "loss": 0.5419, "step": 9865 }, { "epoch": 0.20924264596721173, "grad_norm": 0.31054434180259705, "learning_rate": 1.9474403571873982e-05, "loss": 0.5883, "step": 9866 }, { "epoch": 0.20926385442514475, "grad_norm": 0.329178124666214, "learning_rate": 1.9474296870630733e-05, "loss": 0.536, "step": 9867 }, { "epoch": 0.20928506288307777, "grad_norm": 0.39000874757766724, "learning_rate": 1.947419015885025e-05, "loss": 0.5352, "step": 9868 }, { "epoch": 0.2093062713410108, "grad_norm": 0.34349656105041504, "learning_rate": 1.9474083436532657e-05, "loss": 0.4956, "step": 9869 }, { "epoch": 0.2093274797989438, "grad_norm": 0.32160329818725586, "learning_rate": 1.9473976703678074e-05, "loss": 0.4911, "step": 9870 }, { "epoch": 0.20934868825687683, "grad_norm": 0.4057396352291107, "learning_rate": 1.947386996028661e-05, "loss": 0.5559, "step": 9871 }, { "epoch": 0.20936989671480988, "grad_norm": 0.3546459972858429, "learning_rate": 1.9473763206358394e-05, "loss": 0.5447, "step": 9872 }, { "epoch": 0.2093911051727429, "grad_norm": 0.38193634152412415, "learning_rate": 1.947365644189354e-05, "loss": 0.5627, "step": 9873 }, { "epoch": 0.20941231363067592, "grad_norm": 0.3303707242012024, "learning_rate": 1.947354966689217e-05, "loss": 0.5347, "step": 9874 }, { "epoch": 0.20943352208860894, "grad_norm": 4.1844048500061035, "learning_rate": 1.9473442881354398e-05, "loss": 0.4659, "step": 9875 }, { "epoch": 0.20945473054654196, "grad_norm": 0.385114848613739, "learning_rate": 1.9473336085280345e-05, "loss": 0.5122, "step": 9876 }, { "epoch": 0.20947593900447498, "grad_norm": 0.41427725553512573, "learning_rate": 1.947322927867013e-05, "loss": 0.5015, "step": 9877 }, { "epoch": 0.209497147462408, "grad_norm": 0.36278316378593445, "learning_rate": 1.947312246152387e-05, "loss": 0.4975, "step": 9878 }, { "epoch": 0.20951835592034104, "grad_norm": 0.3435783088207245, "learning_rate": 1.947301563384169e-05, "loss": 0.5754, "step": 9879 }, { "epoch": 0.20953956437827406, "grad_norm": 0.337531179189682, "learning_rate": 1.94729087956237e-05, "loss": 0.551, "step": 9880 }, { "epoch": 0.20956077283620708, "grad_norm": 0.33904048800468445, "learning_rate": 1.9472801946870024e-05, "loss": 0.5115, "step": 9881 }, { "epoch": 0.2095819812941401, "grad_norm": 0.362155944108963, "learning_rate": 1.947269508758078e-05, "loss": 0.533, "step": 9882 }, { "epoch": 0.20960318975207312, "grad_norm": 0.3078882396221161, "learning_rate": 1.947258821775609e-05, "loss": 0.5104, "step": 9883 }, { "epoch": 0.20962439821000614, "grad_norm": 0.3163241446018219, "learning_rate": 1.9472481337396067e-05, "loss": 0.5111, "step": 9884 }, { "epoch": 0.20964560666793916, "grad_norm": 0.3369506895542145, "learning_rate": 1.9472374446500834e-05, "loss": 0.5874, "step": 9885 }, { "epoch": 0.2096668151258722, "grad_norm": 0.3574679493904114, "learning_rate": 1.9472267545070506e-05, "loss": 0.5225, "step": 9886 }, { "epoch": 0.20968802358380523, "grad_norm": 0.3044567108154297, "learning_rate": 1.9472160633105205e-05, "loss": 0.4426, "step": 9887 }, { "epoch": 0.20970923204173825, "grad_norm": 0.32140159606933594, "learning_rate": 1.947205371060505e-05, "loss": 0.5026, "step": 9888 }, { "epoch": 0.20973044049967127, "grad_norm": 0.3061457574367523, "learning_rate": 1.947194677757016e-05, "loss": 0.4978, "step": 9889 }, { "epoch": 0.2097516489576043, "grad_norm": 0.2911575138568878, "learning_rate": 1.947183983400065e-05, "loss": 0.4876, "step": 9890 }, { "epoch": 0.2097728574155373, "grad_norm": 0.28598496317863464, "learning_rate": 1.9471732879896646e-05, "loss": 0.448, "step": 9891 }, { "epoch": 0.20979406587347035, "grad_norm": 0.32301297783851624, "learning_rate": 1.947162591525826e-05, "loss": 0.5402, "step": 9892 }, { "epoch": 0.20981527433140337, "grad_norm": 0.3989724814891815, "learning_rate": 1.9471518940085613e-05, "loss": 0.5392, "step": 9893 }, { "epoch": 0.2098364827893364, "grad_norm": 0.3395189940929413, "learning_rate": 1.9471411954378827e-05, "loss": 0.6006, "step": 9894 }, { "epoch": 0.2098576912472694, "grad_norm": 0.4686235785484314, "learning_rate": 1.947130495813802e-05, "loss": 0.5137, "step": 9895 }, { "epoch": 0.20987889970520243, "grad_norm": 0.3722090721130371, "learning_rate": 1.947119795136331e-05, "loss": 0.5974, "step": 9896 }, { "epoch": 0.20990010816313545, "grad_norm": 0.3102366626262665, "learning_rate": 1.9471090934054815e-05, "loss": 0.4543, "step": 9897 }, { "epoch": 0.20992131662106847, "grad_norm": 0.3697376251220703, "learning_rate": 1.9470983906212653e-05, "loss": 0.4516, "step": 9898 }, { "epoch": 0.20994252507900152, "grad_norm": 0.3330320417881012, "learning_rate": 1.947087686783695e-05, "loss": 0.529, "step": 9899 }, { "epoch": 0.20996373353693454, "grad_norm": 0.3710039258003235, "learning_rate": 1.9470769818927817e-05, "loss": 0.4725, "step": 9900 }, { "epoch": 0.20998494199486756, "grad_norm": 0.3223016858100891, "learning_rate": 1.947066275948537e-05, "loss": 0.5121, "step": 9901 }, { "epoch": 0.21000615045280058, "grad_norm": 0.3100949823856354, "learning_rate": 1.9470555689509742e-05, "loss": 0.4704, "step": 9902 }, { "epoch": 0.2100273589107336, "grad_norm": 0.34003758430480957, "learning_rate": 1.9470448609001043e-05, "loss": 0.4883, "step": 9903 }, { "epoch": 0.21004856736866662, "grad_norm": 0.4357720613479614, "learning_rate": 1.947034151795939e-05, "loss": 0.4792, "step": 9904 }, { "epoch": 0.21006977582659964, "grad_norm": 0.3814459443092346, "learning_rate": 1.947023441638491e-05, "loss": 0.5391, "step": 9905 }, { "epoch": 0.21009098428453268, "grad_norm": 0.3327101171016693, "learning_rate": 1.9470127304277713e-05, "loss": 0.5845, "step": 9906 }, { "epoch": 0.2101121927424657, "grad_norm": 0.3642885088920593, "learning_rate": 1.9470020181637928e-05, "loss": 0.4888, "step": 9907 }, { "epoch": 0.21013340120039872, "grad_norm": 1.907538652420044, "learning_rate": 1.9469913048465664e-05, "loss": 0.5248, "step": 9908 }, { "epoch": 0.21015460965833174, "grad_norm": 0.3315471410751343, "learning_rate": 1.9469805904761046e-05, "loss": 0.4959, "step": 9909 }, { "epoch": 0.21017581811626476, "grad_norm": 0.34916579723358154, "learning_rate": 1.9469698750524194e-05, "loss": 0.4958, "step": 9910 }, { "epoch": 0.21019702657419778, "grad_norm": 0.3118217885494232, "learning_rate": 1.9469591585755222e-05, "loss": 0.5013, "step": 9911 }, { "epoch": 0.2102182350321308, "grad_norm": 0.37302979826927185, "learning_rate": 1.9469484410454257e-05, "loss": 0.5288, "step": 9912 }, { "epoch": 0.21023944349006385, "grad_norm": 0.3802984058856964, "learning_rate": 1.9469377224621412e-05, "loss": 0.5323, "step": 9913 }, { "epoch": 0.21026065194799687, "grad_norm": 0.3953332006931305, "learning_rate": 1.9469270028256807e-05, "loss": 0.5198, "step": 9914 }, { "epoch": 0.2102818604059299, "grad_norm": 0.3764856457710266, "learning_rate": 1.9469162821360564e-05, "loss": 0.5863, "step": 9915 }, { "epoch": 0.2103030688638629, "grad_norm": 0.38438212871551514, "learning_rate": 1.9469055603932797e-05, "loss": 0.505, "step": 9916 }, { "epoch": 0.21032427732179593, "grad_norm": 0.788210391998291, "learning_rate": 1.946894837597363e-05, "loss": 0.5004, "step": 9917 }, { "epoch": 0.21034548577972895, "grad_norm": 0.337276428937912, "learning_rate": 1.9468841137483183e-05, "loss": 0.5625, "step": 9918 }, { "epoch": 0.21036669423766197, "grad_norm": 0.39534369111061096, "learning_rate": 1.946873388846157e-05, "loss": 0.5397, "step": 9919 }, { "epoch": 0.210387902695595, "grad_norm": 0.3639046549797058, "learning_rate": 1.9468626628908915e-05, "loss": 0.4871, "step": 9920 }, { "epoch": 0.21040911115352803, "grad_norm": 0.34591060876846313, "learning_rate": 1.9468519358825336e-05, "loss": 0.5464, "step": 9921 }, { "epoch": 0.21043031961146105, "grad_norm": 0.31894081830978394, "learning_rate": 1.946841207821095e-05, "loss": 0.5509, "step": 9922 }, { "epoch": 0.21045152806939407, "grad_norm": 0.3636469542980194, "learning_rate": 1.946830478706588e-05, "loss": 0.5485, "step": 9923 }, { "epoch": 0.2104727365273271, "grad_norm": 0.3349165916442871, "learning_rate": 1.9468197485390244e-05, "loss": 0.5258, "step": 9924 }, { "epoch": 0.2104939449852601, "grad_norm": 0.3251776695251465, "learning_rate": 1.9468090173184163e-05, "loss": 0.4898, "step": 9925 }, { "epoch": 0.21051515344319316, "grad_norm": 0.425371378660202, "learning_rate": 1.946798285044775e-05, "loss": 0.5712, "step": 9926 }, { "epoch": 0.21053636190112618, "grad_norm": 0.38584792613983154, "learning_rate": 1.946787551718113e-05, "loss": 0.4904, "step": 9927 }, { "epoch": 0.2105575703590592, "grad_norm": 0.2851002514362335, "learning_rate": 1.946776817338442e-05, "loss": 0.4111, "step": 9928 }, { "epoch": 0.21057877881699222, "grad_norm": 0.37340763211250305, "learning_rate": 1.9467660819057747e-05, "loss": 0.459, "step": 9929 }, { "epoch": 0.21059998727492524, "grad_norm": 0.35238057374954224, "learning_rate": 1.946755345420122e-05, "loss": 0.5322, "step": 9930 }, { "epoch": 0.21062119573285826, "grad_norm": 0.34914883971214294, "learning_rate": 1.9467446078814957e-05, "loss": 0.5043, "step": 9931 }, { "epoch": 0.21064240419079128, "grad_norm": 0.3858893811702728, "learning_rate": 1.946733869289909e-05, "loss": 0.5646, "step": 9932 }, { "epoch": 0.21066361264872432, "grad_norm": 0.35859569907188416, "learning_rate": 1.946723129645373e-05, "loss": 0.5274, "step": 9933 }, { "epoch": 0.21068482110665734, "grad_norm": 0.33980563282966614, "learning_rate": 1.9467123889478993e-05, "loss": 0.4973, "step": 9934 }, { "epoch": 0.21070602956459036, "grad_norm": 0.33287858963012695, "learning_rate": 1.9467016471975007e-05, "loss": 0.4795, "step": 9935 }, { "epoch": 0.21072723802252338, "grad_norm": 0.3502678871154785, "learning_rate": 1.9466909043941887e-05, "loss": 0.5271, "step": 9936 }, { "epoch": 0.2107484464804564, "grad_norm": 0.33855724334716797, "learning_rate": 1.9466801605379754e-05, "loss": 0.4817, "step": 9937 }, { "epoch": 0.21076965493838942, "grad_norm": 0.29149582982063293, "learning_rate": 1.9466694156288724e-05, "loss": 0.459, "step": 9938 }, { "epoch": 0.21079086339632244, "grad_norm": 0.3340209126472473, "learning_rate": 1.9466586696668918e-05, "loss": 0.5098, "step": 9939 }, { "epoch": 0.2108120718542555, "grad_norm": 0.3232889175415039, "learning_rate": 1.946647922652046e-05, "loss": 0.487, "step": 9940 }, { "epoch": 0.2108332803121885, "grad_norm": 0.32741817831993103, "learning_rate": 1.9466371745843465e-05, "loss": 0.4995, "step": 9941 }, { "epoch": 0.21085448877012153, "grad_norm": 0.34368523955345154, "learning_rate": 1.9466264254638052e-05, "loss": 0.5264, "step": 9942 }, { "epoch": 0.21087569722805455, "grad_norm": 0.3319767415523529, "learning_rate": 1.9466156752904344e-05, "loss": 0.4788, "step": 9943 }, { "epoch": 0.21089690568598757, "grad_norm": 0.3051202595233917, "learning_rate": 1.9466049240642457e-05, "loss": 0.4384, "step": 9944 }, { "epoch": 0.2109181141439206, "grad_norm": 0.3856049180030823, "learning_rate": 1.9465941717852513e-05, "loss": 0.5383, "step": 9945 }, { "epoch": 0.2109393226018536, "grad_norm": 0.5228453278541565, "learning_rate": 1.946583418453463e-05, "loss": 0.5823, "step": 9946 }, { "epoch": 0.21096053105978665, "grad_norm": 0.3449718952178955, "learning_rate": 1.946572664068893e-05, "loss": 0.5229, "step": 9947 }, { "epoch": 0.21098173951771967, "grad_norm": 0.6712345480918884, "learning_rate": 1.946561908631553e-05, "loss": 0.5146, "step": 9948 }, { "epoch": 0.2110029479756527, "grad_norm": 15.537702560424805, "learning_rate": 1.946551152141455e-05, "loss": 0.5658, "step": 9949 }, { "epoch": 0.2110241564335857, "grad_norm": 0.37445998191833496, "learning_rate": 1.9465403945986114e-05, "loss": 0.5949, "step": 9950 }, { "epoch": 0.21104536489151873, "grad_norm": 0.32991907000541687, "learning_rate": 1.9465296360030335e-05, "loss": 0.573, "step": 9951 }, { "epoch": 0.21106657334945175, "grad_norm": 0.3463308811187744, "learning_rate": 1.9465188763547335e-05, "loss": 0.5869, "step": 9952 }, { "epoch": 0.21108778180738477, "grad_norm": 0.38223639130592346, "learning_rate": 1.9465081156537237e-05, "loss": 0.6027, "step": 9953 }, { "epoch": 0.21110899026531782, "grad_norm": 0.3302719295024872, "learning_rate": 1.9464973539000157e-05, "loss": 0.5692, "step": 9954 }, { "epoch": 0.21113019872325084, "grad_norm": 0.33970779180526733, "learning_rate": 1.9464865910936214e-05, "loss": 0.5343, "step": 9955 }, { "epoch": 0.21115140718118386, "grad_norm": 0.37234851717948914, "learning_rate": 1.9464758272345533e-05, "loss": 0.6604, "step": 9956 }, { "epoch": 0.21117261563911688, "grad_norm": 0.32192108035087585, "learning_rate": 1.9464650623228225e-05, "loss": 0.4604, "step": 9957 }, { "epoch": 0.2111938240970499, "grad_norm": 0.35365745425224304, "learning_rate": 1.9464542963584418e-05, "loss": 0.5213, "step": 9958 }, { "epoch": 0.21121503255498292, "grad_norm": 0.3274790942668915, "learning_rate": 1.9464435293414227e-05, "loss": 0.5145, "step": 9959 }, { "epoch": 0.21123624101291594, "grad_norm": 0.342416375875473, "learning_rate": 1.9464327612717776e-05, "loss": 0.5159, "step": 9960 }, { "epoch": 0.21125744947084898, "grad_norm": 0.32414671778678894, "learning_rate": 1.946421992149518e-05, "loss": 0.4607, "step": 9961 }, { "epoch": 0.211278657928782, "grad_norm": 0.33947858214378357, "learning_rate": 1.946411221974656e-05, "loss": 0.4857, "step": 9962 }, { "epoch": 0.21129986638671502, "grad_norm": 0.3646588921546936, "learning_rate": 1.9464004507472038e-05, "loss": 0.5447, "step": 9963 }, { "epoch": 0.21132107484464804, "grad_norm": 0.3618296682834625, "learning_rate": 1.9463896784671733e-05, "loss": 0.5195, "step": 9964 }, { "epoch": 0.21134228330258106, "grad_norm": 0.32138070464134216, "learning_rate": 1.9463789051345763e-05, "loss": 0.4519, "step": 9965 }, { "epoch": 0.21136349176051408, "grad_norm": 0.3596726059913635, "learning_rate": 1.946368130749425e-05, "loss": 0.5374, "step": 9966 }, { "epoch": 0.21138470021844713, "grad_norm": 0.3213684558868408, "learning_rate": 1.9463573553117313e-05, "loss": 0.4497, "step": 9967 }, { "epoch": 0.21140590867638015, "grad_norm": 0.338617205619812, "learning_rate": 1.946346578821507e-05, "loss": 0.5596, "step": 9968 }, { "epoch": 0.21142711713431317, "grad_norm": 0.3373337686061859, "learning_rate": 1.9463358012787642e-05, "loss": 0.6002, "step": 9969 }, { "epoch": 0.2114483255922462, "grad_norm": 0.3679533004760742, "learning_rate": 1.946325022683515e-05, "loss": 0.4663, "step": 9970 }, { "epoch": 0.2114695340501792, "grad_norm": 0.40053582191467285, "learning_rate": 1.9463142430357713e-05, "loss": 0.5142, "step": 9971 }, { "epoch": 0.21149074250811223, "grad_norm": 0.31279319524765015, "learning_rate": 1.9463034623355455e-05, "loss": 0.468, "step": 9972 }, { "epoch": 0.21151195096604525, "grad_norm": 0.34092605113983154, "learning_rate": 1.9462926805828488e-05, "loss": 0.495, "step": 9973 }, { "epoch": 0.2115331594239783, "grad_norm": 0.3634423613548279, "learning_rate": 1.9462818977776937e-05, "loss": 0.5355, "step": 9974 }, { "epoch": 0.21155436788191131, "grad_norm": 0.3126707971096039, "learning_rate": 1.9462711139200923e-05, "loss": 0.416, "step": 9975 }, { "epoch": 0.21157557633984433, "grad_norm": 0.34010010957717896, "learning_rate": 1.9462603290100562e-05, "loss": 0.561, "step": 9976 }, { "epoch": 0.21159678479777735, "grad_norm": 0.37059593200683594, "learning_rate": 1.9462495430475976e-05, "loss": 0.5118, "step": 9977 }, { "epoch": 0.21161799325571037, "grad_norm": 0.3547059893608093, "learning_rate": 1.9462387560327283e-05, "loss": 0.5083, "step": 9978 }, { "epoch": 0.2116392017136434, "grad_norm": 0.3209668695926666, "learning_rate": 1.9462279679654606e-05, "loss": 0.5339, "step": 9979 }, { "epoch": 0.2116604101715764, "grad_norm": 0.3397533893585205, "learning_rate": 1.9462171788458066e-05, "loss": 0.5875, "step": 9980 }, { "epoch": 0.21168161862950946, "grad_norm": 0.3569956421852112, "learning_rate": 1.946206388673778e-05, "loss": 0.5577, "step": 9981 }, { "epoch": 0.21170282708744248, "grad_norm": 0.3179718554019928, "learning_rate": 1.9461955974493867e-05, "loss": 0.5084, "step": 9982 }, { "epoch": 0.2117240355453755, "grad_norm": 0.4007760286331177, "learning_rate": 1.9461848051726447e-05, "loss": 0.5206, "step": 9983 }, { "epoch": 0.21174524400330852, "grad_norm": 0.40049058198928833, "learning_rate": 1.9461740118435646e-05, "loss": 0.5443, "step": 9984 }, { "epoch": 0.21176645246124154, "grad_norm": 0.2928086817264557, "learning_rate": 1.946163217462158e-05, "loss": 0.5053, "step": 9985 }, { "epoch": 0.21178766091917456, "grad_norm": 0.34127435088157654, "learning_rate": 1.9461524220284364e-05, "loss": 0.5711, "step": 9986 }, { "epoch": 0.21180886937710758, "grad_norm": 0.4015769064426422, "learning_rate": 1.9461416255424127e-05, "loss": 0.5707, "step": 9987 }, { "epoch": 0.21183007783504063, "grad_norm": 0.33723747730255127, "learning_rate": 1.9461308280040986e-05, "loss": 0.5745, "step": 9988 }, { "epoch": 0.21185128629297365, "grad_norm": 0.5190816521644592, "learning_rate": 1.9461200294135057e-05, "loss": 0.5166, "step": 9989 }, { "epoch": 0.21187249475090666, "grad_norm": 0.33902931213378906, "learning_rate": 1.9461092297706464e-05, "loss": 0.58, "step": 9990 }, { "epoch": 0.21189370320883968, "grad_norm": 0.3774922490119934, "learning_rate": 1.9460984290755326e-05, "loss": 0.5651, "step": 9991 }, { "epoch": 0.2119149116667727, "grad_norm": 0.558348536491394, "learning_rate": 1.9460876273281766e-05, "loss": 0.5352, "step": 9992 }, { "epoch": 0.21193612012470572, "grad_norm": 0.32228776812553406, "learning_rate": 1.94607682452859e-05, "loss": 0.4558, "step": 9993 }, { "epoch": 0.21195732858263874, "grad_norm": 0.33424049615859985, "learning_rate": 1.946066020676785e-05, "loss": 0.5227, "step": 9994 }, { "epoch": 0.2119785370405718, "grad_norm": 0.32843536138534546, "learning_rate": 1.9460552157727733e-05, "loss": 0.4243, "step": 9995 }, { "epoch": 0.2119997454985048, "grad_norm": 0.3123900890350342, "learning_rate": 1.9460444098165675e-05, "loss": 0.4398, "step": 9996 }, { "epoch": 0.21202095395643783, "grad_norm": 0.3013211190700531, "learning_rate": 1.9460336028081794e-05, "loss": 0.4876, "step": 9997 }, { "epoch": 0.21204216241437085, "grad_norm": 0.31335148215293884, "learning_rate": 1.946022794747621e-05, "loss": 0.4473, "step": 9998 }, { "epoch": 0.21206337087230387, "grad_norm": 0.3459216356277466, "learning_rate": 1.946011985634904e-05, "loss": 0.5353, "step": 9999 }, { "epoch": 0.2120845793302369, "grad_norm": 0.34628185629844666, "learning_rate": 1.9460011754700407e-05, "loss": 0.4355, "step": 10000 }, { "epoch": 0.2121057877881699, "grad_norm": 0.3185775578022003, "learning_rate": 1.945990364253043e-05, "loss": 0.4734, "step": 10001 }, { "epoch": 0.21212699624610296, "grad_norm": 0.31767332553863525, "learning_rate": 1.9459795519839237e-05, "loss": 0.4893, "step": 10002 }, { "epoch": 0.21214820470403598, "grad_norm": 0.3027952313423157, "learning_rate": 1.9459687386626932e-05, "loss": 0.4328, "step": 10003 }, { "epoch": 0.212169413161969, "grad_norm": 0.34773024916648865, "learning_rate": 1.9459579242893652e-05, "loss": 0.5218, "step": 10004 }, { "epoch": 0.21219062161990201, "grad_norm": 0.3809153139591217, "learning_rate": 1.9459471088639507e-05, "loss": 0.5647, "step": 10005 }, { "epoch": 0.21221183007783503, "grad_norm": 0.3866242468357086, "learning_rate": 1.945936292386462e-05, "loss": 0.5057, "step": 10006 }, { "epoch": 0.21223303853576805, "grad_norm": 0.3714812994003296, "learning_rate": 1.9459254748569114e-05, "loss": 0.5944, "step": 10007 }, { "epoch": 0.2122542469937011, "grad_norm": 0.3832508325576782, "learning_rate": 1.9459146562753105e-05, "loss": 0.6001, "step": 10008 }, { "epoch": 0.21227545545163412, "grad_norm": 0.34299352765083313, "learning_rate": 1.9459038366416717e-05, "loss": 0.5284, "step": 10009 }, { "epoch": 0.21229666390956714, "grad_norm": 0.31567394733428955, "learning_rate": 1.945893015956007e-05, "loss": 0.5135, "step": 10010 }, { "epoch": 0.21231787236750016, "grad_norm": 1.6541125774383545, "learning_rate": 1.945882194218328e-05, "loss": 0.5372, "step": 10011 }, { "epoch": 0.21233908082543318, "grad_norm": 0.33167392015457153, "learning_rate": 1.9458713714286472e-05, "loss": 0.5297, "step": 10012 }, { "epoch": 0.2123602892833662, "grad_norm": 0.34786853194236755, "learning_rate": 1.9458605475869765e-05, "loss": 0.5791, "step": 10013 }, { "epoch": 0.21238149774129922, "grad_norm": 0.3650001287460327, "learning_rate": 1.9458497226933278e-05, "loss": 0.5436, "step": 10014 }, { "epoch": 0.21240270619923227, "grad_norm": 0.34911566972732544, "learning_rate": 1.9458388967477133e-05, "loss": 0.5544, "step": 10015 }, { "epoch": 0.21242391465716529, "grad_norm": 0.3458707332611084, "learning_rate": 1.945828069750145e-05, "loss": 0.5815, "step": 10016 }, { "epoch": 0.2124451231150983, "grad_norm": 0.4176730811595917, "learning_rate": 1.9458172417006347e-05, "loss": 0.5762, "step": 10017 }, { "epoch": 0.21246633157303133, "grad_norm": 0.34517937898635864, "learning_rate": 1.945806412599195e-05, "loss": 0.524, "step": 10018 }, { "epoch": 0.21248754003096434, "grad_norm": 0.3363981544971466, "learning_rate": 1.9457955824458373e-05, "loss": 0.5864, "step": 10019 }, { "epoch": 0.21250874848889736, "grad_norm": 0.342532753944397, "learning_rate": 1.9457847512405745e-05, "loss": 0.5579, "step": 10020 }, { "epoch": 0.21252995694683038, "grad_norm": 0.35505300760269165, "learning_rate": 1.945773918983418e-05, "loss": 0.3995, "step": 10021 }, { "epoch": 0.21255116540476343, "grad_norm": 0.3402644991874695, "learning_rate": 1.9457630856743797e-05, "loss": 0.5266, "step": 10022 }, { "epoch": 0.21257237386269645, "grad_norm": 0.38319793343544006, "learning_rate": 1.945752251313472e-05, "loss": 0.5122, "step": 10023 }, { "epoch": 0.21259358232062947, "grad_norm": 0.40885838866233826, "learning_rate": 1.9457414159007067e-05, "loss": 0.4588, "step": 10024 }, { "epoch": 0.2126147907785625, "grad_norm": 0.35361650586128235, "learning_rate": 1.9457305794360964e-05, "loss": 0.5974, "step": 10025 }, { "epoch": 0.2126359992364955, "grad_norm": 0.31941455602645874, "learning_rate": 1.9457197419196527e-05, "loss": 0.5609, "step": 10026 }, { "epoch": 0.21265720769442853, "grad_norm": 0.3498286306858063, "learning_rate": 1.9457089033513875e-05, "loss": 0.5458, "step": 10027 }, { "epoch": 0.21267841615236155, "grad_norm": 0.3563653230667114, "learning_rate": 1.945698063731313e-05, "loss": 0.5204, "step": 10028 }, { "epoch": 0.2126996246102946, "grad_norm": 0.3305646479129791, "learning_rate": 1.9456872230594413e-05, "loss": 0.6105, "step": 10029 }, { "epoch": 0.21272083306822762, "grad_norm": 0.29885774850845337, "learning_rate": 1.9456763813357847e-05, "loss": 0.4243, "step": 10030 }, { "epoch": 0.21274204152616064, "grad_norm": 0.40285271406173706, "learning_rate": 1.945665538560355e-05, "loss": 0.5057, "step": 10031 }, { "epoch": 0.21276324998409366, "grad_norm": 0.3497054874897003, "learning_rate": 1.945654694733164e-05, "loss": 0.4763, "step": 10032 }, { "epoch": 0.21278445844202667, "grad_norm": 0.30777299404144287, "learning_rate": 1.9456438498542244e-05, "loss": 0.4045, "step": 10033 }, { "epoch": 0.2128056668999597, "grad_norm": 0.37256231904029846, "learning_rate": 1.9456330039235482e-05, "loss": 0.5334, "step": 10034 }, { "epoch": 0.21282687535789271, "grad_norm": 0.39093878865242004, "learning_rate": 1.9456221569411467e-05, "loss": 0.5358, "step": 10035 }, { "epoch": 0.21284808381582576, "grad_norm": 0.36404260993003845, "learning_rate": 1.9456113089070326e-05, "loss": 0.469, "step": 10036 }, { "epoch": 0.21286929227375878, "grad_norm": 0.32751351594924927, "learning_rate": 1.945600459821218e-05, "loss": 0.4159, "step": 10037 }, { "epoch": 0.2128905007316918, "grad_norm": 0.34716475009918213, "learning_rate": 1.9455896096837147e-05, "loss": 0.4954, "step": 10038 }, { "epoch": 0.21291170918962482, "grad_norm": 0.33776405453681946, "learning_rate": 1.9455787584945346e-05, "loss": 0.5884, "step": 10039 }, { "epoch": 0.21293291764755784, "grad_norm": 0.3297989070415497, "learning_rate": 1.94556790625369e-05, "loss": 0.4751, "step": 10040 }, { "epoch": 0.21295412610549086, "grad_norm": 0.3294879198074341, "learning_rate": 1.9455570529611933e-05, "loss": 0.5531, "step": 10041 }, { "epoch": 0.2129753345634239, "grad_norm": 0.31157100200653076, "learning_rate": 1.9455461986170564e-05, "loss": 0.5534, "step": 10042 }, { "epoch": 0.21299654302135693, "grad_norm": 0.4704820513725281, "learning_rate": 1.9455353432212908e-05, "loss": 0.507, "step": 10043 }, { "epoch": 0.21301775147928995, "grad_norm": 0.3243016302585602, "learning_rate": 1.945524486773909e-05, "loss": 0.4729, "step": 10044 }, { "epoch": 0.21303895993722297, "grad_norm": 0.345162034034729, "learning_rate": 1.9455136292749235e-05, "loss": 0.4819, "step": 10045 }, { "epoch": 0.21306016839515599, "grad_norm": 0.30652278661727905, "learning_rate": 1.9455027707243457e-05, "loss": 0.487, "step": 10046 }, { "epoch": 0.213081376853089, "grad_norm": 0.31171655654907227, "learning_rate": 1.9454919111221878e-05, "loss": 0.4737, "step": 10047 }, { "epoch": 0.21310258531102202, "grad_norm": 0.34305575489997864, "learning_rate": 1.945481050468462e-05, "loss": 0.5035, "step": 10048 }, { "epoch": 0.21312379376895507, "grad_norm": 0.33369138836860657, "learning_rate": 1.9454701887631807e-05, "loss": 0.5982, "step": 10049 }, { "epoch": 0.2131450022268881, "grad_norm": 0.3040520250797272, "learning_rate": 1.9454593260063556e-05, "loss": 0.4645, "step": 10050 }, { "epoch": 0.2131662106848211, "grad_norm": 0.3348599374294281, "learning_rate": 1.9454484621979987e-05, "loss": 0.5133, "step": 10051 }, { "epoch": 0.21318741914275413, "grad_norm": 0.35981589555740356, "learning_rate": 1.9454375973381223e-05, "loss": 0.5028, "step": 10052 }, { "epoch": 0.21320862760068715, "grad_norm": 0.33297693729400635, "learning_rate": 1.9454267314267385e-05, "loss": 0.4658, "step": 10053 }, { "epoch": 0.21322983605862017, "grad_norm": 0.3139282166957855, "learning_rate": 1.945415864463859e-05, "loss": 0.5428, "step": 10054 }, { "epoch": 0.2132510445165532, "grad_norm": 0.36750540137290955, "learning_rate": 1.9454049964494968e-05, "loss": 0.548, "step": 10055 }, { "epoch": 0.21327225297448624, "grad_norm": 0.3287835121154785, "learning_rate": 1.9453941273836627e-05, "loss": 0.5237, "step": 10056 }, { "epoch": 0.21329346143241926, "grad_norm": 0.36291953921318054, "learning_rate": 1.94538325726637e-05, "loss": 0.5189, "step": 10057 }, { "epoch": 0.21331466989035228, "grad_norm": 0.3851301372051239, "learning_rate": 1.9453723860976298e-05, "loss": 0.4633, "step": 10058 }, { "epoch": 0.2133358783482853, "grad_norm": 0.34230515360832214, "learning_rate": 1.9453615138774548e-05, "loss": 0.4473, "step": 10059 }, { "epoch": 0.21335708680621832, "grad_norm": 0.35608041286468506, "learning_rate": 1.945350640605857e-05, "loss": 0.4899, "step": 10060 }, { "epoch": 0.21337829526415134, "grad_norm": 0.3139006495475769, "learning_rate": 1.9453397662828486e-05, "loss": 0.4936, "step": 10061 }, { "epoch": 0.21339950372208435, "grad_norm": 0.3511347472667694, "learning_rate": 1.9453288909084412e-05, "loss": 0.5381, "step": 10062 }, { "epoch": 0.2134207121800174, "grad_norm": 0.3458736538887024, "learning_rate": 1.9453180144826473e-05, "loss": 0.6246, "step": 10063 }, { "epoch": 0.21344192063795042, "grad_norm": 0.3439004719257355, "learning_rate": 1.945307137005479e-05, "loss": 0.4635, "step": 10064 }, { "epoch": 0.21346312909588344, "grad_norm": 0.3305276036262512, "learning_rate": 1.9452962584769484e-05, "loss": 0.4817, "step": 10065 }, { "epoch": 0.21348433755381646, "grad_norm": 0.35635408759117126, "learning_rate": 1.9452853788970675e-05, "loss": 0.5834, "step": 10066 }, { "epoch": 0.21350554601174948, "grad_norm": 0.31465694308280945, "learning_rate": 1.9452744982658482e-05, "loss": 0.5283, "step": 10067 }, { "epoch": 0.2135267544696825, "grad_norm": 0.36885663866996765, "learning_rate": 1.945263616583303e-05, "loss": 0.5699, "step": 10068 }, { "epoch": 0.21354796292761552, "grad_norm": 0.28675705194473267, "learning_rate": 1.9452527338494435e-05, "loss": 0.4269, "step": 10069 }, { "epoch": 0.21356917138554857, "grad_norm": 0.29971638321876526, "learning_rate": 1.9452418500642824e-05, "loss": 0.5339, "step": 10070 }, { "epoch": 0.2135903798434816, "grad_norm": 0.33178848028182983, "learning_rate": 1.9452309652278315e-05, "loss": 0.5111, "step": 10071 }, { "epoch": 0.2136115883014146, "grad_norm": 0.31927523016929626, "learning_rate": 1.945220079340103e-05, "loss": 0.5407, "step": 10072 }, { "epoch": 0.21363279675934763, "grad_norm": 0.3292485177516937, "learning_rate": 1.9452091924011086e-05, "loss": 0.5824, "step": 10073 }, { "epoch": 0.21365400521728065, "grad_norm": 0.3692556917667389, "learning_rate": 1.945198304410861e-05, "loss": 0.4791, "step": 10074 }, { "epoch": 0.21367521367521367, "grad_norm": 0.3109048306941986, "learning_rate": 1.9451874153693723e-05, "loss": 0.5352, "step": 10075 }, { "epoch": 0.21369642213314669, "grad_norm": 0.42555782198905945, "learning_rate": 1.945176525276654e-05, "loss": 0.4963, "step": 10076 }, { "epoch": 0.21371763059107973, "grad_norm": 0.3672058582305908, "learning_rate": 1.9451656341327188e-05, "loss": 0.5112, "step": 10077 }, { "epoch": 0.21373883904901275, "grad_norm": 0.3242381811141968, "learning_rate": 1.9451547419375787e-05, "loss": 0.5068, "step": 10078 }, { "epoch": 0.21376004750694577, "grad_norm": 0.31374019384384155, "learning_rate": 1.9451438486912453e-05, "loss": 0.4959, "step": 10079 }, { "epoch": 0.2137812559648788, "grad_norm": 0.34414902329444885, "learning_rate": 1.9451329543937315e-05, "loss": 0.5464, "step": 10080 }, { "epoch": 0.2138024644228118, "grad_norm": 0.28725290298461914, "learning_rate": 1.9451220590450488e-05, "loss": 0.3575, "step": 10081 }, { "epoch": 0.21382367288074483, "grad_norm": 0.3409334123134613, "learning_rate": 1.9451111626452098e-05, "loss": 0.4447, "step": 10082 }, { "epoch": 0.21384488133867788, "grad_norm": 0.37120407819747925, "learning_rate": 1.9451002651942258e-05, "loss": 0.6115, "step": 10083 }, { "epoch": 0.2138660897966109, "grad_norm": 0.3252631425857544, "learning_rate": 1.94508936669211e-05, "loss": 0.3991, "step": 10084 }, { "epoch": 0.21388729825454392, "grad_norm": 0.2938717007637024, "learning_rate": 1.945078467138874e-05, "loss": 0.4196, "step": 10085 }, { "epoch": 0.21390850671247694, "grad_norm": 0.36941179633140564, "learning_rate": 1.94506756653453e-05, "loss": 0.5939, "step": 10086 }, { "epoch": 0.21392971517040996, "grad_norm": 0.3592841625213623, "learning_rate": 1.94505666487909e-05, "loss": 0.5643, "step": 10087 }, { "epoch": 0.21395092362834298, "grad_norm": 0.34290069341659546, "learning_rate": 1.945045762172566e-05, "loss": 0.4851, "step": 10088 }, { "epoch": 0.213972132086276, "grad_norm": 0.35971537232398987, "learning_rate": 1.9450348584149703e-05, "loss": 0.6148, "step": 10089 }, { "epoch": 0.21399334054420904, "grad_norm": 0.3215760886669159, "learning_rate": 1.945023953606315e-05, "loss": 0.5221, "step": 10090 }, { "epoch": 0.21401454900214206, "grad_norm": 0.33027124404907227, "learning_rate": 1.9450130477466124e-05, "loss": 0.5368, "step": 10091 }, { "epoch": 0.21403575746007508, "grad_norm": 0.3865007162094116, "learning_rate": 1.9450021408358747e-05, "loss": 0.6045, "step": 10092 }, { "epoch": 0.2140569659180081, "grad_norm": 0.34861457347869873, "learning_rate": 1.9449912328741135e-05, "loss": 0.4193, "step": 10093 }, { "epoch": 0.21407817437594112, "grad_norm": 0.48212143778800964, "learning_rate": 1.9449803238613416e-05, "loss": 0.4423, "step": 10094 }, { "epoch": 0.21409938283387414, "grad_norm": 0.3468214273452759, "learning_rate": 1.9449694137975705e-05, "loss": 0.5264, "step": 10095 }, { "epoch": 0.21412059129180716, "grad_norm": 0.3784550130367279, "learning_rate": 1.9449585026828125e-05, "loss": 0.4915, "step": 10096 }, { "epoch": 0.2141417997497402, "grad_norm": 0.3160358965396881, "learning_rate": 1.9449475905170804e-05, "loss": 0.4916, "step": 10097 }, { "epoch": 0.21416300820767323, "grad_norm": 0.39201247692108154, "learning_rate": 1.9449366773003854e-05, "loss": 0.5288, "step": 10098 }, { "epoch": 0.21418421666560625, "grad_norm": 0.3257567286491394, "learning_rate": 1.9449257630327404e-05, "loss": 0.5626, "step": 10099 }, { "epoch": 0.21420542512353927, "grad_norm": 0.2978845536708832, "learning_rate": 1.9449148477141565e-05, "loss": 0.5141, "step": 10100 }, { "epoch": 0.2142266335814723, "grad_norm": 0.3555595278739929, "learning_rate": 1.944903931344647e-05, "loss": 0.5605, "step": 10101 }, { "epoch": 0.2142478420394053, "grad_norm": 0.44136297702789307, "learning_rate": 1.9448930139242237e-05, "loss": 0.6118, "step": 10102 }, { "epoch": 0.21426905049733833, "grad_norm": 0.3325583338737488, "learning_rate": 1.9448820954528984e-05, "loss": 0.4756, "step": 10103 }, { "epoch": 0.21429025895527137, "grad_norm": 0.35207822918891907, "learning_rate": 1.944871175930683e-05, "loss": 0.5548, "step": 10104 }, { "epoch": 0.2143114674132044, "grad_norm": 0.34137603640556335, "learning_rate": 1.9448602553575907e-05, "loss": 0.5503, "step": 10105 }, { "epoch": 0.2143326758711374, "grad_norm": 0.40132883191108704, "learning_rate": 1.944849333733633e-05, "loss": 0.5321, "step": 10106 }, { "epoch": 0.21435388432907043, "grad_norm": 0.2958172857761383, "learning_rate": 1.9448384110588216e-05, "loss": 0.5097, "step": 10107 }, { "epoch": 0.21437509278700345, "grad_norm": 0.38155892491340637, "learning_rate": 1.9448274873331698e-05, "loss": 0.4912, "step": 10108 }, { "epoch": 0.21439630124493647, "grad_norm": 0.3781687915325165, "learning_rate": 1.9448165625566885e-05, "loss": 0.4258, "step": 10109 }, { "epoch": 0.2144175097028695, "grad_norm": 0.3447780907154083, "learning_rate": 1.9448056367293907e-05, "loss": 0.624, "step": 10110 }, { "epoch": 0.21443871816080254, "grad_norm": 0.36494383215904236, "learning_rate": 1.9447947098512882e-05, "loss": 0.6212, "step": 10111 }, { "epoch": 0.21445992661873556, "grad_norm": 0.31990712881088257, "learning_rate": 1.9447837819223932e-05, "loss": 0.4929, "step": 10112 }, { "epoch": 0.21448113507666858, "grad_norm": 0.3215450644493103, "learning_rate": 1.944772852942718e-05, "loss": 0.4628, "step": 10113 }, { "epoch": 0.2145023435346016, "grad_norm": 0.3287466764450073, "learning_rate": 1.9447619229122746e-05, "loss": 0.5274, "step": 10114 }, { "epoch": 0.21452355199253462, "grad_norm": 0.3369019329547882, "learning_rate": 1.944750991831075e-05, "loss": 0.5556, "step": 10115 }, { "epoch": 0.21454476045046764, "grad_norm": 0.31143951416015625, "learning_rate": 1.9447400596991318e-05, "loss": 0.4472, "step": 10116 }, { "epoch": 0.21456596890840068, "grad_norm": 0.3273148834705353, "learning_rate": 1.944729126516457e-05, "loss": 0.5314, "step": 10117 }, { "epoch": 0.2145871773663337, "grad_norm": 0.3558017909526825, "learning_rate": 1.9447181922830626e-05, "loss": 0.5239, "step": 10118 }, { "epoch": 0.21460838582426672, "grad_norm": 0.3340623676776886, "learning_rate": 1.9447072569989608e-05, "loss": 0.457, "step": 10119 }, { "epoch": 0.21462959428219974, "grad_norm": 0.6679432988166809, "learning_rate": 1.9446963206641633e-05, "loss": 0.4908, "step": 10120 }, { "epoch": 0.21465080274013276, "grad_norm": 0.3311351239681244, "learning_rate": 1.9446853832786836e-05, "loss": 0.4877, "step": 10121 }, { "epoch": 0.21467201119806578, "grad_norm": 0.3560574948787689, "learning_rate": 1.9446744448425322e-05, "loss": 0.6172, "step": 10122 }, { "epoch": 0.2146932196559988, "grad_norm": 0.3360486328601837, "learning_rate": 1.9446635053557226e-05, "loss": 0.5168, "step": 10123 }, { "epoch": 0.21471442811393185, "grad_norm": 0.34656694531440735, "learning_rate": 1.9446525648182665e-05, "loss": 0.4974, "step": 10124 }, { "epoch": 0.21473563657186487, "grad_norm": 0.35790687799453735, "learning_rate": 1.944641623230176e-05, "loss": 0.5861, "step": 10125 }, { "epoch": 0.2147568450297979, "grad_norm": 0.3513942062854767, "learning_rate": 1.944630680591463e-05, "loss": 0.5487, "step": 10126 }, { "epoch": 0.2147780534877309, "grad_norm": 0.3967548906803131, "learning_rate": 1.94461973690214e-05, "loss": 0.5002, "step": 10127 }, { "epoch": 0.21479926194566393, "grad_norm": 0.34272921085357666, "learning_rate": 1.9446087921622195e-05, "loss": 0.6055, "step": 10128 }, { "epoch": 0.21482047040359695, "grad_norm": 1.8951438665390015, "learning_rate": 1.944597846371713e-05, "loss": 0.5982, "step": 10129 }, { "epoch": 0.21484167886152997, "grad_norm": 0.33761391043663025, "learning_rate": 1.944586899530633e-05, "loss": 0.5619, "step": 10130 }, { "epoch": 0.21486288731946301, "grad_norm": 0.36674821376800537, "learning_rate": 1.944575951638992e-05, "loss": 0.4934, "step": 10131 }, { "epoch": 0.21488409577739603, "grad_norm": 0.33410748839378357, "learning_rate": 1.9445650026968014e-05, "loss": 0.5457, "step": 10132 }, { "epoch": 0.21490530423532905, "grad_norm": 0.3254593312740326, "learning_rate": 1.944554052704074e-05, "loss": 0.5302, "step": 10133 }, { "epoch": 0.21492651269326207, "grad_norm": 0.32042786478996277, "learning_rate": 1.9445431016608215e-05, "loss": 0.4414, "step": 10134 }, { "epoch": 0.2149477211511951, "grad_norm": 0.4050571322441101, "learning_rate": 1.9445321495670568e-05, "loss": 0.6115, "step": 10135 }, { "epoch": 0.2149689296091281, "grad_norm": 0.3418094217777252, "learning_rate": 1.9445211964227916e-05, "loss": 0.4703, "step": 10136 }, { "epoch": 0.21499013806706113, "grad_norm": 0.3351600170135498, "learning_rate": 1.9445102422280378e-05, "loss": 0.573, "step": 10137 }, { "epoch": 0.21501134652499418, "grad_norm": 0.42655983567237854, "learning_rate": 1.9444992869828084e-05, "loss": 0.5016, "step": 10138 }, { "epoch": 0.2150325549829272, "grad_norm": 0.3372649550437927, "learning_rate": 1.9444883306871144e-05, "loss": 0.4545, "step": 10139 }, { "epoch": 0.21505376344086022, "grad_norm": 0.3735376298427582, "learning_rate": 1.9444773733409695e-05, "loss": 0.5217, "step": 10140 }, { "epoch": 0.21507497189879324, "grad_norm": 0.3550288677215576, "learning_rate": 1.9444664149443848e-05, "loss": 0.4075, "step": 10141 }, { "epoch": 0.21509618035672626, "grad_norm": 0.32054761052131653, "learning_rate": 1.9444554554973725e-05, "loss": 0.4893, "step": 10142 }, { "epoch": 0.21511738881465928, "grad_norm": 0.3213011622428894, "learning_rate": 1.944444494999945e-05, "loss": 0.5649, "step": 10143 }, { "epoch": 0.2151385972725923, "grad_norm": 0.3654039204120636, "learning_rate": 1.9444335334521146e-05, "loss": 0.4951, "step": 10144 }, { "epoch": 0.21515980573052534, "grad_norm": 0.29821768403053284, "learning_rate": 1.9444225708538938e-05, "loss": 0.4983, "step": 10145 }, { "epoch": 0.21518101418845836, "grad_norm": 0.4295811951160431, "learning_rate": 1.944411607205294e-05, "loss": 0.5877, "step": 10146 }, { "epoch": 0.21520222264639138, "grad_norm": 0.36429914832115173, "learning_rate": 1.944400642506328e-05, "loss": 0.5862, "step": 10147 }, { "epoch": 0.2152234311043244, "grad_norm": 0.2891172170639038, "learning_rate": 1.9443896767570077e-05, "loss": 0.507, "step": 10148 }, { "epoch": 0.21524463956225742, "grad_norm": 0.32927629351615906, "learning_rate": 1.9443787099573456e-05, "loss": 0.5139, "step": 10149 }, { "epoch": 0.21526584802019044, "grad_norm": 0.3019395172595978, "learning_rate": 1.9443677421073538e-05, "loss": 0.532, "step": 10150 }, { "epoch": 0.21528705647812346, "grad_norm": 0.30767664313316345, "learning_rate": 1.944356773207044e-05, "loss": 0.5854, "step": 10151 }, { "epoch": 0.2153082649360565, "grad_norm": 0.7321821451187134, "learning_rate": 1.944345803256429e-05, "loss": 0.4189, "step": 10152 }, { "epoch": 0.21532947339398953, "grad_norm": 0.3205510079860687, "learning_rate": 1.9443348322555207e-05, "loss": 0.4914, "step": 10153 }, { "epoch": 0.21535068185192255, "grad_norm": 0.3385627865791321, "learning_rate": 1.9443238602043315e-05, "loss": 0.5216, "step": 10154 }, { "epoch": 0.21537189030985557, "grad_norm": 0.7721005082130432, "learning_rate": 1.9443128871028737e-05, "loss": 0.4893, "step": 10155 }, { "epoch": 0.2153930987677886, "grad_norm": 0.4306941330432892, "learning_rate": 1.944301912951159e-05, "loss": 0.4974, "step": 10156 }, { "epoch": 0.2154143072257216, "grad_norm": 0.3351149559020996, "learning_rate": 1.9442909377491998e-05, "loss": 0.5242, "step": 10157 }, { "epoch": 0.21543551568365465, "grad_norm": 0.3728245496749878, "learning_rate": 1.9442799614970087e-05, "loss": 0.5521, "step": 10158 }, { "epoch": 0.21545672414158767, "grad_norm": 0.3959890305995941, "learning_rate": 1.9442689841945975e-05, "loss": 0.5707, "step": 10159 }, { "epoch": 0.2154779325995207, "grad_norm": 0.4417303502559662, "learning_rate": 1.944258005841979e-05, "loss": 0.5547, "step": 10160 }, { "epoch": 0.2154991410574537, "grad_norm": 0.37098371982574463, "learning_rate": 1.9442470264391645e-05, "loss": 0.5194, "step": 10161 }, { "epoch": 0.21552034951538673, "grad_norm": 0.31438130140304565, "learning_rate": 1.9442360459861667e-05, "loss": 0.5307, "step": 10162 }, { "epoch": 0.21554155797331975, "grad_norm": 0.3238823711872101, "learning_rate": 1.9442250644829976e-05, "loss": 0.5021, "step": 10163 }, { "epoch": 0.21556276643125277, "grad_norm": 0.353296160697937, "learning_rate": 1.94421408192967e-05, "loss": 0.4783, "step": 10164 }, { "epoch": 0.21558397488918582, "grad_norm": 0.3490111827850342, "learning_rate": 1.9442030983261954e-05, "loss": 0.6326, "step": 10165 }, { "epoch": 0.21560518334711884, "grad_norm": 0.31276530027389526, "learning_rate": 1.9441921136725865e-05, "loss": 0.5121, "step": 10166 }, { "epoch": 0.21562639180505186, "grad_norm": 0.3325653672218323, "learning_rate": 1.9441811279688552e-05, "loss": 0.5334, "step": 10167 }, { "epoch": 0.21564760026298488, "grad_norm": 0.3352547585964203, "learning_rate": 1.944170141215014e-05, "loss": 0.4994, "step": 10168 }, { "epoch": 0.2156688087209179, "grad_norm": 0.36333391070365906, "learning_rate": 1.9441591534110746e-05, "loss": 0.5891, "step": 10169 }, { "epoch": 0.21569001717885092, "grad_norm": 0.32279130816459656, "learning_rate": 1.94414816455705e-05, "loss": 0.5246, "step": 10170 }, { "epoch": 0.21571122563678394, "grad_norm": 0.35716691613197327, "learning_rate": 1.9441371746529517e-05, "loss": 0.519, "step": 10171 }, { "epoch": 0.21573243409471698, "grad_norm": 0.33547380566596985, "learning_rate": 1.9441261836987926e-05, "loss": 0.5311, "step": 10172 }, { "epoch": 0.21575364255265, "grad_norm": 0.35149747133255005, "learning_rate": 1.9441151916945843e-05, "loss": 0.4852, "step": 10173 }, { "epoch": 0.21577485101058302, "grad_norm": 0.3298606276512146, "learning_rate": 1.9441041986403394e-05, "loss": 0.4511, "step": 10174 }, { "epoch": 0.21579605946851604, "grad_norm": 0.3465506136417389, "learning_rate": 1.9440932045360698e-05, "loss": 0.5197, "step": 10175 }, { "epoch": 0.21581726792644906, "grad_norm": 0.38310369849205017, "learning_rate": 1.9440822093817883e-05, "loss": 0.5378, "step": 10176 }, { "epoch": 0.21583847638438208, "grad_norm": 0.3622848391532898, "learning_rate": 1.9440712131775065e-05, "loss": 0.5469, "step": 10177 }, { "epoch": 0.2158596848423151, "grad_norm": 0.3634965717792511, "learning_rate": 1.944060215923237e-05, "loss": 0.4821, "step": 10178 }, { "epoch": 0.21588089330024815, "grad_norm": 0.3032902181148529, "learning_rate": 1.9440492176189917e-05, "loss": 0.5189, "step": 10179 }, { "epoch": 0.21590210175818117, "grad_norm": 0.28699973225593567, "learning_rate": 1.9440382182647834e-05, "loss": 0.4854, "step": 10180 }, { "epoch": 0.2159233102161142, "grad_norm": 0.35593315958976746, "learning_rate": 1.9440272178606236e-05, "loss": 0.477, "step": 10181 }, { "epoch": 0.2159445186740472, "grad_norm": 0.3151872754096985, "learning_rate": 1.9440162164065254e-05, "loss": 0.471, "step": 10182 }, { "epoch": 0.21596572713198023, "grad_norm": 0.3117970824241638, "learning_rate": 1.9440052139025004e-05, "loss": 0.5293, "step": 10183 }, { "epoch": 0.21598693558991325, "grad_norm": 0.3042357563972473, "learning_rate": 1.9439942103485608e-05, "loss": 0.5168, "step": 10184 }, { "epoch": 0.21600814404784627, "grad_norm": 0.33952245116233826, "learning_rate": 1.943983205744719e-05, "loss": 0.5448, "step": 10185 }, { "epoch": 0.21602935250577932, "grad_norm": 0.3521674871444702, "learning_rate": 1.9439722000909876e-05, "loss": 0.5345, "step": 10186 }, { "epoch": 0.21605056096371233, "grad_norm": 0.7131077647209167, "learning_rate": 1.9439611933873787e-05, "loss": 0.4931, "step": 10187 }, { "epoch": 0.21607176942164535, "grad_norm": 0.31747299432754517, "learning_rate": 1.943950185633904e-05, "loss": 0.5775, "step": 10188 }, { "epoch": 0.21609297787957837, "grad_norm": 0.35321488976478577, "learning_rate": 1.943939176830576e-05, "loss": 0.5294, "step": 10189 }, { "epoch": 0.2161141863375114, "grad_norm": 0.3240099251270294, "learning_rate": 1.943928166977407e-05, "loss": 0.5309, "step": 10190 }, { "epoch": 0.2161353947954444, "grad_norm": 0.3432181179523468, "learning_rate": 1.9439171560744098e-05, "loss": 0.5002, "step": 10191 }, { "epoch": 0.21615660325337746, "grad_norm": 0.34291064739227295, "learning_rate": 1.9439061441215957e-05, "loss": 0.4897, "step": 10192 }, { "epoch": 0.21617781171131048, "grad_norm": 0.3305758237838745, "learning_rate": 1.9438951311189775e-05, "loss": 0.5953, "step": 10193 }, { "epoch": 0.2161990201692435, "grad_norm": 0.31474676728248596, "learning_rate": 1.9438841170665673e-05, "loss": 0.5303, "step": 10194 }, { "epoch": 0.21622022862717652, "grad_norm": 0.33620133996009827, "learning_rate": 1.9438731019643774e-05, "loss": 0.513, "step": 10195 }, { "epoch": 0.21624143708510954, "grad_norm": 0.3189365565776825, "learning_rate": 1.9438620858124202e-05, "loss": 0.5148, "step": 10196 }, { "epoch": 0.21626264554304256, "grad_norm": 0.3329528570175171, "learning_rate": 1.9438510686107076e-05, "loss": 0.4579, "step": 10197 }, { "epoch": 0.21628385400097558, "grad_norm": 0.35068395733833313, "learning_rate": 1.943840050359252e-05, "loss": 0.56, "step": 10198 }, { "epoch": 0.21630506245890863, "grad_norm": 0.32414668798446655, "learning_rate": 1.943829031058066e-05, "loss": 0.5186, "step": 10199 }, { "epoch": 0.21632627091684165, "grad_norm": 0.325260728597641, "learning_rate": 1.943818010707161e-05, "loss": 0.5522, "step": 10200 }, { "epoch": 0.21634747937477466, "grad_norm": 0.37471944093704224, "learning_rate": 1.9438069893065503e-05, "loss": 0.5788, "step": 10201 }, { "epoch": 0.21636868783270768, "grad_norm": 0.3343842029571533, "learning_rate": 1.9437959668562455e-05, "loss": 0.4842, "step": 10202 }, { "epoch": 0.2163898962906407, "grad_norm": 0.34238508343696594, "learning_rate": 1.943784943356259e-05, "loss": 0.5341, "step": 10203 }, { "epoch": 0.21641110474857372, "grad_norm": 0.2997633218765259, "learning_rate": 1.9437739188066033e-05, "loss": 0.5237, "step": 10204 }, { "epoch": 0.21643231320650674, "grad_norm": 0.32081982493400574, "learning_rate": 1.9437628932072902e-05, "loss": 0.4475, "step": 10205 }, { "epoch": 0.2164535216644398, "grad_norm": 0.33050522208213806, "learning_rate": 1.9437518665583322e-05, "loss": 0.582, "step": 10206 }, { "epoch": 0.2164747301223728, "grad_norm": 0.3624976873397827, "learning_rate": 1.9437408388597418e-05, "loss": 0.5607, "step": 10207 }, { "epoch": 0.21649593858030583, "grad_norm": 0.3712410628795624, "learning_rate": 1.9437298101115306e-05, "loss": 0.5048, "step": 10208 }, { "epoch": 0.21651714703823885, "grad_norm": 0.3219548463821411, "learning_rate": 1.9437187803137117e-05, "loss": 0.4717, "step": 10209 }, { "epoch": 0.21653835549617187, "grad_norm": 0.33250170946121216, "learning_rate": 1.943707749466297e-05, "loss": 0.5052, "step": 10210 }, { "epoch": 0.2165595639541049, "grad_norm": 0.3330170512199402, "learning_rate": 1.9436967175692984e-05, "loss": 0.5529, "step": 10211 }, { "epoch": 0.2165807724120379, "grad_norm": 0.3066064715385437, "learning_rate": 1.9436856846227287e-05, "loss": 0.4422, "step": 10212 }, { "epoch": 0.21660198086997096, "grad_norm": 0.30882689356803894, "learning_rate": 1.9436746506266e-05, "loss": 0.458, "step": 10213 }, { "epoch": 0.21662318932790398, "grad_norm": 0.3744887411594391, "learning_rate": 1.9436636155809245e-05, "loss": 0.5171, "step": 10214 }, { "epoch": 0.216644397785837, "grad_norm": 0.5053172707557678, "learning_rate": 1.943652579485715e-05, "loss": 0.4912, "step": 10215 }, { "epoch": 0.21666560624377001, "grad_norm": 0.3244301676750183, "learning_rate": 1.9436415423409826e-05, "loss": 0.4762, "step": 10216 }, { "epoch": 0.21668681470170303, "grad_norm": 0.32966697216033936, "learning_rate": 1.943630504146741e-05, "loss": 0.4719, "step": 10217 }, { "epoch": 0.21670802315963605, "grad_norm": 0.3346456289291382, "learning_rate": 1.9436194649030006e-05, "loss": 0.4813, "step": 10218 }, { "epoch": 0.21672923161756907, "grad_norm": 0.3260524570941925, "learning_rate": 1.9436084246097757e-05, "loss": 0.4739, "step": 10219 }, { "epoch": 0.21675044007550212, "grad_norm": 0.3306760787963867, "learning_rate": 1.9435973832670776e-05, "loss": 0.5118, "step": 10220 }, { "epoch": 0.21677164853343514, "grad_norm": 0.31506189703941345, "learning_rate": 1.9435863408749186e-05, "loss": 0.5086, "step": 10221 }, { "epoch": 0.21679285699136816, "grad_norm": 0.44188934564590454, "learning_rate": 1.9435752974333112e-05, "loss": 0.4398, "step": 10222 }, { "epoch": 0.21681406544930118, "grad_norm": 0.33375120162963867, "learning_rate": 1.9435642529422672e-05, "loss": 0.4066, "step": 10223 }, { "epoch": 0.2168352739072342, "grad_norm": 0.3443335294723511, "learning_rate": 1.9435532074017996e-05, "loss": 0.5435, "step": 10224 }, { "epoch": 0.21685648236516722, "grad_norm": 0.340032696723938, "learning_rate": 1.94354216081192e-05, "loss": 0.4944, "step": 10225 }, { "epoch": 0.21687769082310024, "grad_norm": 0.35450220108032227, "learning_rate": 1.9435311131726414e-05, "loss": 0.5531, "step": 10226 }, { "epoch": 0.21689889928103329, "grad_norm": 0.3513728380203247, "learning_rate": 1.9435200644839755e-05, "loss": 0.4762, "step": 10227 }, { "epoch": 0.2169201077389663, "grad_norm": 0.3342348039150238, "learning_rate": 1.9435090147459345e-05, "loss": 0.5335, "step": 10228 }, { "epoch": 0.21694131619689933, "grad_norm": 0.3027143180370331, "learning_rate": 1.9434979639585314e-05, "loss": 0.5496, "step": 10229 }, { "epoch": 0.21696252465483234, "grad_norm": 0.31917715072631836, "learning_rate": 1.9434869121217778e-05, "loss": 0.4732, "step": 10230 }, { "epoch": 0.21698373311276536, "grad_norm": 0.34016430377960205, "learning_rate": 1.9434758592356866e-05, "loss": 0.5572, "step": 10231 }, { "epoch": 0.21700494157069838, "grad_norm": 0.3325600326061249, "learning_rate": 1.9434648053002694e-05, "loss": 0.455, "step": 10232 }, { "epoch": 0.21702615002863143, "grad_norm": 0.33799371123313904, "learning_rate": 1.943453750315539e-05, "loss": 0.4912, "step": 10233 }, { "epoch": 0.21704735848656445, "grad_norm": 0.3094629645347595, "learning_rate": 1.9434426942815073e-05, "loss": 0.5124, "step": 10234 }, { "epoch": 0.21706856694449747, "grad_norm": 0.32935309410095215, "learning_rate": 1.9434316371981873e-05, "loss": 0.5663, "step": 10235 }, { "epoch": 0.2170897754024305, "grad_norm": 0.32185643911361694, "learning_rate": 1.9434205790655903e-05, "loss": 0.4437, "step": 10236 }, { "epoch": 0.2171109838603635, "grad_norm": 0.2881622910499573, "learning_rate": 1.9434095198837292e-05, "loss": 0.5132, "step": 10237 }, { "epoch": 0.21713219231829653, "grad_norm": 0.30299490690231323, "learning_rate": 1.9433984596526165e-05, "loss": 0.5493, "step": 10238 }, { "epoch": 0.21715340077622955, "grad_norm": 0.3400469422340393, "learning_rate": 1.943387398372264e-05, "loss": 0.5905, "step": 10239 }, { "epoch": 0.2171746092341626, "grad_norm": 0.3432469666004181, "learning_rate": 1.9433763360426843e-05, "loss": 0.478, "step": 10240 }, { "epoch": 0.21719581769209562, "grad_norm": 0.3077351450920105, "learning_rate": 1.9433652726638898e-05, "loss": 0.48, "step": 10241 }, { "epoch": 0.21721702615002864, "grad_norm": 0.3379710614681244, "learning_rate": 1.9433542082358925e-05, "loss": 0.5308, "step": 10242 }, { "epoch": 0.21723823460796166, "grad_norm": 0.35810497403144836, "learning_rate": 1.943343142758705e-05, "loss": 0.6574, "step": 10243 }, { "epoch": 0.21725944306589468, "grad_norm": 0.3242904543876648, "learning_rate": 1.9433320762323392e-05, "loss": 0.5247, "step": 10244 }, { "epoch": 0.2172806515238277, "grad_norm": 0.3302992880344391, "learning_rate": 1.9433210086568078e-05, "loss": 0.5347, "step": 10245 }, { "epoch": 0.21730185998176071, "grad_norm": 0.35240957140922546, "learning_rate": 1.9433099400321228e-05, "loss": 0.5804, "step": 10246 }, { "epoch": 0.21732306843969376, "grad_norm": 0.31404298543930054, "learning_rate": 1.943298870358297e-05, "loss": 0.5232, "step": 10247 }, { "epoch": 0.21734427689762678, "grad_norm": 0.3331986963748932, "learning_rate": 1.9432877996353423e-05, "loss": 0.4873, "step": 10248 }, { "epoch": 0.2173654853555598, "grad_norm": 0.29327628016471863, "learning_rate": 1.9432767278632705e-05, "loss": 0.4204, "step": 10249 }, { "epoch": 0.21738669381349282, "grad_norm": 0.31380927562713623, "learning_rate": 1.9432656550420953e-05, "loss": 0.5404, "step": 10250 }, { "epoch": 0.21740790227142584, "grad_norm": 0.32529789209365845, "learning_rate": 1.943254581171828e-05, "loss": 0.5195, "step": 10251 }, { "epoch": 0.21742911072935886, "grad_norm": 0.30421826243400574, "learning_rate": 1.9432435062524814e-05, "loss": 0.5607, "step": 10252 }, { "epoch": 0.21745031918729188, "grad_norm": 0.40373945236206055, "learning_rate": 1.943232430284067e-05, "loss": 0.5147, "step": 10253 }, { "epoch": 0.21747152764522493, "grad_norm": 0.354192316532135, "learning_rate": 1.943221353266598e-05, "loss": 0.5484, "step": 10254 }, { "epoch": 0.21749273610315795, "grad_norm": 0.3334483504295349, "learning_rate": 1.9432102752000864e-05, "loss": 0.5575, "step": 10255 }, { "epoch": 0.21751394456109097, "grad_norm": 0.3141072988510132, "learning_rate": 1.9431991960845446e-05, "loss": 0.4419, "step": 10256 }, { "epoch": 0.21753515301902399, "grad_norm": 0.3213030993938446, "learning_rate": 1.9431881159199847e-05, "loss": 0.5615, "step": 10257 }, { "epoch": 0.217556361476957, "grad_norm": 0.3359060287475586, "learning_rate": 1.943177034706419e-05, "loss": 0.5664, "step": 10258 }, { "epoch": 0.21757756993489002, "grad_norm": 0.3488682806491852, "learning_rate": 1.9431659524438603e-05, "loss": 0.5738, "step": 10259 }, { "epoch": 0.21759877839282304, "grad_norm": 0.32230719923973083, "learning_rate": 1.9431548691323206e-05, "loss": 0.5142, "step": 10260 }, { "epoch": 0.2176199868507561, "grad_norm": 0.3227815628051758, "learning_rate": 1.9431437847718122e-05, "loss": 0.5, "step": 10261 }, { "epoch": 0.2176411953086891, "grad_norm": 0.30383747816085815, "learning_rate": 1.9431326993623474e-05, "loss": 0.4732, "step": 10262 }, { "epoch": 0.21766240376662213, "grad_norm": 0.3938305974006653, "learning_rate": 1.9431216129039386e-05, "loss": 0.5387, "step": 10263 }, { "epoch": 0.21768361222455515, "grad_norm": 0.3719094693660736, "learning_rate": 1.943110525396598e-05, "loss": 0.5039, "step": 10264 }, { "epoch": 0.21770482068248817, "grad_norm": 0.3646852970123291, "learning_rate": 1.9430994368403385e-05, "loss": 0.4622, "step": 10265 }, { "epoch": 0.2177260291404212, "grad_norm": 0.3185681998729706, "learning_rate": 1.943088347235172e-05, "loss": 0.5192, "step": 10266 }, { "epoch": 0.21774723759835424, "grad_norm": 0.36617448925971985, "learning_rate": 1.9430772565811104e-05, "loss": 0.5879, "step": 10267 }, { "epoch": 0.21776844605628726, "grad_norm": 0.3628205358982086, "learning_rate": 1.9430661648781667e-05, "loss": 0.5678, "step": 10268 }, { "epoch": 0.21778965451422028, "grad_norm": 0.35457518696784973, "learning_rate": 1.9430550721263527e-05, "loss": 0.5821, "step": 10269 }, { "epoch": 0.2178108629721533, "grad_norm": 0.30241936445236206, "learning_rate": 1.9430439783256814e-05, "loss": 0.5371, "step": 10270 }, { "epoch": 0.21783207143008632, "grad_norm": 0.3227050006389618, "learning_rate": 1.9430328834761648e-05, "loss": 0.5008, "step": 10271 }, { "epoch": 0.21785327988801934, "grad_norm": 0.27840110659599304, "learning_rate": 1.943021787577815e-05, "loss": 0.4089, "step": 10272 }, { "epoch": 0.21787448834595236, "grad_norm": 0.3796961307525635, "learning_rate": 1.9430106906306445e-05, "loss": 0.4762, "step": 10273 }, { "epoch": 0.2178956968038854, "grad_norm": 0.3206133246421814, "learning_rate": 1.942999592634666e-05, "loss": 0.4939, "step": 10274 }, { "epoch": 0.21791690526181842, "grad_norm": 0.3211301565170288, "learning_rate": 1.9429884935898916e-05, "loss": 0.5684, "step": 10275 }, { "epoch": 0.21793811371975144, "grad_norm": 0.3224429488182068, "learning_rate": 1.9429773934963334e-05, "loss": 0.506, "step": 10276 }, { "epoch": 0.21795932217768446, "grad_norm": 0.33183103799819946, "learning_rate": 1.9429662923540037e-05, "loss": 0.4876, "step": 10277 }, { "epoch": 0.21798053063561748, "grad_norm": 0.33436819911003113, "learning_rate": 1.9429551901629154e-05, "loss": 0.5388, "step": 10278 }, { "epoch": 0.2180017390935505, "grad_norm": 0.34631776809692383, "learning_rate": 1.9429440869230804e-05, "loss": 0.5132, "step": 10279 }, { "epoch": 0.21802294755148352, "grad_norm": 0.37422993779182434, "learning_rate": 1.9429329826345113e-05, "loss": 0.6061, "step": 10280 }, { "epoch": 0.21804415600941657, "grad_norm": 0.3627752959728241, "learning_rate": 1.9429218772972204e-05, "loss": 0.4518, "step": 10281 }, { "epoch": 0.2180653644673496, "grad_norm": 0.3341749906539917, "learning_rate": 1.9429107709112195e-05, "loss": 0.5015, "step": 10282 }, { "epoch": 0.2180865729252826, "grad_norm": 0.32058390974998474, "learning_rate": 1.942899663476522e-05, "loss": 0.4331, "step": 10283 }, { "epoch": 0.21810778138321563, "grad_norm": 0.3416297137737274, "learning_rate": 1.9428885549931393e-05, "loss": 0.5389, "step": 10284 }, { "epoch": 0.21812898984114865, "grad_norm": 0.36305350065231323, "learning_rate": 1.9428774454610845e-05, "loss": 0.5481, "step": 10285 }, { "epoch": 0.21815019829908167, "grad_norm": 0.5078104138374329, "learning_rate": 1.9428663348803693e-05, "loss": 0.536, "step": 10286 }, { "epoch": 0.21817140675701469, "grad_norm": 0.3337228000164032, "learning_rate": 1.9428552232510064e-05, "loss": 0.5339, "step": 10287 }, { "epoch": 0.21819261521494773, "grad_norm": 0.34376391768455505, "learning_rate": 1.9428441105730082e-05, "loss": 0.5564, "step": 10288 }, { "epoch": 0.21821382367288075, "grad_norm": 0.27834978699684143, "learning_rate": 1.942832996846387e-05, "loss": 0.4238, "step": 10289 }, { "epoch": 0.21823503213081377, "grad_norm": 0.37204769253730774, "learning_rate": 1.9428218820711548e-05, "loss": 0.4924, "step": 10290 }, { "epoch": 0.2182562405887468, "grad_norm": 0.334805965423584, "learning_rate": 1.9428107662473247e-05, "loss": 0.4404, "step": 10291 }, { "epoch": 0.2182774490466798, "grad_norm": 0.4540828764438629, "learning_rate": 1.9427996493749085e-05, "loss": 0.4807, "step": 10292 }, { "epoch": 0.21829865750461283, "grad_norm": 0.3135025203227997, "learning_rate": 1.9427885314539186e-05, "loss": 0.4959, "step": 10293 }, { "epoch": 0.21831986596254585, "grad_norm": 0.34101441502571106, "learning_rate": 1.9427774124843677e-05, "loss": 0.5545, "step": 10294 }, { "epoch": 0.2183410744204789, "grad_norm": 0.3370032012462616, "learning_rate": 1.9427662924662676e-05, "loss": 0.569, "step": 10295 }, { "epoch": 0.21836228287841192, "grad_norm": 0.3393182158470154, "learning_rate": 1.9427551713996316e-05, "loss": 0.5237, "step": 10296 }, { "epoch": 0.21838349133634494, "grad_norm": 0.34173253178596497, "learning_rate": 1.942744049284471e-05, "loss": 0.5192, "step": 10297 }, { "epoch": 0.21840469979427796, "grad_norm": 0.3586156666278839, "learning_rate": 1.942732926120799e-05, "loss": 0.5335, "step": 10298 }, { "epoch": 0.21842590825221098, "grad_norm": 0.31912004947662354, "learning_rate": 1.9427218019086273e-05, "loss": 0.5076, "step": 10299 }, { "epoch": 0.218447116710144, "grad_norm": 0.31857025623321533, "learning_rate": 1.942710676647969e-05, "loss": 0.4512, "step": 10300 }, { "epoch": 0.21846832516807702, "grad_norm": 0.3350915312767029, "learning_rate": 1.9426995503388355e-05, "loss": 0.4188, "step": 10301 }, { "epoch": 0.21848953362601006, "grad_norm": 0.31945741176605225, "learning_rate": 1.94268842298124e-05, "loss": 0.4916, "step": 10302 }, { "epoch": 0.21851074208394308, "grad_norm": 0.3132414221763611, "learning_rate": 1.942677294575195e-05, "loss": 0.4741, "step": 10303 }, { "epoch": 0.2185319505418761, "grad_norm": 0.32543638348579407, "learning_rate": 1.942666165120712e-05, "loss": 0.5194, "step": 10304 }, { "epoch": 0.21855315899980912, "grad_norm": 0.3859182298183441, "learning_rate": 1.942655034617804e-05, "loss": 0.5604, "step": 10305 }, { "epoch": 0.21857436745774214, "grad_norm": 0.34018877148628235, "learning_rate": 1.9426439030664833e-05, "loss": 0.5205, "step": 10306 }, { "epoch": 0.21859557591567516, "grad_norm": 0.38328981399536133, "learning_rate": 1.9426327704667625e-05, "loss": 0.5149, "step": 10307 }, { "epoch": 0.2186167843736082, "grad_norm": 0.3949626684188843, "learning_rate": 1.9426216368186534e-05, "loss": 0.575, "step": 10308 }, { "epoch": 0.21863799283154123, "grad_norm": 0.3256103992462158, "learning_rate": 1.942610502122169e-05, "loss": 0.4522, "step": 10309 }, { "epoch": 0.21865920128947425, "grad_norm": 0.327974408864975, "learning_rate": 1.942599366377321e-05, "loss": 0.4993, "step": 10310 }, { "epoch": 0.21868040974740727, "grad_norm": 0.3596166968345642, "learning_rate": 1.9425882295841223e-05, "loss": 0.5533, "step": 10311 }, { "epoch": 0.2187016182053403, "grad_norm": 0.3243330419063568, "learning_rate": 1.942577091742585e-05, "loss": 0.5717, "step": 10312 }, { "epoch": 0.2187228266632733, "grad_norm": 0.3478291630744934, "learning_rate": 1.9425659528527225e-05, "loss": 0.4817, "step": 10313 }, { "epoch": 0.21874403512120633, "grad_norm": 0.3192669153213501, "learning_rate": 1.9425548129145454e-05, "loss": 0.5948, "step": 10314 }, { "epoch": 0.21876524357913937, "grad_norm": 0.3266499936580658, "learning_rate": 1.9425436719280674e-05, "loss": 0.4782, "step": 10315 }, { "epoch": 0.2187864520370724, "grad_norm": 0.3201521039009094, "learning_rate": 1.9425325298933005e-05, "loss": 0.5361, "step": 10316 }, { "epoch": 0.2188076604950054, "grad_norm": 0.46455976366996765, "learning_rate": 1.942521386810257e-05, "loss": 0.4521, "step": 10317 }, { "epoch": 0.21882886895293843, "grad_norm": 0.35028743743896484, "learning_rate": 1.9425102426789494e-05, "loss": 0.5091, "step": 10318 }, { "epoch": 0.21885007741087145, "grad_norm": 0.32605019211769104, "learning_rate": 1.9424990974993903e-05, "loss": 0.5209, "step": 10319 }, { "epoch": 0.21887128586880447, "grad_norm": 0.2982937693595886, "learning_rate": 1.9424879512715918e-05, "loss": 0.4193, "step": 10320 }, { "epoch": 0.2188924943267375, "grad_norm": 0.31735989451408386, "learning_rate": 1.9424768039955662e-05, "loss": 0.5159, "step": 10321 }, { "epoch": 0.21891370278467054, "grad_norm": 0.3265394866466522, "learning_rate": 1.9424656556713262e-05, "loss": 0.4664, "step": 10322 }, { "epoch": 0.21893491124260356, "grad_norm": 0.30485987663269043, "learning_rate": 1.942454506298884e-05, "loss": 0.4576, "step": 10323 }, { "epoch": 0.21895611970053658, "grad_norm": 0.33201149106025696, "learning_rate": 1.9424433558782523e-05, "loss": 0.5318, "step": 10324 }, { "epoch": 0.2189773281584696, "grad_norm": 0.36285078525543213, "learning_rate": 1.942432204409443e-05, "loss": 0.5063, "step": 10325 }, { "epoch": 0.21899853661640262, "grad_norm": 0.33056730031967163, "learning_rate": 1.942421051892469e-05, "loss": 0.4012, "step": 10326 }, { "epoch": 0.21901974507433564, "grad_norm": 0.5520161986351013, "learning_rate": 1.9424098983273423e-05, "loss": 0.5506, "step": 10327 }, { "epoch": 0.21904095353226866, "grad_norm": 0.3284372389316559, "learning_rate": 1.942398743714076e-05, "loss": 0.4643, "step": 10328 }, { "epoch": 0.2190621619902017, "grad_norm": 0.3044782876968384, "learning_rate": 1.9423875880526816e-05, "loss": 0.4506, "step": 10329 }, { "epoch": 0.21908337044813472, "grad_norm": 0.32609879970550537, "learning_rate": 1.9423764313431718e-05, "loss": 0.457, "step": 10330 }, { "epoch": 0.21910457890606774, "grad_norm": 0.3617344796657562, "learning_rate": 1.9423652735855594e-05, "loss": 0.4474, "step": 10331 }, { "epoch": 0.21912578736400076, "grad_norm": 0.3630121648311615, "learning_rate": 1.9423541147798562e-05, "loss": 0.5495, "step": 10332 }, { "epoch": 0.21914699582193378, "grad_norm": 0.3477976322174072, "learning_rate": 1.9423429549260755e-05, "loss": 0.6112, "step": 10333 }, { "epoch": 0.2191682042798668, "grad_norm": 0.2801039218902588, "learning_rate": 1.9423317940242286e-05, "loss": 0.4531, "step": 10334 }, { "epoch": 0.21918941273779982, "grad_norm": 0.3109031021595001, "learning_rate": 1.942320632074329e-05, "loss": 0.4882, "step": 10335 }, { "epoch": 0.21921062119573287, "grad_norm": 0.3246013820171356, "learning_rate": 1.942309469076388e-05, "loss": 0.5472, "step": 10336 }, { "epoch": 0.2192318296536659, "grad_norm": 0.32156768441200256, "learning_rate": 1.942298305030419e-05, "loss": 0.4593, "step": 10337 }, { "epoch": 0.2192530381115989, "grad_norm": 0.3141314685344696, "learning_rate": 1.942287139936434e-05, "loss": 0.5276, "step": 10338 }, { "epoch": 0.21927424656953193, "grad_norm": 0.38549983501434326, "learning_rate": 1.9422759737944452e-05, "loss": 0.5406, "step": 10339 }, { "epoch": 0.21929545502746495, "grad_norm": 0.3174167275428772, "learning_rate": 1.9422648066044654e-05, "loss": 0.4415, "step": 10340 }, { "epoch": 0.21931666348539797, "grad_norm": 0.30911293625831604, "learning_rate": 1.942253638366507e-05, "loss": 0.4576, "step": 10341 }, { "epoch": 0.21933787194333101, "grad_norm": 0.3351205885410309, "learning_rate": 1.942242469080582e-05, "loss": 0.4986, "step": 10342 }, { "epoch": 0.21935908040126403, "grad_norm": 0.3466508984565735, "learning_rate": 1.942231298746703e-05, "loss": 0.4704, "step": 10343 }, { "epoch": 0.21938028885919705, "grad_norm": 0.3386828303337097, "learning_rate": 1.9422201273648825e-05, "loss": 0.5488, "step": 10344 }, { "epoch": 0.21940149731713007, "grad_norm": 0.44673117995262146, "learning_rate": 1.9422089549351334e-05, "loss": 0.5061, "step": 10345 }, { "epoch": 0.2194227057750631, "grad_norm": 0.3709375560283661, "learning_rate": 1.9421977814574674e-05, "loss": 0.5126, "step": 10346 }, { "epoch": 0.2194439142329961, "grad_norm": 0.3281693458557129, "learning_rate": 1.9421866069318972e-05, "loss": 0.471, "step": 10347 }, { "epoch": 0.21946512269092913, "grad_norm": 0.3547968566417694, "learning_rate": 1.9421754313584355e-05, "loss": 0.5047, "step": 10348 }, { "epoch": 0.21948633114886218, "grad_norm": 0.3797164559364319, "learning_rate": 1.942164254737094e-05, "loss": 0.5801, "step": 10349 }, { "epoch": 0.2195075396067952, "grad_norm": 0.335883766412735, "learning_rate": 1.9421530770678862e-05, "loss": 0.5195, "step": 10350 }, { "epoch": 0.21952874806472822, "grad_norm": 0.3500826358795166, "learning_rate": 1.9421418983508237e-05, "loss": 0.5078, "step": 10351 }, { "epoch": 0.21954995652266124, "grad_norm": 0.31871744990348816, "learning_rate": 1.942130718585919e-05, "loss": 0.5017, "step": 10352 }, { "epoch": 0.21957116498059426, "grad_norm": 0.33352386951446533, "learning_rate": 1.942119537773185e-05, "loss": 0.5017, "step": 10353 }, { "epoch": 0.21959237343852728, "grad_norm": 0.41411182284355164, "learning_rate": 1.9421083559126336e-05, "loss": 0.5622, "step": 10354 }, { "epoch": 0.2196135818964603, "grad_norm": 0.3282100558280945, "learning_rate": 1.9420971730042776e-05, "loss": 0.5065, "step": 10355 }, { "epoch": 0.21963479035439334, "grad_norm": 0.30962294340133667, "learning_rate": 1.942085989048129e-05, "loss": 0.451, "step": 10356 }, { "epoch": 0.21965599881232636, "grad_norm": 0.3562384247779846, "learning_rate": 1.942074804044201e-05, "loss": 0.5956, "step": 10357 }, { "epoch": 0.21967720727025938, "grad_norm": 0.48025384545326233, "learning_rate": 1.942063617992505e-05, "loss": 0.5032, "step": 10358 }, { "epoch": 0.2196984157281924, "grad_norm": 0.3587583601474762, "learning_rate": 1.9420524308930545e-05, "loss": 0.4263, "step": 10359 }, { "epoch": 0.21971962418612542, "grad_norm": 0.360019326210022, "learning_rate": 1.9420412427458613e-05, "loss": 0.5106, "step": 10360 }, { "epoch": 0.21974083264405844, "grad_norm": 0.3478391766548157, "learning_rate": 1.9420300535509382e-05, "loss": 0.4996, "step": 10361 }, { "epoch": 0.21976204110199146, "grad_norm": 0.3725569248199463, "learning_rate": 1.9420188633082973e-05, "loss": 0.5624, "step": 10362 }, { "epoch": 0.2197832495599245, "grad_norm": 0.33792006969451904, "learning_rate": 1.9420076720179513e-05, "loss": 0.5498, "step": 10363 }, { "epoch": 0.21980445801785753, "grad_norm": 0.3261312246322632, "learning_rate": 1.9419964796799125e-05, "loss": 0.5475, "step": 10364 }, { "epoch": 0.21982566647579055, "grad_norm": 0.33370423316955566, "learning_rate": 1.9419852862941932e-05, "loss": 0.5401, "step": 10365 }, { "epoch": 0.21984687493372357, "grad_norm": 0.3347747027873993, "learning_rate": 1.9419740918608064e-05, "loss": 0.4485, "step": 10366 }, { "epoch": 0.2198680833916566, "grad_norm": 0.3240547478199005, "learning_rate": 1.941962896379764e-05, "loss": 0.5742, "step": 10367 }, { "epoch": 0.2198892918495896, "grad_norm": 0.32493048906326294, "learning_rate": 1.9419516998510786e-05, "loss": 0.4314, "step": 10368 }, { "epoch": 0.21991050030752263, "grad_norm": 0.33161836862564087, "learning_rate": 1.9419405022747628e-05, "loss": 0.4597, "step": 10369 }, { "epoch": 0.21993170876545567, "grad_norm": 0.320963978767395, "learning_rate": 1.941929303650829e-05, "loss": 0.5409, "step": 10370 }, { "epoch": 0.2199529172233887, "grad_norm": 0.7878864407539368, "learning_rate": 1.9419181039792896e-05, "loss": 0.6008, "step": 10371 }, { "epoch": 0.2199741256813217, "grad_norm": 0.3324665427207947, "learning_rate": 1.941906903260157e-05, "loss": 0.5399, "step": 10372 }, { "epoch": 0.21999533413925473, "grad_norm": 0.3255901038646698, "learning_rate": 1.9418957014934435e-05, "loss": 0.5079, "step": 10373 }, { "epoch": 0.22001654259718775, "grad_norm": 0.37730711698532104, "learning_rate": 1.941884498679162e-05, "loss": 0.503, "step": 10374 }, { "epoch": 0.22003775105512077, "grad_norm": 0.3548450171947479, "learning_rate": 1.941873294817325e-05, "loss": 0.6118, "step": 10375 }, { "epoch": 0.2200589595130538, "grad_norm": 0.3275044560432434, "learning_rate": 1.9418620899079444e-05, "loss": 0.5083, "step": 10376 }, { "epoch": 0.22008016797098684, "grad_norm": 0.30783650279045105, "learning_rate": 1.941850883951033e-05, "loss": 0.4946, "step": 10377 }, { "epoch": 0.22010137642891986, "grad_norm": 0.3327330946922302, "learning_rate": 1.9418396769466033e-05, "loss": 0.5324, "step": 10378 }, { "epoch": 0.22012258488685288, "grad_norm": 0.3359862267971039, "learning_rate": 1.9418284688946675e-05, "loss": 0.5113, "step": 10379 }, { "epoch": 0.2201437933447859, "grad_norm": 0.3213080167770386, "learning_rate": 1.9418172597952386e-05, "loss": 0.507, "step": 10380 }, { "epoch": 0.22016500180271892, "grad_norm": 0.35565096139907837, "learning_rate": 1.9418060496483283e-05, "loss": 0.5874, "step": 10381 }, { "epoch": 0.22018621026065194, "grad_norm": 0.3466915786266327, "learning_rate": 1.9417948384539498e-05, "loss": 0.4984, "step": 10382 }, { "epoch": 0.22020741871858499, "grad_norm": 0.32592394948005676, "learning_rate": 1.9417836262121153e-05, "loss": 0.5091, "step": 10383 }, { "epoch": 0.220228627176518, "grad_norm": 0.3025643527507782, "learning_rate": 1.941772412922837e-05, "loss": 0.456, "step": 10384 }, { "epoch": 0.22024983563445102, "grad_norm": 0.3294787108898163, "learning_rate": 1.9417611985861277e-05, "loss": 0.5569, "step": 10385 }, { "epoch": 0.22027104409238404, "grad_norm": 0.3367915749549866, "learning_rate": 1.9417499832019995e-05, "loss": 0.4987, "step": 10386 }, { "epoch": 0.22029225255031706, "grad_norm": 0.31223103404045105, "learning_rate": 1.941738766770466e-05, "loss": 0.5226, "step": 10387 }, { "epoch": 0.22031346100825008, "grad_norm": 0.3637015223503113, "learning_rate": 1.941727549291538e-05, "loss": 0.6027, "step": 10388 }, { "epoch": 0.2203346694661831, "grad_norm": 0.3759884238243103, "learning_rate": 1.941716330765229e-05, "loss": 0.6064, "step": 10389 }, { "epoch": 0.22035587792411615, "grad_norm": 0.32619935274124146, "learning_rate": 1.9417051111915516e-05, "loss": 0.5616, "step": 10390 }, { "epoch": 0.22037708638204917, "grad_norm": 0.3105362355709076, "learning_rate": 1.9416938905705176e-05, "loss": 0.5345, "step": 10391 }, { "epoch": 0.2203982948399822, "grad_norm": 0.32179832458496094, "learning_rate": 1.94168266890214e-05, "loss": 0.5071, "step": 10392 }, { "epoch": 0.2204195032979152, "grad_norm": 0.32092905044555664, "learning_rate": 1.941671446186431e-05, "loss": 0.4688, "step": 10393 }, { "epoch": 0.22044071175584823, "grad_norm": 0.32065722346305847, "learning_rate": 1.941660222423403e-05, "loss": 0.503, "step": 10394 }, { "epoch": 0.22046192021378125, "grad_norm": 0.3166600167751312, "learning_rate": 1.941648997613069e-05, "loss": 0.4963, "step": 10395 }, { "epoch": 0.22048312867171427, "grad_norm": 0.3472278416156769, "learning_rate": 1.9416377717554413e-05, "loss": 0.521, "step": 10396 }, { "epoch": 0.22050433712964732, "grad_norm": 0.35425785183906555, "learning_rate": 1.941626544850532e-05, "loss": 0.5716, "step": 10397 }, { "epoch": 0.22052554558758033, "grad_norm": 0.36497390270233154, "learning_rate": 1.941615316898354e-05, "loss": 0.546, "step": 10398 }, { "epoch": 0.22054675404551335, "grad_norm": 0.46035057306289673, "learning_rate": 1.9416040878989194e-05, "loss": 0.5727, "step": 10399 }, { "epoch": 0.22056796250344637, "grad_norm": 0.4626096785068512, "learning_rate": 1.941592857852241e-05, "loss": 0.4579, "step": 10400 }, { "epoch": 0.2205891709613794, "grad_norm": 0.2873261570930481, "learning_rate": 1.9415816267583314e-05, "loss": 0.4928, "step": 10401 }, { "epoch": 0.2206103794193124, "grad_norm": 0.3531064987182617, "learning_rate": 1.9415703946172027e-05, "loss": 0.5527, "step": 10402 }, { "epoch": 0.22063158787724543, "grad_norm": 0.37488290667533875, "learning_rate": 1.9415591614288676e-05, "loss": 0.5734, "step": 10403 }, { "epoch": 0.22065279633517848, "grad_norm": 0.3403976261615753, "learning_rate": 1.9415479271933385e-05, "loss": 0.5247, "step": 10404 }, { "epoch": 0.2206740047931115, "grad_norm": 0.3049877882003784, "learning_rate": 1.9415366919106284e-05, "loss": 0.4928, "step": 10405 }, { "epoch": 0.22069521325104452, "grad_norm": 0.30114051699638367, "learning_rate": 1.9415254555807487e-05, "loss": 0.4997, "step": 10406 }, { "epoch": 0.22071642170897754, "grad_norm": 0.35259348154067993, "learning_rate": 1.941514218203713e-05, "loss": 0.4767, "step": 10407 }, { "epoch": 0.22073763016691056, "grad_norm": 0.3557835817337036, "learning_rate": 1.9415029797795334e-05, "loss": 0.5472, "step": 10408 }, { "epoch": 0.22075883862484358, "grad_norm": 0.33341917395591736, "learning_rate": 1.941491740308222e-05, "loss": 0.547, "step": 10409 }, { "epoch": 0.2207800470827766, "grad_norm": 0.3283233642578125, "learning_rate": 1.941480499789792e-05, "loss": 0.5008, "step": 10410 }, { "epoch": 0.22080125554070965, "grad_norm": 0.3143453896045685, "learning_rate": 1.9414692582242557e-05, "loss": 0.5755, "step": 10411 }, { "epoch": 0.22082246399864267, "grad_norm": 0.43496596813201904, "learning_rate": 1.941458015611625e-05, "loss": 0.4706, "step": 10412 }, { "epoch": 0.22084367245657568, "grad_norm": 0.321857750415802, "learning_rate": 1.9414467719519132e-05, "loss": 0.518, "step": 10413 }, { "epoch": 0.2208648809145087, "grad_norm": 0.3343469798564911, "learning_rate": 1.9414355272451324e-05, "loss": 0.4709, "step": 10414 }, { "epoch": 0.22088608937244172, "grad_norm": 0.3570043444633484, "learning_rate": 1.941424281491295e-05, "loss": 0.4927, "step": 10415 }, { "epoch": 0.22090729783037474, "grad_norm": 0.31156450510025024, "learning_rate": 1.9414130346904142e-05, "loss": 0.411, "step": 10416 }, { "epoch": 0.22092850628830776, "grad_norm": 0.3118399381637573, "learning_rate": 1.9414017868425016e-05, "loss": 0.5276, "step": 10417 }, { "epoch": 0.2209497147462408, "grad_norm": 0.3771243095397949, "learning_rate": 1.9413905379475698e-05, "loss": 0.5892, "step": 10418 }, { "epoch": 0.22097092320417383, "grad_norm": 0.6199904084205627, "learning_rate": 1.941379288005632e-05, "loss": 0.4751, "step": 10419 }, { "epoch": 0.22099213166210685, "grad_norm": 0.33084753155708313, "learning_rate": 1.9413680370167003e-05, "loss": 0.513, "step": 10420 }, { "epoch": 0.22101334012003987, "grad_norm": 0.3105413019657135, "learning_rate": 1.9413567849807874e-05, "loss": 0.4574, "step": 10421 }, { "epoch": 0.2210345485779729, "grad_norm": 0.29999229311943054, "learning_rate": 1.9413455318979055e-05, "loss": 0.5314, "step": 10422 }, { "epoch": 0.2210557570359059, "grad_norm": 0.38091301918029785, "learning_rate": 1.9413342777680675e-05, "loss": 0.5898, "step": 10423 }, { "epoch": 0.22107696549383896, "grad_norm": 0.33901458978652954, "learning_rate": 1.9413230225912856e-05, "loss": 0.5434, "step": 10424 }, { "epoch": 0.22109817395177198, "grad_norm": 0.37661877274513245, "learning_rate": 1.9413117663675724e-05, "loss": 0.5424, "step": 10425 }, { "epoch": 0.221119382409705, "grad_norm": 0.3063525855541229, "learning_rate": 1.9413005090969403e-05, "loss": 0.5377, "step": 10426 }, { "epoch": 0.22114059086763801, "grad_norm": 0.5028313398361206, "learning_rate": 1.941289250779402e-05, "loss": 0.5682, "step": 10427 }, { "epoch": 0.22116179932557103, "grad_norm": 0.3310869336128235, "learning_rate": 1.9412779914149697e-05, "loss": 0.4663, "step": 10428 }, { "epoch": 0.22118300778350405, "grad_norm": 0.31918641924858093, "learning_rate": 1.9412667310036567e-05, "loss": 0.5029, "step": 10429 }, { "epoch": 0.22120421624143707, "grad_norm": 0.49925872683525085, "learning_rate": 1.9412554695454747e-05, "loss": 0.6004, "step": 10430 }, { "epoch": 0.22122542469937012, "grad_norm": 0.3392350375652313, "learning_rate": 1.9412442070404368e-05, "loss": 0.4878, "step": 10431 }, { "epoch": 0.22124663315730314, "grad_norm": 0.34790918231010437, "learning_rate": 1.941232943488555e-05, "loss": 0.467, "step": 10432 }, { "epoch": 0.22126784161523616, "grad_norm": 0.6588135957717896, "learning_rate": 1.941221678889842e-05, "loss": 0.5875, "step": 10433 }, { "epoch": 0.22128905007316918, "grad_norm": 0.3643683195114136, "learning_rate": 1.941210413244311e-05, "loss": 0.5838, "step": 10434 }, { "epoch": 0.2213102585311022, "grad_norm": 0.3269599974155426, "learning_rate": 1.9411991465519735e-05, "loss": 0.5609, "step": 10435 }, { "epoch": 0.22133146698903522, "grad_norm": 0.3159753680229187, "learning_rate": 1.9411878788128426e-05, "loss": 0.5452, "step": 10436 }, { "epoch": 0.22135267544696824, "grad_norm": 0.37842345237731934, "learning_rate": 1.9411766100269304e-05, "loss": 0.6105, "step": 10437 }, { "epoch": 0.2213738839049013, "grad_norm": 0.5973609089851379, "learning_rate": 1.94116534019425e-05, "loss": 0.4703, "step": 10438 }, { "epoch": 0.2213950923628343, "grad_norm": 0.33551904559135437, "learning_rate": 1.941154069314814e-05, "loss": 0.5266, "step": 10439 }, { "epoch": 0.22141630082076733, "grad_norm": 0.38875794410705566, "learning_rate": 1.9411427973886342e-05, "loss": 0.4078, "step": 10440 }, { "epoch": 0.22143750927870035, "grad_norm": 0.35174793004989624, "learning_rate": 1.941131524415724e-05, "loss": 0.4613, "step": 10441 }, { "epoch": 0.22145871773663336, "grad_norm": 0.36671608686447144, "learning_rate": 1.9411202503960948e-05, "loss": 0.553, "step": 10442 }, { "epoch": 0.22147992619456638, "grad_norm": 0.3912331461906433, "learning_rate": 1.9411089753297602e-05, "loss": 0.5216, "step": 10443 }, { "epoch": 0.2215011346524994, "grad_norm": 0.32747510075569153, "learning_rate": 1.9410976992167325e-05, "loss": 0.4225, "step": 10444 }, { "epoch": 0.22152234311043245, "grad_norm": 0.40306341648101807, "learning_rate": 1.941086422057024e-05, "loss": 0.5446, "step": 10445 }, { "epoch": 0.22154355156836547, "grad_norm": 0.33216729760169983, "learning_rate": 1.9410751438506473e-05, "loss": 0.4979, "step": 10446 }, { "epoch": 0.2215647600262985, "grad_norm": 0.3749758005142212, "learning_rate": 1.9410638645976153e-05, "loss": 0.4833, "step": 10447 }, { "epoch": 0.2215859684842315, "grad_norm": 0.32525938749313354, "learning_rate": 1.9410525842979398e-05, "loss": 0.5352, "step": 10448 }, { "epoch": 0.22160717694216453, "grad_norm": 0.41862019896507263, "learning_rate": 1.941041302951634e-05, "loss": 0.481, "step": 10449 }, { "epoch": 0.22162838540009755, "grad_norm": 0.34007981419563293, "learning_rate": 1.9410300205587103e-05, "loss": 0.5453, "step": 10450 }, { "epoch": 0.22164959385803057, "grad_norm": 0.3716122508049011, "learning_rate": 1.941018737119181e-05, "loss": 0.5496, "step": 10451 }, { "epoch": 0.22167080231596362, "grad_norm": 0.32535508275032043, "learning_rate": 1.941007452633059e-05, "loss": 0.6031, "step": 10452 }, { "epoch": 0.22169201077389664, "grad_norm": 0.360451877117157, "learning_rate": 1.940996167100357e-05, "loss": 0.5221, "step": 10453 }, { "epoch": 0.22171321923182966, "grad_norm": 0.9758414030075073, "learning_rate": 1.9409848805210868e-05, "loss": 0.5324, "step": 10454 }, { "epoch": 0.22173442768976268, "grad_norm": 0.3311210572719574, "learning_rate": 1.9409735928952614e-05, "loss": 0.547, "step": 10455 }, { "epoch": 0.2217556361476957, "grad_norm": 0.33379197120666504, "learning_rate": 1.9409623042228934e-05, "loss": 0.5188, "step": 10456 }, { "epoch": 0.22177684460562871, "grad_norm": 0.3494528532028198, "learning_rate": 1.940951014503995e-05, "loss": 0.5344, "step": 10457 }, { "epoch": 0.22179805306356176, "grad_norm": 0.421994149684906, "learning_rate": 1.9409397237385797e-05, "loss": 0.4545, "step": 10458 }, { "epoch": 0.22181926152149478, "grad_norm": 0.31555360555648804, "learning_rate": 1.940928431926659e-05, "loss": 0.4991, "step": 10459 }, { "epoch": 0.2218404699794278, "grad_norm": 0.37099361419677734, "learning_rate": 1.940917139068246e-05, "loss": 0.4762, "step": 10460 }, { "epoch": 0.22186167843736082, "grad_norm": 0.3328476548194885, "learning_rate": 1.940905845163353e-05, "loss": 0.5004, "step": 10461 }, { "epoch": 0.22188288689529384, "grad_norm": 0.30160242319107056, "learning_rate": 1.9408945502119928e-05, "loss": 0.5095, "step": 10462 }, { "epoch": 0.22190409535322686, "grad_norm": 0.3892498016357422, "learning_rate": 1.9408832542141778e-05, "loss": 0.4167, "step": 10463 }, { "epoch": 0.22192530381115988, "grad_norm": 0.2845875918865204, "learning_rate": 1.9408719571699207e-05, "loss": 0.4377, "step": 10464 }, { "epoch": 0.22194651226909293, "grad_norm": 0.32577529549598694, "learning_rate": 1.940860659079234e-05, "loss": 0.53, "step": 10465 }, { "epoch": 0.22196772072702595, "grad_norm": 0.32702651619911194, "learning_rate": 1.94084935994213e-05, "loss": 0.5499, "step": 10466 }, { "epoch": 0.22198892918495897, "grad_norm": 0.34544622898101807, "learning_rate": 1.940838059758622e-05, "loss": 0.5935, "step": 10467 }, { "epoch": 0.22201013764289199, "grad_norm": 0.3483462333679199, "learning_rate": 1.940826758528722e-05, "loss": 0.5193, "step": 10468 }, { "epoch": 0.222031346100825, "grad_norm": 0.3846227824687958, "learning_rate": 1.9408154562524422e-05, "loss": 0.5226, "step": 10469 }, { "epoch": 0.22205255455875803, "grad_norm": 0.32994890213012695, "learning_rate": 1.9408041529297956e-05, "loss": 0.5466, "step": 10470 }, { "epoch": 0.22207376301669104, "grad_norm": 0.35010001063346863, "learning_rate": 1.940792848560795e-05, "loss": 0.6315, "step": 10471 }, { "epoch": 0.2220949714746241, "grad_norm": 0.47265878319740295, "learning_rate": 1.940781543145453e-05, "loss": 0.5029, "step": 10472 }, { "epoch": 0.2221161799325571, "grad_norm": 0.3155251145362854, "learning_rate": 1.940770236683782e-05, "loss": 0.5415, "step": 10473 }, { "epoch": 0.22213738839049013, "grad_norm": 0.3176557123661041, "learning_rate": 1.940758929175794e-05, "loss": 0.5493, "step": 10474 }, { "epoch": 0.22215859684842315, "grad_norm": 0.33021116256713867, "learning_rate": 1.9407476206215024e-05, "loss": 0.4995, "step": 10475 }, { "epoch": 0.22217980530635617, "grad_norm": 0.32293376326560974, "learning_rate": 1.9407363110209197e-05, "loss": 0.5544, "step": 10476 }, { "epoch": 0.2222010137642892, "grad_norm": 0.29752087593078613, "learning_rate": 1.940725000374058e-05, "loss": 0.5585, "step": 10477 }, { "epoch": 0.2222222222222222, "grad_norm": 0.3567342460155487, "learning_rate": 1.94071368868093e-05, "loss": 0.5724, "step": 10478 }, { "epoch": 0.22224343068015526, "grad_norm": 0.32181432843208313, "learning_rate": 1.9407023759415485e-05, "loss": 0.4766, "step": 10479 }, { "epoch": 0.22226463913808828, "grad_norm": 0.345051109790802, "learning_rate": 1.940691062155926e-05, "loss": 0.5379, "step": 10480 }, { "epoch": 0.2222858475960213, "grad_norm": 0.3488868772983551, "learning_rate": 1.940679747324075e-05, "loss": 0.4734, "step": 10481 }, { "epoch": 0.22230705605395432, "grad_norm": 0.39246734976768494, "learning_rate": 1.9406684314460082e-05, "loss": 0.5371, "step": 10482 }, { "epoch": 0.22232826451188734, "grad_norm": 0.3462683856487274, "learning_rate": 1.9406571145217384e-05, "loss": 0.5214, "step": 10483 }, { "epoch": 0.22234947296982036, "grad_norm": 0.37615519762039185, "learning_rate": 1.9406457965512775e-05, "loss": 0.5108, "step": 10484 }, { "epoch": 0.22237068142775337, "grad_norm": 0.335033655166626, "learning_rate": 1.940634477534639e-05, "loss": 0.4948, "step": 10485 }, { "epoch": 0.22239188988568642, "grad_norm": 0.34732359647750854, "learning_rate": 1.9406231574718345e-05, "loss": 0.5182, "step": 10486 }, { "epoch": 0.22241309834361944, "grad_norm": 0.3517259955406189, "learning_rate": 1.9406118363628774e-05, "loss": 0.6418, "step": 10487 }, { "epoch": 0.22243430680155246, "grad_norm": 0.38780996203422546, "learning_rate": 1.94060051420778e-05, "loss": 0.4907, "step": 10488 }, { "epoch": 0.22245551525948548, "grad_norm": 0.3189190924167633, "learning_rate": 1.9405891910065546e-05, "loss": 0.4646, "step": 10489 }, { "epoch": 0.2224767237174185, "grad_norm": 0.37002724409103394, "learning_rate": 1.9405778667592144e-05, "loss": 0.5006, "step": 10490 }, { "epoch": 0.22249793217535152, "grad_norm": 0.379926860332489, "learning_rate": 1.9405665414657716e-05, "loss": 0.5001, "step": 10491 }, { "epoch": 0.22251914063328454, "grad_norm": 0.348889023065567, "learning_rate": 1.9405552151262386e-05, "loss": 0.4107, "step": 10492 }, { "epoch": 0.2225403490912176, "grad_norm": 0.3581061065196991, "learning_rate": 1.9405438877406288e-05, "loss": 0.524, "step": 10493 }, { "epoch": 0.2225615575491506, "grad_norm": 0.44261059165000916, "learning_rate": 1.9405325593089536e-05, "loss": 0.5711, "step": 10494 }, { "epoch": 0.22258276600708363, "grad_norm": 0.36039644479751587, "learning_rate": 1.9405212298312266e-05, "loss": 0.5321, "step": 10495 }, { "epoch": 0.22260397446501665, "grad_norm": 1.242139458656311, "learning_rate": 1.94050989930746e-05, "loss": 0.4911, "step": 10496 }, { "epoch": 0.22262518292294967, "grad_norm": 0.32603535056114197, "learning_rate": 1.9404985677376666e-05, "loss": 0.5257, "step": 10497 }, { "epoch": 0.22264639138088269, "grad_norm": 0.36115971207618713, "learning_rate": 1.9404872351218588e-05, "loss": 0.6083, "step": 10498 }, { "epoch": 0.22266759983881573, "grad_norm": 0.39568936824798584, "learning_rate": 1.9404759014600492e-05, "loss": 0.5169, "step": 10499 }, { "epoch": 0.22268880829674875, "grad_norm": 0.3524985611438751, "learning_rate": 1.9404645667522505e-05, "loss": 0.5677, "step": 10500 }, { "epoch": 0.22271001675468177, "grad_norm": 0.32983431220054626, "learning_rate": 1.9404532309984753e-05, "loss": 0.5717, "step": 10501 }, { "epoch": 0.2227312252126148, "grad_norm": 0.3418312072753906, "learning_rate": 1.940441894198736e-05, "loss": 0.5129, "step": 10502 }, { "epoch": 0.2227524336705478, "grad_norm": 0.3359888195991516, "learning_rate": 1.9404305563530458e-05, "loss": 0.458, "step": 10503 }, { "epoch": 0.22277364212848083, "grad_norm": 0.3210225999355316, "learning_rate": 1.9404192174614166e-05, "loss": 0.5396, "step": 10504 }, { "epoch": 0.22279485058641385, "grad_norm": 0.40531837940216064, "learning_rate": 1.9404078775238613e-05, "loss": 0.4938, "step": 10505 }, { "epoch": 0.2228160590443469, "grad_norm": 0.3055112659931183, "learning_rate": 1.9403965365403925e-05, "loss": 0.4511, "step": 10506 }, { "epoch": 0.22283726750227992, "grad_norm": 0.33763015270233154, "learning_rate": 1.940385194511023e-05, "loss": 0.4712, "step": 10507 }, { "epoch": 0.22285847596021294, "grad_norm": 0.3430040776729584, "learning_rate": 1.9403738514357653e-05, "loss": 0.4646, "step": 10508 }, { "epoch": 0.22287968441814596, "grad_norm": 0.34946346282958984, "learning_rate": 1.9403625073146316e-05, "loss": 0.562, "step": 10509 }, { "epoch": 0.22290089287607898, "grad_norm": 0.3360157012939453, "learning_rate": 1.9403511621476352e-05, "loss": 0.5218, "step": 10510 }, { "epoch": 0.222922101334012, "grad_norm": 0.3301010727882385, "learning_rate": 1.9403398159347886e-05, "loss": 0.4704, "step": 10511 }, { "epoch": 0.22294330979194502, "grad_norm": 0.3195541203022003, "learning_rate": 1.940328468676104e-05, "loss": 0.5723, "step": 10512 }, { "epoch": 0.22296451824987806, "grad_norm": 0.3236735761165619, "learning_rate": 1.9403171203715942e-05, "loss": 0.5499, "step": 10513 }, { "epoch": 0.22298572670781108, "grad_norm": 0.3833291530609131, "learning_rate": 1.9403057710212718e-05, "loss": 0.5689, "step": 10514 }, { "epoch": 0.2230069351657441, "grad_norm": 0.3987710475921631, "learning_rate": 1.9402944206251497e-05, "loss": 0.5652, "step": 10515 }, { "epoch": 0.22302814362367712, "grad_norm": 0.3754901885986328, "learning_rate": 1.94028306918324e-05, "loss": 0.5511, "step": 10516 }, { "epoch": 0.22304935208161014, "grad_norm": 0.38463693857192993, "learning_rate": 1.940271716695556e-05, "loss": 0.5187, "step": 10517 }, { "epoch": 0.22307056053954316, "grad_norm": 0.40767669677734375, "learning_rate": 1.9402603631621096e-05, "loss": 0.4481, "step": 10518 }, { "epoch": 0.22309176899747618, "grad_norm": 0.33364760875701904, "learning_rate": 1.9402490085829142e-05, "loss": 0.5837, "step": 10519 }, { "epoch": 0.22311297745540923, "grad_norm": 0.33820798993110657, "learning_rate": 1.940237652957982e-05, "loss": 0.57, "step": 10520 }, { "epoch": 0.22313418591334225, "grad_norm": 0.3595139980316162, "learning_rate": 1.9402262962873253e-05, "loss": 0.4975, "step": 10521 }, { "epoch": 0.22315539437127527, "grad_norm": 0.3495270907878876, "learning_rate": 1.940214938570957e-05, "loss": 0.5361, "step": 10522 }, { "epoch": 0.2231766028292083, "grad_norm": 0.3603237271308899, "learning_rate": 1.9402035798088904e-05, "loss": 0.5274, "step": 10523 }, { "epoch": 0.2231978112871413, "grad_norm": 0.3507903814315796, "learning_rate": 1.9401922200011372e-05, "loss": 0.5581, "step": 10524 }, { "epoch": 0.22321901974507433, "grad_norm": 0.33627617359161377, "learning_rate": 1.9401808591477103e-05, "loss": 0.4869, "step": 10525 }, { "epoch": 0.22324022820300735, "grad_norm": 0.4084664583206177, "learning_rate": 1.9401694972486226e-05, "loss": 0.5127, "step": 10526 }, { "epoch": 0.2232614366609404, "grad_norm": 0.3319011628627777, "learning_rate": 1.9401581343038862e-05, "loss": 0.5311, "step": 10527 }, { "epoch": 0.2232826451188734, "grad_norm": 0.3401017487049103, "learning_rate": 1.9401467703135145e-05, "loss": 0.5631, "step": 10528 }, { "epoch": 0.22330385357680643, "grad_norm": 0.2995961010456085, "learning_rate": 1.9401354052775195e-05, "loss": 0.486, "step": 10529 }, { "epoch": 0.22332506203473945, "grad_norm": 0.41960594058036804, "learning_rate": 1.9401240391959142e-05, "loss": 0.5759, "step": 10530 }, { "epoch": 0.22334627049267247, "grad_norm": 0.37845391035079956, "learning_rate": 1.940112672068711e-05, "loss": 0.5798, "step": 10531 }, { "epoch": 0.2233674789506055, "grad_norm": 0.4401351809501648, "learning_rate": 1.9401013038959228e-05, "loss": 0.6001, "step": 10532 }, { "epoch": 0.22338868740853854, "grad_norm": 0.2913244962692261, "learning_rate": 1.940089934677562e-05, "loss": 0.5234, "step": 10533 }, { "epoch": 0.22340989586647156, "grad_norm": 0.35212811827659607, "learning_rate": 1.9400785644136414e-05, "loss": 0.5066, "step": 10534 }, { "epoch": 0.22343110432440458, "grad_norm": 0.34295374155044556, "learning_rate": 1.9400671931041735e-05, "loss": 0.4717, "step": 10535 }, { "epoch": 0.2234523127823376, "grad_norm": 0.3380040228366852, "learning_rate": 1.9400558207491712e-05, "loss": 0.5295, "step": 10536 }, { "epoch": 0.22347352124027062, "grad_norm": 0.39822128415107727, "learning_rate": 1.940044447348647e-05, "loss": 0.5376, "step": 10537 }, { "epoch": 0.22349472969820364, "grad_norm": 0.32962894439697266, "learning_rate": 1.940033072902613e-05, "loss": 0.5319, "step": 10538 }, { "epoch": 0.22351593815613666, "grad_norm": 0.3320520520210266, "learning_rate": 1.9400216974110832e-05, "loss": 0.4904, "step": 10539 }, { "epoch": 0.2235371466140697, "grad_norm": 0.3363901376724243, "learning_rate": 1.9400103208740688e-05, "loss": 0.5123, "step": 10540 }, { "epoch": 0.22355835507200272, "grad_norm": 0.3790375292301178, "learning_rate": 1.9399989432915833e-05, "loss": 0.4932, "step": 10541 }, { "epoch": 0.22357956352993574, "grad_norm": 0.3333020508289337, "learning_rate": 1.939987564663639e-05, "loss": 0.38, "step": 10542 }, { "epoch": 0.22360077198786876, "grad_norm": 0.49467259645462036, "learning_rate": 1.939976184990249e-05, "loss": 0.4633, "step": 10543 }, { "epoch": 0.22362198044580178, "grad_norm": 0.32437455654144287, "learning_rate": 1.9399648042714253e-05, "loss": 0.4967, "step": 10544 }, { "epoch": 0.2236431889037348, "grad_norm": 0.41746416687965393, "learning_rate": 1.9399534225071812e-05, "loss": 0.5212, "step": 10545 }, { "epoch": 0.22366439736166782, "grad_norm": 0.3559863567352295, "learning_rate": 1.939942039697529e-05, "loss": 0.5515, "step": 10546 }, { "epoch": 0.22368560581960087, "grad_norm": 0.3005695044994354, "learning_rate": 1.9399306558424813e-05, "loss": 0.4665, "step": 10547 }, { "epoch": 0.2237068142775339, "grad_norm": 0.35398489236831665, "learning_rate": 1.939919270942051e-05, "loss": 0.4797, "step": 10548 }, { "epoch": 0.2237280227354669, "grad_norm": 0.3395288586616516, "learning_rate": 1.9399078849962507e-05, "loss": 0.5297, "step": 10549 }, { "epoch": 0.22374923119339993, "grad_norm": 0.3002605736255646, "learning_rate": 1.939896498005093e-05, "loss": 0.5182, "step": 10550 }, { "epoch": 0.22377043965133295, "grad_norm": 0.3659600615501404, "learning_rate": 1.9398851099685906e-05, "loss": 0.5483, "step": 10551 }, { "epoch": 0.22379164810926597, "grad_norm": 0.34171414375305176, "learning_rate": 1.939873720886756e-05, "loss": 0.5007, "step": 10552 }, { "epoch": 0.223812856567199, "grad_norm": 0.3495838940143585, "learning_rate": 1.939862330759602e-05, "loss": 0.5978, "step": 10553 }, { "epoch": 0.22383406502513203, "grad_norm": 0.353595495223999, "learning_rate": 1.9398509395871414e-05, "loss": 0.5445, "step": 10554 }, { "epoch": 0.22385527348306505, "grad_norm": 0.35422661900520325, "learning_rate": 1.939839547369387e-05, "loss": 0.541, "step": 10555 }, { "epoch": 0.22387648194099807, "grad_norm": 0.31964612007141113, "learning_rate": 1.9398281541063506e-05, "loss": 0.4921, "step": 10556 }, { "epoch": 0.2238976903989311, "grad_norm": 0.34830889105796814, "learning_rate": 1.939816759798046e-05, "loss": 0.5394, "step": 10557 }, { "epoch": 0.2239188988568641, "grad_norm": 0.3327386975288391, "learning_rate": 1.9398053644444853e-05, "loss": 0.5082, "step": 10558 }, { "epoch": 0.22394010731479713, "grad_norm": 0.33653178811073303, "learning_rate": 1.939793968045681e-05, "loss": 0.4456, "step": 10559 }, { "epoch": 0.22396131577273015, "grad_norm": 0.4846949577331543, "learning_rate": 1.9397825706016464e-05, "loss": 0.5379, "step": 10560 }, { "epoch": 0.2239825242306632, "grad_norm": 0.33222702145576477, "learning_rate": 1.939771172112393e-05, "loss": 0.5504, "step": 10561 }, { "epoch": 0.22400373268859622, "grad_norm": 0.4243123531341553, "learning_rate": 1.939759772577935e-05, "loss": 0.5501, "step": 10562 }, { "epoch": 0.22402494114652924, "grad_norm": 0.4360288977622986, "learning_rate": 1.9397483719982842e-05, "loss": 0.5215, "step": 10563 }, { "epoch": 0.22404614960446226, "grad_norm": 0.3957356810569763, "learning_rate": 1.9397369703734532e-05, "loss": 0.4834, "step": 10564 }, { "epoch": 0.22406735806239528, "grad_norm": 0.34735190868377686, "learning_rate": 1.939725567703455e-05, "loss": 0.4747, "step": 10565 }, { "epoch": 0.2240885665203283, "grad_norm": 0.35908836126327515, "learning_rate": 1.9397141639883027e-05, "loss": 0.5687, "step": 10566 }, { "epoch": 0.22410977497826132, "grad_norm": 0.3947818875312805, "learning_rate": 1.939702759228008e-05, "loss": 0.5683, "step": 10567 }, { "epoch": 0.22413098343619436, "grad_norm": 0.3880973756313324, "learning_rate": 1.9396913534225838e-05, "loss": 0.4621, "step": 10568 }, { "epoch": 0.22415219189412738, "grad_norm": 0.9151740074157715, "learning_rate": 1.9396799465720437e-05, "loss": 0.5293, "step": 10569 }, { "epoch": 0.2241734003520604, "grad_norm": 0.41112250089645386, "learning_rate": 1.9396685386763988e-05, "loss": 0.5661, "step": 10570 }, { "epoch": 0.22419460880999342, "grad_norm": 0.36042866110801697, "learning_rate": 1.9396571297356636e-05, "loss": 0.5329, "step": 10571 }, { "epoch": 0.22421581726792644, "grad_norm": 0.3211803436279297, "learning_rate": 1.9396457197498493e-05, "loss": 0.5679, "step": 10572 }, { "epoch": 0.22423702572585946, "grad_norm": 0.34911495447158813, "learning_rate": 1.9396343087189696e-05, "loss": 0.4202, "step": 10573 }, { "epoch": 0.2242582341837925, "grad_norm": 0.375731498003006, "learning_rate": 1.9396228966430368e-05, "loss": 0.5664, "step": 10574 }, { "epoch": 0.22427944264172553, "grad_norm": 0.3120328485965729, "learning_rate": 1.9396114835220633e-05, "loss": 0.5203, "step": 10575 }, { "epoch": 0.22430065109965855, "grad_norm": 0.40669724345207214, "learning_rate": 1.939600069356062e-05, "loss": 0.4815, "step": 10576 }, { "epoch": 0.22432185955759157, "grad_norm": 0.32077908515930176, "learning_rate": 1.939588654145046e-05, "loss": 0.5096, "step": 10577 }, { "epoch": 0.2243430680155246, "grad_norm": 0.3098447024822235, "learning_rate": 1.9395772378890277e-05, "loss": 0.5244, "step": 10578 }, { "epoch": 0.2243642764734576, "grad_norm": 0.3431778848171234, "learning_rate": 1.9395658205880194e-05, "loss": 0.5917, "step": 10579 }, { "epoch": 0.22438548493139063, "grad_norm": 0.46530652046203613, "learning_rate": 1.9395544022420343e-05, "loss": 0.442, "step": 10580 }, { "epoch": 0.22440669338932367, "grad_norm": 0.3569703996181488, "learning_rate": 1.939542982851085e-05, "loss": 0.515, "step": 10581 }, { "epoch": 0.2244279018472567, "grad_norm": 0.3758380115032196, "learning_rate": 1.939531562415184e-05, "loss": 0.5023, "step": 10582 }, { "epoch": 0.22444911030518971, "grad_norm": 0.3769858479499817, "learning_rate": 1.9395201409343444e-05, "loss": 0.4358, "step": 10583 }, { "epoch": 0.22447031876312273, "grad_norm": 0.324607789516449, "learning_rate": 1.9395087184085787e-05, "loss": 0.5716, "step": 10584 }, { "epoch": 0.22449152722105575, "grad_norm": 0.35333365201950073, "learning_rate": 1.9394972948378993e-05, "loss": 0.6014, "step": 10585 }, { "epoch": 0.22451273567898877, "grad_norm": 0.3875390589237213, "learning_rate": 1.9394858702223193e-05, "loss": 0.593, "step": 10586 }, { "epoch": 0.2245339441369218, "grad_norm": 0.3191227316856384, "learning_rate": 1.9394744445618514e-05, "loss": 0.4209, "step": 10587 }, { "epoch": 0.22455515259485484, "grad_norm": 0.2937946617603302, "learning_rate": 1.939463017856508e-05, "loss": 0.4657, "step": 10588 }, { "epoch": 0.22457636105278786, "grad_norm": 0.33446940779685974, "learning_rate": 1.9394515901063024e-05, "loss": 0.5332, "step": 10589 }, { "epoch": 0.22459756951072088, "grad_norm": 0.3188472390174866, "learning_rate": 1.939440161311246e-05, "loss": 0.5969, "step": 10590 }, { "epoch": 0.2246187779686539, "grad_norm": 0.3086860179901123, "learning_rate": 1.9394287314713532e-05, "loss": 0.4873, "step": 10591 }, { "epoch": 0.22463998642658692, "grad_norm": 0.3458672761917114, "learning_rate": 1.9394173005866358e-05, "loss": 0.4885, "step": 10592 }, { "epoch": 0.22466119488451994, "grad_norm": 0.34123459458351135, "learning_rate": 1.9394058686571066e-05, "loss": 0.4868, "step": 10593 }, { "epoch": 0.22468240334245296, "grad_norm": 0.3788013160228729, "learning_rate": 1.9393944356827782e-05, "loss": 0.4681, "step": 10594 }, { "epoch": 0.224703611800386, "grad_norm": 0.32359635829925537, "learning_rate": 1.9393830016636634e-05, "loss": 0.5352, "step": 10595 }, { "epoch": 0.22472482025831902, "grad_norm": 0.35618525743484497, "learning_rate": 1.9393715665997752e-05, "loss": 0.4807, "step": 10596 }, { "epoch": 0.22474602871625204, "grad_norm": 0.7160555720329285, "learning_rate": 1.9393601304911264e-05, "loss": 0.5547, "step": 10597 }, { "epoch": 0.22476723717418506, "grad_norm": 0.33429455757141113, "learning_rate": 1.939348693337729e-05, "loss": 0.4612, "step": 10598 }, { "epoch": 0.22478844563211808, "grad_norm": 0.49764755368232727, "learning_rate": 1.9393372551395958e-05, "loss": 0.5161, "step": 10599 }, { "epoch": 0.2248096540900511, "grad_norm": 0.39120179414749146, "learning_rate": 1.9393258158967404e-05, "loss": 0.4919, "step": 10600 }, { "epoch": 0.22483086254798412, "grad_norm": 0.3725835680961609, "learning_rate": 1.939314375609175e-05, "loss": 0.535, "step": 10601 }, { "epoch": 0.22485207100591717, "grad_norm": 0.32184842228889465, "learning_rate": 1.9393029342769122e-05, "loss": 0.4767, "step": 10602 }, { "epoch": 0.2248732794638502, "grad_norm": 0.35138845443725586, "learning_rate": 1.939291491899965e-05, "loss": 0.4815, "step": 10603 }, { "epoch": 0.2248944879217832, "grad_norm": 0.3309003412723541, "learning_rate": 1.9392800484783456e-05, "loss": 0.5233, "step": 10604 }, { "epoch": 0.22491569637971623, "grad_norm": 0.31768855452537537, "learning_rate": 1.939268604012067e-05, "loss": 0.5413, "step": 10605 }, { "epoch": 0.22493690483764925, "grad_norm": 0.4609416127204895, "learning_rate": 1.9392571585011425e-05, "loss": 0.4869, "step": 10606 }, { "epoch": 0.22495811329558227, "grad_norm": 0.3529761731624603, "learning_rate": 1.939245711945584e-05, "loss": 0.5284, "step": 10607 }, { "epoch": 0.22497932175351532, "grad_norm": 0.30607861280441284, "learning_rate": 1.939234264345405e-05, "loss": 0.5158, "step": 10608 }, { "epoch": 0.22500053021144834, "grad_norm": 0.35801008343696594, "learning_rate": 1.9392228157006173e-05, "loss": 0.6145, "step": 10609 }, { "epoch": 0.22502173866938135, "grad_norm": 0.33271709084510803, "learning_rate": 1.9392113660112345e-05, "loss": 0.5271, "step": 10610 }, { "epoch": 0.22504294712731437, "grad_norm": 0.3781363070011139, "learning_rate": 1.9391999152772686e-05, "loss": 0.5748, "step": 10611 }, { "epoch": 0.2250641555852474, "grad_norm": 0.3211010694503784, "learning_rate": 1.9391884634987332e-05, "loss": 0.515, "step": 10612 }, { "epoch": 0.2250853640431804, "grad_norm": 0.37950631976127625, "learning_rate": 1.93917701067564e-05, "loss": 0.596, "step": 10613 }, { "epoch": 0.22510657250111343, "grad_norm": 0.7305687665939331, "learning_rate": 1.9391655568080027e-05, "loss": 0.5475, "step": 10614 }, { "epoch": 0.22512778095904648, "grad_norm": 0.335194855928421, "learning_rate": 1.9391541018958337e-05, "loss": 0.4402, "step": 10615 }, { "epoch": 0.2251489894169795, "grad_norm": 0.4049391448497772, "learning_rate": 1.9391426459391453e-05, "loss": 0.6318, "step": 10616 }, { "epoch": 0.22517019787491252, "grad_norm": 0.3021998703479767, "learning_rate": 1.939131188937951e-05, "loss": 0.5445, "step": 10617 }, { "epoch": 0.22519140633284554, "grad_norm": 0.817636251449585, "learning_rate": 1.9391197308922626e-05, "loss": 0.4747, "step": 10618 }, { "epoch": 0.22521261479077856, "grad_norm": 0.3160152733325958, "learning_rate": 1.9391082718020938e-05, "loss": 0.4958, "step": 10619 }, { "epoch": 0.22523382324871158, "grad_norm": 0.3454282879829407, "learning_rate": 1.939096811667457e-05, "loss": 0.534, "step": 10620 }, { "epoch": 0.2252550317066446, "grad_norm": 0.36844339966773987, "learning_rate": 1.9390853504883647e-05, "loss": 0.5147, "step": 10621 }, { "epoch": 0.22527624016457765, "grad_norm": 0.39196449518203735, "learning_rate": 1.93907388826483e-05, "loss": 0.4983, "step": 10622 }, { "epoch": 0.22529744862251067, "grad_norm": 0.3327624201774597, "learning_rate": 1.939062424996865e-05, "loss": 0.6015, "step": 10623 }, { "epoch": 0.22531865708044369, "grad_norm": 0.6279634833335876, "learning_rate": 1.9390509606844837e-05, "loss": 0.5566, "step": 10624 }, { "epoch": 0.2253398655383767, "grad_norm": 0.3059830963611603, "learning_rate": 1.9390394953276978e-05, "loss": 0.4915, "step": 10625 }, { "epoch": 0.22536107399630972, "grad_norm": 0.35443052649497986, "learning_rate": 1.93902802892652e-05, "loss": 0.5445, "step": 10626 }, { "epoch": 0.22538228245424274, "grad_norm": 0.3385974168777466, "learning_rate": 1.9390165614809636e-05, "loss": 0.5329, "step": 10627 }, { "epoch": 0.22540349091217576, "grad_norm": 1.1880772113800049, "learning_rate": 1.939005092991041e-05, "loss": 0.5983, "step": 10628 }, { "epoch": 0.2254246993701088, "grad_norm": 0.3262191414833069, "learning_rate": 1.9389936234567656e-05, "loss": 0.5172, "step": 10629 }, { "epoch": 0.22544590782804183, "grad_norm": 0.3395731747150421, "learning_rate": 1.9389821528781493e-05, "loss": 0.5709, "step": 10630 }, { "epoch": 0.22546711628597485, "grad_norm": 0.30562683939933777, "learning_rate": 1.938970681255205e-05, "loss": 0.5138, "step": 10631 }, { "epoch": 0.22548832474390787, "grad_norm": 0.34080231189727783, "learning_rate": 1.9389592085879457e-05, "loss": 0.4697, "step": 10632 }, { "epoch": 0.2255095332018409, "grad_norm": 1.410874843597412, "learning_rate": 1.9389477348763842e-05, "loss": 0.4787, "step": 10633 }, { "epoch": 0.2255307416597739, "grad_norm": 0.3086399734020233, "learning_rate": 1.938936260120533e-05, "loss": 0.5239, "step": 10634 }, { "epoch": 0.22555195011770693, "grad_norm": 0.6144660711288452, "learning_rate": 1.9389247843204056e-05, "loss": 0.5614, "step": 10635 }, { "epoch": 0.22557315857563998, "grad_norm": 0.3178655207157135, "learning_rate": 1.938913307476014e-05, "loss": 0.5221, "step": 10636 }, { "epoch": 0.225594367033573, "grad_norm": 0.371466726064682, "learning_rate": 1.938901829587371e-05, "loss": 0.4465, "step": 10637 }, { "epoch": 0.22561557549150602, "grad_norm": 0.5696245431900024, "learning_rate": 1.9388903506544898e-05, "loss": 0.5326, "step": 10638 }, { "epoch": 0.22563678394943903, "grad_norm": 0.3102416396141052, "learning_rate": 1.9388788706773826e-05, "loss": 0.4732, "step": 10639 }, { "epoch": 0.22565799240737205, "grad_norm": 0.3583340346813202, "learning_rate": 1.938867389656063e-05, "loss": 0.5731, "step": 10640 }, { "epoch": 0.22567920086530507, "grad_norm": 0.31355151534080505, "learning_rate": 1.9388559075905425e-05, "loss": 0.5087, "step": 10641 }, { "epoch": 0.2257004093232381, "grad_norm": 0.3223620355129242, "learning_rate": 1.938844424480835e-05, "loss": 0.5743, "step": 10642 }, { "epoch": 0.22572161778117114, "grad_norm": 0.3869640529155731, "learning_rate": 1.938832940326953e-05, "loss": 0.4157, "step": 10643 }, { "epoch": 0.22574282623910416, "grad_norm": 0.3382539749145508, "learning_rate": 1.938821455128909e-05, "loss": 0.6121, "step": 10644 }, { "epoch": 0.22576403469703718, "grad_norm": 0.36627575755119324, "learning_rate": 1.9388099688867158e-05, "loss": 0.5033, "step": 10645 }, { "epoch": 0.2257852431549702, "grad_norm": 0.36757391691207886, "learning_rate": 1.9387984816003868e-05, "loss": 0.5489, "step": 10646 }, { "epoch": 0.22580645161290322, "grad_norm": 0.329689621925354, "learning_rate": 1.938786993269934e-05, "loss": 0.4787, "step": 10647 }, { "epoch": 0.22582766007083624, "grad_norm": 0.3502179682254791, "learning_rate": 1.93877550389537e-05, "loss": 0.533, "step": 10648 }, { "epoch": 0.2258488685287693, "grad_norm": 0.3786369860172272, "learning_rate": 1.9387640134767086e-05, "loss": 0.4995, "step": 10649 }, { "epoch": 0.2258700769867023, "grad_norm": 0.3741457164287567, "learning_rate": 1.938752522013962e-05, "loss": 0.5314, "step": 10650 }, { "epoch": 0.22589128544463533, "grad_norm": 0.35377436876296997, "learning_rate": 1.9387410295071426e-05, "loss": 0.5431, "step": 10651 }, { "epoch": 0.22591249390256835, "grad_norm": 0.3846663236618042, "learning_rate": 1.938729535956264e-05, "loss": 0.4964, "step": 10652 }, { "epoch": 0.22593370236050137, "grad_norm": 0.30464982986450195, "learning_rate": 1.9387180413613383e-05, "loss": 0.5312, "step": 10653 }, { "epoch": 0.22595491081843438, "grad_norm": 0.3266403377056122, "learning_rate": 1.9387065457223788e-05, "loss": 0.5107, "step": 10654 }, { "epoch": 0.2259761192763674, "grad_norm": 0.38609588146209717, "learning_rate": 1.9386950490393976e-05, "loss": 0.416, "step": 10655 }, { "epoch": 0.22599732773430045, "grad_norm": 0.36275455355644226, "learning_rate": 1.9386835513124084e-05, "loss": 0.5998, "step": 10656 }, { "epoch": 0.22601853619223347, "grad_norm": 0.3809899091720581, "learning_rate": 1.9386720525414232e-05, "loss": 0.566, "step": 10657 }, { "epoch": 0.2260397446501665, "grad_norm": 0.3486890494823456, "learning_rate": 1.9386605527264554e-05, "loss": 0.4624, "step": 10658 }, { "epoch": 0.2260609531080995, "grad_norm": 0.3871174454689026, "learning_rate": 1.9386490518675175e-05, "loss": 0.4797, "step": 10659 }, { "epoch": 0.22608216156603253, "grad_norm": 0.36093732714653015, "learning_rate": 1.938637549964622e-05, "loss": 0.5132, "step": 10660 }, { "epoch": 0.22610337002396555, "grad_norm": 0.3451302647590637, "learning_rate": 1.938626047017782e-05, "loss": 0.5547, "step": 10661 }, { "epoch": 0.22612457848189857, "grad_norm": 0.33135294914245605, "learning_rate": 1.93861454302701e-05, "loss": 0.4633, "step": 10662 }, { "epoch": 0.22614578693983162, "grad_norm": 0.3372272253036499, "learning_rate": 1.9386030379923197e-05, "loss": 0.5145, "step": 10663 }, { "epoch": 0.22616699539776464, "grad_norm": 0.37246865034103394, "learning_rate": 1.9385915319137228e-05, "loss": 0.4736, "step": 10664 }, { "epoch": 0.22618820385569766, "grad_norm": 0.31832149624824524, "learning_rate": 1.9385800247912327e-05, "loss": 0.4751, "step": 10665 }, { "epoch": 0.22620941231363068, "grad_norm": 0.3533036708831787, "learning_rate": 1.9385685166248622e-05, "loss": 0.4652, "step": 10666 }, { "epoch": 0.2262306207715637, "grad_norm": 0.4269463121891022, "learning_rate": 1.9385570074146236e-05, "loss": 0.4937, "step": 10667 }, { "epoch": 0.22625182922949671, "grad_norm": 0.3409704864025116, "learning_rate": 1.9385454971605304e-05, "loss": 0.5641, "step": 10668 }, { "epoch": 0.22627303768742973, "grad_norm": 0.3980830907821655, "learning_rate": 1.9385339858625947e-05, "loss": 0.6805, "step": 10669 }, { "epoch": 0.22629424614536278, "grad_norm": 0.40585294365882874, "learning_rate": 1.9385224735208302e-05, "loss": 0.5862, "step": 10670 }, { "epoch": 0.2263154546032958, "grad_norm": 0.7365229725837708, "learning_rate": 1.9385109601352487e-05, "loss": 0.6061, "step": 10671 }, { "epoch": 0.22633666306122882, "grad_norm": 0.35526043176651, "learning_rate": 1.9384994457058635e-05, "loss": 0.559, "step": 10672 }, { "epoch": 0.22635787151916184, "grad_norm": 0.3114253580570221, "learning_rate": 1.9384879302326873e-05, "loss": 0.5295, "step": 10673 }, { "epoch": 0.22637907997709486, "grad_norm": 0.3291672170162201, "learning_rate": 1.9384764137157334e-05, "loss": 0.5603, "step": 10674 }, { "epoch": 0.22640028843502788, "grad_norm": 0.3084245026111603, "learning_rate": 1.9384648961550137e-05, "loss": 0.449, "step": 10675 }, { "epoch": 0.2264214968929609, "grad_norm": 0.3268301486968994, "learning_rate": 1.9384533775505417e-05, "loss": 0.5755, "step": 10676 }, { "epoch": 0.22644270535089395, "grad_norm": 0.34310904145240784, "learning_rate": 1.93844185790233e-05, "loss": 0.4481, "step": 10677 }, { "epoch": 0.22646391380882697, "grad_norm": 0.3822457492351532, "learning_rate": 1.9384303372103913e-05, "loss": 0.5243, "step": 10678 }, { "epoch": 0.22648512226676, "grad_norm": 0.3316442370414734, "learning_rate": 1.9384188154747387e-05, "loss": 0.6058, "step": 10679 }, { "epoch": 0.226506330724693, "grad_norm": 0.3530959486961365, "learning_rate": 1.9384072926953846e-05, "loss": 0.5856, "step": 10680 }, { "epoch": 0.22652753918262603, "grad_norm": 0.7053558826446533, "learning_rate": 1.9383957688723426e-05, "loss": 0.4414, "step": 10681 }, { "epoch": 0.22654874764055905, "grad_norm": 0.36769288778305054, "learning_rate": 1.9383842440056246e-05, "loss": 0.5884, "step": 10682 }, { "epoch": 0.2265699560984921, "grad_norm": 0.3447588086128235, "learning_rate": 1.9383727180952437e-05, "loss": 0.5396, "step": 10683 }, { "epoch": 0.2265911645564251, "grad_norm": 0.30600863695144653, "learning_rate": 1.938361191141213e-05, "loss": 0.4634, "step": 10684 }, { "epoch": 0.22661237301435813, "grad_norm": 0.32819151878356934, "learning_rate": 1.9383496631435453e-05, "loss": 0.536, "step": 10685 }, { "epoch": 0.22663358147229115, "grad_norm": 0.36944231390953064, "learning_rate": 1.9383381341022528e-05, "loss": 0.5143, "step": 10686 }, { "epoch": 0.22665478993022417, "grad_norm": 0.3269748389720917, "learning_rate": 1.938326604017349e-05, "loss": 0.5225, "step": 10687 }, { "epoch": 0.2266759983881572, "grad_norm": 0.32960203289985657, "learning_rate": 1.9383150728888464e-05, "loss": 0.5057, "step": 10688 }, { "epoch": 0.2266972068460902, "grad_norm": 0.3290840685367584, "learning_rate": 1.9383035407167582e-05, "loss": 0.4644, "step": 10689 }, { "epoch": 0.22671841530402326, "grad_norm": 0.9315539598464966, "learning_rate": 1.938292007501097e-05, "loss": 0.4602, "step": 10690 }, { "epoch": 0.22673962376195628, "grad_norm": 0.31143319606781006, "learning_rate": 1.9382804732418752e-05, "loss": 0.5087, "step": 10691 }, { "epoch": 0.2267608322198893, "grad_norm": 0.3158220052719116, "learning_rate": 1.938268937939106e-05, "loss": 0.4601, "step": 10692 }, { "epoch": 0.22678204067782232, "grad_norm": 0.330812007188797, "learning_rate": 1.9382574015928026e-05, "loss": 0.4724, "step": 10693 }, { "epoch": 0.22680324913575534, "grad_norm": 0.33626845479011536, "learning_rate": 1.9382458642029772e-05, "loss": 0.5167, "step": 10694 }, { "epoch": 0.22682445759368836, "grad_norm": 0.33552485704421997, "learning_rate": 1.938234325769643e-05, "loss": 0.5889, "step": 10695 }, { "epoch": 0.22684566605162138, "grad_norm": 0.30257827043533325, "learning_rate": 1.938222786292813e-05, "loss": 0.4492, "step": 10696 }, { "epoch": 0.22686687450955442, "grad_norm": 0.7865145206451416, "learning_rate": 1.938211245772499e-05, "loss": 0.584, "step": 10697 }, { "epoch": 0.22688808296748744, "grad_norm": 0.32525521516799927, "learning_rate": 1.9381997042087156e-05, "loss": 0.5331, "step": 10698 }, { "epoch": 0.22690929142542046, "grad_norm": 0.32278621196746826, "learning_rate": 1.938188161601474e-05, "loss": 0.5393, "step": 10699 }, { "epoch": 0.22693049988335348, "grad_norm": 0.3678187429904938, "learning_rate": 1.938176617950788e-05, "loss": 0.4907, "step": 10700 }, { "epoch": 0.2269517083412865, "grad_norm": 0.36590641736984253, "learning_rate": 1.9381650732566698e-05, "loss": 0.526, "step": 10701 }, { "epoch": 0.22697291679921952, "grad_norm": 0.48360973596572876, "learning_rate": 1.9381535275191324e-05, "loss": 0.5111, "step": 10702 }, { "epoch": 0.22699412525715254, "grad_norm": 0.3156995475292206, "learning_rate": 1.9381419807381894e-05, "loss": 0.4693, "step": 10703 }, { "epoch": 0.2270153337150856, "grad_norm": 0.3833242952823639, "learning_rate": 1.9381304329138526e-05, "loss": 0.5828, "step": 10704 }, { "epoch": 0.2270365421730186, "grad_norm": 0.36467716097831726, "learning_rate": 1.9381188840461354e-05, "loss": 0.5188, "step": 10705 }, { "epoch": 0.22705775063095163, "grad_norm": 0.33181360363960266, "learning_rate": 1.9381073341350506e-05, "loss": 0.4908, "step": 10706 }, { "epoch": 0.22707895908888465, "grad_norm": 0.29363715648651123, "learning_rate": 1.938095783180611e-05, "loss": 0.4095, "step": 10707 }, { "epoch": 0.22710016754681767, "grad_norm": 0.3636298179626465, "learning_rate": 1.9380842311828292e-05, "loss": 0.4883, "step": 10708 }, { "epoch": 0.22712137600475069, "grad_norm": 0.33234527707099915, "learning_rate": 1.9380726781417184e-05, "loss": 0.538, "step": 10709 }, { "epoch": 0.2271425844626837, "grad_norm": 0.341244101524353, "learning_rate": 1.938061124057291e-05, "loss": 0.4806, "step": 10710 }, { "epoch": 0.22716379292061675, "grad_norm": 0.4646371304988861, "learning_rate": 1.9380495689295607e-05, "loss": 0.4447, "step": 10711 }, { "epoch": 0.22718500137854977, "grad_norm": 0.5386449098587036, "learning_rate": 1.9380380127585395e-05, "loss": 0.6865, "step": 10712 }, { "epoch": 0.2272062098364828, "grad_norm": 0.32686635851860046, "learning_rate": 1.9380264555442407e-05, "loss": 0.5655, "step": 10713 }, { "epoch": 0.2272274182944158, "grad_norm": 0.37920695543289185, "learning_rate": 1.9380148972866772e-05, "loss": 0.4655, "step": 10714 }, { "epoch": 0.22724862675234883, "grad_norm": 0.34804224967956543, "learning_rate": 1.9380033379858614e-05, "loss": 0.5366, "step": 10715 }, { "epoch": 0.22726983521028185, "grad_norm": 0.36935678124427795, "learning_rate": 1.9379917776418064e-05, "loss": 0.5392, "step": 10716 }, { "epoch": 0.22729104366821487, "grad_norm": 0.3314574062824249, "learning_rate": 1.9379802162545252e-05, "loss": 0.4565, "step": 10717 }, { "epoch": 0.22731225212614792, "grad_norm": 0.4318382441997528, "learning_rate": 1.9379686538240305e-05, "loss": 0.64, "step": 10718 }, { "epoch": 0.22733346058408094, "grad_norm": 0.35649168491363525, "learning_rate": 1.9379570903503354e-05, "loss": 0.4826, "step": 10719 }, { "epoch": 0.22735466904201396, "grad_norm": 0.3164564371109009, "learning_rate": 1.9379455258334522e-05, "loss": 0.4157, "step": 10720 }, { "epoch": 0.22737587749994698, "grad_norm": 0.3983423411846161, "learning_rate": 1.9379339602733943e-05, "loss": 0.5908, "step": 10721 }, { "epoch": 0.22739708595788, "grad_norm": 0.36242368817329407, "learning_rate": 1.9379223936701748e-05, "loss": 0.5779, "step": 10722 }, { "epoch": 0.22741829441581302, "grad_norm": 0.3491402268409729, "learning_rate": 1.9379108260238056e-05, "loss": 0.5496, "step": 10723 }, { "epoch": 0.22743950287374606, "grad_norm": 0.3310782015323639, "learning_rate": 1.9378992573343006e-05, "loss": 0.4532, "step": 10724 }, { "epoch": 0.22746071133167908, "grad_norm": 0.43092140555381775, "learning_rate": 1.9378876876016717e-05, "loss": 0.6043, "step": 10725 }, { "epoch": 0.2274819197896121, "grad_norm": 0.36873680353164673, "learning_rate": 1.9378761168259322e-05, "loss": 0.5659, "step": 10726 }, { "epoch": 0.22750312824754512, "grad_norm": 0.3568631112575531, "learning_rate": 1.9378645450070957e-05, "loss": 0.5652, "step": 10727 }, { "epoch": 0.22752433670547814, "grad_norm": 0.5523717999458313, "learning_rate": 1.937852972145174e-05, "loss": 0.4957, "step": 10728 }, { "epoch": 0.22754554516341116, "grad_norm": 0.40996646881103516, "learning_rate": 1.9378413982401802e-05, "loss": 0.5119, "step": 10729 }, { "epoch": 0.22756675362134418, "grad_norm": 0.36783167719841003, "learning_rate": 1.937829823292128e-05, "loss": 0.5778, "step": 10730 }, { "epoch": 0.22758796207927723, "grad_norm": 0.4002714157104492, "learning_rate": 1.9378182473010287e-05, "loss": 0.5529, "step": 10731 }, { "epoch": 0.22760917053721025, "grad_norm": 0.3644309937953949, "learning_rate": 1.937806670266897e-05, "loss": 0.5858, "step": 10732 }, { "epoch": 0.22763037899514327, "grad_norm": 0.3067699074745178, "learning_rate": 1.9377950921897442e-05, "loss": 0.4579, "step": 10733 }, { "epoch": 0.2276515874530763, "grad_norm": 0.3907047510147095, "learning_rate": 1.937783513069584e-05, "loss": 0.5971, "step": 10734 }, { "epoch": 0.2276727959110093, "grad_norm": 0.405423104763031, "learning_rate": 1.937771932906429e-05, "loss": 0.5389, "step": 10735 }, { "epoch": 0.22769400436894233, "grad_norm": 0.3224876821041107, "learning_rate": 1.9377603517002925e-05, "loss": 0.5388, "step": 10736 }, { "epoch": 0.22771521282687535, "grad_norm": 0.34335649013519287, "learning_rate": 1.937748769451187e-05, "loss": 0.4613, "step": 10737 }, { "epoch": 0.2277364212848084, "grad_norm": 0.3113757371902466, "learning_rate": 1.9377371861591254e-05, "loss": 0.5216, "step": 10738 }, { "epoch": 0.2277576297427414, "grad_norm": 0.3433944582939148, "learning_rate": 1.9377256018241208e-05, "loss": 0.5011, "step": 10739 }, { "epoch": 0.22777883820067443, "grad_norm": 0.39855554699897766, "learning_rate": 1.937714016446186e-05, "loss": 0.5866, "step": 10740 }, { "epoch": 0.22780004665860745, "grad_norm": 0.35099297761917114, "learning_rate": 1.9377024300253335e-05, "loss": 0.5107, "step": 10741 }, { "epoch": 0.22782125511654047, "grad_norm": 0.3554076552391052, "learning_rate": 1.9376908425615766e-05, "loss": 0.5964, "step": 10742 }, { "epoch": 0.2278424635744735, "grad_norm": 0.3897457718849182, "learning_rate": 1.937679254054928e-05, "loss": 0.4757, "step": 10743 }, { "epoch": 0.2278636720324065, "grad_norm": 0.3579046428203583, "learning_rate": 1.937667664505401e-05, "loss": 0.5628, "step": 10744 }, { "epoch": 0.22788488049033956, "grad_norm": 0.3763372600078583, "learning_rate": 1.9376560739130078e-05, "loss": 0.5655, "step": 10745 }, { "epoch": 0.22790608894827258, "grad_norm": 0.38551247119903564, "learning_rate": 1.937644482277762e-05, "loss": 0.5139, "step": 10746 }, { "epoch": 0.2279272974062056, "grad_norm": 0.35117703676223755, "learning_rate": 1.937632889599676e-05, "loss": 0.5131, "step": 10747 }, { "epoch": 0.22794850586413862, "grad_norm": 0.3908969759941101, "learning_rate": 1.937621295878763e-05, "loss": 0.5576, "step": 10748 }, { "epoch": 0.22796971432207164, "grad_norm": 0.30811020731925964, "learning_rate": 1.9376097011150356e-05, "loss": 0.4835, "step": 10749 }, { "epoch": 0.22799092278000466, "grad_norm": 0.2941316068172455, "learning_rate": 1.9375981053085066e-05, "loss": 0.4589, "step": 10750 }, { "epoch": 0.22801213123793768, "grad_norm": 0.3438466191291809, "learning_rate": 1.9375865084591896e-05, "loss": 0.5153, "step": 10751 }, { "epoch": 0.22803333969587072, "grad_norm": 0.3354550302028656, "learning_rate": 1.9375749105670967e-05, "loss": 0.5504, "step": 10752 }, { "epoch": 0.22805454815380374, "grad_norm": 0.3160892426967621, "learning_rate": 1.9375633116322413e-05, "loss": 0.5978, "step": 10753 }, { "epoch": 0.22807575661173676, "grad_norm": 0.368746280670166, "learning_rate": 1.9375517116546358e-05, "loss": 0.5618, "step": 10754 }, { "epoch": 0.22809696506966978, "grad_norm": 0.45465561747550964, "learning_rate": 1.9375401106342937e-05, "loss": 0.596, "step": 10755 }, { "epoch": 0.2281181735276028, "grad_norm": 0.33478671312332153, "learning_rate": 1.9375285085712277e-05, "loss": 0.4729, "step": 10756 }, { "epoch": 0.22813938198553582, "grad_norm": 0.338187575340271, "learning_rate": 1.9375169054654505e-05, "loss": 0.4703, "step": 10757 }, { "epoch": 0.22816059044346887, "grad_norm": 0.3955353796482086, "learning_rate": 1.9375053013169755e-05, "loss": 0.5278, "step": 10758 }, { "epoch": 0.2281817989014019, "grad_norm": 0.3878181576728821, "learning_rate": 1.9374936961258147e-05, "loss": 0.586, "step": 10759 }, { "epoch": 0.2282030073593349, "grad_norm": 0.3489600718021393, "learning_rate": 1.9374820898919818e-05, "loss": 0.4931, "step": 10760 }, { "epoch": 0.22822421581726793, "grad_norm": 0.34260034561157227, "learning_rate": 1.9374704826154893e-05, "loss": 0.5023, "step": 10761 }, { "epoch": 0.22824542427520095, "grad_norm": 0.2803863286972046, "learning_rate": 1.9374588742963504e-05, "loss": 0.4275, "step": 10762 }, { "epoch": 0.22826663273313397, "grad_norm": 0.35208114981651306, "learning_rate": 1.937447264934578e-05, "loss": 0.6158, "step": 10763 }, { "epoch": 0.228287841191067, "grad_norm": 0.36045679450035095, "learning_rate": 1.9374356545301846e-05, "loss": 0.4849, "step": 10764 }, { "epoch": 0.22830904964900003, "grad_norm": 0.3245939612388611, "learning_rate": 1.9374240430831837e-05, "loss": 0.4923, "step": 10765 }, { "epoch": 0.22833025810693305, "grad_norm": 0.3037717342376709, "learning_rate": 1.937412430593588e-05, "loss": 0.4525, "step": 10766 }, { "epoch": 0.22835146656486607, "grad_norm": 0.531439483165741, "learning_rate": 1.93740081706141e-05, "loss": 0.5584, "step": 10767 }, { "epoch": 0.2283726750227991, "grad_norm": 0.3041742444038391, "learning_rate": 1.9373892024866633e-05, "loss": 0.5655, "step": 10768 }, { "epoch": 0.2283938834807321, "grad_norm": 0.2972208559513092, "learning_rate": 1.93737758686936e-05, "loss": 0.4743, "step": 10769 }, { "epoch": 0.22841509193866513, "grad_norm": 0.3783554136753082, "learning_rate": 1.937365970209514e-05, "loss": 0.5712, "step": 10770 }, { "epoch": 0.22843630039659815, "grad_norm": 0.3553978502750397, "learning_rate": 1.9373543525071375e-05, "loss": 0.5334, "step": 10771 }, { "epoch": 0.2284575088545312, "grad_norm": 0.3689815402030945, "learning_rate": 1.9373427337622437e-05, "loss": 0.4738, "step": 10772 }, { "epoch": 0.22847871731246422, "grad_norm": 0.368947833776474, "learning_rate": 1.9373311139748452e-05, "loss": 0.514, "step": 10773 }, { "epoch": 0.22849992577039724, "grad_norm": 0.3339686989784241, "learning_rate": 1.9373194931449555e-05, "loss": 0.4937, "step": 10774 }, { "epoch": 0.22852113422833026, "grad_norm": 0.34917357563972473, "learning_rate": 1.937307871272587e-05, "loss": 0.5212, "step": 10775 }, { "epoch": 0.22854234268626328, "grad_norm": 0.39852818846702576, "learning_rate": 1.9372962483577532e-05, "loss": 0.4995, "step": 10776 }, { "epoch": 0.2285635511441963, "grad_norm": 0.3270415663719177, "learning_rate": 1.937284624400466e-05, "loss": 0.5695, "step": 10777 }, { "epoch": 0.22858475960212932, "grad_norm": 0.3341551423072815, "learning_rate": 1.9372729994007395e-05, "loss": 0.5971, "step": 10778 }, { "epoch": 0.22860596806006236, "grad_norm": 0.3573448359966278, "learning_rate": 1.9372613733585858e-05, "loss": 0.545, "step": 10779 }, { "epoch": 0.22862717651799538, "grad_norm": 0.3615656793117523, "learning_rate": 1.9372497462740184e-05, "loss": 0.5207, "step": 10780 }, { "epoch": 0.2286483849759284, "grad_norm": 0.33161357045173645, "learning_rate": 1.93723811814705e-05, "loss": 0.5387, "step": 10781 }, { "epoch": 0.22866959343386142, "grad_norm": 0.3269863426685333, "learning_rate": 1.937226488977693e-05, "loss": 0.5687, "step": 10782 }, { "epoch": 0.22869080189179444, "grad_norm": 0.4369220435619354, "learning_rate": 1.9372148587659612e-05, "loss": 0.5533, "step": 10783 }, { "epoch": 0.22871201034972746, "grad_norm": 0.33186933398246765, "learning_rate": 1.9372032275118674e-05, "loss": 0.5755, "step": 10784 }, { "epoch": 0.22873321880766048, "grad_norm": 0.32707059383392334, "learning_rate": 1.937191595215424e-05, "loss": 0.5747, "step": 10785 }, { "epoch": 0.22875442726559353, "grad_norm": 0.3506978452205658, "learning_rate": 1.9371799618766445e-05, "loss": 0.5187, "step": 10786 }, { "epoch": 0.22877563572352655, "grad_norm": 0.32706159353256226, "learning_rate": 1.9371683274955412e-05, "loss": 0.4727, "step": 10787 }, { "epoch": 0.22879684418145957, "grad_norm": 0.35822585225105286, "learning_rate": 1.937156692072128e-05, "loss": 0.5371, "step": 10788 }, { "epoch": 0.2288180526393926, "grad_norm": 0.3287023603916168, "learning_rate": 1.9371450556064166e-05, "loss": 0.5481, "step": 10789 }, { "epoch": 0.2288392610973256, "grad_norm": 0.4113548994064331, "learning_rate": 1.937133418098421e-05, "loss": 0.516, "step": 10790 }, { "epoch": 0.22886046955525863, "grad_norm": 0.32822564244270325, "learning_rate": 1.9371217795481538e-05, "loss": 0.5076, "step": 10791 }, { "epoch": 0.22888167801319165, "grad_norm": 0.3172992169857025, "learning_rate": 1.9371101399556277e-05, "loss": 0.5255, "step": 10792 }, { "epoch": 0.2289028864711247, "grad_norm": 0.3383964002132416, "learning_rate": 1.937098499320856e-05, "loss": 0.5654, "step": 10793 }, { "epoch": 0.22892409492905771, "grad_norm": 0.31945157051086426, "learning_rate": 1.9370868576438515e-05, "loss": 0.5051, "step": 10794 }, { "epoch": 0.22894530338699073, "grad_norm": 0.43789079785346985, "learning_rate": 1.937075214924627e-05, "loss": 0.4487, "step": 10795 }, { "epoch": 0.22896651184492375, "grad_norm": 0.3474844992160797, "learning_rate": 1.937063571163196e-05, "loss": 0.6113, "step": 10796 }, { "epoch": 0.22898772030285677, "grad_norm": 0.36731842160224915, "learning_rate": 1.9370519263595703e-05, "loss": 0.4849, "step": 10797 }, { "epoch": 0.2290089287607898, "grad_norm": 0.382479727268219, "learning_rate": 1.937040280513764e-05, "loss": 0.5176, "step": 10798 }, { "epoch": 0.22903013721872284, "grad_norm": 0.33734259009361267, "learning_rate": 1.9370286336257896e-05, "loss": 0.5629, "step": 10799 }, { "epoch": 0.22905134567665586, "grad_norm": 0.31381967663764954, "learning_rate": 1.93701698569566e-05, "loss": 0.4989, "step": 10800 }, { "epoch": 0.22907255413458888, "grad_norm": 0.3179430365562439, "learning_rate": 1.9370053367233886e-05, "loss": 0.496, "step": 10801 }, { "epoch": 0.2290937625925219, "grad_norm": 0.3387986123561859, "learning_rate": 1.936993686708988e-05, "loss": 0.4861, "step": 10802 }, { "epoch": 0.22911497105045492, "grad_norm": 0.42607423663139343, "learning_rate": 1.9369820356524706e-05, "loss": 0.6291, "step": 10803 }, { "epoch": 0.22913617950838794, "grad_norm": 0.35003191232681274, "learning_rate": 1.9369703835538504e-05, "loss": 0.5225, "step": 10804 }, { "epoch": 0.22915738796632096, "grad_norm": 0.3073437809944153, "learning_rate": 1.9369587304131397e-05, "loss": 0.5142, "step": 10805 }, { "epoch": 0.229178596424254, "grad_norm": 0.3312198519706726, "learning_rate": 1.9369470762303516e-05, "loss": 0.5493, "step": 10806 }, { "epoch": 0.22919980488218702, "grad_norm": 0.3493369519710541, "learning_rate": 1.936935421005499e-05, "loss": 0.4366, "step": 10807 }, { "epoch": 0.22922101334012004, "grad_norm": 0.3597670793533325, "learning_rate": 1.936923764738595e-05, "loss": 0.5198, "step": 10808 }, { "epoch": 0.22924222179805306, "grad_norm": 0.34437307715415955, "learning_rate": 1.9369121074296527e-05, "loss": 0.5432, "step": 10809 }, { "epoch": 0.22926343025598608, "grad_norm": 0.36244702339172363, "learning_rate": 1.936900449078685e-05, "loss": 0.5339, "step": 10810 }, { "epoch": 0.2292846387139191, "grad_norm": 0.4229438006877899, "learning_rate": 1.9368887896857046e-05, "loss": 0.5137, "step": 10811 }, { "epoch": 0.22930584717185212, "grad_norm": 0.3552350401878357, "learning_rate": 1.9368771292507244e-05, "loss": 0.4754, "step": 10812 }, { "epoch": 0.22932705562978517, "grad_norm": 0.349671870470047, "learning_rate": 1.936865467773758e-05, "loss": 0.5057, "step": 10813 }, { "epoch": 0.2293482640877182, "grad_norm": 0.33928588032722473, "learning_rate": 1.936853805254818e-05, "loss": 0.5394, "step": 10814 }, { "epoch": 0.2293694725456512, "grad_norm": 0.31752440333366394, "learning_rate": 1.9368421416939168e-05, "loss": 0.5019, "step": 10815 }, { "epoch": 0.22939068100358423, "grad_norm": 0.3456984758377075, "learning_rate": 1.9368304770910682e-05, "loss": 0.5929, "step": 10816 }, { "epoch": 0.22941188946151725, "grad_norm": 0.3923594057559967, "learning_rate": 1.936818811446285e-05, "loss": 0.5927, "step": 10817 }, { "epoch": 0.22943309791945027, "grad_norm": 0.335484117269516, "learning_rate": 1.93680714475958e-05, "loss": 0.5447, "step": 10818 }, { "epoch": 0.2294543063773833, "grad_norm": 0.34410977363586426, "learning_rate": 1.9367954770309664e-05, "loss": 0.5187, "step": 10819 }, { "epoch": 0.22947551483531634, "grad_norm": 0.3528922200202942, "learning_rate": 1.9367838082604566e-05, "loss": 0.5392, "step": 10820 }, { "epoch": 0.22949672329324936, "grad_norm": 0.3425688147544861, "learning_rate": 1.9367721384480643e-05, "loss": 0.4933, "step": 10821 }, { "epoch": 0.22951793175118237, "grad_norm": 0.32145848870277405, "learning_rate": 1.936760467593802e-05, "loss": 0.5047, "step": 10822 }, { "epoch": 0.2295391402091154, "grad_norm": 0.33156317472457886, "learning_rate": 1.936748795697683e-05, "loss": 0.6181, "step": 10823 }, { "epoch": 0.22956034866704841, "grad_norm": 0.33597326278686523, "learning_rate": 1.9367371227597198e-05, "loss": 0.4122, "step": 10824 }, { "epoch": 0.22958155712498143, "grad_norm": 0.35137996077537537, "learning_rate": 1.9367254487799262e-05, "loss": 0.4974, "step": 10825 }, { "epoch": 0.22960276558291445, "grad_norm": 0.3438817262649536, "learning_rate": 1.9367137737583145e-05, "loss": 0.533, "step": 10826 }, { "epoch": 0.2296239740408475, "grad_norm": 0.4541090726852417, "learning_rate": 1.9367020976948974e-05, "loss": 0.6675, "step": 10827 }, { "epoch": 0.22964518249878052, "grad_norm": 0.4018685519695282, "learning_rate": 1.936690420589689e-05, "loss": 0.5938, "step": 10828 }, { "epoch": 0.22966639095671354, "grad_norm": 0.3210563063621521, "learning_rate": 1.9366787424427014e-05, "loss": 0.5159, "step": 10829 }, { "epoch": 0.22968759941464656, "grad_norm": 0.3218700587749481, "learning_rate": 1.936667063253948e-05, "loss": 0.5274, "step": 10830 }, { "epoch": 0.22970880787257958, "grad_norm": 0.33011114597320557, "learning_rate": 1.9366553830234417e-05, "loss": 0.5257, "step": 10831 }, { "epoch": 0.2297300163305126, "grad_norm": 0.3729466199874878, "learning_rate": 1.936643701751195e-05, "loss": 0.5514, "step": 10832 }, { "epoch": 0.22975122478844562, "grad_norm": 0.2903066873550415, "learning_rate": 1.9366320194372217e-05, "loss": 0.4607, "step": 10833 }, { "epoch": 0.22977243324637867, "grad_norm": 0.31393319368362427, "learning_rate": 1.936620336081534e-05, "loss": 0.5365, "step": 10834 }, { "epoch": 0.22979364170431169, "grad_norm": 0.36803027987480164, "learning_rate": 1.9366086516841455e-05, "loss": 0.5423, "step": 10835 }, { "epoch": 0.2298148501622447, "grad_norm": 0.5827233791351318, "learning_rate": 1.936596966245069e-05, "loss": 0.4631, "step": 10836 }, { "epoch": 0.22983605862017772, "grad_norm": 0.33051052689552307, "learning_rate": 1.9365852797643177e-05, "loss": 0.4471, "step": 10837 }, { "epoch": 0.22985726707811074, "grad_norm": 0.30666014552116394, "learning_rate": 1.936573592241904e-05, "loss": 0.5935, "step": 10838 }, { "epoch": 0.22987847553604376, "grad_norm": 0.359448105096817, "learning_rate": 1.936561903677842e-05, "loss": 0.4561, "step": 10839 }, { "epoch": 0.2298996839939768, "grad_norm": 0.33990490436553955, "learning_rate": 1.9365502140721434e-05, "loss": 0.4974, "step": 10840 }, { "epoch": 0.22992089245190983, "grad_norm": 0.3797559440135956, "learning_rate": 1.9365385234248217e-05, "loss": 0.4681, "step": 10841 }, { "epoch": 0.22994210090984285, "grad_norm": 0.8185741305351257, "learning_rate": 1.93652683173589e-05, "loss": 0.5169, "step": 10842 }, { "epoch": 0.22996330936777587, "grad_norm": 0.35266080498695374, "learning_rate": 1.9365151390053615e-05, "loss": 0.5694, "step": 10843 }, { "epoch": 0.2299845178257089, "grad_norm": 0.36081692576408386, "learning_rate": 1.936503445233249e-05, "loss": 0.5514, "step": 10844 }, { "epoch": 0.2300057262836419, "grad_norm": 0.35193586349487305, "learning_rate": 1.9364917504195655e-05, "loss": 0.492, "step": 10845 }, { "epoch": 0.23002693474157493, "grad_norm": 0.3141234517097473, "learning_rate": 1.9364800545643237e-05, "loss": 0.5469, "step": 10846 }, { "epoch": 0.23004814319950798, "grad_norm": 0.356216162443161, "learning_rate": 1.9364683576675373e-05, "loss": 0.532, "step": 10847 }, { "epoch": 0.230069351657441, "grad_norm": 0.3260294795036316, "learning_rate": 1.936456659729219e-05, "loss": 0.5282, "step": 10848 }, { "epoch": 0.23009056011537402, "grad_norm": 0.4111352860927582, "learning_rate": 1.9364449607493813e-05, "loss": 0.6388, "step": 10849 }, { "epoch": 0.23011176857330704, "grad_norm": 0.33117127418518066, "learning_rate": 1.936433260728038e-05, "loss": 0.5231, "step": 10850 }, { "epoch": 0.23013297703124005, "grad_norm": 0.3028222620487213, "learning_rate": 1.9364215596652015e-05, "loss": 0.5114, "step": 10851 }, { "epoch": 0.23015418548917307, "grad_norm": 0.36166954040527344, "learning_rate": 1.9364098575608853e-05, "loss": 0.5651, "step": 10852 }, { "epoch": 0.2301753939471061, "grad_norm": 0.36993885040283203, "learning_rate": 1.9363981544151022e-05, "loss": 0.4755, "step": 10853 }, { "epoch": 0.23019660240503914, "grad_norm": 0.3239046633243561, "learning_rate": 1.9363864502278653e-05, "loss": 0.5943, "step": 10854 }, { "epoch": 0.23021781086297216, "grad_norm": 0.3440162241458893, "learning_rate": 1.936374744999187e-05, "loss": 0.4893, "step": 10855 }, { "epoch": 0.23023901932090518, "grad_norm": 0.35377198457717896, "learning_rate": 1.9363630387290813e-05, "loss": 0.4733, "step": 10856 }, { "epoch": 0.2302602277788382, "grad_norm": 0.3780006766319275, "learning_rate": 1.9363513314175607e-05, "loss": 0.5386, "step": 10857 }, { "epoch": 0.23028143623677122, "grad_norm": 0.3121987581253052, "learning_rate": 1.9363396230646384e-05, "loss": 0.4924, "step": 10858 }, { "epoch": 0.23030264469470424, "grad_norm": 0.31646451354026794, "learning_rate": 1.936327913670327e-05, "loss": 0.4781, "step": 10859 }, { "epoch": 0.23032385315263726, "grad_norm": 0.33800622820854187, "learning_rate": 1.93631620323464e-05, "loss": 0.5442, "step": 10860 }, { "epoch": 0.2303450616105703, "grad_norm": 0.3649307191371918, "learning_rate": 1.9363044917575905e-05, "loss": 0.5207, "step": 10861 }, { "epoch": 0.23036627006850333, "grad_norm": 0.33195334672927856, "learning_rate": 1.936292779239191e-05, "loss": 0.605, "step": 10862 }, { "epoch": 0.23038747852643635, "grad_norm": 0.3410774767398834, "learning_rate": 1.9362810656794546e-05, "loss": 0.4484, "step": 10863 }, { "epoch": 0.23040868698436937, "grad_norm": 0.3838436007499695, "learning_rate": 1.9362693510783948e-05, "loss": 0.6085, "step": 10864 }, { "epoch": 0.23042989544230238, "grad_norm": 0.3880625367164612, "learning_rate": 1.9362576354360243e-05, "loss": 0.5969, "step": 10865 }, { "epoch": 0.2304511039002354, "grad_norm": 0.34183621406555176, "learning_rate": 1.9362459187523565e-05, "loss": 0.6096, "step": 10866 }, { "epoch": 0.23047231235816842, "grad_norm": 0.3443368077278137, "learning_rate": 1.9362342010274036e-05, "loss": 0.5295, "step": 10867 }, { "epoch": 0.23049352081610147, "grad_norm": 0.32035142183303833, "learning_rate": 1.9362224822611796e-05, "loss": 0.5722, "step": 10868 }, { "epoch": 0.2305147292740345, "grad_norm": 0.3512403666973114, "learning_rate": 1.9362107624536967e-05, "loss": 0.5361, "step": 10869 }, { "epoch": 0.2305359377319675, "grad_norm": 0.3103170096874237, "learning_rate": 1.9361990416049686e-05, "loss": 0.5494, "step": 10870 }, { "epoch": 0.23055714618990053, "grad_norm": 0.32368043065071106, "learning_rate": 1.9361873197150077e-05, "loss": 0.5712, "step": 10871 }, { "epoch": 0.23057835464783355, "grad_norm": 0.3468351364135742, "learning_rate": 1.936175596783828e-05, "loss": 0.5721, "step": 10872 }, { "epoch": 0.23059956310576657, "grad_norm": 0.3067508935928345, "learning_rate": 1.9361638728114412e-05, "loss": 0.5238, "step": 10873 }, { "epoch": 0.23062077156369962, "grad_norm": 0.31509754061698914, "learning_rate": 1.9361521477978617e-05, "loss": 0.486, "step": 10874 }, { "epoch": 0.23064198002163264, "grad_norm": 0.31685301661491394, "learning_rate": 1.9361404217431015e-05, "loss": 0.5788, "step": 10875 }, { "epoch": 0.23066318847956566, "grad_norm": 0.3431277573108673, "learning_rate": 1.936128694647174e-05, "loss": 0.4988, "step": 10876 }, { "epoch": 0.23068439693749868, "grad_norm": 0.32032251358032227, "learning_rate": 1.9361169665100928e-05, "loss": 0.4869, "step": 10877 }, { "epoch": 0.2307056053954317, "grad_norm": 0.34032174944877625, "learning_rate": 1.93610523733187e-05, "loss": 0.5823, "step": 10878 }, { "epoch": 0.23072681385336472, "grad_norm": 0.33234429359436035, "learning_rate": 1.9360935071125193e-05, "loss": 0.4862, "step": 10879 }, { "epoch": 0.23074802231129773, "grad_norm": 0.3122490644454956, "learning_rate": 1.9360817758520534e-05, "loss": 0.5479, "step": 10880 }, { "epoch": 0.23076923076923078, "grad_norm": 0.3025352656841278, "learning_rate": 1.936070043550486e-05, "loss": 0.5744, "step": 10881 }, { "epoch": 0.2307904392271638, "grad_norm": 0.3251987099647522, "learning_rate": 1.936058310207829e-05, "loss": 0.5231, "step": 10882 }, { "epoch": 0.23081164768509682, "grad_norm": 0.3102627992630005, "learning_rate": 1.9360465758240963e-05, "loss": 0.4866, "step": 10883 }, { "epoch": 0.23083285614302984, "grad_norm": 0.3104986250400543, "learning_rate": 1.9360348403993008e-05, "loss": 0.4606, "step": 10884 }, { "epoch": 0.23085406460096286, "grad_norm": 0.31613388657569885, "learning_rate": 1.9360231039334553e-05, "loss": 0.4772, "step": 10885 }, { "epoch": 0.23087527305889588, "grad_norm": 0.3230820596218109, "learning_rate": 1.936011366426573e-05, "loss": 0.4759, "step": 10886 }, { "epoch": 0.2308964815168289, "grad_norm": 0.32253995537757874, "learning_rate": 1.935999627878667e-05, "loss": 0.509, "step": 10887 }, { "epoch": 0.23091768997476195, "grad_norm": 0.3122303783893585, "learning_rate": 1.935987888289751e-05, "loss": 0.4832, "step": 10888 }, { "epoch": 0.23093889843269497, "grad_norm": 0.3252508044242859, "learning_rate": 1.9359761476598365e-05, "loss": 0.5541, "step": 10889 }, { "epoch": 0.230960106890628, "grad_norm": 0.33990731835365295, "learning_rate": 1.935964405988938e-05, "loss": 0.4264, "step": 10890 }, { "epoch": 0.230981315348561, "grad_norm": 0.38735756278038025, "learning_rate": 1.9359526632770675e-05, "loss": 0.5274, "step": 10891 }, { "epoch": 0.23100252380649403, "grad_norm": 0.29909518361091614, "learning_rate": 1.935940919524239e-05, "loss": 0.4682, "step": 10892 }, { "epoch": 0.23102373226442705, "grad_norm": 0.3675696849822998, "learning_rate": 1.935929174730465e-05, "loss": 0.5298, "step": 10893 }, { "epoch": 0.23104494072236006, "grad_norm": 0.3293059766292572, "learning_rate": 1.935917428895759e-05, "loss": 0.5777, "step": 10894 }, { "epoch": 0.2310661491802931, "grad_norm": 0.33493563532829285, "learning_rate": 1.9359056820201334e-05, "loss": 0.465, "step": 10895 }, { "epoch": 0.23108735763822613, "grad_norm": 0.34857964515686035, "learning_rate": 1.935893934103602e-05, "loss": 0.5676, "step": 10896 }, { "epoch": 0.23110856609615915, "grad_norm": 0.36256712675094604, "learning_rate": 1.935882185146177e-05, "loss": 0.4817, "step": 10897 }, { "epoch": 0.23112977455409217, "grad_norm": 0.3322659432888031, "learning_rate": 1.935870435147872e-05, "loss": 0.4526, "step": 10898 }, { "epoch": 0.2311509830120252, "grad_norm": 0.3608819842338562, "learning_rate": 1.9358586841087006e-05, "loss": 0.5047, "step": 10899 }, { "epoch": 0.2311721914699582, "grad_norm": 0.32488420605659485, "learning_rate": 1.935846932028675e-05, "loss": 0.4611, "step": 10900 }, { "epoch": 0.23119339992789123, "grad_norm": 0.3536836802959442, "learning_rate": 1.9358351789078082e-05, "loss": 0.6117, "step": 10901 }, { "epoch": 0.23121460838582428, "grad_norm": 0.676689088344574, "learning_rate": 1.935823424746114e-05, "loss": 0.4956, "step": 10902 }, { "epoch": 0.2312358168437573, "grad_norm": 0.32248827815055847, "learning_rate": 1.935811669543605e-05, "loss": 0.5125, "step": 10903 }, { "epoch": 0.23125702530169032, "grad_norm": 0.40540793538093567, "learning_rate": 1.9357999133002944e-05, "loss": 0.4979, "step": 10904 }, { "epoch": 0.23127823375962334, "grad_norm": 0.31619659066200256, "learning_rate": 1.9357881560161958e-05, "loss": 0.5142, "step": 10905 }, { "epoch": 0.23129944221755636, "grad_norm": 0.3247872292995453, "learning_rate": 1.935776397691321e-05, "loss": 0.4561, "step": 10906 }, { "epoch": 0.23132065067548938, "grad_norm": 0.3918183147907257, "learning_rate": 1.9357646383256843e-05, "loss": 0.4731, "step": 10907 }, { "epoch": 0.2313418591334224, "grad_norm": 0.40191546082496643, "learning_rate": 1.9357528779192976e-05, "loss": 0.5049, "step": 10908 }, { "epoch": 0.23136306759135544, "grad_norm": 0.3654775023460388, "learning_rate": 1.9357411164721754e-05, "loss": 0.506, "step": 10909 }, { "epoch": 0.23138427604928846, "grad_norm": 0.32865387201309204, "learning_rate": 1.93572935398433e-05, "loss": 0.4813, "step": 10910 }, { "epoch": 0.23140548450722148, "grad_norm": 0.31607335805892944, "learning_rate": 1.9357175904557743e-05, "loss": 0.549, "step": 10911 }, { "epoch": 0.2314266929651545, "grad_norm": 0.3226865530014038, "learning_rate": 1.9357058258865215e-05, "loss": 0.5841, "step": 10912 }, { "epoch": 0.23144790142308752, "grad_norm": 0.35747969150543213, "learning_rate": 1.9356940602765854e-05, "loss": 0.5609, "step": 10913 }, { "epoch": 0.23146910988102054, "grad_norm": 0.3911101222038269, "learning_rate": 1.935682293625978e-05, "loss": 0.5638, "step": 10914 }, { "epoch": 0.2314903183389536, "grad_norm": 0.2963898777961731, "learning_rate": 1.9356705259347127e-05, "loss": 0.4422, "step": 10915 }, { "epoch": 0.2315115267968866, "grad_norm": 0.3377787470817566, "learning_rate": 1.9356587572028032e-05, "loss": 0.5375, "step": 10916 }, { "epoch": 0.23153273525481963, "grad_norm": 0.33037567138671875, "learning_rate": 1.935646987430262e-05, "loss": 0.5, "step": 10917 }, { "epoch": 0.23155394371275265, "grad_norm": 0.32757312059402466, "learning_rate": 1.9356352166171024e-05, "loss": 0.533, "step": 10918 }, { "epoch": 0.23157515217068567, "grad_norm": 0.47163504362106323, "learning_rate": 1.9356234447633374e-05, "loss": 0.5049, "step": 10919 }, { "epoch": 0.23159636062861869, "grad_norm": 0.37749040126800537, "learning_rate": 1.93561167186898e-05, "loss": 0.5046, "step": 10920 }, { "epoch": 0.2316175690865517, "grad_norm": 0.374796599149704, "learning_rate": 1.9355998979340436e-05, "loss": 0.499, "step": 10921 }, { "epoch": 0.23163877754448475, "grad_norm": 0.3615482747554779, "learning_rate": 1.935588122958541e-05, "loss": 0.4621, "step": 10922 }, { "epoch": 0.23165998600241777, "grad_norm": 0.3388071358203888, "learning_rate": 1.9355763469424857e-05, "loss": 0.5716, "step": 10923 }, { "epoch": 0.2316811944603508, "grad_norm": 0.3405992388725281, "learning_rate": 1.9355645698858903e-05, "loss": 0.486, "step": 10924 }, { "epoch": 0.2317024029182838, "grad_norm": 0.30741772055625916, "learning_rate": 1.935552791788768e-05, "loss": 0.4767, "step": 10925 }, { "epoch": 0.23172361137621683, "grad_norm": 0.2955606281757355, "learning_rate": 1.935541012651132e-05, "loss": 0.3457, "step": 10926 }, { "epoch": 0.23174481983414985, "grad_norm": 0.3222302496433258, "learning_rate": 1.9355292324729957e-05, "loss": 0.5506, "step": 10927 }, { "epoch": 0.23176602829208287, "grad_norm": 0.3257240653038025, "learning_rate": 1.935517451254372e-05, "loss": 0.4854, "step": 10928 }, { "epoch": 0.23178723675001592, "grad_norm": 0.3241129219532013, "learning_rate": 1.9355056689952737e-05, "loss": 0.5637, "step": 10929 }, { "epoch": 0.23180844520794894, "grad_norm": 0.347190797328949, "learning_rate": 1.9354938856957138e-05, "loss": 0.4933, "step": 10930 }, { "epoch": 0.23182965366588196, "grad_norm": 0.3277919888496399, "learning_rate": 1.935482101355706e-05, "loss": 0.4424, "step": 10931 }, { "epoch": 0.23185086212381498, "grad_norm": 0.33186450600624084, "learning_rate": 1.9354703159752634e-05, "loss": 0.5141, "step": 10932 }, { "epoch": 0.231872070581748, "grad_norm": 0.30965808033943176, "learning_rate": 1.9354585295543983e-05, "loss": 0.4846, "step": 10933 }, { "epoch": 0.23189327903968102, "grad_norm": 0.3573049008846283, "learning_rate": 1.935446742093125e-05, "loss": 0.518, "step": 10934 }, { "epoch": 0.23191448749761404, "grad_norm": 1.0728758573532104, "learning_rate": 1.9354349535914557e-05, "loss": 0.4756, "step": 10935 }, { "epoch": 0.23193569595554708, "grad_norm": 0.31626787781715393, "learning_rate": 1.9354231640494036e-05, "loss": 0.4493, "step": 10936 }, { "epoch": 0.2319569044134801, "grad_norm": 0.3412128984928131, "learning_rate": 1.935411373466982e-05, "loss": 0.452, "step": 10937 }, { "epoch": 0.23197811287141312, "grad_norm": 0.35534173250198364, "learning_rate": 1.935399581844204e-05, "loss": 0.5377, "step": 10938 }, { "epoch": 0.23199932132934614, "grad_norm": 0.346332848072052, "learning_rate": 1.9353877891810827e-05, "loss": 0.5765, "step": 10939 }, { "epoch": 0.23202052978727916, "grad_norm": 0.340966135263443, "learning_rate": 1.9353759954776313e-05, "loss": 0.536, "step": 10940 }, { "epoch": 0.23204173824521218, "grad_norm": 0.3486384153366089, "learning_rate": 1.9353642007338628e-05, "loss": 0.4937, "step": 10941 }, { "epoch": 0.2320629467031452, "grad_norm": 0.31657662987709045, "learning_rate": 1.9353524049497904e-05, "loss": 0.524, "step": 10942 }, { "epoch": 0.23208415516107825, "grad_norm": 0.3545374274253845, "learning_rate": 1.9353406081254275e-05, "loss": 0.5182, "step": 10943 }, { "epoch": 0.23210536361901127, "grad_norm": 0.32354822754859924, "learning_rate": 1.9353288102607864e-05, "loss": 0.4966, "step": 10944 }, { "epoch": 0.2321265720769443, "grad_norm": 0.30562934279441833, "learning_rate": 1.9353170113558808e-05, "loss": 0.4602, "step": 10945 }, { "epoch": 0.2321477805348773, "grad_norm": 0.3378331661224365, "learning_rate": 1.935305211410724e-05, "loss": 0.4477, "step": 10946 }, { "epoch": 0.23216898899281033, "grad_norm": 0.36359843611717224, "learning_rate": 1.9352934104253288e-05, "loss": 0.5387, "step": 10947 }, { "epoch": 0.23219019745074335, "grad_norm": 0.43639224767684937, "learning_rate": 1.9352816083997082e-05, "loss": 0.4701, "step": 10948 }, { "epoch": 0.2322114059086764, "grad_norm": 0.4199211895465851, "learning_rate": 1.9352698053338755e-05, "loss": 0.4887, "step": 10949 }, { "epoch": 0.2322326143666094, "grad_norm": 0.31599387526512146, "learning_rate": 1.935258001227844e-05, "loss": 0.4901, "step": 10950 }, { "epoch": 0.23225382282454243, "grad_norm": 0.44535115361213684, "learning_rate": 1.935246196081627e-05, "loss": 0.5205, "step": 10951 }, { "epoch": 0.23227503128247545, "grad_norm": 0.35007143020629883, "learning_rate": 1.9352343898952366e-05, "loss": 0.4064, "step": 10952 }, { "epoch": 0.23229623974040847, "grad_norm": 1.1550796031951904, "learning_rate": 1.9352225826686873e-05, "loss": 0.4307, "step": 10953 }, { "epoch": 0.2323174481983415, "grad_norm": 0.35879603028297424, "learning_rate": 1.9352107744019914e-05, "loss": 0.6089, "step": 10954 }, { "epoch": 0.2323386566562745, "grad_norm": 0.32122910022735596, "learning_rate": 1.935198965095162e-05, "loss": 0.4511, "step": 10955 }, { "epoch": 0.23235986511420756, "grad_norm": 0.3257702589035034, "learning_rate": 1.935187154748212e-05, "loss": 0.5217, "step": 10956 }, { "epoch": 0.23238107357214058, "grad_norm": 0.3214726448059082, "learning_rate": 1.9351753433611556e-05, "loss": 0.5075, "step": 10957 }, { "epoch": 0.2324022820300736, "grad_norm": 0.33215558528900146, "learning_rate": 1.9351635309340053e-05, "loss": 0.4999, "step": 10958 }, { "epoch": 0.23242349048800662, "grad_norm": 0.3835143446922302, "learning_rate": 1.935151717466774e-05, "loss": 0.5533, "step": 10959 }, { "epoch": 0.23244469894593964, "grad_norm": 0.3438129723072052, "learning_rate": 1.935139902959475e-05, "loss": 0.5146, "step": 10960 }, { "epoch": 0.23246590740387266, "grad_norm": 0.30994266271591187, "learning_rate": 1.935128087412122e-05, "loss": 0.519, "step": 10961 }, { "epoch": 0.23248711586180568, "grad_norm": 0.3894325792789459, "learning_rate": 1.9351162708247274e-05, "loss": 0.5485, "step": 10962 }, { "epoch": 0.23250832431973872, "grad_norm": 0.37624379992485046, "learning_rate": 1.9351044531973047e-05, "loss": 0.5177, "step": 10963 }, { "epoch": 0.23252953277767174, "grad_norm": 0.40838930010795593, "learning_rate": 1.9350926345298672e-05, "loss": 0.524, "step": 10964 }, { "epoch": 0.23255074123560476, "grad_norm": 0.3465306758880615, "learning_rate": 1.935080814822427e-05, "loss": 0.5415, "step": 10965 }, { "epoch": 0.23257194969353778, "grad_norm": 0.35129314661026, "learning_rate": 1.9350689940749988e-05, "loss": 0.4943, "step": 10966 }, { "epoch": 0.2325931581514708, "grad_norm": 0.33723023533821106, "learning_rate": 1.9350571722875947e-05, "loss": 0.5007, "step": 10967 }, { "epoch": 0.23261436660940382, "grad_norm": 0.31152254343032837, "learning_rate": 1.935045349460228e-05, "loss": 0.5654, "step": 10968 }, { "epoch": 0.23263557506733684, "grad_norm": 0.35820272564888, "learning_rate": 1.935033525592912e-05, "loss": 0.524, "step": 10969 }, { "epoch": 0.2326567835252699, "grad_norm": 0.3208778202533722, "learning_rate": 1.93502170068566e-05, "loss": 0.6052, "step": 10970 }, { "epoch": 0.2326779919832029, "grad_norm": 0.38363221287727356, "learning_rate": 1.9350098747384848e-05, "loss": 0.5676, "step": 10971 }, { "epoch": 0.23269920044113593, "grad_norm": 0.3469752371311188, "learning_rate": 1.9349980477514002e-05, "loss": 0.5383, "step": 10972 }, { "epoch": 0.23272040889906895, "grad_norm": 0.56184321641922, "learning_rate": 1.9349862197244184e-05, "loss": 0.4881, "step": 10973 }, { "epoch": 0.23274161735700197, "grad_norm": 0.41809678077697754, "learning_rate": 1.934974390657553e-05, "loss": 0.5563, "step": 10974 }, { "epoch": 0.232762825814935, "grad_norm": 0.3995872735977173, "learning_rate": 1.9349625605508174e-05, "loss": 0.5021, "step": 10975 }, { "epoch": 0.232784034272868, "grad_norm": 0.3421217203140259, "learning_rate": 1.9349507294042247e-05, "loss": 0.5205, "step": 10976 }, { "epoch": 0.23280524273080105, "grad_norm": 0.3442528247833252, "learning_rate": 1.934938897217788e-05, "loss": 0.5707, "step": 10977 }, { "epoch": 0.23282645118873407, "grad_norm": 0.3886580467224121, "learning_rate": 1.93492706399152e-05, "loss": 0.4775, "step": 10978 }, { "epoch": 0.2328476596466671, "grad_norm": 0.4449678063392639, "learning_rate": 1.9349152297254344e-05, "loss": 0.555, "step": 10979 }, { "epoch": 0.2328688681046001, "grad_norm": 0.3381876051425934, "learning_rate": 1.934903394419544e-05, "loss": 0.5496, "step": 10980 }, { "epoch": 0.23289007656253313, "grad_norm": 0.36465486884117126, "learning_rate": 1.9348915580738628e-05, "loss": 0.5726, "step": 10981 }, { "epoch": 0.23291128502046615, "grad_norm": 0.3674522638320923, "learning_rate": 1.934879720688403e-05, "loss": 0.5241, "step": 10982 }, { "epoch": 0.23293249347839917, "grad_norm": 0.3934065103530884, "learning_rate": 1.9348678822631778e-05, "loss": 0.583, "step": 10983 }, { "epoch": 0.23295370193633222, "grad_norm": 0.3146085739135742, "learning_rate": 1.9348560427982008e-05, "loss": 0.4598, "step": 10984 }, { "epoch": 0.23297491039426524, "grad_norm": 0.34522420167922974, "learning_rate": 1.934844202293485e-05, "loss": 0.4461, "step": 10985 }, { "epoch": 0.23299611885219826, "grad_norm": 0.38540923595428467, "learning_rate": 1.934832360749044e-05, "loss": 0.5174, "step": 10986 }, { "epoch": 0.23301732731013128, "grad_norm": 0.3355128765106201, "learning_rate": 1.93482051816489e-05, "loss": 0.5005, "step": 10987 }, { "epoch": 0.2330385357680643, "grad_norm": 0.37947985529899597, "learning_rate": 1.9348086745410373e-05, "loss": 0.4986, "step": 10988 }, { "epoch": 0.23305974422599732, "grad_norm": 0.32035425305366516, "learning_rate": 1.934796829877498e-05, "loss": 0.4859, "step": 10989 }, { "epoch": 0.23308095268393036, "grad_norm": 0.4228648841381073, "learning_rate": 1.934784984174286e-05, "loss": 0.547, "step": 10990 }, { "epoch": 0.23310216114186338, "grad_norm": 0.3269408643245697, "learning_rate": 1.934773137431414e-05, "loss": 0.4467, "step": 10991 }, { "epoch": 0.2331233695997964, "grad_norm": 0.3455809950828552, "learning_rate": 1.9347612896488962e-05, "loss": 0.6038, "step": 10992 }, { "epoch": 0.23314457805772942, "grad_norm": 0.3137528598308563, "learning_rate": 1.9347494408267445e-05, "loss": 0.4607, "step": 10993 }, { "epoch": 0.23316578651566244, "grad_norm": 0.42604488134384155, "learning_rate": 1.9347375909649724e-05, "loss": 0.6013, "step": 10994 }, { "epoch": 0.23318699497359546, "grad_norm": 0.33405396342277527, "learning_rate": 1.9347257400635934e-05, "loss": 0.4513, "step": 10995 }, { "epoch": 0.23320820343152848, "grad_norm": 0.3235195279121399, "learning_rate": 1.9347138881226207e-05, "loss": 0.4424, "step": 10996 }, { "epoch": 0.23322941188946153, "grad_norm": 0.3349977433681488, "learning_rate": 1.9347020351420672e-05, "loss": 0.4895, "step": 10997 }, { "epoch": 0.23325062034739455, "grad_norm": 0.3028920590877533, "learning_rate": 1.9346901811219465e-05, "loss": 0.4925, "step": 10998 }, { "epoch": 0.23327182880532757, "grad_norm": 0.3824363350868225, "learning_rate": 1.9346783260622713e-05, "loss": 0.6637, "step": 10999 }, { "epoch": 0.2332930372632606, "grad_norm": 0.3341761529445648, "learning_rate": 1.934666469963055e-05, "loss": 0.3823, "step": 11000 }, { "epoch": 0.2333142457211936, "grad_norm": 0.3253323435783386, "learning_rate": 1.9346546128243107e-05, "loss": 0.44, "step": 11001 }, { "epoch": 0.23333545417912663, "grad_norm": 0.6415635347366333, "learning_rate": 1.9346427546460516e-05, "loss": 0.5415, "step": 11002 }, { "epoch": 0.23335666263705965, "grad_norm": 0.34072181582450867, "learning_rate": 1.934630895428291e-05, "loss": 0.507, "step": 11003 }, { "epoch": 0.2333778710949927, "grad_norm": 0.32258304953575134, "learning_rate": 1.934619035171042e-05, "loss": 0.5117, "step": 11004 }, { "epoch": 0.23339907955292571, "grad_norm": 0.37220442295074463, "learning_rate": 1.934607173874318e-05, "loss": 0.508, "step": 11005 }, { "epoch": 0.23342028801085873, "grad_norm": 0.3496266007423401, "learning_rate": 1.9345953115381323e-05, "loss": 0.5476, "step": 11006 }, { "epoch": 0.23344149646879175, "grad_norm": 0.322431743144989, "learning_rate": 1.9345834481624973e-05, "loss": 0.5135, "step": 11007 }, { "epoch": 0.23346270492672477, "grad_norm": 0.3191261291503906, "learning_rate": 1.934571583747427e-05, "loss": 0.523, "step": 11008 }, { "epoch": 0.2334839133846578, "grad_norm": 0.5439733266830444, "learning_rate": 1.934559718292934e-05, "loss": 0.5443, "step": 11009 }, { "epoch": 0.2335051218425908, "grad_norm": 0.32928115129470825, "learning_rate": 1.934547851799032e-05, "loss": 0.5955, "step": 11010 }, { "epoch": 0.23352633030052386, "grad_norm": 0.37120628356933594, "learning_rate": 1.934535984265734e-05, "loss": 0.5747, "step": 11011 }, { "epoch": 0.23354753875845688, "grad_norm": 0.3336915075778961, "learning_rate": 1.934524115693053e-05, "loss": 0.5586, "step": 11012 }, { "epoch": 0.2335687472163899, "grad_norm": 0.5083322525024414, "learning_rate": 1.9345122460810025e-05, "loss": 0.5275, "step": 11013 }, { "epoch": 0.23358995567432292, "grad_norm": 0.3078511357307434, "learning_rate": 1.934500375429596e-05, "loss": 0.4895, "step": 11014 }, { "epoch": 0.23361116413225594, "grad_norm": 0.5905048847198486, "learning_rate": 1.934488503738846e-05, "loss": 0.4309, "step": 11015 }, { "epoch": 0.23363237259018896, "grad_norm": 0.3102923035621643, "learning_rate": 1.934476631008766e-05, "loss": 0.5348, "step": 11016 }, { "epoch": 0.23365358104812198, "grad_norm": 0.35425081849098206, "learning_rate": 1.9344647572393688e-05, "loss": 0.5415, "step": 11017 }, { "epoch": 0.23367478950605503, "grad_norm": 0.33500152826309204, "learning_rate": 1.9344528824306685e-05, "loss": 0.6027, "step": 11018 }, { "epoch": 0.23369599796398804, "grad_norm": 0.3234853744506836, "learning_rate": 1.9344410065826777e-05, "loss": 0.5244, "step": 11019 }, { "epoch": 0.23371720642192106, "grad_norm": 0.33874407410621643, "learning_rate": 1.9344291296954097e-05, "loss": 0.5229, "step": 11020 }, { "epoch": 0.23373841487985408, "grad_norm": 0.3191481828689575, "learning_rate": 1.9344172517688777e-05, "loss": 0.4543, "step": 11021 }, { "epoch": 0.2337596233377871, "grad_norm": 0.3819323480129242, "learning_rate": 1.9344053728030952e-05, "loss": 0.5742, "step": 11022 }, { "epoch": 0.23378083179572012, "grad_norm": 0.4118969440460205, "learning_rate": 1.934393492798075e-05, "loss": 0.548, "step": 11023 }, { "epoch": 0.23380204025365317, "grad_norm": 0.4047975242137909, "learning_rate": 1.9343816117538306e-05, "loss": 0.526, "step": 11024 }, { "epoch": 0.2338232487115862, "grad_norm": 0.3753792643547058, "learning_rate": 1.9343697296703747e-05, "loss": 0.5668, "step": 11025 }, { "epoch": 0.2338444571695192, "grad_norm": 0.3216213285923004, "learning_rate": 1.9343578465477214e-05, "loss": 0.482, "step": 11026 }, { "epoch": 0.23386566562745223, "grad_norm": 0.32581308484077454, "learning_rate": 1.934345962385883e-05, "loss": 0.5815, "step": 11027 }, { "epoch": 0.23388687408538525, "grad_norm": 0.30036741495132446, "learning_rate": 1.9343340771848732e-05, "loss": 0.4901, "step": 11028 }, { "epoch": 0.23390808254331827, "grad_norm": 0.465470552444458, "learning_rate": 1.9343221909447053e-05, "loss": 0.4915, "step": 11029 }, { "epoch": 0.2339292910012513, "grad_norm": 0.3206412196159363, "learning_rate": 1.9343103036653924e-05, "loss": 0.4821, "step": 11030 }, { "epoch": 0.23395049945918434, "grad_norm": 0.4472370743751526, "learning_rate": 1.934298415346948e-05, "loss": 0.5617, "step": 11031 }, { "epoch": 0.23397170791711736, "grad_norm": 0.3147977292537689, "learning_rate": 1.9342865259893844e-05, "loss": 0.5387, "step": 11032 }, { "epoch": 0.23399291637505037, "grad_norm": 0.34110063314437866, "learning_rate": 1.9342746355927157e-05, "loss": 0.4655, "step": 11033 }, { "epoch": 0.2340141248329834, "grad_norm": 0.3389330804347992, "learning_rate": 1.934262744156955e-05, "loss": 0.497, "step": 11034 }, { "epoch": 0.23403533329091641, "grad_norm": 0.32203784584999084, "learning_rate": 1.934250851682115e-05, "loss": 0.5285, "step": 11035 }, { "epoch": 0.23405654174884943, "grad_norm": 0.3258395195007324, "learning_rate": 1.9342389581682095e-05, "loss": 0.4705, "step": 11036 }, { "epoch": 0.23407775020678245, "grad_norm": 0.38874053955078125, "learning_rate": 1.934227063615252e-05, "loss": 0.524, "step": 11037 }, { "epoch": 0.2340989586647155, "grad_norm": 0.3300250768661499, "learning_rate": 1.9342151680232545e-05, "loss": 0.5564, "step": 11038 }, { "epoch": 0.23412016712264852, "grad_norm": 0.3419240415096283, "learning_rate": 1.9342032713922316e-05, "loss": 0.46, "step": 11039 }, { "epoch": 0.23414137558058154, "grad_norm": 0.3158418536186218, "learning_rate": 1.9341913737221957e-05, "loss": 0.4475, "step": 11040 }, { "epoch": 0.23416258403851456, "grad_norm": 0.3102789521217346, "learning_rate": 1.9341794750131604e-05, "loss": 0.5078, "step": 11041 }, { "epoch": 0.23418379249644758, "grad_norm": 0.37532228231430054, "learning_rate": 1.9341675752651386e-05, "loss": 0.6004, "step": 11042 }, { "epoch": 0.2342050009543806, "grad_norm": 0.30404970049858093, "learning_rate": 1.9341556744781438e-05, "loss": 0.5155, "step": 11043 }, { "epoch": 0.23422620941231362, "grad_norm": 0.3883865177631378, "learning_rate": 1.9341437726521892e-05, "loss": 0.5273, "step": 11044 }, { "epoch": 0.23424741787024667, "grad_norm": 0.31043845415115356, "learning_rate": 1.934131869787288e-05, "loss": 0.5196, "step": 11045 }, { "epoch": 0.23426862632817969, "grad_norm": 0.35455816984176636, "learning_rate": 1.9341199658834536e-05, "loss": 0.5962, "step": 11046 }, { "epoch": 0.2342898347861127, "grad_norm": 0.42815715074539185, "learning_rate": 1.934108060940699e-05, "loss": 0.5597, "step": 11047 }, { "epoch": 0.23431104324404572, "grad_norm": 0.4121066927909851, "learning_rate": 1.9340961549590375e-05, "loss": 0.5076, "step": 11048 }, { "epoch": 0.23433225170197874, "grad_norm": 0.3416856825351715, "learning_rate": 1.9340842479384823e-05, "loss": 0.5224, "step": 11049 }, { "epoch": 0.23435346015991176, "grad_norm": 0.3648418188095093, "learning_rate": 1.934072339879047e-05, "loss": 0.6262, "step": 11050 }, { "epoch": 0.23437466861784478, "grad_norm": 0.36726996302604675, "learning_rate": 1.9340604307807443e-05, "loss": 0.5559, "step": 11051 }, { "epoch": 0.23439587707577783, "grad_norm": 0.34216490387916565, "learning_rate": 1.9340485206435876e-05, "loss": 0.4934, "step": 11052 }, { "epoch": 0.23441708553371085, "grad_norm": 0.3045426309108734, "learning_rate": 1.9340366094675904e-05, "loss": 0.4522, "step": 11053 }, { "epoch": 0.23443829399164387, "grad_norm": 0.3731829822063446, "learning_rate": 1.934024697252766e-05, "loss": 0.509, "step": 11054 }, { "epoch": 0.2344595024495769, "grad_norm": 0.3497096598148346, "learning_rate": 1.9340127839991273e-05, "loss": 0.5062, "step": 11055 }, { "epoch": 0.2344807109075099, "grad_norm": 0.3758322298526764, "learning_rate": 1.9340008697066875e-05, "loss": 0.5228, "step": 11056 }, { "epoch": 0.23450191936544293, "grad_norm": 0.3303760588169098, "learning_rate": 1.93398895437546e-05, "loss": 0.5641, "step": 11057 }, { "epoch": 0.23452312782337595, "grad_norm": 0.33379560708999634, "learning_rate": 1.9339770380054585e-05, "loss": 0.5273, "step": 11058 }, { "epoch": 0.234544336281309, "grad_norm": 0.35344651341438293, "learning_rate": 1.9339651205966957e-05, "loss": 0.4972, "step": 11059 }, { "epoch": 0.23456554473924202, "grad_norm": 0.3142872154712677, "learning_rate": 1.9339532021491846e-05, "loss": 0.5263, "step": 11060 }, { "epoch": 0.23458675319717504, "grad_norm": 0.366706907749176, "learning_rate": 1.9339412826629393e-05, "loss": 0.4945, "step": 11061 }, { "epoch": 0.23460796165510805, "grad_norm": 0.3252348005771637, "learning_rate": 1.9339293621379725e-05, "loss": 0.5303, "step": 11062 }, { "epoch": 0.23462917011304107, "grad_norm": 0.31014201045036316, "learning_rate": 1.9339174405742976e-05, "loss": 0.448, "step": 11063 }, { "epoch": 0.2346503785709741, "grad_norm": 0.43769362568855286, "learning_rate": 1.933905517971928e-05, "loss": 0.4956, "step": 11064 }, { "epoch": 0.23467158702890714, "grad_norm": 0.3646015226840973, "learning_rate": 1.9338935943308766e-05, "loss": 0.5117, "step": 11065 }, { "epoch": 0.23469279548684016, "grad_norm": 0.3474598824977875, "learning_rate": 1.933881669651157e-05, "loss": 0.4581, "step": 11066 }, { "epoch": 0.23471400394477318, "grad_norm": 0.3605497479438782, "learning_rate": 1.933869743932782e-05, "loss": 0.5149, "step": 11067 }, { "epoch": 0.2347352124027062, "grad_norm": 0.3336057662963867, "learning_rate": 1.9338578171757653e-05, "loss": 0.562, "step": 11068 }, { "epoch": 0.23475642086063922, "grad_norm": 0.3608788549900055, "learning_rate": 1.93384588938012e-05, "loss": 0.5319, "step": 11069 }, { "epoch": 0.23477762931857224, "grad_norm": 0.364930659532547, "learning_rate": 1.93383396054586e-05, "loss": 0.4943, "step": 11070 }, { "epoch": 0.23479883777650526, "grad_norm": 0.32311567664146423, "learning_rate": 1.9338220306729975e-05, "loss": 0.4893, "step": 11071 }, { "epoch": 0.2348200462344383, "grad_norm": 0.33758077025413513, "learning_rate": 1.933810099761546e-05, "loss": 0.5556, "step": 11072 }, { "epoch": 0.23484125469237133, "grad_norm": 0.3477874994277954, "learning_rate": 1.9337981678115196e-05, "loss": 0.5199, "step": 11073 }, { "epoch": 0.23486246315030435, "grad_norm": 0.31871360540390015, "learning_rate": 1.9337862348229305e-05, "loss": 0.4733, "step": 11074 }, { "epoch": 0.23488367160823737, "grad_norm": 0.35540616512298584, "learning_rate": 1.9337743007957926e-05, "loss": 0.5265, "step": 11075 }, { "epoch": 0.23490488006617039, "grad_norm": 0.503131628036499, "learning_rate": 1.933762365730119e-05, "loss": 0.4714, "step": 11076 }, { "epoch": 0.2349260885241034, "grad_norm": 0.40078020095825195, "learning_rate": 1.9337504296259233e-05, "loss": 0.5338, "step": 11077 }, { "epoch": 0.23494729698203642, "grad_norm": 0.3259495198726654, "learning_rate": 1.9337384924832183e-05, "loss": 0.457, "step": 11078 }, { "epoch": 0.23496850543996947, "grad_norm": 0.42534634470939636, "learning_rate": 1.9337265543020174e-05, "loss": 0.4637, "step": 11079 }, { "epoch": 0.2349897138979025, "grad_norm": 0.3330209255218506, "learning_rate": 1.9337146150823338e-05, "loss": 0.5381, "step": 11080 }, { "epoch": 0.2350109223558355, "grad_norm": 0.31399762630462646, "learning_rate": 1.9337026748241813e-05, "loss": 0.5294, "step": 11081 }, { "epoch": 0.23503213081376853, "grad_norm": 0.36672714352607727, "learning_rate": 1.9336907335275726e-05, "loss": 0.5535, "step": 11082 }, { "epoch": 0.23505333927170155, "grad_norm": 0.3417290151119232, "learning_rate": 1.933678791192521e-05, "loss": 0.5106, "step": 11083 }, { "epoch": 0.23507454772963457, "grad_norm": 0.3041567802429199, "learning_rate": 1.93366684781904e-05, "loss": 0.4285, "step": 11084 }, { "epoch": 0.2350957561875676, "grad_norm": 0.34618788957595825, "learning_rate": 1.9336549034071434e-05, "loss": 0.5437, "step": 11085 }, { "epoch": 0.23511696464550064, "grad_norm": 0.3646630644798279, "learning_rate": 1.9336429579568433e-05, "loss": 0.4722, "step": 11086 }, { "epoch": 0.23513817310343366, "grad_norm": 0.3424612879753113, "learning_rate": 1.933631011468154e-05, "loss": 0.4947, "step": 11087 }, { "epoch": 0.23515938156136668, "grad_norm": 0.31050384044647217, "learning_rate": 1.933619063941088e-05, "loss": 0.4997, "step": 11088 }, { "epoch": 0.2351805900192997, "grad_norm": 0.5670024752616882, "learning_rate": 1.9336071153756592e-05, "loss": 0.476, "step": 11089 }, { "epoch": 0.23520179847723272, "grad_norm": 0.34201306104660034, "learning_rate": 1.933595165771881e-05, "loss": 0.5463, "step": 11090 }, { "epoch": 0.23522300693516573, "grad_norm": 0.34962546825408936, "learning_rate": 1.9335832151297658e-05, "loss": 0.5641, "step": 11091 }, { "epoch": 0.23524421539309875, "grad_norm": 0.383352667093277, "learning_rate": 1.9335712634493277e-05, "loss": 0.61, "step": 11092 }, { "epoch": 0.2352654238510318, "grad_norm": 0.31890958547592163, "learning_rate": 1.9335593107305796e-05, "loss": 0.5561, "step": 11093 }, { "epoch": 0.23528663230896482, "grad_norm": 0.33508017659187317, "learning_rate": 1.933547356973535e-05, "loss": 0.447, "step": 11094 }, { "epoch": 0.23530784076689784, "grad_norm": 0.31388723850250244, "learning_rate": 1.9335354021782074e-05, "loss": 0.55, "step": 11095 }, { "epoch": 0.23532904922483086, "grad_norm": 0.3205633759498596, "learning_rate": 1.9335234463446095e-05, "loss": 0.4747, "step": 11096 }, { "epoch": 0.23535025768276388, "grad_norm": 0.33563295006752014, "learning_rate": 1.9335114894727552e-05, "loss": 0.5736, "step": 11097 }, { "epoch": 0.2353714661406969, "grad_norm": 0.38830074667930603, "learning_rate": 1.9334995315626574e-05, "loss": 0.6047, "step": 11098 }, { "epoch": 0.23539267459862995, "grad_norm": 0.45202863216400146, "learning_rate": 1.9334875726143296e-05, "loss": 0.6155, "step": 11099 }, { "epoch": 0.23541388305656297, "grad_norm": 0.3869280517101288, "learning_rate": 1.933475612627785e-05, "loss": 0.5232, "step": 11100 }, { "epoch": 0.235435091514496, "grad_norm": 0.32055267691612244, "learning_rate": 1.933463651603037e-05, "loss": 0.4874, "step": 11101 }, { "epoch": 0.235456299972429, "grad_norm": 0.33152419328689575, "learning_rate": 1.9334516895400988e-05, "loss": 0.5422, "step": 11102 }, { "epoch": 0.23547750843036203, "grad_norm": 0.30970481038093567, "learning_rate": 1.9334397264389836e-05, "loss": 0.5046, "step": 11103 }, { "epoch": 0.23549871688829505, "grad_norm": 0.3434596657752991, "learning_rate": 1.933427762299705e-05, "loss": 0.5137, "step": 11104 }, { "epoch": 0.23551992534622807, "grad_norm": 0.3426278531551361, "learning_rate": 1.933415797122276e-05, "loss": 0.5787, "step": 11105 }, { "epoch": 0.2355411338041611, "grad_norm": 0.31302228569984436, "learning_rate": 1.9334038309067102e-05, "loss": 0.5181, "step": 11106 }, { "epoch": 0.23556234226209413, "grad_norm": 0.3650593161582947, "learning_rate": 1.9333918636530207e-05, "loss": 0.5212, "step": 11107 }, { "epoch": 0.23558355072002715, "grad_norm": 0.36555948853492737, "learning_rate": 1.933379895361221e-05, "loss": 0.589, "step": 11108 }, { "epoch": 0.23560475917796017, "grad_norm": 0.3128397762775421, "learning_rate": 1.9333679260313242e-05, "loss": 0.4718, "step": 11109 }, { "epoch": 0.2356259676358932, "grad_norm": 0.3110911548137665, "learning_rate": 1.9333559556633436e-05, "loss": 0.4508, "step": 11110 }, { "epoch": 0.2356471760938262, "grad_norm": 0.3240465521812439, "learning_rate": 1.9333439842572926e-05, "loss": 0.4813, "step": 11111 }, { "epoch": 0.23566838455175923, "grad_norm": 0.30609822273254395, "learning_rate": 1.9333320118131848e-05, "loss": 0.5118, "step": 11112 }, { "epoch": 0.23568959300969228, "grad_norm": 0.3170422613620758, "learning_rate": 1.933320038331033e-05, "loss": 0.4636, "step": 11113 }, { "epoch": 0.2357108014676253, "grad_norm": 0.3341202735900879, "learning_rate": 1.9333080638108508e-05, "loss": 0.4552, "step": 11114 }, { "epoch": 0.23573200992555832, "grad_norm": 0.3128845691680908, "learning_rate": 1.9332960882526518e-05, "loss": 0.4696, "step": 11115 }, { "epoch": 0.23575321838349134, "grad_norm": 0.3442230224609375, "learning_rate": 1.9332841116564488e-05, "loss": 0.5136, "step": 11116 }, { "epoch": 0.23577442684142436, "grad_norm": 0.32318314909935, "learning_rate": 1.933272134022255e-05, "loss": 0.5531, "step": 11117 }, { "epoch": 0.23579563529935738, "grad_norm": 0.304988831281662, "learning_rate": 1.9332601553500843e-05, "loss": 0.5175, "step": 11118 }, { "epoch": 0.2358168437572904, "grad_norm": 0.4056169390678406, "learning_rate": 1.9332481756399497e-05, "loss": 0.5465, "step": 11119 }, { "epoch": 0.23583805221522344, "grad_norm": 0.34154248237609863, "learning_rate": 1.9332361948918646e-05, "loss": 0.5889, "step": 11120 }, { "epoch": 0.23585926067315646, "grad_norm": 0.33306097984313965, "learning_rate": 1.9332242131058426e-05, "loss": 0.5145, "step": 11121 }, { "epoch": 0.23588046913108948, "grad_norm": 0.4620891213417053, "learning_rate": 1.9332122302818962e-05, "loss": 0.5035, "step": 11122 }, { "epoch": 0.2359016775890225, "grad_norm": 0.35940682888031006, "learning_rate": 1.9332002464200394e-05, "loss": 0.6327, "step": 11123 }, { "epoch": 0.23592288604695552, "grad_norm": 0.3463292121887207, "learning_rate": 1.9331882615202856e-05, "loss": 0.5813, "step": 11124 }, { "epoch": 0.23594409450488854, "grad_norm": 0.361716091632843, "learning_rate": 1.933176275582648e-05, "loss": 0.508, "step": 11125 }, { "epoch": 0.23596530296282156, "grad_norm": 0.324694961309433, "learning_rate": 1.93316428860714e-05, "loss": 0.5183, "step": 11126 }, { "epoch": 0.2359865114207546, "grad_norm": 0.3533482551574707, "learning_rate": 1.933152300593774e-05, "loss": 0.4898, "step": 11127 }, { "epoch": 0.23600771987868763, "grad_norm": 0.31823456287384033, "learning_rate": 1.9331403115425648e-05, "loss": 0.5653, "step": 11128 }, { "epoch": 0.23602892833662065, "grad_norm": 0.37480130791664124, "learning_rate": 1.9331283214535247e-05, "loss": 0.5622, "step": 11129 }, { "epoch": 0.23605013679455367, "grad_norm": 0.308711439371109, "learning_rate": 1.9331163303266677e-05, "loss": 0.5822, "step": 11130 }, { "epoch": 0.2360713452524867, "grad_norm": 0.35655826330184937, "learning_rate": 1.933104338162007e-05, "loss": 0.528, "step": 11131 }, { "epoch": 0.2360925537104197, "grad_norm": 0.3733554780483246, "learning_rate": 1.9330923449595552e-05, "loss": 0.5837, "step": 11132 }, { "epoch": 0.23611376216835273, "grad_norm": 0.34092244505882263, "learning_rate": 1.9330803507193262e-05, "loss": 0.5443, "step": 11133 }, { "epoch": 0.23613497062628577, "grad_norm": 0.351088285446167, "learning_rate": 1.9330683554413335e-05, "loss": 0.5558, "step": 11134 }, { "epoch": 0.2361561790842188, "grad_norm": 0.3467762768268585, "learning_rate": 1.9330563591255905e-05, "loss": 0.4854, "step": 11135 }, { "epoch": 0.2361773875421518, "grad_norm": 0.3408736288547516, "learning_rate": 1.9330443617721103e-05, "loss": 0.5286, "step": 11136 }, { "epoch": 0.23619859600008483, "grad_norm": 0.314877986907959, "learning_rate": 1.9330323633809062e-05, "loss": 0.4339, "step": 11137 }, { "epoch": 0.23621980445801785, "grad_norm": 0.30767151713371277, "learning_rate": 1.9330203639519912e-05, "loss": 0.6039, "step": 11138 }, { "epoch": 0.23624101291595087, "grad_norm": 0.36739811301231384, "learning_rate": 1.93300836348538e-05, "loss": 0.6049, "step": 11139 }, { "epoch": 0.23626222137388392, "grad_norm": 0.30510213971138, "learning_rate": 1.932996361981084e-05, "loss": 0.4316, "step": 11140 }, { "epoch": 0.23628342983181694, "grad_norm": 0.3339058756828308, "learning_rate": 1.932984359439118e-05, "loss": 0.5376, "step": 11141 }, { "epoch": 0.23630463828974996, "grad_norm": 0.36728769540786743, "learning_rate": 1.9329723558594947e-05, "loss": 0.5515, "step": 11142 }, { "epoch": 0.23632584674768298, "grad_norm": 0.8984270095825195, "learning_rate": 1.932960351242228e-05, "loss": 0.4863, "step": 11143 }, { "epoch": 0.236347055205616, "grad_norm": 0.3764849603176117, "learning_rate": 1.9329483455873307e-05, "loss": 0.5763, "step": 11144 }, { "epoch": 0.23636826366354902, "grad_norm": 0.28568974137306213, "learning_rate": 1.9329363388948165e-05, "loss": 0.4375, "step": 11145 }, { "epoch": 0.23638947212148204, "grad_norm": 0.325187623500824, "learning_rate": 1.9329243311646985e-05, "loss": 0.5335, "step": 11146 }, { "epoch": 0.23641068057941508, "grad_norm": 0.3698521852493286, "learning_rate": 1.93291232239699e-05, "loss": 0.4896, "step": 11147 }, { "epoch": 0.2364318890373481, "grad_norm": 0.3767881691455841, "learning_rate": 1.9329003125917047e-05, "loss": 0.5379, "step": 11148 }, { "epoch": 0.23645309749528112, "grad_norm": 0.4108913242816925, "learning_rate": 1.9328883017488554e-05, "loss": 0.5912, "step": 11149 }, { "epoch": 0.23647430595321414, "grad_norm": 0.35294225811958313, "learning_rate": 1.9328762898684562e-05, "loss": 0.522, "step": 11150 }, { "epoch": 0.23649551441114716, "grad_norm": 0.4080395996570587, "learning_rate": 1.9328642769505202e-05, "loss": 0.4569, "step": 11151 }, { "epoch": 0.23651672286908018, "grad_norm": 0.3158886134624481, "learning_rate": 1.9328522629950607e-05, "loss": 0.4747, "step": 11152 }, { "epoch": 0.2365379313270132, "grad_norm": 0.34665969014167786, "learning_rate": 1.9328402480020908e-05, "loss": 0.5241, "step": 11153 }, { "epoch": 0.23655913978494625, "grad_norm": 0.38536080718040466, "learning_rate": 1.9328282319716242e-05, "loss": 0.4913, "step": 11154 }, { "epoch": 0.23658034824287927, "grad_norm": 0.4587971866130829, "learning_rate": 1.932816214903674e-05, "loss": 0.471, "step": 11155 }, { "epoch": 0.2366015567008123, "grad_norm": 0.3473285734653473, "learning_rate": 1.9328041967982538e-05, "loss": 0.5196, "step": 11156 }, { "epoch": 0.2366227651587453, "grad_norm": 0.3169425427913666, "learning_rate": 1.9327921776553768e-05, "loss": 0.5296, "step": 11157 }, { "epoch": 0.23664397361667833, "grad_norm": 0.3473156690597534, "learning_rate": 1.9327801574750565e-05, "loss": 0.4933, "step": 11158 }, { "epoch": 0.23666518207461135, "grad_norm": 0.3242391347885132, "learning_rate": 1.9327681362573062e-05, "loss": 0.5278, "step": 11159 }, { "epoch": 0.23668639053254437, "grad_norm": 0.36106663942337036, "learning_rate": 1.9327561140021395e-05, "loss": 0.6074, "step": 11160 }, { "epoch": 0.2367075989904774, "grad_norm": 0.4022527039051056, "learning_rate": 1.932744090709569e-05, "loss": 0.5838, "step": 11161 }, { "epoch": 0.23672880744841043, "grad_norm": 0.40260958671569824, "learning_rate": 1.932732066379609e-05, "loss": 0.539, "step": 11162 }, { "epoch": 0.23675001590634345, "grad_norm": 0.3513312339782715, "learning_rate": 1.9327200410122725e-05, "loss": 0.4453, "step": 11163 }, { "epoch": 0.23677122436427647, "grad_norm": 0.31709256768226624, "learning_rate": 1.932708014607573e-05, "loss": 0.5253, "step": 11164 }, { "epoch": 0.2367924328222095, "grad_norm": 0.31663262844085693, "learning_rate": 1.9326959871655234e-05, "loss": 0.5129, "step": 11165 }, { "epoch": 0.2368136412801425, "grad_norm": 0.42243438959121704, "learning_rate": 1.9326839586861376e-05, "loss": 0.5191, "step": 11166 }, { "epoch": 0.23683484973807553, "grad_norm": 0.33394768834114075, "learning_rate": 1.9326719291694286e-05, "loss": 0.5674, "step": 11167 }, { "epoch": 0.23685605819600858, "grad_norm": 0.337319016456604, "learning_rate": 1.9326598986154104e-05, "loss": 0.5188, "step": 11168 }, { "epoch": 0.2368772666539416, "grad_norm": 0.327846497297287, "learning_rate": 1.9326478670240957e-05, "loss": 0.5571, "step": 11169 }, { "epoch": 0.23689847511187462, "grad_norm": 0.34727856516838074, "learning_rate": 1.9326358343954984e-05, "loss": 0.5018, "step": 11170 }, { "epoch": 0.23691968356980764, "grad_norm": 0.3756174147129059, "learning_rate": 1.9326238007296313e-05, "loss": 0.5894, "step": 11171 }, { "epoch": 0.23694089202774066, "grad_norm": 0.38728106021881104, "learning_rate": 1.9326117660265084e-05, "loss": 0.4888, "step": 11172 }, { "epoch": 0.23696210048567368, "grad_norm": 0.5837518572807312, "learning_rate": 1.9325997302861427e-05, "loss": 0.5033, "step": 11173 }, { "epoch": 0.23698330894360672, "grad_norm": 0.3069940209388733, "learning_rate": 1.9325876935085473e-05, "loss": 0.4732, "step": 11174 }, { "epoch": 0.23700451740153974, "grad_norm": 0.38350775837898254, "learning_rate": 1.9325756556937363e-05, "loss": 0.5263, "step": 11175 }, { "epoch": 0.23702572585947276, "grad_norm": 0.3779592514038086, "learning_rate": 1.9325636168417226e-05, "loss": 0.4487, "step": 11176 }, { "epoch": 0.23704693431740578, "grad_norm": 0.3610670566558838, "learning_rate": 1.9325515769525202e-05, "loss": 0.4885, "step": 11177 }, { "epoch": 0.2370681427753388, "grad_norm": 0.33467379212379456, "learning_rate": 1.9325395360261418e-05, "loss": 0.5098, "step": 11178 }, { "epoch": 0.23708935123327182, "grad_norm": 0.3178282678127289, "learning_rate": 1.932527494062601e-05, "loss": 0.4319, "step": 11179 }, { "epoch": 0.23711055969120484, "grad_norm": 0.4086533784866333, "learning_rate": 1.932515451061911e-05, "loss": 0.6018, "step": 11180 }, { "epoch": 0.2371317681491379, "grad_norm": 0.3187120854854584, "learning_rate": 1.9325034070240856e-05, "loss": 0.502, "step": 11181 }, { "epoch": 0.2371529766070709, "grad_norm": 0.4741204082965851, "learning_rate": 1.932491361949138e-05, "loss": 0.4727, "step": 11182 }, { "epoch": 0.23717418506500393, "grad_norm": 0.3669133186340332, "learning_rate": 1.9324793158370817e-05, "loss": 0.5262, "step": 11183 }, { "epoch": 0.23719539352293695, "grad_norm": 0.40398499369621277, "learning_rate": 1.9324672686879296e-05, "loss": 0.5052, "step": 11184 }, { "epoch": 0.23721660198086997, "grad_norm": 0.3397756516933441, "learning_rate": 1.9324552205016962e-05, "loss": 0.5305, "step": 11185 }, { "epoch": 0.237237810438803, "grad_norm": 0.34416767954826355, "learning_rate": 1.9324431712783936e-05, "loss": 0.5575, "step": 11186 }, { "epoch": 0.237259018896736, "grad_norm": 0.31688636541366577, "learning_rate": 1.932431121018036e-05, "loss": 0.4774, "step": 11187 }, { "epoch": 0.23728022735466905, "grad_norm": 0.3521732985973358, "learning_rate": 1.9324190697206368e-05, "loss": 0.528, "step": 11188 }, { "epoch": 0.23730143581260207, "grad_norm": 0.33575674891471863, "learning_rate": 1.932407017386209e-05, "loss": 0.5335, "step": 11189 }, { "epoch": 0.2373226442705351, "grad_norm": 0.3371071219444275, "learning_rate": 1.9323949640147664e-05, "loss": 0.5679, "step": 11190 }, { "epoch": 0.2373438527284681, "grad_norm": 0.36530008912086487, "learning_rate": 1.9323829096063222e-05, "loss": 0.5332, "step": 11191 }, { "epoch": 0.23736506118640113, "grad_norm": 0.3530636429786682, "learning_rate": 1.9323708541608894e-05, "loss": 0.4824, "step": 11192 }, { "epoch": 0.23738626964433415, "grad_norm": 0.38742300868034363, "learning_rate": 1.9323587976784822e-05, "loss": 0.625, "step": 11193 }, { "epoch": 0.23740747810226717, "grad_norm": 0.3267858326435089, "learning_rate": 1.9323467401591136e-05, "loss": 0.5027, "step": 11194 }, { "epoch": 0.23742868656020022, "grad_norm": 0.3047237992286682, "learning_rate": 1.932334681602797e-05, "loss": 0.4445, "step": 11195 }, { "epoch": 0.23744989501813324, "grad_norm": 0.3524181842803955, "learning_rate": 1.932322622009546e-05, "loss": 0.5556, "step": 11196 }, { "epoch": 0.23747110347606626, "grad_norm": 0.34228515625, "learning_rate": 1.9323105613793736e-05, "loss": 0.6003, "step": 11197 }, { "epoch": 0.23749231193399928, "grad_norm": 0.35657182335853577, "learning_rate": 1.9322984997122938e-05, "loss": 0.5746, "step": 11198 }, { "epoch": 0.2375135203919323, "grad_norm": 0.3431822657585144, "learning_rate": 1.9322864370083195e-05, "loss": 0.5044, "step": 11199 }, { "epoch": 0.23753472884986532, "grad_norm": 0.3149816393852234, "learning_rate": 1.9322743732674648e-05, "loss": 0.5302, "step": 11200 }, { "epoch": 0.23755593730779834, "grad_norm": 0.35917508602142334, "learning_rate": 1.932262308489742e-05, "loss": 0.5487, "step": 11201 }, { "epoch": 0.23757714576573138, "grad_norm": 0.2903634011745453, "learning_rate": 1.9322502426751654e-05, "loss": 0.4642, "step": 11202 }, { "epoch": 0.2375983542236644, "grad_norm": 0.32180139422416687, "learning_rate": 1.9322381758237482e-05, "loss": 0.5237, "step": 11203 }, { "epoch": 0.23761956268159742, "grad_norm": 0.334031343460083, "learning_rate": 1.9322261079355037e-05, "loss": 0.6097, "step": 11204 }, { "epoch": 0.23764077113953044, "grad_norm": 0.3553369641304016, "learning_rate": 1.9322140390104458e-05, "loss": 0.5332, "step": 11205 }, { "epoch": 0.23766197959746346, "grad_norm": 0.33522650599479675, "learning_rate": 1.932201969048587e-05, "loss": 0.5608, "step": 11206 }, { "epoch": 0.23768318805539648, "grad_norm": 0.34940990805625916, "learning_rate": 1.9321898980499417e-05, "loss": 0.5325, "step": 11207 }, { "epoch": 0.2377043965133295, "grad_norm": 0.6572795510292053, "learning_rate": 1.9321778260145226e-05, "loss": 0.3969, "step": 11208 }, { "epoch": 0.23772560497126255, "grad_norm": 0.3368901014328003, "learning_rate": 1.9321657529423436e-05, "loss": 0.5656, "step": 11209 }, { "epoch": 0.23774681342919557, "grad_norm": 0.35309743881225586, "learning_rate": 1.9321536788334177e-05, "loss": 0.4531, "step": 11210 }, { "epoch": 0.2377680218871286, "grad_norm": 0.2934820055961609, "learning_rate": 1.932141603687759e-05, "loss": 0.4256, "step": 11211 }, { "epoch": 0.2377892303450616, "grad_norm": 0.3797356188297272, "learning_rate": 1.93212952750538e-05, "loss": 0.5637, "step": 11212 }, { "epoch": 0.23781043880299463, "grad_norm": 0.3425653278827667, "learning_rate": 1.9321174502862952e-05, "loss": 0.5295, "step": 11213 }, { "epoch": 0.23783164726092765, "grad_norm": 0.38351380825042725, "learning_rate": 1.9321053720305172e-05, "loss": 0.489, "step": 11214 }, { "epoch": 0.2378528557188607, "grad_norm": 0.33387839794158936, "learning_rate": 1.9320932927380596e-05, "loss": 0.5128, "step": 11215 }, { "epoch": 0.23787406417679371, "grad_norm": 0.34236469864845276, "learning_rate": 1.932081212408936e-05, "loss": 0.5358, "step": 11216 }, { "epoch": 0.23789527263472673, "grad_norm": 0.34081390500068665, "learning_rate": 1.93206913104316e-05, "loss": 0.5197, "step": 11217 }, { "epoch": 0.23791648109265975, "grad_norm": 0.3850460946559906, "learning_rate": 1.9320570486407444e-05, "loss": 0.4656, "step": 11218 }, { "epoch": 0.23793768955059277, "grad_norm": 0.3018169701099396, "learning_rate": 1.932044965201703e-05, "loss": 0.4507, "step": 11219 }, { "epoch": 0.2379588980085258, "grad_norm": 0.29709771275520325, "learning_rate": 1.9320328807260497e-05, "loss": 0.5096, "step": 11220 }, { "epoch": 0.2379801064664588, "grad_norm": 0.3314925730228424, "learning_rate": 1.9320207952137973e-05, "loss": 0.5266, "step": 11221 }, { "epoch": 0.23800131492439186, "grad_norm": 0.33720827102661133, "learning_rate": 1.9320087086649596e-05, "loss": 0.5948, "step": 11222 }, { "epoch": 0.23802252338232488, "grad_norm": 0.3512665927410126, "learning_rate": 1.93199662107955e-05, "loss": 0.4598, "step": 11223 }, { "epoch": 0.2380437318402579, "grad_norm": 0.3665581941604614, "learning_rate": 1.9319845324575816e-05, "loss": 0.5211, "step": 11224 }, { "epoch": 0.23806494029819092, "grad_norm": 0.3424476087093353, "learning_rate": 1.931972442799068e-05, "loss": 0.5402, "step": 11225 }, { "epoch": 0.23808614875612394, "grad_norm": 0.3039427697658539, "learning_rate": 1.931960352104023e-05, "loss": 0.4569, "step": 11226 }, { "epoch": 0.23810735721405696, "grad_norm": 0.33109959959983826, "learning_rate": 1.93194826037246e-05, "loss": 0.5687, "step": 11227 }, { "epoch": 0.23812856567198998, "grad_norm": 0.3445996642112732, "learning_rate": 1.9319361676043917e-05, "loss": 0.4627, "step": 11228 }, { "epoch": 0.23814977412992303, "grad_norm": 0.3226597309112549, "learning_rate": 1.9319240737998325e-05, "loss": 0.5011, "step": 11229 }, { "epoch": 0.23817098258785604, "grad_norm": 0.37143078446388245, "learning_rate": 1.9319119789587953e-05, "loss": 0.5434, "step": 11230 }, { "epoch": 0.23819219104578906, "grad_norm": 0.35181495547294617, "learning_rate": 1.9318998830812938e-05, "loss": 0.5208, "step": 11231 }, { "epoch": 0.23821339950372208, "grad_norm": 0.37358736991882324, "learning_rate": 1.931887786167341e-05, "loss": 0.562, "step": 11232 }, { "epoch": 0.2382346079616551, "grad_norm": 0.3654516041278839, "learning_rate": 1.931875688216951e-05, "loss": 0.4646, "step": 11233 }, { "epoch": 0.23825581641958812, "grad_norm": 0.3968934714794159, "learning_rate": 1.931863589230137e-05, "loss": 0.5412, "step": 11234 }, { "epoch": 0.23827702487752114, "grad_norm": 0.35131222009658813, "learning_rate": 1.9318514892069123e-05, "loss": 0.4636, "step": 11235 }, { "epoch": 0.2382982333354542, "grad_norm": 0.34719252586364746, "learning_rate": 1.9318393881472906e-05, "loss": 0.6309, "step": 11236 }, { "epoch": 0.2383194417933872, "grad_norm": 0.31828293204307556, "learning_rate": 1.9318272860512848e-05, "loss": 0.5223, "step": 11237 }, { "epoch": 0.23834065025132023, "grad_norm": 0.33725085854530334, "learning_rate": 1.9318151829189094e-05, "loss": 0.5191, "step": 11238 }, { "epoch": 0.23836185870925325, "grad_norm": 0.3604021370410919, "learning_rate": 1.9318030787501767e-05, "loss": 0.5203, "step": 11239 }, { "epoch": 0.23838306716718627, "grad_norm": 0.43315690755844116, "learning_rate": 1.931790973545101e-05, "loss": 0.6218, "step": 11240 }, { "epoch": 0.2384042756251193, "grad_norm": 0.3135167062282562, "learning_rate": 1.931778867303695e-05, "loss": 0.5639, "step": 11241 }, { "epoch": 0.2384254840830523, "grad_norm": 0.3358898460865021, "learning_rate": 1.9317667600259737e-05, "loss": 0.5301, "step": 11242 }, { "epoch": 0.23844669254098536, "grad_norm": 0.29867011308670044, "learning_rate": 1.9317546517119488e-05, "loss": 0.4583, "step": 11243 }, { "epoch": 0.23846790099891838, "grad_norm": 0.5708189010620117, "learning_rate": 1.9317425423616343e-05, "loss": 0.6075, "step": 11244 }, { "epoch": 0.2384891094568514, "grad_norm": 0.3667939305305481, "learning_rate": 1.931730431975044e-05, "loss": 0.587, "step": 11245 }, { "epoch": 0.23851031791478441, "grad_norm": 0.3547617495059967, "learning_rate": 1.9317183205521915e-05, "loss": 0.4967, "step": 11246 }, { "epoch": 0.23853152637271743, "grad_norm": 0.3344365656375885, "learning_rate": 1.9317062080930896e-05, "loss": 0.5848, "step": 11247 }, { "epoch": 0.23855273483065045, "grad_norm": 0.3569394052028656, "learning_rate": 1.9316940945977523e-05, "loss": 0.5397, "step": 11248 }, { "epoch": 0.2385739432885835, "grad_norm": 0.29863375425338745, "learning_rate": 1.931681980066193e-05, "loss": 0.4532, "step": 11249 }, { "epoch": 0.23859515174651652, "grad_norm": 0.318585604429245, "learning_rate": 1.9316698644984248e-05, "loss": 0.5552, "step": 11250 }, { "epoch": 0.23861636020444954, "grad_norm": 0.325792521238327, "learning_rate": 1.931657747894462e-05, "loss": 0.4947, "step": 11251 }, { "epoch": 0.23863756866238256, "grad_norm": 0.2987338602542877, "learning_rate": 1.931645630254317e-05, "loss": 0.5272, "step": 11252 }, { "epoch": 0.23865877712031558, "grad_norm": 0.3775393068790436, "learning_rate": 1.931633511578004e-05, "loss": 0.4914, "step": 11253 }, { "epoch": 0.2386799855782486, "grad_norm": 0.37272346019744873, "learning_rate": 1.9316213918655365e-05, "loss": 0.6034, "step": 11254 }, { "epoch": 0.23870119403618162, "grad_norm": 0.34103044867515564, "learning_rate": 1.9316092711169277e-05, "loss": 0.5358, "step": 11255 }, { "epoch": 0.23872240249411467, "grad_norm": 0.3221433758735657, "learning_rate": 1.9315971493321912e-05, "loss": 0.4606, "step": 11256 }, { "epoch": 0.23874361095204769, "grad_norm": 0.3345211148262024, "learning_rate": 1.9315850265113404e-05, "loss": 0.5313, "step": 11257 }, { "epoch": 0.2387648194099807, "grad_norm": 0.359413206577301, "learning_rate": 1.9315729026543886e-05, "loss": 0.5621, "step": 11258 }, { "epoch": 0.23878602786791372, "grad_norm": 0.3511926531791687, "learning_rate": 1.93156077776135e-05, "loss": 0.536, "step": 11259 }, { "epoch": 0.23880723632584674, "grad_norm": 0.3509361147880554, "learning_rate": 1.931548651832237e-05, "loss": 0.4946, "step": 11260 }, { "epoch": 0.23882844478377976, "grad_norm": 0.3809434175491333, "learning_rate": 1.931536524867064e-05, "loss": 0.5872, "step": 11261 }, { "epoch": 0.23884965324171278, "grad_norm": 0.33089321851730347, "learning_rate": 1.931524396865844e-05, "loss": 0.5751, "step": 11262 }, { "epoch": 0.23887086169964583, "grad_norm": 0.3172522485256195, "learning_rate": 1.931512267828591e-05, "loss": 0.5649, "step": 11263 }, { "epoch": 0.23889207015757885, "grad_norm": 0.33014172315597534, "learning_rate": 1.9315001377553176e-05, "loss": 0.5449, "step": 11264 }, { "epoch": 0.23891327861551187, "grad_norm": 0.35273653268814087, "learning_rate": 1.9314880066460383e-05, "loss": 0.4953, "step": 11265 }, { "epoch": 0.2389344870734449, "grad_norm": 0.27483096718788147, "learning_rate": 1.931475874500766e-05, "loss": 0.4224, "step": 11266 }, { "epoch": 0.2389556955313779, "grad_norm": 0.3670564293861389, "learning_rate": 1.9314637413195144e-05, "loss": 0.4927, "step": 11267 }, { "epoch": 0.23897690398931093, "grad_norm": 0.3760809302330017, "learning_rate": 1.931451607102297e-05, "loss": 0.5427, "step": 11268 }, { "epoch": 0.23899811244724395, "grad_norm": 0.31724753975868225, "learning_rate": 1.931439471849127e-05, "loss": 0.5202, "step": 11269 }, { "epoch": 0.239019320905177, "grad_norm": 0.38264936208724976, "learning_rate": 1.931427335560018e-05, "loss": 0.5066, "step": 11270 }, { "epoch": 0.23904052936311002, "grad_norm": 0.29773566126823425, "learning_rate": 1.931415198234984e-05, "loss": 0.485, "step": 11271 }, { "epoch": 0.23906173782104304, "grad_norm": 0.36126482486724854, "learning_rate": 1.9314030598740377e-05, "loss": 0.4567, "step": 11272 }, { "epoch": 0.23908294627897606, "grad_norm": 0.36438223719596863, "learning_rate": 1.9313909204771932e-05, "loss": 0.5226, "step": 11273 }, { "epoch": 0.23910415473690907, "grad_norm": 0.3378654718399048, "learning_rate": 1.931378780044464e-05, "loss": 0.5261, "step": 11274 }, { "epoch": 0.2391253631948421, "grad_norm": 0.35355642437934875, "learning_rate": 1.9313666385758633e-05, "loss": 0.5251, "step": 11275 }, { "epoch": 0.23914657165277511, "grad_norm": 0.3509722352027893, "learning_rate": 1.9313544960714043e-05, "loss": 0.5559, "step": 11276 }, { "epoch": 0.23916778011070816, "grad_norm": 0.3421560525894165, "learning_rate": 1.9313423525311016e-05, "loss": 0.5045, "step": 11277 }, { "epoch": 0.23918898856864118, "grad_norm": 0.3206944763660431, "learning_rate": 1.9313302079549678e-05, "loss": 0.5573, "step": 11278 }, { "epoch": 0.2392101970265742, "grad_norm": 0.3343393802642822, "learning_rate": 1.9313180623430163e-05, "loss": 0.4783, "step": 11279 }, { "epoch": 0.23923140548450722, "grad_norm": 0.3767835795879364, "learning_rate": 1.931305915695261e-05, "loss": 0.5159, "step": 11280 }, { "epoch": 0.23925261394244024, "grad_norm": 0.3401348888874054, "learning_rate": 1.9312937680117157e-05, "loss": 0.4774, "step": 11281 }, { "epoch": 0.23927382240037326, "grad_norm": 0.3650902509689331, "learning_rate": 1.9312816192923934e-05, "loss": 0.4566, "step": 11282 }, { "epoch": 0.23929503085830628, "grad_norm": 0.34130632877349854, "learning_rate": 1.9312694695373076e-05, "loss": 0.5104, "step": 11283 }, { "epoch": 0.23931623931623933, "grad_norm": 0.43845438957214355, "learning_rate": 1.931257318746472e-05, "loss": 0.5447, "step": 11284 }, { "epoch": 0.23933744777417235, "grad_norm": 0.3514862060546875, "learning_rate": 1.9312451669199005e-05, "loss": 0.5747, "step": 11285 }, { "epoch": 0.23935865623210537, "grad_norm": 0.3751333951950073, "learning_rate": 1.9312330140576058e-05, "loss": 0.531, "step": 11286 }, { "epoch": 0.23937986469003839, "grad_norm": 0.3425222635269165, "learning_rate": 1.931220860159602e-05, "loss": 0.5461, "step": 11287 }, { "epoch": 0.2394010731479714, "grad_norm": 0.3190184533596039, "learning_rate": 1.9312087052259025e-05, "loss": 0.5914, "step": 11288 }, { "epoch": 0.23942228160590442, "grad_norm": 0.38632968068122864, "learning_rate": 1.931196549256521e-05, "loss": 0.5179, "step": 11289 }, { "epoch": 0.23944349006383747, "grad_norm": 0.34268271923065186, "learning_rate": 1.93118439225147e-05, "loss": 0.5708, "step": 11290 }, { "epoch": 0.2394646985217705, "grad_norm": 0.3352015018463135, "learning_rate": 1.9311722342107643e-05, "loss": 0.5339, "step": 11291 }, { "epoch": 0.2394859069797035, "grad_norm": 0.3421247899532318, "learning_rate": 1.931160075134417e-05, "loss": 0.523, "step": 11292 }, { "epoch": 0.23950711543763653, "grad_norm": 0.30207884311676025, "learning_rate": 1.9311479150224416e-05, "loss": 0.4743, "step": 11293 }, { "epoch": 0.23952832389556955, "grad_norm": 0.3543870151042938, "learning_rate": 1.9311357538748515e-05, "loss": 0.5299, "step": 11294 }, { "epoch": 0.23954953235350257, "grad_norm": 0.34660759568214417, "learning_rate": 1.9311235916916602e-05, "loss": 0.5057, "step": 11295 }, { "epoch": 0.2395707408114356, "grad_norm": 0.30887946486473083, "learning_rate": 1.9311114284728814e-05, "loss": 0.4308, "step": 11296 }, { "epoch": 0.23959194926936864, "grad_norm": 0.34613898396492004, "learning_rate": 1.9310992642185283e-05, "loss": 0.5012, "step": 11297 }, { "epoch": 0.23961315772730166, "grad_norm": 0.37256911396980286, "learning_rate": 1.931087098928615e-05, "loss": 0.5101, "step": 11298 }, { "epoch": 0.23963436618523468, "grad_norm": 0.31920763850212097, "learning_rate": 1.9310749326031545e-05, "loss": 0.4876, "step": 11299 }, { "epoch": 0.2396555746431677, "grad_norm": 0.33792972564697266, "learning_rate": 1.931062765242161e-05, "loss": 0.5216, "step": 11300 }, { "epoch": 0.23967678310110072, "grad_norm": 0.3310207426548004, "learning_rate": 1.9310505968456473e-05, "loss": 0.4613, "step": 11301 }, { "epoch": 0.23969799155903374, "grad_norm": 0.3326619267463684, "learning_rate": 1.931038427413627e-05, "loss": 0.5284, "step": 11302 }, { "epoch": 0.23971920001696675, "grad_norm": 0.37445902824401855, "learning_rate": 1.931026256946114e-05, "loss": 0.6094, "step": 11303 }, { "epoch": 0.2397404084748998, "grad_norm": 0.3187568485736847, "learning_rate": 1.9310140854431217e-05, "loss": 0.5524, "step": 11304 }, { "epoch": 0.23976161693283282, "grad_norm": 0.3744156062602997, "learning_rate": 1.9310019129046637e-05, "loss": 0.6015, "step": 11305 }, { "epoch": 0.23978282539076584, "grad_norm": 0.3404194414615631, "learning_rate": 1.9309897393307536e-05, "loss": 0.5824, "step": 11306 }, { "epoch": 0.23980403384869886, "grad_norm": 0.5490517616271973, "learning_rate": 1.9309775647214044e-05, "loss": 0.5249, "step": 11307 }, { "epoch": 0.23982524230663188, "grad_norm": 0.5892139673233032, "learning_rate": 1.9309653890766302e-05, "loss": 0.4928, "step": 11308 }, { "epoch": 0.2398464507645649, "grad_norm": 0.29216501116752625, "learning_rate": 1.9309532123964447e-05, "loss": 0.4429, "step": 11309 }, { "epoch": 0.23986765922249792, "grad_norm": 0.33694517612457275, "learning_rate": 1.9309410346808607e-05, "loss": 0.47, "step": 11310 }, { "epoch": 0.23988886768043097, "grad_norm": 0.3312638998031616, "learning_rate": 1.9309288559298923e-05, "loss": 0.5378, "step": 11311 }, { "epoch": 0.239910076138364, "grad_norm": 0.33065077662467957, "learning_rate": 1.930916676143553e-05, "loss": 0.5398, "step": 11312 }, { "epoch": 0.239931284596297, "grad_norm": 0.38761940598487854, "learning_rate": 1.930904495321856e-05, "loss": 0.5291, "step": 11313 }, { "epoch": 0.23995249305423003, "grad_norm": 0.3680831789970398, "learning_rate": 1.9308923134648156e-05, "loss": 0.555, "step": 11314 }, { "epoch": 0.23997370151216305, "grad_norm": 0.3558448255062103, "learning_rate": 1.9308801305724443e-05, "loss": 0.6137, "step": 11315 }, { "epoch": 0.23999490997009607, "grad_norm": 0.2944558560848236, "learning_rate": 1.9308679466447566e-05, "loss": 0.4514, "step": 11316 }, { "epoch": 0.24001611842802908, "grad_norm": 0.31549301743507385, "learning_rate": 1.9308557616817655e-05, "loss": 0.5604, "step": 11317 }, { "epoch": 0.24003732688596213, "grad_norm": 0.3390403091907501, "learning_rate": 1.9308435756834847e-05, "loss": 0.4958, "step": 11318 }, { "epoch": 0.24005853534389515, "grad_norm": 0.33313581347465515, "learning_rate": 1.930831388649928e-05, "loss": 0.5316, "step": 11319 }, { "epoch": 0.24007974380182817, "grad_norm": 0.7847219109535217, "learning_rate": 1.9308192005811084e-05, "loss": 0.5566, "step": 11320 }, { "epoch": 0.2401009522597612, "grad_norm": 0.3345395028591156, "learning_rate": 1.9308070114770396e-05, "loss": 0.5246, "step": 11321 }, { "epoch": 0.2401221607176942, "grad_norm": 0.35355329513549805, "learning_rate": 1.9307948213377357e-05, "loss": 0.5691, "step": 11322 }, { "epoch": 0.24014336917562723, "grad_norm": 0.32431185245513916, "learning_rate": 1.93078263016321e-05, "loss": 0.6046, "step": 11323 }, { "epoch": 0.24016457763356025, "grad_norm": 0.3645971417427063, "learning_rate": 1.9307704379534758e-05, "loss": 0.5561, "step": 11324 }, { "epoch": 0.2401857860914933, "grad_norm": 0.33772012591362, "learning_rate": 1.9307582447085466e-05, "loss": 0.5764, "step": 11325 }, { "epoch": 0.24020699454942632, "grad_norm": 0.33862030506134033, "learning_rate": 1.9307460504284363e-05, "loss": 0.4708, "step": 11326 }, { "epoch": 0.24022820300735934, "grad_norm": 0.3901588022708893, "learning_rate": 1.9307338551131584e-05, "loss": 0.6067, "step": 11327 }, { "epoch": 0.24024941146529236, "grad_norm": 0.35309097170829773, "learning_rate": 1.930721658762726e-05, "loss": 0.6094, "step": 11328 }, { "epoch": 0.24027061992322538, "grad_norm": 0.36542803049087524, "learning_rate": 1.9307094613771536e-05, "loss": 0.4874, "step": 11329 }, { "epoch": 0.2402918283811584, "grad_norm": 0.4073358178138733, "learning_rate": 1.930697262956454e-05, "loss": 0.5539, "step": 11330 }, { "epoch": 0.24031303683909144, "grad_norm": 0.3218143582344055, "learning_rate": 1.9306850635006407e-05, "loss": 0.4602, "step": 11331 }, { "epoch": 0.24033424529702446, "grad_norm": 0.30057448148727417, "learning_rate": 1.9306728630097282e-05, "loss": 0.5433, "step": 11332 }, { "epoch": 0.24035545375495748, "grad_norm": 0.5514389872550964, "learning_rate": 1.9306606614837288e-05, "loss": 0.5702, "step": 11333 }, { "epoch": 0.2403766622128905, "grad_norm": 0.3263290524482727, "learning_rate": 1.930648458922657e-05, "loss": 0.4559, "step": 11334 }, { "epoch": 0.24039787067082352, "grad_norm": 0.31304922699928284, "learning_rate": 1.930636255326526e-05, "loss": 0.4766, "step": 11335 }, { "epoch": 0.24041907912875654, "grad_norm": 0.3074035942554474, "learning_rate": 1.93062405069535e-05, "loss": 0.4844, "step": 11336 }, { "epoch": 0.24044028758668956, "grad_norm": 0.3257731795310974, "learning_rate": 1.9306118450291413e-05, "loss": 0.4972, "step": 11337 }, { "epoch": 0.2404614960446226, "grad_norm": 0.40419498085975647, "learning_rate": 1.9305996383279143e-05, "loss": 0.4866, "step": 11338 }, { "epoch": 0.24048270450255563, "grad_norm": 0.35742613673210144, "learning_rate": 1.9305874305916825e-05, "loss": 0.5531, "step": 11339 }, { "epoch": 0.24050391296048865, "grad_norm": 0.3416730463504791, "learning_rate": 1.9305752218204597e-05, "loss": 0.5095, "step": 11340 }, { "epoch": 0.24052512141842167, "grad_norm": 0.33190834522247314, "learning_rate": 1.930563012014259e-05, "loss": 0.4939, "step": 11341 }, { "epoch": 0.2405463298763547, "grad_norm": 0.35740068554878235, "learning_rate": 1.930550801173094e-05, "loss": 0.5672, "step": 11342 }, { "epoch": 0.2405675383342877, "grad_norm": 0.335221529006958, "learning_rate": 1.930538589296979e-05, "loss": 0.5491, "step": 11343 }, { "epoch": 0.24058874679222073, "grad_norm": 0.35783809423446655, "learning_rate": 1.9305263763859267e-05, "loss": 0.4947, "step": 11344 }, { "epoch": 0.24060995525015377, "grad_norm": 0.3255480229854584, "learning_rate": 1.930514162439951e-05, "loss": 0.5606, "step": 11345 }, { "epoch": 0.2406311637080868, "grad_norm": 0.3084178566932678, "learning_rate": 1.9305019474590657e-05, "loss": 0.5054, "step": 11346 }, { "epoch": 0.2406523721660198, "grad_norm": 0.3685813248157501, "learning_rate": 1.930489731443284e-05, "loss": 0.5478, "step": 11347 }, { "epoch": 0.24067358062395283, "grad_norm": 0.410400390625, "learning_rate": 1.93047751439262e-05, "loss": 0.5694, "step": 11348 }, { "epoch": 0.24069478908188585, "grad_norm": 0.3159596025943756, "learning_rate": 1.9304652963070868e-05, "loss": 0.4707, "step": 11349 }, { "epoch": 0.24071599753981887, "grad_norm": 0.3332228660583496, "learning_rate": 1.930453077186698e-05, "loss": 0.4747, "step": 11350 }, { "epoch": 0.2407372059977519, "grad_norm": 0.344156950712204, "learning_rate": 1.930440857031468e-05, "loss": 0.6, "step": 11351 }, { "epoch": 0.24075841445568494, "grad_norm": 0.32853612303733826, "learning_rate": 1.930428635841409e-05, "loss": 0.5244, "step": 11352 }, { "epoch": 0.24077962291361796, "grad_norm": 0.3287535309791565, "learning_rate": 1.930416413616536e-05, "loss": 0.5593, "step": 11353 }, { "epoch": 0.24080083137155098, "grad_norm": 0.4328566789627075, "learning_rate": 1.9304041903568614e-05, "loss": 0.508, "step": 11354 }, { "epoch": 0.240822039829484, "grad_norm": 0.3550536632537842, "learning_rate": 1.9303919660623996e-05, "loss": 0.5404, "step": 11355 }, { "epoch": 0.24084324828741702, "grad_norm": 0.35107964277267456, "learning_rate": 1.930379740733164e-05, "loss": 0.4967, "step": 11356 }, { "epoch": 0.24086445674535004, "grad_norm": 0.32771748304367065, "learning_rate": 1.9303675143691683e-05, "loss": 0.5409, "step": 11357 }, { "epoch": 0.24088566520328306, "grad_norm": 0.33294543623924255, "learning_rate": 1.930355286970426e-05, "loss": 0.4489, "step": 11358 }, { "epoch": 0.2409068736612161, "grad_norm": 0.35958632826805115, "learning_rate": 1.93034305853695e-05, "loss": 0.5523, "step": 11359 }, { "epoch": 0.24092808211914912, "grad_norm": 0.4888606667518616, "learning_rate": 1.930330829068755e-05, "loss": 0.5503, "step": 11360 }, { "epoch": 0.24094929057708214, "grad_norm": 0.4074760675430298, "learning_rate": 1.930318598565854e-05, "loss": 0.6746, "step": 11361 }, { "epoch": 0.24097049903501516, "grad_norm": 0.34335044026374817, "learning_rate": 1.9303063670282606e-05, "loss": 0.443, "step": 11362 }, { "epoch": 0.24099170749294818, "grad_norm": 0.33651503920555115, "learning_rate": 1.9302941344559887e-05, "loss": 0.6004, "step": 11363 }, { "epoch": 0.2410129159508812, "grad_norm": 0.3410261869430542, "learning_rate": 1.930281900849052e-05, "loss": 0.5339, "step": 11364 }, { "epoch": 0.24103412440881425, "grad_norm": 0.31065306067466736, "learning_rate": 1.9302696662074636e-05, "loss": 0.5152, "step": 11365 }, { "epoch": 0.24105533286674727, "grad_norm": 0.36656808853149414, "learning_rate": 1.930257430531237e-05, "loss": 0.5083, "step": 11366 }, { "epoch": 0.2410765413246803, "grad_norm": 0.35281217098236084, "learning_rate": 1.9302451938203867e-05, "loss": 0.5277, "step": 11367 }, { "epoch": 0.2410977497826133, "grad_norm": 0.7540653944015503, "learning_rate": 1.9302329560749254e-05, "loss": 0.606, "step": 11368 }, { "epoch": 0.24111895824054633, "grad_norm": 0.6014519333839417, "learning_rate": 1.9302207172948673e-05, "loss": 0.5885, "step": 11369 }, { "epoch": 0.24114016669847935, "grad_norm": 0.354141503572464, "learning_rate": 1.9302084774802262e-05, "loss": 0.5197, "step": 11370 }, { "epoch": 0.24116137515641237, "grad_norm": 0.320779949426651, "learning_rate": 1.9301962366310144e-05, "loss": 0.4998, "step": 11371 }, { "epoch": 0.2411825836143454, "grad_norm": 0.3097008168697357, "learning_rate": 1.9301839947472473e-05, "loss": 0.5071, "step": 11372 }, { "epoch": 0.24120379207227843, "grad_norm": 0.34268784523010254, "learning_rate": 1.9301717518289372e-05, "loss": 0.5664, "step": 11373 }, { "epoch": 0.24122500053021145, "grad_norm": 0.32265931367874146, "learning_rate": 1.930159507876098e-05, "loss": 0.4497, "step": 11374 }, { "epoch": 0.24124620898814447, "grad_norm": 0.3362849950790405, "learning_rate": 1.930147262888744e-05, "loss": 0.4937, "step": 11375 }, { "epoch": 0.2412674174460775, "grad_norm": 0.3206510543823242, "learning_rate": 1.930135016866888e-05, "loss": 0.4609, "step": 11376 }, { "epoch": 0.2412886259040105, "grad_norm": 0.3622172474861145, "learning_rate": 1.9301227698105437e-05, "loss": 0.5356, "step": 11377 }, { "epoch": 0.24130983436194353, "grad_norm": 0.3514592945575714, "learning_rate": 1.930110521719725e-05, "loss": 0.4819, "step": 11378 }, { "epoch": 0.24133104281987658, "grad_norm": 0.3358970880508423, "learning_rate": 1.9300982725944456e-05, "loss": 0.4935, "step": 11379 }, { "epoch": 0.2413522512778096, "grad_norm": 0.342968225479126, "learning_rate": 1.930086022434719e-05, "loss": 0.4761, "step": 11380 }, { "epoch": 0.24137345973574262, "grad_norm": 0.32766568660736084, "learning_rate": 1.930073771240559e-05, "loss": 0.5366, "step": 11381 }, { "epoch": 0.24139466819367564, "grad_norm": 0.31548628211021423, "learning_rate": 1.9300615190119788e-05, "loss": 0.504, "step": 11382 }, { "epoch": 0.24141587665160866, "grad_norm": 0.37510329484939575, "learning_rate": 1.9300492657489922e-05, "loss": 0.6223, "step": 11383 }, { "epoch": 0.24143708510954168, "grad_norm": 0.33045580983161926, "learning_rate": 1.930037011451613e-05, "loss": 0.534, "step": 11384 }, { "epoch": 0.2414582935674747, "grad_norm": 0.3115500807762146, "learning_rate": 1.9300247561198546e-05, "loss": 0.5212, "step": 11385 }, { "epoch": 0.24147950202540774, "grad_norm": 0.35293257236480713, "learning_rate": 1.930012499753731e-05, "loss": 0.4923, "step": 11386 }, { "epoch": 0.24150071048334076, "grad_norm": 0.33543673157691956, "learning_rate": 1.930000242353255e-05, "loss": 0.5538, "step": 11387 }, { "epoch": 0.24152191894127378, "grad_norm": 0.350541889667511, "learning_rate": 1.9299879839184415e-05, "loss": 0.503, "step": 11388 }, { "epoch": 0.2415431273992068, "grad_norm": 0.3594421446323395, "learning_rate": 1.929975724449303e-05, "loss": 0.5887, "step": 11389 }, { "epoch": 0.24156433585713982, "grad_norm": 0.3121742904186249, "learning_rate": 1.929963463945854e-05, "loss": 0.465, "step": 11390 }, { "epoch": 0.24158554431507284, "grad_norm": 0.30742499232292175, "learning_rate": 1.9299512024081072e-05, "loss": 0.462, "step": 11391 }, { "epoch": 0.24160675277300586, "grad_norm": 0.38802215456962585, "learning_rate": 1.929938939836077e-05, "loss": 0.6483, "step": 11392 }, { "epoch": 0.2416279612309389, "grad_norm": 0.34123241901397705, "learning_rate": 1.9299266762297767e-05, "loss": 0.4998, "step": 11393 }, { "epoch": 0.24164916968887193, "grad_norm": 0.34087568521499634, "learning_rate": 1.9299144115892203e-05, "loss": 0.5331, "step": 11394 }, { "epoch": 0.24167037814680495, "grad_norm": 0.30978071689605713, "learning_rate": 1.929902145914421e-05, "loss": 0.5541, "step": 11395 }, { "epoch": 0.24169158660473797, "grad_norm": 0.345019668340683, "learning_rate": 1.9298898792053923e-05, "loss": 0.5167, "step": 11396 }, { "epoch": 0.241712795062671, "grad_norm": 0.33960336446762085, "learning_rate": 1.9298776114621488e-05, "loss": 0.5302, "step": 11397 }, { "epoch": 0.241734003520604, "grad_norm": 0.4004516303539276, "learning_rate": 1.9298653426847028e-05, "loss": 0.5321, "step": 11398 }, { "epoch": 0.24175521197853703, "grad_norm": 0.315083771944046, "learning_rate": 1.9298530728730692e-05, "loss": 0.4837, "step": 11399 }, { "epoch": 0.24177642043647007, "grad_norm": 0.38750892877578735, "learning_rate": 1.929840802027261e-05, "loss": 0.5092, "step": 11400 }, { "epoch": 0.2417976288944031, "grad_norm": 0.3596692681312561, "learning_rate": 1.9298285301472917e-05, "loss": 0.5382, "step": 11401 }, { "epoch": 0.2418188373523361, "grad_norm": 0.31040170788764954, "learning_rate": 1.9298162572331753e-05, "loss": 0.5531, "step": 11402 }, { "epoch": 0.24184004581026913, "grad_norm": 0.3777475655078888, "learning_rate": 1.9298039832849252e-05, "loss": 0.496, "step": 11403 }, { "epoch": 0.24186125426820215, "grad_norm": 0.34459295868873596, "learning_rate": 1.9297917083025553e-05, "loss": 0.4988, "step": 11404 }, { "epoch": 0.24188246272613517, "grad_norm": 0.35159701108932495, "learning_rate": 1.9297794322860794e-05, "loss": 0.6046, "step": 11405 }, { "epoch": 0.24190367118406822, "grad_norm": 0.3678359389305115, "learning_rate": 1.9297671552355105e-05, "loss": 0.6102, "step": 11406 }, { "epoch": 0.24192487964200124, "grad_norm": 0.3448792099952698, "learning_rate": 1.9297548771508627e-05, "loss": 0.4292, "step": 11407 }, { "epoch": 0.24194608809993426, "grad_norm": 0.306191623210907, "learning_rate": 1.9297425980321497e-05, "loss": 0.5309, "step": 11408 }, { "epoch": 0.24196729655786728, "grad_norm": 0.3234859108924866, "learning_rate": 1.9297303178793853e-05, "loss": 0.5585, "step": 11409 }, { "epoch": 0.2419885050158003, "grad_norm": 0.33453088998794556, "learning_rate": 1.9297180366925827e-05, "loss": 0.5604, "step": 11410 }, { "epoch": 0.24200971347373332, "grad_norm": 0.32702744007110596, "learning_rate": 1.9297057544717555e-05, "loss": 0.4998, "step": 11411 }, { "epoch": 0.24203092193166634, "grad_norm": 0.3018094301223755, "learning_rate": 1.929693471216918e-05, "loss": 0.4708, "step": 11412 }, { "epoch": 0.24205213038959938, "grad_norm": 0.34284600615501404, "learning_rate": 1.9296811869280835e-05, "loss": 0.5057, "step": 11413 }, { "epoch": 0.2420733388475324, "grad_norm": 0.38836419582366943, "learning_rate": 1.9296689016052658e-05, "loss": 0.5104, "step": 11414 }, { "epoch": 0.24209454730546542, "grad_norm": 0.36370155215263367, "learning_rate": 1.929656615248478e-05, "loss": 0.5006, "step": 11415 }, { "epoch": 0.24211575576339844, "grad_norm": 0.3759148120880127, "learning_rate": 1.929644327857734e-05, "loss": 0.491, "step": 11416 }, { "epoch": 0.24213696422133146, "grad_norm": 0.3364281952381134, "learning_rate": 1.9296320394330485e-05, "loss": 0.5662, "step": 11417 }, { "epoch": 0.24215817267926448, "grad_norm": 0.4138340353965759, "learning_rate": 1.9296197499744335e-05, "loss": 0.4533, "step": 11418 }, { "epoch": 0.2421793811371975, "grad_norm": 0.291913777589798, "learning_rate": 1.929607459481904e-05, "loss": 0.486, "step": 11419 }, { "epoch": 0.24220058959513055, "grad_norm": 0.36183202266693115, "learning_rate": 1.929595167955473e-05, "loss": 0.556, "step": 11420 }, { "epoch": 0.24222179805306357, "grad_norm": 0.7270643711090088, "learning_rate": 1.9295828753951544e-05, "loss": 0.5471, "step": 11421 }, { "epoch": 0.2422430065109966, "grad_norm": 0.3501243591308594, "learning_rate": 1.9295705818009617e-05, "loss": 0.5841, "step": 11422 }, { "epoch": 0.2422642149689296, "grad_norm": 0.3403482735157013, "learning_rate": 1.9295582871729087e-05, "loss": 0.6318, "step": 11423 }, { "epoch": 0.24228542342686263, "grad_norm": 0.3650652766227722, "learning_rate": 1.929545991511009e-05, "loss": 0.5686, "step": 11424 }, { "epoch": 0.24230663188479565, "grad_norm": 0.33596497774124146, "learning_rate": 1.9295336948152764e-05, "loss": 0.5748, "step": 11425 }, { "epoch": 0.24232784034272867, "grad_norm": 0.34021806716918945, "learning_rate": 1.929521397085724e-05, "loss": 0.4527, "step": 11426 }, { "epoch": 0.24234904880066171, "grad_norm": 0.32802698016166687, "learning_rate": 1.929509098322367e-05, "loss": 0.412, "step": 11427 }, { "epoch": 0.24237025725859473, "grad_norm": 0.38838711380958557, "learning_rate": 1.9294967985252174e-05, "loss": 0.4096, "step": 11428 }, { "epoch": 0.24239146571652775, "grad_norm": 0.3247382938861847, "learning_rate": 1.9294844976942896e-05, "loss": 0.5699, "step": 11429 }, { "epoch": 0.24241267417446077, "grad_norm": 0.380585253238678, "learning_rate": 1.929472195829597e-05, "loss": 0.5373, "step": 11430 }, { "epoch": 0.2424338826323938, "grad_norm": 0.32695165276527405, "learning_rate": 1.929459892931154e-05, "loss": 0.4584, "step": 11431 }, { "epoch": 0.2424550910903268, "grad_norm": 0.8382043838500977, "learning_rate": 1.9294475889989733e-05, "loss": 0.4432, "step": 11432 }, { "epoch": 0.24247629954825983, "grad_norm": 0.37303802371025085, "learning_rate": 1.9294352840330696e-05, "loss": 0.5534, "step": 11433 }, { "epoch": 0.24249750800619288, "grad_norm": 0.36466312408447266, "learning_rate": 1.9294229780334556e-05, "loss": 0.4915, "step": 11434 }, { "epoch": 0.2425187164641259, "grad_norm": 0.40278786420822144, "learning_rate": 1.9294106710001457e-05, "loss": 0.5647, "step": 11435 }, { "epoch": 0.24253992492205892, "grad_norm": 0.3283834457397461, "learning_rate": 1.929398362933153e-05, "loss": 0.5914, "step": 11436 }, { "epoch": 0.24256113337999194, "grad_norm": 0.38889628648757935, "learning_rate": 1.929386053832492e-05, "loss": 0.3927, "step": 11437 }, { "epoch": 0.24258234183792496, "grad_norm": 0.343858540058136, "learning_rate": 1.9293737436981756e-05, "loss": 0.53, "step": 11438 }, { "epoch": 0.24260355029585798, "grad_norm": 0.300902783870697, "learning_rate": 1.9293614325302176e-05, "loss": 0.4883, "step": 11439 }, { "epoch": 0.24262475875379103, "grad_norm": 0.32454535365104675, "learning_rate": 1.9293491203286325e-05, "loss": 0.4981, "step": 11440 }, { "epoch": 0.24264596721172405, "grad_norm": 0.33648481965065, "learning_rate": 1.929336807093433e-05, "loss": 0.5981, "step": 11441 }, { "epoch": 0.24266717566965706, "grad_norm": 0.3093678057193756, "learning_rate": 1.9293244928246328e-05, "loss": 0.5274, "step": 11442 }, { "epoch": 0.24268838412759008, "grad_norm": 0.31562498211860657, "learning_rate": 1.9293121775222464e-05, "loss": 0.4899, "step": 11443 }, { "epoch": 0.2427095925855231, "grad_norm": 0.31182825565338135, "learning_rate": 1.9292998611862873e-05, "loss": 0.5181, "step": 11444 }, { "epoch": 0.24273080104345612, "grad_norm": 0.3699550926685333, "learning_rate": 1.9292875438167685e-05, "loss": 0.4609, "step": 11445 }, { "epoch": 0.24275200950138914, "grad_norm": 0.3703986704349518, "learning_rate": 1.9292752254137045e-05, "loss": 0.5724, "step": 11446 }, { "epoch": 0.2427732179593222, "grad_norm": 0.3766869008541107, "learning_rate": 1.929262905977108e-05, "loss": 0.5169, "step": 11447 }, { "epoch": 0.2427944264172552, "grad_norm": 0.35677433013916016, "learning_rate": 1.9292505855069944e-05, "loss": 0.5371, "step": 11448 }, { "epoch": 0.24281563487518823, "grad_norm": 0.34489911794662476, "learning_rate": 1.9292382640033754e-05, "loss": 0.439, "step": 11449 }, { "epoch": 0.24283684333312125, "grad_norm": 0.32154446840286255, "learning_rate": 1.929225941466266e-05, "loss": 0.5449, "step": 11450 }, { "epoch": 0.24285805179105427, "grad_norm": 0.4628836214542389, "learning_rate": 1.92921361789568e-05, "loss": 0.4727, "step": 11451 }, { "epoch": 0.2428792602489873, "grad_norm": 0.30337288975715637, "learning_rate": 1.9292012932916305e-05, "loss": 0.4713, "step": 11452 }, { "epoch": 0.2429004687069203, "grad_norm": 0.4159506559371948, "learning_rate": 1.9291889676541313e-05, "loss": 0.5126, "step": 11453 }, { "epoch": 0.24292167716485336, "grad_norm": 0.4377661347389221, "learning_rate": 1.929176640983196e-05, "loss": 0.5672, "step": 11454 }, { "epoch": 0.24294288562278638, "grad_norm": 0.3640994131565094, "learning_rate": 1.9291643132788387e-05, "loss": 0.5818, "step": 11455 }, { "epoch": 0.2429640940807194, "grad_norm": 0.34607091546058655, "learning_rate": 1.929151984541073e-05, "loss": 0.5582, "step": 11456 }, { "epoch": 0.24298530253865241, "grad_norm": 0.3676936626434326, "learning_rate": 1.9291396547699122e-05, "loss": 0.5044, "step": 11457 }, { "epoch": 0.24300651099658543, "grad_norm": 0.35092705488204956, "learning_rate": 1.929127323965371e-05, "loss": 0.4997, "step": 11458 }, { "epoch": 0.24302771945451845, "grad_norm": 0.39143556356430054, "learning_rate": 1.929114992127462e-05, "loss": 0.5489, "step": 11459 }, { "epoch": 0.24304892791245147, "grad_norm": 0.35431307554244995, "learning_rate": 1.9291026592561993e-05, "loss": 0.5846, "step": 11460 }, { "epoch": 0.24307013637038452, "grad_norm": 0.34700679779052734, "learning_rate": 1.929090325351597e-05, "loss": 0.5536, "step": 11461 }, { "epoch": 0.24309134482831754, "grad_norm": 0.31118327379226685, "learning_rate": 1.9290779904136684e-05, "loss": 0.5463, "step": 11462 }, { "epoch": 0.24311255328625056, "grad_norm": 0.31510451436042786, "learning_rate": 1.9290656544424272e-05, "loss": 0.4341, "step": 11463 }, { "epoch": 0.24313376174418358, "grad_norm": 0.3272710144519806, "learning_rate": 1.9290533174378874e-05, "loss": 0.5553, "step": 11464 }, { "epoch": 0.2431549702021166, "grad_norm": 0.3789273798465729, "learning_rate": 1.9290409794000623e-05, "loss": 0.5516, "step": 11465 }, { "epoch": 0.24317617866004962, "grad_norm": 0.3556290864944458, "learning_rate": 1.929028640328966e-05, "loss": 0.4574, "step": 11466 }, { "epoch": 0.24319738711798264, "grad_norm": 0.3391532599925995, "learning_rate": 1.9290163002246124e-05, "loss": 0.5313, "step": 11467 }, { "epoch": 0.24321859557591569, "grad_norm": 0.3425292670726776, "learning_rate": 1.9290039590870147e-05, "loss": 0.5367, "step": 11468 }, { "epoch": 0.2432398040338487, "grad_norm": 0.3343132734298706, "learning_rate": 1.928991616916187e-05, "loss": 0.4557, "step": 11469 }, { "epoch": 0.24326101249178173, "grad_norm": 0.32637178897857666, "learning_rate": 1.9289792737121428e-05, "loss": 0.5769, "step": 11470 }, { "epoch": 0.24328222094971474, "grad_norm": 0.31835508346557617, "learning_rate": 1.9289669294748957e-05, "loss": 0.473, "step": 11471 }, { "epoch": 0.24330342940764776, "grad_norm": 0.33692312240600586, "learning_rate": 1.9289545842044603e-05, "loss": 0.4966, "step": 11472 }, { "epoch": 0.24332463786558078, "grad_norm": 0.3566100597381592, "learning_rate": 1.9289422379008493e-05, "loss": 0.5095, "step": 11473 }, { "epoch": 0.2433458463235138, "grad_norm": 0.33253955841064453, "learning_rate": 1.9289298905640766e-05, "loss": 0.449, "step": 11474 }, { "epoch": 0.24336705478144685, "grad_norm": 0.33362293243408203, "learning_rate": 1.928917542194156e-05, "loss": 0.4555, "step": 11475 }, { "epoch": 0.24338826323937987, "grad_norm": 0.36974284052848816, "learning_rate": 1.928905192791102e-05, "loss": 0.4738, "step": 11476 }, { "epoch": 0.2434094716973129, "grad_norm": 0.35190388560295105, "learning_rate": 1.9288928423549275e-05, "loss": 0.5438, "step": 11477 }, { "epoch": 0.2434306801552459, "grad_norm": 0.39650505781173706, "learning_rate": 1.9288804908856464e-05, "loss": 0.6009, "step": 11478 }, { "epoch": 0.24345188861317893, "grad_norm": 0.324219286441803, "learning_rate": 1.9288681383832724e-05, "loss": 0.4442, "step": 11479 }, { "epoch": 0.24347309707111195, "grad_norm": 0.34358763694763184, "learning_rate": 1.9288557848478195e-05, "loss": 0.4541, "step": 11480 }, { "epoch": 0.243494305529045, "grad_norm": 0.32302698493003845, "learning_rate": 1.9288434302793007e-05, "loss": 0.5216, "step": 11481 }, { "epoch": 0.24351551398697802, "grad_norm": 0.3521306812763214, "learning_rate": 1.928831074677731e-05, "loss": 0.5881, "step": 11482 }, { "epoch": 0.24353672244491104, "grad_norm": 0.37315183877944946, "learning_rate": 1.9288187180431232e-05, "loss": 0.5899, "step": 11483 }, { "epoch": 0.24355793090284406, "grad_norm": 0.3919996917247772, "learning_rate": 1.928806360375491e-05, "loss": 0.4377, "step": 11484 }, { "epoch": 0.24357913936077707, "grad_norm": 0.3408472239971161, "learning_rate": 1.9287940016748492e-05, "loss": 0.5387, "step": 11485 }, { "epoch": 0.2436003478187101, "grad_norm": 0.35618773102760315, "learning_rate": 1.92878164194121e-05, "loss": 0.5817, "step": 11486 }, { "epoch": 0.24362155627664311, "grad_norm": 0.3870770037174225, "learning_rate": 1.9287692811745883e-05, "loss": 0.5317, "step": 11487 }, { "epoch": 0.24364276473457616, "grad_norm": 0.3201623857021332, "learning_rate": 1.9287569193749974e-05, "loss": 0.4905, "step": 11488 }, { "epoch": 0.24366397319250918, "grad_norm": 0.376017689704895, "learning_rate": 1.9287445565424512e-05, "loss": 0.5682, "step": 11489 }, { "epoch": 0.2436851816504422, "grad_norm": 0.44472649693489075, "learning_rate": 1.9287321926769632e-05, "loss": 0.5117, "step": 11490 }, { "epoch": 0.24370639010837522, "grad_norm": 0.36199435591697693, "learning_rate": 1.9287198277785473e-05, "loss": 0.5091, "step": 11491 }, { "epoch": 0.24372759856630824, "grad_norm": 0.31772685050964355, "learning_rate": 1.9287074618472178e-05, "loss": 0.5652, "step": 11492 }, { "epoch": 0.24374880702424126, "grad_norm": 0.34431934356689453, "learning_rate": 1.9286950948829872e-05, "loss": 0.5441, "step": 11493 }, { "epoch": 0.24377001548217428, "grad_norm": 0.35661497712135315, "learning_rate": 1.9286827268858707e-05, "loss": 0.5414, "step": 11494 }, { "epoch": 0.24379122394010733, "grad_norm": 0.4991103410720825, "learning_rate": 1.928670357855881e-05, "loss": 0.5653, "step": 11495 }, { "epoch": 0.24381243239804035, "grad_norm": 0.3609922230243683, "learning_rate": 1.928657987793032e-05, "loss": 0.5206, "step": 11496 }, { "epoch": 0.24383364085597337, "grad_norm": 0.36249059438705444, "learning_rate": 1.9286456166973378e-05, "loss": 0.5454, "step": 11497 }, { "epoch": 0.24385484931390639, "grad_norm": 0.37751489877700806, "learning_rate": 1.928633244568812e-05, "loss": 0.4722, "step": 11498 }, { "epoch": 0.2438760577718394, "grad_norm": 0.34317097067832947, "learning_rate": 1.928620871407468e-05, "loss": 0.5374, "step": 11499 }, { "epoch": 0.24389726622977242, "grad_norm": 0.35663869976997375, "learning_rate": 1.9286084972133206e-05, "loss": 0.5047, "step": 11500 }, { "epoch": 0.24391847468770544, "grad_norm": 0.3418716490268707, "learning_rate": 1.928596121986383e-05, "loss": 0.535, "step": 11501 }, { "epoch": 0.2439396831456385, "grad_norm": 0.34918221831321716, "learning_rate": 1.928583745726668e-05, "loss": 0.5264, "step": 11502 }, { "epoch": 0.2439608916035715, "grad_norm": 0.3168501555919647, "learning_rate": 1.9285713684341906e-05, "loss": 0.5256, "step": 11503 }, { "epoch": 0.24398210006150453, "grad_norm": 0.317294180393219, "learning_rate": 1.9285589901089645e-05, "loss": 0.4405, "step": 11504 }, { "epoch": 0.24400330851943755, "grad_norm": 0.3581644296646118, "learning_rate": 1.928546610751003e-05, "loss": 0.4627, "step": 11505 }, { "epoch": 0.24402451697737057, "grad_norm": 0.32597658038139343, "learning_rate": 1.92853423036032e-05, "loss": 0.5308, "step": 11506 }, { "epoch": 0.2440457254353036, "grad_norm": 0.3324941396713257, "learning_rate": 1.928521848936929e-05, "loss": 0.4877, "step": 11507 }, { "epoch": 0.2440669338932366, "grad_norm": 0.3446466028690338, "learning_rate": 1.9285094664808445e-05, "loss": 0.5108, "step": 11508 }, { "epoch": 0.24408814235116966, "grad_norm": 0.4042886197566986, "learning_rate": 1.92849708299208e-05, "loss": 0.5027, "step": 11509 }, { "epoch": 0.24410935080910268, "grad_norm": 0.3321825861930847, "learning_rate": 1.9284846984706482e-05, "loss": 0.5329, "step": 11510 }, { "epoch": 0.2441305592670357, "grad_norm": 0.39615389704704285, "learning_rate": 1.9284723129165645e-05, "loss": 0.5838, "step": 11511 }, { "epoch": 0.24415176772496872, "grad_norm": 0.35532528162002563, "learning_rate": 1.928459926329842e-05, "loss": 0.5233, "step": 11512 }, { "epoch": 0.24417297618290174, "grad_norm": 0.3152889907360077, "learning_rate": 1.9284475387104943e-05, "loss": 0.5715, "step": 11513 }, { "epoch": 0.24419418464083475, "grad_norm": 0.3662314713001251, "learning_rate": 1.928435150058535e-05, "loss": 0.5268, "step": 11514 }, { "epoch": 0.2442153930987678, "grad_norm": 0.34798818826675415, "learning_rate": 1.9284227603739784e-05, "loss": 0.5421, "step": 11515 }, { "epoch": 0.24423660155670082, "grad_norm": 0.37679290771484375, "learning_rate": 1.9284103696568385e-05, "loss": 0.5298, "step": 11516 }, { "epoch": 0.24425781001463384, "grad_norm": 0.3826761841773987, "learning_rate": 1.9283979779071282e-05, "loss": 0.5025, "step": 11517 }, { "epoch": 0.24427901847256686, "grad_norm": 0.3285207450389862, "learning_rate": 1.9283855851248617e-05, "loss": 0.4678, "step": 11518 }, { "epoch": 0.24430022693049988, "grad_norm": 0.34387460350990295, "learning_rate": 1.928373191310053e-05, "loss": 0.4733, "step": 11519 }, { "epoch": 0.2443214353884329, "grad_norm": 0.4781445562839508, "learning_rate": 1.9283607964627157e-05, "loss": 0.546, "step": 11520 }, { "epoch": 0.24434264384636592, "grad_norm": 0.36775070428848267, "learning_rate": 1.9283484005828636e-05, "loss": 0.5041, "step": 11521 }, { "epoch": 0.24436385230429897, "grad_norm": 0.3154022991657257, "learning_rate": 1.9283360036705102e-05, "loss": 0.5482, "step": 11522 }, { "epoch": 0.244385060762232, "grad_norm": 0.2970576882362366, "learning_rate": 1.9283236057256697e-05, "loss": 0.5148, "step": 11523 }, { "epoch": 0.244406269220165, "grad_norm": 0.6132906079292297, "learning_rate": 1.9283112067483557e-05, "loss": 0.5337, "step": 11524 }, { "epoch": 0.24442747767809803, "grad_norm": 0.3498821258544922, "learning_rate": 1.9282988067385823e-05, "loss": 0.5154, "step": 11525 }, { "epoch": 0.24444868613603105, "grad_norm": 0.6729310154914856, "learning_rate": 1.928286405696363e-05, "loss": 0.4475, "step": 11526 }, { "epoch": 0.24446989459396407, "grad_norm": 0.32007867097854614, "learning_rate": 1.9282740036217117e-05, "loss": 0.5198, "step": 11527 }, { "epoch": 0.24449110305189709, "grad_norm": 0.3435543477535248, "learning_rate": 1.9282616005146417e-05, "loss": 0.4851, "step": 11528 }, { "epoch": 0.24451231150983013, "grad_norm": 0.3513665497303009, "learning_rate": 1.9282491963751674e-05, "loss": 0.4308, "step": 11529 }, { "epoch": 0.24453351996776315, "grad_norm": 0.4241175949573517, "learning_rate": 1.9282367912033026e-05, "loss": 0.5637, "step": 11530 }, { "epoch": 0.24455472842569617, "grad_norm": 0.321675568819046, "learning_rate": 1.9282243849990604e-05, "loss": 0.504, "step": 11531 }, { "epoch": 0.2445759368836292, "grad_norm": 0.3941822946071625, "learning_rate": 1.928211977762456e-05, "loss": 0.5809, "step": 11532 }, { "epoch": 0.2445971453415622, "grad_norm": 0.29231566190719604, "learning_rate": 1.9281995694935014e-05, "loss": 0.5024, "step": 11533 }, { "epoch": 0.24461835379949523, "grad_norm": 0.33625873923301697, "learning_rate": 1.9281871601922114e-05, "loss": 0.5401, "step": 11534 }, { "epoch": 0.24463956225742825, "grad_norm": 0.46399274468421936, "learning_rate": 1.9281747498586002e-05, "loss": 0.4932, "step": 11535 }, { "epoch": 0.2446607707153613, "grad_norm": 0.35291746258735657, "learning_rate": 1.928162338492681e-05, "loss": 0.5764, "step": 11536 }, { "epoch": 0.24468197917329432, "grad_norm": 0.4105572998523712, "learning_rate": 1.9281499260944673e-05, "loss": 0.5099, "step": 11537 }, { "epoch": 0.24470318763122734, "grad_norm": 0.3374508321285248, "learning_rate": 1.9281375126639736e-05, "loss": 0.5504, "step": 11538 }, { "epoch": 0.24472439608916036, "grad_norm": 0.39835938811302185, "learning_rate": 1.9281250982012138e-05, "loss": 0.525, "step": 11539 }, { "epoch": 0.24474560454709338, "grad_norm": 0.3334696590900421, "learning_rate": 1.9281126827062006e-05, "loss": 0.5289, "step": 11540 }, { "epoch": 0.2447668130050264, "grad_norm": 0.30072325468063354, "learning_rate": 1.928100266178949e-05, "loss": 0.421, "step": 11541 }, { "epoch": 0.24478802146295942, "grad_norm": 0.5149354934692383, "learning_rate": 1.9280878486194723e-05, "loss": 0.5075, "step": 11542 }, { "epoch": 0.24480922992089246, "grad_norm": 0.3420179784297943, "learning_rate": 1.9280754300277843e-05, "loss": 0.5322, "step": 11543 }, { "epoch": 0.24483043837882548, "grad_norm": 0.3218533992767334, "learning_rate": 1.9280630104038985e-05, "loss": 0.4828, "step": 11544 }, { "epoch": 0.2448516468367585, "grad_norm": 0.3322732448577881, "learning_rate": 1.9280505897478296e-05, "loss": 0.4615, "step": 11545 }, { "epoch": 0.24487285529469152, "grad_norm": 0.325970858335495, "learning_rate": 1.9280381680595907e-05, "loss": 0.4946, "step": 11546 }, { "epoch": 0.24489406375262454, "grad_norm": 0.332755446434021, "learning_rate": 1.928025745339196e-05, "loss": 0.5061, "step": 11547 }, { "epoch": 0.24491527221055756, "grad_norm": 0.35577115416526794, "learning_rate": 1.9280133215866586e-05, "loss": 0.5824, "step": 11548 }, { "epoch": 0.24493648066849058, "grad_norm": 0.3250981569290161, "learning_rate": 1.9280008968019935e-05, "loss": 0.5793, "step": 11549 }, { "epoch": 0.24495768912642363, "grad_norm": 0.31007641553878784, "learning_rate": 1.9279884709852137e-05, "loss": 0.5592, "step": 11550 }, { "epoch": 0.24497889758435665, "grad_norm": 0.3418683409690857, "learning_rate": 1.9279760441363328e-05, "loss": 0.5582, "step": 11551 }, { "epoch": 0.24500010604228967, "grad_norm": 0.32054388523101807, "learning_rate": 1.9279636162553655e-05, "loss": 0.4843, "step": 11552 }, { "epoch": 0.2450213145002227, "grad_norm": 0.3589114546775818, "learning_rate": 1.9279511873423245e-05, "loss": 0.4652, "step": 11553 }, { "epoch": 0.2450425229581557, "grad_norm": 0.34137189388275146, "learning_rate": 1.9279387573972247e-05, "loss": 0.5247, "step": 11554 }, { "epoch": 0.24506373141608873, "grad_norm": 0.35484778881073, "learning_rate": 1.9279263264200797e-05, "loss": 0.5366, "step": 11555 }, { "epoch": 0.24508493987402177, "grad_norm": 0.35070356726646423, "learning_rate": 1.9279138944109025e-05, "loss": 0.4334, "step": 11556 }, { "epoch": 0.2451061483319548, "grad_norm": 0.36272913217544556, "learning_rate": 1.9279014613697083e-05, "loss": 0.5049, "step": 11557 }, { "epoch": 0.2451273567898878, "grad_norm": 0.36664196848869324, "learning_rate": 1.9278890272965097e-05, "loss": 0.5973, "step": 11558 }, { "epoch": 0.24514856524782083, "grad_norm": 0.3305118680000305, "learning_rate": 1.927876592191321e-05, "loss": 0.5809, "step": 11559 }, { "epoch": 0.24516977370575385, "grad_norm": 0.35952213406562805, "learning_rate": 1.9278641560541557e-05, "loss": 0.6259, "step": 11560 }, { "epoch": 0.24519098216368687, "grad_norm": 0.5090950131416321, "learning_rate": 1.9278517188850283e-05, "loss": 0.5705, "step": 11561 }, { "epoch": 0.2452121906216199, "grad_norm": 0.3629985451698303, "learning_rate": 1.9278392806839523e-05, "loss": 0.5594, "step": 11562 }, { "epoch": 0.24523339907955294, "grad_norm": 0.36066362261772156, "learning_rate": 1.9278268414509414e-05, "loss": 0.5244, "step": 11563 }, { "epoch": 0.24525460753748596, "grad_norm": 0.34541863203048706, "learning_rate": 1.9278144011860096e-05, "loss": 0.503, "step": 11564 }, { "epoch": 0.24527581599541898, "grad_norm": 0.30848875641822815, "learning_rate": 1.9278019598891707e-05, "loss": 0.494, "step": 11565 }, { "epoch": 0.245297024453352, "grad_norm": 0.3364071846008301, "learning_rate": 1.9277895175604384e-05, "loss": 0.476, "step": 11566 }, { "epoch": 0.24531823291128502, "grad_norm": 0.33965545892715454, "learning_rate": 1.9277770741998268e-05, "loss": 0.5189, "step": 11567 }, { "epoch": 0.24533944136921804, "grad_norm": 0.4103037714958191, "learning_rate": 1.9277646298073492e-05, "loss": 0.4768, "step": 11568 }, { "epoch": 0.24536064982715106, "grad_norm": 0.3644576668739319, "learning_rate": 1.9277521843830204e-05, "loss": 0.4774, "step": 11569 }, { "epoch": 0.2453818582850841, "grad_norm": 0.3155502378940582, "learning_rate": 1.9277397379268534e-05, "loss": 0.4585, "step": 11570 }, { "epoch": 0.24540306674301712, "grad_norm": 0.3123484253883362, "learning_rate": 1.9277272904388624e-05, "loss": 0.4891, "step": 11571 }, { "epoch": 0.24542427520095014, "grad_norm": 0.3794032633304596, "learning_rate": 1.9277148419190612e-05, "loss": 0.5569, "step": 11572 }, { "epoch": 0.24544548365888316, "grad_norm": 0.3539648950099945, "learning_rate": 1.9277023923674635e-05, "loss": 0.5729, "step": 11573 }, { "epoch": 0.24546669211681618, "grad_norm": 0.3317733407020569, "learning_rate": 1.9276899417840837e-05, "loss": 0.4436, "step": 11574 }, { "epoch": 0.2454879005747492, "grad_norm": 0.33748659491539, "learning_rate": 1.9276774901689344e-05, "loss": 0.54, "step": 11575 }, { "epoch": 0.24550910903268222, "grad_norm": 0.28606316447257996, "learning_rate": 1.927665037522031e-05, "loss": 0.4288, "step": 11576 }, { "epoch": 0.24553031749061527, "grad_norm": 0.3437713384628296, "learning_rate": 1.927652583843386e-05, "loss": 0.5769, "step": 11577 }, { "epoch": 0.2455515259485483, "grad_norm": 0.33579668402671814, "learning_rate": 1.9276401291330145e-05, "loss": 0.5087, "step": 11578 }, { "epoch": 0.2455727344064813, "grad_norm": 0.37426885962486267, "learning_rate": 1.9276276733909293e-05, "loss": 0.5157, "step": 11579 }, { "epoch": 0.24559394286441433, "grad_norm": 0.4153796136379242, "learning_rate": 1.9276152166171448e-05, "loss": 0.4836, "step": 11580 }, { "epoch": 0.24561515132234735, "grad_norm": 0.3575170934200287, "learning_rate": 1.9276027588116744e-05, "loss": 0.627, "step": 11581 }, { "epoch": 0.24563635978028037, "grad_norm": 0.3592623174190521, "learning_rate": 1.9275902999745325e-05, "loss": 0.5131, "step": 11582 }, { "epoch": 0.2456575682382134, "grad_norm": 0.30675166845321655, "learning_rate": 1.9275778401057326e-05, "loss": 0.5084, "step": 11583 }, { "epoch": 0.24567877669614643, "grad_norm": 0.3479255735874176, "learning_rate": 1.927565379205289e-05, "loss": 0.5205, "step": 11584 }, { "epoch": 0.24569998515407945, "grad_norm": 0.34880879521369934, "learning_rate": 1.9275529172732147e-05, "loss": 0.5157, "step": 11585 }, { "epoch": 0.24572119361201247, "grad_norm": 0.31755825877189636, "learning_rate": 1.9275404543095245e-05, "loss": 0.5936, "step": 11586 }, { "epoch": 0.2457424020699455, "grad_norm": 0.328093022108078, "learning_rate": 1.927527990314232e-05, "loss": 0.464, "step": 11587 }, { "epoch": 0.2457636105278785, "grad_norm": 0.34106534719467163, "learning_rate": 1.927515525287351e-05, "loss": 0.548, "step": 11588 }, { "epoch": 0.24578481898581153, "grad_norm": 0.33007004857063293, "learning_rate": 1.9275030592288947e-05, "loss": 0.5048, "step": 11589 }, { "epoch": 0.24580602744374458, "grad_norm": 0.7378624081611633, "learning_rate": 1.927490592138878e-05, "loss": 0.5575, "step": 11590 }, { "epoch": 0.2458272359016776, "grad_norm": 0.3204861283302307, "learning_rate": 1.927478124017314e-05, "loss": 0.4667, "step": 11591 }, { "epoch": 0.24584844435961062, "grad_norm": 0.3329527676105499, "learning_rate": 1.9274656548642173e-05, "loss": 0.4365, "step": 11592 }, { "epoch": 0.24586965281754364, "grad_norm": 0.33653393387794495, "learning_rate": 1.927453184679601e-05, "loss": 0.5444, "step": 11593 }, { "epoch": 0.24589086127547666, "grad_norm": 0.347176194190979, "learning_rate": 1.9274407134634795e-05, "loss": 0.5131, "step": 11594 }, { "epoch": 0.24591206973340968, "grad_norm": 23.67655372619629, "learning_rate": 1.9274282412158663e-05, "loss": 1.7844, "step": 11595 }, { "epoch": 0.2459332781913427, "grad_norm": 0.4097152054309845, "learning_rate": 1.927415767936776e-05, "loss": 0.5115, "step": 11596 }, { "epoch": 0.24595448664927574, "grad_norm": 0.4892593324184418, "learning_rate": 1.9274032936262213e-05, "loss": 0.4788, "step": 11597 }, { "epoch": 0.24597569510720876, "grad_norm": 0.4435454308986664, "learning_rate": 1.9273908182842172e-05, "loss": 0.5023, "step": 11598 }, { "epoch": 0.24599690356514178, "grad_norm": 0.29527872800827026, "learning_rate": 1.9273783419107767e-05, "loss": 0.4234, "step": 11599 }, { "epoch": 0.2460181120230748, "grad_norm": 0.3265724182128906, "learning_rate": 1.9273658645059144e-05, "loss": 0.5775, "step": 11600 }, { "epoch": 0.24603932048100782, "grad_norm": 0.3218671679496765, "learning_rate": 1.9273533860696437e-05, "loss": 0.4598, "step": 11601 }, { "epoch": 0.24606052893894084, "grad_norm": 0.35922151803970337, "learning_rate": 1.9273409066019786e-05, "loss": 0.6004, "step": 11602 }, { "epoch": 0.24608173739687386, "grad_norm": 0.30420705676078796, "learning_rate": 1.9273284261029327e-05, "loss": 0.5256, "step": 11603 }, { "epoch": 0.2461029458548069, "grad_norm": 0.34838229417800903, "learning_rate": 1.9273159445725208e-05, "loss": 0.4941, "step": 11604 }, { "epoch": 0.24612415431273993, "grad_norm": 0.3423037528991699, "learning_rate": 1.927303462010756e-05, "loss": 0.5, "step": 11605 }, { "epoch": 0.24614536277067295, "grad_norm": 0.31852152943611145, "learning_rate": 1.927290978417652e-05, "loss": 0.483, "step": 11606 }, { "epoch": 0.24616657122860597, "grad_norm": 0.3362400531768799, "learning_rate": 1.9272784937932233e-05, "loss": 0.5688, "step": 11607 }, { "epoch": 0.246187779686539, "grad_norm": 0.31103286147117615, "learning_rate": 1.927266008137483e-05, "loss": 0.4963, "step": 11608 }, { "epoch": 0.246208988144472, "grad_norm": 0.37003645300865173, "learning_rate": 1.927253521450446e-05, "loss": 0.5279, "step": 11609 }, { "epoch": 0.24623019660240503, "grad_norm": 0.32854658365249634, "learning_rate": 1.927241033732126e-05, "loss": 0.4808, "step": 11610 }, { "epoch": 0.24625140506033807, "grad_norm": 0.31251415610313416, "learning_rate": 1.9272285449825362e-05, "loss": 0.518, "step": 11611 }, { "epoch": 0.2462726135182711, "grad_norm": 0.3327837586402893, "learning_rate": 1.9272160552016907e-05, "loss": 0.547, "step": 11612 }, { "epoch": 0.2462938219762041, "grad_norm": 0.32042938470840454, "learning_rate": 1.9272035643896036e-05, "loss": 0.489, "step": 11613 }, { "epoch": 0.24631503043413713, "grad_norm": 0.3409710228443146, "learning_rate": 1.927191072546289e-05, "loss": 0.4713, "step": 11614 }, { "epoch": 0.24633623889207015, "grad_norm": 0.311335027217865, "learning_rate": 1.9271785796717604e-05, "loss": 0.4956, "step": 11615 }, { "epoch": 0.24635744735000317, "grad_norm": 0.3763105869293213, "learning_rate": 1.927166085766032e-05, "loss": 0.5597, "step": 11616 }, { "epoch": 0.2463786558079362, "grad_norm": 0.34800055623054504, "learning_rate": 1.9271535908291172e-05, "loss": 0.5693, "step": 11617 }, { "epoch": 0.24639986426586924, "grad_norm": 0.3431432843208313, "learning_rate": 1.9271410948610304e-05, "loss": 0.5339, "step": 11618 }, { "epoch": 0.24642107272380226, "grad_norm": 0.339456170797348, "learning_rate": 1.9271285978617854e-05, "loss": 0.467, "step": 11619 }, { "epoch": 0.24644228118173528, "grad_norm": 0.5323988795280457, "learning_rate": 1.927116099831396e-05, "loss": 0.532, "step": 11620 }, { "epoch": 0.2464634896396683, "grad_norm": 0.361529141664505, "learning_rate": 1.927103600769876e-05, "loss": 0.5383, "step": 11621 }, { "epoch": 0.24648469809760132, "grad_norm": 0.4051903486251831, "learning_rate": 1.9270911006772397e-05, "loss": 0.5545, "step": 11622 }, { "epoch": 0.24650590655553434, "grad_norm": 0.3093099892139435, "learning_rate": 1.9270785995535005e-05, "loss": 0.5333, "step": 11623 }, { "epoch": 0.24652711501346736, "grad_norm": 0.39304181933403015, "learning_rate": 1.927066097398673e-05, "loss": 0.5301, "step": 11624 }, { "epoch": 0.2465483234714004, "grad_norm": 0.3649323880672455, "learning_rate": 1.9270535942127697e-05, "loss": 0.5302, "step": 11625 }, { "epoch": 0.24656953192933342, "grad_norm": 0.3992592990398407, "learning_rate": 1.927041089995806e-05, "loss": 0.5702, "step": 11626 }, { "epoch": 0.24659074038726644, "grad_norm": 0.3444804251194, "learning_rate": 1.9270285847477954e-05, "loss": 0.515, "step": 11627 }, { "epoch": 0.24661194884519946, "grad_norm": 0.3988434970378876, "learning_rate": 1.9270160784687515e-05, "loss": 0.4795, "step": 11628 }, { "epoch": 0.24663315730313248, "grad_norm": 0.36189550161361694, "learning_rate": 1.9270035711586883e-05, "loss": 0.5241, "step": 11629 }, { "epoch": 0.2466543657610655, "grad_norm": 0.3219417333602905, "learning_rate": 1.92699106281762e-05, "loss": 0.5048, "step": 11630 }, { "epoch": 0.24667557421899855, "grad_norm": 0.3290223479270935, "learning_rate": 1.92697855344556e-05, "loss": 0.456, "step": 11631 }, { "epoch": 0.24669678267693157, "grad_norm": 0.3507169485092163, "learning_rate": 1.9269660430425227e-05, "loss": 0.5703, "step": 11632 }, { "epoch": 0.2467179911348646, "grad_norm": 0.33961760997772217, "learning_rate": 1.926953531608522e-05, "loss": 0.4937, "step": 11633 }, { "epoch": 0.2467391995927976, "grad_norm": 0.35382041335105896, "learning_rate": 1.926941019143571e-05, "loss": 0.5377, "step": 11634 }, { "epoch": 0.24676040805073063, "grad_norm": 0.3131345808506012, "learning_rate": 1.926928505647685e-05, "loss": 0.5203, "step": 11635 }, { "epoch": 0.24678161650866365, "grad_norm": 0.3262192904949188, "learning_rate": 1.9269159911208767e-05, "loss": 0.5722, "step": 11636 }, { "epoch": 0.24680282496659667, "grad_norm": 0.3146578371524811, "learning_rate": 1.9269034755631605e-05, "loss": 0.5196, "step": 11637 }, { "epoch": 0.24682403342452972, "grad_norm": 0.3507985472679138, "learning_rate": 1.9268909589745503e-05, "loss": 0.5261, "step": 11638 }, { "epoch": 0.24684524188246273, "grad_norm": 0.2875073552131653, "learning_rate": 1.9268784413550603e-05, "loss": 0.4889, "step": 11639 }, { "epoch": 0.24686645034039575, "grad_norm": 0.32423463463783264, "learning_rate": 1.926865922704704e-05, "loss": 0.5015, "step": 11640 }, { "epoch": 0.24688765879832877, "grad_norm": 0.381413072347641, "learning_rate": 1.9268534030234956e-05, "loss": 0.4874, "step": 11641 }, { "epoch": 0.2469088672562618, "grad_norm": 0.3829769194126129, "learning_rate": 1.9268408823114487e-05, "loss": 0.5907, "step": 11642 }, { "epoch": 0.2469300757141948, "grad_norm": 0.3034324049949646, "learning_rate": 1.9268283605685773e-05, "loss": 0.4756, "step": 11643 }, { "epoch": 0.24695128417212783, "grad_norm": 0.3732006251811981, "learning_rate": 1.926815837794896e-05, "loss": 0.4861, "step": 11644 }, { "epoch": 0.24697249263006088, "grad_norm": 0.520856499671936, "learning_rate": 1.9268033139904174e-05, "loss": 0.5477, "step": 11645 }, { "epoch": 0.2469937010879939, "grad_norm": 0.31616219878196716, "learning_rate": 1.926790789155157e-05, "loss": 0.4647, "step": 11646 }, { "epoch": 0.24701490954592692, "grad_norm": 0.37435704469680786, "learning_rate": 1.926778263289127e-05, "loss": 0.5959, "step": 11647 }, { "epoch": 0.24703611800385994, "grad_norm": 0.34290194511413574, "learning_rate": 1.9267657363923434e-05, "loss": 0.5187, "step": 11648 }, { "epoch": 0.24705732646179296, "grad_norm": 0.40542808175086975, "learning_rate": 1.9267532084648182e-05, "loss": 0.5069, "step": 11649 }, { "epoch": 0.24707853491972598, "grad_norm": 0.3747018873691559, "learning_rate": 1.9267406795065664e-05, "loss": 0.4949, "step": 11650 }, { "epoch": 0.247099743377659, "grad_norm": 0.33770689368247986, "learning_rate": 1.9267281495176016e-05, "loss": 0.5635, "step": 11651 }, { "epoch": 0.24712095183559205, "grad_norm": 0.3182687759399414, "learning_rate": 1.9267156184979375e-05, "loss": 0.5008, "step": 11652 }, { "epoch": 0.24714216029352506, "grad_norm": 0.35965830087661743, "learning_rate": 1.9267030864475892e-05, "loss": 0.5591, "step": 11653 }, { "epoch": 0.24716336875145808, "grad_norm": 0.32118916511535645, "learning_rate": 1.926690553366569e-05, "loss": 0.4714, "step": 11654 }, { "epoch": 0.2471845772093911, "grad_norm": 0.3375224769115448, "learning_rate": 1.926678019254892e-05, "loss": 0.5272, "step": 11655 }, { "epoch": 0.24720578566732412, "grad_norm": 0.3374592959880829, "learning_rate": 1.9266654841125713e-05, "loss": 0.4899, "step": 11656 }, { "epoch": 0.24722699412525714, "grad_norm": 0.35966944694519043, "learning_rate": 1.9266529479396218e-05, "loss": 0.5269, "step": 11657 }, { "epoch": 0.24724820258319016, "grad_norm": 0.3999924063682556, "learning_rate": 1.926640410736057e-05, "loss": 0.574, "step": 11658 }, { "epoch": 0.2472694110411232, "grad_norm": 0.3587268590927124, "learning_rate": 1.9266278725018904e-05, "loss": 0.5183, "step": 11659 }, { "epoch": 0.24729061949905623, "grad_norm": 0.35491371154785156, "learning_rate": 1.926615333237136e-05, "loss": 0.5071, "step": 11660 }, { "epoch": 0.24731182795698925, "grad_norm": 0.305225133895874, "learning_rate": 1.9266027929418088e-05, "loss": 0.4998, "step": 11661 }, { "epoch": 0.24733303641492227, "grad_norm": 0.35594236850738525, "learning_rate": 1.926590251615922e-05, "loss": 0.5328, "step": 11662 }, { "epoch": 0.2473542448728553, "grad_norm": 0.3342564105987549, "learning_rate": 1.9265777092594892e-05, "loss": 0.4895, "step": 11663 }, { "epoch": 0.2473754533307883, "grad_norm": 0.3815193474292755, "learning_rate": 1.9265651658725248e-05, "loss": 0.5473, "step": 11664 }, { "epoch": 0.24739666178872136, "grad_norm": 0.3584486246109009, "learning_rate": 1.926552621455043e-05, "loss": 0.4698, "step": 11665 }, { "epoch": 0.24741787024665438, "grad_norm": 0.39214611053466797, "learning_rate": 1.926540076007057e-05, "loss": 0.5578, "step": 11666 }, { "epoch": 0.2474390787045874, "grad_norm": 0.34221985936164856, "learning_rate": 1.926527529528581e-05, "loss": 0.5286, "step": 11667 }, { "epoch": 0.24746028716252041, "grad_norm": 0.37096425890922546, "learning_rate": 1.9265149820196298e-05, "loss": 0.4808, "step": 11668 }, { "epoch": 0.24748149562045343, "grad_norm": 0.33837783336639404, "learning_rate": 1.926502433480216e-05, "loss": 0.4992, "step": 11669 }, { "epoch": 0.24750270407838645, "grad_norm": 0.3733202815055847, "learning_rate": 1.9264898839103547e-05, "loss": 0.6068, "step": 11670 }, { "epoch": 0.24752391253631947, "grad_norm": 0.49780669808387756, "learning_rate": 1.9264773333100594e-05, "loss": 0.5023, "step": 11671 }, { "epoch": 0.24754512099425252, "grad_norm": 0.3280958831310272, "learning_rate": 1.9264647816793438e-05, "loss": 0.5731, "step": 11672 }, { "epoch": 0.24756632945218554, "grad_norm": 0.3989533483982086, "learning_rate": 1.9264522290182224e-05, "loss": 0.4752, "step": 11673 }, { "epoch": 0.24758753791011856, "grad_norm": 0.31678178906440735, "learning_rate": 1.9264396753267088e-05, "loss": 0.5114, "step": 11674 }, { "epoch": 0.24760874636805158, "grad_norm": 0.35639476776123047, "learning_rate": 1.926427120604817e-05, "loss": 0.4899, "step": 11675 }, { "epoch": 0.2476299548259846, "grad_norm": 0.3111015260219574, "learning_rate": 1.9264145648525606e-05, "loss": 0.4241, "step": 11676 }, { "epoch": 0.24765116328391762, "grad_norm": 0.35983091592788696, "learning_rate": 1.9264020080699545e-05, "loss": 0.4909, "step": 11677 }, { "epoch": 0.24767237174185064, "grad_norm": 0.3217540979385376, "learning_rate": 1.926389450257012e-05, "loss": 0.447, "step": 11678 }, { "epoch": 0.2476935801997837, "grad_norm": 0.3222959339618683, "learning_rate": 1.926376891413747e-05, "loss": 0.4356, "step": 11679 }, { "epoch": 0.2477147886577167, "grad_norm": 0.3299773335456848, "learning_rate": 1.926364331540174e-05, "loss": 0.4968, "step": 11680 }, { "epoch": 0.24773599711564973, "grad_norm": 0.3244217336177826, "learning_rate": 1.9263517706363068e-05, "loss": 0.6072, "step": 11681 }, { "epoch": 0.24775720557358274, "grad_norm": 0.33551937341690063, "learning_rate": 1.9263392087021587e-05, "loss": 0.5182, "step": 11682 }, { "epoch": 0.24777841403151576, "grad_norm": 0.35582345724105835, "learning_rate": 1.9263266457377445e-05, "loss": 0.5197, "step": 11683 }, { "epoch": 0.24779962248944878, "grad_norm": 0.3610955476760864, "learning_rate": 1.9263140817430777e-05, "loss": 0.4381, "step": 11684 }, { "epoch": 0.2478208309473818, "grad_norm": 0.32842695713043213, "learning_rate": 1.9263015167181725e-05, "loss": 0.6274, "step": 11685 }, { "epoch": 0.24784203940531485, "grad_norm": 0.3636624217033386, "learning_rate": 1.9262889506630428e-05, "loss": 0.5245, "step": 11686 }, { "epoch": 0.24786324786324787, "grad_norm": 0.3319532871246338, "learning_rate": 1.9262763835777025e-05, "loss": 0.5409, "step": 11687 }, { "epoch": 0.2478844563211809, "grad_norm": 0.33402925729751587, "learning_rate": 1.9262638154621655e-05, "loss": 0.5406, "step": 11688 }, { "epoch": 0.2479056647791139, "grad_norm": 0.34724390506744385, "learning_rate": 1.9262512463164463e-05, "loss": 0.4465, "step": 11689 }, { "epoch": 0.24792687323704693, "grad_norm": 0.44847702980041504, "learning_rate": 1.9262386761405582e-05, "loss": 0.4972, "step": 11690 }, { "epoch": 0.24794808169497995, "grad_norm": 0.3294714689254761, "learning_rate": 1.9262261049345158e-05, "loss": 0.4547, "step": 11691 }, { "epoch": 0.24796929015291297, "grad_norm": 0.3218064606189728, "learning_rate": 1.9262135326983326e-05, "loss": 0.6085, "step": 11692 }, { "epoch": 0.24799049861084602, "grad_norm": 0.3448353707790375, "learning_rate": 1.9262009594320224e-05, "loss": 0.4162, "step": 11693 }, { "epoch": 0.24801170706877904, "grad_norm": 0.35677528381347656, "learning_rate": 1.9261883851356003e-05, "loss": 0.5323, "step": 11694 }, { "epoch": 0.24803291552671206, "grad_norm": 0.38873204588890076, "learning_rate": 1.9261758098090787e-05, "loss": 0.5168, "step": 11695 }, { "epoch": 0.24805412398464508, "grad_norm": 0.32003775238990784, "learning_rate": 1.9261632334524732e-05, "loss": 0.4461, "step": 11696 }, { "epoch": 0.2480753324425781, "grad_norm": 0.363598108291626, "learning_rate": 1.9261506560657967e-05, "loss": 0.5087, "step": 11697 }, { "epoch": 0.24809654090051111, "grad_norm": 0.3543398678302765, "learning_rate": 1.926138077649063e-05, "loss": 0.5136, "step": 11698 }, { "epoch": 0.24811774935844413, "grad_norm": 0.303516149520874, "learning_rate": 1.9261254982022873e-05, "loss": 0.4338, "step": 11699 }, { "epoch": 0.24813895781637718, "grad_norm": 0.32031700015068054, "learning_rate": 1.9261129177254824e-05, "loss": 0.551, "step": 11700 }, { "epoch": 0.2481601662743102, "grad_norm": 0.32106447219848633, "learning_rate": 1.926100336218663e-05, "loss": 0.5287, "step": 11701 }, { "epoch": 0.24818137473224322, "grad_norm": 0.3110237121582031, "learning_rate": 1.9260877536818427e-05, "loss": 0.4508, "step": 11702 }, { "epoch": 0.24820258319017624, "grad_norm": 0.3996202349662781, "learning_rate": 1.9260751701150356e-05, "loss": 0.5199, "step": 11703 }, { "epoch": 0.24822379164810926, "grad_norm": 0.3530433177947998, "learning_rate": 1.926062585518256e-05, "loss": 0.5305, "step": 11704 }, { "epoch": 0.24824500010604228, "grad_norm": 0.33225300908088684, "learning_rate": 1.9260499998915174e-05, "loss": 0.5852, "step": 11705 }, { "epoch": 0.24826620856397533, "grad_norm": 0.3388061225414276, "learning_rate": 1.926037413234834e-05, "loss": 0.4331, "step": 11706 }, { "epoch": 0.24828741702190835, "grad_norm": 0.3621801733970642, "learning_rate": 1.92602482554822e-05, "loss": 0.5088, "step": 11707 }, { "epoch": 0.24830862547984137, "grad_norm": 0.3324502408504486, "learning_rate": 1.926012236831689e-05, "loss": 0.4147, "step": 11708 }, { "epoch": 0.24832983393777439, "grad_norm": 0.33244964480400085, "learning_rate": 1.925999647085256e-05, "loss": 0.4809, "step": 11709 }, { "epoch": 0.2483510423957074, "grad_norm": 0.32867369055747986, "learning_rate": 1.9259870563089332e-05, "loss": 0.5105, "step": 11710 }, { "epoch": 0.24837225085364042, "grad_norm": 0.33685189485549927, "learning_rate": 1.925974464502736e-05, "loss": 0.5727, "step": 11711 }, { "epoch": 0.24839345931157344, "grad_norm": 0.34542107582092285, "learning_rate": 1.9259618716666784e-05, "loss": 0.5678, "step": 11712 }, { "epoch": 0.2484146677695065, "grad_norm": 0.36113959550857544, "learning_rate": 1.9259492778007737e-05, "loss": 0.5431, "step": 11713 }, { "epoch": 0.2484358762274395, "grad_norm": 0.3596416711807251, "learning_rate": 1.9259366829050364e-05, "loss": 0.5072, "step": 11714 }, { "epoch": 0.24845708468537253, "grad_norm": 0.3155181407928467, "learning_rate": 1.9259240869794802e-05, "loss": 0.5172, "step": 11715 }, { "epoch": 0.24847829314330555, "grad_norm": 0.36506763100624084, "learning_rate": 1.9259114900241194e-05, "loss": 0.5585, "step": 11716 }, { "epoch": 0.24849950160123857, "grad_norm": 0.4644467532634735, "learning_rate": 1.9258988920389677e-05, "loss": 0.5116, "step": 11717 }, { "epoch": 0.2485207100591716, "grad_norm": 0.6694386005401611, "learning_rate": 1.9258862930240398e-05, "loss": 0.4857, "step": 11718 }, { "epoch": 0.2485419185171046, "grad_norm": 0.32581764459609985, "learning_rate": 1.9258736929793488e-05, "loss": 0.5538, "step": 11719 }, { "epoch": 0.24856312697503766, "grad_norm": 0.407310426235199, "learning_rate": 1.925861091904909e-05, "loss": 0.469, "step": 11720 }, { "epoch": 0.24858433543297068, "grad_norm": 0.3235493004322052, "learning_rate": 1.925848489800735e-05, "loss": 0.5043, "step": 11721 }, { "epoch": 0.2486055438909037, "grad_norm": 0.357770711183548, "learning_rate": 1.9258358866668398e-05, "loss": 0.5896, "step": 11722 }, { "epoch": 0.24862675234883672, "grad_norm": 0.33081862330436707, "learning_rate": 1.9258232825032387e-05, "loss": 0.5321, "step": 11723 }, { "epoch": 0.24864796080676974, "grad_norm": 0.32424360513687134, "learning_rate": 1.9258106773099444e-05, "loss": 0.5476, "step": 11724 }, { "epoch": 0.24866916926470276, "grad_norm": 0.33649909496307373, "learning_rate": 1.9257980710869714e-05, "loss": 0.5094, "step": 11725 }, { "epoch": 0.24869037772263577, "grad_norm": 0.3121063709259033, "learning_rate": 1.925785463834334e-05, "loss": 0.5304, "step": 11726 }, { "epoch": 0.24871158618056882, "grad_norm": 0.32540571689605713, "learning_rate": 1.9257728555520467e-05, "loss": 0.5077, "step": 11727 }, { "epoch": 0.24873279463850184, "grad_norm": 0.33756792545318604, "learning_rate": 1.925760246240122e-05, "loss": 0.5257, "step": 11728 }, { "epoch": 0.24875400309643486, "grad_norm": 0.4220999479293823, "learning_rate": 1.925747635898575e-05, "loss": 0.5895, "step": 11729 }, { "epoch": 0.24877521155436788, "grad_norm": 0.3138362169265747, "learning_rate": 1.92573502452742e-05, "loss": 0.5093, "step": 11730 }, { "epoch": 0.2487964200123009, "grad_norm": 0.336166650056839, "learning_rate": 1.9257224121266697e-05, "loss": 0.4812, "step": 11731 }, { "epoch": 0.24881762847023392, "grad_norm": 0.3284689486026764, "learning_rate": 1.9257097986963394e-05, "loss": 0.5413, "step": 11732 }, { "epoch": 0.24883883692816694, "grad_norm": 0.33599647879600525, "learning_rate": 1.925697184236443e-05, "loss": 0.4603, "step": 11733 }, { "epoch": 0.2488600453861, "grad_norm": 0.32322126626968384, "learning_rate": 1.925684568746994e-05, "loss": 0.3863, "step": 11734 }, { "epoch": 0.248881253844033, "grad_norm": 0.36726558208465576, "learning_rate": 1.9256719522280065e-05, "loss": 0.5054, "step": 11735 }, { "epoch": 0.24890246230196603, "grad_norm": 0.40899696946144104, "learning_rate": 1.925659334679495e-05, "loss": 0.4356, "step": 11736 }, { "epoch": 0.24892367075989905, "grad_norm": 0.32866808772087097, "learning_rate": 1.925646716101473e-05, "loss": 0.4185, "step": 11737 }, { "epoch": 0.24894487921783207, "grad_norm": 0.3960665166378021, "learning_rate": 1.9256340964939547e-05, "loss": 0.571, "step": 11738 }, { "epoch": 0.24896608767576509, "grad_norm": 0.7684429883956909, "learning_rate": 1.9256214758569543e-05, "loss": 0.5618, "step": 11739 }, { "epoch": 0.2489872961336981, "grad_norm": 0.4168541431427002, "learning_rate": 1.925608854190486e-05, "loss": 0.4868, "step": 11740 }, { "epoch": 0.24900850459163115, "grad_norm": 0.3119966983795166, "learning_rate": 1.9255962314945633e-05, "loss": 0.529, "step": 11741 }, { "epoch": 0.24902971304956417, "grad_norm": 0.35029342770576477, "learning_rate": 1.9255836077692005e-05, "loss": 0.497, "step": 11742 }, { "epoch": 0.2490509215074972, "grad_norm": 0.3400588631629944, "learning_rate": 1.9255709830144117e-05, "loss": 0.5136, "step": 11743 }, { "epoch": 0.2490721299654302, "grad_norm": 0.3335220515727997, "learning_rate": 1.9255583572302106e-05, "loss": 0.6081, "step": 11744 }, { "epoch": 0.24909333842336323, "grad_norm": 0.4744606912136078, "learning_rate": 1.9255457304166117e-05, "loss": 0.548, "step": 11745 }, { "epoch": 0.24911454688129625, "grad_norm": 0.37877723574638367, "learning_rate": 1.9255331025736293e-05, "loss": 0.5423, "step": 11746 }, { "epoch": 0.2491357553392293, "grad_norm": 0.34425127506256104, "learning_rate": 1.925520473701277e-05, "loss": 0.5603, "step": 11747 }, { "epoch": 0.24915696379716232, "grad_norm": 0.34555506706237793, "learning_rate": 1.9255078437995683e-05, "loss": 0.5386, "step": 11748 }, { "epoch": 0.24917817225509534, "grad_norm": 0.3466491103172302, "learning_rate": 1.9254952128685182e-05, "loss": 0.5223, "step": 11749 }, { "epoch": 0.24919938071302836, "grad_norm": 0.3851185441017151, "learning_rate": 1.92548258090814e-05, "loss": 0.4935, "step": 11750 }, { "epoch": 0.24922058917096138, "grad_norm": 0.31222841143608093, "learning_rate": 1.9254699479184486e-05, "loss": 0.5315, "step": 11751 }, { "epoch": 0.2492417976288944, "grad_norm": 1.0955559015274048, "learning_rate": 1.9254573138994576e-05, "loss": 0.5882, "step": 11752 }, { "epoch": 0.24926300608682742, "grad_norm": 0.4288738965988159, "learning_rate": 1.9254446788511806e-05, "loss": 0.5243, "step": 11753 }, { "epoch": 0.24928421454476046, "grad_norm": 0.33644309639930725, "learning_rate": 1.925432042773632e-05, "loss": 0.5102, "step": 11754 }, { "epoch": 0.24930542300269348, "grad_norm": 0.3471453785896301, "learning_rate": 1.9254194056668262e-05, "loss": 0.5214, "step": 11755 }, { "epoch": 0.2493266314606265, "grad_norm": 0.3583877384662628, "learning_rate": 1.925406767530777e-05, "loss": 0.5186, "step": 11756 }, { "epoch": 0.24934783991855952, "grad_norm": 0.29208502173423767, "learning_rate": 1.9253941283654984e-05, "loss": 0.4726, "step": 11757 }, { "epoch": 0.24936904837649254, "grad_norm": 0.34332001209259033, "learning_rate": 1.9253814881710046e-05, "loss": 0.5719, "step": 11758 }, { "epoch": 0.24939025683442556, "grad_norm": 0.3462904691696167, "learning_rate": 1.9253688469473092e-05, "loss": 0.5023, "step": 11759 }, { "epoch": 0.24941146529235858, "grad_norm": 0.4614536166191101, "learning_rate": 1.925356204694427e-05, "loss": 0.5336, "step": 11760 }, { "epoch": 0.24943267375029163, "grad_norm": 0.7974421381950378, "learning_rate": 1.9253435614123715e-05, "loss": 0.4913, "step": 11761 }, { "epoch": 0.24945388220822465, "grad_norm": 0.34021639823913574, "learning_rate": 1.9253309171011567e-05, "loss": 0.5588, "step": 11762 }, { "epoch": 0.24947509066615767, "grad_norm": 0.3143659830093384, "learning_rate": 1.925318271760797e-05, "loss": 0.4579, "step": 11763 }, { "epoch": 0.2494962991240907, "grad_norm": 0.341229110956192, "learning_rate": 1.9253056253913068e-05, "loss": 0.4915, "step": 11764 }, { "epoch": 0.2495175075820237, "grad_norm": 0.35721805691719055, "learning_rate": 1.925292977992699e-05, "loss": 0.5603, "step": 11765 }, { "epoch": 0.24953871603995673, "grad_norm": 0.33343836665153503, "learning_rate": 1.9252803295649892e-05, "loss": 0.5007, "step": 11766 }, { "epoch": 0.24955992449788975, "grad_norm": 0.3653334379196167, "learning_rate": 1.92526768010819e-05, "loss": 0.5046, "step": 11767 }, { "epoch": 0.2495811329558228, "grad_norm": 0.35153964161872864, "learning_rate": 1.9252550296223165e-05, "loss": 0.4897, "step": 11768 }, { "epoch": 0.2496023414137558, "grad_norm": 0.35224196314811707, "learning_rate": 1.9252423781073823e-05, "loss": 0.5198, "step": 11769 }, { "epoch": 0.24962354987168883, "grad_norm": 0.36183613538742065, "learning_rate": 1.9252297255634015e-05, "loss": 0.5269, "step": 11770 }, { "epoch": 0.24964475832962185, "grad_norm": 0.3450332581996918, "learning_rate": 1.925217071990388e-05, "loss": 0.497, "step": 11771 }, { "epoch": 0.24966596678755487, "grad_norm": 0.34866827726364136, "learning_rate": 1.9252044173883563e-05, "loss": 0.5596, "step": 11772 }, { "epoch": 0.2496871752454879, "grad_norm": 0.3422906994819641, "learning_rate": 1.9251917617573203e-05, "loss": 0.4971, "step": 11773 }, { "epoch": 0.2497083837034209, "grad_norm": 0.3773951232433319, "learning_rate": 1.9251791050972945e-05, "loss": 0.4657, "step": 11774 }, { "epoch": 0.24972959216135396, "grad_norm": 0.3149475157260895, "learning_rate": 1.925166447408292e-05, "loss": 0.4905, "step": 11775 }, { "epoch": 0.24975080061928698, "grad_norm": 0.44191014766693115, "learning_rate": 1.9251537886903274e-05, "loss": 0.5867, "step": 11776 }, { "epoch": 0.24977200907722, "grad_norm": 0.38805076479911804, "learning_rate": 1.9251411289434153e-05, "loss": 0.5006, "step": 11777 }, { "epoch": 0.24979321753515302, "grad_norm": 0.3264981210231781, "learning_rate": 1.9251284681675686e-05, "loss": 0.4823, "step": 11778 }, { "epoch": 0.24981442599308604, "grad_norm": 0.3689820170402527, "learning_rate": 1.9251158063628025e-05, "loss": 0.4944, "step": 11779 }, { "epoch": 0.24983563445101906, "grad_norm": 0.38766688108444214, "learning_rate": 1.9251031435291306e-05, "loss": 0.5301, "step": 11780 }, { "epoch": 0.2498568429089521, "grad_norm": 0.36973631381988525, "learning_rate": 1.9250904796665664e-05, "loss": 0.4716, "step": 11781 }, { "epoch": 0.24987805136688512, "grad_norm": 0.34148815274238586, "learning_rate": 1.9250778147751256e-05, "loss": 0.5523, "step": 11782 }, { "epoch": 0.24989925982481814, "grad_norm": 0.34871408343315125, "learning_rate": 1.9250651488548205e-05, "loss": 0.4436, "step": 11783 }, { "epoch": 0.24992046828275116, "grad_norm": 0.3623664975166321, "learning_rate": 1.9250524819056666e-05, "loss": 0.4751, "step": 11784 }, { "epoch": 0.24994167674068418, "grad_norm": 0.30536511540412903, "learning_rate": 1.925039813927677e-05, "loss": 0.4446, "step": 11785 }, { "epoch": 0.2499628851986172, "grad_norm": 0.35493847727775574, "learning_rate": 1.9250271449208662e-05, "loss": 0.5617, "step": 11786 }, { "epoch": 0.24998409365655022, "grad_norm": 0.3516107499599457, "learning_rate": 1.9250144748852482e-05, "loss": 0.5472, "step": 11787 }, { "epoch": 0.25000530211448324, "grad_norm": 0.3442856967449188, "learning_rate": 1.9250018038208372e-05, "loss": 0.4492, "step": 11788 }, { "epoch": 0.25002651057241626, "grad_norm": 0.34384867548942566, "learning_rate": 1.9249891317276474e-05, "loss": 0.6415, "step": 11789 }, { "epoch": 0.2500477190303493, "grad_norm": 0.42158472537994385, "learning_rate": 1.9249764586056924e-05, "loss": 0.5828, "step": 11790 }, { "epoch": 0.2500689274882823, "grad_norm": 0.37184083461761475, "learning_rate": 1.9249637844549867e-05, "loss": 0.518, "step": 11791 }, { "epoch": 0.2500901359462154, "grad_norm": 0.34759947657585144, "learning_rate": 1.9249511092755443e-05, "loss": 0.4959, "step": 11792 }, { "epoch": 0.2501113444041484, "grad_norm": 0.33918190002441406, "learning_rate": 1.9249384330673794e-05, "loss": 0.4879, "step": 11793 }, { "epoch": 0.2501325528620814, "grad_norm": 0.29664939641952515, "learning_rate": 1.9249257558305065e-05, "loss": 0.5156, "step": 11794 }, { "epoch": 0.25015376132001443, "grad_norm": 0.3473869562149048, "learning_rate": 1.924913077564938e-05, "loss": 0.5366, "step": 11795 }, { "epoch": 0.25017496977794745, "grad_norm": 0.3507693111896515, "learning_rate": 1.9249003982706903e-05, "loss": 0.5049, "step": 11796 }, { "epoch": 0.2501961782358805, "grad_norm": 0.33758896589279175, "learning_rate": 1.9248877179477762e-05, "loss": 0.4674, "step": 11797 }, { "epoch": 0.2502173866938135, "grad_norm": 0.3482344448566437, "learning_rate": 1.92487503659621e-05, "loss": 0.5531, "step": 11798 }, { "epoch": 0.2502385951517465, "grad_norm": 0.35359182953834534, "learning_rate": 1.9248623542160054e-05, "loss": 0.4743, "step": 11799 }, { "epoch": 0.25025980360967953, "grad_norm": 0.39973756670951843, "learning_rate": 1.924849670807177e-05, "loss": 0.4826, "step": 11800 }, { "epoch": 0.25028101206761255, "grad_norm": 0.35111334919929504, "learning_rate": 1.9248369863697394e-05, "loss": 0.5678, "step": 11801 }, { "epoch": 0.25030222052554557, "grad_norm": 0.3572595715522766, "learning_rate": 1.9248243009037056e-05, "loss": 0.5009, "step": 11802 }, { "epoch": 0.2503234289834786, "grad_norm": 0.3745327591896057, "learning_rate": 1.9248116144090906e-05, "loss": 0.5832, "step": 11803 }, { "epoch": 0.2503446374414116, "grad_norm": 0.34920957684516907, "learning_rate": 1.924798926885908e-05, "loss": 0.5055, "step": 11804 }, { "epoch": 0.2503658458993447, "grad_norm": 0.3161872923374176, "learning_rate": 1.9247862383341716e-05, "loss": 0.4945, "step": 11805 }, { "epoch": 0.2503870543572777, "grad_norm": 0.345625638961792, "learning_rate": 1.924773548753897e-05, "loss": 0.5688, "step": 11806 }, { "epoch": 0.2504082628152107, "grad_norm": 0.3701811134815216, "learning_rate": 1.9247608581450964e-05, "loss": 0.5147, "step": 11807 }, { "epoch": 0.25042947127314374, "grad_norm": 0.3402513861656189, "learning_rate": 1.9247481665077852e-05, "loss": 0.5447, "step": 11808 }, { "epoch": 0.25045067973107676, "grad_norm": 0.3291948735713959, "learning_rate": 1.924735473841977e-05, "loss": 0.524, "step": 11809 }, { "epoch": 0.2504718881890098, "grad_norm": 0.34312090277671814, "learning_rate": 1.9247227801476857e-05, "loss": 0.4671, "step": 11810 }, { "epoch": 0.2504930966469428, "grad_norm": 1.3506431579589844, "learning_rate": 1.9247100854249262e-05, "loss": 0.35, "step": 11811 }, { "epoch": 0.2505143051048758, "grad_norm": 0.33453088998794556, "learning_rate": 1.9246973896737122e-05, "loss": 0.4746, "step": 11812 }, { "epoch": 0.25053551356280884, "grad_norm": 0.3397987186908722, "learning_rate": 1.924684692894058e-05, "loss": 0.5584, "step": 11813 }, { "epoch": 0.25055672202074186, "grad_norm": 0.3495335280895233, "learning_rate": 1.9246719950859768e-05, "loss": 0.5265, "step": 11814 }, { "epoch": 0.2505779304786749, "grad_norm": 0.3204566538333893, "learning_rate": 1.9246592962494842e-05, "loss": 0.3956, "step": 11815 }, { "epoch": 0.2505991389366079, "grad_norm": 0.8552914261817932, "learning_rate": 1.924646596384593e-05, "loss": 0.5888, "step": 11816 }, { "epoch": 0.2506203473945409, "grad_norm": 0.3254723846912384, "learning_rate": 1.924633895491318e-05, "loss": 0.5087, "step": 11817 }, { "epoch": 0.25064155585247394, "grad_norm": 0.32039719820022583, "learning_rate": 1.9246211935696733e-05, "loss": 0.4791, "step": 11818 }, { "epoch": 0.250662764310407, "grad_norm": 0.3158952295780182, "learning_rate": 1.9246084906196733e-05, "loss": 0.4827, "step": 11819 }, { "epoch": 0.25068397276834004, "grad_norm": 0.3936139643192291, "learning_rate": 1.9245957866413313e-05, "loss": 0.5932, "step": 11820 }, { "epoch": 0.25070518122627306, "grad_norm": 0.30730199813842773, "learning_rate": 1.924583081634662e-05, "loss": 0.5471, "step": 11821 }, { "epoch": 0.2507263896842061, "grad_norm": 0.36346733570098877, "learning_rate": 1.9245703755996796e-05, "loss": 0.5967, "step": 11822 }, { "epoch": 0.2507475981421391, "grad_norm": 0.3682340085506439, "learning_rate": 1.924557668536398e-05, "loss": 0.4881, "step": 11823 }, { "epoch": 0.2507688066000721, "grad_norm": 0.40519580245018005, "learning_rate": 1.9245449604448313e-05, "loss": 0.5551, "step": 11824 }, { "epoch": 0.25079001505800513, "grad_norm": 0.32265058159828186, "learning_rate": 1.9245322513249935e-05, "loss": 0.5042, "step": 11825 }, { "epoch": 0.25081122351593815, "grad_norm": 0.3146110773086548, "learning_rate": 1.9245195411768994e-05, "loss": 0.5183, "step": 11826 }, { "epoch": 0.2508324319738712, "grad_norm": 0.410641610622406, "learning_rate": 1.924506830000562e-05, "loss": 0.5701, "step": 11827 }, { "epoch": 0.2508536404318042, "grad_norm": 0.31714263558387756, "learning_rate": 1.924494117795997e-05, "loss": 0.5146, "step": 11828 }, { "epoch": 0.2508748488897372, "grad_norm": 0.49350640177726746, "learning_rate": 1.9244814045632173e-05, "loss": 0.6622, "step": 11829 }, { "epoch": 0.25089605734767023, "grad_norm": 0.3364955484867096, "learning_rate": 1.9244686903022375e-05, "loss": 0.5678, "step": 11830 }, { "epoch": 0.25091726580560325, "grad_norm": 0.3434731364250183, "learning_rate": 1.9244559750130714e-05, "loss": 0.5651, "step": 11831 }, { "epoch": 0.25093847426353627, "grad_norm": 0.3626306653022766, "learning_rate": 1.9244432586957338e-05, "loss": 0.4494, "step": 11832 }, { "epoch": 0.25095968272146935, "grad_norm": 0.4594131112098694, "learning_rate": 1.924430541350238e-05, "loss": 0.5809, "step": 11833 }, { "epoch": 0.25098089117940237, "grad_norm": 0.3282946050167084, "learning_rate": 1.9244178229765992e-05, "loss": 0.5489, "step": 11834 }, { "epoch": 0.2510020996373354, "grad_norm": 0.3101412057876587, "learning_rate": 1.9244051035748304e-05, "loss": 0.4281, "step": 11835 }, { "epoch": 0.2510233080952684, "grad_norm": 0.37064510583877563, "learning_rate": 1.9243923831449462e-05, "loss": 0.5577, "step": 11836 }, { "epoch": 0.2510445165532014, "grad_norm": 0.338608980178833, "learning_rate": 1.924379661686961e-05, "loss": 0.6144, "step": 11837 }, { "epoch": 0.25106572501113444, "grad_norm": 0.34624403715133667, "learning_rate": 1.924366939200889e-05, "loss": 0.5302, "step": 11838 }, { "epoch": 0.25108693346906746, "grad_norm": 0.33629027009010315, "learning_rate": 1.924354215686744e-05, "loss": 0.5238, "step": 11839 }, { "epoch": 0.2511081419270005, "grad_norm": 0.4218807518482208, "learning_rate": 1.92434149114454e-05, "loss": 0.5897, "step": 11840 }, { "epoch": 0.2511293503849335, "grad_norm": 0.37767723202705383, "learning_rate": 1.9243287655742916e-05, "loss": 0.5538, "step": 11841 }, { "epoch": 0.2511505588428665, "grad_norm": 0.3730110824108124, "learning_rate": 1.9243160389760127e-05, "loss": 0.5005, "step": 11842 }, { "epoch": 0.25117176730079954, "grad_norm": 0.35830920934677124, "learning_rate": 1.9243033113497176e-05, "loss": 0.4679, "step": 11843 }, { "epoch": 0.25119297575873256, "grad_norm": 0.3461746871471405, "learning_rate": 1.9242905826954203e-05, "loss": 0.5046, "step": 11844 }, { "epoch": 0.2512141842166656, "grad_norm": 0.3497254252433777, "learning_rate": 1.9242778530131353e-05, "loss": 0.5372, "step": 11845 }, { "epoch": 0.25123539267459866, "grad_norm": 0.3652556240558624, "learning_rate": 1.9242651223028763e-05, "loss": 0.5635, "step": 11846 }, { "epoch": 0.2512566011325317, "grad_norm": 0.3248935043811798, "learning_rate": 1.9242523905646577e-05, "loss": 0.4726, "step": 11847 }, { "epoch": 0.2512778095904647, "grad_norm": 0.36853671073913574, "learning_rate": 1.9242396577984935e-05, "loss": 0.6307, "step": 11848 }, { "epoch": 0.2512990180483977, "grad_norm": 0.36677590012550354, "learning_rate": 1.9242269240043983e-05, "loss": 0.4919, "step": 11849 }, { "epoch": 0.25132022650633074, "grad_norm": 0.38775888085365295, "learning_rate": 1.9242141891823855e-05, "loss": 0.5148, "step": 11850 }, { "epoch": 0.25134143496426375, "grad_norm": 0.345124751329422, "learning_rate": 1.92420145333247e-05, "loss": 0.5273, "step": 11851 }, { "epoch": 0.2513626434221968, "grad_norm": 0.3558133542537689, "learning_rate": 1.9241887164546657e-05, "loss": 0.6031, "step": 11852 }, { "epoch": 0.2513838518801298, "grad_norm": 0.5004855394363403, "learning_rate": 1.9241759785489868e-05, "loss": 0.5695, "step": 11853 }, { "epoch": 0.2514050603380628, "grad_norm": 0.37319982051849365, "learning_rate": 1.9241632396154473e-05, "loss": 0.6209, "step": 11854 }, { "epoch": 0.25142626879599583, "grad_norm": 0.3529418706893921, "learning_rate": 1.9241504996540615e-05, "loss": 0.5409, "step": 11855 }, { "epoch": 0.25144747725392885, "grad_norm": 0.3675263524055481, "learning_rate": 1.924137758664843e-05, "loss": 0.483, "step": 11856 }, { "epoch": 0.2514686857118619, "grad_norm": 0.3346503674983978, "learning_rate": 1.9241250166478073e-05, "loss": 0.5543, "step": 11857 }, { "epoch": 0.2514898941697949, "grad_norm": 0.3253363072872162, "learning_rate": 1.9241122736029677e-05, "loss": 0.5531, "step": 11858 }, { "epoch": 0.2515111026277279, "grad_norm": 0.3447302281856537, "learning_rate": 1.924099529530338e-05, "loss": 0.5492, "step": 11859 }, { "epoch": 0.251532311085661, "grad_norm": 0.37743228673934937, "learning_rate": 1.9240867844299332e-05, "loss": 0.4883, "step": 11860 }, { "epoch": 0.251553519543594, "grad_norm": 0.32950571179389954, "learning_rate": 1.924074038301767e-05, "loss": 0.514, "step": 11861 }, { "epoch": 0.251574728001527, "grad_norm": 0.3193398714065552, "learning_rate": 1.9240612911458534e-05, "loss": 0.4452, "step": 11862 }, { "epoch": 0.25159593645946005, "grad_norm": 0.3480885326862335, "learning_rate": 1.9240485429622072e-05, "loss": 0.5912, "step": 11863 }, { "epoch": 0.25161714491739307, "grad_norm": 0.3566577136516571, "learning_rate": 1.924035793750842e-05, "loss": 0.4328, "step": 11864 }, { "epoch": 0.2516383533753261, "grad_norm": 0.33291560411453247, "learning_rate": 1.9240230435117726e-05, "loss": 0.4879, "step": 11865 }, { "epoch": 0.2516595618332591, "grad_norm": 0.3328889310359955, "learning_rate": 1.9240102922450126e-05, "loss": 0.5065, "step": 11866 }, { "epoch": 0.2516807702911921, "grad_norm": 0.3078628480434418, "learning_rate": 1.9239975399505763e-05, "loss": 0.4864, "step": 11867 }, { "epoch": 0.25170197874912514, "grad_norm": 0.38668885827064514, "learning_rate": 1.923984786628478e-05, "loss": 0.4432, "step": 11868 }, { "epoch": 0.25172318720705816, "grad_norm": 0.41542255878448486, "learning_rate": 1.923972032278732e-05, "loss": 0.4804, "step": 11869 }, { "epoch": 0.2517443956649912, "grad_norm": 0.35194259881973267, "learning_rate": 1.923959276901352e-05, "loss": 0.4445, "step": 11870 }, { "epoch": 0.2517656041229242, "grad_norm": 0.3725777864456177, "learning_rate": 1.9239465204963527e-05, "loss": 0.5453, "step": 11871 }, { "epoch": 0.2517868125808572, "grad_norm": 0.35395389795303345, "learning_rate": 1.923933763063748e-05, "loss": 0.4375, "step": 11872 }, { "epoch": 0.2518080210387903, "grad_norm": 0.34274083375930786, "learning_rate": 1.9239210046035523e-05, "loss": 0.486, "step": 11873 }, { "epoch": 0.2518292294967233, "grad_norm": 0.3158440589904785, "learning_rate": 1.92390824511578e-05, "loss": 0.5563, "step": 11874 }, { "epoch": 0.25185043795465634, "grad_norm": 0.337464839220047, "learning_rate": 1.9238954846004447e-05, "loss": 0.5367, "step": 11875 }, { "epoch": 0.25187164641258936, "grad_norm": 0.3429431915283203, "learning_rate": 1.9238827230575606e-05, "loss": 0.5478, "step": 11876 }, { "epoch": 0.2518928548705224, "grad_norm": 0.3852523863315582, "learning_rate": 1.9238699604871425e-05, "loss": 0.5463, "step": 11877 }, { "epoch": 0.2519140633284554, "grad_norm": 0.3317567706108093, "learning_rate": 1.923857196889204e-05, "loss": 0.5447, "step": 11878 }, { "epoch": 0.2519352717863884, "grad_norm": 0.34537747502326965, "learning_rate": 1.92384443226376e-05, "loss": 0.5136, "step": 11879 }, { "epoch": 0.25195648024432143, "grad_norm": 0.3163950741291046, "learning_rate": 1.923831666610824e-05, "loss": 0.503, "step": 11880 }, { "epoch": 0.25197768870225445, "grad_norm": 0.3847198784351349, "learning_rate": 1.9238188999304103e-05, "loss": 0.5283, "step": 11881 }, { "epoch": 0.2519988971601875, "grad_norm": 0.32725992798805237, "learning_rate": 1.923806132222533e-05, "loss": 0.5157, "step": 11882 }, { "epoch": 0.2520201056181205, "grad_norm": 0.312203049659729, "learning_rate": 1.923793363487207e-05, "loss": 0.4819, "step": 11883 }, { "epoch": 0.2520413140760535, "grad_norm": 0.3153608441352844, "learning_rate": 1.923780593724446e-05, "loss": 0.576, "step": 11884 }, { "epoch": 0.25206252253398653, "grad_norm": 0.3811267018318176, "learning_rate": 1.923767822934264e-05, "loss": 0.464, "step": 11885 }, { "epoch": 0.25208373099191955, "grad_norm": 0.4417135417461395, "learning_rate": 1.923755051116676e-05, "loss": 0.4722, "step": 11886 }, { "epoch": 0.2521049394498526, "grad_norm": 0.3183542490005493, "learning_rate": 1.9237422782716953e-05, "loss": 0.5362, "step": 11887 }, { "epoch": 0.25212614790778565, "grad_norm": 0.3207164406776428, "learning_rate": 1.9237295043993366e-05, "loss": 0.4537, "step": 11888 }, { "epoch": 0.25214735636571867, "grad_norm": 0.3664528727531433, "learning_rate": 1.9237167294996137e-05, "loss": 0.5784, "step": 11889 }, { "epoch": 0.2521685648236517, "grad_norm": 0.31431201100349426, "learning_rate": 1.9237039535725414e-05, "loss": 0.4801, "step": 11890 }, { "epoch": 0.2521897732815847, "grad_norm": 0.34139764308929443, "learning_rate": 1.923691176618133e-05, "loss": 0.5774, "step": 11891 }, { "epoch": 0.2522109817395177, "grad_norm": 0.4025340676307678, "learning_rate": 1.9236783986364038e-05, "loss": 0.553, "step": 11892 }, { "epoch": 0.25223219019745075, "grad_norm": 0.34126317501068115, "learning_rate": 1.9236656196273676e-05, "loss": 0.4926, "step": 11893 }, { "epoch": 0.25225339865538376, "grad_norm": 0.32052356004714966, "learning_rate": 1.923652839591038e-05, "loss": 0.5163, "step": 11894 }, { "epoch": 0.2522746071133168, "grad_norm": 0.3544127941131592, "learning_rate": 1.9236400585274302e-05, "loss": 0.5756, "step": 11895 }, { "epoch": 0.2522958155712498, "grad_norm": 0.2743529677391052, "learning_rate": 1.923627276436558e-05, "loss": 0.4284, "step": 11896 }, { "epoch": 0.2523170240291828, "grad_norm": 0.310490220785141, "learning_rate": 1.9236144933184354e-05, "loss": 0.5045, "step": 11897 }, { "epoch": 0.25233823248711584, "grad_norm": 0.3200131058692932, "learning_rate": 1.9236017091730765e-05, "loss": 0.5308, "step": 11898 }, { "epoch": 0.25235944094504886, "grad_norm": 0.3452781140804291, "learning_rate": 1.9235889240004963e-05, "loss": 0.5076, "step": 11899 }, { "epoch": 0.2523806494029819, "grad_norm": 0.29618656635284424, "learning_rate": 1.923576137800708e-05, "loss": 0.4494, "step": 11900 }, { "epoch": 0.25240185786091496, "grad_norm": 0.3978877365589142, "learning_rate": 1.9235633505737267e-05, "loss": 0.5645, "step": 11901 }, { "epoch": 0.252423066318848, "grad_norm": 0.35281774401664734, "learning_rate": 1.9235505623195664e-05, "loss": 0.5786, "step": 11902 }, { "epoch": 0.252444274776781, "grad_norm": 0.28416842222213745, "learning_rate": 1.9235377730382408e-05, "loss": 0.5548, "step": 11903 }, { "epoch": 0.252465483234714, "grad_norm": 0.34288161993026733, "learning_rate": 1.9235249827297648e-05, "loss": 0.5087, "step": 11904 }, { "epoch": 0.25248669169264704, "grad_norm": 0.3265286684036255, "learning_rate": 1.923512191394152e-05, "loss": 0.5728, "step": 11905 }, { "epoch": 0.25250790015058006, "grad_norm": 0.3055388927459717, "learning_rate": 1.9234993990314173e-05, "loss": 0.503, "step": 11906 }, { "epoch": 0.2525291086085131, "grad_norm": 0.30063584446907043, "learning_rate": 1.9234866056415743e-05, "loss": 0.4793, "step": 11907 }, { "epoch": 0.2525503170664461, "grad_norm": 0.48271486163139343, "learning_rate": 1.9234738112246378e-05, "loss": 0.5527, "step": 11908 }, { "epoch": 0.2525715255243791, "grad_norm": 0.382197767496109, "learning_rate": 1.9234610157806217e-05, "loss": 0.4795, "step": 11909 }, { "epoch": 0.25259273398231213, "grad_norm": 0.3278861939907074, "learning_rate": 1.92344821930954e-05, "loss": 0.5628, "step": 11910 }, { "epoch": 0.25261394244024515, "grad_norm": 0.37022116780281067, "learning_rate": 1.9234354218114078e-05, "loss": 0.5288, "step": 11911 }, { "epoch": 0.2526351508981782, "grad_norm": 0.3203364908695221, "learning_rate": 1.9234226232862382e-05, "loss": 0.5066, "step": 11912 }, { "epoch": 0.2526563593561112, "grad_norm": 0.31654679775238037, "learning_rate": 1.9234098237340465e-05, "loss": 0.5121, "step": 11913 }, { "epoch": 0.25267756781404427, "grad_norm": 0.3078024983406067, "learning_rate": 1.9233970231548457e-05, "loss": 0.5047, "step": 11914 }, { "epoch": 0.2526987762719773, "grad_norm": 0.33566516637802124, "learning_rate": 1.923384221548651e-05, "loss": 0.5047, "step": 11915 }, { "epoch": 0.2527199847299103, "grad_norm": 0.3013900816440582, "learning_rate": 1.9233714189154766e-05, "loss": 0.4365, "step": 11916 }, { "epoch": 0.2527411931878433, "grad_norm": 0.3061447739601135, "learning_rate": 1.9233586152553365e-05, "loss": 0.4864, "step": 11917 }, { "epoch": 0.25276240164577635, "grad_norm": 0.34999585151672363, "learning_rate": 1.923345810568245e-05, "loss": 0.4349, "step": 11918 }, { "epoch": 0.25278361010370937, "grad_norm": 0.3272005021572113, "learning_rate": 1.9233330048542164e-05, "loss": 0.556, "step": 11919 }, { "epoch": 0.2528048185616424, "grad_norm": 0.43680453300476074, "learning_rate": 1.9233201981132646e-05, "loss": 0.4838, "step": 11920 }, { "epoch": 0.2528260270195754, "grad_norm": 0.32396695017814636, "learning_rate": 1.923307390345404e-05, "loss": 0.5171, "step": 11921 }, { "epoch": 0.2528472354775084, "grad_norm": 0.3815484941005707, "learning_rate": 1.9232945815506493e-05, "loss": 0.5694, "step": 11922 }, { "epoch": 0.25286844393544144, "grad_norm": 0.38418933749198914, "learning_rate": 1.9232817717290142e-05, "loss": 0.5678, "step": 11923 }, { "epoch": 0.25288965239337446, "grad_norm": 0.37481817603111267, "learning_rate": 1.9232689608805134e-05, "loss": 0.5132, "step": 11924 }, { "epoch": 0.2529108608513075, "grad_norm": 0.3435663878917694, "learning_rate": 1.9232561490051606e-05, "loss": 0.5684, "step": 11925 }, { "epoch": 0.2529320693092405, "grad_norm": 0.3612916171550751, "learning_rate": 1.9232433361029704e-05, "loss": 0.5002, "step": 11926 }, { "epoch": 0.2529532777671735, "grad_norm": 0.44046691060066223, "learning_rate": 1.9232305221739567e-05, "loss": 0.5933, "step": 11927 }, { "epoch": 0.2529744862251066, "grad_norm": 0.35480231046676636, "learning_rate": 1.923217707218134e-05, "loss": 0.5311, "step": 11928 }, { "epoch": 0.2529956946830396, "grad_norm": 0.33226439356803894, "learning_rate": 1.9232048912355172e-05, "loss": 0.4893, "step": 11929 }, { "epoch": 0.25301690314097264, "grad_norm": 0.363564133644104, "learning_rate": 1.9231920742261195e-05, "loss": 0.5854, "step": 11930 }, { "epoch": 0.25303811159890566, "grad_norm": 0.3154807686805725, "learning_rate": 1.923179256189956e-05, "loss": 0.5015, "step": 11931 }, { "epoch": 0.2530593200568387, "grad_norm": 0.3956473469734192, "learning_rate": 1.92316643712704e-05, "loss": 0.5487, "step": 11932 }, { "epoch": 0.2530805285147717, "grad_norm": 0.45053526759147644, "learning_rate": 1.9231536170373866e-05, "loss": 0.4909, "step": 11933 }, { "epoch": 0.2531017369727047, "grad_norm": 0.3360947370529175, "learning_rate": 1.9231407959210097e-05, "loss": 0.5328, "step": 11934 }, { "epoch": 0.25312294543063774, "grad_norm": 0.3500814735889435, "learning_rate": 1.923127973777924e-05, "loss": 0.5296, "step": 11935 }, { "epoch": 0.25314415388857076, "grad_norm": 0.33136850595474243, "learning_rate": 1.9231151506081426e-05, "loss": 0.5094, "step": 11936 }, { "epoch": 0.2531653623465038, "grad_norm": 0.3366939425468445, "learning_rate": 1.923102326411681e-05, "loss": 0.4572, "step": 11937 }, { "epoch": 0.2531865708044368, "grad_norm": 0.37967854738235474, "learning_rate": 1.923089501188553e-05, "loss": 0.5004, "step": 11938 }, { "epoch": 0.2532077792623698, "grad_norm": 0.3002011179924011, "learning_rate": 1.923076674938773e-05, "loss": 0.4844, "step": 11939 }, { "epoch": 0.25322898772030283, "grad_norm": 0.36327311396598816, "learning_rate": 1.923063847662355e-05, "loss": 0.5493, "step": 11940 }, { "epoch": 0.25325019617823585, "grad_norm": 0.42490917444229126, "learning_rate": 1.9230510193593136e-05, "loss": 0.6337, "step": 11941 }, { "epoch": 0.25327140463616893, "grad_norm": 0.3348848223686218, "learning_rate": 1.9230381900296625e-05, "loss": 0.3992, "step": 11942 }, { "epoch": 0.25329261309410195, "grad_norm": 0.3354208469390869, "learning_rate": 1.9230253596734165e-05, "loss": 0.5046, "step": 11943 }, { "epoch": 0.25331382155203497, "grad_norm": 0.3417311906814575, "learning_rate": 1.9230125282905897e-05, "loss": 0.4913, "step": 11944 }, { "epoch": 0.253335030009968, "grad_norm": 0.35204410552978516, "learning_rate": 1.9229996958811965e-05, "loss": 0.4324, "step": 11945 }, { "epoch": 0.253356238467901, "grad_norm": 0.3399067521095276, "learning_rate": 1.922986862445251e-05, "loss": 0.5953, "step": 11946 }, { "epoch": 0.253377446925834, "grad_norm": 0.3099575638771057, "learning_rate": 1.9229740279827674e-05, "loss": 0.5339, "step": 11947 }, { "epoch": 0.25339865538376705, "grad_norm": 0.3306078314781189, "learning_rate": 1.9229611924937603e-05, "loss": 0.5252, "step": 11948 }, { "epoch": 0.25341986384170007, "grad_norm": 0.7810565829277039, "learning_rate": 1.9229483559782438e-05, "loss": 0.4318, "step": 11949 }, { "epoch": 0.2534410722996331, "grad_norm": 0.34932011365890503, "learning_rate": 1.922935518436232e-05, "loss": 0.5612, "step": 11950 }, { "epoch": 0.2534622807575661, "grad_norm": 0.4502827823162079, "learning_rate": 1.9229226798677395e-05, "loss": 0.5295, "step": 11951 }, { "epoch": 0.2534834892154991, "grad_norm": 0.3213222324848175, "learning_rate": 1.92290984027278e-05, "loss": 0.5582, "step": 11952 }, { "epoch": 0.25350469767343214, "grad_norm": 0.33546045422554016, "learning_rate": 1.9228969996513687e-05, "loss": 0.5345, "step": 11953 }, { "epoch": 0.25352590613136516, "grad_norm": 0.3772491216659546, "learning_rate": 1.9228841580035193e-05, "loss": 0.5519, "step": 11954 }, { "epoch": 0.25354711458929824, "grad_norm": 0.33786043524742126, "learning_rate": 1.922871315329246e-05, "loss": 0.4826, "step": 11955 }, { "epoch": 0.25356832304723126, "grad_norm": 0.32218828797340393, "learning_rate": 1.9228584716285633e-05, "loss": 0.475, "step": 11956 }, { "epoch": 0.2535895315051643, "grad_norm": 0.3366091549396515, "learning_rate": 1.9228456269014852e-05, "loss": 0.5064, "step": 11957 }, { "epoch": 0.2536107399630973, "grad_norm": 0.3441336154937744, "learning_rate": 1.9228327811480265e-05, "loss": 0.5959, "step": 11958 }, { "epoch": 0.2536319484210303, "grad_norm": 0.32621216773986816, "learning_rate": 1.9228199343682013e-05, "loss": 0.451, "step": 11959 }, { "epoch": 0.25365315687896334, "grad_norm": 0.3748174011707306, "learning_rate": 1.9228070865620233e-05, "loss": 0.4604, "step": 11960 }, { "epoch": 0.25367436533689636, "grad_norm": 0.5284426212310791, "learning_rate": 1.9227942377295076e-05, "loss": 0.4719, "step": 11961 }, { "epoch": 0.2536955737948294, "grad_norm": 0.3425969183444977, "learning_rate": 1.9227813878706682e-05, "loss": 0.594, "step": 11962 }, { "epoch": 0.2537167822527624, "grad_norm": 0.32072147727012634, "learning_rate": 1.9227685369855193e-05, "loss": 0.516, "step": 11963 }, { "epoch": 0.2537379907106954, "grad_norm": 0.5034435391426086, "learning_rate": 1.9227556850740754e-05, "loss": 0.5339, "step": 11964 }, { "epoch": 0.25375919916862844, "grad_norm": 0.34701773524284363, "learning_rate": 1.9227428321363502e-05, "loss": 0.4124, "step": 11965 }, { "epoch": 0.25378040762656146, "grad_norm": 0.35428351163864136, "learning_rate": 1.922729978172359e-05, "loss": 0.5883, "step": 11966 }, { "epoch": 0.2538016160844945, "grad_norm": 0.34029385447502136, "learning_rate": 1.9227171231821153e-05, "loss": 0.5383, "step": 11967 }, { "epoch": 0.2538228245424275, "grad_norm": 0.3378838002681732, "learning_rate": 1.9227042671656333e-05, "loss": 0.515, "step": 11968 }, { "epoch": 0.25384403300036057, "grad_norm": 0.345135897397995, "learning_rate": 1.9226914101229282e-05, "loss": 0.5889, "step": 11969 }, { "epoch": 0.2538652414582936, "grad_norm": 0.3329283893108368, "learning_rate": 1.9226785520540133e-05, "loss": 0.5409, "step": 11970 }, { "epoch": 0.2538864499162266, "grad_norm": 0.3173777163028717, "learning_rate": 1.9226656929589037e-05, "loss": 0.5194, "step": 11971 }, { "epoch": 0.25390765837415963, "grad_norm": 0.355840802192688, "learning_rate": 1.922652832837613e-05, "loss": 0.5641, "step": 11972 }, { "epoch": 0.25392886683209265, "grad_norm": 0.3526259660720825, "learning_rate": 1.9226399716901558e-05, "loss": 0.5514, "step": 11973 }, { "epoch": 0.25395007529002567, "grad_norm": 0.41355276107788086, "learning_rate": 1.9226271095165466e-05, "loss": 0.5637, "step": 11974 }, { "epoch": 0.2539712837479587, "grad_norm": 0.35157570242881775, "learning_rate": 1.9226142463167994e-05, "loss": 0.4453, "step": 11975 }, { "epoch": 0.2539924922058917, "grad_norm": 0.297191321849823, "learning_rate": 1.922601382090929e-05, "loss": 0.3922, "step": 11976 }, { "epoch": 0.2540137006638247, "grad_norm": 0.3395700454711914, "learning_rate": 1.922588516838949e-05, "loss": 0.4633, "step": 11977 }, { "epoch": 0.25403490912175775, "grad_norm": 0.3673645555973053, "learning_rate": 1.9225756505608743e-05, "loss": 0.537, "step": 11978 }, { "epoch": 0.25405611757969077, "grad_norm": 0.465593159198761, "learning_rate": 1.9225627832567188e-05, "loss": 0.5228, "step": 11979 }, { "epoch": 0.2540773260376238, "grad_norm": 0.38612303137779236, "learning_rate": 1.922549914926497e-05, "loss": 0.4659, "step": 11980 }, { "epoch": 0.2540985344955568, "grad_norm": 0.3581233322620392, "learning_rate": 1.9225370455702233e-05, "loss": 0.5564, "step": 11981 }, { "epoch": 0.2541197429534898, "grad_norm": 0.41502779722213745, "learning_rate": 1.922524175187912e-05, "loss": 0.462, "step": 11982 }, { "epoch": 0.2541409514114229, "grad_norm": 0.3398919105529785, "learning_rate": 1.922511303779577e-05, "loss": 0.4054, "step": 11983 }, { "epoch": 0.2541621598693559, "grad_norm": 0.383768767118454, "learning_rate": 1.922498431345233e-05, "loss": 0.6269, "step": 11984 }, { "epoch": 0.25418336832728894, "grad_norm": 0.34528887271881104, "learning_rate": 1.9224855578848946e-05, "loss": 0.5199, "step": 11985 }, { "epoch": 0.25420457678522196, "grad_norm": 0.3306111991405487, "learning_rate": 1.9224726833985757e-05, "loss": 0.4625, "step": 11986 }, { "epoch": 0.254225785243155, "grad_norm": 0.35397759079933167, "learning_rate": 1.9224598078862904e-05, "loss": 0.498, "step": 11987 }, { "epoch": 0.254246993701088, "grad_norm": 0.36355483531951904, "learning_rate": 1.9224469313480535e-05, "loss": 0.5117, "step": 11988 }, { "epoch": 0.254268202159021, "grad_norm": 0.33200377225875854, "learning_rate": 1.9224340537838794e-05, "loss": 0.5305, "step": 11989 }, { "epoch": 0.25428941061695404, "grad_norm": 0.32303178310394287, "learning_rate": 1.9224211751937817e-05, "loss": 0.5058, "step": 11990 }, { "epoch": 0.25431061907488706, "grad_norm": 0.34287557005882263, "learning_rate": 1.9224082955777753e-05, "loss": 0.5427, "step": 11991 }, { "epoch": 0.2543318275328201, "grad_norm": 0.3027529716491699, "learning_rate": 1.9223954149358744e-05, "loss": 0.498, "step": 11992 }, { "epoch": 0.2543530359907531, "grad_norm": 0.3451613187789917, "learning_rate": 1.9223825332680934e-05, "loss": 0.5002, "step": 11993 }, { "epoch": 0.2543742444486861, "grad_norm": 0.3854515552520752, "learning_rate": 1.9223696505744466e-05, "loss": 0.61, "step": 11994 }, { "epoch": 0.25439545290661914, "grad_norm": 0.35670873522758484, "learning_rate": 1.9223567668549485e-05, "loss": 0.504, "step": 11995 }, { "epoch": 0.2544166613645522, "grad_norm": 0.35400500893592834, "learning_rate": 1.9223438821096128e-05, "loss": 0.5412, "step": 11996 }, { "epoch": 0.25443786982248523, "grad_norm": 0.36635032296180725, "learning_rate": 1.922330996338454e-05, "loss": 0.5476, "step": 11997 }, { "epoch": 0.25445907828041825, "grad_norm": 0.4838763177394867, "learning_rate": 1.9223181095414874e-05, "loss": 0.5432, "step": 11998 }, { "epoch": 0.25448028673835127, "grad_norm": 0.3376532793045044, "learning_rate": 1.9223052217187265e-05, "loss": 0.5129, "step": 11999 }, { "epoch": 0.2545014951962843, "grad_norm": 0.3401402235031128, "learning_rate": 1.9222923328701853e-05, "loss": 0.4844, "step": 12000 }, { "epoch": 0.2545227036542173, "grad_norm": 0.3070288598537445, "learning_rate": 1.9222794429958788e-05, "loss": 0.475, "step": 12001 }, { "epoch": 0.25454391211215033, "grad_norm": 1.122661828994751, "learning_rate": 1.922266552095821e-05, "loss": 0.5465, "step": 12002 }, { "epoch": 0.25456512057008335, "grad_norm": 0.3770982623100281, "learning_rate": 1.922253660170027e-05, "loss": 0.5248, "step": 12003 }, { "epoch": 0.25458632902801637, "grad_norm": 0.3428516983985901, "learning_rate": 1.9222407672185096e-05, "loss": 0.4799, "step": 12004 }, { "epoch": 0.2546075374859494, "grad_norm": 0.33636289834976196, "learning_rate": 1.9222278732412846e-05, "loss": 0.508, "step": 12005 }, { "epoch": 0.2546287459438824, "grad_norm": 0.35087674856185913, "learning_rate": 1.9222149782383653e-05, "loss": 0.597, "step": 12006 }, { "epoch": 0.2546499544018154, "grad_norm": 0.3296082019805908, "learning_rate": 1.922202082209767e-05, "loss": 0.5106, "step": 12007 }, { "epoch": 0.25467116285974845, "grad_norm": 0.48025280237197876, "learning_rate": 1.9221891851555034e-05, "loss": 0.4967, "step": 12008 }, { "epoch": 0.25469237131768147, "grad_norm": 0.33557915687561035, "learning_rate": 1.922176287075589e-05, "loss": 0.5317, "step": 12009 }, { "epoch": 0.25471357977561454, "grad_norm": 0.38546818494796753, "learning_rate": 1.9221633879700378e-05, "loss": 0.4348, "step": 12010 }, { "epoch": 0.25473478823354756, "grad_norm": 0.35338282585144043, "learning_rate": 1.922150487838865e-05, "loss": 0.5015, "step": 12011 }, { "epoch": 0.2547559966914806, "grad_norm": 0.3198561668395996, "learning_rate": 1.9221375866820843e-05, "loss": 0.3938, "step": 12012 }, { "epoch": 0.2547772051494136, "grad_norm": 0.4267417788505554, "learning_rate": 1.92212468449971e-05, "loss": 0.5786, "step": 12013 }, { "epoch": 0.2547984136073466, "grad_norm": 0.38883161544799805, "learning_rate": 1.9221117812917568e-05, "loss": 0.4652, "step": 12014 }, { "epoch": 0.25481962206527964, "grad_norm": 0.33500126004219055, "learning_rate": 1.9220988770582388e-05, "loss": 0.5563, "step": 12015 }, { "epoch": 0.25484083052321266, "grad_norm": 0.3426001965999603, "learning_rate": 1.9220859717991705e-05, "loss": 0.3953, "step": 12016 }, { "epoch": 0.2548620389811457, "grad_norm": 0.32015591859817505, "learning_rate": 1.9220730655145662e-05, "loss": 0.5241, "step": 12017 }, { "epoch": 0.2548832474390787, "grad_norm": 0.34002041816711426, "learning_rate": 1.9220601582044402e-05, "loss": 0.5073, "step": 12018 }, { "epoch": 0.2549044558970117, "grad_norm": 0.33642423152923584, "learning_rate": 1.9220472498688066e-05, "loss": 0.5506, "step": 12019 }, { "epoch": 0.25492566435494474, "grad_norm": 0.3828883469104767, "learning_rate": 1.9220343405076807e-05, "loss": 0.4899, "step": 12020 }, { "epoch": 0.25494687281287776, "grad_norm": 0.3732128143310547, "learning_rate": 1.9220214301210758e-05, "loss": 0.6299, "step": 12021 }, { "epoch": 0.2549680812708108, "grad_norm": 0.3913940191268921, "learning_rate": 1.922008518709007e-05, "loss": 0.5797, "step": 12022 }, { "epoch": 0.2549892897287438, "grad_norm": 0.3502376973628998, "learning_rate": 1.9219956062714877e-05, "loss": 0.5459, "step": 12023 }, { "epoch": 0.25501049818667687, "grad_norm": 0.3310728669166565, "learning_rate": 1.9219826928085336e-05, "loss": 0.5915, "step": 12024 }, { "epoch": 0.2550317066446099, "grad_norm": 0.3214617669582367, "learning_rate": 1.921969778320158e-05, "loss": 0.5387, "step": 12025 }, { "epoch": 0.2550529151025429, "grad_norm": 0.33636635541915894, "learning_rate": 1.9219568628063757e-05, "loss": 0.543, "step": 12026 }, { "epoch": 0.25507412356047593, "grad_norm": 0.33557429909706116, "learning_rate": 1.921943946267201e-05, "loss": 0.5534, "step": 12027 }, { "epoch": 0.25509533201840895, "grad_norm": 0.4211275279521942, "learning_rate": 1.9219310287026484e-05, "loss": 0.4572, "step": 12028 }, { "epoch": 0.25511654047634197, "grad_norm": 0.32985925674438477, "learning_rate": 1.9219181101127316e-05, "loss": 0.5273, "step": 12029 }, { "epoch": 0.255137748934275, "grad_norm": 0.3839398920536041, "learning_rate": 1.9219051904974656e-05, "loss": 0.6195, "step": 12030 }, { "epoch": 0.255158957392208, "grad_norm": 0.3302135169506073, "learning_rate": 1.921892269856865e-05, "loss": 0.5614, "step": 12031 }, { "epoch": 0.255180165850141, "grad_norm": 0.38312870264053345, "learning_rate": 1.9218793481909438e-05, "loss": 0.6535, "step": 12032 }, { "epoch": 0.25520137430807405, "grad_norm": 0.3093004822731018, "learning_rate": 1.9218664254997162e-05, "loss": 0.4368, "step": 12033 }, { "epoch": 0.25522258276600707, "grad_norm": 0.36529070138931274, "learning_rate": 1.921853501783197e-05, "loss": 0.538, "step": 12034 }, { "epoch": 0.2552437912239401, "grad_norm": 0.4258599281311035, "learning_rate": 1.9218405770414002e-05, "loss": 0.5115, "step": 12035 }, { "epoch": 0.2552649996818731, "grad_norm": 0.32536977529525757, "learning_rate": 1.9218276512743405e-05, "loss": 0.5728, "step": 12036 }, { "epoch": 0.2552862081398062, "grad_norm": 0.35731032490730286, "learning_rate": 1.9218147244820318e-05, "loss": 0.5051, "step": 12037 }, { "epoch": 0.2553074165977392, "grad_norm": 0.32313328981399536, "learning_rate": 1.921801796664489e-05, "loss": 0.4956, "step": 12038 }, { "epoch": 0.2553286250556722, "grad_norm": 0.333137184381485, "learning_rate": 1.921788867821726e-05, "loss": 0.5538, "step": 12039 }, { "epoch": 0.25534983351360524, "grad_norm": 0.3423960208892822, "learning_rate": 1.9217759379537576e-05, "loss": 0.524, "step": 12040 }, { "epoch": 0.25537104197153826, "grad_norm": 0.32167765498161316, "learning_rate": 1.921763007060598e-05, "loss": 0.5692, "step": 12041 }, { "epoch": 0.2553922504294713, "grad_norm": 0.35477226972579956, "learning_rate": 1.9217500751422616e-05, "loss": 0.5277, "step": 12042 }, { "epoch": 0.2554134588874043, "grad_norm": 0.3641605079174042, "learning_rate": 1.9217371421987627e-05, "loss": 0.4876, "step": 12043 }, { "epoch": 0.2554346673453373, "grad_norm": 0.3131581246852875, "learning_rate": 1.9217242082301158e-05, "loss": 0.5791, "step": 12044 }, { "epoch": 0.25545587580327034, "grad_norm": 0.3150165379047394, "learning_rate": 1.9217112732363354e-05, "loss": 0.4521, "step": 12045 }, { "epoch": 0.25547708426120336, "grad_norm": 0.34185710549354553, "learning_rate": 1.9216983372174353e-05, "loss": 0.5145, "step": 12046 }, { "epoch": 0.2554982927191364, "grad_norm": 0.37491685152053833, "learning_rate": 1.9216854001734307e-05, "loss": 0.5406, "step": 12047 }, { "epoch": 0.2555195011770694, "grad_norm": 0.397082656621933, "learning_rate": 1.9216724621043355e-05, "loss": 0.5497, "step": 12048 }, { "epoch": 0.2555407096350024, "grad_norm": 0.3546765446662903, "learning_rate": 1.921659523010164e-05, "loss": 0.5064, "step": 12049 }, { "epoch": 0.25556191809293544, "grad_norm": 0.3030517101287842, "learning_rate": 1.921646582890931e-05, "loss": 0.4773, "step": 12050 }, { "epoch": 0.2555831265508685, "grad_norm": 0.38521116971969604, "learning_rate": 1.921633641746651e-05, "loss": 0.5387, "step": 12051 }, { "epoch": 0.25560433500880153, "grad_norm": 0.442636638879776, "learning_rate": 1.9216206995773373e-05, "loss": 0.5133, "step": 12052 }, { "epoch": 0.25562554346673455, "grad_norm": 0.4075632095336914, "learning_rate": 1.9216077563830057e-05, "loss": 0.5356, "step": 12053 }, { "epoch": 0.25564675192466757, "grad_norm": 0.3979426622390747, "learning_rate": 1.9215948121636697e-05, "loss": 0.6119, "step": 12054 }, { "epoch": 0.2556679603826006, "grad_norm": 0.35112395882606506, "learning_rate": 1.9215818669193443e-05, "loss": 0.5423, "step": 12055 }, { "epoch": 0.2556891688405336, "grad_norm": 0.3131471574306488, "learning_rate": 1.9215689206500432e-05, "loss": 0.4698, "step": 12056 }, { "epoch": 0.25571037729846663, "grad_norm": 0.31040042638778687, "learning_rate": 1.921555973355781e-05, "loss": 0.4892, "step": 12057 }, { "epoch": 0.25573158575639965, "grad_norm": 0.37713366746902466, "learning_rate": 1.9215430250365725e-05, "loss": 0.5363, "step": 12058 }, { "epoch": 0.25575279421433267, "grad_norm": 0.35949525237083435, "learning_rate": 1.9215300756924317e-05, "loss": 0.4999, "step": 12059 }, { "epoch": 0.2557740026722657, "grad_norm": 0.33533090353012085, "learning_rate": 1.9215171253233733e-05, "loss": 0.5393, "step": 12060 }, { "epoch": 0.2557952111301987, "grad_norm": 0.3316901922225952, "learning_rate": 1.9215041739294114e-05, "loss": 0.4982, "step": 12061 }, { "epoch": 0.2558164195881317, "grad_norm": 0.3648948669433594, "learning_rate": 1.921491221510561e-05, "loss": 0.5094, "step": 12062 }, { "epoch": 0.25583762804606475, "grad_norm": 0.3295759856700897, "learning_rate": 1.9214782680668357e-05, "loss": 0.5233, "step": 12063 }, { "epoch": 0.2558588365039978, "grad_norm": 0.379412978887558, "learning_rate": 1.9214653135982502e-05, "loss": 0.4953, "step": 12064 }, { "epoch": 0.25588004496193084, "grad_norm": 0.31966111063957214, "learning_rate": 1.9214523581048193e-05, "loss": 0.5307, "step": 12065 }, { "epoch": 0.25590125341986386, "grad_norm": 0.4085349440574646, "learning_rate": 1.9214394015865568e-05, "loss": 0.4751, "step": 12066 }, { "epoch": 0.2559224618777969, "grad_norm": 0.3425707221031189, "learning_rate": 1.9214264440434776e-05, "loss": 0.6316, "step": 12067 }, { "epoch": 0.2559436703357299, "grad_norm": 0.32157108187675476, "learning_rate": 1.9214134854755956e-05, "loss": 0.492, "step": 12068 }, { "epoch": 0.2559648787936629, "grad_norm": 0.5383448004722595, "learning_rate": 1.921400525882926e-05, "loss": 0.4537, "step": 12069 }, { "epoch": 0.25598608725159594, "grad_norm": 0.3366798460483551, "learning_rate": 1.9213875652654824e-05, "loss": 0.4854, "step": 12070 }, { "epoch": 0.25600729570952896, "grad_norm": 0.34064579010009766, "learning_rate": 1.9213746036232798e-05, "loss": 0.5286, "step": 12071 }, { "epoch": 0.256028504167462, "grad_norm": 0.3385881781578064, "learning_rate": 1.9213616409563323e-05, "loss": 0.5159, "step": 12072 }, { "epoch": 0.256049712625395, "grad_norm": 0.3625720143318176, "learning_rate": 1.921348677264654e-05, "loss": 0.576, "step": 12073 }, { "epoch": 0.256070921083328, "grad_norm": 0.3426985740661621, "learning_rate": 1.9213357125482602e-05, "loss": 0.598, "step": 12074 }, { "epoch": 0.25609212954126104, "grad_norm": 0.35688212513923645, "learning_rate": 1.9213227468071648e-05, "loss": 0.5528, "step": 12075 }, { "epoch": 0.25611333799919406, "grad_norm": 0.3885907828807831, "learning_rate": 1.921309780041382e-05, "loss": 0.558, "step": 12076 }, { "epoch": 0.2561345464571271, "grad_norm": 0.33740243315696716, "learning_rate": 1.9212968122509265e-05, "loss": 0.5071, "step": 12077 }, { "epoch": 0.25615575491506015, "grad_norm": 0.36290615797042847, "learning_rate": 1.9212838434358127e-05, "loss": 0.5166, "step": 12078 }, { "epoch": 0.25617696337299317, "grad_norm": 0.2929953634738922, "learning_rate": 1.921270873596055e-05, "loss": 0.4438, "step": 12079 }, { "epoch": 0.2561981718309262, "grad_norm": 0.3493843078613281, "learning_rate": 1.921257902731668e-05, "loss": 0.5051, "step": 12080 }, { "epoch": 0.2562193802888592, "grad_norm": 0.38438066840171814, "learning_rate": 1.9212449308426658e-05, "loss": 0.5556, "step": 12081 }, { "epoch": 0.25624058874679223, "grad_norm": 0.32743799686431885, "learning_rate": 1.921231957929063e-05, "loss": 0.52, "step": 12082 }, { "epoch": 0.25626179720472525, "grad_norm": 0.36793312430381775, "learning_rate": 1.9212189839908738e-05, "loss": 0.5472, "step": 12083 }, { "epoch": 0.25628300566265827, "grad_norm": 0.3541465997695923, "learning_rate": 1.921206009028113e-05, "loss": 0.4778, "step": 12084 }, { "epoch": 0.2563042141205913, "grad_norm": 0.3323013186454773, "learning_rate": 1.9211930330407953e-05, "loss": 0.5443, "step": 12085 }, { "epoch": 0.2563254225785243, "grad_norm": 0.41402021050453186, "learning_rate": 1.9211800560289345e-05, "loss": 0.6052, "step": 12086 }, { "epoch": 0.25634663103645733, "grad_norm": 0.4112079441547394, "learning_rate": 1.921167077992545e-05, "loss": 0.4726, "step": 12087 }, { "epoch": 0.25636783949439035, "grad_norm": 0.3450827896595001, "learning_rate": 1.9211540989316417e-05, "loss": 0.5357, "step": 12088 }, { "epoch": 0.25638904795232337, "grad_norm": 0.5576100945472717, "learning_rate": 1.921141118846239e-05, "loss": 0.5677, "step": 12089 }, { "epoch": 0.2564102564102564, "grad_norm": 0.32821691036224365, "learning_rate": 1.9211281377363503e-05, "loss": 0.5956, "step": 12090 }, { "epoch": 0.2564314648681894, "grad_norm": 0.4428989589214325, "learning_rate": 1.9211151556019917e-05, "loss": 0.4807, "step": 12091 }, { "epoch": 0.2564526733261225, "grad_norm": 0.30326738953590393, "learning_rate": 1.9211021724431766e-05, "loss": 0.5048, "step": 12092 }, { "epoch": 0.2564738817840555, "grad_norm": 0.3872341513633728, "learning_rate": 1.9210891882599198e-05, "loss": 0.5717, "step": 12093 }, { "epoch": 0.2564950902419885, "grad_norm": 0.3443835973739624, "learning_rate": 1.9210762030522353e-05, "loss": 0.4702, "step": 12094 }, { "epoch": 0.25651629869992154, "grad_norm": 0.40482521057128906, "learning_rate": 1.921063216820138e-05, "loss": 0.554, "step": 12095 }, { "epoch": 0.25653750715785456, "grad_norm": 1.4695863723754883, "learning_rate": 1.921050229563642e-05, "loss": 0.5771, "step": 12096 }, { "epoch": 0.2565587156157876, "grad_norm": 0.32538285851478577, "learning_rate": 1.9210372412827624e-05, "loss": 0.5579, "step": 12097 }, { "epoch": 0.2565799240737206, "grad_norm": 0.3425334692001343, "learning_rate": 1.921024251977513e-05, "loss": 0.5097, "step": 12098 }, { "epoch": 0.2566011325316536, "grad_norm": 0.31797704100608826, "learning_rate": 1.921011261647908e-05, "loss": 0.503, "step": 12099 }, { "epoch": 0.25662234098958664, "grad_norm": 0.3898545801639557, "learning_rate": 1.920998270293963e-05, "loss": 0.5137, "step": 12100 }, { "epoch": 0.25664354944751966, "grad_norm": 0.3594188988208771, "learning_rate": 1.9209852779156913e-05, "loss": 0.4658, "step": 12101 }, { "epoch": 0.2566647579054527, "grad_norm": 0.331564337015152, "learning_rate": 1.9209722845131077e-05, "loss": 0.4764, "step": 12102 }, { "epoch": 0.2566859663633857, "grad_norm": 0.3884902000427246, "learning_rate": 1.920959290086227e-05, "loss": 0.5726, "step": 12103 }, { "epoch": 0.2567071748213187, "grad_norm": 0.33474001288414, "learning_rate": 1.920946294635063e-05, "loss": 0.5739, "step": 12104 }, { "epoch": 0.2567283832792518, "grad_norm": 0.38411223888397217, "learning_rate": 1.9209332981596308e-05, "loss": 0.5196, "step": 12105 }, { "epoch": 0.2567495917371848, "grad_norm": 0.490276038646698, "learning_rate": 1.9209203006599448e-05, "loss": 0.4952, "step": 12106 }, { "epoch": 0.25677080019511783, "grad_norm": 0.44232743978500366, "learning_rate": 1.9209073021360187e-05, "loss": 0.4785, "step": 12107 }, { "epoch": 0.25679200865305085, "grad_norm": 0.3174387216567993, "learning_rate": 1.920894302587868e-05, "loss": 0.4807, "step": 12108 }, { "epoch": 0.25681321711098387, "grad_norm": 0.3850342333316803, "learning_rate": 1.920881302015506e-05, "loss": 0.5253, "step": 12109 }, { "epoch": 0.2568344255689169, "grad_norm": 0.3132701516151428, "learning_rate": 1.9208683004189484e-05, "loss": 0.5713, "step": 12110 }, { "epoch": 0.2568556340268499, "grad_norm": 0.34271809458732605, "learning_rate": 1.9208552977982093e-05, "loss": 0.5551, "step": 12111 }, { "epoch": 0.25687684248478293, "grad_norm": 0.40770232677459717, "learning_rate": 1.9208422941533022e-05, "loss": 0.5302, "step": 12112 }, { "epoch": 0.25689805094271595, "grad_norm": 0.348943829536438, "learning_rate": 1.920829289484243e-05, "loss": 0.5006, "step": 12113 }, { "epoch": 0.25691925940064897, "grad_norm": 0.3658134937286377, "learning_rate": 1.920816283791045e-05, "loss": 0.5169, "step": 12114 }, { "epoch": 0.256940467858582, "grad_norm": 0.33662837743759155, "learning_rate": 1.920803277073723e-05, "loss": 0.5307, "step": 12115 }, { "epoch": 0.256961676316515, "grad_norm": 0.3309729993343353, "learning_rate": 1.920790269332292e-05, "loss": 0.522, "step": 12116 }, { "epoch": 0.25698288477444803, "grad_norm": 0.4307326376438141, "learning_rate": 1.920777260566766e-05, "loss": 0.5254, "step": 12117 }, { "epoch": 0.25700409323238105, "grad_norm": 0.3909158408641815, "learning_rate": 1.9207642507771593e-05, "loss": 0.5424, "step": 12118 }, { "epoch": 0.2570253016903141, "grad_norm": 0.35123327374458313, "learning_rate": 1.920751239963487e-05, "loss": 0.5285, "step": 12119 }, { "epoch": 0.25704651014824714, "grad_norm": 0.40682777762413025, "learning_rate": 1.9207382281257628e-05, "loss": 0.4268, "step": 12120 }, { "epoch": 0.25706771860618016, "grad_norm": 0.3109920620918274, "learning_rate": 1.9207252152640016e-05, "loss": 0.5023, "step": 12121 }, { "epoch": 0.2570889270641132, "grad_norm": 0.36737313866615295, "learning_rate": 1.920712201378218e-05, "loss": 0.5137, "step": 12122 }, { "epoch": 0.2571101355220462, "grad_norm": 0.38871127367019653, "learning_rate": 1.920699186468426e-05, "loss": 0.5051, "step": 12123 }, { "epoch": 0.2571313439799792, "grad_norm": 0.3087400496006012, "learning_rate": 1.9206861705346406e-05, "loss": 0.5375, "step": 12124 }, { "epoch": 0.25715255243791224, "grad_norm": 0.3187295198440552, "learning_rate": 1.920673153576876e-05, "loss": 0.4837, "step": 12125 }, { "epoch": 0.25717376089584526, "grad_norm": 0.3368210792541504, "learning_rate": 1.9206601355951467e-05, "loss": 0.5727, "step": 12126 }, { "epoch": 0.2571949693537783, "grad_norm": 0.40211203694343567, "learning_rate": 1.9206471165894673e-05, "loss": 0.5428, "step": 12127 }, { "epoch": 0.2572161778117113, "grad_norm": 0.34178081154823303, "learning_rate": 1.920634096559852e-05, "loss": 0.4859, "step": 12128 }, { "epoch": 0.2572373862696443, "grad_norm": 0.8244999647140503, "learning_rate": 1.9206210755063153e-05, "loss": 0.4756, "step": 12129 }, { "epoch": 0.25725859472757734, "grad_norm": 0.2979949116706848, "learning_rate": 1.9206080534288723e-05, "loss": 0.4838, "step": 12130 }, { "epoch": 0.25727980318551036, "grad_norm": 0.3171149492263794, "learning_rate": 1.9205950303275368e-05, "loss": 0.5034, "step": 12131 }, { "epoch": 0.2573010116434434, "grad_norm": 0.33860623836517334, "learning_rate": 1.9205820062023232e-05, "loss": 0.5499, "step": 12132 }, { "epoch": 0.25732222010137645, "grad_norm": 0.3168046176433563, "learning_rate": 1.9205689810532466e-05, "loss": 0.5093, "step": 12133 }, { "epoch": 0.2573434285593095, "grad_norm": 0.46541929244995117, "learning_rate": 1.9205559548803213e-05, "loss": 0.5245, "step": 12134 }, { "epoch": 0.2573646370172425, "grad_norm": 0.35798385739326477, "learning_rate": 1.9205429276835616e-05, "loss": 0.4212, "step": 12135 }, { "epoch": 0.2573858454751755, "grad_norm": 0.3219688832759857, "learning_rate": 1.920529899462982e-05, "loss": 0.5173, "step": 12136 }, { "epoch": 0.25740705393310853, "grad_norm": 0.3385360836982727, "learning_rate": 1.9205168702185966e-05, "loss": 0.5557, "step": 12137 }, { "epoch": 0.25742826239104155, "grad_norm": 0.3718721866607666, "learning_rate": 1.9205038399504208e-05, "loss": 0.5946, "step": 12138 }, { "epoch": 0.25744947084897457, "grad_norm": 0.38661643862724304, "learning_rate": 1.9204908086584685e-05, "loss": 0.4866, "step": 12139 }, { "epoch": 0.2574706793069076, "grad_norm": 0.35035067796707153, "learning_rate": 1.9204777763427546e-05, "loss": 0.5104, "step": 12140 }, { "epoch": 0.2574918877648406, "grad_norm": 0.3576895594596863, "learning_rate": 1.9204647430032926e-05, "loss": 0.5603, "step": 12141 }, { "epoch": 0.25751309622277363, "grad_norm": 0.3586243689060211, "learning_rate": 1.9204517086400985e-05, "loss": 0.5532, "step": 12142 }, { "epoch": 0.25753430468070665, "grad_norm": 0.35261669754981995, "learning_rate": 1.9204386732531857e-05, "loss": 0.5618, "step": 12143 }, { "epoch": 0.25755551313863967, "grad_norm": 0.33550336956977844, "learning_rate": 1.920425636842569e-05, "loss": 0.5328, "step": 12144 }, { "epoch": 0.2575767215965727, "grad_norm": 0.3385748565196991, "learning_rate": 1.920412599408263e-05, "loss": 0.5369, "step": 12145 }, { "epoch": 0.25759793005450576, "grad_norm": 0.32629626989364624, "learning_rate": 1.920399560950282e-05, "loss": 0.477, "step": 12146 }, { "epoch": 0.2576191385124388, "grad_norm": 0.34494003653526306, "learning_rate": 1.9203865214686404e-05, "loss": 0.4877, "step": 12147 }, { "epoch": 0.2576403469703718, "grad_norm": 0.7172800898551941, "learning_rate": 1.9203734809633532e-05, "loss": 0.4842, "step": 12148 }, { "epoch": 0.2576615554283048, "grad_norm": 0.3138355016708374, "learning_rate": 1.9203604394344345e-05, "loss": 0.4655, "step": 12149 }, { "epoch": 0.25768276388623784, "grad_norm": 0.3441694676876068, "learning_rate": 1.920347396881899e-05, "loss": 0.5065, "step": 12150 }, { "epoch": 0.25770397234417086, "grad_norm": 0.3236127495765686, "learning_rate": 1.920334353305761e-05, "loss": 0.544, "step": 12151 }, { "epoch": 0.2577251808021039, "grad_norm": 0.3882100284099579, "learning_rate": 1.9203213087060352e-05, "loss": 0.5226, "step": 12152 }, { "epoch": 0.2577463892600369, "grad_norm": 0.35326147079467773, "learning_rate": 1.920308263082736e-05, "loss": 0.6148, "step": 12153 }, { "epoch": 0.2577675977179699, "grad_norm": 0.3475707173347473, "learning_rate": 1.9202952164358782e-05, "loss": 0.5296, "step": 12154 }, { "epoch": 0.25778880617590294, "grad_norm": 0.3324480950832367, "learning_rate": 1.920282168765476e-05, "loss": 0.5288, "step": 12155 }, { "epoch": 0.25781001463383596, "grad_norm": 0.373121976852417, "learning_rate": 1.9202691200715435e-05, "loss": 0.5428, "step": 12156 }, { "epoch": 0.257831223091769, "grad_norm": 0.33565524220466614, "learning_rate": 1.9202560703540957e-05, "loss": 0.4303, "step": 12157 }, { "epoch": 0.257852431549702, "grad_norm": 0.3503148555755615, "learning_rate": 1.9202430196131475e-05, "loss": 0.5247, "step": 12158 }, { "epoch": 0.257873640007635, "grad_norm": 0.32728224992752075, "learning_rate": 1.9202299678487128e-05, "loss": 0.4802, "step": 12159 }, { "epoch": 0.2578948484655681, "grad_norm": 0.6499274969100952, "learning_rate": 1.9202169150608064e-05, "loss": 0.5052, "step": 12160 }, { "epoch": 0.2579160569235011, "grad_norm": 0.37065404653549194, "learning_rate": 1.9202038612494428e-05, "loss": 0.5612, "step": 12161 }, { "epoch": 0.25793726538143413, "grad_norm": 0.37134841084480286, "learning_rate": 1.9201908064146363e-05, "loss": 0.5406, "step": 12162 }, { "epoch": 0.25795847383936715, "grad_norm": 0.3263894319534302, "learning_rate": 1.920177750556402e-05, "loss": 0.4655, "step": 12163 }, { "epoch": 0.2579796822973002, "grad_norm": 0.3651532530784607, "learning_rate": 1.9201646936747535e-05, "loss": 0.5684, "step": 12164 }, { "epoch": 0.2580008907552332, "grad_norm": 0.3433150351047516, "learning_rate": 1.920151635769706e-05, "loss": 0.5328, "step": 12165 }, { "epoch": 0.2580220992131662, "grad_norm": 0.3571310043334961, "learning_rate": 1.920138576841274e-05, "loss": 0.504, "step": 12166 }, { "epoch": 0.25804330767109923, "grad_norm": 0.3138960003852844, "learning_rate": 1.9201255168894716e-05, "loss": 0.4871, "step": 12167 }, { "epoch": 0.25806451612903225, "grad_norm": 0.28789934515953064, "learning_rate": 1.9201124559143136e-05, "loss": 0.4509, "step": 12168 }, { "epoch": 0.25808572458696527, "grad_norm": 0.3107754588127136, "learning_rate": 1.9200993939158147e-05, "loss": 0.5881, "step": 12169 }, { "epoch": 0.2581069330448983, "grad_norm": 0.3489265739917755, "learning_rate": 1.9200863308939892e-05, "loss": 0.5502, "step": 12170 }, { "epoch": 0.2581281415028313, "grad_norm": 0.3214418888092041, "learning_rate": 1.9200732668488514e-05, "loss": 0.4219, "step": 12171 }, { "epoch": 0.25814934996076433, "grad_norm": 0.3942858576774597, "learning_rate": 1.9200602017804164e-05, "loss": 0.5465, "step": 12172 }, { "epoch": 0.25817055841869735, "grad_norm": 0.34351083636283875, "learning_rate": 1.9200471356886986e-05, "loss": 0.435, "step": 12173 }, { "epoch": 0.2581917668766304, "grad_norm": 0.30327603220939636, "learning_rate": 1.920034068573712e-05, "loss": 0.4708, "step": 12174 }, { "epoch": 0.25821297533456344, "grad_norm": 0.4238998591899872, "learning_rate": 1.9200210004354715e-05, "loss": 0.5594, "step": 12175 }, { "epoch": 0.25823418379249646, "grad_norm": 0.39095598459243774, "learning_rate": 1.9200079312739918e-05, "loss": 0.5928, "step": 12176 }, { "epoch": 0.2582553922504295, "grad_norm": 0.34815800189971924, "learning_rate": 1.9199948610892874e-05, "loss": 0.5237, "step": 12177 }, { "epoch": 0.2582766007083625, "grad_norm": 0.3742906153202057, "learning_rate": 1.9199817898813727e-05, "loss": 0.5721, "step": 12178 }, { "epoch": 0.2582978091662955, "grad_norm": 0.34130609035491943, "learning_rate": 1.9199687176502618e-05, "loss": 0.5459, "step": 12179 }, { "epoch": 0.25831901762422854, "grad_norm": 0.3473019301891327, "learning_rate": 1.9199556443959703e-05, "loss": 0.573, "step": 12180 }, { "epoch": 0.25834022608216156, "grad_norm": 0.4080808758735657, "learning_rate": 1.919942570118512e-05, "loss": 0.5933, "step": 12181 }, { "epoch": 0.2583614345400946, "grad_norm": 0.3244595527648926, "learning_rate": 1.9199294948179013e-05, "loss": 0.4957, "step": 12182 }, { "epoch": 0.2583826429980276, "grad_norm": 0.33558303117752075, "learning_rate": 1.919916418494153e-05, "loss": 0.5682, "step": 12183 }, { "epoch": 0.2584038514559606, "grad_norm": 0.3311077058315277, "learning_rate": 1.9199033411472818e-05, "loss": 0.4982, "step": 12184 }, { "epoch": 0.25842505991389364, "grad_norm": 0.3153904974460602, "learning_rate": 1.919890262777302e-05, "loss": 0.5436, "step": 12185 }, { "epoch": 0.25844626837182666, "grad_norm": 0.3204028308391571, "learning_rate": 1.919877183384228e-05, "loss": 0.5195, "step": 12186 }, { "epoch": 0.25846747682975973, "grad_norm": 0.3562544882297516, "learning_rate": 1.919864102968075e-05, "loss": 0.4546, "step": 12187 }, { "epoch": 0.25848868528769275, "grad_norm": 0.3498087525367737, "learning_rate": 1.9198510215288572e-05, "loss": 0.4867, "step": 12188 }, { "epoch": 0.2585098937456258, "grad_norm": 0.34883084893226624, "learning_rate": 1.9198379390665888e-05, "loss": 0.4889, "step": 12189 }, { "epoch": 0.2585311022035588, "grad_norm": 0.42889127135276794, "learning_rate": 1.9198248555812845e-05, "loss": 0.5562, "step": 12190 }, { "epoch": 0.2585523106614918, "grad_norm": 0.3667192757129669, "learning_rate": 1.9198117710729593e-05, "loss": 0.5848, "step": 12191 }, { "epoch": 0.25857351911942483, "grad_norm": 0.3348672389984131, "learning_rate": 1.9197986855416273e-05, "loss": 0.484, "step": 12192 }, { "epoch": 0.25859472757735785, "grad_norm": 0.3352668881416321, "learning_rate": 1.9197855989873034e-05, "loss": 0.5467, "step": 12193 }, { "epoch": 0.25861593603529087, "grad_norm": 0.43878427147865295, "learning_rate": 1.9197725114100016e-05, "loss": 0.5043, "step": 12194 }, { "epoch": 0.2586371444932239, "grad_norm": 0.4266243577003479, "learning_rate": 1.919759422809737e-05, "loss": 0.4851, "step": 12195 }, { "epoch": 0.2586583529511569, "grad_norm": 0.38777998089790344, "learning_rate": 1.919746333186524e-05, "loss": 0.5353, "step": 12196 }, { "epoch": 0.25867956140908993, "grad_norm": 0.3909124433994293, "learning_rate": 1.9197332425403772e-05, "loss": 0.4789, "step": 12197 }, { "epoch": 0.25870076986702295, "grad_norm": 0.3802083134651184, "learning_rate": 1.919720150871311e-05, "loss": 0.5739, "step": 12198 }, { "epoch": 0.25872197832495597, "grad_norm": 0.4257066249847412, "learning_rate": 1.91970705817934e-05, "loss": 0.6101, "step": 12199 }, { "epoch": 0.258743186782889, "grad_norm": 0.9400478601455688, "learning_rate": 1.9196939644644788e-05, "loss": 0.6083, "step": 12200 }, { "epoch": 0.25876439524082206, "grad_norm": 0.35088902711868286, "learning_rate": 1.919680869726742e-05, "loss": 0.5454, "step": 12201 }, { "epoch": 0.2587856036987551, "grad_norm": 0.4227619171142578, "learning_rate": 1.9196677739661437e-05, "loss": 0.558, "step": 12202 }, { "epoch": 0.2588068121566881, "grad_norm": 0.3454628586769104, "learning_rate": 1.9196546771826993e-05, "loss": 0.4836, "step": 12203 }, { "epoch": 0.2588280206146211, "grad_norm": 0.336452841758728, "learning_rate": 1.919641579376423e-05, "loss": 0.4864, "step": 12204 }, { "epoch": 0.25884922907255414, "grad_norm": 0.38286635279655457, "learning_rate": 1.9196284805473293e-05, "loss": 0.4528, "step": 12205 }, { "epoch": 0.25887043753048716, "grad_norm": 0.366477370262146, "learning_rate": 1.919615380695433e-05, "loss": 0.4782, "step": 12206 }, { "epoch": 0.2588916459884202, "grad_norm": 0.35960328578948975, "learning_rate": 1.919602279820748e-05, "loss": 0.4633, "step": 12207 }, { "epoch": 0.2589128544463532, "grad_norm": 0.49748721718788147, "learning_rate": 1.9195891779232892e-05, "loss": 0.5807, "step": 12208 }, { "epoch": 0.2589340629042862, "grad_norm": 0.30389168858528137, "learning_rate": 1.9195760750030718e-05, "loss": 0.3842, "step": 12209 }, { "epoch": 0.25895527136221924, "grad_norm": 0.33513128757476807, "learning_rate": 1.91956297106011e-05, "loss": 0.4963, "step": 12210 }, { "epoch": 0.25897647982015226, "grad_norm": 0.3474176526069641, "learning_rate": 1.9195498660944177e-05, "loss": 0.5223, "step": 12211 }, { "epoch": 0.2589976882780853, "grad_norm": 0.45150327682495117, "learning_rate": 1.91953676010601e-05, "loss": 0.5638, "step": 12212 }, { "epoch": 0.2590188967360183, "grad_norm": 0.31779032945632935, "learning_rate": 1.919523653094902e-05, "loss": 0.4387, "step": 12213 }, { "epoch": 0.2590401051939514, "grad_norm": 0.3011772930622101, "learning_rate": 1.9195105450611078e-05, "loss": 0.4298, "step": 12214 }, { "epoch": 0.2590613136518844, "grad_norm": 0.39252105355262756, "learning_rate": 1.9194974360046415e-05, "loss": 0.5951, "step": 12215 }, { "epoch": 0.2590825221098174, "grad_norm": 0.31899774074554443, "learning_rate": 1.9194843259255185e-05, "loss": 0.4689, "step": 12216 }, { "epoch": 0.25910373056775043, "grad_norm": 0.333048939704895, "learning_rate": 1.919471214823753e-05, "loss": 0.5853, "step": 12217 }, { "epoch": 0.25912493902568345, "grad_norm": 0.3345309793949127, "learning_rate": 1.9194581026993594e-05, "loss": 0.5526, "step": 12218 }, { "epoch": 0.2591461474836165, "grad_norm": 0.4049452543258667, "learning_rate": 1.9194449895523525e-05, "loss": 0.4672, "step": 12219 }, { "epoch": 0.2591673559415495, "grad_norm": 0.3783416748046875, "learning_rate": 1.919431875382747e-05, "loss": 0.5545, "step": 12220 }, { "epoch": 0.2591885643994825, "grad_norm": 0.33585068583488464, "learning_rate": 1.9194187601905574e-05, "loss": 0.524, "step": 12221 }, { "epoch": 0.25920977285741553, "grad_norm": 0.3131702244281769, "learning_rate": 1.919405643975798e-05, "loss": 0.4672, "step": 12222 }, { "epoch": 0.25923098131534855, "grad_norm": 0.33918115496635437, "learning_rate": 1.919392526738484e-05, "loss": 0.5008, "step": 12223 }, { "epoch": 0.25925218977328157, "grad_norm": 0.3419736325740814, "learning_rate": 1.9193794084786293e-05, "loss": 0.5485, "step": 12224 }, { "epoch": 0.2592733982312146, "grad_norm": 0.3515632450580597, "learning_rate": 1.919366289196249e-05, "loss": 0.6783, "step": 12225 }, { "epoch": 0.2592946066891476, "grad_norm": 0.34391531348228455, "learning_rate": 1.9193531688913573e-05, "loss": 0.5353, "step": 12226 }, { "epoch": 0.25931581514708063, "grad_norm": 0.2998404800891876, "learning_rate": 1.919340047563969e-05, "loss": 0.5073, "step": 12227 }, { "epoch": 0.2593370236050137, "grad_norm": 0.37133386731147766, "learning_rate": 1.919326925214099e-05, "loss": 0.5439, "step": 12228 }, { "epoch": 0.2593582320629467, "grad_norm": 0.32942184805870056, "learning_rate": 1.9193138018417615e-05, "loss": 0.4582, "step": 12229 }, { "epoch": 0.25937944052087974, "grad_norm": 0.39786604046821594, "learning_rate": 1.919300677446971e-05, "loss": 0.3895, "step": 12230 }, { "epoch": 0.25940064897881276, "grad_norm": 0.32481393218040466, "learning_rate": 1.9192875520297424e-05, "loss": 0.5692, "step": 12231 }, { "epoch": 0.2594218574367458, "grad_norm": 0.3169570565223694, "learning_rate": 1.91927442559009e-05, "loss": 0.4337, "step": 12232 }, { "epoch": 0.2594430658946788, "grad_norm": 0.3818289637565613, "learning_rate": 1.9192612981280286e-05, "loss": 0.5154, "step": 12233 }, { "epoch": 0.2594642743526118, "grad_norm": 0.33808350563049316, "learning_rate": 1.9192481696435728e-05, "loss": 0.5276, "step": 12234 }, { "epoch": 0.25948548281054484, "grad_norm": 0.3964811861515045, "learning_rate": 1.919235040136737e-05, "loss": 0.501, "step": 12235 }, { "epoch": 0.25950669126847786, "grad_norm": 0.31428223848342896, "learning_rate": 1.9192219096075364e-05, "loss": 0.4984, "step": 12236 }, { "epoch": 0.2595278997264109, "grad_norm": 0.31850552558898926, "learning_rate": 1.9192087780559848e-05, "loss": 0.5438, "step": 12237 }, { "epoch": 0.2595491081843439, "grad_norm": 0.3634091913700104, "learning_rate": 1.9191956454820973e-05, "loss": 0.5471, "step": 12238 }, { "epoch": 0.2595703166422769, "grad_norm": 0.3204819858074188, "learning_rate": 1.9191825118858885e-05, "loss": 0.4694, "step": 12239 }, { "epoch": 0.25959152510020994, "grad_norm": 0.3213281035423279, "learning_rate": 1.9191693772673726e-05, "loss": 0.555, "step": 12240 }, { "epoch": 0.25961273355814296, "grad_norm": 0.31424480676651, "learning_rate": 1.919156241626565e-05, "loss": 0.4874, "step": 12241 }, { "epoch": 0.25963394201607604, "grad_norm": 0.34585562348365784, "learning_rate": 1.9191431049634794e-05, "loss": 0.4818, "step": 12242 }, { "epoch": 0.25965515047400906, "grad_norm": 0.3399854898452759, "learning_rate": 1.919129967278131e-05, "loss": 0.5756, "step": 12243 }, { "epoch": 0.2596763589319421, "grad_norm": 0.3442281186580658, "learning_rate": 1.919116828570534e-05, "loss": 0.5092, "step": 12244 }, { "epoch": 0.2596975673898751, "grad_norm": 0.3404235243797302, "learning_rate": 1.9191036888407035e-05, "loss": 0.5229, "step": 12245 }, { "epoch": 0.2597187758478081, "grad_norm": 0.3913545608520508, "learning_rate": 1.919090548088654e-05, "loss": 0.5853, "step": 12246 }, { "epoch": 0.25973998430574113, "grad_norm": 0.318451464176178, "learning_rate": 1.9190774063143997e-05, "loss": 0.4154, "step": 12247 }, { "epoch": 0.25976119276367415, "grad_norm": 0.447809636592865, "learning_rate": 1.9190642635179555e-05, "loss": 0.4801, "step": 12248 }, { "epoch": 0.2597824012216072, "grad_norm": 0.34678390622138977, "learning_rate": 1.919051119699336e-05, "loss": 0.6271, "step": 12249 }, { "epoch": 0.2598036096795402, "grad_norm": 0.40038612484931946, "learning_rate": 1.919037974858556e-05, "loss": 0.5469, "step": 12250 }, { "epoch": 0.2598248181374732, "grad_norm": 0.34935900568962097, "learning_rate": 1.91902482899563e-05, "loss": 0.5192, "step": 12251 }, { "epoch": 0.25984602659540623, "grad_norm": 0.3437732458114624, "learning_rate": 1.9190116821105724e-05, "loss": 0.5733, "step": 12252 }, { "epoch": 0.25986723505333925, "grad_norm": 0.3789528012275696, "learning_rate": 1.9189985342033978e-05, "loss": 0.5128, "step": 12253 }, { "epoch": 0.25988844351127227, "grad_norm": 0.33083444833755493, "learning_rate": 1.9189853852741213e-05, "loss": 0.6074, "step": 12254 }, { "epoch": 0.25990965196920535, "grad_norm": 0.343561589717865, "learning_rate": 1.9189722353227573e-05, "loss": 0.5117, "step": 12255 }, { "epoch": 0.25993086042713837, "grad_norm": 0.4167657196521759, "learning_rate": 1.9189590843493203e-05, "loss": 0.5803, "step": 12256 }, { "epoch": 0.2599520688850714, "grad_norm": 0.3372712731361389, "learning_rate": 1.918945932353825e-05, "loss": 0.4744, "step": 12257 }, { "epoch": 0.2599732773430044, "grad_norm": 0.30686360597610474, "learning_rate": 1.9189327793362856e-05, "loss": 0.4787, "step": 12258 }, { "epoch": 0.2599944858009374, "grad_norm": 0.38144350051879883, "learning_rate": 1.9189196252967174e-05, "loss": 0.473, "step": 12259 }, { "epoch": 0.26001569425887044, "grad_norm": 0.5213617086410522, "learning_rate": 1.9189064702351348e-05, "loss": 0.4842, "step": 12260 }, { "epoch": 0.26003690271680346, "grad_norm": 0.3348754942417145, "learning_rate": 1.9188933141515528e-05, "loss": 0.5236, "step": 12261 }, { "epoch": 0.2600581111747365, "grad_norm": 0.4394818842411041, "learning_rate": 1.9188801570459852e-05, "loss": 0.4569, "step": 12262 }, { "epoch": 0.2600793196326695, "grad_norm": 0.3283616602420807, "learning_rate": 1.9188669989184473e-05, "loss": 0.4798, "step": 12263 }, { "epoch": 0.2601005280906025, "grad_norm": 0.34672069549560547, "learning_rate": 1.9188538397689533e-05, "loss": 0.547, "step": 12264 }, { "epoch": 0.26012173654853554, "grad_norm": 0.6257970929145813, "learning_rate": 1.918840679597518e-05, "loss": 0.5569, "step": 12265 }, { "epoch": 0.26014294500646856, "grad_norm": 0.3337368667125702, "learning_rate": 1.9188275184041563e-05, "loss": 0.4573, "step": 12266 }, { "epoch": 0.2601641534644016, "grad_norm": 0.29787057638168335, "learning_rate": 1.9188143561888824e-05, "loss": 0.4884, "step": 12267 }, { "epoch": 0.2601853619223346, "grad_norm": 0.289296418428421, "learning_rate": 1.9188011929517114e-05, "loss": 0.4533, "step": 12268 }, { "epoch": 0.2602065703802677, "grad_norm": 0.3672213554382324, "learning_rate": 1.9187880286926575e-05, "loss": 0.5515, "step": 12269 }, { "epoch": 0.2602277788382007, "grad_norm": 0.35827597975730896, "learning_rate": 1.9187748634117356e-05, "loss": 0.5012, "step": 12270 }, { "epoch": 0.2602489872961337, "grad_norm": 0.33731165528297424, "learning_rate": 1.9187616971089602e-05, "loss": 0.4301, "step": 12271 }, { "epoch": 0.26027019575406674, "grad_norm": 0.37174686789512634, "learning_rate": 1.9187485297843464e-05, "loss": 0.6037, "step": 12272 }, { "epoch": 0.26029140421199976, "grad_norm": 0.3771055042743683, "learning_rate": 1.9187353614379082e-05, "loss": 0.4484, "step": 12273 }, { "epoch": 0.2603126126699328, "grad_norm": 0.3604033291339874, "learning_rate": 1.9187221920696603e-05, "loss": 0.5036, "step": 12274 }, { "epoch": 0.2603338211278658, "grad_norm": 0.3559219539165497, "learning_rate": 1.9187090216796176e-05, "loss": 0.4921, "step": 12275 }, { "epoch": 0.2603550295857988, "grad_norm": 0.39437612891197205, "learning_rate": 1.918695850267795e-05, "loss": 0.5424, "step": 12276 }, { "epoch": 0.26037623804373183, "grad_norm": 0.3787892758846283, "learning_rate": 1.9186826778342067e-05, "loss": 0.4896, "step": 12277 }, { "epoch": 0.26039744650166485, "grad_norm": 0.3081834018230438, "learning_rate": 1.9186695043788673e-05, "loss": 0.4838, "step": 12278 }, { "epoch": 0.2604186549595979, "grad_norm": 0.3016515374183655, "learning_rate": 1.918656329901792e-05, "loss": 0.4544, "step": 12279 }, { "epoch": 0.2604398634175309, "grad_norm": 0.33458101749420166, "learning_rate": 1.9186431544029948e-05, "loss": 0.554, "step": 12280 }, { "epoch": 0.2604610718754639, "grad_norm": 0.3163332939147949, "learning_rate": 1.9186299778824908e-05, "loss": 0.5184, "step": 12281 }, { "epoch": 0.26048228033339693, "grad_norm": 0.3793516755104065, "learning_rate": 1.9186168003402947e-05, "loss": 0.4529, "step": 12282 }, { "epoch": 0.26050348879133, "grad_norm": 0.31390881538391113, "learning_rate": 1.918603621776421e-05, "loss": 0.5171, "step": 12283 }, { "epoch": 0.260524697249263, "grad_norm": 0.3470202386379242, "learning_rate": 1.9185904421908842e-05, "loss": 0.5166, "step": 12284 }, { "epoch": 0.26054590570719605, "grad_norm": 0.8300718665122986, "learning_rate": 1.9185772615836987e-05, "loss": 0.5261, "step": 12285 }, { "epoch": 0.26056711416512907, "grad_norm": 0.403425008058548, "learning_rate": 1.9185640799548803e-05, "loss": 0.565, "step": 12286 }, { "epoch": 0.2605883226230621, "grad_norm": 0.3599577844142914, "learning_rate": 1.9185508973044422e-05, "loss": 0.5092, "step": 12287 }, { "epoch": 0.2606095310809951, "grad_norm": 0.34774643182754517, "learning_rate": 1.9185377136324e-05, "loss": 0.4613, "step": 12288 }, { "epoch": 0.2606307395389281, "grad_norm": 0.32046622037887573, "learning_rate": 1.9185245289387685e-05, "loss": 0.5186, "step": 12289 }, { "epoch": 0.26065194799686114, "grad_norm": 0.7861274480819702, "learning_rate": 1.9185113432235617e-05, "loss": 0.5569, "step": 12290 }, { "epoch": 0.26067315645479416, "grad_norm": 0.34036359190940857, "learning_rate": 1.9184981564867944e-05, "loss": 0.487, "step": 12291 }, { "epoch": 0.2606943649127272, "grad_norm": 0.34054839611053467, "learning_rate": 1.9184849687284817e-05, "loss": 0.5144, "step": 12292 }, { "epoch": 0.2607155733706602, "grad_norm": 0.3316739499568939, "learning_rate": 1.918471779948638e-05, "loss": 0.4607, "step": 12293 }, { "epoch": 0.2607367818285932, "grad_norm": 0.3406139612197876, "learning_rate": 1.9184585901472776e-05, "loss": 0.5801, "step": 12294 }, { "epoch": 0.26075799028652624, "grad_norm": 0.34865251183509827, "learning_rate": 1.918445399324416e-05, "loss": 0.5544, "step": 12295 }, { "epoch": 0.2607791987444593, "grad_norm": 0.30650922656059265, "learning_rate": 1.9184322074800672e-05, "loss": 0.4753, "step": 12296 }, { "epoch": 0.26080040720239234, "grad_norm": 0.36924082040786743, "learning_rate": 1.918419014614246e-05, "loss": 0.5395, "step": 12297 }, { "epoch": 0.26082161566032536, "grad_norm": 0.3811499774456024, "learning_rate": 1.9184058207269674e-05, "loss": 0.549, "step": 12298 }, { "epoch": 0.2608428241182584, "grad_norm": 0.4660587012767792, "learning_rate": 1.9183926258182458e-05, "loss": 0.5192, "step": 12299 }, { "epoch": 0.2608640325761914, "grad_norm": 0.3271229565143585, "learning_rate": 1.9183794298880957e-05, "loss": 0.5296, "step": 12300 }, { "epoch": 0.2608852410341244, "grad_norm": 0.3030294179916382, "learning_rate": 1.918366232936532e-05, "loss": 0.4579, "step": 12301 }, { "epoch": 0.26090644949205744, "grad_norm": 0.33833998441696167, "learning_rate": 1.9183530349635694e-05, "loss": 0.5099, "step": 12302 }, { "epoch": 0.26092765794999045, "grad_norm": 0.32075172662734985, "learning_rate": 1.9183398359692224e-05, "loss": 0.4962, "step": 12303 }, { "epoch": 0.2609488664079235, "grad_norm": 0.32881268858909607, "learning_rate": 1.918326635953506e-05, "loss": 0.5641, "step": 12304 }, { "epoch": 0.2609700748658565, "grad_norm": 0.35881954431533813, "learning_rate": 1.9183134349164344e-05, "loss": 0.5132, "step": 12305 }, { "epoch": 0.2609912833237895, "grad_norm": 0.30473726987838745, "learning_rate": 1.918300232858023e-05, "loss": 0.4997, "step": 12306 }, { "epoch": 0.26101249178172253, "grad_norm": 0.41808316111564636, "learning_rate": 1.918287029778286e-05, "loss": 0.4724, "step": 12307 }, { "epoch": 0.26103370023965555, "grad_norm": 0.3563716411590576, "learning_rate": 1.918273825677238e-05, "loss": 0.5527, "step": 12308 }, { "epoch": 0.2610549086975886, "grad_norm": 0.3319757878780365, "learning_rate": 1.9182606205548938e-05, "loss": 0.5311, "step": 12309 }, { "epoch": 0.26107611715552165, "grad_norm": 0.3227095603942871, "learning_rate": 1.918247414411268e-05, "loss": 0.5553, "step": 12310 }, { "epoch": 0.26109732561345467, "grad_norm": 0.31339362263679504, "learning_rate": 1.9182342072463756e-05, "loss": 0.5154, "step": 12311 }, { "epoch": 0.2611185340713877, "grad_norm": 0.338041216135025, "learning_rate": 1.918220999060231e-05, "loss": 0.5129, "step": 12312 }, { "epoch": 0.2611397425293207, "grad_norm": 0.33842015266418457, "learning_rate": 1.918207789852849e-05, "loss": 0.5885, "step": 12313 }, { "epoch": 0.2611609509872537, "grad_norm": 0.39408019185066223, "learning_rate": 1.9181945796242443e-05, "loss": 0.5012, "step": 12314 }, { "epoch": 0.26118215944518675, "grad_norm": 0.30547842383384705, "learning_rate": 1.9181813683744316e-05, "loss": 0.4345, "step": 12315 }, { "epoch": 0.26120336790311977, "grad_norm": 0.3109806776046753, "learning_rate": 1.9181681561034256e-05, "loss": 0.4855, "step": 12316 }, { "epoch": 0.2612245763610528, "grad_norm": 0.3776468336582184, "learning_rate": 1.9181549428112407e-05, "loss": 0.4806, "step": 12317 }, { "epoch": 0.2612457848189858, "grad_norm": 0.3838670253753662, "learning_rate": 1.918141728497892e-05, "loss": 0.5439, "step": 12318 }, { "epoch": 0.2612669932769188, "grad_norm": 0.332698792219162, "learning_rate": 1.9181285131633942e-05, "loss": 0.5118, "step": 12319 }, { "epoch": 0.26128820173485184, "grad_norm": 0.3660193383693695, "learning_rate": 1.9181152968077615e-05, "loss": 0.5021, "step": 12320 }, { "epoch": 0.26130941019278486, "grad_norm": 0.32876524329185486, "learning_rate": 1.9181020794310092e-05, "loss": 0.5365, "step": 12321 }, { "epoch": 0.2613306186507179, "grad_norm": 0.3378341495990753, "learning_rate": 1.918088861033152e-05, "loss": 0.5071, "step": 12322 }, { "epoch": 0.2613518271086509, "grad_norm": 0.3419441878795624, "learning_rate": 1.918075641614204e-05, "loss": 0.4317, "step": 12323 }, { "epoch": 0.261373035566584, "grad_norm": 0.37826675176620483, "learning_rate": 1.91806242117418e-05, "loss": 0.4793, "step": 12324 }, { "epoch": 0.261394244024517, "grad_norm": 0.3390742540359497, "learning_rate": 1.9180491997130956e-05, "loss": 0.5435, "step": 12325 }, { "epoch": 0.26141545248245, "grad_norm": 0.41400524973869324, "learning_rate": 1.9180359772309643e-05, "loss": 0.4759, "step": 12326 }, { "epoch": 0.26143666094038304, "grad_norm": 0.35996830463409424, "learning_rate": 1.9180227537278017e-05, "loss": 0.5645, "step": 12327 }, { "epoch": 0.26145786939831606, "grad_norm": 0.43914666771888733, "learning_rate": 1.9180095292036217e-05, "loss": 0.5889, "step": 12328 }, { "epoch": 0.2614790778562491, "grad_norm": 0.3151470422744751, "learning_rate": 1.9179963036584398e-05, "loss": 0.5405, "step": 12329 }, { "epoch": 0.2615002863141821, "grad_norm": 0.35558056831359863, "learning_rate": 1.9179830770922705e-05, "loss": 0.5958, "step": 12330 }, { "epoch": 0.2615214947721151, "grad_norm": 0.3397352993488312, "learning_rate": 1.9179698495051286e-05, "loss": 0.4895, "step": 12331 }, { "epoch": 0.26154270323004813, "grad_norm": 0.3624687194824219, "learning_rate": 1.917956620897028e-05, "loss": 0.4795, "step": 12332 }, { "epoch": 0.26156391168798115, "grad_norm": 0.39636799693107605, "learning_rate": 1.9179433912679842e-05, "loss": 0.541, "step": 12333 }, { "epoch": 0.2615851201459142, "grad_norm": 0.3129476010799408, "learning_rate": 1.9179301606180122e-05, "loss": 0.4791, "step": 12334 }, { "epoch": 0.2616063286038472, "grad_norm": 0.42778483033180237, "learning_rate": 1.9179169289471256e-05, "loss": 0.5176, "step": 12335 }, { "epoch": 0.2616275370617802, "grad_norm": 0.3370698094367981, "learning_rate": 1.91790369625534e-05, "loss": 0.4868, "step": 12336 }, { "epoch": 0.2616487455197133, "grad_norm": 0.39908289909362793, "learning_rate": 1.91789046254267e-05, "loss": 0.5492, "step": 12337 }, { "epoch": 0.2616699539776463, "grad_norm": 0.366836816072464, "learning_rate": 1.91787722780913e-05, "loss": 0.5189, "step": 12338 }, { "epoch": 0.2616911624355793, "grad_norm": 0.33183997869491577, "learning_rate": 1.9178639920547353e-05, "loss": 0.4847, "step": 12339 }, { "epoch": 0.26171237089351235, "grad_norm": 0.35764068365097046, "learning_rate": 1.9178507552795e-05, "loss": 0.5621, "step": 12340 }, { "epoch": 0.26173357935144537, "grad_norm": 0.3106544017791748, "learning_rate": 1.917837517483439e-05, "loss": 0.4956, "step": 12341 }, { "epoch": 0.2617547878093784, "grad_norm": 0.3817334473133087, "learning_rate": 1.9178242786665672e-05, "loss": 0.5163, "step": 12342 }, { "epoch": 0.2617759962673114, "grad_norm": 0.43949687480926514, "learning_rate": 1.9178110388288993e-05, "loss": 0.4662, "step": 12343 }, { "epoch": 0.2617972047252444, "grad_norm": 0.3147462010383606, "learning_rate": 1.9177977979704496e-05, "loss": 0.4738, "step": 12344 }, { "epoch": 0.26181841318317745, "grad_norm": 0.3599739372730255, "learning_rate": 1.9177845560912334e-05, "loss": 0.4969, "step": 12345 }, { "epoch": 0.26183962164111046, "grad_norm": 0.3318294584751129, "learning_rate": 1.9177713131912652e-05, "loss": 0.5033, "step": 12346 }, { "epoch": 0.2618608300990435, "grad_norm": 0.43008580803871155, "learning_rate": 1.9177580692705597e-05, "loss": 0.5277, "step": 12347 }, { "epoch": 0.2618820385569765, "grad_norm": 0.3277735114097595, "learning_rate": 1.9177448243291317e-05, "loss": 0.4398, "step": 12348 }, { "epoch": 0.2619032470149095, "grad_norm": 0.3454877436161041, "learning_rate": 1.9177315783669958e-05, "loss": 0.4187, "step": 12349 }, { "epoch": 0.26192445547284254, "grad_norm": 0.32907649874687195, "learning_rate": 1.917718331384167e-05, "loss": 0.4928, "step": 12350 }, { "epoch": 0.2619456639307756, "grad_norm": 0.39124321937561035, "learning_rate": 1.9177050833806595e-05, "loss": 0.5309, "step": 12351 }, { "epoch": 0.26196687238870864, "grad_norm": 0.3423580229282379, "learning_rate": 1.9176918343564887e-05, "loss": 0.576, "step": 12352 }, { "epoch": 0.26198808084664166, "grad_norm": 0.3530692756175995, "learning_rate": 1.9176785843116685e-05, "loss": 0.4855, "step": 12353 }, { "epoch": 0.2620092893045747, "grad_norm": 0.3721688985824585, "learning_rate": 1.9176653332462147e-05, "loss": 0.524, "step": 12354 }, { "epoch": 0.2620304977625077, "grad_norm": 0.3462334871292114, "learning_rate": 1.917652081160141e-05, "loss": 0.4381, "step": 12355 }, { "epoch": 0.2620517062204407, "grad_norm": 0.3237079083919525, "learning_rate": 1.917638828053463e-05, "loss": 0.5549, "step": 12356 }, { "epoch": 0.26207291467837374, "grad_norm": 0.3506760001182556, "learning_rate": 1.9176255739261952e-05, "loss": 0.4515, "step": 12357 }, { "epoch": 0.26209412313630676, "grad_norm": 0.3859609067440033, "learning_rate": 1.917612318778352e-05, "loss": 0.4292, "step": 12358 }, { "epoch": 0.2621153315942398, "grad_norm": 0.32474660873413086, "learning_rate": 1.917599062609948e-05, "loss": 0.5311, "step": 12359 }, { "epoch": 0.2621365400521728, "grad_norm": 0.3211309313774109, "learning_rate": 1.917585805420999e-05, "loss": 0.5607, "step": 12360 }, { "epoch": 0.2621577485101058, "grad_norm": 0.4782545864582062, "learning_rate": 1.9175725472115184e-05, "loss": 0.5706, "step": 12361 }, { "epoch": 0.26217895696803883, "grad_norm": 0.32684749364852905, "learning_rate": 1.9175592879815217e-05, "loss": 0.5605, "step": 12362 }, { "epoch": 0.26220016542597185, "grad_norm": 0.3268203139305115, "learning_rate": 1.9175460277310242e-05, "loss": 0.5478, "step": 12363 }, { "epoch": 0.2622213738839049, "grad_norm": 0.3377605974674225, "learning_rate": 1.917532766460039e-05, "loss": 0.4658, "step": 12364 }, { "epoch": 0.26224258234183795, "grad_norm": 0.3696211278438568, "learning_rate": 1.9175195041685827e-05, "loss": 0.502, "step": 12365 }, { "epoch": 0.26226379079977097, "grad_norm": 0.3263213038444519, "learning_rate": 1.9175062408566686e-05, "loss": 0.5557, "step": 12366 }, { "epoch": 0.262284999257704, "grad_norm": 0.31995952129364014, "learning_rate": 1.9174929765243118e-05, "loss": 0.4825, "step": 12367 }, { "epoch": 0.262306207715637, "grad_norm": 0.3475271165370941, "learning_rate": 1.917479711171528e-05, "loss": 0.5854, "step": 12368 }, { "epoch": 0.26232741617357, "grad_norm": 0.3602747321128845, "learning_rate": 1.9174664447983308e-05, "loss": 0.5472, "step": 12369 }, { "epoch": 0.26234862463150305, "grad_norm": 0.35979941487312317, "learning_rate": 1.9174531774047356e-05, "loss": 0.5642, "step": 12370 }, { "epoch": 0.26236983308943607, "grad_norm": 0.3619888126850128, "learning_rate": 1.9174399089907568e-05, "loss": 0.5502, "step": 12371 }, { "epoch": 0.2623910415473691, "grad_norm": 0.33671796321868896, "learning_rate": 1.9174266395564092e-05, "loss": 0.6197, "step": 12372 }, { "epoch": 0.2624122500053021, "grad_norm": 0.3120572865009308, "learning_rate": 1.917413369101708e-05, "loss": 0.4474, "step": 12373 }, { "epoch": 0.2624334584632351, "grad_norm": 0.31308886408805847, "learning_rate": 1.9174000976266675e-05, "loss": 0.4246, "step": 12374 }, { "epoch": 0.26245466692116814, "grad_norm": 0.3314324617385864, "learning_rate": 1.9173868251313027e-05, "loss": 0.5034, "step": 12375 }, { "epoch": 0.26247587537910116, "grad_norm": 0.3484555184841156, "learning_rate": 1.917373551615628e-05, "loss": 0.5317, "step": 12376 }, { "epoch": 0.2624970838370342, "grad_norm": 0.33303454518318176, "learning_rate": 1.9173602770796585e-05, "loss": 0.4476, "step": 12377 }, { "epoch": 0.26251829229496726, "grad_norm": 0.3745436370372772, "learning_rate": 1.9173470015234087e-05, "loss": 0.5202, "step": 12378 }, { "epoch": 0.2625395007529003, "grad_norm": 0.3180169463157654, "learning_rate": 1.9173337249468936e-05, "loss": 0.5626, "step": 12379 }, { "epoch": 0.2625607092108333, "grad_norm": 0.3420943021774292, "learning_rate": 1.917320447350128e-05, "loss": 0.4576, "step": 12380 }, { "epoch": 0.2625819176687663, "grad_norm": 0.3325059115886688, "learning_rate": 1.9173071687331263e-05, "loss": 0.5434, "step": 12381 }, { "epoch": 0.26260312612669934, "grad_norm": 0.44604548811912537, "learning_rate": 1.917293889095904e-05, "loss": 0.5512, "step": 12382 }, { "epoch": 0.26262433458463236, "grad_norm": 0.35891154408454895, "learning_rate": 1.9172806084384753e-05, "loss": 0.5294, "step": 12383 }, { "epoch": 0.2626455430425654, "grad_norm": 0.365379273891449, "learning_rate": 1.9172673267608547e-05, "loss": 0.5938, "step": 12384 }, { "epoch": 0.2626667515004984, "grad_norm": 0.33985087275505066, "learning_rate": 1.9172540440630577e-05, "loss": 0.5773, "step": 12385 }, { "epoch": 0.2626879599584314, "grad_norm": 0.3383156657218933, "learning_rate": 1.9172407603450987e-05, "loss": 0.5678, "step": 12386 }, { "epoch": 0.26270916841636444, "grad_norm": 0.3271200954914093, "learning_rate": 1.9172274756069925e-05, "loss": 0.5486, "step": 12387 }, { "epoch": 0.26273037687429746, "grad_norm": 0.39719414710998535, "learning_rate": 1.9172141898487537e-05, "loss": 0.5512, "step": 12388 }, { "epoch": 0.2627515853322305, "grad_norm": 0.3698638379573822, "learning_rate": 1.9172009030703972e-05, "loss": 0.5783, "step": 12389 }, { "epoch": 0.2627727937901635, "grad_norm": 0.3859040141105652, "learning_rate": 1.9171876152719383e-05, "loss": 0.5298, "step": 12390 }, { "epoch": 0.2627940022480965, "grad_norm": 0.3366951644420624, "learning_rate": 1.9171743264533908e-05, "loss": 0.4949, "step": 12391 }, { "epoch": 0.2628152107060296, "grad_norm": 0.33477962017059326, "learning_rate": 1.9171610366147703e-05, "loss": 0.573, "step": 12392 }, { "epoch": 0.2628364191639626, "grad_norm": 0.3598896563053131, "learning_rate": 1.9171477457560908e-05, "loss": 0.5075, "step": 12393 }, { "epoch": 0.26285762762189563, "grad_norm": 0.3348058760166168, "learning_rate": 1.917134453877368e-05, "loss": 0.5248, "step": 12394 }, { "epoch": 0.26287883607982865, "grad_norm": 0.31734421849250793, "learning_rate": 1.917121160978616e-05, "loss": 0.4667, "step": 12395 }, { "epoch": 0.26290004453776167, "grad_norm": 0.3317377269268036, "learning_rate": 1.9171078670598498e-05, "loss": 0.4966, "step": 12396 }, { "epoch": 0.2629212529956947, "grad_norm": 0.36090725660324097, "learning_rate": 1.9170945721210844e-05, "loss": 0.5442, "step": 12397 }, { "epoch": 0.2629424614536277, "grad_norm": 0.33249446749687195, "learning_rate": 1.917081276162334e-05, "loss": 0.4743, "step": 12398 }, { "epoch": 0.2629636699115607, "grad_norm": 0.32815021276474, "learning_rate": 1.9170679791836142e-05, "loss": 0.461, "step": 12399 }, { "epoch": 0.26298487836949375, "grad_norm": 0.33430445194244385, "learning_rate": 1.9170546811849392e-05, "loss": 0.5072, "step": 12400 }, { "epoch": 0.26300608682742677, "grad_norm": 0.3263354003429413, "learning_rate": 1.9170413821663237e-05, "loss": 0.565, "step": 12401 }, { "epoch": 0.2630272952853598, "grad_norm": 0.3449605405330658, "learning_rate": 1.917028082127783e-05, "loss": 0.4531, "step": 12402 }, { "epoch": 0.2630485037432928, "grad_norm": 0.3209804594516754, "learning_rate": 1.917014781069332e-05, "loss": 0.524, "step": 12403 }, { "epoch": 0.2630697122012258, "grad_norm": 0.4557221531867981, "learning_rate": 1.9170014789909848e-05, "loss": 0.5409, "step": 12404 }, { "epoch": 0.2630909206591589, "grad_norm": 0.34409645199775696, "learning_rate": 1.9169881758927564e-05, "loss": 0.5409, "step": 12405 }, { "epoch": 0.2631121291170919, "grad_norm": 0.3462452292442322, "learning_rate": 1.9169748717746617e-05, "loss": 0.432, "step": 12406 }, { "epoch": 0.26313333757502494, "grad_norm": 0.3624178469181061, "learning_rate": 1.9169615666367157e-05, "loss": 0.4754, "step": 12407 }, { "epoch": 0.26315454603295796, "grad_norm": 0.33874163031578064, "learning_rate": 1.9169482604789328e-05, "loss": 0.5647, "step": 12408 }, { "epoch": 0.263175754490891, "grad_norm": 0.49698975682258606, "learning_rate": 1.916934953301328e-05, "loss": 0.4923, "step": 12409 }, { "epoch": 0.263196962948824, "grad_norm": 0.41564056277275085, "learning_rate": 1.9169216451039162e-05, "loss": 0.544, "step": 12410 }, { "epoch": 0.263218171406757, "grad_norm": 0.3651079535484314, "learning_rate": 1.9169083358867124e-05, "loss": 0.4641, "step": 12411 }, { "epoch": 0.26323937986469004, "grad_norm": 0.3550078272819519, "learning_rate": 1.9168950256497306e-05, "loss": 0.5645, "step": 12412 }, { "epoch": 0.26326058832262306, "grad_norm": 0.29882973432540894, "learning_rate": 1.9168817143929866e-05, "loss": 0.5432, "step": 12413 }, { "epoch": 0.2632817967805561, "grad_norm": 0.339222252368927, "learning_rate": 1.9168684021164946e-05, "loss": 0.4929, "step": 12414 }, { "epoch": 0.2633030052384891, "grad_norm": 0.3413861095905304, "learning_rate": 1.9168550888202694e-05, "loss": 0.4979, "step": 12415 }, { "epoch": 0.2633242136964221, "grad_norm": 0.35641899704933167, "learning_rate": 1.916841774504326e-05, "loss": 0.5519, "step": 12416 }, { "epoch": 0.26334542215435514, "grad_norm": 0.30385997891426086, "learning_rate": 1.916828459168679e-05, "loss": 0.4418, "step": 12417 }, { "epoch": 0.26336663061228816, "grad_norm": 0.39028409123420715, "learning_rate": 1.9168151428133433e-05, "loss": 0.5355, "step": 12418 }, { "epoch": 0.26338783907022123, "grad_norm": 0.31620848178863525, "learning_rate": 1.916801825438334e-05, "loss": 0.4159, "step": 12419 }, { "epoch": 0.26340904752815425, "grad_norm": 0.3562872111797333, "learning_rate": 1.9167885070436657e-05, "loss": 0.6056, "step": 12420 }, { "epoch": 0.26343025598608727, "grad_norm": 0.317205548286438, "learning_rate": 1.916775187629353e-05, "loss": 0.5322, "step": 12421 }, { "epoch": 0.2634514644440203, "grad_norm": 0.312338262796402, "learning_rate": 1.916761867195411e-05, "loss": 0.5195, "step": 12422 }, { "epoch": 0.2634726729019533, "grad_norm": 0.36912599205970764, "learning_rate": 1.9167485457418542e-05, "loss": 0.5626, "step": 12423 }, { "epoch": 0.26349388135988633, "grad_norm": 0.44866400957107544, "learning_rate": 1.916735223268698e-05, "loss": 0.5466, "step": 12424 }, { "epoch": 0.26351508981781935, "grad_norm": 0.356170117855072, "learning_rate": 1.9167218997759568e-05, "loss": 0.5367, "step": 12425 }, { "epoch": 0.26353629827575237, "grad_norm": 0.331226110458374, "learning_rate": 1.9167085752636452e-05, "loss": 0.4972, "step": 12426 }, { "epoch": 0.2635575067336854, "grad_norm": 0.2991032302379608, "learning_rate": 1.9166952497317783e-05, "loss": 0.4525, "step": 12427 }, { "epoch": 0.2635787151916184, "grad_norm": 0.40596166253089905, "learning_rate": 1.916681923180371e-05, "loss": 0.6032, "step": 12428 }, { "epoch": 0.2635999236495514, "grad_norm": 0.35968077182769775, "learning_rate": 1.916668595609438e-05, "loss": 0.457, "step": 12429 }, { "epoch": 0.26362113210748445, "grad_norm": 0.3326101303100586, "learning_rate": 1.9166552670189943e-05, "loss": 0.4836, "step": 12430 }, { "epoch": 0.26364234056541747, "grad_norm": 0.4012191593647003, "learning_rate": 1.9166419374090545e-05, "loss": 0.5347, "step": 12431 }, { "epoch": 0.2636635490233505, "grad_norm": 0.3626847267150879, "learning_rate": 1.9166286067796336e-05, "loss": 0.4875, "step": 12432 }, { "epoch": 0.26368475748128356, "grad_norm": 0.3093520700931549, "learning_rate": 1.9166152751307462e-05, "loss": 0.4537, "step": 12433 }, { "epoch": 0.2637059659392166, "grad_norm": 0.34656602144241333, "learning_rate": 1.9166019424624075e-05, "loss": 0.6372, "step": 12434 }, { "epoch": 0.2637271743971496, "grad_norm": 0.33970460295677185, "learning_rate": 1.9165886087746315e-05, "loss": 0.4382, "step": 12435 }, { "epoch": 0.2637483828550826, "grad_norm": 0.32743775844573975, "learning_rate": 1.916575274067434e-05, "loss": 0.5144, "step": 12436 }, { "epoch": 0.26376959131301564, "grad_norm": 0.30077698826789856, "learning_rate": 1.9165619383408295e-05, "loss": 0.4529, "step": 12437 }, { "epoch": 0.26379079977094866, "grad_norm": 0.3318512439727783, "learning_rate": 1.9165486015948326e-05, "loss": 0.4595, "step": 12438 }, { "epoch": 0.2638120082288817, "grad_norm": 0.32994839549064636, "learning_rate": 1.9165352638294586e-05, "loss": 0.5036, "step": 12439 }, { "epoch": 0.2638332166868147, "grad_norm": 0.30284640192985535, "learning_rate": 1.9165219250447218e-05, "loss": 0.5131, "step": 12440 }, { "epoch": 0.2638544251447477, "grad_norm": 0.3394949734210968, "learning_rate": 1.9165085852406375e-05, "loss": 0.4987, "step": 12441 }, { "epoch": 0.26387563360268074, "grad_norm": 0.36540621519088745, "learning_rate": 1.9164952444172198e-05, "loss": 0.4948, "step": 12442 }, { "epoch": 0.26389684206061376, "grad_norm": 0.3531244099140167, "learning_rate": 1.9164819025744843e-05, "loss": 0.4824, "step": 12443 }, { "epoch": 0.2639180505185468, "grad_norm": 0.3677736818790436, "learning_rate": 1.9164685597124457e-05, "loss": 0.5174, "step": 12444 }, { "epoch": 0.2639392589764798, "grad_norm": 0.3332618772983551, "learning_rate": 1.9164552158311188e-05, "loss": 0.534, "step": 12445 }, { "epoch": 0.26396046743441287, "grad_norm": 0.35319817066192627, "learning_rate": 1.916441870930518e-05, "loss": 0.4443, "step": 12446 }, { "epoch": 0.2639816758923459, "grad_norm": 0.32350778579711914, "learning_rate": 1.9164285250106587e-05, "loss": 0.5475, "step": 12447 }, { "epoch": 0.2640028843502789, "grad_norm": 0.33851584792137146, "learning_rate": 1.9164151780715558e-05, "loss": 0.5395, "step": 12448 }, { "epoch": 0.26402409280821193, "grad_norm": 0.39108356833457947, "learning_rate": 1.9164018301132238e-05, "loss": 0.5819, "step": 12449 }, { "epoch": 0.26404530126614495, "grad_norm": 0.47239041328430176, "learning_rate": 1.9163884811356778e-05, "loss": 0.6361, "step": 12450 }, { "epoch": 0.26406650972407797, "grad_norm": 0.3438655436038971, "learning_rate": 1.916375131138932e-05, "loss": 0.5529, "step": 12451 }, { "epoch": 0.264087718182011, "grad_norm": 0.3074738085269928, "learning_rate": 1.916361780123002e-05, "loss": 0.4402, "step": 12452 }, { "epoch": 0.264108926639944, "grad_norm": 0.28139713406562805, "learning_rate": 1.9163484280879022e-05, "loss": 0.427, "step": 12453 }, { "epoch": 0.26413013509787703, "grad_norm": 0.34962835907936096, "learning_rate": 1.916335075033648e-05, "loss": 0.5999, "step": 12454 }, { "epoch": 0.26415134355581005, "grad_norm": 0.44051098823547363, "learning_rate": 1.9163217209602538e-05, "loss": 0.5409, "step": 12455 }, { "epoch": 0.26417255201374307, "grad_norm": 0.31159618496894836, "learning_rate": 1.9163083658677343e-05, "loss": 0.4507, "step": 12456 }, { "epoch": 0.2641937604716761, "grad_norm": 0.37063273787498474, "learning_rate": 1.9162950097561047e-05, "loss": 0.5068, "step": 12457 }, { "epoch": 0.2642149689296091, "grad_norm": 0.35714882612228394, "learning_rate": 1.91628165262538e-05, "loss": 0.5443, "step": 12458 }, { "epoch": 0.2642361773875421, "grad_norm": 0.37427887320518494, "learning_rate": 1.9162682944755748e-05, "loss": 0.5277, "step": 12459 }, { "epoch": 0.2642573858454752, "grad_norm": 0.31365442276000977, "learning_rate": 1.9162549353067038e-05, "loss": 0.5243, "step": 12460 }, { "epoch": 0.2642785943034082, "grad_norm": 0.34640687704086304, "learning_rate": 1.916241575118782e-05, "loss": 0.6124, "step": 12461 }, { "epoch": 0.26429980276134124, "grad_norm": 0.3105669915676117, "learning_rate": 1.916228213911824e-05, "loss": 0.519, "step": 12462 }, { "epoch": 0.26432101121927426, "grad_norm": 0.40012460947036743, "learning_rate": 1.9162148516858453e-05, "loss": 0.5171, "step": 12463 }, { "epoch": 0.2643422196772073, "grad_norm": 0.33534327149391174, "learning_rate": 1.9162014884408603e-05, "loss": 0.5135, "step": 12464 }, { "epoch": 0.2643634281351403, "grad_norm": 0.32965824007987976, "learning_rate": 1.916188124176884e-05, "loss": 0.5224, "step": 12465 }, { "epoch": 0.2643846365930733, "grad_norm": 0.4316636919975281, "learning_rate": 1.9161747588939313e-05, "loss": 0.53, "step": 12466 }, { "epoch": 0.26440584505100634, "grad_norm": 0.3975713849067688, "learning_rate": 1.9161613925920167e-05, "loss": 0.5586, "step": 12467 }, { "epoch": 0.26442705350893936, "grad_norm": 0.29037803411483765, "learning_rate": 1.9161480252711556e-05, "loss": 0.3868, "step": 12468 }, { "epoch": 0.2644482619668724, "grad_norm": 0.3492126762866974, "learning_rate": 1.9161346569313627e-05, "loss": 0.5317, "step": 12469 }, { "epoch": 0.2644694704248054, "grad_norm": 0.6875748038291931, "learning_rate": 1.9161212875726525e-05, "loss": 0.5432, "step": 12470 }, { "epoch": 0.2644906788827384, "grad_norm": 0.3961995542049408, "learning_rate": 1.9161079171950407e-05, "loss": 0.4914, "step": 12471 }, { "epoch": 0.26451188734067144, "grad_norm": 0.361318975687027, "learning_rate": 1.916094545798541e-05, "loss": 0.5467, "step": 12472 }, { "epoch": 0.26453309579860446, "grad_norm": 0.36555561423301697, "learning_rate": 1.916081173383169e-05, "loss": 0.5297, "step": 12473 }, { "epoch": 0.26455430425653753, "grad_norm": 0.3368360698223114, "learning_rate": 1.91606779994894e-05, "loss": 0.4907, "step": 12474 }, { "epoch": 0.26457551271447055, "grad_norm": 0.3470919132232666, "learning_rate": 1.916054425495868e-05, "loss": 0.562, "step": 12475 }, { "epoch": 0.26459672117240357, "grad_norm": 0.32756197452545166, "learning_rate": 1.9160410500239682e-05, "loss": 0.521, "step": 12476 }, { "epoch": 0.2646179296303366, "grad_norm": 0.3688102662563324, "learning_rate": 1.9160276735332555e-05, "loss": 0.457, "step": 12477 }, { "epoch": 0.2646391380882696, "grad_norm": 0.38400277495384216, "learning_rate": 1.916014296023745e-05, "loss": 0.4632, "step": 12478 }, { "epoch": 0.26466034654620263, "grad_norm": 0.3875201940536499, "learning_rate": 1.9160009174954507e-05, "loss": 0.5744, "step": 12479 }, { "epoch": 0.26468155500413565, "grad_norm": 0.3429633378982544, "learning_rate": 1.915987537948389e-05, "loss": 0.5218, "step": 12480 }, { "epoch": 0.26470276346206867, "grad_norm": 0.3271915912628174, "learning_rate": 1.915974157382573e-05, "loss": 0.5038, "step": 12481 }, { "epoch": 0.2647239719200017, "grad_norm": 0.3298868238925934, "learning_rate": 1.9159607757980194e-05, "loss": 0.5321, "step": 12482 }, { "epoch": 0.2647451803779347, "grad_norm": 0.3147239089012146, "learning_rate": 1.915947393194742e-05, "loss": 0.4919, "step": 12483 }, { "epoch": 0.2647663888358677, "grad_norm": 0.34396710991859436, "learning_rate": 1.9159340095727555e-05, "loss": 0.5515, "step": 12484 }, { "epoch": 0.26478759729380075, "grad_norm": 0.34768277406692505, "learning_rate": 1.9159206249320754e-05, "loss": 0.539, "step": 12485 }, { "epoch": 0.26480880575173377, "grad_norm": 0.39530763030052185, "learning_rate": 1.9159072392727162e-05, "loss": 0.5512, "step": 12486 }, { "epoch": 0.26483001420966684, "grad_norm": 0.353767454624176, "learning_rate": 1.915893852594693e-05, "loss": 0.5432, "step": 12487 }, { "epoch": 0.26485122266759986, "grad_norm": 0.34132012724876404, "learning_rate": 1.9158804648980206e-05, "loss": 0.6297, "step": 12488 }, { "epoch": 0.2648724311255329, "grad_norm": 0.3800102472305298, "learning_rate": 1.915867076182714e-05, "loss": 0.4426, "step": 12489 }, { "epoch": 0.2648936395834659, "grad_norm": 0.30636098980903625, "learning_rate": 1.915853686448788e-05, "loss": 0.5171, "step": 12490 }, { "epoch": 0.2649148480413989, "grad_norm": 0.5366480946540833, "learning_rate": 1.9158402956962572e-05, "loss": 0.5144, "step": 12491 }, { "epoch": 0.26493605649933194, "grad_norm": 0.7159197926521301, "learning_rate": 1.9158269039251366e-05, "loss": 0.539, "step": 12492 }, { "epoch": 0.26495726495726496, "grad_norm": 0.39918503165245056, "learning_rate": 1.915813511135442e-05, "loss": 0.5229, "step": 12493 }, { "epoch": 0.264978473415198, "grad_norm": 0.41226476430892944, "learning_rate": 1.9158001173271873e-05, "loss": 0.4659, "step": 12494 }, { "epoch": 0.264999681873131, "grad_norm": 0.3479936122894287, "learning_rate": 1.9157867225003875e-05, "loss": 0.5388, "step": 12495 }, { "epoch": 0.265020890331064, "grad_norm": 0.33674192428588867, "learning_rate": 1.9157733266550577e-05, "loss": 0.5027, "step": 12496 }, { "epoch": 0.26504209878899704, "grad_norm": 0.36080560088157654, "learning_rate": 1.9157599297912125e-05, "loss": 0.5513, "step": 12497 }, { "epoch": 0.26506330724693006, "grad_norm": 0.3737351894378662, "learning_rate": 1.9157465319088673e-05, "loss": 0.5221, "step": 12498 }, { "epoch": 0.2650845157048631, "grad_norm": 0.3363207280635834, "learning_rate": 1.915733133008037e-05, "loss": 0.4586, "step": 12499 }, { "epoch": 0.2651057241627961, "grad_norm": 0.3340245485305786, "learning_rate": 1.915719733088736e-05, "loss": 0.4983, "step": 12500 }, { "epoch": 0.26512693262072917, "grad_norm": 0.37677818536758423, "learning_rate": 1.9157063321509795e-05, "loss": 0.5019, "step": 12501 }, { "epoch": 0.2651481410786622, "grad_norm": 0.32564473152160645, "learning_rate": 1.9156929301947827e-05, "loss": 0.5003, "step": 12502 }, { "epoch": 0.2651693495365952, "grad_norm": 0.3114117383956909, "learning_rate": 1.91567952722016e-05, "loss": 0.4253, "step": 12503 }, { "epoch": 0.26519055799452823, "grad_norm": 0.32641616463661194, "learning_rate": 1.9156661232271263e-05, "loss": 0.4983, "step": 12504 }, { "epoch": 0.26521176645246125, "grad_norm": 0.37138238549232483, "learning_rate": 1.9156527182156965e-05, "loss": 0.5254, "step": 12505 }, { "epoch": 0.26523297491039427, "grad_norm": 0.33813634514808655, "learning_rate": 1.9156393121858862e-05, "loss": 0.4601, "step": 12506 }, { "epoch": 0.2652541833683273, "grad_norm": 0.4702315628528595, "learning_rate": 1.9156259051377094e-05, "loss": 0.5761, "step": 12507 }, { "epoch": 0.2652753918262603, "grad_norm": 0.33159342408180237, "learning_rate": 1.915612497071182e-05, "loss": 0.5339, "step": 12508 }, { "epoch": 0.26529660028419333, "grad_norm": 0.3072379529476166, "learning_rate": 1.915599087986318e-05, "loss": 0.5166, "step": 12509 }, { "epoch": 0.26531780874212635, "grad_norm": 0.3199695646762848, "learning_rate": 1.9155856778831322e-05, "loss": 0.567, "step": 12510 }, { "epoch": 0.26533901720005937, "grad_norm": 0.3166094422340393, "learning_rate": 1.9155722667616404e-05, "loss": 0.5059, "step": 12511 }, { "epoch": 0.2653602256579924, "grad_norm": 0.34129974246025085, "learning_rate": 1.915558854621857e-05, "loss": 0.5284, "step": 12512 }, { "epoch": 0.2653814341159254, "grad_norm": 0.3103174567222595, "learning_rate": 1.915545441463797e-05, "loss": 0.5552, "step": 12513 }, { "epoch": 0.2654026425738584, "grad_norm": 0.31632378697395325, "learning_rate": 1.9155320272874755e-05, "loss": 0.4383, "step": 12514 }, { "epoch": 0.2654238510317915, "grad_norm": 0.34919723868370056, "learning_rate": 1.915518612092907e-05, "loss": 0.5726, "step": 12515 }, { "epoch": 0.2654450594897245, "grad_norm": 0.37707680463790894, "learning_rate": 1.9155051958801067e-05, "loss": 0.4979, "step": 12516 }, { "epoch": 0.26546626794765754, "grad_norm": 0.34683334827423096, "learning_rate": 1.9154917786490896e-05, "loss": 0.6042, "step": 12517 }, { "epoch": 0.26548747640559056, "grad_norm": 0.32421672344207764, "learning_rate": 1.9154783603998702e-05, "loss": 0.4986, "step": 12518 }, { "epoch": 0.2655086848635236, "grad_norm": 0.33750107884407043, "learning_rate": 1.9154649411324642e-05, "loss": 0.53, "step": 12519 }, { "epoch": 0.2655298933214566, "grad_norm": 0.3400919437408447, "learning_rate": 1.9154515208468854e-05, "loss": 0.5099, "step": 12520 }, { "epoch": 0.2655511017793896, "grad_norm": 0.3422687351703644, "learning_rate": 1.91543809954315e-05, "loss": 0.5024, "step": 12521 }, { "epoch": 0.26557231023732264, "grad_norm": 0.35937902331352234, "learning_rate": 1.915424677221272e-05, "loss": 0.4677, "step": 12522 }, { "epoch": 0.26559351869525566, "grad_norm": 0.3487851321697235, "learning_rate": 1.9154112538812667e-05, "loss": 0.5588, "step": 12523 }, { "epoch": 0.2656147271531887, "grad_norm": 0.410014271736145, "learning_rate": 1.915397829523149e-05, "loss": 0.5095, "step": 12524 }, { "epoch": 0.2656359356111217, "grad_norm": 0.37382572889328003, "learning_rate": 1.9153844041469337e-05, "loss": 0.5045, "step": 12525 }, { "epoch": 0.2656571440690547, "grad_norm": 0.3527594208717346, "learning_rate": 1.9153709777526356e-05, "loss": 0.5799, "step": 12526 }, { "epoch": 0.26567835252698774, "grad_norm": 0.35014405846595764, "learning_rate": 1.9153575503402703e-05, "loss": 0.5734, "step": 12527 }, { "epoch": 0.2656995609849208, "grad_norm": 0.38198286294937134, "learning_rate": 1.915344121909852e-05, "loss": 0.5922, "step": 12528 }, { "epoch": 0.26572076944285383, "grad_norm": 0.34960323572158813, "learning_rate": 1.915330692461396e-05, "loss": 0.5242, "step": 12529 }, { "epoch": 0.26574197790078685, "grad_norm": 0.33602362871170044, "learning_rate": 1.915317261994917e-05, "loss": 0.5518, "step": 12530 }, { "epoch": 0.26576318635871987, "grad_norm": 0.3423673212528229, "learning_rate": 1.9153038305104302e-05, "loss": 0.5087, "step": 12531 }, { "epoch": 0.2657843948166529, "grad_norm": 0.34933891892433167, "learning_rate": 1.9152903980079505e-05, "loss": 0.5173, "step": 12532 }, { "epoch": 0.2658056032745859, "grad_norm": 0.3890681862831116, "learning_rate": 1.9152769644874927e-05, "loss": 0.6442, "step": 12533 }, { "epoch": 0.26582681173251893, "grad_norm": 0.31521761417388916, "learning_rate": 1.9152635299490717e-05, "loss": 0.4628, "step": 12534 }, { "epoch": 0.26584802019045195, "grad_norm": 0.3216298520565033, "learning_rate": 1.9152500943927028e-05, "loss": 0.5435, "step": 12535 }, { "epoch": 0.26586922864838497, "grad_norm": 0.4007791578769684, "learning_rate": 1.9152366578184006e-05, "loss": 0.5356, "step": 12536 }, { "epoch": 0.265890437106318, "grad_norm": 0.34109658002853394, "learning_rate": 1.9152232202261802e-05, "loss": 0.573, "step": 12537 }, { "epoch": 0.265911645564251, "grad_norm": 0.3716041147708893, "learning_rate": 1.9152097816160566e-05, "loss": 0.4536, "step": 12538 }, { "epoch": 0.26593285402218403, "grad_norm": 0.30416566133499146, "learning_rate": 1.915196341988044e-05, "loss": 0.4844, "step": 12539 }, { "epoch": 0.26595406248011705, "grad_norm": 0.3293391764163971, "learning_rate": 1.9151829013421587e-05, "loss": 0.4895, "step": 12540 }, { "epoch": 0.26597527093805007, "grad_norm": 0.3267095685005188, "learning_rate": 1.9151694596784146e-05, "loss": 0.5022, "step": 12541 }, { "epoch": 0.26599647939598314, "grad_norm": 0.3534277677536011, "learning_rate": 1.915156016996827e-05, "loss": 0.4773, "step": 12542 }, { "epoch": 0.26601768785391616, "grad_norm": 0.354969322681427, "learning_rate": 1.915142573297411e-05, "loss": 0.5409, "step": 12543 }, { "epoch": 0.2660388963118492, "grad_norm": 0.3342026472091675, "learning_rate": 1.915129128580181e-05, "loss": 0.4482, "step": 12544 }, { "epoch": 0.2660601047697822, "grad_norm": 0.33569955825805664, "learning_rate": 1.9151156828451528e-05, "loss": 0.4667, "step": 12545 }, { "epoch": 0.2660813132277152, "grad_norm": 0.3354892134666443, "learning_rate": 1.9151022360923406e-05, "loss": 0.5308, "step": 12546 }, { "epoch": 0.26610252168564824, "grad_norm": 0.5759350061416626, "learning_rate": 1.9150887883217596e-05, "loss": 0.4803, "step": 12547 }, { "epoch": 0.26612373014358126, "grad_norm": 0.3820438086986542, "learning_rate": 1.915075339533425e-05, "loss": 0.5383, "step": 12548 }, { "epoch": 0.2661449386015143, "grad_norm": 4.289824485778809, "learning_rate": 1.9150618897273512e-05, "loss": 0.5153, "step": 12549 }, { "epoch": 0.2661661470594473, "grad_norm": 0.4228397011756897, "learning_rate": 1.9150484389035536e-05, "loss": 0.4774, "step": 12550 }, { "epoch": 0.2661873555173803, "grad_norm": 0.3642423450946808, "learning_rate": 1.9150349870620476e-05, "loss": 0.5299, "step": 12551 }, { "epoch": 0.26620856397531334, "grad_norm": 0.4025097191333771, "learning_rate": 1.9150215342028475e-05, "loss": 0.523, "step": 12552 }, { "epoch": 0.26622977243324636, "grad_norm": 0.35292989015579224, "learning_rate": 1.9150080803259677e-05, "loss": 0.5326, "step": 12553 }, { "epoch": 0.2662509808911794, "grad_norm": 0.3324306309223175, "learning_rate": 1.9149946254314246e-05, "loss": 0.5217, "step": 12554 }, { "epoch": 0.26627218934911245, "grad_norm": 0.3212399482727051, "learning_rate": 1.914981169519232e-05, "loss": 0.5499, "step": 12555 }, { "epoch": 0.2662933978070455, "grad_norm": 0.3655056357383728, "learning_rate": 1.9149677125894054e-05, "loss": 0.5534, "step": 12556 }, { "epoch": 0.2663146062649785, "grad_norm": 0.35317373275756836, "learning_rate": 1.91495425464196e-05, "loss": 0.4839, "step": 12557 }, { "epoch": 0.2663358147229115, "grad_norm": 0.33491548895835876, "learning_rate": 1.91494079567691e-05, "loss": 0.5594, "step": 12558 }, { "epoch": 0.26635702318084453, "grad_norm": 0.36610448360443115, "learning_rate": 1.914927335694271e-05, "loss": 0.5345, "step": 12559 }, { "epoch": 0.26637823163877755, "grad_norm": 0.34182995557785034, "learning_rate": 1.9149138746940575e-05, "loss": 0.4854, "step": 12560 }, { "epoch": 0.26639944009671057, "grad_norm": 0.3906879723072052, "learning_rate": 1.914900412676285e-05, "loss": 0.5209, "step": 12561 }, { "epoch": 0.2664206485546436, "grad_norm": 0.37084445357322693, "learning_rate": 1.9148869496409684e-05, "loss": 0.6476, "step": 12562 }, { "epoch": 0.2664418570125766, "grad_norm": 0.34820234775543213, "learning_rate": 1.9148734855881218e-05, "loss": 0.4872, "step": 12563 }, { "epoch": 0.26646306547050963, "grad_norm": 0.3505696952342987, "learning_rate": 1.9148600205177616e-05, "loss": 0.5082, "step": 12564 }, { "epoch": 0.26648427392844265, "grad_norm": 0.31698814034461975, "learning_rate": 1.9148465544299014e-05, "loss": 0.5032, "step": 12565 }, { "epoch": 0.26650548238637567, "grad_norm": 0.3412652015686035, "learning_rate": 1.9148330873245574e-05, "loss": 0.4917, "step": 12566 }, { "epoch": 0.2665266908443087, "grad_norm": 0.3166970908641815, "learning_rate": 1.9148196192017436e-05, "loss": 0.506, "step": 12567 }, { "epoch": 0.2665478993022417, "grad_norm": 0.3420940041542053, "learning_rate": 1.9148061500614755e-05, "loss": 0.5333, "step": 12568 }, { "epoch": 0.2665691077601748, "grad_norm": 0.3573279082775116, "learning_rate": 1.914792679903768e-05, "loss": 0.6318, "step": 12569 }, { "epoch": 0.2665903162181078, "grad_norm": 0.40250110626220703, "learning_rate": 1.914779208728636e-05, "loss": 0.5321, "step": 12570 }, { "epoch": 0.2666115246760408, "grad_norm": 0.36282140016555786, "learning_rate": 1.9147657365360942e-05, "loss": 0.5709, "step": 12571 }, { "epoch": 0.26663273313397384, "grad_norm": 0.33531418442726135, "learning_rate": 1.9147522633261584e-05, "loss": 0.4964, "step": 12572 }, { "epoch": 0.26665394159190686, "grad_norm": 0.3155309855937958, "learning_rate": 1.9147387890988428e-05, "loss": 0.5412, "step": 12573 }, { "epoch": 0.2666751500498399, "grad_norm": 0.34522733092308044, "learning_rate": 1.9147253138541627e-05, "loss": 0.596, "step": 12574 }, { "epoch": 0.2666963585077729, "grad_norm": 0.34980645775794983, "learning_rate": 1.914711837592133e-05, "loss": 0.4774, "step": 12575 }, { "epoch": 0.2667175669657059, "grad_norm": 0.333913654088974, "learning_rate": 1.9146983603127687e-05, "loss": 0.4631, "step": 12576 }, { "epoch": 0.26673877542363894, "grad_norm": 0.5505841970443726, "learning_rate": 1.914684882016085e-05, "loss": 0.5274, "step": 12577 }, { "epoch": 0.26675998388157196, "grad_norm": 0.34318262338638306, "learning_rate": 1.9146714027020967e-05, "loss": 0.5108, "step": 12578 }, { "epoch": 0.266781192339505, "grad_norm": 0.3547036051750183, "learning_rate": 1.9146579223708188e-05, "loss": 0.5492, "step": 12579 }, { "epoch": 0.266802400797438, "grad_norm": 0.31698983907699585, "learning_rate": 1.914644441022266e-05, "loss": 0.5177, "step": 12580 }, { "epoch": 0.266823609255371, "grad_norm": 0.3527694046497345, "learning_rate": 1.914630958656454e-05, "loss": 0.542, "step": 12581 }, { "epoch": 0.26684481771330404, "grad_norm": 0.37741753458976746, "learning_rate": 1.9146174752733974e-05, "loss": 0.5422, "step": 12582 }, { "epoch": 0.2668660261712371, "grad_norm": 0.33478426933288574, "learning_rate": 1.9146039908731108e-05, "loss": 0.4892, "step": 12583 }, { "epoch": 0.26688723462917013, "grad_norm": 0.3520762622356415, "learning_rate": 1.91459050545561e-05, "loss": 0.558, "step": 12584 }, { "epoch": 0.26690844308710315, "grad_norm": 0.3450193703174591, "learning_rate": 1.9145770190209095e-05, "loss": 0.5087, "step": 12585 }, { "epoch": 0.2669296515450362, "grad_norm": 0.4129871129989624, "learning_rate": 1.9145635315690244e-05, "loss": 0.5558, "step": 12586 }, { "epoch": 0.2669508600029692, "grad_norm": 0.4317168891429901, "learning_rate": 1.9145500430999693e-05, "loss": 0.5836, "step": 12587 }, { "epoch": 0.2669720684609022, "grad_norm": 0.3480479121208191, "learning_rate": 1.91453655361376e-05, "loss": 0.494, "step": 12588 }, { "epoch": 0.26699327691883523, "grad_norm": 0.44778650999069214, "learning_rate": 1.914523063110411e-05, "loss": 0.4917, "step": 12589 }, { "epoch": 0.26701448537676825, "grad_norm": 0.3005281984806061, "learning_rate": 1.9145095715899372e-05, "loss": 0.4803, "step": 12590 }, { "epoch": 0.26703569383470127, "grad_norm": 0.32137253880500793, "learning_rate": 1.914496079052354e-05, "loss": 0.4622, "step": 12591 }, { "epoch": 0.2670569022926343, "grad_norm": 0.3200558125972748, "learning_rate": 1.9144825854976763e-05, "loss": 0.5317, "step": 12592 }, { "epoch": 0.2670781107505673, "grad_norm": 0.31528180837631226, "learning_rate": 1.914469090925919e-05, "loss": 0.5041, "step": 12593 }, { "epoch": 0.26709931920850033, "grad_norm": 0.36050549149513245, "learning_rate": 1.9144555953370968e-05, "loss": 0.5764, "step": 12594 }, { "epoch": 0.26712052766643335, "grad_norm": 0.32582908868789673, "learning_rate": 1.9144420987312253e-05, "loss": 0.4932, "step": 12595 }, { "epoch": 0.2671417361243664, "grad_norm": 0.3339994549751282, "learning_rate": 1.9144286011083193e-05, "loss": 0.4634, "step": 12596 }, { "epoch": 0.26716294458229944, "grad_norm": 0.3688565194606781, "learning_rate": 1.9144151024683934e-05, "loss": 0.4527, "step": 12597 }, { "epoch": 0.26718415304023246, "grad_norm": 0.3805306553840637, "learning_rate": 1.9144016028114635e-05, "loss": 0.6031, "step": 12598 }, { "epoch": 0.2672053614981655, "grad_norm": 0.3330581486225128, "learning_rate": 1.9143881021375435e-05, "loss": 0.49, "step": 12599 }, { "epoch": 0.2672265699560985, "grad_norm": 0.435838907957077, "learning_rate": 1.9143746004466493e-05, "loss": 0.5515, "step": 12600 }, { "epoch": 0.2672477784140315, "grad_norm": 0.3141261339187622, "learning_rate": 1.9143610977387955e-05, "loss": 0.5317, "step": 12601 }, { "epoch": 0.26726898687196454, "grad_norm": 0.33379149436950684, "learning_rate": 1.9143475940139977e-05, "loss": 0.455, "step": 12602 }, { "epoch": 0.26729019532989756, "grad_norm": 0.4186227023601532, "learning_rate": 1.9143340892722697e-05, "loss": 0.4717, "step": 12603 }, { "epoch": 0.2673114037878306, "grad_norm": 0.3484274744987488, "learning_rate": 1.9143205835136275e-05, "loss": 0.5011, "step": 12604 }, { "epoch": 0.2673326122457636, "grad_norm": 0.3588870167732239, "learning_rate": 1.9143070767380863e-05, "loss": 0.4899, "step": 12605 }, { "epoch": 0.2673538207036966, "grad_norm": 0.3374716639518738, "learning_rate": 1.9142935689456604e-05, "loss": 0.4535, "step": 12606 }, { "epoch": 0.26737502916162964, "grad_norm": 0.41135525703430176, "learning_rate": 1.914280060136365e-05, "loss": 0.5628, "step": 12607 }, { "epoch": 0.26739623761956266, "grad_norm": 0.4042041599750519, "learning_rate": 1.9142665503102154e-05, "loss": 0.5015, "step": 12608 }, { "epoch": 0.2674174460774957, "grad_norm": 0.3316798508167267, "learning_rate": 1.9142530394672265e-05, "loss": 0.4956, "step": 12609 }, { "epoch": 0.26743865453542875, "grad_norm": 0.35058677196502686, "learning_rate": 1.9142395276074134e-05, "loss": 0.5246, "step": 12610 }, { "epoch": 0.2674598629933618, "grad_norm": 0.31507644057273865, "learning_rate": 1.914226014730791e-05, "loss": 0.5017, "step": 12611 }, { "epoch": 0.2674810714512948, "grad_norm": 0.3244935870170593, "learning_rate": 1.9142125008373743e-05, "loss": 0.5099, "step": 12612 }, { "epoch": 0.2675022799092278, "grad_norm": 0.38667553663253784, "learning_rate": 1.9141989859271785e-05, "loss": 0.4842, "step": 12613 }, { "epoch": 0.26752348836716083, "grad_norm": 0.3430503010749817, "learning_rate": 1.914185470000218e-05, "loss": 0.5915, "step": 12614 }, { "epoch": 0.26754469682509385, "grad_norm": 0.34222811460494995, "learning_rate": 1.914171953056509e-05, "loss": 0.5924, "step": 12615 }, { "epoch": 0.2675659052830269, "grad_norm": 0.33056893944740295, "learning_rate": 1.9141584350960657e-05, "loss": 0.5213, "step": 12616 }, { "epoch": 0.2675871137409599, "grad_norm": 0.36417487263679504, "learning_rate": 1.9141449161189035e-05, "loss": 0.4731, "step": 12617 }, { "epoch": 0.2676083221988929, "grad_norm": 0.3509877324104309, "learning_rate": 1.914131396125037e-05, "loss": 0.4797, "step": 12618 }, { "epoch": 0.26762953065682593, "grad_norm": 0.31138768792152405, "learning_rate": 1.9141178751144815e-05, "loss": 0.4508, "step": 12619 }, { "epoch": 0.26765073911475895, "grad_norm": 0.3542878031730652, "learning_rate": 1.9141043530872526e-05, "loss": 0.4795, "step": 12620 }, { "epoch": 0.26767194757269197, "grad_norm": 0.3381969630718231, "learning_rate": 1.9140908300433642e-05, "loss": 0.5637, "step": 12621 }, { "epoch": 0.267693156030625, "grad_norm": 0.3600009083747864, "learning_rate": 1.914077305982832e-05, "loss": 0.5313, "step": 12622 }, { "epoch": 0.267714364488558, "grad_norm": 0.35285085439682007, "learning_rate": 1.9140637809056712e-05, "loss": 0.488, "step": 12623 }, { "epoch": 0.2677355729464911, "grad_norm": 0.3298407196998596, "learning_rate": 1.9140502548118964e-05, "loss": 0.588, "step": 12624 }, { "epoch": 0.2677567814044241, "grad_norm": 0.3422733545303345, "learning_rate": 1.9140367277015227e-05, "loss": 0.4966, "step": 12625 }, { "epoch": 0.2677779898623571, "grad_norm": 0.3104584813117981, "learning_rate": 1.9140231995745653e-05, "loss": 0.5152, "step": 12626 }, { "epoch": 0.26779919832029014, "grad_norm": 0.3186325430870056, "learning_rate": 1.9140096704310395e-05, "loss": 0.4907, "step": 12627 }, { "epoch": 0.26782040677822316, "grad_norm": 0.35722315311431885, "learning_rate": 1.91399614027096e-05, "loss": 0.546, "step": 12628 }, { "epoch": 0.2678416152361562, "grad_norm": 0.3280453085899353, "learning_rate": 1.9139826090943417e-05, "loss": 0.4675, "step": 12629 }, { "epoch": 0.2678628236940892, "grad_norm": 0.30490565299987793, "learning_rate": 1.9139690769012002e-05, "loss": 0.5454, "step": 12630 }, { "epoch": 0.2678840321520222, "grad_norm": 0.3198140561580658, "learning_rate": 1.9139555436915504e-05, "loss": 0.5132, "step": 12631 }, { "epoch": 0.26790524060995524, "grad_norm": 0.3594515025615692, "learning_rate": 1.9139420094654065e-05, "loss": 0.6071, "step": 12632 }, { "epoch": 0.26792644906788826, "grad_norm": 0.33879297971725464, "learning_rate": 1.9139284742227847e-05, "loss": 0.5676, "step": 12633 }, { "epoch": 0.2679476575258213, "grad_norm": 0.40148913860321045, "learning_rate": 1.9139149379636998e-05, "loss": 0.4758, "step": 12634 }, { "epoch": 0.2679688659837543, "grad_norm": 0.32572463154792786, "learning_rate": 1.913901400688166e-05, "loss": 0.5021, "step": 12635 }, { "epoch": 0.2679900744416873, "grad_norm": 0.42583972215652466, "learning_rate": 1.9138878623961993e-05, "loss": 0.4763, "step": 12636 }, { "epoch": 0.2680112828996204, "grad_norm": 0.3586762845516205, "learning_rate": 1.9138743230878147e-05, "loss": 0.5341, "step": 12637 }, { "epoch": 0.2680324913575534, "grad_norm": 0.3434600830078125, "learning_rate": 1.9138607827630268e-05, "loss": 0.5494, "step": 12638 }, { "epoch": 0.26805369981548643, "grad_norm": 0.33176296949386597, "learning_rate": 1.913847241421851e-05, "loss": 0.4665, "step": 12639 }, { "epoch": 0.26807490827341945, "grad_norm": 0.7582672834396362, "learning_rate": 1.9138336990643024e-05, "loss": 0.5232, "step": 12640 }, { "epoch": 0.2680961167313525, "grad_norm": 0.3377721309661865, "learning_rate": 1.9138201556903955e-05, "loss": 0.5407, "step": 12641 }, { "epoch": 0.2681173251892855, "grad_norm": 0.33432021737098694, "learning_rate": 1.913806611300146e-05, "loss": 0.57, "step": 12642 }, { "epoch": 0.2681385336472185, "grad_norm": 0.4773429036140442, "learning_rate": 1.9137930658935685e-05, "loss": 0.4782, "step": 12643 }, { "epoch": 0.26815974210515153, "grad_norm": 0.34685176610946655, "learning_rate": 1.9137795194706784e-05, "loss": 0.4405, "step": 12644 }, { "epoch": 0.26818095056308455, "grad_norm": 0.32054346799850464, "learning_rate": 1.9137659720314907e-05, "loss": 0.4893, "step": 12645 }, { "epoch": 0.26820215902101757, "grad_norm": 0.3329296410083771, "learning_rate": 1.9137524235760206e-05, "loss": 0.5182, "step": 12646 }, { "epoch": 0.2682233674789506, "grad_norm": 0.3052043318748474, "learning_rate": 1.9137388741042828e-05, "loss": 0.4623, "step": 12647 }, { "epoch": 0.2682445759368836, "grad_norm": 0.42560499906539917, "learning_rate": 1.9137253236162925e-05, "loss": 0.4249, "step": 12648 }, { "epoch": 0.26826578439481663, "grad_norm": 0.3368856906890869, "learning_rate": 1.913711772112065e-05, "loss": 0.4855, "step": 12649 }, { "epoch": 0.26828699285274965, "grad_norm": 0.46028196811676025, "learning_rate": 1.913698219591615e-05, "loss": 0.4745, "step": 12650 }, { "epoch": 0.2683082013106827, "grad_norm": 0.3163427412509918, "learning_rate": 1.913684666054958e-05, "loss": 0.4811, "step": 12651 }, { "epoch": 0.26832940976861575, "grad_norm": 0.578857958316803, "learning_rate": 1.9136711115021087e-05, "loss": 0.548, "step": 12652 }, { "epoch": 0.26835061822654876, "grad_norm": 0.32535097002983093, "learning_rate": 1.9136575559330825e-05, "loss": 0.5066, "step": 12653 }, { "epoch": 0.2683718266844818, "grad_norm": 0.3216809034347534, "learning_rate": 1.913643999347894e-05, "loss": 0.5792, "step": 12654 }, { "epoch": 0.2683930351424148, "grad_norm": 0.35678917169570923, "learning_rate": 1.9136304417465588e-05, "loss": 0.6008, "step": 12655 }, { "epoch": 0.2684142436003478, "grad_norm": 0.368280827999115, "learning_rate": 1.9136168831290915e-05, "loss": 0.548, "step": 12656 }, { "epoch": 0.26843545205828084, "grad_norm": 0.36863091588020325, "learning_rate": 1.913603323495508e-05, "loss": 0.5089, "step": 12657 }, { "epoch": 0.26845666051621386, "grad_norm": 0.3173634111881256, "learning_rate": 1.9135897628458222e-05, "loss": 0.523, "step": 12658 }, { "epoch": 0.2684778689741469, "grad_norm": 0.35457876324653625, "learning_rate": 1.91357620118005e-05, "loss": 0.4687, "step": 12659 }, { "epoch": 0.2684990774320799, "grad_norm": 0.6970980167388916, "learning_rate": 1.9135626384982066e-05, "loss": 0.4623, "step": 12660 }, { "epoch": 0.2685202858900129, "grad_norm": 0.35929548740386963, "learning_rate": 1.9135490748003065e-05, "loss": 0.5247, "step": 12661 }, { "epoch": 0.26854149434794594, "grad_norm": 0.34913167357444763, "learning_rate": 1.913535510086365e-05, "loss": 0.499, "step": 12662 }, { "epoch": 0.26856270280587896, "grad_norm": 0.3302956521511078, "learning_rate": 1.9135219443563976e-05, "loss": 0.5736, "step": 12663 }, { "epoch": 0.268583911263812, "grad_norm": 0.3485736548900604, "learning_rate": 1.9135083776104183e-05, "loss": 0.5456, "step": 12664 }, { "epoch": 0.26860511972174506, "grad_norm": 0.35592371225357056, "learning_rate": 1.9134948098484436e-05, "loss": 0.5275, "step": 12665 }, { "epoch": 0.2686263281796781, "grad_norm": 0.3328423798084259, "learning_rate": 1.9134812410704878e-05, "loss": 0.5184, "step": 12666 }, { "epoch": 0.2686475366376111, "grad_norm": 0.32581591606140137, "learning_rate": 1.9134676712765658e-05, "loss": 0.5117, "step": 12667 }, { "epoch": 0.2686687450955441, "grad_norm": 0.291299968957901, "learning_rate": 1.913454100466693e-05, "loss": 0.3948, "step": 12668 }, { "epoch": 0.26868995355347713, "grad_norm": 0.4042912423610687, "learning_rate": 1.913440528640885e-05, "loss": 0.5569, "step": 12669 }, { "epoch": 0.26871116201141015, "grad_norm": 0.3235938847064972, "learning_rate": 1.913426955799156e-05, "loss": 0.4762, "step": 12670 }, { "epoch": 0.2687323704693432, "grad_norm": 0.35650113224983215, "learning_rate": 1.9134133819415215e-05, "loss": 0.4539, "step": 12671 }, { "epoch": 0.2687535789272762, "grad_norm": 0.6321651935577393, "learning_rate": 1.9133998070679968e-05, "loss": 0.485, "step": 12672 }, { "epoch": 0.2687747873852092, "grad_norm": 0.3611052334308624, "learning_rate": 1.9133862311785966e-05, "loss": 0.5036, "step": 12673 }, { "epoch": 0.26879599584314223, "grad_norm": 0.3173419237136841, "learning_rate": 1.9133726542733356e-05, "loss": 0.4776, "step": 12674 }, { "epoch": 0.26881720430107525, "grad_norm": 0.29347383975982666, "learning_rate": 1.9133590763522304e-05, "loss": 0.4699, "step": 12675 }, { "epoch": 0.26883841275900827, "grad_norm": 0.3692481219768524, "learning_rate": 1.9133454974152945e-05, "loss": 0.6, "step": 12676 }, { "epoch": 0.2688596212169413, "grad_norm": 0.3390677869319916, "learning_rate": 1.913331917462544e-05, "loss": 0.5183, "step": 12677 }, { "epoch": 0.26888082967487437, "grad_norm": 0.3279251456260681, "learning_rate": 1.9133183364939936e-05, "loss": 0.5428, "step": 12678 }, { "epoch": 0.2689020381328074, "grad_norm": 0.34973302483558655, "learning_rate": 1.9133047545096584e-05, "loss": 0.5081, "step": 12679 }, { "epoch": 0.2689232465907404, "grad_norm": 0.37538641691207886, "learning_rate": 1.9132911715095536e-05, "loss": 0.4966, "step": 12680 }, { "epoch": 0.2689444550486734, "grad_norm": 0.3384019434452057, "learning_rate": 1.9132775874936946e-05, "loss": 0.4723, "step": 12681 }, { "epoch": 0.26896566350660644, "grad_norm": 0.3138086199760437, "learning_rate": 1.9132640024620957e-05, "loss": 0.5217, "step": 12682 }, { "epoch": 0.26898687196453946, "grad_norm": 1.1490293741226196, "learning_rate": 1.913250416414773e-05, "loss": 0.5526, "step": 12683 }, { "epoch": 0.2690080804224725, "grad_norm": 0.32523852586746216, "learning_rate": 1.9132368293517407e-05, "loss": 0.5075, "step": 12684 }, { "epoch": 0.2690292888804055, "grad_norm": 0.3314493000507355, "learning_rate": 1.9132232412730146e-05, "loss": 0.5065, "step": 12685 }, { "epoch": 0.2690504973383385, "grad_norm": 0.35420891642570496, "learning_rate": 1.9132096521786097e-05, "loss": 0.5835, "step": 12686 }, { "epoch": 0.26907170579627154, "grad_norm": 0.3480376601219177, "learning_rate": 1.9131960620685404e-05, "loss": 0.494, "step": 12687 }, { "epoch": 0.26909291425420456, "grad_norm": 0.3258228600025177, "learning_rate": 1.9131824709428228e-05, "loss": 0.6044, "step": 12688 }, { "epoch": 0.2691141227121376, "grad_norm": 0.38255512714385986, "learning_rate": 1.9131688788014716e-05, "loss": 0.5035, "step": 12689 }, { "epoch": 0.2691353311700706, "grad_norm": 0.4171237647533417, "learning_rate": 1.913155285644502e-05, "loss": 0.5029, "step": 12690 }, { "epoch": 0.2691565396280036, "grad_norm": 0.32771316170692444, "learning_rate": 1.9131416914719284e-05, "loss": 0.5135, "step": 12691 }, { "epoch": 0.2691777480859367, "grad_norm": 0.39349111914634705, "learning_rate": 1.9131280962837672e-05, "loss": 0.4736, "step": 12692 }, { "epoch": 0.2691989565438697, "grad_norm": 0.3212546110153198, "learning_rate": 1.9131145000800324e-05, "loss": 0.5201, "step": 12693 }, { "epoch": 0.26922016500180274, "grad_norm": 0.35707855224609375, "learning_rate": 1.91310090286074e-05, "loss": 0.6272, "step": 12694 }, { "epoch": 0.26924137345973576, "grad_norm": 0.3727127015590668, "learning_rate": 1.9130873046259047e-05, "loss": 0.4936, "step": 12695 }, { "epoch": 0.2692625819176688, "grad_norm": 0.3570495843887329, "learning_rate": 1.9130737053755412e-05, "loss": 0.6218, "step": 12696 }, { "epoch": 0.2692837903756018, "grad_norm": 0.36188170313835144, "learning_rate": 1.9130601051096655e-05, "loss": 0.575, "step": 12697 }, { "epoch": 0.2693049988335348, "grad_norm": 0.39030247926712036, "learning_rate": 1.9130465038282918e-05, "loss": 0.5554, "step": 12698 }, { "epoch": 0.26932620729146783, "grad_norm": 0.5172110795974731, "learning_rate": 1.913032901531436e-05, "loss": 0.5375, "step": 12699 }, { "epoch": 0.26934741574940085, "grad_norm": 0.31288087368011475, "learning_rate": 1.913019298219113e-05, "loss": 0.4729, "step": 12700 }, { "epoch": 0.2693686242073339, "grad_norm": 0.3471790552139282, "learning_rate": 1.913005693891338e-05, "loss": 0.5293, "step": 12701 }, { "epoch": 0.2693898326652669, "grad_norm": 0.30232730507850647, "learning_rate": 1.9129920885481256e-05, "loss": 0.4763, "step": 12702 }, { "epoch": 0.2694110411231999, "grad_norm": 0.31995710730552673, "learning_rate": 1.9129784821894918e-05, "loss": 0.4533, "step": 12703 }, { "epoch": 0.26943224958113293, "grad_norm": 0.32891467213630676, "learning_rate": 1.912964874815451e-05, "loss": 0.5046, "step": 12704 }, { "epoch": 0.269453458039066, "grad_norm": 0.33419346809387207, "learning_rate": 1.9129512664260185e-05, "loss": 0.4419, "step": 12705 }, { "epoch": 0.269474666496999, "grad_norm": 0.35087499022483826, "learning_rate": 1.9129376570212095e-05, "loss": 0.6141, "step": 12706 }, { "epoch": 0.26949587495493205, "grad_norm": 0.32829728722572327, "learning_rate": 1.9129240466010395e-05, "loss": 0.4841, "step": 12707 }, { "epoch": 0.26951708341286507, "grad_norm": 0.29924747347831726, "learning_rate": 1.912910435165523e-05, "loss": 0.4397, "step": 12708 }, { "epoch": 0.2695382918707981, "grad_norm": 0.6463362574577332, "learning_rate": 1.9128968227146757e-05, "loss": 0.6357, "step": 12709 }, { "epoch": 0.2695595003287311, "grad_norm": 0.35619956254959106, "learning_rate": 1.912883209248512e-05, "loss": 0.536, "step": 12710 }, { "epoch": 0.2695807087866641, "grad_norm": 0.44422850012779236, "learning_rate": 1.912869594767048e-05, "loss": 0.4637, "step": 12711 }, { "epoch": 0.26960191724459714, "grad_norm": 0.37696370482444763, "learning_rate": 1.9128559792702983e-05, "loss": 0.5729, "step": 12712 }, { "epoch": 0.26962312570253016, "grad_norm": 0.35306891798973083, "learning_rate": 1.9128423627582778e-05, "loss": 0.4846, "step": 12713 }, { "epoch": 0.2696443341604632, "grad_norm": 0.4444800615310669, "learning_rate": 1.9128287452310023e-05, "loss": 0.4336, "step": 12714 }, { "epoch": 0.2696655426183962, "grad_norm": 0.31600648164749146, "learning_rate": 1.9128151266884866e-05, "loss": 0.5652, "step": 12715 }, { "epoch": 0.2696867510763292, "grad_norm": 0.3649246096611023, "learning_rate": 1.9128015071307457e-05, "loss": 0.5094, "step": 12716 }, { "epoch": 0.26970795953426224, "grad_norm": 0.33243927359580994, "learning_rate": 1.9127878865577948e-05, "loss": 0.4629, "step": 12717 }, { "epoch": 0.26972916799219526, "grad_norm": 0.3478706181049347, "learning_rate": 1.9127742649696495e-05, "loss": 0.4523, "step": 12718 }, { "epoch": 0.26975037645012834, "grad_norm": 0.3542018532752991, "learning_rate": 1.9127606423663243e-05, "loss": 0.513, "step": 12719 }, { "epoch": 0.26977158490806136, "grad_norm": 0.3534497022628784, "learning_rate": 1.9127470187478346e-05, "loss": 0.5037, "step": 12720 }, { "epoch": 0.2697927933659944, "grad_norm": 0.5416589975357056, "learning_rate": 1.912733394114196e-05, "loss": 0.4829, "step": 12721 }, { "epoch": 0.2698140018239274, "grad_norm": 0.6284846663475037, "learning_rate": 1.9127197684654225e-05, "loss": 0.5488, "step": 12722 }, { "epoch": 0.2698352102818604, "grad_norm": 23.904726028442383, "learning_rate": 1.9127061418015308e-05, "loss": 0.4844, "step": 12723 }, { "epoch": 0.26985641873979344, "grad_norm": 0.34766891598701477, "learning_rate": 1.912692514122535e-05, "loss": 0.6039, "step": 12724 }, { "epoch": 0.26987762719772646, "grad_norm": 0.33009886741638184, "learning_rate": 1.9126788854284503e-05, "loss": 0.4816, "step": 12725 }, { "epoch": 0.2698988356556595, "grad_norm": 0.3304844796657562, "learning_rate": 1.9126652557192923e-05, "loss": 0.5358, "step": 12726 }, { "epoch": 0.2699200441135925, "grad_norm": 0.3580032289028168, "learning_rate": 1.9126516249950757e-05, "loss": 0.4305, "step": 12727 }, { "epoch": 0.2699412525715255, "grad_norm": 0.3920481503009796, "learning_rate": 1.9126379932558158e-05, "loss": 0.505, "step": 12728 }, { "epoch": 0.26996246102945853, "grad_norm": 0.3150225281715393, "learning_rate": 1.9126243605015283e-05, "loss": 0.477, "step": 12729 }, { "epoch": 0.26998366948739155, "grad_norm": 0.3210943043231964, "learning_rate": 1.9126107267322276e-05, "loss": 0.4391, "step": 12730 }, { "epoch": 0.2700048779453246, "grad_norm": 0.32847797870635986, "learning_rate": 1.9125970919479293e-05, "loss": 0.5097, "step": 12731 }, { "epoch": 0.2700260864032576, "grad_norm": 0.34646567702293396, "learning_rate": 1.9125834561486484e-05, "loss": 0.5002, "step": 12732 }, { "epoch": 0.27004729486119067, "grad_norm": 0.33013755083084106, "learning_rate": 1.9125698193344e-05, "loss": 0.4502, "step": 12733 }, { "epoch": 0.2700685033191237, "grad_norm": 0.9662557244300842, "learning_rate": 1.9125561815051994e-05, "loss": 0.5752, "step": 12734 }, { "epoch": 0.2700897117770567, "grad_norm": 0.3301071524620056, "learning_rate": 1.912542542661062e-05, "loss": 0.5407, "step": 12735 }, { "epoch": 0.2701109202349897, "grad_norm": 0.3487231135368347, "learning_rate": 1.9125289028020027e-05, "loss": 0.5629, "step": 12736 }, { "epoch": 0.27013212869292275, "grad_norm": 0.29656386375427246, "learning_rate": 1.9125152619280363e-05, "loss": 0.4124, "step": 12737 }, { "epoch": 0.27015333715085577, "grad_norm": 0.33729246258735657, "learning_rate": 1.9125016200391788e-05, "loss": 0.467, "step": 12738 }, { "epoch": 0.2701745456087888, "grad_norm": 0.4131844639778137, "learning_rate": 1.9124879771354446e-05, "loss": 0.5281, "step": 12739 }, { "epoch": 0.2701957540667218, "grad_norm": 0.3759634792804718, "learning_rate": 1.912474333216849e-05, "loss": 0.5446, "step": 12740 }, { "epoch": 0.2702169625246548, "grad_norm": 0.3566731810569763, "learning_rate": 1.912460688283408e-05, "loss": 0.5817, "step": 12741 }, { "epoch": 0.27023817098258784, "grad_norm": 0.3829845190048218, "learning_rate": 1.9124470423351357e-05, "loss": 0.4681, "step": 12742 }, { "epoch": 0.27025937944052086, "grad_norm": 0.32248181104660034, "learning_rate": 1.9124333953720477e-05, "loss": 0.5296, "step": 12743 }, { "epoch": 0.2702805878984539, "grad_norm": 0.3577706813812256, "learning_rate": 1.9124197473941596e-05, "loss": 0.4905, "step": 12744 }, { "epoch": 0.2703017963563869, "grad_norm": 0.357300341129303, "learning_rate": 1.9124060984014857e-05, "loss": 0.4759, "step": 12745 }, { "epoch": 0.27032300481432, "grad_norm": 0.341334730386734, "learning_rate": 1.912392448394042e-05, "loss": 0.5406, "step": 12746 }, { "epoch": 0.270344213272253, "grad_norm": 0.3676755130290985, "learning_rate": 1.9123787973718433e-05, "loss": 0.6076, "step": 12747 }, { "epoch": 0.270365421730186, "grad_norm": 0.3341754674911499, "learning_rate": 1.9123651453349046e-05, "loss": 0.5166, "step": 12748 }, { "epoch": 0.27038663018811904, "grad_norm": 0.350285142660141, "learning_rate": 1.9123514922832417e-05, "loss": 0.4472, "step": 12749 }, { "epoch": 0.27040783864605206, "grad_norm": 0.3649275302886963, "learning_rate": 1.912337838216869e-05, "loss": 0.5309, "step": 12750 }, { "epoch": 0.2704290471039851, "grad_norm": 0.615014910697937, "learning_rate": 1.9123241831358022e-05, "loss": 0.4882, "step": 12751 }, { "epoch": 0.2704502555619181, "grad_norm": 0.3643299639225006, "learning_rate": 1.9123105270400568e-05, "loss": 0.5272, "step": 12752 }, { "epoch": 0.2704714640198511, "grad_norm": 0.32284170389175415, "learning_rate": 1.9122968699296468e-05, "loss": 0.5625, "step": 12753 }, { "epoch": 0.27049267247778414, "grad_norm": 0.30947938561439514, "learning_rate": 1.9122832118045886e-05, "loss": 0.5, "step": 12754 }, { "epoch": 0.27051388093571715, "grad_norm": 2.9758644104003906, "learning_rate": 1.9122695526648968e-05, "loss": 0.6061, "step": 12755 }, { "epoch": 0.2705350893936502, "grad_norm": 0.32819125056266785, "learning_rate": 1.912255892510587e-05, "loss": 0.5005, "step": 12756 }, { "epoch": 0.2705562978515832, "grad_norm": 0.39859738945961, "learning_rate": 1.912242231341674e-05, "loss": 0.6443, "step": 12757 }, { "epoch": 0.2705775063095162, "grad_norm": 0.3230903148651123, "learning_rate": 1.912228569158173e-05, "loss": 0.4848, "step": 12758 }, { "epoch": 0.27059871476744923, "grad_norm": 0.38200798630714417, "learning_rate": 1.912214905960099e-05, "loss": 0.583, "step": 12759 }, { "epoch": 0.2706199232253823, "grad_norm": 0.3631545901298523, "learning_rate": 1.912201241747468e-05, "loss": 0.5506, "step": 12760 }, { "epoch": 0.27064113168331533, "grad_norm": 0.411568820476532, "learning_rate": 1.9121875765202946e-05, "loss": 0.5072, "step": 12761 }, { "epoch": 0.27066234014124835, "grad_norm": 0.301984041929245, "learning_rate": 1.912173910278594e-05, "loss": 0.5331, "step": 12762 }, { "epoch": 0.27068354859918137, "grad_norm": 0.3492373824119568, "learning_rate": 1.9121602430223815e-05, "loss": 0.5873, "step": 12763 }, { "epoch": 0.2707047570571144, "grad_norm": 0.28877854347229004, "learning_rate": 1.9121465747516724e-05, "loss": 0.4593, "step": 12764 }, { "epoch": 0.2707259655150474, "grad_norm": 0.3183702230453491, "learning_rate": 1.9121329054664818e-05, "loss": 0.5168, "step": 12765 }, { "epoch": 0.2707471739729804, "grad_norm": 0.549979567527771, "learning_rate": 1.912119235166825e-05, "loss": 0.4805, "step": 12766 }, { "epoch": 0.27076838243091345, "grad_norm": 0.3732004761695862, "learning_rate": 1.9121055638527167e-05, "loss": 0.5225, "step": 12767 }, { "epoch": 0.27078959088884647, "grad_norm": 0.3096749186515808, "learning_rate": 1.912091891524173e-05, "loss": 0.5755, "step": 12768 }, { "epoch": 0.2708107993467795, "grad_norm": 0.35792338848114014, "learning_rate": 1.9120782181812084e-05, "loss": 0.5101, "step": 12769 }, { "epoch": 0.2708320078047125, "grad_norm": 0.355674684047699, "learning_rate": 1.9120645438238385e-05, "loss": 0.5305, "step": 12770 }, { "epoch": 0.2708532162626455, "grad_norm": 0.43277767300605774, "learning_rate": 1.912050868452078e-05, "loss": 0.4529, "step": 12771 }, { "epoch": 0.27087442472057854, "grad_norm": 0.385925829410553, "learning_rate": 1.9120371920659428e-05, "loss": 0.4985, "step": 12772 }, { "epoch": 0.27089563317851156, "grad_norm": 0.35245174169540405, "learning_rate": 1.9120235146654474e-05, "loss": 0.5863, "step": 12773 }, { "epoch": 0.27091684163644464, "grad_norm": 0.35857272148132324, "learning_rate": 1.9120098362506074e-05, "loss": 0.5653, "step": 12774 }, { "epoch": 0.27093805009437766, "grad_norm": 0.32203060388565063, "learning_rate": 1.9119961568214383e-05, "loss": 0.5213, "step": 12775 }, { "epoch": 0.2709592585523107, "grad_norm": 0.33096495270729065, "learning_rate": 1.9119824763779548e-05, "loss": 0.5538, "step": 12776 }, { "epoch": 0.2709804670102437, "grad_norm": 0.36918729543685913, "learning_rate": 1.9119687949201723e-05, "loss": 0.525, "step": 12777 }, { "epoch": 0.2710016754681767, "grad_norm": 0.2962694466114044, "learning_rate": 1.911955112448106e-05, "loss": 0.3833, "step": 12778 }, { "epoch": 0.27102288392610974, "grad_norm": 0.3233494758605957, "learning_rate": 1.9119414289617712e-05, "loss": 0.5139, "step": 12779 }, { "epoch": 0.27104409238404276, "grad_norm": 0.37573227286338806, "learning_rate": 1.911927744461183e-05, "loss": 0.5203, "step": 12780 }, { "epoch": 0.2710653008419758, "grad_norm": 0.380230575799942, "learning_rate": 1.911914058946357e-05, "loss": 0.4569, "step": 12781 }, { "epoch": 0.2710865092999088, "grad_norm": 0.29656723141670227, "learning_rate": 1.911900372417308e-05, "loss": 0.4913, "step": 12782 }, { "epoch": 0.2711077177578418, "grad_norm": 0.3047923147678375, "learning_rate": 1.9118866848740513e-05, "loss": 0.5315, "step": 12783 }, { "epoch": 0.27112892621577483, "grad_norm": 0.32793134450912476, "learning_rate": 1.911872996316602e-05, "loss": 0.5563, "step": 12784 }, { "epoch": 0.27115013467370785, "grad_norm": 0.3397235870361328, "learning_rate": 1.9118593067449756e-05, "loss": 0.5027, "step": 12785 }, { "epoch": 0.2711713431316409, "grad_norm": 0.34063059091567993, "learning_rate": 1.911845616159187e-05, "loss": 0.5012, "step": 12786 }, { "epoch": 0.27119255158957395, "grad_norm": 0.3302628993988037, "learning_rate": 1.911831924559252e-05, "loss": 0.4809, "step": 12787 }, { "epoch": 0.27121376004750697, "grad_norm": 0.5150894522666931, "learning_rate": 1.911818231945185e-05, "loss": 0.5199, "step": 12788 }, { "epoch": 0.27123496850544, "grad_norm": 0.3369142711162567, "learning_rate": 1.911804538317002e-05, "loss": 0.6017, "step": 12789 }, { "epoch": 0.271256176963373, "grad_norm": 0.28605690598487854, "learning_rate": 1.9117908436747176e-05, "loss": 0.4758, "step": 12790 }, { "epoch": 0.271277385421306, "grad_norm": 0.3235050141811371, "learning_rate": 1.9117771480183475e-05, "loss": 0.4925, "step": 12791 }, { "epoch": 0.27129859387923905, "grad_norm": 0.33072537183761597, "learning_rate": 1.9117634513479068e-05, "loss": 0.4396, "step": 12792 }, { "epoch": 0.27131980233717207, "grad_norm": 0.33582353591918945, "learning_rate": 1.9117497536634108e-05, "loss": 0.4548, "step": 12793 }, { "epoch": 0.2713410107951051, "grad_norm": 0.33617234230041504, "learning_rate": 1.9117360549648744e-05, "loss": 0.5092, "step": 12794 }, { "epoch": 0.2713622192530381, "grad_norm": 0.38983437418937683, "learning_rate": 1.9117223552523136e-05, "loss": 0.4915, "step": 12795 }, { "epoch": 0.2713834277109711, "grad_norm": 0.4212331175804138, "learning_rate": 1.9117086545257428e-05, "loss": 0.5198, "step": 12796 }, { "epoch": 0.27140463616890415, "grad_norm": 0.31065696477890015, "learning_rate": 1.9116949527851773e-05, "loss": 0.5237, "step": 12797 }, { "epoch": 0.27142584462683716, "grad_norm": 0.3247379958629608, "learning_rate": 1.9116812500306328e-05, "loss": 0.451, "step": 12798 }, { "epoch": 0.2714470530847702, "grad_norm": 0.33483561873435974, "learning_rate": 1.9116675462621245e-05, "loss": 0.5156, "step": 12799 }, { "epoch": 0.2714682615427032, "grad_norm": 0.3397623598575592, "learning_rate": 1.9116538414796672e-05, "loss": 0.4787, "step": 12800 }, { "epoch": 0.2714894700006363, "grad_norm": 0.32808756828308105, "learning_rate": 1.9116401356832766e-05, "loss": 0.4491, "step": 12801 }, { "epoch": 0.2715106784585693, "grad_norm": 0.42489227652549744, "learning_rate": 1.911626428872968e-05, "loss": 0.5268, "step": 12802 }, { "epoch": 0.2715318869165023, "grad_norm": 0.3467337191104889, "learning_rate": 1.9116127210487557e-05, "loss": 0.5258, "step": 12803 }, { "epoch": 0.27155309537443534, "grad_norm": 0.32065677642822266, "learning_rate": 1.9115990122106563e-05, "loss": 0.5023, "step": 12804 }, { "epoch": 0.27157430383236836, "grad_norm": 0.3765344023704529, "learning_rate": 1.911585302358684e-05, "loss": 0.5525, "step": 12805 }, { "epoch": 0.2715955122903014, "grad_norm": 0.3071330189704895, "learning_rate": 1.911571591492855e-05, "loss": 0.5443, "step": 12806 }, { "epoch": 0.2716167207482344, "grad_norm": 0.3925337791442871, "learning_rate": 1.9115578796131834e-05, "loss": 0.5434, "step": 12807 }, { "epoch": 0.2716379292061674, "grad_norm": 0.34247660636901855, "learning_rate": 1.9115441667196853e-05, "loss": 0.5491, "step": 12808 }, { "epoch": 0.27165913766410044, "grad_norm": 0.35599973797798157, "learning_rate": 1.9115304528123756e-05, "loss": 0.4564, "step": 12809 }, { "epoch": 0.27168034612203346, "grad_norm": 0.4713420867919922, "learning_rate": 1.9115167378912697e-05, "loss": 0.5537, "step": 12810 }, { "epoch": 0.2717015545799665, "grad_norm": 0.3294186592102051, "learning_rate": 1.911503021956383e-05, "loss": 0.4474, "step": 12811 }, { "epoch": 0.2717227630378995, "grad_norm": 0.5893006324768066, "learning_rate": 1.9114893050077302e-05, "loss": 0.5528, "step": 12812 }, { "epoch": 0.2717439714958325, "grad_norm": 0.4216875731945038, "learning_rate": 1.9114755870453268e-05, "loss": 0.5977, "step": 12813 }, { "epoch": 0.27176517995376553, "grad_norm": 0.3517339527606964, "learning_rate": 1.9114618680691886e-05, "loss": 0.4565, "step": 12814 }, { "epoch": 0.2717863884116986, "grad_norm": 0.33263570070266724, "learning_rate": 1.91144814807933e-05, "loss": 0.5773, "step": 12815 }, { "epoch": 0.27180759686963163, "grad_norm": 0.3347584307193756, "learning_rate": 1.911434427075767e-05, "loss": 0.5144, "step": 12816 }, { "epoch": 0.27182880532756465, "grad_norm": 0.6657493114471436, "learning_rate": 1.9114207050585146e-05, "loss": 0.4625, "step": 12817 }, { "epoch": 0.27185001378549767, "grad_norm": 0.5251010060310364, "learning_rate": 1.911406982027588e-05, "loss": 0.5877, "step": 12818 }, { "epoch": 0.2718712222434307, "grad_norm": 0.3159244656562805, "learning_rate": 1.9113932579830025e-05, "loss": 0.5174, "step": 12819 }, { "epoch": 0.2718924307013637, "grad_norm": 0.3247068226337433, "learning_rate": 1.911379532924773e-05, "loss": 0.5237, "step": 12820 }, { "epoch": 0.2719136391592967, "grad_norm": 0.32406726479530334, "learning_rate": 1.9113658068529155e-05, "loss": 0.4987, "step": 12821 }, { "epoch": 0.27193484761722975, "grad_norm": 0.31753817200660706, "learning_rate": 1.9113520797674447e-05, "loss": 0.4743, "step": 12822 }, { "epoch": 0.27195605607516277, "grad_norm": 0.3352046608924866, "learning_rate": 1.911338351668376e-05, "loss": 0.5174, "step": 12823 }, { "epoch": 0.2719772645330958, "grad_norm": 0.3732999563217163, "learning_rate": 1.9113246225557248e-05, "loss": 0.5204, "step": 12824 }, { "epoch": 0.2719984729910288, "grad_norm": 0.5182197690010071, "learning_rate": 1.9113108924295065e-05, "loss": 0.5051, "step": 12825 }, { "epoch": 0.2720196814489618, "grad_norm": 0.34191417694091797, "learning_rate": 1.9112971612897356e-05, "loss": 0.4413, "step": 12826 }, { "epoch": 0.27204088990689484, "grad_norm": 0.3535633683204651, "learning_rate": 1.911283429136428e-05, "loss": 0.5589, "step": 12827 }, { "epoch": 0.2720620983648279, "grad_norm": 0.3271768391132355, "learning_rate": 1.911269695969599e-05, "loss": 0.5871, "step": 12828 }, { "epoch": 0.27208330682276094, "grad_norm": 0.33998292684555054, "learning_rate": 1.9112559617892637e-05, "loss": 0.5486, "step": 12829 }, { "epoch": 0.27210451528069396, "grad_norm": 0.3540986180305481, "learning_rate": 1.9112422265954377e-05, "loss": 0.4749, "step": 12830 }, { "epoch": 0.272125723738627, "grad_norm": 0.2907657325267792, "learning_rate": 1.911228490388136e-05, "loss": 0.4304, "step": 12831 }, { "epoch": 0.27214693219656, "grad_norm": 0.30501893162727356, "learning_rate": 1.9112147531673736e-05, "loss": 0.5634, "step": 12832 }, { "epoch": 0.272168140654493, "grad_norm": 0.37650951743125916, "learning_rate": 1.911201014933166e-05, "loss": 0.5547, "step": 12833 }, { "epoch": 0.27218934911242604, "grad_norm": 0.32634463906288147, "learning_rate": 1.911187275685529e-05, "loss": 0.5387, "step": 12834 }, { "epoch": 0.27221055757035906, "grad_norm": 0.3198651969432831, "learning_rate": 1.9111735354244772e-05, "loss": 0.4747, "step": 12835 }, { "epoch": 0.2722317660282921, "grad_norm": 0.3321762681007385, "learning_rate": 1.911159794150026e-05, "loss": 0.5212, "step": 12836 }, { "epoch": 0.2722529744862251, "grad_norm": 0.307981014251709, "learning_rate": 1.9111460518621908e-05, "loss": 0.4945, "step": 12837 }, { "epoch": 0.2722741829441581, "grad_norm": 0.33882078528404236, "learning_rate": 1.9111323085609868e-05, "loss": 0.5134, "step": 12838 }, { "epoch": 0.27229539140209114, "grad_norm": 0.3271627128124237, "learning_rate": 1.91111856424643e-05, "loss": 0.5016, "step": 12839 }, { "epoch": 0.27231659986002416, "grad_norm": 0.33453691005706787, "learning_rate": 1.9111048189185343e-05, "loss": 0.5357, "step": 12840 }, { "epoch": 0.2723378083179572, "grad_norm": 0.30558738112449646, "learning_rate": 1.911091072577316e-05, "loss": 0.4483, "step": 12841 }, { "epoch": 0.27235901677589025, "grad_norm": 0.29215314984321594, "learning_rate": 1.9110773252227902e-05, "loss": 0.563, "step": 12842 }, { "epoch": 0.27238022523382327, "grad_norm": 0.3261679410934448, "learning_rate": 1.911063576854972e-05, "loss": 0.4379, "step": 12843 }, { "epoch": 0.2724014336917563, "grad_norm": 0.32769858837127686, "learning_rate": 1.911049827473877e-05, "loss": 0.5169, "step": 12844 }, { "epoch": 0.2724226421496893, "grad_norm": 0.46311742067337036, "learning_rate": 1.91103607707952e-05, "loss": 0.5635, "step": 12845 }, { "epoch": 0.27244385060762233, "grad_norm": 0.358558714389801, "learning_rate": 1.9110223256719167e-05, "loss": 0.4661, "step": 12846 }, { "epoch": 0.27246505906555535, "grad_norm": 0.3123209476470947, "learning_rate": 1.9110085732510824e-05, "loss": 0.4882, "step": 12847 }, { "epoch": 0.27248626752348837, "grad_norm": 0.34138375520706177, "learning_rate": 1.9109948198170323e-05, "loss": 0.5034, "step": 12848 }, { "epoch": 0.2725074759814214, "grad_norm": 0.3373463749885559, "learning_rate": 1.9109810653697815e-05, "loss": 0.526, "step": 12849 }, { "epoch": 0.2725286844393544, "grad_norm": 0.36484646797180176, "learning_rate": 1.9109673099093453e-05, "loss": 0.5269, "step": 12850 }, { "epoch": 0.2725498928972874, "grad_norm": 0.4045618772506714, "learning_rate": 1.9109535534357395e-05, "loss": 0.6248, "step": 12851 }, { "epoch": 0.27257110135522045, "grad_norm": 0.3655862510204315, "learning_rate": 1.910939795948979e-05, "loss": 0.6118, "step": 12852 }, { "epoch": 0.27259230981315347, "grad_norm": 0.322267085313797, "learning_rate": 1.910926037449079e-05, "loss": 0.5224, "step": 12853 }, { "epoch": 0.2726135182710865, "grad_norm": 0.3612057566642761, "learning_rate": 1.9109122779360554e-05, "loss": 0.5329, "step": 12854 }, { "epoch": 0.2726347267290195, "grad_norm": 0.34436896443367004, "learning_rate": 1.9108985174099225e-05, "loss": 0.4754, "step": 12855 }, { "epoch": 0.2726559351869526, "grad_norm": 0.3772333562374115, "learning_rate": 1.9108847558706968e-05, "loss": 0.5763, "step": 12856 }, { "epoch": 0.2726771436448856, "grad_norm": 0.32995423674583435, "learning_rate": 1.9108709933183926e-05, "loss": 0.523, "step": 12857 }, { "epoch": 0.2726983521028186, "grad_norm": 0.35656216740608215, "learning_rate": 1.9108572297530255e-05, "loss": 0.4991, "step": 12858 }, { "epoch": 0.27271956056075164, "grad_norm": 0.3184307813644409, "learning_rate": 1.9108434651746114e-05, "loss": 0.4785, "step": 12859 }, { "epoch": 0.27274076901868466, "grad_norm": 0.4124089181423187, "learning_rate": 1.9108296995831646e-05, "loss": 0.4526, "step": 12860 }, { "epoch": 0.2727619774766177, "grad_norm": 0.318194717168808, "learning_rate": 1.9108159329787012e-05, "loss": 0.5013, "step": 12861 }, { "epoch": 0.2727831859345507, "grad_norm": 0.3108440637588501, "learning_rate": 1.9108021653612362e-05, "loss": 0.5291, "step": 12862 }, { "epoch": 0.2728043943924837, "grad_norm": 0.3551683723926544, "learning_rate": 1.9107883967307845e-05, "loss": 0.4245, "step": 12863 }, { "epoch": 0.27282560285041674, "grad_norm": 0.3661772906780243, "learning_rate": 1.9107746270873624e-05, "loss": 0.5002, "step": 12864 }, { "epoch": 0.27284681130834976, "grad_norm": 0.3325037658214569, "learning_rate": 1.9107608564309845e-05, "loss": 0.5973, "step": 12865 }, { "epoch": 0.2728680197662828, "grad_norm": 0.417203813791275, "learning_rate": 1.9107470847616662e-05, "loss": 0.4944, "step": 12866 }, { "epoch": 0.2728892282242158, "grad_norm": 0.36060312390327454, "learning_rate": 1.9107333120794233e-05, "loss": 0.4747, "step": 12867 }, { "epoch": 0.2729104366821488, "grad_norm": 0.34394416213035583, "learning_rate": 1.9107195383842703e-05, "loss": 0.4817, "step": 12868 }, { "epoch": 0.2729316451400819, "grad_norm": 0.3400554358959198, "learning_rate": 1.910705763676223e-05, "loss": 0.4645, "step": 12869 }, { "epoch": 0.2729528535980149, "grad_norm": 0.351834237575531, "learning_rate": 1.9106919879552968e-05, "loss": 0.5376, "step": 12870 }, { "epoch": 0.27297406205594793, "grad_norm": 0.318890243768692, "learning_rate": 1.9106782112215066e-05, "loss": 0.5401, "step": 12871 }, { "epoch": 0.27299527051388095, "grad_norm": 0.30847904086112976, "learning_rate": 1.9106644334748683e-05, "loss": 0.4421, "step": 12872 }, { "epoch": 0.27301647897181397, "grad_norm": 0.3369072675704956, "learning_rate": 1.910650654715397e-05, "loss": 0.5748, "step": 12873 }, { "epoch": 0.273037687429747, "grad_norm": 0.3483394682407379, "learning_rate": 1.9106368749431075e-05, "loss": 0.4458, "step": 12874 }, { "epoch": 0.27305889588768, "grad_norm": 0.2982245087623596, "learning_rate": 1.910623094158016e-05, "loss": 0.536, "step": 12875 }, { "epoch": 0.27308010434561303, "grad_norm": 0.34186726808547974, "learning_rate": 1.9106093123601373e-05, "loss": 0.5769, "step": 12876 }, { "epoch": 0.27310131280354605, "grad_norm": 0.3285791575908661, "learning_rate": 1.910595529549487e-05, "loss": 0.4891, "step": 12877 }, { "epoch": 0.27312252126147907, "grad_norm": 0.3046256899833679, "learning_rate": 1.9105817457260798e-05, "loss": 0.5269, "step": 12878 }, { "epoch": 0.2731437297194121, "grad_norm": 0.34809911251068115, "learning_rate": 1.9105679608899316e-05, "loss": 0.5134, "step": 12879 }, { "epoch": 0.2731649381773451, "grad_norm": 0.33529144525527954, "learning_rate": 1.9105541750410582e-05, "loss": 0.5354, "step": 12880 }, { "epoch": 0.2731861466352781, "grad_norm": 0.3723934590816498, "learning_rate": 1.910540388179474e-05, "loss": 0.5195, "step": 12881 }, { "epoch": 0.27320735509321115, "grad_norm": 0.32662323117256165, "learning_rate": 1.9105266003051946e-05, "loss": 0.5616, "step": 12882 }, { "epoch": 0.2732285635511442, "grad_norm": 0.3621578812599182, "learning_rate": 1.9105128114182353e-05, "loss": 0.5094, "step": 12883 }, { "epoch": 0.27324977200907724, "grad_norm": 0.33364444971084595, "learning_rate": 1.9104990215186115e-05, "loss": 0.451, "step": 12884 }, { "epoch": 0.27327098046701026, "grad_norm": 0.34729960560798645, "learning_rate": 1.910485230606339e-05, "loss": 0.5981, "step": 12885 }, { "epoch": 0.2732921889249433, "grad_norm": 0.31741029024124146, "learning_rate": 1.9104714386814324e-05, "loss": 0.4739, "step": 12886 }, { "epoch": 0.2733133973828763, "grad_norm": 0.34769436717033386, "learning_rate": 1.9104576457439077e-05, "loss": 0.5162, "step": 12887 }, { "epoch": 0.2733346058408093, "grad_norm": 0.3250182569026947, "learning_rate": 1.9104438517937794e-05, "loss": 0.5625, "step": 12888 }, { "epoch": 0.27335581429874234, "grad_norm": 0.31612956523895264, "learning_rate": 1.910430056831064e-05, "loss": 0.3738, "step": 12889 }, { "epoch": 0.27337702275667536, "grad_norm": 0.45250409841537476, "learning_rate": 1.910416260855776e-05, "loss": 0.5005, "step": 12890 }, { "epoch": 0.2733982312146084, "grad_norm": 0.33921000361442566, "learning_rate": 1.910402463867931e-05, "loss": 0.5482, "step": 12891 }, { "epoch": 0.2734194396725414, "grad_norm": 0.3426922559738159, "learning_rate": 1.910388665867544e-05, "loss": 0.5516, "step": 12892 }, { "epoch": 0.2734406481304744, "grad_norm": 0.35663822293281555, "learning_rate": 1.9103748668546306e-05, "loss": 0.5088, "step": 12893 }, { "epoch": 0.27346185658840744, "grad_norm": 0.33055174350738525, "learning_rate": 1.9103610668292063e-05, "loss": 0.5269, "step": 12894 }, { "epoch": 0.27348306504634046, "grad_norm": 0.3450264036655426, "learning_rate": 1.9103472657912866e-05, "loss": 0.5988, "step": 12895 }, { "epoch": 0.27350427350427353, "grad_norm": 0.31339848041534424, "learning_rate": 1.9103334637408864e-05, "loss": 0.4565, "step": 12896 }, { "epoch": 0.27352548196220655, "grad_norm": 0.33989444375038147, "learning_rate": 1.910319660678021e-05, "loss": 0.5441, "step": 12897 }, { "epoch": 0.27354669042013957, "grad_norm": 0.32922136783599854, "learning_rate": 1.9103058566027062e-05, "loss": 0.4606, "step": 12898 }, { "epoch": 0.2735678988780726, "grad_norm": 0.3428637981414795, "learning_rate": 1.910292051514957e-05, "loss": 0.5547, "step": 12899 }, { "epoch": 0.2735891073360056, "grad_norm": 0.3380603492259979, "learning_rate": 1.910278245414789e-05, "loss": 0.551, "step": 12900 }, { "epoch": 0.27361031579393863, "grad_norm": 0.3318720757961273, "learning_rate": 1.9102644383022176e-05, "loss": 0.5944, "step": 12901 }, { "epoch": 0.27363152425187165, "grad_norm": 0.4274733364582062, "learning_rate": 1.9102506301772576e-05, "loss": 0.5573, "step": 12902 }, { "epoch": 0.27365273270980467, "grad_norm": 0.33962100744247437, "learning_rate": 1.910236821039925e-05, "loss": 0.4489, "step": 12903 }, { "epoch": 0.2736739411677377, "grad_norm": 0.3254872262477875, "learning_rate": 1.9102230108902352e-05, "loss": 0.4669, "step": 12904 }, { "epoch": 0.2736951496256707, "grad_norm": 0.31619158387184143, "learning_rate": 1.910209199728203e-05, "loss": 0.4546, "step": 12905 }, { "epoch": 0.27371635808360373, "grad_norm": 0.5107262134552002, "learning_rate": 1.9101953875538438e-05, "loss": 0.5247, "step": 12906 }, { "epoch": 0.27373756654153675, "grad_norm": 0.44372716546058655, "learning_rate": 1.9101815743671734e-05, "loss": 0.4313, "step": 12907 }, { "epoch": 0.27375877499946977, "grad_norm": 0.37843164801597595, "learning_rate": 1.9101677601682072e-05, "loss": 0.4613, "step": 12908 }, { "epoch": 0.2737799834574028, "grad_norm": 0.3305949866771698, "learning_rate": 1.9101539449569598e-05, "loss": 0.5307, "step": 12909 }, { "epoch": 0.27380119191533586, "grad_norm": 0.46748945116996765, "learning_rate": 1.9101401287334476e-05, "loss": 0.5717, "step": 12910 }, { "epoch": 0.2738224003732689, "grad_norm": 0.3518533706665039, "learning_rate": 1.910126311497685e-05, "loss": 0.5341, "step": 12911 }, { "epoch": 0.2738436088312019, "grad_norm": 0.32155531644821167, "learning_rate": 1.9101124932496882e-05, "loss": 0.4911, "step": 12912 }, { "epoch": 0.2738648172891349, "grad_norm": 0.3400411605834961, "learning_rate": 1.9100986739894722e-05, "loss": 0.5553, "step": 12913 }, { "epoch": 0.27388602574706794, "grad_norm": 0.3967994153499603, "learning_rate": 1.910084853717052e-05, "loss": 0.469, "step": 12914 }, { "epoch": 0.27390723420500096, "grad_norm": 0.3279738426208496, "learning_rate": 1.9100710324324435e-05, "loss": 0.5004, "step": 12915 }, { "epoch": 0.273928442662934, "grad_norm": 0.36504313349723816, "learning_rate": 1.910057210135662e-05, "loss": 0.5159, "step": 12916 }, { "epoch": 0.273949651120867, "grad_norm": 0.42008844017982483, "learning_rate": 1.9100433868267228e-05, "loss": 0.5351, "step": 12917 }, { "epoch": 0.2739708595788, "grad_norm": 0.31566378474235535, "learning_rate": 1.910029562505641e-05, "loss": 0.4867, "step": 12918 }, { "epoch": 0.27399206803673304, "grad_norm": 0.45110487937927246, "learning_rate": 1.9100157371724325e-05, "loss": 0.5604, "step": 12919 }, { "epoch": 0.27401327649466606, "grad_norm": 0.3081240653991699, "learning_rate": 1.9100019108271125e-05, "loss": 0.4991, "step": 12920 }, { "epoch": 0.2740344849525991, "grad_norm": 0.2958950102329254, "learning_rate": 1.909988083469696e-05, "loss": 0.4969, "step": 12921 }, { "epoch": 0.2740556934105321, "grad_norm": 0.4374023973941803, "learning_rate": 1.9099742551001987e-05, "loss": 0.5198, "step": 12922 }, { "epoch": 0.2740769018684651, "grad_norm": 0.3263724148273468, "learning_rate": 1.909960425718636e-05, "loss": 0.4031, "step": 12923 }, { "epoch": 0.2740981103263982, "grad_norm": 0.4035007059574127, "learning_rate": 1.909946595325023e-05, "loss": 0.4617, "step": 12924 }, { "epoch": 0.2741193187843312, "grad_norm": 0.3492215573787689, "learning_rate": 1.9099327639193758e-05, "loss": 0.4876, "step": 12925 }, { "epoch": 0.27414052724226423, "grad_norm": 0.3688594102859497, "learning_rate": 1.909918931501709e-05, "loss": 0.5405, "step": 12926 }, { "epoch": 0.27416173570019725, "grad_norm": 0.33898818492889404, "learning_rate": 1.909905098072038e-05, "loss": 0.5146, "step": 12927 }, { "epoch": 0.27418294415813027, "grad_norm": 0.3565391004085541, "learning_rate": 1.9098912636303788e-05, "loss": 0.5853, "step": 12928 }, { "epoch": 0.2742041526160633, "grad_norm": 0.3573713004589081, "learning_rate": 1.9098774281767465e-05, "loss": 0.5408, "step": 12929 }, { "epoch": 0.2742253610739963, "grad_norm": 0.3681103587150574, "learning_rate": 1.9098635917111564e-05, "loss": 0.5876, "step": 12930 }, { "epoch": 0.27424656953192933, "grad_norm": 0.3500337302684784, "learning_rate": 1.9098497542336235e-05, "loss": 0.4929, "step": 12931 }, { "epoch": 0.27426777798986235, "grad_norm": 0.4334268271923065, "learning_rate": 1.909835915744164e-05, "loss": 0.4741, "step": 12932 }, { "epoch": 0.27428898644779537, "grad_norm": 0.3209885060787201, "learning_rate": 1.909822076242793e-05, "loss": 0.4947, "step": 12933 }, { "epoch": 0.2743101949057284, "grad_norm": 0.34107619524002075, "learning_rate": 1.9098082357295255e-05, "loss": 0.5088, "step": 12934 }, { "epoch": 0.2743314033636614, "grad_norm": 0.3014116883277893, "learning_rate": 1.9097943942043772e-05, "loss": 0.4607, "step": 12935 }, { "epoch": 0.2743526118215944, "grad_norm": 0.3721321225166321, "learning_rate": 1.9097805516673634e-05, "loss": 0.5398, "step": 12936 }, { "epoch": 0.2743738202795275, "grad_norm": 0.3635519742965698, "learning_rate": 1.9097667081185e-05, "loss": 0.4987, "step": 12937 }, { "epoch": 0.2743950287374605, "grad_norm": 0.39973095059394836, "learning_rate": 1.9097528635578014e-05, "loss": 0.5303, "step": 12938 }, { "epoch": 0.27441623719539354, "grad_norm": 0.33037957549095154, "learning_rate": 1.909739017985284e-05, "loss": 0.4487, "step": 12939 }, { "epoch": 0.27443744565332656, "grad_norm": 0.3950921893119812, "learning_rate": 1.9097251714009625e-05, "loss": 0.4667, "step": 12940 }, { "epoch": 0.2744586541112596, "grad_norm": 0.32234257459640503, "learning_rate": 1.909711323804853e-05, "loss": 0.5015, "step": 12941 }, { "epoch": 0.2744798625691926, "grad_norm": 0.3638896942138672, "learning_rate": 1.90969747519697e-05, "loss": 0.5508, "step": 12942 }, { "epoch": 0.2745010710271256, "grad_norm": 0.3184433579444885, "learning_rate": 1.9096836255773296e-05, "loss": 0.6299, "step": 12943 }, { "epoch": 0.27452227948505864, "grad_norm": 0.41665616631507874, "learning_rate": 1.9096697749459466e-05, "loss": 0.5201, "step": 12944 }, { "epoch": 0.27454348794299166, "grad_norm": 0.36584803462028503, "learning_rate": 1.9096559233028373e-05, "loss": 0.5636, "step": 12945 }, { "epoch": 0.2745646964009247, "grad_norm": 0.3468281924724579, "learning_rate": 1.9096420706480163e-05, "loss": 0.5499, "step": 12946 }, { "epoch": 0.2745859048588577, "grad_norm": 0.3554666042327881, "learning_rate": 1.9096282169814994e-05, "loss": 0.4686, "step": 12947 }, { "epoch": 0.2746071133167907, "grad_norm": 0.3341882824897766, "learning_rate": 1.909614362303302e-05, "loss": 0.5284, "step": 12948 }, { "epoch": 0.27462832177472374, "grad_norm": 0.6153843402862549, "learning_rate": 1.909600506613439e-05, "loss": 0.573, "step": 12949 }, { "epoch": 0.27464953023265676, "grad_norm": 0.328963041305542, "learning_rate": 1.9095866499119268e-05, "loss": 0.4791, "step": 12950 }, { "epoch": 0.27467073869058983, "grad_norm": 0.338961660861969, "learning_rate": 1.90957279219878e-05, "loss": 0.5058, "step": 12951 }, { "epoch": 0.27469194714852285, "grad_norm": 0.3539023995399475, "learning_rate": 1.909558933474014e-05, "loss": 0.5201, "step": 12952 }, { "epoch": 0.27471315560645587, "grad_norm": 0.3318985104560852, "learning_rate": 1.9095450737376445e-05, "loss": 0.5163, "step": 12953 }, { "epoch": 0.2747343640643889, "grad_norm": 0.341612845659256, "learning_rate": 1.9095312129896873e-05, "loss": 0.5286, "step": 12954 }, { "epoch": 0.2747555725223219, "grad_norm": 0.34319868683815, "learning_rate": 1.909517351230157e-05, "loss": 0.5224, "step": 12955 }, { "epoch": 0.27477678098025493, "grad_norm": 0.36169058084487915, "learning_rate": 1.9095034884590692e-05, "loss": 0.6138, "step": 12956 }, { "epoch": 0.27479798943818795, "grad_norm": 0.3720753490924835, "learning_rate": 1.90948962467644e-05, "loss": 0.5234, "step": 12957 }, { "epoch": 0.27481919789612097, "grad_norm": 0.35260578989982605, "learning_rate": 1.9094757598822842e-05, "loss": 0.5324, "step": 12958 }, { "epoch": 0.274840406354054, "grad_norm": 0.29803013801574707, "learning_rate": 1.9094618940766173e-05, "loss": 0.5718, "step": 12959 }, { "epoch": 0.274861614811987, "grad_norm": 1.2860684394836426, "learning_rate": 1.9094480272594545e-05, "loss": 0.5185, "step": 12960 }, { "epoch": 0.27488282326992003, "grad_norm": 0.3804500699043274, "learning_rate": 1.9094341594308118e-05, "loss": 0.5072, "step": 12961 }, { "epoch": 0.27490403172785305, "grad_norm": 0.3155708909034729, "learning_rate": 1.9094202905907046e-05, "loss": 0.5238, "step": 12962 }, { "epoch": 0.27492524018578607, "grad_norm": 0.35926029086112976, "learning_rate": 1.909406420739148e-05, "loss": 0.5341, "step": 12963 }, { "epoch": 0.2749464486437191, "grad_norm": 0.3161805272102356, "learning_rate": 1.909392549876157e-05, "loss": 0.4771, "step": 12964 }, { "epoch": 0.27496765710165216, "grad_norm": 0.34916138648986816, "learning_rate": 1.9093786780017474e-05, "loss": 0.5302, "step": 12965 }, { "epoch": 0.2749888655595852, "grad_norm": 0.28870007395744324, "learning_rate": 1.9093648051159354e-05, "loss": 0.4519, "step": 12966 }, { "epoch": 0.2750100740175182, "grad_norm": 0.3227691054344177, "learning_rate": 1.9093509312187356e-05, "loss": 0.4994, "step": 12967 }, { "epoch": 0.2750312824754512, "grad_norm": 0.3249422609806061, "learning_rate": 1.9093370563101632e-05, "loss": 0.4483, "step": 12968 }, { "epoch": 0.27505249093338424, "grad_norm": 0.4082008898258209, "learning_rate": 1.9093231803902345e-05, "loss": 0.5016, "step": 12969 }, { "epoch": 0.27507369939131726, "grad_norm": 0.32987892627716064, "learning_rate": 1.9093093034589643e-05, "loss": 0.5239, "step": 12970 }, { "epoch": 0.2750949078492503, "grad_norm": 0.36928191781044006, "learning_rate": 1.9092954255163677e-05, "loss": 0.5235, "step": 12971 }, { "epoch": 0.2751161163071833, "grad_norm": 1.1578660011291504, "learning_rate": 1.909281546562461e-05, "loss": 0.5416, "step": 12972 }, { "epoch": 0.2751373247651163, "grad_norm": 0.4577043950557709, "learning_rate": 1.9092676665972596e-05, "loss": 0.6251, "step": 12973 }, { "epoch": 0.27515853322304934, "grad_norm": 0.3961348831653595, "learning_rate": 1.9092537856207783e-05, "loss": 0.6022, "step": 12974 }, { "epoch": 0.27517974168098236, "grad_norm": 0.3274637460708618, "learning_rate": 1.9092399036330328e-05, "loss": 0.536, "step": 12975 }, { "epoch": 0.2752009501389154, "grad_norm": 0.32236120104789734, "learning_rate": 1.9092260206340388e-05, "loss": 0.4908, "step": 12976 }, { "epoch": 0.2752221585968484, "grad_norm": 0.3470681607723236, "learning_rate": 1.909212136623811e-05, "loss": 0.5306, "step": 12977 }, { "epoch": 0.2752433670547815, "grad_norm": 0.3474477529525757, "learning_rate": 1.909198251602366e-05, "loss": 0.5, "step": 12978 }, { "epoch": 0.2752645755127145, "grad_norm": 0.46089839935302734, "learning_rate": 1.9091843655697183e-05, "loss": 0.5385, "step": 12979 }, { "epoch": 0.2752857839706475, "grad_norm": 0.3413020372390747, "learning_rate": 1.9091704785258834e-05, "loss": 0.5833, "step": 12980 }, { "epoch": 0.27530699242858053, "grad_norm": 0.39887577295303345, "learning_rate": 1.9091565904708775e-05, "loss": 0.528, "step": 12981 }, { "epoch": 0.27532820088651355, "grad_norm": 0.35761499404907227, "learning_rate": 1.9091427014047152e-05, "loss": 0.5531, "step": 12982 }, { "epoch": 0.27534940934444657, "grad_norm": 0.35868942737579346, "learning_rate": 1.9091288113274124e-05, "loss": 0.5623, "step": 12983 }, { "epoch": 0.2753706178023796, "grad_norm": 0.3989550769329071, "learning_rate": 1.9091149202389844e-05, "loss": 0.6282, "step": 12984 }, { "epoch": 0.2753918262603126, "grad_norm": 0.38250139355659485, "learning_rate": 1.9091010281394467e-05, "loss": 0.6079, "step": 12985 }, { "epoch": 0.27541303471824563, "grad_norm": 0.35349246859550476, "learning_rate": 1.9090871350288145e-05, "loss": 0.4341, "step": 12986 }, { "epoch": 0.27543424317617865, "grad_norm": 0.34470847249031067, "learning_rate": 1.909073240907104e-05, "loss": 0.542, "step": 12987 }, { "epoch": 0.27545545163411167, "grad_norm": 0.33798283338546753, "learning_rate": 1.9090593457743297e-05, "loss": 0.4485, "step": 12988 }, { "epoch": 0.2754766600920447, "grad_norm": 0.3497067093849182, "learning_rate": 1.9090454496305078e-05, "loss": 0.5873, "step": 12989 }, { "epoch": 0.2754978685499777, "grad_norm": 0.34058091044425964, "learning_rate": 1.909031552475653e-05, "loss": 0.4773, "step": 12990 }, { "epoch": 0.27551907700791073, "grad_norm": 0.38669347763061523, "learning_rate": 1.9090176543097817e-05, "loss": 0.5327, "step": 12991 }, { "epoch": 0.2755402854658438, "grad_norm": 0.5487338304519653, "learning_rate": 1.9090037551329086e-05, "loss": 0.5132, "step": 12992 }, { "epoch": 0.2755614939237768, "grad_norm": 0.3344384729862213, "learning_rate": 1.9089898549450496e-05, "loss": 0.5175, "step": 12993 }, { "epoch": 0.27558270238170984, "grad_norm": 0.5643198490142822, "learning_rate": 1.9089759537462198e-05, "loss": 0.5127, "step": 12994 }, { "epoch": 0.27560391083964286, "grad_norm": 0.3985172510147095, "learning_rate": 1.9089620515364348e-05, "loss": 0.5016, "step": 12995 }, { "epoch": 0.2756251192975759, "grad_norm": 0.3585471212863922, "learning_rate": 1.9089481483157102e-05, "loss": 0.5433, "step": 12996 }, { "epoch": 0.2756463277555089, "grad_norm": 0.34605589509010315, "learning_rate": 1.9089342440840613e-05, "loss": 0.5384, "step": 12997 }, { "epoch": 0.2756675362134419, "grad_norm": 0.41669782996177673, "learning_rate": 1.9089203388415037e-05, "loss": 0.4603, "step": 12998 }, { "epoch": 0.27568874467137494, "grad_norm": 0.3533673584461212, "learning_rate": 1.9089064325880532e-05, "loss": 0.5581, "step": 12999 }, { "epoch": 0.27570995312930796, "grad_norm": 0.38474634289741516, "learning_rate": 1.908892525323724e-05, "loss": 0.5801, "step": 13000 }, { "epoch": 0.275731161587241, "grad_norm": 0.33846354484558105, "learning_rate": 1.908878617048533e-05, "loss": 0.4952, "step": 13001 }, { "epoch": 0.275752370045174, "grad_norm": 0.35901421308517456, "learning_rate": 1.908864707762495e-05, "loss": 0.5766, "step": 13002 }, { "epoch": 0.275773578503107, "grad_norm": 0.32326802611351013, "learning_rate": 1.9088507974656256e-05, "loss": 0.4492, "step": 13003 }, { "epoch": 0.27579478696104004, "grad_norm": 0.36908045411109924, "learning_rate": 1.90883688615794e-05, "loss": 0.47, "step": 13004 }, { "epoch": 0.27581599541897306, "grad_norm": 0.3100808560848236, "learning_rate": 1.9088229738394545e-05, "loss": 0.5214, "step": 13005 }, { "epoch": 0.27583720387690613, "grad_norm": 0.387796014547348, "learning_rate": 1.9088090605101835e-05, "loss": 0.5384, "step": 13006 }, { "epoch": 0.27585841233483915, "grad_norm": 0.3137279450893402, "learning_rate": 1.9087951461701427e-05, "loss": 0.484, "step": 13007 }, { "epoch": 0.2758796207927722, "grad_norm": 0.34314703941345215, "learning_rate": 1.9087812308193483e-05, "loss": 0.4659, "step": 13008 }, { "epoch": 0.2759008292507052, "grad_norm": 0.39638105034828186, "learning_rate": 1.9087673144578153e-05, "loss": 0.5618, "step": 13009 }, { "epoch": 0.2759220377086382, "grad_norm": 0.35362377762794495, "learning_rate": 1.9087533970855588e-05, "loss": 0.5378, "step": 13010 }, { "epoch": 0.27594324616657123, "grad_norm": 0.35005682706832886, "learning_rate": 1.908739478702595e-05, "loss": 0.5077, "step": 13011 }, { "epoch": 0.27596445462450425, "grad_norm": 0.38115668296813965, "learning_rate": 1.908725559308939e-05, "loss": 0.573, "step": 13012 }, { "epoch": 0.27598566308243727, "grad_norm": 0.36706411838531494, "learning_rate": 1.9087116389046064e-05, "loss": 0.5141, "step": 13013 }, { "epoch": 0.2760068715403703, "grad_norm": 0.3725031018257141, "learning_rate": 1.9086977174896124e-05, "loss": 0.5343, "step": 13014 }, { "epoch": 0.2760280799983033, "grad_norm": 0.3830960988998413, "learning_rate": 1.908683795063973e-05, "loss": 0.4942, "step": 13015 }, { "epoch": 0.27604928845623633, "grad_norm": 0.3417324721813202, "learning_rate": 1.9086698716277033e-05, "loss": 0.5027, "step": 13016 }, { "epoch": 0.27607049691416935, "grad_norm": 0.6248785853385925, "learning_rate": 1.9086559471808182e-05, "loss": 0.5406, "step": 13017 }, { "epoch": 0.27609170537210237, "grad_norm": 0.3293010890483856, "learning_rate": 1.9086420217233348e-05, "loss": 0.4503, "step": 13018 }, { "epoch": 0.27611291383003544, "grad_norm": 0.3346823453903198, "learning_rate": 1.9086280952552672e-05, "loss": 0.465, "step": 13019 }, { "epoch": 0.27613412228796846, "grad_norm": 0.39382004737854004, "learning_rate": 1.9086141677766314e-05, "loss": 0.5221, "step": 13020 }, { "epoch": 0.2761553307459015, "grad_norm": 0.34118208289146423, "learning_rate": 1.908600239287443e-05, "loss": 0.4871, "step": 13021 }, { "epoch": 0.2761765392038345, "grad_norm": 0.39004507660865784, "learning_rate": 1.9085863097877166e-05, "loss": 0.6318, "step": 13022 }, { "epoch": 0.2761977476617675, "grad_norm": 0.38111430406570435, "learning_rate": 1.9085723792774692e-05, "loss": 0.4738, "step": 13023 }, { "epoch": 0.27621895611970054, "grad_norm": 0.33628371357917786, "learning_rate": 1.908558447756715e-05, "loss": 0.5551, "step": 13024 }, { "epoch": 0.27624016457763356, "grad_norm": 0.3419051766395569, "learning_rate": 1.90854451522547e-05, "loss": 0.5117, "step": 13025 }, { "epoch": 0.2762613730355666, "grad_norm": 0.321254700422287, "learning_rate": 1.9085305816837502e-05, "loss": 0.4409, "step": 13026 }, { "epoch": 0.2762825814934996, "grad_norm": 0.3330799341201782, "learning_rate": 1.90851664713157e-05, "loss": 0.5108, "step": 13027 }, { "epoch": 0.2763037899514326, "grad_norm": 0.3258688449859619, "learning_rate": 1.9085027115689456e-05, "loss": 0.4619, "step": 13028 }, { "epoch": 0.27632499840936564, "grad_norm": 0.3391711413860321, "learning_rate": 1.9084887749958928e-05, "loss": 0.5442, "step": 13029 }, { "epoch": 0.27634620686729866, "grad_norm": 0.3753812611103058, "learning_rate": 1.9084748374124263e-05, "loss": 0.5627, "step": 13030 }, { "epoch": 0.2763674153252317, "grad_norm": 0.30748099088668823, "learning_rate": 1.9084608988185616e-05, "loss": 0.4585, "step": 13031 }, { "epoch": 0.2763886237831647, "grad_norm": 0.40440696477890015, "learning_rate": 1.9084469592143154e-05, "loss": 0.5335, "step": 13032 }, { "epoch": 0.2764098322410978, "grad_norm": 0.3393113315105438, "learning_rate": 1.908433018599702e-05, "loss": 0.4906, "step": 13033 }, { "epoch": 0.2764310406990308, "grad_norm": 0.2982558310031891, "learning_rate": 1.9084190769747373e-05, "loss": 0.4903, "step": 13034 }, { "epoch": 0.2764522491569638, "grad_norm": 0.3153970539569855, "learning_rate": 1.908405134339437e-05, "loss": 0.4691, "step": 13035 }, { "epoch": 0.27647345761489683, "grad_norm": 0.3673242926597595, "learning_rate": 1.908391190693816e-05, "loss": 0.4834, "step": 13036 }, { "epoch": 0.27649466607282985, "grad_norm": 0.33434566855430603, "learning_rate": 1.90837724603789e-05, "loss": 0.4588, "step": 13037 }, { "epoch": 0.2765158745307629, "grad_norm": 0.38123440742492676, "learning_rate": 1.9083633003716752e-05, "loss": 0.5219, "step": 13038 }, { "epoch": 0.2765370829886959, "grad_norm": 0.3430918753147125, "learning_rate": 1.9083493536951863e-05, "loss": 0.5788, "step": 13039 }, { "epoch": 0.2765582914466289, "grad_norm": 0.3266090154647827, "learning_rate": 1.9083354060084396e-05, "loss": 0.533, "step": 13040 }, { "epoch": 0.27657949990456193, "grad_norm": 0.3376990556716919, "learning_rate": 1.9083214573114502e-05, "loss": 0.5317, "step": 13041 }, { "epoch": 0.27660070836249495, "grad_norm": 0.455032080411911, "learning_rate": 1.908307507604233e-05, "loss": 0.482, "step": 13042 }, { "epoch": 0.27662191682042797, "grad_norm": 0.3487059473991394, "learning_rate": 1.9082935568868045e-05, "loss": 0.5434, "step": 13043 }, { "epoch": 0.276643125278361, "grad_norm": 0.34592658281326294, "learning_rate": 1.9082796051591796e-05, "loss": 0.4343, "step": 13044 }, { "epoch": 0.276664333736294, "grad_norm": 0.37082916498184204, "learning_rate": 1.908265652421374e-05, "loss": 0.4655, "step": 13045 }, { "epoch": 0.2766855421942271, "grad_norm": 0.40364187955856323, "learning_rate": 1.9082516986734033e-05, "loss": 0.5352, "step": 13046 }, { "epoch": 0.2767067506521601, "grad_norm": 0.32095491886138916, "learning_rate": 1.908237743915283e-05, "loss": 0.5143, "step": 13047 }, { "epoch": 0.2767279591100931, "grad_norm": 0.342123419046402, "learning_rate": 1.9082237881470288e-05, "loss": 0.5461, "step": 13048 }, { "epoch": 0.27674916756802614, "grad_norm": 0.3346843719482422, "learning_rate": 1.9082098313686556e-05, "loss": 0.4909, "step": 13049 }, { "epoch": 0.27677037602595916, "grad_norm": 0.32112815976142883, "learning_rate": 1.9081958735801793e-05, "loss": 0.5071, "step": 13050 }, { "epoch": 0.2767915844838922, "grad_norm": 0.33437204360961914, "learning_rate": 1.9081819147816156e-05, "loss": 0.6131, "step": 13051 }, { "epoch": 0.2768127929418252, "grad_norm": 0.3314962089061737, "learning_rate": 1.9081679549729797e-05, "loss": 0.4021, "step": 13052 }, { "epoch": 0.2768340013997582, "grad_norm": 0.3644464612007141, "learning_rate": 1.9081539941542875e-05, "loss": 0.5459, "step": 13053 }, { "epoch": 0.27685520985769124, "grad_norm": 0.3689616620540619, "learning_rate": 1.9081400323255542e-05, "loss": 0.4348, "step": 13054 }, { "epoch": 0.27687641831562426, "grad_norm": 0.3591691255569458, "learning_rate": 1.9081260694867954e-05, "loss": 0.5109, "step": 13055 }, { "epoch": 0.2768976267735573, "grad_norm": 0.3200746774673462, "learning_rate": 1.9081121056380268e-05, "loss": 0.5194, "step": 13056 }, { "epoch": 0.2769188352314903, "grad_norm": 0.3061739504337311, "learning_rate": 1.908098140779264e-05, "loss": 0.4938, "step": 13057 }, { "epoch": 0.2769400436894233, "grad_norm": 0.4927591383457184, "learning_rate": 1.908084174910522e-05, "loss": 0.5228, "step": 13058 }, { "epoch": 0.27696125214735634, "grad_norm": 0.32211899757385254, "learning_rate": 1.908070208031817e-05, "loss": 0.4697, "step": 13059 }, { "epoch": 0.2769824606052894, "grad_norm": 0.32575440406799316, "learning_rate": 1.908056240143164e-05, "loss": 0.6275, "step": 13060 }, { "epoch": 0.27700366906322244, "grad_norm": 0.3737882971763611, "learning_rate": 1.9080422712445786e-05, "loss": 0.5538, "step": 13061 }, { "epoch": 0.27702487752115545, "grad_norm": 0.3338439464569092, "learning_rate": 1.9080283013360767e-05, "loss": 0.5081, "step": 13062 }, { "epoch": 0.2770460859790885, "grad_norm": 0.3502809703350067, "learning_rate": 1.9080143304176736e-05, "loss": 0.5159, "step": 13063 }, { "epoch": 0.2770672944370215, "grad_norm": 0.36184176802635193, "learning_rate": 1.9080003584893843e-05, "loss": 0.486, "step": 13064 }, { "epoch": 0.2770885028949545, "grad_norm": 0.3607271611690521, "learning_rate": 1.9079863855512254e-05, "loss": 0.464, "step": 13065 }, { "epoch": 0.27710971135288753, "grad_norm": 0.3215637803077698, "learning_rate": 1.907972411603212e-05, "loss": 0.5348, "step": 13066 }, { "epoch": 0.27713091981082055, "grad_norm": 0.3521820604801178, "learning_rate": 1.9079584366453594e-05, "loss": 0.4893, "step": 13067 }, { "epoch": 0.2771521282687536, "grad_norm": 0.35250258445739746, "learning_rate": 1.9079444606776835e-05, "loss": 0.5455, "step": 13068 }, { "epoch": 0.2771733367266866, "grad_norm": 0.3672017753124237, "learning_rate": 1.9079304837001997e-05, "loss": 0.548, "step": 13069 }, { "epoch": 0.2771945451846196, "grad_norm": 0.35897374153137207, "learning_rate": 1.907916505712923e-05, "loss": 0.528, "step": 13070 }, { "epoch": 0.27721575364255263, "grad_norm": 0.35107213258743286, "learning_rate": 1.9079025267158698e-05, "loss": 0.511, "step": 13071 }, { "epoch": 0.27723696210048565, "grad_norm": 0.3409637212753296, "learning_rate": 1.9078885467090552e-05, "loss": 0.5055, "step": 13072 }, { "epoch": 0.27725817055841867, "grad_norm": 0.3184622824192047, "learning_rate": 1.907874565692495e-05, "loss": 0.4695, "step": 13073 }, { "epoch": 0.27727937901635175, "grad_norm": 0.37413913011550903, "learning_rate": 1.9078605836662046e-05, "loss": 0.5387, "step": 13074 }, { "epoch": 0.27730058747428477, "grad_norm": 0.3375832438468933, "learning_rate": 1.9078466006301994e-05, "loss": 0.5124, "step": 13075 }, { "epoch": 0.2773217959322178, "grad_norm": 0.34797203540802, "learning_rate": 1.9078326165844953e-05, "loss": 0.5317, "step": 13076 }, { "epoch": 0.2773430043901508, "grad_norm": 0.3071702718734741, "learning_rate": 1.9078186315291075e-05, "loss": 0.5249, "step": 13077 }, { "epoch": 0.2773642128480838, "grad_norm": 0.3680584728717804, "learning_rate": 1.9078046454640515e-05, "loss": 0.5375, "step": 13078 }, { "epoch": 0.27738542130601684, "grad_norm": 0.32027339935302734, "learning_rate": 1.9077906583893433e-05, "loss": 0.5647, "step": 13079 }, { "epoch": 0.27740662976394986, "grad_norm": 0.3613285422325134, "learning_rate": 1.9077766703049983e-05, "loss": 0.524, "step": 13080 }, { "epoch": 0.2774278382218829, "grad_norm": 0.3346424996852875, "learning_rate": 1.9077626812110317e-05, "loss": 0.5698, "step": 13081 }, { "epoch": 0.2774490466798159, "grad_norm": 0.3519459366798401, "learning_rate": 1.9077486911074598e-05, "loss": 0.5168, "step": 13082 }, { "epoch": 0.2774702551377489, "grad_norm": 0.31348344683647156, "learning_rate": 1.907734699994297e-05, "loss": 0.4391, "step": 13083 }, { "epoch": 0.27749146359568194, "grad_norm": 0.3778078258037567, "learning_rate": 1.9077207078715602e-05, "loss": 0.4898, "step": 13084 }, { "epoch": 0.27751267205361496, "grad_norm": 0.32317304611206055, "learning_rate": 1.907706714739264e-05, "loss": 0.4939, "step": 13085 }, { "epoch": 0.277533880511548, "grad_norm": 0.5402340292930603, "learning_rate": 1.907692720597424e-05, "loss": 0.5221, "step": 13086 }, { "epoch": 0.27755508896948106, "grad_norm": 0.3434329032897949, "learning_rate": 1.9076787254460564e-05, "loss": 0.4889, "step": 13087 }, { "epoch": 0.2775762974274141, "grad_norm": 0.33187007904052734, "learning_rate": 1.9076647292851763e-05, "loss": 0.4813, "step": 13088 }, { "epoch": 0.2775975058853471, "grad_norm": 0.3450734615325928, "learning_rate": 1.9076507321147997e-05, "loss": 0.4847, "step": 13089 }, { "epoch": 0.2776187143432801, "grad_norm": 0.307679682970047, "learning_rate": 1.9076367339349414e-05, "loss": 0.3819, "step": 13090 }, { "epoch": 0.27763992280121313, "grad_norm": 0.36701899766921997, "learning_rate": 1.9076227347456177e-05, "loss": 0.485, "step": 13091 }, { "epoch": 0.27766113125914615, "grad_norm": 0.3271324634552002, "learning_rate": 1.907608734546844e-05, "loss": 0.5095, "step": 13092 }, { "epoch": 0.2776823397170792, "grad_norm": 0.34579089283943176, "learning_rate": 1.9075947333386354e-05, "loss": 0.5025, "step": 13093 }, { "epoch": 0.2777035481750122, "grad_norm": 0.3677637279033661, "learning_rate": 1.9075807311210076e-05, "loss": 0.5661, "step": 13094 }, { "epoch": 0.2777247566329452, "grad_norm": 0.3373225927352905, "learning_rate": 1.9075667278939768e-05, "loss": 0.486, "step": 13095 }, { "epoch": 0.27774596509087823, "grad_norm": 0.3106013238430023, "learning_rate": 1.9075527236575585e-05, "loss": 0.5205, "step": 13096 }, { "epoch": 0.27776717354881125, "grad_norm": 0.4037027359008789, "learning_rate": 1.9075387184117673e-05, "loss": 0.4873, "step": 13097 }, { "epoch": 0.27778838200674427, "grad_norm": 0.3561544716358185, "learning_rate": 1.9075247121566198e-05, "loss": 0.5526, "step": 13098 }, { "epoch": 0.2778095904646773, "grad_norm": 0.4887593686580658, "learning_rate": 1.907510704892131e-05, "loss": 0.602, "step": 13099 }, { "epoch": 0.2778307989226103, "grad_norm": 0.4097140431404114, "learning_rate": 1.9074966966183168e-05, "loss": 0.4918, "step": 13100 }, { "epoch": 0.2778520073805434, "grad_norm": 0.41008809208869934, "learning_rate": 1.9074826873351925e-05, "loss": 0.521, "step": 13101 }, { "epoch": 0.2778732158384764, "grad_norm": 0.43071112036705017, "learning_rate": 1.9074686770427742e-05, "loss": 0.5317, "step": 13102 }, { "epoch": 0.2778944242964094, "grad_norm": 0.32253649830818176, "learning_rate": 1.9074546657410765e-05, "loss": 0.4997, "step": 13103 }, { "epoch": 0.27791563275434245, "grad_norm": 0.3316039443016052, "learning_rate": 1.907440653430116e-05, "loss": 0.5043, "step": 13104 }, { "epoch": 0.27793684121227547, "grad_norm": 0.3487464189529419, "learning_rate": 1.907426640109908e-05, "loss": 0.4428, "step": 13105 }, { "epoch": 0.2779580496702085, "grad_norm": 0.3553081154823303, "learning_rate": 1.9074126257804678e-05, "loss": 0.5105, "step": 13106 }, { "epoch": 0.2779792581281415, "grad_norm": 0.34388747811317444, "learning_rate": 1.907398610441811e-05, "loss": 0.4678, "step": 13107 }, { "epoch": 0.2780004665860745, "grad_norm": 0.3245662450790405, "learning_rate": 1.9073845940939536e-05, "loss": 0.4984, "step": 13108 }, { "epoch": 0.27802167504400754, "grad_norm": 0.31138429045677185, "learning_rate": 1.907370576736911e-05, "loss": 0.5036, "step": 13109 }, { "epoch": 0.27804288350194056, "grad_norm": 0.3697715997695923, "learning_rate": 1.9073565583706986e-05, "loss": 0.4688, "step": 13110 }, { "epoch": 0.2780640919598736, "grad_norm": 0.34615612030029297, "learning_rate": 1.9073425389953323e-05, "loss": 0.4314, "step": 13111 }, { "epoch": 0.2780853004178066, "grad_norm": 0.3295505940914154, "learning_rate": 1.907328518610827e-05, "loss": 0.4898, "step": 13112 }, { "epoch": 0.2781065088757396, "grad_norm": 0.350358784198761, "learning_rate": 1.907314497217199e-05, "loss": 0.5197, "step": 13113 }, { "epoch": 0.27812771733367264, "grad_norm": 0.4605852961540222, "learning_rate": 1.9073004748144642e-05, "loss": 0.484, "step": 13114 }, { "epoch": 0.2781489257916057, "grad_norm": 0.3190130293369293, "learning_rate": 1.9072864514026372e-05, "loss": 0.4769, "step": 13115 }, { "epoch": 0.27817013424953874, "grad_norm": 0.41878724098205566, "learning_rate": 1.9072724269817345e-05, "loss": 0.5026, "step": 13116 }, { "epoch": 0.27819134270747176, "grad_norm": 0.32995396852493286, "learning_rate": 1.907258401551771e-05, "loss": 0.4829, "step": 13117 }, { "epoch": 0.2782125511654048, "grad_norm": 0.32766175270080566, "learning_rate": 1.9072443751127626e-05, "loss": 0.4956, "step": 13118 }, { "epoch": 0.2782337596233378, "grad_norm": 0.38300156593322754, "learning_rate": 1.9072303476647247e-05, "loss": 0.4771, "step": 13119 }, { "epoch": 0.2782549680812708, "grad_norm": 0.32850944995880127, "learning_rate": 1.9072163192076734e-05, "loss": 0.4743, "step": 13120 }, { "epoch": 0.27827617653920383, "grad_norm": 0.36393240094184875, "learning_rate": 1.907202289741624e-05, "loss": 0.4768, "step": 13121 }, { "epoch": 0.27829738499713685, "grad_norm": 0.3519813120365143, "learning_rate": 1.9071882592665914e-05, "loss": 0.5822, "step": 13122 }, { "epoch": 0.2783185934550699, "grad_norm": 0.35173746943473816, "learning_rate": 1.907174227782593e-05, "loss": 0.6692, "step": 13123 }, { "epoch": 0.2783398019130029, "grad_norm": 0.31420668959617615, "learning_rate": 1.9071601952896424e-05, "loss": 0.5366, "step": 13124 }, { "epoch": 0.2783610103709359, "grad_norm": 0.38072216510772705, "learning_rate": 1.9071461617877567e-05, "loss": 0.4741, "step": 13125 }, { "epoch": 0.27838221882886893, "grad_norm": 0.33148038387298584, "learning_rate": 1.9071321272769504e-05, "loss": 0.5198, "step": 13126 }, { "epoch": 0.27840342728680195, "grad_norm": 0.3341134786605835, "learning_rate": 1.90711809175724e-05, "loss": 0.5223, "step": 13127 }, { "epoch": 0.278424635744735, "grad_norm": 0.35900187492370605, "learning_rate": 1.9071040552286407e-05, "loss": 0.633, "step": 13128 }, { "epoch": 0.27844584420266805, "grad_norm": 0.35502690076828003, "learning_rate": 1.907090017691168e-05, "loss": 0.5609, "step": 13129 }, { "epoch": 0.27846705266060107, "grad_norm": 0.3359074592590332, "learning_rate": 1.907075979144838e-05, "loss": 0.5301, "step": 13130 }, { "epoch": 0.2784882611185341, "grad_norm": 0.3434544503688812, "learning_rate": 1.9070619395896656e-05, "loss": 0.6299, "step": 13131 }, { "epoch": 0.2785094695764671, "grad_norm": 0.38462144136428833, "learning_rate": 1.907047899025667e-05, "loss": 0.5024, "step": 13132 }, { "epoch": 0.2785306780344001, "grad_norm": 0.5078271627426147, "learning_rate": 1.9070338574528574e-05, "loss": 0.5909, "step": 13133 }, { "epoch": 0.27855188649233315, "grad_norm": 0.3593365252017975, "learning_rate": 1.9070198148712528e-05, "loss": 0.5753, "step": 13134 }, { "epoch": 0.27857309495026616, "grad_norm": 0.37033817172050476, "learning_rate": 1.9070057712808686e-05, "loss": 0.5387, "step": 13135 }, { "epoch": 0.2785943034081992, "grad_norm": 0.3136860132217407, "learning_rate": 1.9069917266817206e-05, "loss": 0.4865, "step": 13136 }, { "epoch": 0.2786155118661322, "grad_norm": 0.3453660011291504, "learning_rate": 1.9069776810738242e-05, "loss": 0.5536, "step": 13137 }, { "epoch": 0.2786367203240652, "grad_norm": 0.34907659888267517, "learning_rate": 1.9069636344571952e-05, "loss": 0.4935, "step": 13138 }, { "epoch": 0.27865792878199824, "grad_norm": 0.3420877456665039, "learning_rate": 1.9069495868318486e-05, "loss": 0.4719, "step": 13139 }, { "epoch": 0.27867913723993126, "grad_norm": 0.4102828800678253, "learning_rate": 1.906935538197801e-05, "loss": 0.4142, "step": 13140 }, { "epoch": 0.2787003456978643, "grad_norm": 0.3365069329738617, "learning_rate": 1.9069214885550675e-05, "loss": 0.5484, "step": 13141 }, { "epoch": 0.27872155415579736, "grad_norm": 0.45970404148101807, "learning_rate": 1.9069074379036638e-05, "loss": 0.4997, "step": 13142 }, { "epoch": 0.2787427626137304, "grad_norm": 0.3303124010562897, "learning_rate": 1.9068933862436055e-05, "loss": 0.4493, "step": 13143 }, { "epoch": 0.2787639710716634, "grad_norm": 0.34771761298179626, "learning_rate": 1.906879333574908e-05, "loss": 0.559, "step": 13144 }, { "epoch": 0.2787851795295964, "grad_norm": 0.41252008080482483, "learning_rate": 1.9068652798975877e-05, "loss": 0.5848, "step": 13145 }, { "epoch": 0.27880638798752944, "grad_norm": 0.4123743176460266, "learning_rate": 1.9068512252116597e-05, "loss": 0.4648, "step": 13146 }, { "epoch": 0.27882759644546246, "grad_norm": 0.33806005120277405, "learning_rate": 1.9068371695171394e-05, "loss": 0.4816, "step": 13147 }, { "epoch": 0.2788488049033955, "grad_norm": 0.34149229526519775, "learning_rate": 1.9068231128140425e-05, "loss": 0.5064, "step": 13148 }, { "epoch": 0.2788700133613285, "grad_norm": 0.3482072949409485, "learning_rate": 1.906809055102385e-05, "loss": 0.6143, "step": 13149 }, { "epoch": 0.2788912218192615, "grad_norm": 0.4063884913921356, "learning_rate": 1.9067949963821824e-05, "loss": 0.534, "step": 13150 }, { "epoch": 0.27891243027719453, "grad_norm": 0.3543848991394043, "learning_rate": 1.9067809366534502e-05, "loss": 0.4764, "step": 13151 }, { "epoch": 0.27893363873512755, "grad_norm": 0.3298068642616272, "learning_rate": 1.9067668759162044e-05, "loss": 0.4867, "step": 13152 }, { "epoch": 0.2789548471930606, "grad_norm": 0.35232892632484436, "learning_rate": 1.90675281417046e-05, "loss": 0.5316, "step": 13153 }, { "epoch": 0.2789760556509936, "grad_norm": 0.4312053322792053, "learning_rate": 1.9067387514162332e-05, "loss": 0.4248, "step": 13154 }, { "epoch": 0.2789972641089266, "grad_norm": 0.31625205278396606, "learning_rate": 1.906724687653539e-05, "loss": 0.4934, "step": 13155 }, { "epoch": 0.2790184725668597, "grad_norm": 0.3087078332901001, "learning_rate": 1.906710622882394e-05, "loss": 0.5238, "step": 13156 }, { "epoch": 0.2790396810247927, "grad_norm": 0.35055044293403625, "learning_rate": 1.9066965571028132e-05, "loss": 0.5671, "step": 13157 }, { "epoch": 0.2790608894827257, "grad_norm": 0.2972327768802643, "learning_rate": 1.9066824903148122e-05, "loss": 0.4771, "step": 13158 }, { "epoch": 0.27908209794065875, "grad_norm": 0.33543500304222107, "learning_rate": 1.906668422518407e-05, "loss": 0.5547, "step": 13159 }, { "epoch": 0.27910330639859177, "grad_norm": 0.36994969844818115, "learning_rate": 1.9066543537136127e-05, "loss": 0.5101, "step": 13160 }, { "epoch": 0.2791245148565248, "grad_norm": 0.33365872502326965, "learning_rate": 1.9066402839004457e-05, "loss": 0.56, "step": 13161 }, { "epoch": 0.2791457233144578, "grad_norm": 0.35095304250717163, "learning_rate": 1.9066262130789212e-05, "loss": 0.5556, "step": 13162 }, { "epoch": 0.2791669317723908, "grad_norm": 0.34944432973861694, "learning_rate": 1.906612141249055e-05, "loss": 0.5099, "step": 13163 }, { "epoch": 0.27918814023032384, "grad_norm": 0.36311930418014526, "learning_rate": 1.9065980684108622e-05, "loss": 0.4588, "step": 13164 }, { "epoch": 0.27920934868825686, "grad_norm": 0.34479594230651855, "learning_rate": 1.9065839945643594e-05, "loss": 0.5471, "step": 13165 }, { "epoch": 0.2792305571461899, "grad_norm": 0.321834921836853, "learning_rate": 1.906569919709562e-05, "loss": 0.4917, "step": 13166 }, { "epoch": 0.2792517656041229, "grad_norm": 0.3640066683292389, "learning_rate": 1.9065558438464844e-05, "loss": 0.523, "step": 13167 }, { "epoch": 0.2792729740620559, "grad_norm": 0.3391638398170471, "learning_rate": 1.9065417669751437e-05, "loss": 0.5179, "step": 13168 }, { "epoch": 0.279294182519989, "grad_norm": 0.3237229883670807, "learning_rate": 1.9065276890955556e-05, "loss": 0.505, "step": 13169 }, { "epoch": 0.279315390977922, "grad_norm": 0.3652937412261963, "learning_rate": 1.906513610207735e-05, "loss": 0.5278, "step": 13170 }, { "epoch": 0.27933659943585504, "grad_norm": 0.40522241592407227, "learning_rate": 1.9064995303116975e-05, "loss": 0.5557, "step": 13171 }, { "epoch": 0.27935780789378806, "grad_norm": 0.3595919907093048, "learning_rate": 1.9064854494074594e-05, "loss": 0.5248, "step": 13172 }, { "epoch": 0.2793790163517211, "grad_norm": 0.5772743225097656, "learning_rate": 1.906471367495036e-05, "loss": 0.5362, "step": 13173 }, { "epoch": 0.2794002248096541, "grad_norm": 0.3615258038043976, "learning_rate": 1.906457284574443e-05, "loss": 0.5008, "step": 13174 }, { "epoch": 0.2794214332675871, "grad_norm": 0.3630797266960144, "learning_rate": 1.906443200645696e-05, "loss": 0.4905, "step": 13175 }, { "epoch": 0.27944264172552014, "grad_norm": 0.37993863224983215, "learning_rate": 1.9064291157088113e-05, "loss": 0.6291, "step": 13176 }, { "epoch": 0.27946385018345316, "grad_norm": 0.3723340332508087, "learning_rate": 1.9064150297638036e-05, "loss": 0.4849, "step": 13177 }, { "epoch": 0.2794850586413862, "grad_norm": 0.3210228979587555, "learning_rate": 1.9064009428106888e-05, "loss": 0.4943, "step": 13178 }, { "epoch": 0.2795062670993192, "grad_norm": 0.39056456089019775, "learning_rate": 1.9063868548494828e-05, "loss": 0.5212, "step": 13179 }, { "epoch": 0.2795274755572522, "grad_norm": 0.3874381184577942, "learning_rate": 1.9063727658802015e-05, "loss": 0.5906, "step": 13180 }, { "epoch": 0.27954868401518523, "grad_norm": 0.39783239364624023, "learning_rate": 1.90635867590286e-05, "loss": 0.5178, "step": 13181 }, { "epoch": 0.27956989247311825, "grad_norm": 0.3274626135826111, "learning_rate": 1.9063445849174746e-05, "loss": 0.5469, "step": 13182 }, { "epoch": 0.27959110093105133, "grad_norm": 0.42987510561943054, "learning_rate": 1.90633049292406e-05, "loss": 0.5463, "step": 13183 }, { "epoch": 0.27961230938898435, "grad_norm": 0.3685165345668793, "learning_rate": 1.906316399922633e-05, "loss": 0.5083, "step": 13184 }, { "epoch": 0.27963351784691737, "grad_norm": 0.31655991077423096, "learning_rate": 1.9063023059132085e-05, "loss": 0.5366, "step": 13185 }, { "epoch": 0.2796547263048504, "grad_norm": 0.3313061594963074, "learning_rate": 1.9062882108958026e-05, "loss": 0.5184, "step": 13186 }, { "epoch": 0.2796759347627834, "grad_norm": 0.34869030117988586, "learning_rate": 1.906274114870431e-05, "loss": 0.5486, "step": 13187 }, { "epoch": 0.2796971432207164, "grad_norm": 0.3675733208656311, "learning_rate": 1.9062600178371088e-05, "loss": 0.5752, "step": 13188 }, { "epoch": 0.27971835167864945, "grad_norm": 0.3469080924987793, "learning_rate": 1.9062459197958525e-05, "loss": 0.538, "step": 13189 }, { "epoch": 0.27973956013658247, "grad_norm": 0.3327961266040802, "learning_rate": 1.9062318207466773e-05, "loss": 0.5761, "step": 13190 }, { "epoch": 0.2797607685945155, "grad_norm": 0.3337372839450836, "learning_rate": 1.9062177206895985e-05, "loss": 0.5548, "step": 13191 }, { "epoch": 0.2797819770524485, "grad_norm": 0.3503800332546234, "learning_rate": 1.9062036196246326e-05, "loss": 0.5565, "step": 13192 }, { "epoch": 0.2798031855103815, "grad_norm": 0.39172983169555664, "learning_rate": 1.906189517551795e-05, "loss": 0.5859, "step": 13193 }, { "epoch": 0.27982439396831454, "grad_norm": 0.33180445432662964, "learning_rate": 1.906175414471101e-05, "loss": 0.4894, "step": 13194 }, { "epoch": 0.27984560242624756, "grad_norm": 0.3420417308807373, "learning_rate": 1.906161310382567e-05, "loss": 0.5768, "step": 13195 }, { "epoch": 0.2798668108841806, "grad_norm": 0.3450632393360138, "learning_rate": 1.9061472052862076e-05, "loss": 0.4277, "step": 13196 }, { "epoch": 0.27988801934211366, "grad_norm": 0.34068456292152405, "learning_rate": 1.9061330991820397e-05, "loss": 0.5163, "step": 13197 }, { "epoch": 0.2799092278000467, "grad_norm": 0.3993791341781616, "learning_rate": 1.9061189920700785e-05, "loss": 0.6167, "step": 13198 }, { "epoch": 0.2799304362579797, "grad_norm": 0.30460643768310547, "learning_rate": 1.9061048839503394e-05, "loss": 0.496, "step": 13199 }, { "epoch": 0.2799516447159127, "grad_norm": 0.3113783299922943, "learning_rate": 1.9060907748228382e-05, "loss": 0.5199, "step": 13200 }, { "epoch": 0.27997285317384574, "grad_norm": 0.37085768580436707, "learning_rate": 1.9060766646875907e-05, "loss": 0.5381, "step": 13201 }, { "epoch": 0.27999406163177876, "grad_norm": 0.3155352473258972, "learning_rate": 1.9060625535446128e-05, "loss": 0.4302, "step": 13202 }, { "epoch": 0.2800152700897118, "grad_norm": 0.3290714621543884, "learning_rate": 1.90604844139392e-05, "loss": 0.5395, "step": 13203 }, { "epoch": 0.2800364785476448, "grad_norm": 0.31506311893463135, "learning_rate": 1.9060343282355277e-05, "loss": 0.4765, "step": 13204 }, { "epoch": 0.2800576870055778, "grad_norm": 0.35567301511764526, "learning_rate": 1.9060202140694524e-05, "loss": 0.4657, "step": 13205 }, { "epoch": 0.28007889546351084, "grad_norm": 0.3422285318374634, "learning_rate": 1.9060060988957086e-05, "loss": 0.5259, "step": 13206 }, { "epoch": 0.28010010392144385, "grad_norm": 0.345110684633255, "learning_rate": 1.9059919827143135e-05, "loss": 0.5734, "step": 13207 }, { "epoch": 0.2801213123793769, "grad_norm": 0.3260796368122101, "learning_rate": 1.9059778655252812e-05, "loss": 0.4916, "step": 13208 }, { "epoch": 0.2801425208373099, "grad_norm": 0.533923327922821, "learning_rate": 1.9059637473286287e-05, "loss": 0.5024, "step": 13209 }, { "epoch": 0.28016372929524297, "grad_norm": 0.33526450395584106, "learning_rate": 1.905949628124371e-05, "loss": 0.5651, "step": 13210 }, { "epoch": 0.280184937753176, "grad_norm": 0.34562474489212036, "learning_rate": 1.905935507912524e-05, "loss": 0.6036, "step": 13211 }, { "epoch": 0.280206146211109, "grad_norm": 0.36968177556991577, "learning_rate": 1.9059213866931036e-05, "loss": 0.5059, "step": 13212 }, { "epoch": 0.28022735466904203, "grad_norm": 0.354937344789505, "learning_rate": 1.905907264466125e-05, "loss": 0.4727, "step": 13213 }, { "epoch": 0.28024856312697505, "grad_norm": 0.3289879560470581, "learning_rate": 1.9058931412316045e-05, "loss": 0.5418, "step": 13214 }, { "epoch": 0.28026977158490807, "grad_norm": 0.5156262516975403, "learning_rate": 1.9058790169895572e-05, "loss": 0.4706, "step": 13215 }, { "epoch": 0.2802909800428411, "grad_norm": 0.34731847047805786, "learning_rate": 1.9058648917399995e-05, "loss": 0.5547, "step": 13216 }, { "epoch": 0.2803121885007741, "grad_norm": 0.28881925344467163, "learning_rate": 1.9058507654829465e-05, "loss": 0.5202, "step": 13217 }, { "epoch": 0.2803333969587071, "grad_norm": 0.38421520590782166, "learning_rate": 1.9058366382184143e-05, "loss": 0.6063, "step": 13218 }, { "epoch": 0.28035460541664015, "grad_norm": 0.31902629137039185, "learning_rate": 1.9058225099464184e-05, "loss": 0.4492, "step": 13219 }, { "epoch": 0.28037581387457317, "grad_norm": 0.33833572268486023, "learning_rate": 1.9058083806669743e-05, "loss": 0.5112, "step": 13220 }, { "epoch": 0.2803970223325062, "grad_norm": 0.5338941216468811, "learning_rate": 1.9057942503800985e-05, "loss": 0.5375, "step": 13221 }, { "epoch": 0.2804182307904392, "grad_norm": 0.3451780378818512, "learning_rate": 1.9057801190858056e-05, "loss": 0.4811, "step": 13222 }, { "epoch": 0.2804394392483722, "grad_norm": 0.3794308602809906, "learning_rate": 1.905765986784112e-05, "loss": 0.542, "step": 13223 }, { "epoch": 0.2804606477063053, "grad_norm": 0.3840845823287964, "learning_rate": 1.905751853475034e-05, "loss": 0.5076, "step": 13224 }, { "epoch": 0.2804818561642383, "grad_norm": 0.3589056730270386, "learning_rate": 1.905737719158586e-05, "loss": 0.5492, "step": 13225 }, { "epoch": 0.28050306462217134, "grad_norm": 0.3391925096511841, "learning_rate": 1.9057235838347848e-05, "loss": 0.5659, "step": 13226 }, { "epoch": 0.28052427308010436, "grad_norm": 0.33486834168434143, "learning_rate": 1.9057094475036454e-05, "loss": 0.568, "step": 13227 }, { "epoch": 0.2805454815380374, "grad_norm": 0.3413065969944, "learning_rate": 1.905695310165184e-05, "loss": 0.5595, "step": 13228 }, { "epoch": 0.2805666899959704, "grad_norm": 0.3197261393070221, "learning_rate": 1.905681171819416e-05, "loss": 0.4932, "step": 13229 }, { "epoch": 0.2805878984539034, "grad_norm": 0.36292898654937744, "learning_rate": 1.9056670324663573e-05, "loss": 0.5822, "step": 13230 }, { "epoch": 0.28060910691183644, "grad_norm": 0.3511950671672821, "learning_rate": 1.9056528921060238e-05, "loss": 0.4902, "step": 13231 }, { "epoch": 0.28063031536976946, "grad_norm": 0.362203449010849, "learning_rate": 1.9056387507384306e-05, "loss": 0.5478, "step": 13232 }, { "epoch": 0.2806515238277025, "grad_norm": 0.3623354136943817, "learning_rate": 1.9056246083635943e-05, "loss": 0.5627, "step": 13233 }, { "epoch": 0.2806727322856355, "grad_norm": 0.4136310815811157, "learning_rate": 1.9056104649815298e-05, "loss": 0.5511, "step": 13234 }, { "epoch": 0.2806939407435685, "grad_norm": 0.32651424407958984, "learning_rate": 1.9055963205922533e-05, "loss": 0.4696, "step": 13235 }, { "epoch": 0.28071514920150153, "grad_norm": 0.5989212393760681, "learning_rate": 1.9055821751957807e-05, "loss": 0.445, "step": 13236 }, { "epoch": 0.2807363576594346, "grad_norm": 0.32468608021736145, "learning_rate": 1.905568028792127e-05, "loss": 0.464, "step": 13237 }, { "epoch": 0.28075756611736763, "grad_norm": 0.31617021560668945, "learning_rate": 1.905553881381309e-05, "loss": 0.4011, "step": 13238 }, { "epoch": 0.28077877457530065, "grad_norm": 0.39402979612350464, "learning_rate": 1.9055397329633414e-05, "loss": 0.6074, "step": 13239 }, { "epoch": 0.28079998303323367, "grad_norm": 0.37212008237838745, "learning_rate": 1.9055255835382406e-05, "loss": 0.5226, "step": 13240 }, { "epoch": 0.2808211914911667, "grad_norm": 0.3627327084541321, "learning_rate": 1.905511433106022e-05, "loss": 0.5401, "step": 13241 }, { "epoch": 0.2808423999490997, "grad_norm": 0.3488199710845947, "learning_rate": 1.9054972816667016e-05, "loss": 0.5459, "step": 13242 }, { "epoch": 0.2808636084070327, "grad_norm": 0.3837723731994629, "learning_rate": 1.905483129220295e-05, "loss": 0.4799, "step": 13243 }, { "epoch": 0.28088481686496575, "grad_norm": 0.45821714401245117, "learning_rate": 1.9054689757668175e-05, "loss": 0.5227, "step": 13244 }, { "epoch": 0.28090602532289877, "grad_norm": 0.36500197649002075, "learning_rate": 1.9054548213062857e-05, "loss": 0.5111, "step": 13245 }, { "epoch": 0.2809272337808318, "grad_norm": 0.36339542269706726, "learning_rate": 1.9054406658387148e-05, "loss": 0.5739, "step": 13246 }, { "epoch": 0.2809484422387648, "grad_norm": 0.3385402262210846, "learning_rate": 1.9054265093641206e-05, "loss": 0.5275, "step": 13247 }, { "epoch": 0.2809696506966978, "grad_norm": 0.32598233222961426, "learning_rate": 1.905412351882519e-05, "loss": 0.4827, "step": 13248 }, { "epoch": 0.28099085915463085, "grad_norm": 0.32361936569213867, "learning_rate": 1.9053981933939255e-05, "loss": 0.4776, "step": 13249 }, { "epoch": 0.28101206761256387, "grad_norm": 0.36023852229118347, "learning_rate": 1.905384033898356e-05, "loss": 0.5259, "step": 13250 }, { "epoch": 0.28103327607049694, "grad_norm": 0.5231522917747498, "learning_rate": 1.9053698733958265e-05, "loss": 0.6142, "step": 13251 }, { "epoch": 0.28105448452842996, "grad_norm": 0.33748799562454224, "learning_rate": 1.9053557118863523e-05, "loss": 0.4819, "step": 13252 }, { "epoch": 0.281075692986363, "grad_norm": 0.2952848970890045, "learning_rate": 1.9053415493699493e-05, "loss": 0.4497, "step": 13253 }, { "epoch": 0.281096901444296, "grad_norm": 0.3864927887916565, "learning_rate": 1.9053273858466332e-05, "loss": 0.5034, "step": 13254 }, { "epoch": 0.281118109902229, "grad_norm": 0.32929596304893494, "learning_rate": 1.90531322131642e-05, "loss": 0.5454, "step": 13255 }, { "epoch": 0.28113931836016204, "grad_norm": 0.4140380918979645, "learning_rate": 1.9052990557793254e-05, "loss": 0.6043, "step": 13256 }, { "epoch": 0.28116052681809506, "grad_norm": 0.3140608072280884, "learning_rate": 1.905284889235365e-05, "loss": 0.4763, "step": 13257 }, { "epoch": 0.2811817352760281, "grad_norm": 0.33272475004196167, "learning_rate": 1.9052707216845544e-05, "loss": 0.5864, "step": 13258 }, { "epoch": 0.2812029437339611, "grad_norm": 0.35605624318122864, "learning_rate": 1.90525655312691e-05, "loss": 0.58, "step": 13259 }, { "epoch": 0.2812241521918941, "grad_norm": 0.35255667567253113, "learning_rate": 1.9052423835624467e-05, "loss": 0.5367, "step": 13260 }, { "epoch": 0.28124536064982714, "grad_norm": 0.32318031787872314, "learning_rate": 1.905228212991181e-05, "loss": 0.5098, "step": 13261 }, { "epoch": 0.28126656910776016, "grad_norm": 0.33927926421165466, "learning_rate": 1.905214041413128e-05, "loss": 0.5873, "step": 13262 }, { "epoch": 0.2812877775656932, "grad_norm": 0.3474230468273163, "learning_rate": 1.9051998688283043e-05, "loss": 0.585, "step": 13263 }, { "epoch": 0.2813089860236262, "grad_norm": 0.3267360329627991, "learning_rate": 1.9051856952367247e-05, "loss": 0.4807, "step": 13264 }, { "epoch": 0.28133019448155927, "grad_norm": 0.3522084951400757, "learning_rate": 1.9051715206384057e-05, "loss": 0.5074, "step": 13265 }, { "epoch": 0.2813514029394923, "grad_norm": 0.37145012617111206, "learning_rate": 1.9051573450333623e-05, "loss": 0.4975, "step": 13266 }, { "epoch": 0.2813726113974253, "grad_norm": 0.3442409336566925, "learning_rate": 1.905143168421611e-05, "loss": 0.4388, "step": 13267 }, { "epoch": 0.28139381985535833, "grad_norm": 0.33390650153160095, "learning_rate": 1.9051289908031678e-05, "loss": 0.5365, "step": 13268 }, { "epoch": 0.28141502831329135, "grad_norm": 0.5741122961044312, "learning_rate": 1.9051148121780477e-05, "loss": 0.5225, "step": 13269 }, { "epoch": 0.28143623677122437, "grad_norm": 0.31731361150741577, "learning_rate": 1.9051006325462667e-05, "loss": 0.5552, "step": 13270 }, { "epoch": 0.2814574452291574, "grad_norm": 0.3504418730735779, "learning_rate": 1.9050864519078408e-05, "loss": 0.4427, "step": 13271 }, { "epoch": 0.2814786536870904, "grad_norm": 0.377902626991272, "learning_rate": 1.9050722702627854e-05, "loss": 0.5023, "step": 13272 }, { "epoch": 0.2814998621450234, "grad_norm": 0.35451188683509827, "learning_rate": 1.9050580876111167e-05, "loss": 0.6278, "step": 13273 }, { "epoch": 0.28152107060295645, "grad_norm": 0.34501492977142334, "learning_rate": 1.90504390395285e-05, "loss": 0.5002, "step": 13274 }, { "epoch": 0.28154227906088947, "grad_norm": 0.3637597858905792, "learning_rate": 1.9050297192880015e-05, "loss": 0.5331, "step": 13275 }, { "epoch": 0.2815634875188225, "grad_norm": 0.6150937676429749, "learning_rate": 1.905015533616587e-05, "loss": 0.531, "step": 13276 }, { "epoch": 0.2815846959767555, "grad_norm": 0.4602476954460144, "learning_rate": 1.905001346938622e-05, "loss": 0.5702, "step": 13277 }, { "epoch": 0.2816059044346886, "grad_norm": 0.3356078565120697, "learning_rate": 1.904987159254122e-05, "loss": 0.4814, "step": 13278 }, { "epoch": 0.2816271128926216, "grad_norm": 0.3434356451034546, "learning_rate": 1.9049729705631033e-05, "loss": 0.5278, "step": 13279 }, { "epoch": 0.2816483213505546, "grad_norm": 0.35162127017974854, "learning_rate": 1.9049587808655818e-05, "loss": 0.479, "step": 13280 }, { "epoch": 0.28166952980848764, "grad_norm": 0.346323162317276, "learning_rate": 1.9049445901615727e-05, "loss": 0.4866, "step": 13281 }, { "epoch": 0.28169073826642066, "grad_norm": 0.40824204683303833, "learning_rate": 1.9049303984510923e-05, "loss": 0.5262, "step": 13282 }, { "epoch": 0.2817119467243537, "grad_norm": 0.3784148395061493, "learning_rate": 1.9049162057341562e-05, "loss": 0.5341, "step": 13283 }, { "epoch": 0.2817331551822867, "grad_norm": 0.3299981653690338, "learning_rate": 1.90490201201078e-05, "loss": 0.5, "step": 13284 }, { "epoch": 0.2817543636402197, "grad_norm": 0.3619365692138672, "learning_rate": 1.9048878172809795e-05, "loss": 0.5464, "step": 13285 }, { "epoch": 0.28177557209815274, "grad_norm": 0.3346879482269287, "learning_rate": 1.9048736215447707e-05, "loss": 0.4773, "step": 13286 }, { "epoch": 0.28179678055608576, "grad_norm": 0.34112685918807983, "learning_rate": 1.9048594248021695e-05, "loss": 0.483, "step": 13287 }, { "epoch": 0.2818179890140188, "grad_norm": 0.34826967120170593, "learning_rate": 1.9048452270531913e-05, "loss": 0.5258, "step": 13288 }, { "epoch": 0.2818391974719518, "grad_norm": 0.31155577301979065, "learning_rate": 1.9048310282978525e-05, "loss": 0.5123, "step": 13289 }, { "epoch": 0.2818604059298848, "grad_norm": 0.32473284006118774, "learning_rate": 1.904816828536168e-05, "loss": 0.4852, "step": 13290 }, { "epoch": 0.28188161438781784, "grad_norm": 0.3187997043132782, "learning_rate": 1.9048026277681543e-05, "loss": 0.4519, "step": 13291 }, { "epoch": 0.2819028228457509, "grad_norm": 0.3346872329711914, "learning_rate": 1.904788425993827e-05, "loss": 0.4482, "step": 13292 }, { "epoch": 0.28192403130368393, "grad_norm": 0.3995912969112396, "learning_rate": 1.904774223213202e-05, "loss": 0.5469, "step": 13293 }, { "epoch": 0.28194523976161695, "grad_norm": 0.34860628843307495, "learning_rate": 1.9047600194262948e-05, "loss": 0.4834, "step": 13294 }, { "epoch": 0.28196644821954997, "grad_norm": 0.3462681770324707, "learning_rate": 1.904745814633121e-05, "loss": 0.5723, "step": 13295 }, { "epoch": 0.281987656677483, "grad_norm": 0.43331655859947205, "learning_rate": 1.9047316088336975e-05, "loss": 0.5591, "step": 13296 }, { "epoch": 0.282008865135416, "grad_norm": 0.38905975222587585, "learning_rate": 1.904717402028039e-05, "loss": 0.591, "step": 13297 }, { "epoch": 0.28203007359334903, "grad_norm": 0.42186546325683594, "learning_rate": 1.9047031942161616e-05, "loss": 0.5977, "step": 13298 }, { "epoch": 0.28205128205128205, "grad_norm": 0.36514630913734436, "learning_rate": 1.904688985398081e-05, "loss": 0.5271, "step": 13299 }, { "epoch": 0.28207249050921507, "grad_norm": 0.3167559504508972, "learning_rate": 1.9046747755738137e-05, "loss": 0.4794, "step": 13300 }, { "epoch": 0.2820936989671481, "grad_norm": 0.3449501097202301, "learning_rate": 1.904660564743375e-05, "loss": 0.5024, "step": 13301 }, { "epoch": 0.2821149074250811, "grad_norm": 0.4131101369857788, "learning_rate": 1.90464635290678e-05, "loss": 0.5311, "step": 13302 }, { "epoch": 0.2821361158830141, "grad_norm": 0.35069525241851807, "learning_rate": 1.9046321400640457e-05, "loss": 0.5259, "step": 13303 }, { "epoch": 0.28215732434094715, "grad_norm": 0.31877994537353516, "learning_rate": 1.9046179262151872e-05, "loss": 0.5675, "step": 13304 }, { "epoch": 0.28217853279888017, "grad_norm": 0.35932424664497375, "learning_rate": 1.9046037113602204e-05, "loss": 0.5345, "step": 13305 }, { "epoch": 0.28219974125681324, "grad_norm": 0.393825501203537, "learning_rate": 1.9045894954991617e-05, "loss": 0.5136, "step": 13306 }, { "epoch": 0.28222094971474626, "grad_norm": 0.39175140857696533, "learning_rate": 1.904575278632026e-05, "loss": 0.4888, "step": 13307 }, { "epoch": 0.2822421581726793, "grad_norm": 0.31706124544143677, "learning_rate": 1.9045610607588293e-05, "loss": 0.547, "step": 13308 }, { "epoch": 0.2822633666306123, "grad_norm": 0.35699331760406494, "learning_rate": 1.9045468418795883e-05, "loss": 0.4805, "step": 13309 }, { "epoch": 0.2822845750885453, "grad_norm": 0.3770524561405182, "learning_rate": 1.9045326219943177e-05, "loss": 0.4781, "step": 13310 }, { "epoch": 0.28230578354647834, "grad_norm": 0.358381986618042, "learning_rate": 1.904518401103034e-05, "loss": 0.482, "step": 13311 }, { "epoch": 0.28232699200441136, "grad_norm": 0.4271797835826874, "learning_rate": 1.9045041792057524e-05, "loss": 0.5647, "step": 13312 }, { "epoch": 0.2823482004623444, "grad_norm": 0.37331852316856384, "learning_rate": 1.9044899563024896e-05, "loss": 0.5324, "step": 13313 }, { "epoch": 0.2823694089202774, "grad_norm": 0.44582605361938477, "learning_rate": 1.9044757323932606e-05, "loss": 0.4929, "step": 13314 }, { "epoch": 0.2823906173782104, "grad_norm": 0.32096943259239197, "learning_rate": 1.9044615074780817e-05, "loss": 0.4862, "step": 13315 }, { "epoch": 0.28241182583614344, "grad_norm": 0.34954097867012024, "learning_rate": 1.9044472815569685e-05, "loss": 0.5167, "step": 13316 }, { "epoch": 0.28243303429407646, "grad_norm": 0.3384491801261902, "learning_rate": 1.904433054629937e-05, "loss": 0.5424, "step": 13317 }, { "epoch": 0.2824542427520095, "grad_norm": 0.4444005489349365, "learning_rate": 1.9044188266970026e-05, "loss": 0.5329, "step": 13318 }, { "epoch": 0.28247545120994255, "grad_norm": 0.3902844190597534, "learning_rate": 1.9044045977581818e-05, "loss": 0.5679, "step": 13319 }, { "epoch": 0.28249665966787557, "grad_norm": 0.2970307767391205, "learning_rate": 1.90439036781349e-05, "loss": 0.4976, "step": 13320 }, { "epoch": 0.2825178681258086, "grad_norm": 0.33576270937919617, "learning_rate": 1.904376136862943e-05, "loss": 0.5997, "step": 13321 }, { "epoch": 0.2825390765837416, "grad_norm": 0.3483298122882843, "learning_rate": 1.9043619049065566e-05, "loss": 0.6516, "step": 13322 }, { "epoch": 0.28256028504167463, "grad_norm": 0.3505721986293793, "learning_rate": 1.904347671944347e-05, "loss": 0.5692, "step": 13323 }, { "epoch": 0.28258149349960765, "grad_norm": 0.3801012933254242, "learning_rate": 1.90433343797633e-05, "loss": 0.5614, "step": 13324 }, { "epoch": 0.28260270195754067, "grad_norm": 0.3141803443431854, "learning_rate": 1.904319203002521e-05, "loss": 0.5699, "step": 13325 }, { "epoch": 0.2826239104154737, "grad_norm": 0.3089512288570404, "learning_rate": 1.9043049670229356e-05, "loss": 0.4604, "step": 13326 }, { "epoch": 0.2826451188734067, "grad_norm": 0.325048565864563, "learning_rate": 1.9042907300375907e-05, "loss": 0.4955, "step": 13327 }, { "epoch": 0.28266632733133973, "grad_norm": 6.561715126037598, "learning_rate": 1.904276492046501e-05, "loss": 0.4796, "step": 13328 }, { "epoch": 0.28268753578927275, "grad_norm": 0.347807914018631, "learning_rate": 1.904262253049683e-05, "loss": 0.5189, "step": 13329 }, { "epoch": 0.28270874424720577, "grad_norm": 0.3328729271888733, "learning_rate": 1.9042480130471522e-05, "loss": 0.4392, "step": 13330 }, { "epoch": 0.2827299527051388, "grad_norm": 0.29740023612976074, "learning_rate": 1.904233772038925e-05, "loss": 0.4517, "step": 13331 }, { "epoch": 0.2827511611630718, "grad_norm": 0.34180304408073425, "learning_rate": 1.904219530025017e-05, "loss": 0.5232, "step": 13332 }, { "epoch": 0.2827723696210049, "grad_norm": 0.3300210237503052, "learning_rate": 1.9042052870054433e-05, "loss": 0.4974, "step": 13333 }, { "epoch": 0.2827935780789379, "grad_norm": 0.36357465386390686, "learning_rate": 1.9041910429802206e-05, "loss": 0.5308, "step": 13334 }, { "epoch": 0.2828147865368709, "grad_norm": 0.321704626083374, "learning_rate": 1.9041767979493644e-05, "loss": 0.5024, "step": 13335 }, { "epoch": 0.28283599499480394, "grad_norm": 0.3810541033744812, "learning_rate": 1.9041625519128905e-05, "loss": 0.4287, "step": 13336 }, { "epoch": 0.28285720345273696, "grad_norm": 0.3383617401123047, "learning_rate": 1.9041483048708153e-05, "loss": 0.5423, "step": 13337 }, { "epoch": 0.28287841191067, "grad_norm": 0.35063618421554565, "learning_rate": 1.9041340568231537e-05, "loss": 0.5296, "step": 13338 }, { "epoch": 0.282899620368603, "grad_norm": 0.36941370368003845, "learning_rate": 1.9041198077699224e-05, "loss": 0.5131, "step": 13339 }, { "epoch": 0.282920828826536, "grad_norm": 0.3050394058227539, "learning_rate": 1.904105557711137e-05, "loss": 0.4621, "step": 13340 }, { "epoch": 0.28294203728446904, "grad_norm": 0.31592032313346863, "learning_rate": 1.904091306646813e-05, "loss": 0.4692, "step": 13341 }, { "epoch": 0.28296324574240206, "grad_norm": 0.3300603926181793, "learning_rate": 1.9040770545769668e-05, "loss": 0.5313, "step": 13342 }, { "epoch": 0.2829844542003351, "grad_norm": 0.3461029827594757, "learning_rate": 1.9040628015016138e-05, "loss": 0.5535, "step": 13343 }, { "epoch": 0.2830056626582681, "grad_norm": 0.4002659022808075, "learning_rate": 1.9040485474207696e-05, "loss": 0.4747, "step": 13344 }, { "epoch": 0.2830268711162011, "grad_norm": 0.35636162757873535, "learning_rate": 1.904034292334451e-05, "loss": 0.5394, "step": 13345 }, { "epoch": 0.28304807957413414, "grad_norm": 0.3947894275188446, "learning_rate": 1.904020036242673e-05, "loss": 0.4928, "step": 13346 }, { "epoch": 0.2830692880320672, "grad_norm": 0.346510648727417, "learning_rate": 1.904005779145452e-05, "loss": 0.525, "step": 13347 }, { "epoch": 0.28309049649000023, "grad_norm": 0.4831452965736389, "learning_rate": 1.9039915210428037e-05, "loss": 0.495, "step": 13348 }, { "epoch": 0.28311170494793325, "grad_norm": 0.45057952404022217, "learning_rate": 1.9039772619347435e-05, "loss": 0.625, "step": 13349 }, { "epoch": 0.28313291340586627, "grad_norm": 0.3411838710308075, "learning_rate": 1.9039630018212877e-05, "loss": 0.5196, "step": 13350 }, { "epoch": 0.2831541218637993, "grad_norm": 1.4997104406356812, "learning_rate": 1.9039487407024526e-05, "loss": 0.5008, "step": 13351 }, { "epoch": 0.2831753303217323, "grad_norm": 0.32275858521461487, "learning_rate": 1.903934478578253e-05, "loss": 0.5306, "step": 13352 }, { "epoch": 0.28319653877966533, "grad_norm": 0.3154575824737549, "learning_rate": 1.9039202154487054e-05, "loss": 0.4792, "step": 13353 }, { "epoch": 0.28321774723759835, "grad_norm": 0.44018638134002686, "learning_rate": 1.903905951313826e-05, "loss": 0.5415, "step": 13354 }, { "epoch": 0.28323895569553137, "grad_norm": 0.6438615918159485, "learning_rate": 1.90389168617363e-05, "loss": 0.4679, "step": 13355 }, { "epoch": 0.2832601641534644, "grad_norm": 0.308116614818573, "learning_rate": 1.9038774200281334e-05, "loss": 0.5089, "step": 13356 }, { "epoch": 0.2832813726113974, "grad_norm": 0.3583400249481201, "learning_rate": 1.9038631528773523e-05, "loss": 0.4501, "step": 13357 }, { "epoch": 0.28330258106933043, "grad_norm": 0.34210172295570374, "learning_rate": 1.9038488847213024e-05, "loss": 0.5143, "step": 13358 }, { "epoch": 0.28332378952726345, "grad_norm": 0.3278217017650604, "learning_rate": 1.90383461556e-05, "loss": 0.5035, "step": 13359 }, { "epoch": 0.2833449979851965, "grad_norm": 0.3930762708187103, "learning_rate": 1.9038203453934597e-05, "loss": 0.435, "step": 13360 }, { "epoch": 0.28336620644312954, "grad_norm": 0.33674246072769165, "learning_rate": 1.903806074221699e-05, "loss": 0.4652, "step": 13361 }, { "epoch": 0.28338741490106256, "grad_norm": 0.33572083711624146, "learning_rate": 1.903791802044733e-05, "loss": 0.4547, "step": 13362 }, { "epoch": 0.2834086233589956, "grad_norm": 0.35344505310058594, "learning_rate": 1.9037775288625773e-05, "loss": 0.5428, "step": 13363 }, { "epoch": 0.2834298318169286, "grad_norm": 0.33969128131866455, "learning_rate": 1.9037632546752483e-05, "loss": 0.5321, "step": 13364 }, { "epoch": 0.2834510402748616, "grad_norm": 0.3565985858440399, "learning_rate": 1.903748979482762e-05, "loss": 0.5136, "step": 13365 }, { "epoch": 0.28347224873279464, "grad_norm": 0.29103201627731323, "learning_rate": 1.9037347032851333e-05, "loss": 0.4351, "step": 13366 }, { "epoch": 0.28349345719072766, "grad_norm": 0.37001869082450867, "learning_rate": 1.9037204260823788e-05, "loss": 0.5787, "step": 13367 }, { "epoch": 0.2835146656486607, "grad_norm": 0.33976125717163086, "learning_rate": 1.9037061478745144e-05, "loss": 0.4551, "step": 13368 }, { "epoch": 0.2835358741065937, "grad_norm": 0.35441869497299194, "learning_rate": 1.903691868661556e-05, "loss": 0.5282, "step": 13369 }, { "epoch": 0.2835570825645267, "grad_norm": 0.35220712423324585, "learning_rate": 1.9036775884435193e-05, "loss": 0.5051, "step": 13370 }, { "epoch": 0.28357829102245974, "grad_norm": 0.6527135372161865, "learning_rate": 1.9036633072204202e-05, "loss": 0.5107, "step": 13371 }, { "epoch": 0.28359949948039276, "grad_norm": 0.3535762429237366, "learning_rate": 1.903649024992275e-05, "loss": 0.4114, "step": 13372 }, { "epoch": 0.2836207079383258, "grad_norm": 0.37547364830970764, "learning_rate": 1.9036347417590985e-05, "loss": 0.5245, "step": 13373 }, { "epoch": 0.28364191639625885, "grad_norm": 0.34849977493286133, "learning_rate": 1.9036204575209077e-05, "loss": 0.5115, "step": 13374 }, { "epoch": 0.2836631248541919, "grad_norm": 0.31007465720176697, "learning_rate": 1.903606172277718e-05, "loss": 0.468, "step": 13375 }, { "epoch": 0.2836843333121249, "grad_norm": 0.30772870779037476, "learning_rate": 1.903591886029545e-05, "loss": 0.4537, "step": 13376 }, { "epoch": 0.2837055417700579, "grad_norm": 0.3574773967266083, "learning_rate": 1.9035775987764056e-05, "loss": 0.4839, "step": 13377 }, { "epoch": 0.28372675022799093, "grad_norm": 0.39290982484817505, "learning_rate": 1.903563310518315e-05, "loss": 0.5746, "step": 13378 }, { "epoch": 0.28374795868592395, "grad_norm": 0.32755303382873535, "learning_rate": 1.9035490212552886e-05, "loss": 0.5377, "step": 13379 }, { "epoch": 0.28376916714385697, "grad_norm": 0.3596992790699005, "learning_rate": 1.9035347309873433e-05, "loss": 0.4847, "step": 13380 }, { "epoch": 0.28379037560179, "grad_norm": 0.34837278723716736, "learning_rate": 1.903520439714494e-05, "loss": 0.5469, "step": 13381 }, { "epoch": 0.283811584059723, "grad_norm": 0.355210542678833, "learning_rate": 1.9035061474367578e-05, "loss": 0.5556, "step": 13382 }, { "epoch": 0.28383279251765603, "grad_norm": 0.3340907692909241, "learning_rate": 1.9034918541541494e-05, "loss": 0.5062, "step": 13383 }, { "epoch": 0.28385400097558905, "grad_norm": 0.3546532094478607, "learning_rate": 1.9034775598666856e-05, "loss": 0.6074, "step": 13384 }, { "epoch": 0.28387520943352207, "grad_norm": 0.3699988126754761, "learning_rate": 1.9034632645743817e-05, "loss": 0.595, "step": 13385 }, { "epoch": 0.2838964178914551, "grad_norm": 0.4201286733150482, "learning_rate": 1.9034489682772536e-05, "loss": 0.5352, "step": 13386 }, { "epoch": 0.28391762634938816, "grad_norm": 0.31850433349609375, "learning_rate": 1.9034346709753177e-05, "loss": 0.4641, "step": 13387 }, { "epoch": 0.2839388348073212, "grad_norm": 0.352258563041687, "learning_rate": 1.9034203726685896e-05, "loss": 0.4518, "step": 13388 }, { "epoch": 0.2839600432652542, "grad_norm": 0.35096293687820435, "learning_rate": 1.903406073357085e-05, "loss": 0.548, "step": 13389 }, { "epoch": 0.2839812517231872, "grad_norm": 0.36455363035202026, "learning_rate": 1.90339177304082e-05, "loss": 0.6233, "step": 13390 }, { "epoch": 0.28400246018112024, "grad_norm": 0.3662043809890747, "learning_rate": 1.9033774717198107e-05, "loss": 0.475, "step": 13391 }, { "epoch": 0.28402366863905326, "grad_norm": 0.3212825059890747, "learning_rate": 1.903363169394073e-05, "loss": 0.4836, "step": 13392 }, { "epoch": 0.2840448770969863, "grad_norm": 0.5201354622840881, "learning_rate": 1.9033488660636222e-05, "loss": 0.5993, "step": 13393 }, { "epoch": 0.2840660855549193, "grad_norm": 0.357256144285202, "learning_rate": 1.903334561728475e-05, "loss": 0.4654, "step": 13394 }, { "epoch": 0.2840872940128523, "grad_norm": 0.3806457221508026, "learning_rate": 1.9033202563886467e-05, "loss": 0.467, "step": 13395 }, { "epoch": 0.28410850247078534, "grad_norm": 0.32949596643447876, "learning_rate": 1.9033059500441536e-05, "loss": 0.574, "step": 13396 }, { "epoch": 0.28412971092871836, "grad_norm": 0.35683122277259827, "learning_rate": 1.9032916426950114e-05, "loss": 0.5589, "step": 13397 }, { "epoch": 0.2841509193866514, "grad_norm": 0.3559078276157379, "learning_rate": 1.903277334341236e-05, "loss": 0.5637, "step": 13398 }, { "epoch": 0.2841721278445844, "grad_norm": 0.3532775044441223, "learning_rate": 1.9032630249828434e-05, "loss": 0.5228, "step": 13399 }, { "epoch": 0.2841933363025174, "grad_norm": 0.3431239426136017, "learning_rate": 1.90324871461985e-05, "loss": 0.4948, "step": 13400 }, { "epoch": 0.2842145447604505, "grad_norm": 0.32240086793899536, "learning_rate": 1.903234403252271e-05, "loss": 0.4944, "step": 13401 }, { "epoch": 0.2842357532183835, "grad_norm": 0.33073920011520386, "learning_rate": 1.903220090880122e-05, "loss": 0.4919, "step": 13402 }, { "epoch": 0.28425696167631653, "grad_norm": 0.31162354350090027, "learning_rate": 1.9032057775034197e-05, "loss": 0.5181, "step": 13403 }, { "epoch": 0.28427817013424955, "grad_norm": 0.31634557247161865, "learning_rate": 1.9031914631221798e-05, "loss": 0.56, "step": 13404 }, { "epoch": 0.28429937859218257, "grad_norm": 0.32610198855400085, "learning_rate": 1.9031771477364185e-05, "loss": 0.4892, "step": 13405 }, { "epoch": 0.2843205870501156, "grad_norm": 0.37798503041267395, "learning_rate": 1.9031628313461512e-05, "loss": 0.5181, "step": 13406 }, { "epoch": 0.2843417955080486, "grad_norm": 0.35129082202911377, "learning_rate": 1.903148513951394e-05, "loss": 0.5229, "step": 13407 }, { "epoch": 0.28436300396598163, "grad_norm": 0.36009925603866577, "learning_rate": 1.9031341955521633e-05, "loss": 0.5316, "step": 13408 }, { "epoch": 0.28438421242391465, "grad_norm": 0.3458985388278961, "learning_rate": 1.903119876148474e-05, "loss": 0.5354, "step": 13409 }, { "epoch": 0.28440542088184767, "grad_norm": 0.3347281515598297, "learning_rate": 1.903105555740343e-05, "loss": 0.5286, "step": 13410 }, { "epoch": 0.2844266293397807, "grad_norm": 0.3237506151199341, "learning_rate": 1.9030912343277856e-05, "loss": 0.5305, "step": 13411 }, { "epoch": 0.2844478377977137, "grad_norm": 0.6315252184867859, "learning_rate": 1.903076911910818e-05, "loss": 0.4356, "step": 13412 }, { "epoch": 0.28446904625564673, "grad_norm": 0.367435097694397, "learning_rate": 1.9030625884894563e-05, "loss": 0.5044, "step": 13413 }, { "epoch": 0.28449025471357975, "grad_norm": 0.36163246631622314, "learning_rate": 1.9030482640637162e-05, "loss": 0.5238, "step": 13414 }, { "epoch": 0.2845114631715128, "grad_norm": 0.33845046162605286, "learning_rate": 1.9030339386336135e-05, "loss": 0.4373, "step": 13415 }, { "epoch": 0.28453267162944584, "grad_norm": 0.3176705837249756, "learning_rate": 1.9030196121991645e-05, "loss": 0.494, "step": 13416 }, { "epoch": 0.28455388008737886, "grad_norm": 0.4345088601112366, "learning_rate": 1.9030052847603845e-05, "loss": 0.4313, "step": 13417 }, { "epoch": 0.2845750885453119, "grad_norm": 0.38941749930381775, "learning_rate": 1.9029909563172905e-05, "loss": 0.5847, "step": 13418 }, { "epoch": 0.2845962970032449, "grad_norm": 0.3442925810813904, "learning_rate": 1.9029766268698972e-05, "loss": 0.553, "step": 13419 }, { "epoch": 0.2846175054611779, "grad_norm": 0.5330249071121216, "learning_rate": 1.9029622964182217e-05, "loss": 0.5404, "step": 13420 }, { "epoch": 0.28463871391911094, "grad_norm": 0.47589653730392456, "learning_rate": 1.902947964962279e-05, "loss": 0.5357, "step": 13421 }, { "epoch": 0.28465992237704396, "grad_norm": 0.32541486620903015, "learning_rate": 1.9029336325020856e-05, "loss": 0.4636, "step": 13422 }, { "epoch": 0.284681130834977, "grad_norm": 0.3177569806575775, "learning_rate": 1.9029192990376572e-05, "loss": 0.5016, "step": 13423 }, { "epoch": 0.28470233929291, "grad_norm": 0.33790266513824463, "learning_rate": 1.9029049645690096e-05, "loss": 0.5008, "step": 13424 }, { "epoch": 0.284723547750843, "grad_norm": 0.2995246648788452, "learning_rate": 1.902890629096159e-05, "loss": 0.4629, "step": 13425 }, { "epoch": 0.28474475620877604, "grad_norm": 0.3353801965713501, "learning_rate": 1.9028762926191215e-05, "loss": 0.5185, "step": 13426 }, { "epoch": 0.28476596466670906, "grad_norm": 0.38816210627555847, "learning_rate": 1.9028619551379127e-05, "loss": 0.524, "step": 13427 }, { "epoch": 0.28478717312464213, "grad_norm": 0.33823898434638977, "learning_rate": 1.902847616652549e-05, "loss": 0.601, "step": 13428 }, { "epoch": 0.28480838158257515, "grad_norm": 0.3170730173587799, "learning_rate": 1.9028332771630457e-05, "loss": 0.4469, "step": 13429 }, { "epoch": 0.2848295900405082, "grad_norm": 0.3281436264514923, "learning_rate": 1.902818936669419e-05, "loss": 0.4931, "step": 13430 }, { "epoch": 0.2848507984984412, "grad_norm": 0.36787447333335876, "learning_rate": 1.9028045951716853e-05, "loss": 0.4905, "step": 13431 }, { "epoch": 0.2848720069563742, "grad_norm": 0.3125879764556885, "learning_rate": 1.90279025266986e-05, "loss": 0.4871, "step": 13432 }, { "epoch": 0.28489321541430723, "grad_norm": 0.3390197455883026, "learning_rate": 1.902775909163959e-05, "loss": 0.5727, "step": 13433 }, { "epoch": 0.28491442387224025, "grad_norm": 0.3702661991119385, "learning_rate": 1.9027615646539983e-05, "loss": 0.5268, "step": 13434 }, { "epoch": 0.28493563233017327, "grad_norm": 0.36412063241004944, "learning_rate": 1.9027472191399946e-05, "loss": 0.5585, "step": 13435 }, { "epoch": 0.2849568407881063, "grad_norm": 0.3391854465007782, "learning_rate": 1.9027328726219628e-05, "loss": 0.5607, "step": 13436 }, { "epoch": 0.2849780492460393, "grad_norm": 0.3097691833972931, "learning_rate": 1.9027185250999195e-05, "loss": 0.4679, "step": 13437 }, { "epoch": 0.28499925770397233, "grad_norm": 0.3349542021751404, "learning_rate": 1.9027041765738806e-05, "loss": 0.4753, "step": 13438 }, { "epoch": 0.28502046616190535, "grad_norm": 0.3386603593826294, "learning_rate": 1.902689827043862e-05, "loss": 0.5426, "step": 13439 }, { "epoch": 0.28504167461983837, "grad_norm": 0.6938004493713379, "learning_rate": 1.9026754765098796e-05, "loss": 0.5634, "step": 13440 }, { "epoch": 0.2850628830777714, "grad_norm": 0.7450199127197266, "learning_rate": 1.902661124971949e-05, "loss": 0.6066, "step": 13441 }, { "epoch": 0.28508409153570446, "grad_norm": 0.4779932498931885, "learning_rate": 1.902646772430087e-05, "loss": 0.5503, "step": 13442 }, { "epoch": 0.2851052999936375, "grad_norm": 0.3207662105560303, "learning_rate": 1.9026324188843092e-05, "loss": 0.4549, "step": 13443 }, { "epoch": 0.2851265084515705, "grad_norm": 0.3977868854999542, "learning_rate": 1.902618064334631e-05, "loss": 0.5712, "step": 13444 }, { "epoch": 0.2851477169095035, "grad_norm": 0.33601704239845276, "learning_rate": 1.902603708781069e-05, "loss": 0.4818, "step": 13445 }, { "epoch": 0.28516892536743654, "grad_norm": 0.41865822672843933, "learning_rate": 1.9025893522236393e-05, "loss": 0.5401, "step": 13446 }, { "epoch": 0.28519013382536956, "grad_norm": 0.34480050206184387, "learning_rate": 1.902574994662357e-05, "loss": 0.5061, "step": 13447 }, { "epoch": 0.2852113422833026, "grad_norm": 0.34856095910072327, "learning_rate": 1.902560636097239e-05, "loss": 0.5145, "step": 13448 }, { "epoch": 0.2852325507412356, "grad_norm": 0.35641905665397644, "learning_rate": 1.9025462765283008e-05, "loss": 0.5513, "step": 13449 }, { "epoch": 0.2852537591991686, "grad_norm": 0.352850079536438, "learning_rate": 1.9025319159555587e-05, "loss": 0.4953, "step": 13450 }, { "epoch": 0.28527496765710164, "grad_norm": 0.35431137681007385, "learning_rate": 1.902517554379028e-05, "loss": 0.5282, "step": 13451 }, { "epoch": 0.28529617611503466, "grad_norm": 0.36116883158683777, "learning_rate": 1.9025031917987257e-05, "loss": 0.5259, "step": 13452 }, { "epoch": 0.2853173845729677, "grad_norm": 0.4039618968963623, "learning_rate": 1.9024888282146667e-05, "loss": 0.58, "step": 13453 }, { "epoch": 0.2853385930309007, "grad_norm": 0.30830252170562744, "learning_rate": 1.9024744636268674e-05, "loss": 0.4537, "step": 13454 }, { "epoch": 0.2853598014888337, "grad_norm": 0.5599004626274109, "learning_rate": 1.9024600980353444e-05, "loss": 0.528, "step": 13455 }, { "epoch": 0.2853810099467668, "grad_norm": 0.3423091173171997, "learning_rate": 1.9024457314401127e-05, "loss": 0.5662, "step": 13456 }, { "epoch": 0.2854022184046998, "grad_norm": 0.3996117115020752, "learning_rate": 1.902431363841189e-05, "loss": 0.5641, "step": 13457 }, { "epoch": 0.28542342686263283, "grad_norm": 0.3245154619216919, "learning_rate": 1.9024169952385887e-05, "loss": 0.583, "step": 13458 }, { "epoch": 0.28544463532056585, "grad_norm": 0.3291088938713074, "learning_rate": 1.9024026256323277e-05, "loss": 0.4455, "step": 13459 }, { "epoch": 0.2854658437784989, "grad_norm": 0.3126325011253357, "learning_rate": 1.902388255022423e-05, "loss": 0.5031, "step": 13460 }, { "epoch": 0.2854870522364319, "grad_norm": 0.3747280538082123, "learning_rate": 1.9023738834088898e-05, "loss": 0.5049, "step": 13461 }, { "epoch": 0.2855082606943649, "grad_norm": 0.31762391328811646, "learning_rate": 1.9023595107917437e-05, "loss": 0.5463, "step": 13462 }, { "epoch": 0.28552946915229793, "grad_norm": 0.36103886365890503, "learning_rate": 1.902345137171002e-05, "loss": 0.5758, "step": 13463 }, { "epoch": 0.28555067761023095, "grad_norm": 0.3784218728542328, "learning_rate": 1.9023307625466794e-05, "loss": 0.6116, "step": 13464 }, { "epoch": 0.28557188606816397, "grad_norm": 0.33722686767578125, "learning_rate": 1.902316386918792e-05, "loss": 0.522, "step": 13465 }, { "epoch": 0.285593094526097, "grad_norm": 0.3305327594280243, "learning_rate": 1.902302010287357e-05, "loss": 0.5173, "step": 13466 }, { "epoch": 0.28561430298403, "grad_norm": 0.37926942110061646, "learning_rate": 1.902287632652389e-05, "loss": 0.5965, "step": 13467 }, { "epoch": 0.28563551144196303, "grad_norm": 0.3699328601360321, "learning_rate": 1.9022732540139047e-05, "loss": 0.4379, "step": 13468 }, { "epoch": 0.2856567198998961, "grad_norm": 0.38294920325279236, "learning_rate": 1.9022588743719197e-05, "loss": 0.5147, "step": 13469 }, { "epoch": 0.2856779283578291, "grad_norm": 0.3295837640762329, "learning_rate": 1.9022444937264507e-05, "loss": 0.5271, "step": 13470 }, { "epoch": 0.28569913681576214, "grad_norm": 0.31351277232170105, "learning_rate": 1.9022301120775126e-05, "loss": 0.499, "step": 13471 }, { "epoch": 0.28572034527369516, "grad_norm": 0.33710283041000366, "learning_rate": 1.902215729425122e-05, "loss": 0.5105, "step": 13472 }, { "epoch": 0.2857415537316282, "grad_norm": 0.3662495017051697, "learning_rate": 1.9022013457692956e-05, "loss": 0.5325, "step": 13473 }, { "epoch": 0.2857627621895612, "grad_norm": 0.32760024070739746, "learning_rate": 1.9021869611100482e-05, "loss": 0.5536, "step": 13474 }, { "epoch": 0.2857839706474942, "grad_norm": 0.3958735764026642, "learning_rate": 1.9021725754473967e-05, "loss": 0.4817, "step": 13475 }, { "epoch": 0.28580517910542724, "grad_norm": 0.3196694850921631, "learning_rate": 1.902158188781356e-05, "loss": 0.4238, "step": 13476 }, { "epoch": 0.28582638756336026, "grad_norm": 0.3490073084831238, "learning_rate": 1.9021438011119433e-05, "loss": 0.5556, "step": 13477 }, { "epoch": 0.2858475960212933, "grad_norm": 0.38827750086784363, "learning_rate": 1.902129412439174e-05, "loss": 0.5137, "step": 13478 }, { "epoch": 0.2858688044792263, "grad_norm": 0.3489784002304077, "learning_rate": 1.902115022763064e-05, "loss": 0.5232, "step": 13479 }, { "epoch": 0.2858900129371593, "grad_norm": 0.3559396266937256, "learning_rate": 1.90210063208363e-05, "loss": 0.5295, "step": 13480 }, { "epoch": 0.28591122139509234, "grad_norm": 0.3416464626789093, "learning_rate": 1.9020862404008873e-05, "loss": 0.4963, "step": 13481 }, { "epoch": 0.28593242985302536, "grad_norm": 0.3191327750682831, "learning_rate": 1.902071847714852e-05, "loss": 0.5041, "step": 13482 }, { "epoch": 0.28595363831095844, "grad_norm": 0.3380783498287201, "learning_rate": 1.9020574540255402e-05, "loss": 0.4897, "step": 13483 }, { "epoch": 0.28597484676889146, "grad_norm": 0.35143816471099854, "learning_rate": 1.9020430593329683e-05, "loss": 0.4916, "step": 13484 }, { "epoch": 0.2859960552268245, "grad_norm": 0.35590431094169617, "learning_rate": 1.9020286636371516e-05, "loss": 0.496, "step": 13485 }, { "epoch": 0.2860172636847575, "grad_norm": 0.35754629969596863, "learning_rate": 1.9020142669381064e-05, "loss": 0.5122, "step": 13486 }, { "epoch": 0.2860384721426905, "grad_norm": 0.39102840423583984, "learning_rate": 1.901999869235849e-05, "loss": 0.6069, "step": 13487 }, { "epoch": 0.28605968060062353, "grad_norm": 0.3351745009422302, "learning_rate": 1.901985470530395e-05, "loss": 0.5092, "step": 13488 }, { "epoch": 0.28608088905855655, "grad_norm": 0.3465438485145569, "learning_rate": 1.9019710708217607e-05, "loss": 0.5274, "step": 13489 }, { "epoch": 0.2861020975164896, "grad_norm": 0.33900558948516846, "learning_rate": 1.901956670109962e-05, "loss": 0.5049, "step": 13490 }, { "epoch": 0.2861233059744226, "grad_norm": 0.3589865565299988, "learning_rate": 1.901942268395015e-05, "loss": 0.51, "step": 13491 }, { "epoch": 0.2861445144323556, "grad_norm": 0.5246248245239258, "learning_rate": 1.9019278656769353e-05, "loss": 0.5511, "step": 13492 }, { "epoch": 0.28616572289028863, "grad_norm": 0.4128941595554352, "learning_rate": 1.9019134619557396e-05, "loss": 0.5525, "step": 13493 }, { "epoch": 0.28618693134822165, "grad_norm": 0.3616259694099426, "learning_rate": 1.9018990572314434e-05, "loss": 0.5274, "step": 13494 }, { "epoch": 0.28620813980615467, "grad_norm": 0.6976037621498108, "learning_rate": 1.901884651504063e-05, "loss": 0.4725, "step": 13495 }, { "epoch": 0.2862293482640877, "grad_norm": 0.3413083255290985, "learning_rate": 1.9018702447736144e-05, "loss": 0.6136, "step": 13496 }, { "epoch": 0.28625055672202077, "grad_norm": 0.3456784188747406, "learning_rate": 1.9018558370401134e-05, "loss": 0.4599, "step": 13497 }, { "epoch": 0.2862717651799538, "grad_norm": 0.4360005557537079, "learning_rate": 1.9018414283035764e-05, "loss": 0.6057, "step": 13498 }, { "epoch": 0.2862929736378868, "grad_norm": 0.3780141770839691, "learning_rate": 1.9018270185640188e-05, "loss": 0.5199, "step": 13499 }, { "epoch": 0.2863141820958198, "grad_norm": 0.3100002706050873, "learning_rate": 1.9018126078214573e-05, "loss": 0.4876, "step": 13500 }, { "epoch": 0.28633539055375284, "grad_norm": 0.38394761085510254, "learning_rate": 1.9017981960759075e-05, "loss": 0.5416, "step": 13501 }, { "epoch": 0.28635659901168586, "grad_norm": 0.3414878845214844, "learning_rate": 1.9017837833273857e-05, "loss": 0.5532, "step": 13502 }, { "epoch": 0.2863778074696189, "grad_norm": 0.3659514784812927, "learning_rate": 1.901769369575908e-05, "loss": 0.518, "step": 13503 }, { "epoch": 0.2863990159275519, "grad_norm": 0.3539293110370636, "learning_rate": 1.90175495482149e-05, "loss": 0.4923, "step": 13504 }, { "epoch": 0.2864202243854849, "grad_norm": 0.35862255096435547, "learning_rate": 1.901740539064148e-05, "loss": 0.6137, "step": 13505 }, { "epoch": 0.28644143284341794, "grad_norm": 0.4738372564315796, "learning_rate": 1.901726122303898e-05, "loss": 0.485, "step": 13506 }, { "epoch": 0.28646264130135096, "grad_norm": 0.3860822916030884, "learning_rate": 1.9017117045407564e-05, "loss": 0.5018, "step": 13507 }, { "epoch": 0.286483849759284, "grad_norm": 0.3556816875934601, "learning_rate": 1.9016972857747383e-05, "loss": 0.5098, "step": 13508 }, { "epoch": 0.286505058217217, "grad_norm": 0.32208961248397827, "learning_rate": 1.9016828660058607e-05, "loss": 0.5054, "step": 13509 }, { "epoch": 0.2865262666751501, "grad_norm": 0.3376617133617401, "learning_rate": 1.901668445234139e-05, "loss": 0.4696, "step": 13510 }, { "epoch": 0.2865474751330831, "grad_norm": 0.48937004804611206, "learning_rate": 1.9016540234595894e-05, "loss": 0.5196, "step": 13511 }, { "epoch": 0.2865686835910161, "grad_norm": 0.3194374144077301, "learning_rate": 1.9016396006822283e-05, "loss": 0.4223, "step": 13512 }, { "epoch": 0.28658989204894914, "grad_norm": 0.37018871307373047, "learning_rate": 1.9016251769020714e-05, "loss": 0.5509, "step": 13513 }, { "epoch": 0.28661110050688215, "grad_norm": 0.3136070668697357, "learning_rate": 1.9016107521191348e-05, "loss": 0.4589, "step": 13514 }, { "epoch": 0.2866323089648152, "grad_norm": 0.32849621772766113, "learning_rate": 1.901596326333435e-05, "loss": 0.4547, "step": 13515 }, { "epoch": 0.2866535174227482, "grad_norm": 0.3605147898197174, "learning_rate": 1.9015818995449868e-05, "loss": 0.6114, "step": 13516 }, { "epoch": 0.2866747258806812, "grad_norm": 0.38338249921798706, "learning_rate": 1.9015674717538073e-05, "loss": 0.5855, "step": 13517 }, { "epoch": 0.28669593433861423, "grad_norm": 0.33900022506713867, "learning_rate": 1.9015530429599126e-05, "loss": 0.4844, "step": 13518 }, { "epoch": 0.28671714279654725, "grad_norm": 0.3291042447090149, "learning_rate": 1.901538613163318e-05, "loss": 0.5012, "step": 13519 }, { "epoch": 0.2867383512544803, "grad_norm": 0.39939776062965393, "learning_rate": 1.9015241823640404e-05, "loss": 0.4935, "step": 13520 }, { "epoch": 0.2867595597124133, "grad_norm": 0.49876388907432556, "learning_rate": 1.901509750562095e-05, "loss": 0.6103, "step": 13521 }, { "epoch": 0.2867807681703463, "grad_norm": 0.7439096570014954, "learning_rate": 1.9014953177574987e-05, "loss": 0.5614, "step": 13522 }, { "epoch": 0.28680197662827933, "grad_norm": 0.3372991681098938, "learning_rate": 1.9014808839502667e-05, "loss": 0.5412, "step": 13523 }, { "epoch": 0.2868231850862124, "grad_norm": 0.3348589241504669, "learning_rate": 1.901466449140416e-05, "loss": 0.4797, "step": 13524 }, { "epoch": 0.2868443935441454, "grad_norm": 0.34254708886146545, "learning_rate": 1.9014520133279618e-05, "loss": 0.5674, "step": 13525 }, { "epoch": 0.28686560200207845, "grad_norm": 0.33199045062065125, "learning_rate": 1.901437576512921e-05, "loss": 0.5688, "step": 13526 }, { "epoch": 0.28688681046001147, "grad_norm": 0.34154829382896423, "learning_rate": 1.9014231386953083e-05, "loss": 0.6015, "step": 13527 }, { "epoch": 0.2869080189179445, "grad_norm": 0.3423226773738861, "learning_rate": 1.901408699875141e-05, "loss": 0.554, "step": 13528 }, { "epoch": 0.2869292273758775, "grad_norm": 0.36718565225601196, "learning_rate": 1.901394260052435e-05, "loss": 0.6245, "step": 13529 }, { "epoch": 0.2869504358338105, "grad_norm": 0.34658554196357727, "learning_rate": 1.9013798192272063e-05, "loss": 0.5739, "step": 13530 }, { "epoch": 0.28697164429174354, "grad_norm": 0.31947773694992065, "learning_rate": 1.90136537739947e-05, "loss": 0.4758, "step": 13531 }, { "epoch": 0.28699285274967656, "grad_norm": 0.34207695722579956, "learning_rate": 1.9013509345692436e-05, "loss": 0.6285, "step": 13532 }, { "epoch": 0.2870140612076096, "grad_norm": 0.3579007089138031, "learning_rate": 1.9013364907365425e-05, "loss": 0.5062, "step": 13533 }, { "epoch": 0.2870352696655426, "grad_norm": 0.35250893235206604, "learning_rate": 1.9013220459013826e-05, "loss": 0.5071, "step": 13534 }, { "epoch": 0.2870564781234756, "grad_norm": 0.2988603115081787, "learning_rate": 1.9013076000637803e-05, "loss": 0.4696, "step": 13535 }, { "epoch": 0.28707768658140864, "grad_norm": 0.41979214549064636, "learning_rate": 1.9012931532237513e-05, "loss": 0.5523, "step": 13536 }, { "epoch": 0.2870988950393417, "grad_norm": 0.32973313331604004, "learning_rate": 1.901278705381312e-05, "loss": 0.5523, "step": 13537 }, { "epoch": 0.28712010349727474, "grad_norm": 0.35145553946495056, "learning_rate": 1.901264256536478e-05, "loss": 0.4772, "step": 13538 }, { "epoch": 0.28714131195520776, "grad_norm": 0.3386284410953522, "learning_rate": 1.9012498066892664e-05, "loss": 0.5705, "step": 13539 }, { "epoch": 0.2871625204131408, "grad_norm": 0.3539902865886688, "learning_rate": 1.9012353558396922e-05, "loss": 0.5676, "step": 13540 }, { "epoch": 0.2871837288710738, "grad_norm": 0.3485250771045685, "learning_rate": 1.901220903987772e-05, "loss": 0.4206, "step": 13541 }, { "epoch": 0.2872049373290068, "grad_norm": 0.3021676540374756, "learning_rate": 1.901206451133522e-05, "loss": 0.4651, "step": 13542 }, { "epoch": 0.28722614578693983, "grad_norm": 0.3652017414569855, "learning_rate": 1.9011919972769576e-05, "loss": 0.555, "step": 13543 }, { "epoch": 0.28724735424487285, "grad_norm": 0.30711570382118225, "learning_rate": 1.9011775424180952e-05, "loss": 0.4627, "step": 13544 }, { "epoch": 0.2872685627028059, "grad_norm": 0.45949843525886536, "learning_rate": 1.901163086556951e-05, "loss": 0.5534, "step": 13545 }, { "epoch": 0.2872897711607389, "grad_norm": 0.33523306250572205, "learning_rate": 1.9011486296935413e-05, "loss": 0.4819, "step": 13546 }, { "epoch": 0.2873109796186719, "grad_norm": 0.3511676788330078, "learning_rate": 1.9011341718278817e-05, "loss": 0.5121, "step": 13547 }, { "epoch": 0.28733218807660493, "grad_norm": 0.3243442475795746, "learning_rate": 1.9011197129599887e-05, "loss": 0.5333, "step": 13548 }, { "epoch": 0.28735339653453795, "grad_norm": 0.32586270570755005, "learning_rate": 1.901105253089878e-05, "loss": 0.5041, "step": 13549 }, { "epoch": 0.28737460499247097, "grad_norm": 0.33741289377212524, "learning_rate": 1.901090792217566e-05, "loss": 0.5008, "step": 13550 }, { "epoch": 0.28739581345040405, "grad_norm": 0.31589600443840027, "learning_rate": 1.9010763303430688e-05, "loss": 0.4881, "step": 13551 }, { "epoch": 0.28741702190833707, "grad_norm": 0.320245623588562, "learning_rate": 1.9010618674664022e-05, "loss": 0.5137, "step": 13552 }, { "epoch": 0.2874382303662701, "grad_norm": 0.30036699771881104, "learning_rate": 1.9010474035875822e-05, "loss": 0.4218, "step": 13553 }, { "epoch": 0.2874594388242031, "grad_norm": 0.34636926651000977, "learning_rate": 1.9010329387066253e-05, "loss": 0.4805, "step": 13554 }, { "epoch": 0.2874806472821361, "grad_norm": 0.35932084918022156, "learning_rate": 1.901018472823547e-05, "loss": 0.5254, "step": 13555 }, { "epoch": 0.28750185574006915, "grad_norm": 0.3662574887275696, "learning_rate": 1.9010040059383646e-05, "loss": 0.517, "step": 13556 }, { "epoch": 0.28752306419800217, "grad_norm": 0.3512635827064514, "learning_rate": 1.900989538051093e-05, "loss": 0.5652, "step": 13557 }, { "epoch": 0.2875442726559352, "grad_norm": 0.3890801668167114, "learning_rate": 1.9009750691617484e-05, "loss": 0.4871, "step": 13558 }, { "epoch": 0.2875654811138682, "grad_norm": 0.3707369267940521, "learning_rate": 1.9009605992703473e-05, "loss": 0.4898, "step": 13559 }, { "epoch": 0.2875866895718012, "grad_norm": 1.9581091403961182, "learning_rate": 1.9009461283769056e-05, "loss": 0.465, "step": 13560 }, { "epoch": 0.28760789802973424, "grad_norm": 0.43041473627090454, "learning_rate": 1.9009316564814398e-05, "loss": 0.5198, "step": 13561 }, { "epoch": 0.28762910648766726, "grad_norm": 0.34201541543006897, "learning_rate": 1.9009171835839652e-05, "loss": 0.5274, "step": 13562 }, { "epoch": 0.2876503149456003, "grad_norm": 0.3463059365749359, "learning_rate": 1.9009027096844984e-05, "loss": 0.4647, "step": 13563 }, { "epoch": 0.2876715234035333, "grad_norm": 0.31889277696609497, "learning_rate": 1.9008882347830557e-05, "loss": 0.4917, "step": 13564 }, { "epoch": 0.2876927318614664, "grad_norm": 0.3119072914123535, "learning_rate": 1.900873758879653e-05, "loss": 0.499, "step": 13565 }, { "epoch": 0.2877139403193994, "grad_norm": 0.35683342814445496, "learning_rate": 1.9008592819743058e-05, "loss": 0.5543, "step": 13566 }, { "epoch": 0.2877351487773324, "grad_norm": 0.32615840435028076, "learning_rate": 1.900844804067031e-05, "loss": 0.4623, "step": 13567 }, { "epoch": 0.28775635723526544, "grad_norm": 0.34103620052337646, "learning_rate": 1.9008303251578445e-05, "loss": 0.4904, "step": 13568 }, { "epoch": 0.28777756569319846, "grad_norm": 0.3529915511608124, "learning_rate": 1.9008158452467623e-05, "loss": 0.5723, "step": 13569 }, { "epoch": 0.2877987741511315, "grad_norm": 0.30755314230918884, "learning_rate": 1.9008013643338007e-05, "loss": 0.4679, "step": 13570 }, { "epoch": 0.2878199826090645, "grad_norm": 0.32502320408821106, "learning_rate": 1.9007868824189753e-05, "loss": 0.5804, "step": 13571 }, { "epoch": 0.2878411910669975, "grad_norm": 0.3829200267791748, "learning_rate": 1.9007723995023027e-05, "loss": 0.6037, "step": 13572 }, { "epoch": 0.28786239952493053, "grad_norm": 0.3218367397785187, "learning_rate": 1.900757915583799e-05, "loss": 0.5796, "step": 13573 }, { "epoch": 0.28788360798286355, "grad_norm": 0.3646047115325928, "learning_rate": 1.90074343066348e-05, "loss": 0.5392, "step": 13574 }, { "epoch": 0.2879048164407966, "grad_norm": 0.8951069712638855, "learning_rate": 1.900728944741362e-05, "loss": 0.5583, "step": 13575 }, { "epoch": 0.2879260248987296, "grad_norm": 0.3197020888328552, "learning_rate": 1.9007144578174614e-05, "loss": 0.4913, "step": 13576 }, { "epoch": 0.2879472333566626, "grad_norm": 0.3166508078575134, "learning_rate": 1.9006999698917937e-05, "loss": 0.4454, "step": 13577 }, { "epoch": 0.2879684418145957, "grad_norm": 0.3544842302799225, "learning_rate": 1.9006854809643754e-05, "loss": 0.5546, "step": 13578 }, { "epoch": 0.2879896502725287, "grad_norm": 0.31399592757225037, "learning_rate": 1.9006709910352224e-05, "loss": 0.4771, "step": 13579 }, { "epoch": 0.2880108587304617, "grad_norm": 0.35338306427001953, "learning_rate": 1.900656500104351e-05, "loss": 0.5935, "step": 13580 }, { "epoch": 0.28803206718839475, "grad_norm": 0.34167933464050293, "learning_rate": 1.9006420081717774e-05, "loss": 0.5156, "step": 13581 }, { "epoch": 0.28805327564632777, "grad_norm": 0.3226947784423828, "learning_rate": 1.9006275152375172e-05, "loss": 0.4826, "step": 13582 }, { "epoch": 0.2880744841042608, "grad_norm": 0.35695815086364746, "learning_rate": 1.9006130213015877e-05, "loss": 0.5298, "step": 13583 }, { "epoch": 0.2880956925621938, "grad_norm": 0.35229817032814026, "learning_rate": 1.9005985263640034e-05, "loss": 0.4393, "step": 13584 }, { "epoch": 0.2881169010201268, "grad_norm": 0.3299410343170166, "learning_rate": 1.9005840304247817e-05, "loss": 0.4694, "step": 13585 }, { "epoch": 0.28813810947805985, "grad_norm": 1.7146281003952026, "learning_rate": 1.900569533483938e-05, "loss": 0.4584, "step": 13586 }, { "epoch": 0.28815931793599286, "grad_norm": 0.3691066801548004, "learning_rate": 1.900555035541489e-05, "loss": 0.4635, "step": 13587 }, { "epoch": 0.2881805263939259, "grad_norm": 0.31022709608078003, "learning_rate": 1.90054053659745e-05, "loss": 0.5113, "step": 13588 }, { "epoch": 0.2882017348518589, "grad_norm": 0.3763183355331421, "learning_rate": 1.9005260366518383e-05, "loss": 0.4827, "step": 13589 }, { "epoch": 0.2882229433097919, "grad_norm": 0.3532460629940033, "learning_rate": 1.900511535704669e-05, "loss": 0.4894, "step": 13590 }, { "epoch": 0.28824415176772494, "grad_norm": 0.3417191207408905, "learning_rate": 1.9004970337559587e-05, "loss": 0.5044, "step": 13591 }, { "epoch": 0.288265360225658, "grad_norm": 0.39498135447502136, "learning_rate": 1.9004825308057233e-05, "loss": 0.5441, "step": 13592 }, { "epoch": 0.28828656868359104, "grad_norm": 0.33581075072288513, "learning_rate": 1.900468026853979e-05, "loss": 0.5205, "step": 13593 }, { "epoch": 0.28830777714152406, "grad_norm": 0.4424222707748413, "learning_rate": 1.900453521900742e-05, "loss": 0.5201, "step": 13594 }, { "epoch": 0.2883289855994571, "grad_norm": 0.36762338876724243, "learning_rate": 1.9004390159460288e-05, "loss": 0.515, "step": 13595 }, { "epoch": 0.2883501940573901, "grad_norm": 0.31281578540802, "learning_rate": 1.9004245089898544e-05, "loss": 0.5014, "step": 13596 }, { "epoch": 0.2883714025153231, "grad_norm": 0.5440685749053955, "learning_rate": 1.9004100010322363e-05, "loss": 0.4984, "step": 13597 }, { "epoch": 0.28839261097325614, "grad_norm": 0.5315148830413818, "learning_rate": 1.90039549207319e-05, "loss": 0.5669, "step": 13598 }, { "epoch": 0.28841381943118916, "grad_norm": 0.32869160175323486, "learning_rate": 1.9003809821127316e-05, "loss": 0.5176, "step": 13599 }, { "epoch": 0.2884350278891222, "grad_norm": 0.4746123254299164, "learning_rate": 1.900366471150877e-05, "loss": 0.5256, "step": 13600 }, { "epoch": 0.2884562363470552, "grad_norm": 0.3239118456840515, "learning_rate": 1.900351959187643e-05, "loss": 0.5233, "step": 13601 }, { "epoch": 0.2884774448049882, "grad_norm": 0.3433739244937897, "learning_rate": 1.900337446223045e-05, "loss": 0.4434, "step": 13602 }, { "epoch": 0.28849865326292123, "grad_norm": 0.5188084840774536, "learning_rate": 1.9003229322570997e-05, "loss": 0.5919, "step": 13603 }, { "epoch": 0.28851986172085425, "grad_norm": 0.31799864768981934, "learning_rate": 1.9003084172898233e-05, "loss": 0.5358, "step": 13604 }, { "epoch": 0.2885410701787873, "grad_norm": 0.4032004177570343, "learning_rate": 1.9002939013212315e-05, "loss": 0.4636, "step": 13605 }, { "epoch": 0.28856227863672035, "grad_norm": 0.3158668577671051, "learning_rate": 1.9002793843513405e-05, "loss": 0.4843, "step": 13606 }, { "epoch": 0.28858348709465337, "grad_norm": 0.3606559634208679, "learning_rate": 1.9002648663801667e-05, "loss": 0.4711, "step": 13607 }, { "epoch": 0.2886046955525864, "grad_norm": 0.32300761342048645, "learning_rate": 1.900250347407726e-05, "loss": 0.5078, "step": 13608 }, { "epoch": 0.2886259040105194, "grad_norm": 0.33518338203430176, "learning_rate": 1.900235827434035e-05, "loss": 0.6066, "step": 13609 }, { "epoch": 0.2886471124684524, "grad_norm": 0.31695476174354553, "learning_rate": 1.900221306459109e-05, "loss": 0.45, "step": 13610 }, { "epoch": 0.28866832092638545, "grad_norm": 0.5916690230369568, "learning_rate": 1.9002067844829654e-05, "loss": 0.5975, "step": 13611 }, { "epoch": 0.28868952938431847, "grad_norm": 0.3197977840900421, "learning_rate": 1.9001922615056192e-05, "loss": 0.4915, "step": 13612 }, { "epoch": 0.2887107378422515, "grad_norm": 0.3072603642940521, "learning_rate": 1.9001777375270868e-05, "loss": 0.5459, "step": 13613 }, { "epoch": 0.2887319463001845, "grad_norm": 0.3280337154865265, "learning_rate": 1.900163212547385e-05, "loss": 0.5532, "step": 13614 }, { "epoch": 0.2887531547581175, "grad_norm": 0.34341129660606384, "learning_rate": 1.9001486865665293e-05, "loss": 0.5458, "step": 13615 }, { "epoch": 0.28877436321605054, "grad_norm": 0.32685643434524536, "learning_rate": 1.900134159584536e-05, "loss": 0.492, "step": 13616 }, { "epoch": 0.28879557167398356, "grad_norm": 0.3629016578197479, "learning_rate": 1.9001196316014216e-05, "loss": 0.4496, "step": 13617 }, { "epoch": 0.2888167801319166, "grad_norm": 0.30662479996681213, "learning_rate": 1.9001051026172016e-05, "loss": 0.5213, "step": 13618 }, { "epoch": 0.28883798858984966, "grad_norm": 0.3543194532394409, "learning_rate": 1.9000905726318928e-05, "loss": 0.5833, "step": 13619 }, { "epoch": 0.2888591970477827, "grad_norm": 0.34923094511032104, "learning_rate": 1.9000760416455106e-05, "loss": 0.5308, "step": 13620 }, { "epoch": 0.2888804055057157, "grad_norm": 0.3315140902996063, "learning_rate": 1.9000615096580723e-05, "loss": 0.5526, "step": 13621 }, { "epoch": 0.2889016139636487, "grad_norm": 0.3632022738456726, "learning_rate": 1.9000469766695932e-05, "loss": 0.4478, "step": 13622 }, { "epoch": 0.28892282242158174, "grad_norm": 0.40683692693710327, "learning_rate": 1.9000324426800892e-05, "loss": 0.5484, "step": 13623 }, { "epoch": 0.28894403087951476, "grad_norm": 0.32824382185935974, "learning_rate": 1.9000179076895776e-05, "loss": 0.547, "step": 13624 }, { "epoch": 0.2889652393374478, "grad_norm": 0.31633123755455017, "learning_rate": 1.9000033716980733e-05, "loss": 0.5367, "step": 13625 }, { "epoch": 0.2889864477953808, "grad_norm": 0.31878453493118286, "learning_rate": 1.8999888347055932e-05, "loss": 0.4705, "step": 13626 }, { "epoch": 0.2890076562533138, "grad_norm": 0.3425650894641876, "learning_rate": 1.8999742967121537e-05, "loss": 0.4512, "step": 13627 }, { "epoch": 0.28902886471124684, "grad_norm": 0.42794132232666016, "learning_rate": 1.8999597577177705e-05, "loss": 0.6397, "step": 13628 }, { "epoch": 0.28905007316917986, "grad_norm": 0.370989054441452, "learning_rate": 1.8999452177224594e-05, "loss": 0.4965, "step": 13629 }, { "epoch": 0.2890712816271129, "grad_norm": 0.36624836921691895, "learning_rate": 1.899930676726238e-05, "loss": 0.5465, "step": 13630 }, { "epoch": 0.2890924900850459, "grad_norm": 0.39734503626823425, "learning_rate": 1.8999161347291205e-05, "loss": 0.5412, "step": 13631 }, { "epoch": 0.2891136985429789, "grad_norm": 0.3182258903980255, "learning_rate": 1.8999015917311247e-05, "loss": 0.4243, "step": 13632 }, { "epoch": 0.289134907000912, "grad_norm": 0.3807617127895355, "learning_rate": 1.899887047732266e-05, "loss": 0.5307, "step": 13633 }, { "epoch": 0.289156115458845, "grad_norm": 0.286990225315094, "learning_rate": 1.8998725027325608e-05, "loss": 0.4429, "step": 13634 }, { "epoch": 0.28917732391677803, "grad_norm": 0.33482661843299866, "learning_rate": 1.8998579567320253e-05, "loss": 0.5498, "step": 13635 }, { "epoch": 0.28919853237471105, "grad_norm": 0.3120773434638977, "learning_rate": 1.8998434097306753e-05, "loss": 0.4771, "step": 13636 }, { "epoch": 0.28921974083264407, "grad_norm": 0.4240982234477997, "learning_rate": 1.8998288617285273e-05, "loss": 0.5738, "step": 13637 }, { "epoch": 0.2892409492905771, "grad_norm": 0.4812290668487549, "learning_rate": 1.8998143127255978e-05, "loss": 0.5078, "step": 13638 }, { "epoch": 0.2892621577485101, "grad_norm": 0.3608098328113556, "learning_rate": 1.8997997627219026e-05, "loss": 0.5496, "step": 13639 }, { "epoch": 0.2892833662064431, "grad_norm": 0.3906994163990021, "learning_rate": 1.8997852117174576e-05, "loss": 0.6084, "step": 13640 }, { "epoch": 0.28930457466437615, "grad_norm": 0.3289884924888611, "learning_rate": 1.8997706597122794e-05, "loss": 0.5954, "step": 13641 }, { "epoch": 0.28932578312230917, "grad_norm": 0.34961098432540894, "learning_rate": 1.899756106706384e-05, "loss": 0.4723, "step": 13642 }, { "epoch": 0.2893469915802422, "grad_norm": 0.2983507812023163, "learning_rate": 1.899741552699788e-05, "loss": 0.4016, "step": 13643 }, { "epoch": 0.2893682000381752, "grad_norm": 0.42092543840408325, "learning_rate": 1.899726997692507e-05, "loss": 0.5479, "step": 13644 }, { "epoch": 0.2893894084961082, "grad_norm": 0.32897499203681946, "learning_rate": 1.8997124416845575e-05, "loss": 0.5215, "step": 13645 }, { "epoch": 0.28941061695404124, "grad_norm": 0.3217056691646576, "learning_rate": 1.8996978846759557e-05, "loss": 0.5178, "step": 13646 }, { "epoch": 0.2894318254119743, "grad_norm": 0.32902997732162476, "learning_rate": 1.899683326666718e-05, "loss": 0.4501, "step": 13647 }, { "epoch": 0.28945303386990734, "grad_norm": 0.385422021150589, "learning_rate": 1.89966876765686e-05, "loss": 0.5213, "step": 13648 }, { "epoch": 0.28947424232784036, "grad_norm": 0.3217393457889557, "learning_rate": 1.899654207646398e-05, "loss": 0.5373, "step": 13649 }, { "epoch": 0.2894954507857734, "grad_norm": 0.32026588916778564, "learning_rate": 1.8996396466353487e-05, "loss": 0.5127, "step": 13650 }, { "epoch": 0.2895166592437064, "grad_norm": 0.3643474876880646, "learning_rate": 1.8996250846237278e-05, "loss": 0.466, "step": 13651 }, { "epoch": 0.2895378677016394, "grad_norm": 0.34485384821891785, "learning_rate": 1.899610521611552e-05, "loss": 0.5325, "step": 13652 }, { "epoch": 0.28955907615957244, "grad_norm": 0.36943382024765015, "learning_rate": 1.899595957598837e-05, "loss": 0.512, "step": 13653 }, { "epoch": 0.28958028461750546, "grad_norm": 0.33367621898651123, "learning_rate": 1.899581392585599e-05, "loss": 0.4948, "step": 13654 }, { "epoch": 0.2896014930754385, "grad_norm": 0.3774401545524597, "learning_rate": 1.8995668265718546e-05, "loss": 0.4944, "step": 13655 }, { "epoch": 0.2896227015333715, "grad_norm": 0.5708572268486023, "learning_rate": 1.89955225955762e-05, "loss": 0.5799, "step": 13656 }, { "epoch": 0.2896439099913045, "grad_norm": 0.3368956446647644, "learning_rate": 1.8995376915429108e-05, "loss": 0.4588, "step": 13657 }, { "epoch": 0.28966511844923754, "grad_norm": 0.3369399607181549, "learning_rate": 1.899523122527744e-05, "loss": 0.545, "step": 13658 }, { "epoch": 0.28968632690717055, "grad_norm": 1.9812698364257812, "learning_rate": 1.899508552512135e-05, "loss": 0.56, "step": 13659 }, { "epoch": 0.28970753536510363, "grad_norm": 0.3513330817222595, "learning_rate": 1.8994939814961007e-05, "loss": 0.4756, "step": 13660 }, { "epoch": 0.28972874382303665, "grad_norm": 0.3336217403411865, "learning_rate": 1.899479409479657e-05, "loss": 0.5133, "step": 13661 }, { "epoch": 0.28974995228096967, "grad_norm": 0.3105024993419647, "learning_rate": 1.89946483646282e-05, "loss": 0.4931, "step": 13662 }, { "epoch": 0.2897711607389027, "grad_norm": 0.3210752308368683, "learning_rate": 1.8994502624456058e-05, "loss": 0.5899, "step": 13663 }, { "epoch": 0.2897923691968357, "grad_norm": 0.33022764325141907, "learning_rate": 1.8994356874280312e-05, "loss": 0.4985, "step": 13664 }, { "epoch": 0.28981357765476873, "grad_norm": 0.32920148968696594, "learning_rate": 1.8994211114101117e-05, "loss": 0.4883, "step": 13665 }, { "epoch": 0.28983478611270175, "grad_norm": 0.3164002001285553, "learning_rate": 1.8994065343918642e-05, "loss": 0.4503, "step": 13666 }, { "epoch": 0.28985599457063477, "grad_norm": 0.34844717383384705, "learning_rate": 1.8993919563733044e-05, "loss": 0.4438, "step": 13667 }, { "epoch": 0.2898772030285678, "grad_norm": 0.33368805050849915, "learning_rate": 1.8993773773544484e-05, "loss": 0.502, "step": 13668 }, { "epoch": 0.2898984114865008, "grad_norm": 0.38653483986854553, "learning_rate": 1.8993627973353133e-05, "loss": 0.5053, "step": 13669 }, { "epoch": 0.2899196199444338, "grad_norm": 0.370207279920578, "learning_rate": 1.8993482163159143e-05, "loss": 0.5896, "step": 13670 }, { "epoch": 0.28994082840236685, "grad_norm": 0.33165106177330017, "learning_rate": 1.899333634296268e-05, "loss": 0.474, "step": 13671 }, { "epoch": 0.28996203686029987, "grad_norm": 0.33289024233818054, "learning_rate": 1.899319051276391e-05, "loss": 0.4868, "step": 13672 }, { "epoch": 0.2899832453182329, "grad_norm": 0.35732877254486084, "learning_rate": 1.899304467256299e-05, "loss": 0.4641, "step": 13673 }, { "epoch": 0.29000445377616596, "grad_norm": 0.38962268829345703, "learning_rate": 1.899289882236008e-05, "loss": 0.4977, "step": 13674 }, { "epoch": 0.290025662234099, "grad_norm": 0.35759496688842773, "learning_rate": 1.899275296215535e-05, "loss": 0.4398, "step": 13675 }, { "epoch": 0.290046870692032, "grad_norm": 0.3415929973125458, "learning_rate": 1.8992607091948958e-05, "loss": 0.4924, "step": 13676 }, { "epoch": 0.290068079149965, "grad_norm": 0.3588719964027405, "learning_rate": 1.8992461211741067e-05, "loss": 0.4949, "step": 13677 }, { "epoch": 0.29008928760789804, "grad_norm": 0.36806297302246094, "learning_rate": 1.899231532153184e-05, "loss": 0.5481, "step": 13678 }, { "epoch": 0.29011049606583106, "grad_norm": 0.29993781447410583, "learning_rate": 1.8992169421321435e-05, "loss": 0.4877, "step": 13679 }, { "epoch": 0.2901317045237641, "grad_norm": 0.35066741704940796, "learning_rate": 1.8992023511110018e-05, "loss": 0.414, "step": 13680 }, { "epoch": 0.2901529129816971, "grad_norm": 0.370979905128479, "learning_rate": 1.899187759089775e-05, "loss": 0.5428, "step": 13681 }, { "epoch": 0.2901741214396301, "grad_norm": 0.3322605490684509, "learning_rate": 1.8991731660684792e-05, "loss": 0.5191, "step": 13682 }, { "epoch": 0.29019532989756314, "grad_norm": 0.34151679277420044, "learning_rate": 1.8991585720471312e-05, "loss": 0.5499, "step": 13683 }, { "epoch": 0.29021653835549616, "grad_norm": 0.3351231813430786, "learning_rate": 1.8991439770257467e-05, "loss": 0.5375, "step": 13684 }, { "epoch": 0.2902377468134292, "grad_norm": 0.35294008255004883, "learning_rate": 1.899129381004342e-05, "loss": 0.4826, "step": 13685 }, { "epoch": 0.2902589552713622, "grad_norm": 0.3547428846359253, "learning_rate": 1.899114783982934e-05, "loss": 0.489, "step": 13686 }, { "epoch": 0.2902801637292952, "grad_norm": 0.3648480176925659, "learning_rate": 1.8991001859615375e-05, "loss": 0.5733, "step": 13687 }, { "epoch": 0.2903013721872283, "grad_norm": 0.3308126628398895, "learning_rate": 1.89908558694017e-05, "loss": 0.4399, "step": 13688 }, { "epoch": 0.2903225806451613, "grad_norm": 0.3385576009750366, "learning_rate": 1.8990709869188472e-05, "loss": 0.5264, "step": 13689 }, { "epoch": 0.29034378910309433, "grad_norm": 0.39594414830207825, "learning_rate": 1.8990563858975854e-05, "loss": 0.5368, "step": 13690 }, { "epoch": 0.29036499756102735, "grad_norm": 0.3467051088809967, "learning_rate": 1.8990417838764012e-05, "loss": 0.6084, "step": 13691 }, { "epoch": 0.29038620601896037, "grad_norm": 0.3168160617351532, "learning_rate": 1.8990271808553102e-05, "loss": 0.488, "step": 13692 }, { "epoch": 0.2904074144768934, "grad_norm": 0.3438895642757416, "learning_rate": 1.8990125768343293e-05, "loss": 0.4828, "step": 13693 }, { "epoch": 0.2904286229348264, "grad_norm": 0.37819790840148926, "learning_rate": 1.8989979718134745e-05, "loss": 0.54, "step": 13694 }, { "epoch": 0.2904498313927594, "grad_norm": 0.3182566165924072, "learning_rate": 1.8989833657927613e-05, "loss": 0.5037, "step": 13695 }, { "epoch": 0.29047103985069245, "grad_norm": 0.35708871483802795, "learning_rate": 1.898968758772207e-05, "loss": 0.5161, "step": 13696 }, { "epoch": 0.29049224830862547, "grad_norm": 2.661904811859131, "learning_rate": 1.8989541507518277e-05, "loss": 0.6341, "step": 13697 }, { "epoch": 0.2905134567665585, "grad_norm": 0.3411925435066223, "learning_rate": 1.8989395417316394e-05, "loss": 0.5428, "step": 13698 }, { "epoch": 0.2905346652244915, "grad_norm": 0.3424413204193115, "learning_rate": 1.8989249317116577e-05, "loss": 0.5907, "step": 13699 }, { "epoch": 0.2905558736824245, "grad_norm": 0.35375821590423584, "learning_rate": 1.8989103206919e-05, "loss": 0.4784, "step": 13700 }, { "epoch": 0.2905770821403576, "grad_norm": 0.332014799118042, "learning_rate": 1.8988957086723824e-05, "loss": 0.4715, "step": 13701 }, { "epoch": 0.2905982905982906, "grad_norm": 0.39529353380203247, "learning_rate": 1.89888109565312e-05, "loss": 0.5048, "step": 13702 }, { "epoch": 0.29061949905622364, "grad_norm": 0.37414252758026123, "learning_rate": 1.8988664816341303e-05, "loss": 0.5005, "step": 13703 }, { "epoch": 0.29064070751415666, "grad_norm": 0.2979033291339874, "learning_rate": 1.898851866615429e-05, "loss": 0.4764, "step": 13704 }, { "epoch": 0.2906619159720897, "grad_norm": 0.45874348282814026, "learning_rate": 1.8988372505970326e-05, "loss": 0.6074, "step": 13705 }, { "epoch": 0.2906831244300227, "grad_norm": 0.3529585599899292, "learning_rate": 1.898822633578957e-05, "loss": 0.5455, "step": 13706 }, { "epoch": 0.2907043328879557, "grad_norm": 0.32182273268699646, "learning_rate": 1.898808015561219e-05, "loss": 0.5259, "step": 13707 }, { "epoch": 0.29072554134588874, "grad_norm": 0.3132936656475067, "learning_rate": 1.8987933965438343e-05, "loss": 0.4301, "step": 13708 }, { "epoch": 0.29074674980382176, "grad_norm": 0.3619574308395386, "learning_rate": 1.8987787765268195e-05, "loss": 0.5742, "step": 13709 }, { "epoch": 0.2907679582617548, "grad_norm": 0.3190385401248932, "learning_rate": 1.8987641555101907e-05, "loss": 0.444, "step": 13710 }, { "epoch": 0.2907891667196878, "grad_norm": 0.3388917148113251, "learning_rate": 1.8987495334939643e-05, "loss": 0.4657, "step": 13711 }, { "epoch": 0.2908103751776208, "grad_norm": 0.3313465118408203, "learning_rate": 1.898734910478156e-05, "loss": 0.4805, "step": 13712 }, { "epoch": 0.29083158363555384, "grad_norm": 0.3857221305370331, "learning_rate": 1.8987202864627833e-05, "loss": 0.5226, "step": 13713 }, { "epoch": 0.29085279209348686, "grad_norm": 0.3803218603134155, "learning_rate": 1.898705661447861e-05, "loss": 0.5802, "step": 13714 }, { "epoch": 0.29087400055141993, "grad_norm": 0.44750863313674927, "learning_rate": 1.8986910354334065e-05, "loss": 0.4125, "step": 13715 }, { "epoch": 0.29089520900935295, "grad_norm": 0.3359717130661011, "learning_rate": 1.8986764084194354e-05, "loss": 0.4774, "step": 13716 }, { "epoch": 0.29091641746728597, "grad_norm": 0.45680326223373413, "learning_rate": 1.8986617804059647e-05, "loss": 0.6169, "step": 13717 }, { "epoch": 0.290937625925219, "grad_norm": 0.30340325832366943, "learning_rate": 1.8986471513930098e-05, "loss": 0.4401, "step": 13718 }, { "epoch": 0.290958834383152, "grad_norm": 0.3848130702972412, "learning_rate": 1.898632521380587e-05, "loss": 0.5702, "step": 13719 }, { "epoch": 0.29098004284108503, "grad_norm": 0.3353922367095947, "learning_rate": 1.8986178903687133e-05, "loss": 0.4521, "step": 13720 }, { "epoch": 0.29100125129901805, "grad_norm": 0.3609045147895813, "learning_rate": 1.8986032583574047e-05, "loss": 0.4667, "step": 13721 }, { "epoch": 0.29102245975695107, "grad_norm": 0.3991590440273285, "learning_rate": 1.898588625346677e-05, "loss": 0.4505, "step": 13722 }, { "epoch": 0.2910436682148841, "grad_norm": 0.35898903012275696, "learning_rate": 1.8985739913365473e-05, "loss": 0.4666, "step": 13723 }, { "epoch": 0.2910648766728171, "grad_norm": 0.3609324097633362, "learning_rate": 1.8985593563270314e-05, "loss": 0.5531, "step": 13724 }, { "epoch": 0.2910860851307501, "grad_norm": 0.39281922578811646, "learning_rate": 1.898544720318145e-05, "loss": 0.5463, "step": 13725 }, { "epoch": 0.29110729358868315, "grad_norm": 0.3436453938484192, "learning_rate": 1.8985300833099055e-05, "loss": 0.447, "step": 13726 }, { "epoch": 0.29112850204661617, "grad_norm": 0.38090842962265015, "learning_rate": 1.8985154453023283e-05, "loss": 0.5587, "step": 13727 }, { "epoch": 0.29114971050454924, "grad_norm": 0.3454335033893585, "learning_rate": 1.8985008062954305e-05, "loss": 0.5904, "step": 13728 }, { "epoch": 0.29117091896248226, "grad_norm": 0.3249136209487915, "learning_rate": 1.8984861662892276e-05, "loss": 0.4882, "step": 13729 }, { "epoch": 0.2911921274204153, "grad_norm": 0.3518342971801758, "learning_rate": 1.898471525283736e-05, "loss": 0.5377, "step": 13730 }, { "epoch": 0.2912133358783483, "grad_norm": 0.33456748723983765, "learning_rate": 1.8984568832789726e-05, "loss": 0.5487, "step": 13731 }, { "epoch": 0.2912345443362813, "grad_norm": 0.3447062373161316, "learning_rate": 1.898442240274953e-05, "loss": 0.5216, "step": 13732 }, { "epoch": 0.29125575279421434, "grad_norm": 0.35302141308784485, "learning_rate": 1.898427596271694e-05, "loss": 0.5635, "step": 13733 }, { "epoch": 0.29127696125214736, "grad_norm": 0.32210779190063477, "learning_rate": 1.8984129512692112e-05, "loss": 0.5554, "step": 13734 }, { "epoch": 0.2912981697100804, "grad_norm": 0.34396350383758545, "learning_rate": 1.8983983052675214e-05, "loss": 0.481, "step": 13735 }, { "epoch": 0.2913193781680134, "grad_norm": 0.36247488856315613, "learning_rate": 1.8983836582666407e-05, "loss": 0.5344, "step": 13736 }, { "epoch": 0.2913405866259464, "grad_norm": 0.355924129486084, "learning_rate": 1.898369010266586e-05, "loss": 0.5381, "step": 13737 }, { "epoch": 0.29136179508387944, "grad_norm": 0.3460390865802765, "learning_rate": 1.898354361267373e-05, "loss": 0.5086, "step": 13738 }, { "epoch": 0.29138300354181246, "grad_norm": 0.3438967168331146, "learning_rate": 1.8983397112690175e-05, "loss": 0.4758, "step": 13739 }, { "epoch": 0.2914042119997455, "grad_norm": 0.3834174573421478, "learning_rate": 1.8983250602715372e-05, "loss": 0.5437, "step": 13740 }, { "epoch": 0.2914254204576785, "grad_norm": 0.33878737688064575, "learning_rate": 1.898310408274947e-05, "loss": 0.4575, "step": 13741 }, { "epoch": 0.29144662891561157, "grad_norm": 0.3716478645801544, "learning_rate": 1.898295755279264e-05, "loss": 0.5772, "step": 13742 }, { "epoch": 0.2914678373735446, "grad_norm": 0.3360125422477722, "learning_rate": 1.8982811012845042e-05, "loss": 0.5413, "step": 13743 }, { "epoch": 0.2914890458314776, "grad_norm": 0.34910520911216736, "learning_rate": 1.898266446290684e-05, "loss": 0.5111, "step": 13744 }, { "epoch": 0.29151025428941063, "grad_norm": 0.3138291835784912, "learning_rate": 1.8982517902978196e-05, "loss": 0.616, "step": 13745 }, { "epoch": 0.29153146274734365, "grad_norm": 0.3244510889053345, "learning_rate": 1.8982371333059274e-05, "loss": 0.4837, "step": 13746 }, { "epoch": 0.29155267120527667, "grad_norm": 0.38174179196357727, "learning_rate": 1.8982224753150236e-05, "loss": 0.6563, "step": 13747 }, { "epoch": 0.2915738796632097, "grad_norm": 0.32350432872772217, "learning_rate": 1.8982078163251247e-05, "loss": 0.4778, "step": 13748 }, { "epoch": 0.2915950881211427, "grad_norm": 0.33726903796195984, "learning_rate": 1.8981931563362467e-05, "loss": 0.5344, "step": 13749 }, { "epoch": 0.29161629657907573, "grad_norm": 0.46110060811042786, "learning_rate": 1.898178495348406e-05, "loss": 0.5234, "step": 13750 }, { "epoch": 0.29163750503700875, "grad_norm": 0.3628002107143402, "learning_rate": 1.8981638333616193e-05, "loss": 0.5366, "step": 13751 }, { "epoch": 0.29165871349494177, "grad_norm": 0.33940085768699646, "learning_rate": 1.8981491703759025e-05, "loss": 0.5684, "step": 13752 }, { "epoch": 0.2916799219528748, "grad_norm": 0.3480411469936371, "learning_rate": 1.898134506391272e-05, "loss": 0.5185, "step": 13753 }, { "epoch": 0.2917011304108078, "grad_norm": 0.37143242359161377, "learning_rate": 1.898119841407744e-05, "loss": 0.5085, "step": 13754 }, { "epoch": 0.2917223388687408, "grad_norm": 0.3307648003101349, "learning_rate": 1.898105175425335e-05, "loss": 0.5055, "step": 13755 }, { "epoch": 0.2917435473266739, "grad_norm": 0.36010265350341797, "learning_rate": 1.898090508444061e-05, "loss": 0.4722, "step": 13756 }, { "epoch": 0.2917647557846069, "grad_norm": 0.41709020733833313, "learning_rate": 1.8980758404639387e-05, "loss": 0.5986, "step": 13757 }, { "epoch": 0.29178596424253994, "grad_norm": 0.43023380637168884, "learning_rate": 1.8980611714849843e-05, "loss": 0.6331, "step": 13758 }, { "epoch": 0.29180717270047296, "grad_norm": 0.35830652713775635, "learning_rate": 1.898046501507214e-05, "loss": 0.5617, "step": 13759 }, { "epoch": 0.291828381158406, "grad_norm": 0.33817246556282043, "learning_rate": 1.8980318305306444e-05, "loss": 0.5823, "step": 13760 }, { "epoch": 0.291849589616339, "grad_norm": 0.3251418471336365, "learning_rate": 1.898017158555291e-05, "loss": 0.4679, "step": 13761 }, { "epoch": 0.291870798074272, "grad_norm": 0.3308916389942169, "learning_rate": 1.8980024855811716e-05, "loss": 0.6119, "step": 13762 }, { "epoch": 0.29189200653220504, "grad_norm": 0.34199461340904236, "learning_rate": 1.8979878116083007e-05, "loss": 0.5074, "step": 13763 }, { "epoch": 0.29191321499013806, "grad_norm": 0.362718790769577, "learning_rate": 1.8979731366366963e-05, "loss": 0.5802, "step": 13764 }, { "epoch": 0.2919344234480711, "grad_norm": 0.3132096827030182, "learning_rate": 1.8979584606663736e-05, "loss": 0.4815, "step": 13765 }, { "epoch": 0.2919556319060041, "grad_norm": 0.32939612865448, "learning_rate": 1.8979437836973494e-05, "loss": 0.5261, "step": 13766 }, { "epoch": 0.2919768403639371, "grad_norm": 0.31215211749076843, "learning_rate": 1.8979291057296397e-05, "loss": 0.5047, "step": 13767 }, { "epoch": 0.29199804882187014, "grad_norm": 0.3685440123081207, "learning_rate": 1.8979144267632616e-05, "loss": 0.4807, "step": 13768 }, { "epoch": 0.2920192572798032, "grad_norm": 0.3123037815093994, "learning_rate": 1.8978997467982303e-05, "loss": 0.4442, "step": 13769 }, { "epoch": 0.29204046573773623, "grad_norm": 0.3341536223888397, "learning_rate": 1.897885065834563e-05, "loss": 0.4885, "step": 13770 }, { "epoch": 0.29206167419566925, "grad_norm": 0.3498120903968811, "learning_rate": 1.8978703838722758e-05, "loss": 0.4805, "step": 13771 }, { "epoch": 0.29208288265360227, "grad_norm": 0.3613438308238983, "learning_rate": 1.8978557009113848e-05, "loss": 0.5009, "step": 13772 }, { "epoch": 0.2921040911115353, "grad_norm": 0.32029542326927185, "learning_rate": 1.8978410169519064e-05, "loss": 0.5132, "step": 13773 }, { "epoch": 0.2921252995694683, "grad_norm": 0.3395979106426239, "learning_rate": 1.8978263319938574e-05, "loss": 0.4902, "step": 13774 }, { "epoch": 0.29214650802740133, "grad_norm": 0.35468190908432007, "learning_rate": 1.8978116460372535e-05, "loss": 0.4818, "step": 13775 }, { "epoch": 0.29216771648533435, "grad_norm": 0.36058780550956726, "learning_rate": 1.897796959082111e-05, "loss": 0.4738, "step": 13776 }, { "epoch": 0.29218892494326737, "grad_norm": 0.38655030727386475, "learning_rate": 1.897782271128447e-05, "loss": 0.5627, "step": 13777 }, { "epoch": 0.2922101334012004, "grad_norm": 0.3612041473388672, "learning_rate": 1.897767582176277e-05, "loss": 0.5277, "step": 13778 }, { "epoch": 0.2922313418591334, "grad_norm": 0.36348506808280945, "learning_rate": 1.8977528922256177e-05, "loss": 0.4661, "step": 13779 }, { "epoch": 0.29225255031706643, "grad_norm": 0.34781354665756226, "learning_rate": 1.8977382012764857e-05, "loss": 0.5302, "step": 13780 }, { "epoch": 0.29227375877499945, "grad_norm": 0.31844016909599304, "learning_rate": 1.897723509328897e-05, "loss": 0.3922, "step": 13781 }, { "epoch": 0.29229496723293247, "grad_norm": 0.3000158369541168, "learning_rate": 1.897708816382868e-05, "loss": 0.4892, "step": 13782 }, { "epoch": 0.29231617569086554, "grad_norm": 0.31313276290893555, "learning_rate": 1.897694122438415e-05, "loss": 0.5114, "step": 13783 }, { "epoch": 0.29233738414879856, "grad_norm": 0.45797866582870483, "learning_rate": 1.8976794274955542e-05, "loss": 0.5132, "step": 13784 }, { "epoch": 0.2923585926067316, "grad_norm": 0.3224372863769531, "learning_rate": 1.8976647315543025e-05, "loss": 0.4816, "step": 13785 }, { "epoch": 0.2923798010646646, "grad_norm": 0.4191831648349762, "learning_rate": 1.8976500346146757e-05, "loss": 0.5202, "step": 13786 }, { "epoch": 0.2924010095225976, "grad_norm": 0.4025306701660156, "learning_rate": 1.8976353366766904e-05, "loss": 0.5456, "step": 13787 }, { "epoch": 0.29242221798053064, "grad_norm": 0.38219907879829407, "learning_rate": 1.897620637740363e-05, "loss": 0.5366, "step": 13788 }, { "epoch": 0.29244342643846366, "grad_norm": 0.40357136726379395, "learning_rate": 1.8976059378057097e-05, "loss": 0.4653, "step": 13789 }, { "epoch": 0.2924646348963967, "grad_norm": 0.306627482175827, "learning_rate": 1.8975912368727464e-05, "loss": 0.4683, "step": 13790 }, { "epoch": 0.2924858433543297, "grad_norm": 0.3487766981124878, "learning_rate": 1.8975765349414904e-05, "loss": 0.575, "step": 13791 }, { "epoch": 0.2925070518122627, "grad_norm": 0.40431270003318787, "learning_rate": 1.8975618320119573e-05, "loss": 0.4731, "step": 13792 }, { "epoch": 0.29252826027019574, "grad_norm": 0.35162070393562317, "learning_rate": 1.897547128084164e-05, "loss": 0.4539, "step": 13793 }, { "epoch": 0.29254946872812876, "grad_norm": 0.32362672686576843, "learning_rate": 1.8975324231581264e-05, "loss": 0.5151, "step": 13794 }, { "epoch": 0.2925706771860618, "grad_norm": 0.3505438566207886, "learning_rate": 1.897517717233861e-05, "loss": 0.571, "step": 13795 }, { "epoch": 0.2925918856439948, "grad_norm": 0.596621572971344, "learning_rate": 1.8975030103113843e-05, "loss": 0.5102, "step": 13796 }, { "epoch": 0.2926130941019279, "grad_norm": 0.40768447518348694, "learning_rate": 1.8974883023907126e-05, "loss": 0.4112, "step": 13797 }, { "epoch": 0.2926343025598609, "grad_norm": 0.3196864128112793, "learning_rate": 1.8974735934718623e-05, "loss": 0.5141, "step": 13798 }, { "epoch": 0.2926555110177939, "grad_norm": 0.3230871260166168, "learning_rate": 1.8974588835548492e-05, "loss": 0.4789, "step": 13799 }, { "epoch": 0.29267671947572693, "grad_norm": 0.3500753343105316, "learning_rate": 1.8974441726396907e-05, "loss": 0.5801, "step": 13800 }, { "epoch": 0.29269792793365995, "grad_norm": 0.4208708703517914, "learning_rate": 1.8974294607264025e-05, "loss": 0.5477, "step": 13801 }, { "epoch": 0.29271913639159297, "grad_norm": 0.3255804181098938, "learning_rate": 1.8974147478150008e-05, "loss": 0.5151, "step": 13802 }, { "epoch": 0.292740344849526, "grad_norm": 0.32557496428489685, "learning_rate": 1.8974000339055022e-05, "loss": 0.4724, "step": 13803 }, { "epoch": 0.292761553307459, "grad_norm": 0.359516978263855, "learning_rate": 1.897385318997923e-05, "loss": 0.5178, "step": 13804 }, { "epoch": 0.29278276176539203, "grad_norm": 0.3297279179096222, "learning_rate": 1.89737060309228e-05, "loss": 0.4768, "step": 13805 }, { "epoch": 0.29280397022332505, "grad_norm": 0.3386845290660858, "learning_rate": 1.897355886188589e-05, "loss": 0.4916, "step": 13806 }, { "epoch": 0.29282517868125807, "grad_norm": 0.3871232569217682, "learning_rate": 1.8973411682868662e-05, "loss": 0.4762, "step": 13807 }, { "epoch": 0.2928463871391911, "grad_norm": 0.32733985781669617, "learning_rate": 1.897326449387129e-05, "loss": 0.5459, "step": 13808 }, { "epoch": 0.2928675955971241, "grad_norm": 0.4180768132209778, "learning_rate": 1.8973117294893927e-05, "loss": 0.6416, "step": 13809 }, { "epoch": 0.2928888040550572, "grad_norm": 0.3295648694038391, "learning_rate": 1.8972970085936745e-05, "loss": 0.5395, "step": 13810 }, { "epoch": 0.2929100125129902, "grad_norm": 0.34111326932907104, "learning_rate": 1.89728228669999e-05, "loss": 0.4813, "step": 13811 }, { "epoch": 0.2929312209709232, "grad_norm": 0.3331316411495209, "learning_rate": 1.897267563808356e-05, "loss": 0.5144, "step": 13812 }, { "epoch": 0.29295242942885624, "grad_norm": 0.3304596543312073, "learning_rate": 1.897252839918789e-05, "loss": 0.4771, "step": 13813 }, { "epoch": 0.29297363788678926, "grad_norm": 0.4009401500225067, "learning_rate": 1.8972381150313047e-05, "loss": 0.5531, "step": 13814 }, { "epoch": 0.2929948463447223, "grad_norm": 0.39524272084236145, "learning_rate": 1.8972233891459203e-05, "loss": 0.5219, "step": 13815 }, { "epoch": 0.2930160548026553, "grad_norm": 0.3322722911834717, "learning_rate": 1.897208662262652e-05, "loss": 0.5185, "step": 13816 }, { "epoch": 0.2930372632605883, "grad_norm": 0.3330884575843811, "learning_rate": 1.897193934381516e-05, "loss": 0.4056, "step": 13817 }, { "epoch": 0.29305847171852134, "grad_norm": 0.3287203013896942, "learning_rate": 1.8971792055025285e-05, "loss": 0.5864, "step": 13818 }, { "epoch": 0.29307968017645436, "grad_norm": 0.3197672367095947, "learning_rate": 1.897164475625706e-05, "loss": 0.4735, "step": 13819 }, { "epoch": 0.2931008886343874, "grad_norm": 0.369089812040329, "learning_rate": 1.8971497447510652e-05, "loss": 0.5215, "step": 13820 }, { "epoch": 0.2931220970923204, "grad_norm": 0.2951434552669525, "learning_rate": 1.8971350128786222e-05, "loss": 0.5211, "step": 13821 }, { "epoch": 0.2931433055502534, "grad_norm": 0.3366055190563202, "learning_rate": 1.897120280008393e-05, "loss": 0.5292, "step": 13822 }, { "epoch": 0.29316451400818644, "grad_norm": 0.33473122119903564, "learning_rate": 1.897105546140395e-05, "loss": 0.5096, "step": 13823 }, { "epoch": 0.2931857224661195, "grad_norm": 0.3701033890247345, "learning_rate": 1.897090811274644e-05, "loss": 0.5918, "step": 13824 }, { "epoch": 0.29320693092405253, "grad_norm": 0.4418455958366394, "learning_rate": 1.897076075411156e-05, "loss": 0.5624, "step": 13825 }, { "epoch": 0.29322813938198555, "grad_norm": 0.3213369846343994, "learning_rate": 1.897061338549948e-05, "loss": 0.554, "step": 13826 }, { "epoch": 0.2932493478399186, "grad_norm": 0.33426254987716675, "learning_rate": 1.8970466006910364e-05, "loss": 0.4197, "step": 13827 }, { "epoch": 0.2932705562978516, "grad_norm": 0.3493935167789459, "learning_rate": 1.897031861834437e-05, "loss": 0.5375, "step": 13828 }, { "epoch": 0.2932917647557846, "grad_norm": 0.3152850270271301, "learning_rate": 1.8970171219801666e-05, "loss": 0.4775, "step": 13829 }, { "epoch": 0.29331297321371763, "grad_norm": 0.8590610027313232, "learning_rate": 1.8970023811282417e-05, "loss": 0.5072, "step": 13830 }, { "epoch": 0.29333418167165065, "grad_norm": 0.3331449627876282, "learning_rate": 1.8969876392786783e-05, "loss": 0.4979, "step": 13831 }, { "epoch": 0.29335539012958367, "grad_norm": 0.32282641530036926, "learning_rate": 1.8969728964314934e-05, "loss": 0.5193, "step": 13832 }, { "epoch": 0.2933765985875167, "grad_norm": 0.3650777041912079, "learning_rate": 1.8969581525867026e-05, "loss": 0.5863, "step": 13833 }, { "epoch": 0.2933978070454497, "grad_norm": 0.37957003712654114, "learning_rate": 1.896943407744323e-05, "loss": 0.5356, "step": 13834 }, { "epoch": 0.29341901550338273, "grad_norm": 0.36467671394348145, "learning_rate": 1.896928661904371e-05, "loss": 0.5206, "step": 13835 }, { "epoch": 0.29344022396131575, "grad_norm": 0.38611292839050293, "learning_rate": 1.8969139150668626e-05, "loss": 0.4466, "step": 13836 }, { "epoch": 0.29346143241924877, "grad_norm": 0.5091506242752075, "learning_rate": 1.896899167231814e-05, "loss": 0.4866, "step": 13837 }, { "epoch": 0.29348264087718184, "grad_norm": 0.35390058159828186, "learning_rate": 1.896884418399242e-05, "loss": 0.6097, "step": 13838 }, { "epoch": 0.29350384933511486, "grad_norm": 0.37289485335350037, "learning_rate": 1.896869668569163e-05, "loss": 0.6048, "step": 13839 }, { "epoch": 0.2935250577930479, "grad_norm": 0.5000653266906738, "learning_rate": 1.8968549177415937e-05, "loss": 0.4323, "step": 13840 }, { "epoch": 0.2935462662509809, "grad_norm": 0.3449054956436157, "learning_rate": 1.8968401659165497e-05, "loss": 0.5064, "step": 13841 }, { "epoch": 0.2935674747089139, "grad_norm": 0.3261827528476715, "learning_rate": 1.896825413094048e-05, "loss": 0.4774, "step": 13842 }, { "epoch": 0.29358868316684694, "grad_norm": 0.2905406653881073, "learning_rate": 1.8968106592741048e-05, "loss": 0.4859, "step": 13843 }, { "epoch": 0.29360989162477996, "grad_norm": 0.35255199670791626, "learning_rate": 1.8967959044567367e-05, "loss": 0.5453, "step": 13844 }, { "epoch": 0.293631100082713, "grad_norm": 0.2969943583011627, "learning_rate": 1.8967811486419602e-05, "loss": 0.473, "step": 13845 }, { "epoch": 0.293652308540646, "grad_norm": 0.3543855845928192, "learning_rate": 1.896766391829791e-05, "loss": 0.468, "step": 13846 }, { "epoch": 0.293673516998579, "grad_norm": 0.34849342703819275, "learning_rate": 1.8967516340202463e-05, "loss": 0.5145, "step": 13847 }, { "epoch": 0.29369472545651204, "grad_norm": 0.2953813970088959, "learning_rate": 1.8967368752133422e-05, "loss": 0.4979, "step": 13848 }, { "epoch": 0.29371593391444506, "grad_norm": 0.3337647020816803, "learning_rate": 1.896722115409095e-05, "loss": 0.4747, "step": 13849 }, { "epoch": 0.2937371423723781, "grad_norm": 0.34668755531311035, "learning_rate": 1.8967073546075215e-05, "loss": 0.49, "step": 13850 }, { "epoch": 0.29375835083031115, "grad_norm": 0.3389423191547394, "learning_rate": 1.896692592808638e-05, "loss": 0.4832, "step": 13851 }, { "epoch": 0.2937795592882442, "grad_norm": 0.39947426319122314, "learning_rate": 1.8966778300124603e-05, "loss": 0.5056, "step": 13852 }, { "epoch": 0.2938007677461772, "grad_norm": 0.33094242215156555, "learning_rate": 1.8966630662190054e-05, "loss": 0.4899, "step": 13853 }, { "epoch": 0.2938219762041102, "grad_norm": 0.36592456698417664, "learning_rate": 1.89664830142829e-05, "loss": 0.5346, "step": 13854 }, { "epoch": 0.29384318466204323, "grad_norm": 0.3338863253593445, "learning_rate": 1.8966335356403296e-05, "loss": 0.4592, "step": 13855 }, { "epoch": 0.29386439311997625, "grad_norm": 0.372548907995224, "learning_rate": 1.8966187688551415e-05, "loss": 0.5151, "step": 13856 }, { "epoch": 0.2938856015779093, "grad_norm": 0.3583211898803711, "learning_rate": 1.896604001072742e-05, "loss": 0.4631, "step": 13857 }, { "epoch": 0.2939068100358423, "grad_norm": 0.40943074226379395, "learning_rate": 1.896589232293147e-05, "loss": 0.539, "step": 13858 }, { "epoch": 0.2939280184937753, "grad_norm": 0.3765721023082733, "learning_rate": 1.896574462516373e-05, "loss": 0.4213, "step": 13859 }, { "epoch": 0.29394922695170833, "grad_norm": 0.45954903960227966, "learning_rate": 1.8965596917424373e-05, "loss": 0.5656, "step": 13860 }, { "epoch": 0.29397043540964135, "grad_norm": 0.32182231545448303, "learning_rate": 1.8965449199713554e-05, "loss": 0.4941, "step": 13861 }, { "epoch": 0.29399164386757437, "grad_norm": 0.3717540204524994, "learning_rate": 1.896530147203144e-05, "loss": 0.5195, "step": 13862 }, { "epoch": 0.2940128523255074, "grad_norm": 0.41599562764167786, "learning_rate": 1.8965153734378193e-05, "loss": 0.5492, "step": 13863 }, { "epoch": 0.2940340607834404, "grad_norm": 0.29493623971939087, "learning_rate": 1.8965005986753986e-05, "loss": 0.4405, "step": 13864 }, { "epoch": 0.2940552692413735, "grad_norm": 0.2914307415485382, "learning_rate": 1.8964858229158974e-05, "loss": 0.4992, "step": 13865 }, { "epoch": 0.2940764776993065, "grad_norm": 0.39715731143951416, "learning_rate": 1.8964710461593324e-05, "loss": 0.5335, "step": 13866 }, { "epoch": 0.2940976861572395, "grad_norm": 0.3705584704875946, "learning_rate": 1.8964562684057202e-05, "loss": 0.5293, "step": 13867 }, { "epoch": 0.29411889461517254, "grad_norm": 0.30107665061950684, "learning_rate": 1.896441489655077e-05, "loss": 0.5014, "step": 13868 }, { "epoch": 0.29414010307310556, "grad_norm": 0.3623674511909485, "learning_rate": 1.8964267099074195e-05, "loss": 0.493, "step": 13869 }, { "epoch": 0.2941613115310386, "grad_norm": 0.33333054184913635, "learning_rate": 1.896411929162764e-05, "loss": 0.4386, "step": 13870 }, { "epoch": 0.2941825199889716, "grad_norm": 0.3816836178302765, "learning_rate": 1.8963971474211267e-05, "loss": 0.5038, "step": 13871 }, { "epoch": 0.2942037284469046, "grad_norm": 0.3923231363296509, "learning_rate": 1.8963823646825247e-05, "loss": 0.5724, "step": 13872 }, { "epoch": 0.29422493690483764, "grad_norm": 0.33537063002586365, "learning_rate": 1.8963675809469734e-05, "loss": 0.4298, "step": 13873 }, { "epoch": 0.29424614536277066, "grad_norm": 0.38474205136299133, "learning_rate": 1.8963527962144904e-05, "loss": 0.523, "step": 13874 }, { "epoch": 0.2942673538207037, "grad_norm": 0.3240557014942169, "learning_rate": 1.8963380104850916e-05, "loss": 0.4743, "step": 13875 }, { "epoch": 0.2942885622786367, "grad_norm": 0.3061990439891815, "learning_rate": 1.8963232237587933e-05, "loss": 0.4289, "step": 13876 }, { "epoch": 0.2943097707365697, "grad_norm": 0.3194443881511688, "learning_rate": 1.896308436035612e-05, "loss": 0.5091, "step": 13877 }, { "epoch": 0.2943309791945028, "grad_norm": 0.3498864769935608, "learning_rate": 1.8962936473155643e-05, "loss": 0.5539, "step": 13878 }, { "epoch": 0.2943521876524358, "grad_norm": 0.36599379777908325, "learning_rate": 1.8962788575986664e-05, "loss": 0.5181, "step": 13879 }, { "epoch": 0.29437339611036883, "grad_norm": 0.3292437791824341, "learning_rate": 1.8962640668849355e-05, "loss": 0.5254, "step": 13880 }, { "epoch": 0.29439460456830185, "grad_norm": 0.32800406217575073, "learning_rate": 1.896249275174387e-05, "loss": 0.5182, "step": 13881 }, { "epoch": 0.2944158130262349, "grad_norm": 0.3379881680011749, "learning_rate": 1.8962344824670383e-05, "loss": 0.4637, "step": 13882 }, { "epoch": 0.2944370214841679, "grad_norm": 0.30124953389167786, "learning_rate": 1.896219688762905e-05, "loss": 0.4372, "step": 13883 }, { "epoch": 0.2944582299421009, "grad_norm": 0.3149794042110443, "learning_rate": 1.896204894062004e-05, "loss": 0.5126, "step": 13884 }, { "epoch": 0.29447943840003393, "grad_norm": 0.312274694442749, "learning_rate": 1.8961900983643518e-05, "loss": 0.6304, "step": 13885 }, { "epoch": 0.29450064685796695, "grad_norm": 0.31288185715675354, "learning_rate": 1.8961753016699648e-05, "loss": 0.4899, "step": 13886 }, { "epoch": 0.29452185531589997, "grad_norm": 0.3484368622303009, "learning_rate": 1.8961605039788592e-05, "loss": 0.5762, "step": 13887 }, { "epoch": 0.294543063773833, "grad_norm": 0.3886677622795105, "learning_rate": 1.8961457052910516e-05, "loss": 0.5169, "step": 13888 }, { "epoch": 0.294564272231766, "grad_norm": 0.36652636528015137, "learning_rate": 1.8961309056065587e-05, "loss": 0.4755, "step": 13889 }, { "epoch": 0.29458548068969903, "grad_norm": 1.7257078886032104, "learning_rate": 1.896116104925397e-05, "loss": 0.525, "step": 13890 }, { "epoch": 0.29460668914763205, "grad_norm": 0.3300057649612427, "learning_rate": 1.8961013032475825e-05, "loss": 0.5881, "step": 13891 }, { "epoch": 0.2946278976055651, "grad_norm": 0.32122838497161865, "learning_rate": 1.896086500573132e-05, "loss": 0.5349, "step": 13892 }, { "epoch": 0.29464910606349815, "grad_norm": 0.33853575587272644, "learning_rate": 1.896071696902062e-05, "loss": 0.5647, "step": 13893 }, { "epoch": 0.29467031452143116, "grad_norm": 0.3389747142791748, "learning_rate": 1.8960568922343885e-05, "loss": 0.5471, "step": 13894 }, { "epoch": 0.2946915229793642, "grad_norm": 0.3500731289386749, "learning_rate": 1.8960420865701286e-05, "loss": 0.5587, "step": 13895 }, { "epoch": 0.2947127314372972, "grad_norm": 0.33881664276123047, "learning_rate": 1.8960272799092982e-05, "loss": 0.4978, "step": 13896 }, { "epoch": 0.2947339398952302, "grad_norm": 0.34386560320854187, "learning_rate": 1.8960124722519145e-05, "loss": 0.6282, "step": 13897 }, { "epoch": 0.29475514835316324, "grad_norm": 0.33347979187965393, "learning_rate": 1.8959976635979932e-05, "loss": 0.5013, "step": 13898 }, { "epoch": 0.29477635681109626, "grad_norm": 0.37659624218940735, "learning_rate": 1.8959828539475512e-05, "loss": 0.4607, "step": 13899 }, { "epoch": 0.2947975652690293, "grad_norm": 0.34393468499183655, "learning_rate": 1.8959680433006047e-05, "loss": 0.525, "step": 13900 }, { "epoch": 0.2948187737269623, "grad_norm": 0.3628990948200226, "learning_rate": 1.8959532316571704e-05, "loss": 0.4991, "step": 13901 }, { "epoch": 0.2948399821848953, "grad_norm": 0.32748502492904663, "learning_rate": 1.895938419017265e-05, "loss": 0.5077, "step": 13902 }, { "epoch": 0.29486119064282834, "grad_norm": 0.34544941782951355, "learning_rate": 1.895923605380904e-05, "loss": 0.5416, "step": 13903 }, { "epoch": 0.29488239910076136, "grad_norm": 0.3337147533893585, "learning_rate": 1.8959087907481048e-05, "loss": 0.5155, "step": 13904 }, { "epoch": 0.2949036075586944, "grad_norm": 0.6704559922218323, "learning_rate": 1.8958939751188837e-05, "loss": 0.4598, "step": 13905 }, { "epoch": 0.29492481601662746, "grad_norm": 0.34421423077583313, "learning_rate": 1.8958791584932572e-05, "loss": 0.464, "step": 13906 }, { "epoch": 0.2949460244745605, "grad_norm": 0.30048683285713196, "learning_rate": 1.8958643408712417e-05, "loss": 0.5808, "step": 13907 }, { "epoch": 0.2949672329324935, "grad_norm": 0.2993600070476532, "learning_rate": 1.8958495222528536e-05, "loss": 0.457, "step": 13908 }, { "epoch": 0.2949884413904265, "grad_norm": 0.3140372037887573, "learning_rate": 1.8958347026381095e-05, "loss": 0.4245, "step": 13909 }, { "epoch": 0.29500964984835953, "grad_norm": 0.3279534578323364, "learning_rate": 1.8958198820270256e-05, "loss": 0.5426, "step": 13910 }, { "epoch": 0.29503085830629255, "grad_norm": 0.3424042761325836, "learning_rate": 1.895805060419619e-05, "loss": 0.5241, "step": 13911 }, { "epoch": 0.2950520667642256, "grad_norm": 0.38113173842430115, "learning_rate": 1.8957902378159058e-05, "loss": 0.4999, "step": 13912 }, { "epoch": 0.2950732752221586, "grad_norm": 0.3860447108745575, "learning_rate": 1.895775414215902e-05, "loss": 0.544, "step": 13913 }, { "epoch": 0.2950944836800916, "grad_norm": 0.35611382126808167, "learning_rate": 1.895760589619625e-05, "loss": 0.5024, "step": 13914 }, { "epoch": 0.29511569213802463, "grad_norm": 0.3298257291316986, "learning_rate": 1.8957457640270906e-05, "loss": 0.4816, "step": 13915 }, { "epoch": 0.29513690059595765, "grad_norm": 0.3276177644729614, "learning_rate": 1.8957309374383155e-05, "loss": 0.4648, "step": 13916 }, { "epoch": 0.29515810905389067, "grad_norm": 0.31360408663749695, "learning_rate": 1.8957161098533163e-05, "loss": 0.5558, "step": 13917 }, { "epoch": 0.2951793175118237, "grad_norm": 0.3136787414550781, "learning_rate": 1.8957012812721097e-05, "loss": 0.3823, "step": 13918 }, { "epoch": 0.29520052596975677, "grad_norm": 0.3177451193332672, "learning_rate": 1.8956864516947114e-05, "loss": 0.4759, "step": 13919 }, { "epoch": 0.2952217344276898, "grad_norm": 0.35491546988487244, "learning_rate": 1.8956716211211387e-05, "loss": 0.5934, "step": 13920 }, { "epoch": 0.2952429428856228, "grad_norm": 0.35239848494529724, "learning_rate": 1.8956567895514078e-05, "loss": 0.5899, "step": 13921 }, { "epoch": 0.2952641513435558, "grad_norm": 0.2748393416404724, "learning_rate": 1.8956419569855353e-05, "loss": 0.3799, "step": 13922 }, { "epoch": 0.29528535980148884, "grad_norm": 0.32772180438041687, "learning_rate": 1.8956271234235375e-05, "loss": 0.4831, "step": 13923 }, { "epoch": 0.29530656825942186, "grad_norm": 0.33729371428489685, "learning_rate": 1.895612288865431e-05, "loss": 0.5221, "step": 13924 }, { "epoch": 0.2953277767173549, "grad_norm": 0.3318414092063904, "learning_rate": 1.8955974533112323e-05, "loss": 0.5255, "step": 13925 }, { "epoch": 0.2953489851752879, "grad_norm": 0.32229968905448914, "learning_rate": 1.895582616760958e-05, "loss": 0.5276, "step": 13926 }, { "epoch": 0.2953701936332209, "grad_norm": 0.36378568410873413, "learning_rate": 1.895567779214624e-05, "loss": 0.5658, "step": 13927 }, { "epoch": 0.29539140209115394, "grad_norm": 0.32763996720314026, "learning_rate": 1.8955529406722478e-05, "loss": 0.538, "step": 13928 }, { "epoch": 0.29541261054908696, "grad_norm": 0.4399019181728363, "learning_rate": 1.8955381011338454e-05, "loss": 0.456, "step": 13929 }, { "epoch": 0.29543381900702, "grad_norm": 0.4161965250968933, "learning_rate": 1.8955232605994332e-05, "loss": 0.5372, "step": 13930 }, { "epoch": 0.295455027464953, "grad_norm": 0.37800589203834534, "learning_rate": 1.8955084190690275e-05, "loss": 0.5113, "step": 13931 }, { "epoch": 0.295476235922886, "grad_norm": 0.33933964371681213, "learning_rate": 1.8954935765426455e-05, "loss": 0.4761, "step": 13932 }, { "epoch": 0.2954974443808191, "grad_norm": 0.4067699909210205, "learning_rate": 1.8954787330203036e-05, "loss": 0.5465, "step": 13933 }, { "epoch": 0.2955186528387521, "grad_norm": 0.38541871309280396, "learning_rate": 1.8954638885020173e-05, "loss": 0.4765, "step": 13934 }, { "epoch": 0.29553986129668514, "grad_norm": 0.37447455525398254, "learning_rate": 1.8954490429878042e-05, "loss": 0.5911, "step": 13935 }, { "epoch": 0.29556106975461816, "grad_norm": 0.3112410604953766, "learning_rate": 1.8954341964776803e-05, "loss": 0.4731, "step": 13936 }, { "epoch": 0.2955822782125512, "grad_norm": 0.39096930623054504, "learning_rate": 1.8954193489716625e-05, "loss": 0.5465, "step": 13937 }, { "epoch": 0.2956034866704842, "grad_norm": 0.3409067690372467, "learning_rate": 1.8954045004697673e-05, "loss": 0.5246, "step": 13938 }, { "epoch": 0.2956246951284172, "grad_norm": 0.3468918800354004, "learning_rate": 1.8953896509720105e-05, "loss": 0.5365, "step": 13939 }, { "epoch": 0.29564590358635023, "grad_norm": 0.4395216405391693, "learning_rate": 1.8953748004784095e-05, "loss": 0.5681, "step": 13940 }, { "epoch": 0.29566711204428325, "grad_norm": 0.3086511790752411, "learning_rate": 1.89535994898898e-05, "loss": 0.5192, "step": 13941 }, { "epoch": 0.2956883205022163, "grad_norm": 0.35825952887535095, "learning_rate": 1.8953450965037394e-05, "loss": 0.428, "step": 13942 }, { "epoch": 0.2957095289601493, "grad_norm": 0.3285719156265259, "learning_rate": 1.8953302430227034e-05, "loss": 0.4711, "step": 13943 }, { "epoch": 0.2957307374180823, "grad_norm": 0.4001615345478058, "learning_rate": 1.895315388545889e-05, "loss": 0.5362, "step": 13944 }, { "epoch": 0.29575194587601533, "grad_norm": 0.34519875049591064, "learning_rate": 1.8953005330733125e-05, "loss": 0.5808, "step": 13945 }, { "epoch": 0.29577315433394835, "grad_norm": 0.38191601634025574, "learning_rate": 1.895285676604991e-05, "loss": 0.5849, "step": 13946 }, { "epoch": 0.2957943627918814, "grad_norm": 0.3374471664428711, "learning_rate": 1.8952708191409396e-05, "loss": 0.4834, "step": 13947 }, { "epoch": 0.29581557124981445, "grad_norm": 0.32589101791381836, "learning_rate": 1.8952559606811764e-05, "loss": 0.4216, "step": 13948 }, { "epoch": 0.29583677970774747, "grad_norm": 0.47606122493743896, "learning_rate": 1.895241101225717e-05, "loss": 0.5625, "step": 13949 }, { "epoch": 0.2958579881656805, "grad_norm": 0.3249635398387909, "learning_rate": 1.8952262407745788e-05, "loss": 0.5797, "step": 13950 }, { "epoch": 0.2958791966236135, "grad_norm": 0.3503839671611786, "learning_rate": 1.8952113793277777e-05, "loss": 0.5789, "step": 13951 }, { "epoch": 0.2959004050815465, "grad_norm": 0.34128016233444214, "learning_rate": 1.8951965168853296e-05, "loss": 0.5583, "step": 13952 }, { "epoch": 0.29592161353947954, "grad_norm": 0.48215657472610474, "learning_rate": 1.8951816534472522e-05, "loss": 0.585, "step": 13953 }, { "epoch": 0.29594282199741256, "grad_norm": 0.37041157484054565, "learning_rate": 1.8951667890135615e-05, "loss": 0.4452, "step": 13954 }, { "epoch": 0.2959640304553456, "grad_norm": 0.36890867352485657, "learning_rate": 1.895151923584274e-05, "loss": 0.546, "step": 13955 }, { "epoch": 0.2959852389132786, "grad_norm": 0.4580887258052826, "learning_rate": 1.8951370571594063e-05, "loss": 0.4832, "step": 13956 }, { "epoch": 0.2960064473712116, "grad_norm": 0.3707796633243561, "learning_rate": 1.895122189738975e-05, "loss": 0.5528, "step": 13957 }, { "epoch": 0.29602765582914464, "grad_norm": 0.3502032458782196, "learning_rate": 1.895107321322997e-05, "loss": 0.5594, "step": 13958 }, { "epoch": 0.29604886428707766, "grad_norm": 0.3060489594936371, "learning_rate": 1.8950924519114876e-05, "loss": 0.521, "step": 13959 }, { "epoch": 0.29607007274501074, "grad_norm": 0.3506089150905609, "learning_rate": 1.8950775815044643e-05, "loss": 0.4958, "step": 13960 }, { "epoch": 0.29609128120294376, "grad_norm": 0.31942886114120483, "learning_rate": 1.8950627101019438e-05, "loss": 0.5963, "step": 13961 }, { "epoch": 0.2961124896608768, "grad_norm": 0.3267726004123688, "learning_rate": 1.8950478377039424e-05, "loss": 0.5264, "step": 13962 }, { "epoch": 0.2961336981188098, "grad_norm": 0.34121793508529663, "learning_rate": 1.895032964310476e-05, "loss": 0.4713, "step": 13963 }, { "epoch": 0.2961549065767428, "grad_norm": 0.34250086545944214, "learning_rate": 1.8950180899215624e-05, "loss": 0.4266, "step": 13964 }, { "epoch": 0.29617611503467584, "grad_norm": 0.37404900789260864, "learning_rate": 1.895003214537217e-05, "loss": 0.5761, "step": 13965 }, { "epoch": 0.29619732349260885, "grad_norm": 0.35929402709007263, "learning_rate": 1.894988338157457e-05, "loss": 0.4533, "step": 13966 }, { "epoch": 0.2962185319505419, "grad_norm": 0.33146655559539795, "learning_rate": 1.8949734607822988e-05, "loss": 0.5204, "step": 13967 }, { "epoch": 0.2962397404084749, "grad_norm": 0.3281484842300415, "learning_rate": 1.894958582411759e-05, "loss": 0.5618, "step": 13968 }, { "epoch": 0.2962609488664079, "grad_norm": 0.40082553029060364, "learning_rate": 1.894943703045854e-05, "loss": 0.4962, "step": 13969 }, { "epoch": 0.29628215732434093, "grad_norm": 0.32679513096809387, "learning_rate": 1.8949288226846e-05, "loss": 0.4329, "step": 13970 }, { "epoch": 0.29630336578227395, "grad_norm": 0.3251156508922577, "learning_rate": 1.8949139413280143e-05, "loss": 0.4802, "step": 13971 }, { "epoch": 0.296324574240207, "grad_norm": 0.33318403363227844, "learning_rate": 1.8948990589761127e-05, "loss": 0.5107, "step": 13972 }, { "epoch": 0.29634578269814, "grad_norm": 0.3567203879356384, "learning_rate": 1.8948841756289125e-05, "loss": 0.5772, "step": 13973 }, { "epoch": 0.29636699115607307, "grad_norm": 0.4986581802368164, "learning_rate": 1.8948692912864296e-05, "loss": 0.6298, "step": 13974 }, { "epoch": 0.2963881996140061, "grad_norm": 0.3610023856163025, "learning_rate": 1.8948544059486812e-05, "loss": 0.5587, "step": 13975 }, { "epoch": 0.2964094080719391, "grad_norm": 0.3882608711719513, "learning_rate": 1.894839519615683e-05, "loss": 0.4972, "step": 13976 }, { "epoch": 0.2964306165298721, "grad_norm": 0.43761005997657776, "learning_rate": 1.8948246322874524e-05, "loss": 0.4811, "step": 13977 }, { "epoch": 0.29645182498780515, "grad_norm": 0.31852683424949646, "learning_rate": 1.8948097439640062e-05, "loss": 0.4599, "step": 13978 }, { "epoch": 0.29647303344573817, "grad_norm": 0.342043936252594, "learning_rate": 1.8947948546453595e-05, "loss": 0.5113, "step": 13979 }, { "epoch": 0.2964942419036712, "grad_norm": 0.3012596368789673, "learning_rate": 1.8947799643315302e-05, "loss": 0.4701, "step": 13980 }, { "epoch": 0.2965154503616042, "grad_norm": 0.3727598488330841, "learning_rate": 1.8947650730225343e-05, "loss": 0.4558, "step": 13981 }, { "epoch": 0.2965366588195372, "grad_norm": 0.3737618029117584, "learning_rate": 1.894750180718388e-05, "loss": 0.5427, "step": 13982 }, { "epoch": 0.29655786727747024, "grad_norm": 0.34584516286849976, "learning_rate": 1.894735287419109e-05, "loss": 0.5935, "step": 13983 }, { "epoch": 0.29657907573540326, "grad_norm": 0.37934449315071106, "learning_rate": 1.894720393124713e-05, "loss": 0.5463, "step": 13984 }, { "epoch": 0.2966002841933363, "grad_norm": 0.7439669370651245, "learning_rate": 1.8947054978352163e-05, "loss": 0.5406, "step": 13985 }, { "epoch": 0.2966214926512693, "grad_norm": 0.36421191692352295, "learning_rate": 1.8946906015506364e-05, "loss": 0.5487, "step": 13986 }, { "epoch": 0.2966427011092023, "grad_norm": 0.36360663175582886, "learning_rate": 1.8946757042709893e-05, "loss": 0.5617, "step": 13987 }, { "epoch": 0.2966639095671354, "grad_norm": 0.43856894969940186, "learning_rate": 1.8946608059962917e-05, "loss": 0.473, "step": 13988 }, { "epoch": 0.2966851180250684, "grad_norm": 0.35581570863723755, "learning_rate": 1.89464590672656e-05, "loss": 0.58, "step": 13989 }, { "epoch": 0.29670632648300144, "grad_norm": 0.3601246476173401, "learning_rate": 1.894631006461811e-05, "loss": 0.5549, "step": 13990 }, { "epoch": 0.29672753494093446, "grad_norm": 0.31638243794441223, "learning_rate": 1.894616105202061e-05, "loss": 0.4555, "step": 13991 }, { "epoch": 0.2967487433988675, "grad_norm": 0.32088378071784973, "learning_rate": 1.8946012029473268e-05, "loss": 0.4969, "step": 13992 }, { "epoch": 0.2967699518568005, "grad_norm": 0.31498271226882935, "learning_rate": 1.894586299697625e-05, "loss": 0.5148, "step": 13993 }, { "epoch": 0.2967911603147335, "grad_norm": 0.4065302908420563, "learning_rate": 1.8945713954529722e-05, "loss": 0.4155, "step": 13994 }, { "epoch": 0.29681236877266653, "grad_norm": 0.3157701790332794, "learning_rate": 1.8945564902133845e-05, "loss": 0.5032, "step": 13995 }, { "epoch": 0.29683357723059955, "grad_norm": 0.33351150155067444, "learning_rate": 1.894541583978879e-05, "loss": 0.5363, "step": 13996 }, { "epoch": 0.2968547856885326, "grad_norm": 0.4204863905906677, "learning_rate": 1.894526676749472e-05, "loss": 0.4758, "step": 13997 }, { "epoch": 0.2968759941464656, "grad_norm": 0.3163730204105377, "learning_rate": 1.8945117685251805e-05, "loss": 0.5234, "step": 13998 }, { "epoch": 0.2968972026043986, "grad_norm": 0.3965478241443634, "learning_rate": 1.8944968593060206e-05, "loss": 0.5539, "step": 13999 }, { "epoch": 0.29691841106233163, "grad_norm": 0.4005654752254486, "learning_rate": 1.8944819490920093e-05, "loss": 0.5241, "step": 14000 }, { "epoch": 0.2969396195202647, "grad_norm": 0.35916557908058167, "learning_rate": 1.8944670378831626e-05, "loss": 0.5006, "step": 14001 }, { "epoch": 0.29696082797819773, "grad_norm": 0.4504496455192566, "learning_rate": 1.8944521256794977e-05, "loss": 0.49, "step": 14002 }, { "epoch": 0.29698203643613075, "grad_norm": 0.3412771224975586, "learning_rate": 1.8944372124810306e-05, "loss": 0.5045, "step": 14003 }, { "epoch": 0.29700324489406377, "grad_norm": 0.3028221130371094, "learning_rate": 1.8944222982877788e-05, "loss": 0.509, "step": 14004 }, { "epoch": 0.2970244533519968, "grad_norm": 0.3397690951824188, "learning_rate": 1.8944073830997577e-05, "loss": 0.4087, "step": 14005 }, { "epoch": 0.2970456618099298, "grad_norm": 0.3330637216567993, "learning_rate": 1.8943924669169847e-05, "loss": 0.5505, "step": 14006 }, { "epoch": 0.2970668702678628, "grad_norm": 0.3748929798603058, "learning_rate": 1.894377549739476e-05, "loss": 0.5493, "step": 14007 }, { "epoch": 0.29708807872579585, "grad_norm": 0.3448963761329651, "learning_rate": 1.8943626315672487e-05, "loss": 0.5253, "step": 14008 }, { "epoch": 0.29710928718372887, "grad_norm": 0.33248159289360046, "learning_rate": 1.8943477124003187e-05, "loss": 0.4511, "step": 14009 }, { "epoch": 0.2971304956416619, "grad_norm": 0.34169909358024597, "learning_rate": 1.894332792238703e-05, "loss": 0.5002, "step": 14010 }, { "epoch": 0.2971517040995949, "grad_norm": 0.3629803955554962, "learning_rate": 1.8943178710824183e-05, "loss": 0.5454, "step": 14011 }, { "epoch": 0.2971729125575279, "grad_norm": 0.3448020815849304, "learning_rate": 1.894302948931481e-05, "loss": 0.5244, "step": 14012 }, { "epoch": 0.29719412101546094, "grad_norm": 0.34944871068000793, "learning_rate": 1.894288025785908e-05, "loss": 0.4597, "step": 14013 }, { "epoch": 0.29721532947339396, "grad_norm": 0.3393433690071106, "learning_rate": 1.894273101645715e-05, "loss": 0.5191, "step": 14014 }, { "epoch": 0.29723653793132704, "grad_norm": 0.3372744917869568, "learning_rate": 1.8942581765109196e-05, "loss": 0.5168, "step": 14015 }, { "epoch": 0.29725774638926006, "grad_norm": 0.49503588676452637, "learning_rate": 1.894243250381538e-05, "loss": 0.4696, "step": 14016 }, { "epoch": 0.2972789548471931, "grad_norm": 0.32912445068359375, "learning_rate": 1.8942283232575867e-05, "loss": 0.4935, "step": 14017 }, { "epoch": 0.2973001633051261, "grad_norm": 0.34731560945510864, "learning_rate": 1.8942133951390828e-05, "loss": 0.4831, "step": 14018 }, { "epoch": 0.2973213717630591, "grad_norm": 0.3581436574459076, "learning_rate": 1.8941984660260423e-05, "loss": 0.5984, "step": 14019 }, { "epoch": 0.29734258022099214, "grad_norm": 0.33305883407592773, "learning_rate": 1.8941835359184817e-05, "loss": 0.5421, "step": 14020 }, { "epoch": 0.29736378867892516, "grad_norm": 0.369266152381897, "learning_rate": 1.8941686048164184e-05, "loss": 0.5597, "step": 14021 }, { "epoch": 0.2973849971368582, "grad_norm": 0.32795482873916626, "learning_rate": 1.8941536727198683e-05, "loss": 0.467, "step": 14022 }, { "epoch": 0.2974062055947912, "grad_norm": 0.4149807393550873, "learning_rate": 1.8941387396288483e-05, "loss": 0.5508, "step": 14023 }, { "epoch": 0.2974274140527242, "grad_norm": 0.3494647443294525, "learning_rate": 1.8941238055433747e-05, "loss": 0.502, "step": 14024 }, { "epoch": 0.29744862251065723, "grad_norm": 0.2894987463951111, "learning_rate": 1.8941088704634648e-05, "loss": 0.4636, "step": 14025 }, { "epoch": 0.29746983096859025, "grad_norm": 0.35847896337509155, "learning_rate": 1.894093934389135e-05, "loss": 0.4699, "step": 14026 }, { "epoch": 0.2974910394265233, "grad_norm": 0.313342422246933, "learning_rate": 1.894078997320401e-05, "loss": 0.4967, "step": 14027 }, { "epoch": 0.2975122478844563, "grad_norm": 0.346420556306839, "learning_rate": 1.8940640592572804e-05, "loss": 0.5243, "step": 14028 }, { "epoch": 0.29753345634238937, "grad_norm": 0.33997446298599243, "learning_rate": 1.8940491201997898e-05, "loss": 0.5101, "step": 14029 }, { "epoch": 0.2975546648003224, "grad_norm": 0.331133097410202, "learning_rate": 1.8940341801479452e-05, "loss": 0.4656, "step": 14030 }, { "epoch": 0.2975758732582554, "grad_norm": 0.5030669569969177, "learning_rate": 1.8940192391017636e-05, "loss": 0.4296, "step": 14031 }, { "epoch": 0.2975970817161884, "grad_norm": 0.3210504949092865, "learning_rate": 1.8940042970612615e-05, "loss": 0.4395, "step": 14032 }, { "epoch": 0.29761829017412145, "grad_norm": 0.3687068819999695, "learning_rate": 1.893989354026456e-05, "loss": 0.4876, "step": 14033 }, { "epoch": 0.29763949863205447, "grad_norm": 0.3399410545825958, "learning_rate": 1.893974409997363e-05, "loss": 0.4867, "step": 14034 }, { "epoch": 0.2976607070899875, "grad_norm": 0.3677307665348053, "learning_rate": 1.893959464973999e-05, "loss": 0.5392, "step": 14035 }, { "epoch": 0.2976819155479205, "grad_norm": 0.3248291015625, "learning_rate": 1.893944518956382e-05, "loss": 0.5511, "step": 14036 }, { "epoch": 0.2977031240058535, "grad_norm": 0.4032048285007477, "learning_rate": 1.893929571944527e-05, "loss": 0.5141, "step": 14037 }, { "epoch": 0.29772433246378655, "grad_norm": 0.3431931734085083, "learning_rate": 1.8939146239384513e-05, "loss": 0.5939, "step": 14038 }, { "epoch": 0.29774554092171956, "grad_norm": 0.3271161615848541, "learning_rate": 1.8938996749381716e-05, "loss": 0.5021, "step": 14039 }, { "epoch": 0.2977667493796526, "grad_norm": 0.5324564576148987, "learning_rate": 1.8938847249437048e-05, "loss": 0.5793, "step": 14040 }, { "epoch": 0.2977879578375856, "grad_norm": 0.36335501074790955, "learning_rate": 1.8938697739550667e-05, "loss": 0.4645, "step": 14041 }, { "epoch": 0.2978091662955187, "grad_norm": 0.35438352823257446, "learning_rate": 1.8938548219722745e-05, "loss": 0.5192, "step": 14042 }, { "epoch": 0.2978303747534517, "grad_norm": 0.3424828350543976, "learning_rate": 1.893839868995345e-05, "loss": 0.5527, "step": 14043 }, { "epoch": 0.2978515832113847, "grad_norm": 0.3506004512310028, "learning_rate": 1.8938249150242942e-05, "loss": 0.543, "step": 14044 }, { "epoch": 0.29787279166931774, "grad_norm": 0.33279040455818176, "learning_rate": 1.8938099600591396e-05, "loss": 0.4566, "step": 14045 }, { "epoch": 0.29789400012725076, "grad_norm": 0.3408474028110504, "learning_rate": 1.893795004099897e-05, "loss": 0.5743, "step": 14046 }, { "epoch": 0.2979152085851838, "grad_norm": 0.43381816148757935, "learning_rate": 1.8937800471465833e-05, "loss": 0.518, "step": 14047 }, { "epoch": 0.2979364170431168, "grad_norm": 0.32603731751441956, "learning_rate": 1.8937650891992148e-05, "loss": 0.559, "step": 14048 }, { "epoch": 0.2979576255010498, "grad_norm": 0.33459898829460144, "learning_rate": 1.8937501302578094e-05, "loss": 0.4956, "step": 14049 }, { "epoch": 0.29797883395898284, "grad_norm": 0.33126595616340637, "learning_rate": 1.8937351703223824e-05, "loss": 0.4738, "step": 14050 }, { "epoch": 0.29800004241691586, "grad_norm": 0.35189753770828247, "learning_rate": 1.893720209392951e-05, "loss": 0.4887, "step": 14051 }, { "epoch": 0.2980212508748489, "grad_norm": 0.35459965467453003, "learning_rate": 1.8937052474695316e-05, "loss": 0.4559, "step": 14052 }, { "epoch": 0.2980424593327819, "grad_norm": 0.31878533959388733, "learning_rate": 1.893690284552141e-05, "loss": 0.5298, "step": 14053 }, { "epoch": 0.2980636677907149, "grad_norm": 0.3152067959308624, "learning_rate": 1.893675320640796e-05, "loss": 0.5611, "step": 14054 }, { "epoch": 0.29808487624864793, "grad_norm": 0.4260839521884918, "learning_rate": 1.8936603557355132e-05, "loss": 0.5291, "step": 14055 }, { "epoch": 0.298106084706581, "grad_norm": 0.3265259861946106, "learning_rate": 1.893645389836309e-05, "loss": 0.5102, "step": 14056 }, { "epoch": 0.29812729316451403, "grad_norm": 0.3663254380226135, "learning_rate": 1.8936304229431997e-05, "loss": 0.5443, "step": 14057 }, { "epoch": 0.29814850162244705, "grad_norm": 0.39073583483695984, "learning_rate": 1.893615455056203e-05, "loss": 0.4816, "step": 14058 }, { "epoch": 0.29816971008038007, "grad_norm": 0.3526979684829712, "learning_rate": 1.8936004861753347e-05, "loss": 0.5638, "step": 14059 }, { "epoch": 0.2981909185383131, "grad_norm": 0.3278345465660095, "learning_rate": 1.8935855163006117e-05, "loss": 0.4772, "step": 14060 }, { "epoch": 0.2982121269962461, "grad_norm": 0.32129544019699097, "learning_rate": 1.8935705454320507e-05, "loss": 0.4879, "step": 14061 }, { "epoch": 0.2982333354541791, "grad_norm": 0.32581067085266113, "learning_rate": 1.8935555735696686e-05, "loss": 0.5844, "step": 14062 }, { "epoch": 0.29825454391211215, "grad_norm": 0.42184245586395264, "learning_rate": 1.893540600713481e-05, "loss": 0.6039, "step": 14063 }, { "epoch": 0.29827575237004517, "grad_norm": 0.3226272463798523, "learning_rate": 1.893525626863506e-05, "loss": 0.5009, "step": 14064 }, { "epoch": 0.2982969608279782, "grad_norm": 0.3747880756855011, "learning_rate": 1.893510652019759e-05, "loss": 0.4994, "step": 14065 }, { "epoch": 0.2983181692859112, "grad_norm": 0.36705705523490906, "learning_rate": 1.893495676182258e-05, "loss": 0.4478, "step": 14066 }, { "epoch": 0.2983393777438442, "grad_norm": 0.3723961114883423, "learning_rate": 1.8934806993510186e-05, "loss": 0.4637, "step": 14067 }, { "epoch": 0.29836058620177724, "grad_norm": 0.2970336377620697, "learning_rate": 1.893465721526057e-05, "loss": 0.5113, "step": 14068 }, { "epoch": 0.2983817946597103, "grad_norm": 0.46455657482147217, "learning_rate": 1.8934507427073913e-05, "loss": 0.5527, "step": 14069 }, { "epoch": 0.29840300311764334, "grad_norm": 0.3502654433250427, "learning_rate": 1.893435762895037e-05, "loss": 0.521, "step": 14070 }, { "epoch": 0.29842421157557636, "grad_norm": 0.32557064294815063, "learning_rate": 1.8934207820890117e-05, "loss": 0.4634, "step": 14071 }, { "epoch": 0.2984454200335094, "grad_norm": 0.34938839077949524, "learning_rate": 1.8934058002893313e-05, "loss": 0.5465, "step": 14072 }, { "epoch": 0.2984666284914424, "grad_norm": 0.3210662603378296, "learning_rate": 1.893390817496013e-05, "loss": 0.5282, "step": 14073 }, { "epoch": 0.2984878369493754, "grad_norm": 0.3544105589389801, "learning_rate": 1.8933758337090727e-05, "loss": 0.5157, "step": 14074 }, { "epoch": 0.29850904540730844, "grad_norm": 0.36509257555007935, "learning_rate": 1.893360848928528e-05, "loss": 0.4982, "step": 14075 }, { "epoch": 0.29853025386524146, "grad_norm": 0.41920074820518494, "learning_rate": 1.893345863154395e-05, "loss": 0.5743, "step": 14076 }, { "epoch": 0.2985514623231745, "grad_norm": 0.329614520072937, "learning_rate": 1.89333087638669e-05, "loss": 0.4752, "step": 14077 }, { "epoch": 0.2985726707811075, "grad_norm": 0.3448149859905243, "learning_rate": 1.8933158886254312e-05, "loss": 0.5465, "step": 14078 }, { "epoch": 0.2985938792390405, "grad_norm": 0.31260088086128235, "learning_rate": 1.893300899870633e-05, "loss": 0.5328, "step": 14079 }, { "epoch": 0.29861508769697354, "grad_norm": 0.34779998660087585, "learning_rate": 1.8932859101223142e-05, "loss": 0.4933, "step": 14080 }, { "epoch": 0.29863629615490656, "grad_norm": 0.37372836470603943, "learning_rate": 1.8932709193804903e-05, "loss": 0.5473, "step": 14081 }, { "epoch": 0.2986575046128396, "grad_norm": 0.3173470199108124, "learning_rate": 1.8932559276451784e-05, "loss": 0.5055, "step": 14082 }, { "epoch": 0.29867871307077265, "grad_norm": 0.3601604998111725, "learning_rate": 1.893240934916395e-05, "loss": 0.4723, "step": 14083 }, { "epoch": 0.29869992152870567, "grad_norm": 0.32987213134765625, "learning_rate": 1.8932259411941565e-05, "loss": 0.5098, "step": 14084 }, { "epoch": 0.2987211299866387, "grad_norm": 0.37395626306533813, "learning_rate": 1.89321094647848e-05, "loss": 0.4768, "step": 14085 }, { "epoch": 0.2987423384445717, "grad_norm": 0.32130932807922363, "learning_rate": 1.893195950769382e-05, "loss": 0.5159, "step": 14086 }, { "epoch": 0.29876354690250473, "grad_norm": 0.3421105444431305, "learning_rate": 1.8931809540668793e-05, "loss": 0.5226, "step": 14087 }, { "epoch": 0.29878475536043775, "grad_norm": 0.3660613000392914, "learning_rate": 1.8931659563709884e-05, "loss": 0.5162, "step": 14088 }, { "epoch": 0.29880596381837077, "grad_norm": 0.3292827010154724, "learning_rate": 1.8931509576817265e-05, "loss": 0.5775, "step": 14089 }, { "epoch": 0.2988271722763038, "grad_norm": 0.33791249990463257, "learning_rate": 1.8931359579991093e-05, "loss": 0.4639, "step": 14090 }, { "epoch": 0.2988483807342368, "grad_norm": 0.36548781394958496, "learning_rate": 1.8931209573231543e-05, "loss": 0.5238, "step": 14091 }, { "epoch": 0.2988695891921698, "grad_norm": 0.3544387221336365, "learning_rate": 1.8931059556538777e-05, "loss": 0.5544, "step": 14092 }, { "epoch": 0.29889079765010285, "grad_norm": 0.34628137946128845, "learning_rate": 1.893090952991297e-05, "loss": 0.4963, "step": 14093 }, { "epoch": 0.29891200610803587, "grad_norm": 0.34627509117126465, "learning_rate": 1.893075949335428e-05, "loss": 0.548, "step": 14094 }, { "epoch": 0.2989332145659689, "grad_norm": 0.35071542859077454, "learning_rate": 1.8930609446862873e-05, "loss": 0.4995, "step": 14095 }, { "epoch": 0.2989544230239019, "grad_norm": 0.3668533265590668, "learning_rate": 1.8930459390438925e-05, "loss": 0.5185, "step": 14096 }, { "epoch": 0.298975631481835, "grad_norm": 0.32967230677604675, "learning_rate": 1.8930309324082594e-05, "loss": 0.5045, "step": 14097 }, { "epoch": 0.298996839939768, "grad_norm": 0.3218529224395752, "learning_rate": 1.8930159247794052e-05, "loss": 0.5389, "step": 14098 }, { "epoch": 0.299018048397701, "grad_norm": 0.3292396068572998, "learning_rate": 1.8930009161573464e-05, "loss": 0.4923, "step": 14099 }, { "epoch": 0.29903925685563404, "grad_norm": 0.331523597240448, "learning_rate": 1.8929859065420997e-05, "loss": 0.4723, "step": 14100 }, { "epoch": 0.29906046531356706, "grad_norm": 0.29266607761383057, "learning_rate": 1.8929708959336822e-05, "loss": 0.4791, "step": 14101 }, { "epoch": 0.2990816737715001, "grad_norm": 0.3087976574897766, "learning_rate": 1.8929558843321097e-05, "loss": 0.4248, "step": 14102 }, { "epoch": 0.2991028822294331, "grad_norm": 0.37548258900642395, "learning_rate": 1.8929408717374e-05, "loss": 0.4578, "step": 14103 }, { "epoch": 0.2991240906873661, "grad_norm": 0.3286145329475403, "learning_rate": 1.8929258581495688e-05, "loss": 0.4926, "step": 14104 }, { "epoch": 0.29914529914529914, "grad_norm": 0.4308588206768036, "learning_rate": 1.892910843568633e-05, "loss": 0.4973, "step": 14105 }, { "epoch": 0.29916650760323216, "grad_norm": 0.36955368518829346, "learning_rate": 1.89289582799461e-05, "loss": 0.5168, "step": 14106 }, { "epoch": 0.2991877160611652, "grad_norm": 0.3740536868572235, "learning_rate": 1.892880811427516e-05, "loss": 0.5116, "step": 14107 }, { "epoch": 0.2992089245190982, "grad_norm": 0.35011714696884155, "learning_rate": 1.8928657938673675e-05, "loss": 0.4287, "step": 14108 }, { "epoch": 0.2992301329770312, "grad_norm": 0.35014232993125916, "learning_rate": 1.8928507753141814e-05, "loss": 0.4527, "step": 14109 }, { "epoch": 0.2992513414349643, "grad_norm": 0.34206119179725647, "learning_rate": 1.8928357557679746e-05, "loss": 0.4762, "step": 14110 }, { "epoch": 0.2992725498928973, "grad_norm": 0.33459189534187317, "learning_rate": 1.8928207352287632e-05, "loss": 0.5642, "step": 14111 }, { "epoch": 0.29929375835083033, "grad_norm": 0.349700003862381, "learning_rate": 1.8928057136965646e-05, "loss": 0.5384, "step": 14112 }, { "epoch": 0.29931496680876335, "grad_norm": 0.324420303106308, "learning_rate": 1.8927906911713957e-05, "loss": 0.4969, "step": 14113 }, { "epoch": 0.29933617526669637, "grad_norm": 0.3789043724536896, "learning_rate": 1.892775667653272e-05, "loss": 0.5534, "step": 14114 }, { "epoch": 0.2993573837246294, "grad_norm": 0.29863008856773376, "learning_rate": 1.8927606431422118e-05, "loss": 0.4159, "step": 14115 }, { "epoch": 0.2993785921825624, "grad_norm": 0.3726600408554077, "learning_rate": 1.8927456176382303e-05, "loss": 0.5454, "step": 14116 }, { "epoch": 0.29939980064049543, "grad_norm": 0.33029839396476746, "learning_rate": 1.892730591141345e-05, "loss": 0.5409, "step": 14117 }, { "epoch": 0.29942100909842845, "grad_norm": 0.35675281286239624, "learning_rate": 1.8927155636515726e-05, "loss": 0.5748, "step": 14118 }, { "epoch": 0.29944221755636147, "grad_norm": 0.3505668044090271, "learning_rate": 1.8927005351689297e-05, "loss": 0.475, "step": 14119 }, { "epoch": 0.2994634260142945, "grad_norm": 0.3317396938800812, "learning_rate": 1.892685505693433e-05, "loss": 0.5483, "step": 14120 }, { "epoch": 0.2994846344722275, "grad_norm": 0.29440996050834656, "learning_rate": 1.892670475225099e-05, "loss": 0.4356, "step": 14121 }, { "epoch": 0.2995058429301605, "grad_norm": 0.359825998544693, "learning_rate": 1.8926554437639447e-05, "loss": 0.4525, "step": 14122 }, { "epoch": 0.29952705138809355, "grad_norm": 0.371799498796463, "learning_rate": 1.892640411309987e-05, "loss": 0.4359, "step": 14123 }, { "epoch": 0.2995482598460266, "grad_norm": 0.3264455497264862, "learning_rate": 1.8926253778632425e-05, "loss": 0.5249, "step": 14124 }, { "epoch": 0.29956946830395964, "grad_norm": 0.30439722537994385, "learning_rate": 1.8926103434237278e-05, "loss": 0.5018, "step": 14125 }, { "epoch": 0.29959067676189266, "grad_norm": 0.3926530182361603, "learning_rate": 1.8925953079914593e-05, "loss": 0.5238, "step": 14126 }, { "epoch": 0.2996118852198257, "grad_norm": 0.34752169251441956, "learning_rate": 1.8925802715664544e-05, "loss": 0.5036, "step": 14127 }, { "epoch": 0.2996330936777587, "grad_norm": 0.3237230181694031, "learning_rate": 1.8925652341487292e-05, "loss": 0.5689, "step": 14128 }, { "epoch": 0.2996543021356917, "grad_norm": 0.3270779550075531, "learning_rate": 1.8925501957383007e-05, "loss": 0.4453, "step": 14129 }, { "epoch": 0.29967551059362474, "grad_norm": 0.348280668258667, "learning_rate": 1.8925351563351858e-05, "loss": 0.4275, "step": 14130 }, { "epoch": 0.29969671905155776, "grad_norm": 0.3109431564807892, "learning_rate": 1.8925201159394012e-05, "loss": 0.4708, "step": 14131 }, { "epoch": 0.2997179275094908, "grad_norm": 0.3629433214664459, "learning_rate": 1.892505074550963e-05, "loss": 0.4511, "step": 14132 }, { "epoch": 0.2997391359674238, "grad_norm": 1.0803053379058838, "learning_rate": 1.8924900321698887e-05, "loss": 0.4783, "step": 14133 }, { "epoch": 0.2997603444253568, "grad_norm": 0.33747076988220215, "learning_rate": 1.8924749887961948e-05, "loss": 0.5384, "step": 14134 }, { "epoch": 0.29978155288328984, "grad_norm": 0.3395178020000458, "learning_rate": 1.8924599444298978e-05, "loss": 0.5484, "step": 14135 }, { "epoch": 0.29980276134122286, "grad_norm": 0.3298759162425995, "learning_rate": 1.8924448990710144e-05, "loss": 0.5499, "step": 14136 }, { "epoch": 0.2998239697991559, "grad_norm": 0.3447432518005371, "learning_rate": 1.8924298527195623e-05, "loss": 0.5668, "step": 14137 }, { "epoch": 0.29984517825708895, "grad_norm": 0.29213303327560425, "learning_rate": 1.8924148053755566e-05, "loss": 0.4069, "step": 14138 }, { "epoch": 0.29986638671502197, "grad_norm": 0.29283469915390015, "learning_rate": 1.8923997570390153e-05, "loss": 0.4425, "step": 14139 }, { "epoch": 0.299887595172955, "grad_norm": 0.3319813311100006, "learning_rate": 1.8923847077099546e-05, "loss": 0.5447, "step": 14140 }, { "epoch": 0.299908803630888, "grad_norm": 0.3435472548007965, "learning_rate": 1.892369657388392e-05, "loss": 0.5654, "step": 14141 }, { "epoch": 0.29993001208882103, "grad_norm": 0.3448869287967682, "learning_rate": 1.8923546060743428e-05, "loss": 0.5143, "step": 14142 }, { "epoch": 0.29995122054675405, "grad_norm": 0.3480086624622345, "learning_rate": 1.8923395537678248e-05, "loss": 0.4757, "step": 14143 }, { "epoch": 0.29997242900468707, "grad_norm": 0.35159310698509216, "learning_rate": 1.892324500468855e-05, "loss": 0.5273, "step": 14144 }, { "epoch": 0.2999936374626201, "grad_norm": 0.3697229027748108, "learning_rate": 1.892309446177449e-05, "loss": 0.5617, "step": 14145 }, { "epoch": 0.3000148459205531, "grad_norm": 0.32748815417289734, "learning_rate": 1.8922943908936245e-05, "loss": 0.482, "step": 14146 }, { "epoch": 0.30003605437848613, "grad_norm": 0.36493873596191406, "learning_rate": 1.892279334617398e-05, "loss": 0.5072, "step": 14147 }, { "epoch": 0.30005726283641915, "grad_norm": 0.3260316252708435, "learning_rate": 1.8922642773487857e-05, "loss": 0.4417, "step": 14148 }, { "epoch": 0.30007847129435217, "grad_norm": 0.3373330533504486, "learning_rate": 1.8922492190878053e-05, "loss": 0.5558, "step": 14149 }, { "epoch": 0.3000996797522852, "grad_norm": 0.38191869854927063, "learning_rate": 1.8922341598344727e-05, "loss": 0.556, "step": 14150 }, { "epoch": 0.30012088821021826, "grad_norm": 0.30375221371650696, "learning_rate": 1.8922190995888053e-05, "loss": 0.53, "step": 14151 }, { "epoch": 0.3001420966681513, "grad_norm": 0.6795934438705444, "learning_rate": 1.8922040383508196e-05, "loss": 0.5557, "step": 14152 }, { "epoch": 0.3001633051260843, "grad_norm": 0.39444783329963684, "learning_rate": 1.892188976120532e-05, "loss": 0.5716, "step": 14153 }, { "epoch": 0.3001845135840173, "grad_norm": 0.3211336135864258, "learning_rate": 1.89217391289796e-05, "loss": 0.495, "step": 14154 }, { "epoch": 0.30020572204195034, "grad_norm": 0.31906911730766296, "learning_rate": 1.8921588486831196e-05, "loss": 0.559, "step": 14155 }, { "epoch": 0.30022693049988336, "grad_norm": 0.41425007581710815, "learning_rate": 1.8921437834760283e-05, "loss": 0.5314, "step": 14156 }, { "epoch": 0.3002481389578164, "grad_norm": 0.3314748704433441, "learning_rate": 1.892128717276702e-05, "loss": 0.4916, "step": 14157 }, { "epoch": 0.3002693474157494, "grad_norm": 0.3014295697212219, "learning_rate": 1.892113650085158e-05, "loss": 0.4129, "step": 14158 }, { "epoch": 0.3002905558736824, "grad_norm": 0.29667383432388306, "learning_rate": 1.8920985819014127e-05, "loss": 0.4257, "step": 14159 }, { "epoch": 0.30031176433161544, "grad_norm": 0.33049431443214417, "learning_rate": 1.8920835127254834e-05, "loss": 0.5084, "step": 14160 }, { "epoch": 0.30033297278954846, "grad_norm": 0.3532980680465698, "learning_rate": 1.8920684425573865e-05, "loss": 0.5592, "step": 14161 }, { "epoch": 0.3003541812474815, "grad_norm": 0.33939480781555176, "learning_rate": 1.8920533713971393e-05, "loss": 0.4748, "step": 14162 }, { "epoch": 0.3003753897054145, "grad_norm": 0.36195287108421326, "learning_rate": 1.8920382992447576e-05, "loss": 0.4499, "step": 14163 }, { "epoch": 0.3003965981633475, "grad_norm": 0.3445165753364563, "learning_rate": 1.8920232261002587e-05, "loss": 0.57, "step": 14164 }, { "epoch": 0.3004178066212806, "grad_norm": 0.33070889115333557, "learning_rate": 1.8920081519636593e-05, "loss": 0.5473, "step": 14165 }, { "epoch": 0.3004390150792136, "grad_norm": 0.3108968436717987, "learning_rate": 1.891993076834976e-05, "loss": 0.5344, "step": 14166 }, { "epoch": 0.30046022353714663, "grad_norm": 0.37806686758995056, "learning_rate": 1.891978000714226e-05, "loss": 0.4874, "step": 14167 }, { "epoch": 0.30048143199507965, "grad_norm": 0.3546048104763031, "learning_rate": 1.8919629236014255e-05, "loss": 0.5167, "step": 14168 }, { "epoch": 0.30050264045301267, "grad_norm": 0.32442620396614075, "learning_rate": 1.891947845496592e-05, "loss": 0.5502, "step": 14169 }, { "epoch": 0.3005238489109457, "grad_norm": 0.36153534054756165, "learning_rate": 1.891932766399742e-05, "loss": 0.5135, "step": 14170 }, { "epoch": 0.3005450573688787, "grad_norm": 0.33037084341049194, "learning_rate": 1.8919176863108915e-05, "loss": 0.4929, "step": 14171 }, { "epoch": 0.30056626582681173, "grad_norm": 0.35349172353744507, "learning_rate": 1.8919026052300585e-05, "loss": 0.5634, "step": 14172 }, { "epoch": 0.30058747428474475, "grad_norm": 0.34994107484817505, "learning_rate": 1.8918875231572588e-05, "loss": 0.5326, "step": 14173 }, { "epoch": 0.30060868274267777, "grad_norm": 0.36194664239883423, "learning_rate": 1.8918724400925097e-05, "loss": 0.5528, "step": 14174 }, { "epoch": 0.3006298912006108, "grad_norm": 0.3666777014732361, "learning_rate": 1.8918573560358277e-05, "loss": 0.518, "step": 14175 }, { "epoch": 0.3006510996585438, "grad_norm": 0.34053248167037964, "learning_rate": 1.8918422709872298e-05, "loss": 0.4456, "step": 14176 }, { "epoch": 0.3006723081164768, "grad_norm": 0.3405047357082367, "learning_rate": 1.8918271849467325e-05, "loss": 0.4838, "step": 14177 }, { "epoch": 0.30069351657440985, "grad_norm": 0.33448272943496704, "learning_rate": 1.891812097914353e-05, "loss": 0.5632, "step": 14178 }, { "epoch": 0.3007147250323429, "grad_norm": 0.3477877378463745, "learning_rate": 1.891797009890108e-05, "loss": 0.5235, "step": 14179 }, { "epoch": 0.30073593349027594, "grad_norm": 0.4525514841079712, "learning_rate": 1.8917819208740135e-05, "loss": 0.5111, "step": 14180 }, { "epoch": 0.30075714194820896, "grad_norm": 0.7774720191955566, "learning_rate": 1.8917668308660872e-05, "loss": 0.4697, "step": 14181 }, { "epoch": 0.300778350406142, "grad_norm": 0.35352903604507446, "learning_rate": 1.891751739866346e-05, "loss": 0.5982, "step": 14182 }, { "epoch": 0.300799558864075, "grad_norm": 0.34101906418800354, "learning_rate": 1.8917366478748056e-05, "loss": 0.4916, "step": 14183 }, { "epoch": 0.300820767322008, "grad_norm": 0.36773353815078735, "learning_rate": 1.8917215548914836e-05, "loss": 0.5123, "step": 14184 }, { "epoch": 0.30084197577994104, "grad_norm": 0.34961172938346863, "learning_rate": 1.8917064609163968e-05, "loss": 0.5507, "step": 14185 }, { "epoch": 0.30086318423787406, "grad_norm": 0.5542333126068115, "learning_rate": 1.8916913659495618e-05, "loss": 0.5406, "step": 14186 }, { "epoch": 0.3008843926958071, "grad_norm": 0.29585838317871094, "learning_rate": 1.8916762699909952e-05, "loss": 0.4091, "step": 14187 }, { "epoch": 0.3009056011537401, "grad_norm": 0.38014325499534607, "learning_rate": 1.8916611730407144e-05, "loss": 0.5101, "step": 14188 }, { "epoch": 0.3009268096116731, "grad_norm": 0.7231181263923645, "learning_rate": 1.8916460750987354e-05, "loss": 0.5039, "step": 14189 }, { "epoch": 0.30094801806960614, "grad_norm": 0.692217230796814, "learning_rate": 1.8916309761650757e-05, "loss": 0.4924, "step": 14190 }, { "epoch": 0.30096922652753916, "grad_norm": 0.33310481905937195, "learning_rate": 1.8916158762397515e-05, "loss": 0.4994, "step": 14191 }, { "epoch": 0.30099043498547223, "grad_norm": 0.3322511613368988, "learning_rate": 1.89160077532278e-05, "loss": 0.4873, "step": 14192 }, { "epoch": 0.30101164344340525, "grad_norm": 0.3152886927127838, "learning_rate": 1.891585673414178e-05, "loss": 0.4943, "step": 14193 }, { "epoch": 0.30103285190133827, "grad_norm": 0.3912414014339447, "learning_rate": 1.891570570513962e-05, "loss": 0.4703, "step": 14194 }, { "epoch": 0.3010540603592713, "grad_norm": 0.40261921286582947, "learning_rate": 1.891555466622149e-05, "loss": 0.5163, "step": 14195 }, { "epoch": 0.3010752688172043, "grad_norm": 0.34874826669692993, "learning_rate": 1.8915403617387557e-05, "loss": 0.5512, "step": 14196 }, { "epoch": 0.30109647727513733, "grad_norm": 0.33486101031303406, "learning_rate": 1.891525255863799e-05, "loss": 0.4655, "step": 14197 }, { "epoch": 0.30111768573307035, "grad_norm": 0.36458539962768555, "learning_rate": 1.8915101489972958e-05, "loss": 0.5755, "step": 14198 }, { "epoch": 0.30113889419100337, "grad_norm": 0.3790990114212036, "learning_rate": 1.8914950411392624e-05, "loss": 0.5137, "step": 14199 }, { "epoch": 0.3011601026489364, "grad_norm": 0.35886138677597046, "learning_rate": 1.891479932289716e-05, "loss": 0.5039, "step": 14200 }, { "epoch": 0.3011813111068694, "grad_norm": 0.37493881583213806, "learning_rate": 1.8914648224486738e-05, "loss": 0.4276, "step": 14201 }, { "epoch": 0.30120251956480243, "grad_norm": 0.3392443060874939, "learning_rate": 1.8914497116161517e-05, "loss": 0.5266, "step": 14202 }, { "epoch": 0.30122372802273545, "grad_norm": 0.31403881311416626, "learning_rate": 1.891434599792167e-05, "loss": 0.4561, "step": 14203 }, { "epoch": 0.30124493648066847, "grad_norm": 0.34266233444213867, "learning_rate": 1.891419486976737e-05, "loss": 0.5005, "step": 14204 }, { "epoch": 0.3012661449386015, "grad_norm": 0.45034369826316833, "learning_rate": 1.8914043731698776e-05, "loss": 0.6176, "step": 14205 }, { "epoch": 0.30128735339653456, "grad_norm": 0.8562451601028442, "learning_rate": 1.891389258371606e-05, "loss": 0.4968, "step": 14206 }, { "epoch": 0.3013085618544676, "grad_norm": 0.41904476284980774, "learning_rate": 1.891374142581939e-05, "loss": 0.5313, "step": 14207 }, { "epoch": 0.3013297703124006, "grad_norm": 0.33238789439201355, "learning_rate": 1.8913590258008937e-05, "loss": 0.4948, "step": 14208 }, { "epoch": 0.3013509787703336, "grad_norm": 0.3672223687171936, "learning_rate": 1.8913439080284863e-05, "loss": 0.4933, "step": 14209 }, { "epoch": 0.30137218722826664, "grad_norm": 0.3830110728740692, "learning_rate": 1.8913287892647337e-05, "loss": 0.6039, "step": 14210 }, { "epoch": 0.30139339568619966, "grad_norm": 0.3470137417316437, "learning_rate": 1.8913136695096533e-05, "loss": 0.459, "step": 14211 }, { "epoch": 0.3014146041441327, "grad_norm": 0.38332849740982056, "learning_rate": 1.891298548763262e-05, "loss": 0.5362, "step": 14212 }, { "epoch": 0.3014358126020657, "grad_norm": 0.34133967757225037, "learning_rate": 1.8912834270255754e-05, "loss": 0.4468, "step": 14213 }, { "epoch": 0.3014570210599987, "grad_norm": 0.31605860590934753, "learning_rate": 1.891268304296612e-05, "loss": 0.4757, "step": 14214 }, { "epoch": 0.30147822951793174, "grad_norm": 0.3249877095222473, "learning_rate": 1.891253180576387e-05, "loss": 0.4947, "step": 14215 }, { "epoch": 0.30149943797586476, "grad_norm": 0.3464188575744629, "learning_rate": 1.891238055864918e-05, "loss": 0.5461, "step": 14216 }, { "epoch": 0.3015206464337978, "grad_norm": 0.3528051972389221, "learning_rate": 1.891222930162222e-05, "loss": 0.5467, "step": 14217 }, { "epoch": 0.3015418548917308, "grad_norm": 0.3676740527153015, "learning_rate": 1.8912078034683158e-05, "loss": 0.5117, "step": 14218 }, { "epoch": 0.3015630633496639, "grad_norm": 0.33054131269454956, "learning_rate": 1.8911926757832157e-05, "loss": 0.4586, "step": 14219 }, { "epoch": 0.3015842718075969, "grad_norm": 0.3712471127510071, "learning_rate": 1.891177547106939e-05, "loss": 0.5828, "step": 14220 }, { "epoch": 0.3016054802655299, "grad_norm": 0.3612740635871887, "learning_rate": 1.8911624174395023e-05, "loss": 0.5499, "step": 14221 }, { "epoch": 0.30162668872346293, "grad_norm": 0.3283037841320038, "learning_rate": 1.8911472867809226e-05, "loss": 0.4986, "step": 14222 }, { "epoch": 0.30164789718139595, "grad_norm": 0.368883341550827, "learning_rate": 1.8911321551312163e-05, "loss": 0.5209, "step": 14223 }, { "epoch": 0.30166910563932897, "grad_norm": 0.37171468138694763, "learning_rate": 1.891117022490401e-05, "loss": 0.5556, "step": 14224 }, { "epoch": 0.301690314097262, "grad_norm": 0.33429959416389465, "learning_rate": 1.8911018888584926e-05, "loss": 0.3971, "step": 14225 }, { "epoch": 0.301711522555195, "grad_norm": 0.33317843079566956, "learning_rate": 1.891086754235509e-05, "loss": 0.5482, "step": 14226 }, { "epoch": 0.30173273101312803, "grad_norm": 0.39758938550949097, "learning_rate": 1.891071618621466e-05, "loss": 0.5648, "step": 14227 }, { "epoch": 0.30175393947106105, "grad_norm": 0.3507757782936096, "learning_rate": 1.891056482016381e-05, "loss": 0.4715, "step": 14228 }, { "epoch": 0.30177514792899407, "grad_norm": 0.38745924830436707, "learning_rate": 1.891041344420271e-05, "loss": 0.4655, "step": 14229 }, { "epoch": 0.3017963563869271, "grad_norm": 0.3663935363292694, "learning_rate": 1.891026205833152e-05, "loss": 0.4722, "step": 14230 }, { "epoch": 0.3018175648448601, "grad_norm": 0.33747944235801697, "learning_rate": 1.891011066255042e-05, "loss": 0.5027, "step": 14231 }, { "epoch": 0.30183877330279313, "grad_norm": 0.503546416759491, "learning_rate": 1.8909959256859568e-05, "loss": 0.5652, "step": 14232 }, { "epoch": 0.3018599817607262, "grad_norm": 0.38094639778137207, "learning_rate": 1.890980784125914e-05, "loss": 0.5605, "step": 14233 }, { "epoch": 0.3018811902186592, "grad_norm": 0.411038875579834, "learning_rate": 1.89096564157493e-05, "loss": 0.4632, "step": 14234 }, { "epoch": 0.30190239867659224, "grad_norm": 0.3188333213329315, "learning_rate": 1.8909504980330214e-05, "loss": 0.531, "step": 14235 }, { "epoch": 0.30192360713452526, "grad_norm": 0.3447572588920593, "learning_rate": 1.890935353500206e-05, "loss": 0.5135, "step": 14236 }, { "epoch": 0.3019448155924583, "grad_norm": 0.3827308714389801, "learning_rate": 1.8909202079764995e-05, "loss": 0.495, "step": 14237 }, { "epoch": 0.3019660240503913, "grad_norm": 0.3437293469905853, "learning_rate": 1.8909050614619197e-05, "loss": 0.5536, "step": 14238 }, { "epoch": 0.3019872325083243, "grad_norm": 0.4129927158355713, "learning_rate": 1.8908899139564828e-05, "loss": 0.5573, "step": 14239 }, { "epoch": 0.30200844096625734, "grad_norm": 0.314177006483078, "learning_rate": 1.8908747654602057e-05, "loss": 0.4661, "step": 14240 }, { "epoch": 0.30202964942419036, "grad_norm": 0.36961686611175537, "learning_rate": 1.8908596159731057e-05, "loss": 0.5034, "step": 14241 }, { "epoch": 0.3020508578821234, "grad_norm": 0.4999427795410156, "learning_rate": 1.8908444654951994e-05, "loss": 0.4434, "step": 14242 }, { "epoch": 0.3020720663400564, "grad_norm": 0.3619598150253296, "learning_rate": 1.8908293140265038e-05, "loss": 0.4873, "step": 14243 }, { "epoch": 0.3020932747979894, "grad_norm": 0.3392689526081085, "learning_rate": 1.890814161567035e-05, "loss": 0.5369, "step": 14244 }, { "epoch": 0.30211448325592244, "grad_norm": 0.4586118161678314, "learning_rate": 1.890799008116811e-05, "loss": 0.56, "step": 14245 }, { "epoch": 0.30213569171385546, "grad_norm": 0.3841618597507477, "learning_rate": 1.8907838536758476e-05, "loss": 0.5389, "step": 14246 }, { "epoch": 0.30215690017178853, "grad_norm": 0.35808026790618896, "learning_rate": 1.8907686982441623e-05, "loss": 0.5314, "step": 14247 }, { "epoch": 0.30217810862972155, "grad_norm": 0.2875012159347534, "learning_rate": 1.890753541821772e-05, "loss": 0.4992, "step": 14248 }, { "epoch": 0.3021993170876546, "grad_norm": 0.3869284391403198, "learning_rate": 1.8907383844086934e-05, "loss": 0.593, "step": 14249 }, { "epoch": 0.3022205255455876, "grad_norm": 0.40626004338264465, "learning_rate": 1.890723226004943e-05, "loss": 0.4941, "step": 14250 }, { "epoch": 0.3022417340035206, "grad_norm": 0.5962551832199097, "learning_rate": 1.890708066610538e-05, "loss": 0.5565, "step": 14251 }, { "epoch": 0.30226294246145363, "grad_norm": 0.45620620250701904, "learning_rate": 1.8906929062254953e-05, "loss": 0.4786, "step": 14252 }, { "epoch": 0.30228415091938665, "grad_norm": 0.32136470079421997, "learning_rate": 1.8906777448498318e-05, "loss": 0.5424, "step": 14253 }, { "epoch": 0.30230535937731967, "grad_norm": 0.3231934607028961, "learning_rate": 1.890662582483564e-05, "loss": 0.5282, "step": 14254 }, { "epoch": 0.3023265678352527, "grad_norm": 0.4200814664363861, "learning_rate": 1.8906474191267092e-05, "loss": 0.5257, "step": 14255 }, { "epoch": 0.3023477762931857, "grad_norm": 0.643531322479248, "learning_rate": 1.8906322547792842e-05, "loss": 0.5187, "step": 14256 }, { "epoch": 0.30236898475111873, "grad_norm": 0.36667296290397644, "learning_rate": 1.8906170894413053e-05, "loss": 0.4781, "step": 14257 }, { "epoch": 0.30239019320905175, "grad_norm": 0.4227410852909088, "learning_rate": 1.8906019231127902e-05, "loss": 0.5605, "step": 14258 }, { "epoch": 0.30241140166698477, "grad_norm": 1.017837643623352, "learning_rate": 1.890586755793755e-05, "loss": 0.5111, "step": 14259 }, { "epoch": 0.30243261012491784, "grad_norm": 0.3167276978492737, "learning_rate": 1.8905715874842176e-05, "loss": 0.585, "step": 14260 }, { "epoch": 0.30245381858285086, "grad_norm": 0.3559034764766693, "learning_rate": 1.8905564181841937e-05, "loss": 0.5158, "step": 14261 }, { "epoch": 0.3024750270407839, "grad_norm": 0.39722663164138794, "learning_rate": 1.8905412478937006e-05, "loss": 0.5174, "step": 14262 }, { "epoch": 0.3024962354987169, "grad_norm": 0.3518960773944855, "learning_rate": 1.890526076612756e-05, "loss": 0.5248, "step": 14263 }, { "epoch": 0.3025174439566499, "grad_norm": 0.39255914092063904, "learning_rate": 1.8905109043413753e-05, "loss": 0.5202, "step": 14264 }, { "epoch": 0.30253865241458294, "grad_norm": 0.3061608672142029, "learning_rate": 1.890495731079576e-05, "loss": 0.4673, "step": 14265 }, { "epoch": 0.30255986087251596, "grad_norm": 0.356507271528244, "learning_rate": 1.8904805568273757e-05, "loss": 0.5661, "step": 14266 }, { "epoch": 0.302581069330449, "grad_norm": 0.3416385054588318, "learning_rate": 1.89046538158479e-05, "loss": 0.5476, "step": 14267 }, { "epoch": 0.302602277788382, "grad_norm": 0.4336321949958801, "learning_rate": 1.890450205351837e-05, "loss": 0.5212, "step": 14268 }, { "epoch": 0.302623486246315, "grad_norm": 0.33490511775016785, "learning_rate": 1.8904350281285328e-05, "loss": 0.5068, "step": 14269 }, { "epoch": 0.30264469470424804, "grad_norm": 0.31447407603263855, "learning_rate": 1.8904198499148946e-05, "loss": 0.4041, "step": 14270 }, { "epoch": 0.30266590316218106, "grad_norm": 0.3978813886642456, "learning_rate": 1.8904046707109393e-05, "loss": 0.5218, "step": 14271 }, { "epoch": 0.3026871116201141, "grad_norm": 0.37727031111717224, "learning_rate": 1.890389490516683e-05, "loss": 0.5154, "step": 14272 }, { "epoch": 0.3027083200780471, "grad_norm": 0.3254354000091553, "learning_rate": 1.890374309332144e-05, "loss": 0.4747, "step": 14273 }, { "epoch": 0.3027295285359802, "grad_norm": 0.37181225419044495, "learning_rate": 1.8903591271573378e-05, "loss": 0.593, "step": 14274 }, { "epoch": 0.3027507369939132, "grad_norm": 0.4049733579158783, "learning_rate": 1.8903439439922824e-05, "loss": 0.6153, "step": 14275 }, { "epoch": 0.3027719454518462, "grad_norm": 0.3820447027683258, "learning_rate": 1.8903287598369943e-05, "loss": 0.64, "step": 14276 }, { "epoch": 0.30279315390977923, "grad_norm": 0.3520430326461792, "learning_rate": 1.8903135746914897e-05, "loss": 0.4967, "step": 14277 }, { "epoch": 0.30281436236771225, "grad_norm": 0.38003867864608765, "learning_rate": 1.8902983885557864e-05, "loss": 0.5418, "step": 14278 }, { "epoch": 0.3028355708256453, "grad_norm": 0.44150230288505554, "learning_rate": 1.890283201429901e-05, "loss": 0.5374, "step": 14279 }, { "epoch": 0.3028567792835783, "grad_norm": 0.361318975687027, "learning_rate": 1.8902680133138505e-05, "loss": 0.5217, "step": 14280 }, { "epoch": 0.3028779877415113, "grad_norm": 0.3868752419948578, "learning_rate": 1.8902528242076515e-05, "loss": 0.5511, "step": 14281 }, { "epoch": 0.30289919619944433, "grad_norm": 0.31891241669654846, "learning_rate": 1.890237634111321e-05, "loss": 0.4643, "step": 14282 }, { "epoch": 0.30292040465737735, "grad_norm": 0.35302117466926575, "learning_rate": 1.8902224430248758e-05, "loss": 0.4589, "step": 14283 }, { "epoch": 0.30294161311531037, "grad_norm": 0.32529231905937195, "learning_rate": 1.890207250948333e-05, "loss": 0.4749, "step": 14284 }, { "epoch": 0.3029628215732434, "grad_norm": 0.4816211760044098, "learning_rate": 1.8901920578817096e-05, "loss": 0.5025, "step": 14285 }, { "epoch": 0.3029840300311764, "grad_norm": 0.43786323070526123, "learning_rate": 1.890176863825022e-05, "loss": 0.5453, "step": 14286 }, { "epoch": 0.30300523848910943, "grad_norm": 0.4478570222854614, "learning_rate": 1.890161668778288e-05, "loss": 0.4893, "step": 14287 }, { "epoch": 0.3030264469470425, "grad_norm": 0.34366127848625183, "learning_rate": 1.8901464727415236e-05, "loss": 0.5277, "step": 14288 }, { "epoch": 0.3030476554049755, "grad_norm": 0.36793020367622375, "learning_rate": 1.8901312757147462e-05, "loss": 0.5202, "step": 14289 }, { "epoch": 0.30306886386290854, "grad_norm": 0.35157081484794617, "learning_rate": 1.8901160776979723e-05, "loss": 0.5745, "step": 14290 }, { "epoch": 0.30309007232084156, "grad_norm": 0.34480881690979004, "learning_rate": 1.8901008786912188e-05, "loss": 0.436, "step": 14291 }, { "epoch": 0.3031112807787746, "grad_norm": 0.3808784782886505, "learning_rate": 1.890085678694503e-05, "loss": 0.5057, "step": 14292 }, { "epoch": 0.3031324892367076, "grad_norm": 0.335292786359787, "learning_rate": 1.8900704777078422e-05, "loss": 0.5066, "step": 14293 }, { "epoch": 0.3031536976946406, "grad_norm": 0.3322639465332031, "learning_rate": 1.890055275731252e-05, "loss": 0.5036, "step": 14294 }, { "epoch": 0.30317490615257364, "grad_norm": 0.3369397521018982, "learning_rate": 1.8900400727647502e-05, "loss": 0.5042, "step": 14295 }, { "epoch": 0.30319611461050666, "grad_norm": 0.7257651090621948, "learning_rate": 1.890024868808354e-05, "loss": 0.5357, "step": 14296 }, { "epoch": 0.3032173230684397, "grad_norm": 0.5540763139724731, "learning_rate": 1.8900096638620793e-05, "loss": 0.4716, "step": 14297 }, { "epoch": 0.3032385315263727, "grad_norm": 0.3548874855041504, "learning_rate": 1.8899944579259438e-05, "loss": 0.5672, "step": 14298 }, { "epoch": 0.3032597399843057, "grad_norm": 0.3510599136352539, "learning_rate": 1.8899792509999645e-05, "loss": 0.5288, "step": 14299 }, { "epoch": 0.30328094844223874, "grad_norm": 0.3148283064365387, "learning_rate": 1.8899640430841578e-05, "loss": 0.4993, "step": 14300 }, { "epoch": 0.3033021569001718, "grad_norm": 0.4910221993923187, "learning_rate": 1.8899488341785405e-05, "loss": 0.4694, "step": 14301 }, { "epoch": 0.30332336535810484, "grad_norm": 0.29437869787216187, "learning_rate": 1.8899336242831304e-05, "loss": 0.4797, "step": 14302 }, { "epoch": 0.30334457381603785, "grad_norm": 0.3187297582626343, "learning_rate": 1.8899184133979436e-05, "loss": 0.4535, "step": 14303 }, { "epoch": 0.3033657822739709, "grad_norm": 0.3324390649795532, "learning_rate": 1.889903201522997e-05, "loss": 0.5663, "step": 14304 }, { "epoch": 0.3033869907319039, "grad_norm": 0.4420487582683563, "learning_rate": 1.8898879886583083e-05, "loss": 0.5298, "step": 14305 }, { "epoch": 0.3034081991898369, "grad_norm": 0.3230573832988739, "learning_rate": 1.8898727748038936e-05, "loss": 0.4759, "step": 14306 }, { "epoch": 0.30342940764776993, "grad_norm": 0.3951614797115326, "learning_rate": 1.88985755995977e-05, "loss": 0.6194, "step": 14307 }, { "epoch": 0.30345061610570295, "grad_norm": 0.4051317870616913, "learning_rate": 1.889842344125955e-05, "loss": 0.5294, "step": 14308 }, { "epoch": 0.303471824563636, "grad_norm": 0.4326106905937195, "learning_rate": 1.8898271273024647e-05, "loss": 0.5015, "step": 14309 }, { "epoch": 0.303493033021569, "grad_norm": 0.3098796308040619, "learning_rate": 1.8898119094893165e-05, "loss": 0.503, "step": 14310 }, { "epoch": 0.303514241479502, "grad_norm": 0.370958536863327, "learning_rate": 1.8897966906865272e-05, "loss": 0.5067, "step": 14311 }, { "epoch": 0.30353544993743503, "grad_norm": 0.38154685497283936, "learning_rate": 1.8897814708941138e-05, "loss": 0.5227, "step": 14312 }, { "epoch": 0.30355665839536805, "grad_norm": 0.3199065923690796, "learning_rate": 1.8897662501120934e-05, "loss": 0.4467, "step": 14313 }, { "epoch": 0.30357786685330107, "grad_norm": 0.4283837080001831, "learning_rate": 1.8897510283404824e-05, "loss": 0.544, "step": 14314 }, { "epoch": 0.30359907531123415, "grad_norm": 0.36247462034225464, "learning_rate": 1.889735805579298e-05, "loss": 0.5722, "step": 14315 }, { "epoch": 0.30362028376916717, "grad_norm": 0.31179219484329224, "learning_rate": 1.8897205818285574e-05, "loss": 0.4986, "step": 14316 }, { "epoch": 0.3036414922271002, "grad_norm": 0.38413169980049133, "learning_rate": 1.889705357088277e-05, "loss": 0.5176, "step": 14317 }, { "epoch": 0.3036627006850332, "grad_norm": 0.32708245515823364, "learning_rate": 1.8896901313584744e-05, "loss": 0.5083, "step": 14318 }, { "epoch": 0.3036839091429662, "grad_norm": 0.4595261514186859, "learning_rate": 1.8896749046391658e-05, "loss": 0.4818, "step": 14319 }, { "epoch": 0.30370511760089924, "grad_norm": 0.3797876834869385, "learning_rate": 1.889659676930369e-05, "loss": 0.5565, "step": 14320 }, { "epoch": 0.30372632605883226, "grad_norm": 0.35538336634635925, "learning_rate": 1.8896444482320995e-05, "loss": 0.4564, "step": 14321 }, { "epoch": 0.3037475345167653, "grad_norm": 0.35667574405670166, "learning_rate": 1.889629218544376e-05, "loss": 0.4824, "step": 14322 }, { "epoch": 0.3037687429746983, "grad_norm": 0.40928569436073303, "learning_rate": 1.8896139878672145e-05, "loss": 0.559, "step": 14323 }, { "epoch": 0.3037899514326313, "grad_norm": 0.3498519957065582, "learning_rate": 1.889598756200632e-05, "loss": 0.477, "step": 14324 }, { "epoch": 0.30381115989056434, "grad_norm": 0.3062988519668579, "learning_rate": 1.8895835235446454e-05, "loss": 0.4725, "step": 14325 }, { "epoch": 0.30383236834849736, "grad_norm": 0.43555954098701477, "learning_rate": 1.8895682898992716e-05, "loss": 0.5294, "step": 14326 }, { "epoch": 0.3038535768064304, "grad_norm": 0.3789849877357483, "learning_rate": 1.8895530552645282e-05, "loss": 0.5449, "step": 14327 }, { "epoch": 0.3038747852643634, "grad_norm": 0.33980247378349304, "learning_rate": 1.889537819640431e-05, "loss": 0.4115, "step": 14328 }, { "epoch": 0.3038959937222965, "grad_norm": 0.3810543119907379, "learning_rate": 1.889522583026998e-05, "loss": 0.4742, "step": 14329 }, { "epoch": 0.3039172021802295, "grad_norm": 0.3100529909133911, "learning_rate": 1.8895073454242456e-05, "loss": 0.4855, "step": 14330 }, { "epoch": 0.3039384106381625, "grad_norm": 0.5870413780212402, "learning_rate": 1.8894921068321906e-05, "loss": 0.4957, "step": 14331 }, { "epoch": 0.30395961909609553, "grad_norm": 0.3547591269016266, "learning_rate": 1.8894768672508503e-05, "loss": 0.5163, "step": 14332 }, { "epoch": 0.30398082755402855, "grad_norm": 0.32475587725639343, "learning_rate": 1.8894616266802416e-05, "loss": 0.567, "step": 14333 }, { "epoch": 0.3040020360119616, "grad_norm": 0.34269022941589355, "learning_rate": 1.8894463851203814e-05, "loss": 0.4994, "step": 14334 }, { "epoch": 0.3040232444698946, "grad_norm": 0.37078195810317993, "learning_rate": 1.889431142571287e-05, "loss": 0.5573, "step": 14335 }, { "epoch": 0.3040444529278276, "grad_norm": 0.32974573969841003, "learning_rate": 1.8894158990329744e-05, "loss": 0.5133, "step": 14336 }, { "epoch": 0.30406566138576063, "grad_norm": 0.3009625971317291, "learning_rate": 1.8894006545054618e-05, "loss": 0.4758, "step": 14337 }, { "epoch": 0.30408686984369365, "grad_norm": 0.4322878420352936, "learning_rate": 1.889385408988765e-05, "loss": 0.4574, "step": 14338 }, { "epoch": 0.30410807830162667, "grad_norm": 0.3239037096500397, "learning_rate": 1.8893701624829017e-05, "loss": 0.5697, "step": 14339 }, { "epoch": 0.3041292867595597, "grad_norm": 0.364956259727478, "learning_rate": 1.8893549149878887e-05, "loss": 0.5715, "step": 14340 }, { "epoch": 0.3041504952174927, "grad_norm": 0.35032737255096436, "learning_rate": 1.8893396665037426e-05, "loss": 0.5187, "step": 14341 }, { "epoch": 0.3041717036754258, "grad_norm": 0.33981961011886597, "learning_rate": 1.889324417030481e-05, "loss": 0.5183, "step": 14342 }, { "epoch": 0.3041929121333588, "grad_norm": 0.3944372832775116, "learning_rate": 1.8893091665681204e-05, "loss": 0.5707, "step": 14343 }, { "epoch": 0.3042141205912918, "grad_norm": 0.29693612456321716, "learning_rate": 1.889293915116678e-05, "loss": 0.4581, "step": 14344 }, { "epoch": 0.30423532904922485, "grad_norm": 0.33712178468704224, "learning_rate": 1.8892786626761702e-05, "loss": 0.4881, "step": 14345 }, { "epoch": 0.30425653750715786, "grad_norm": 0.33817365765571594, "learning_rate": 1.8892634092466147e-05, "loss": 0.5284, "step": 14346 }, { "epoch": 0.3042777459650909, "grad_norm": 0.33877262473106384, "learning_rate": 1.889248154828028e-05, "loss": 0.5551, "step": 14347 }, { "epoch": 0.3042989544230239, "grad_norm": 0.342203825712204, "learning_rate": 1.8892328994204273e-05, "loss": 0.5011, "step": 14348 }, { "epoch": 0.3043201628809569, "grad_norm": 0.3435191214084625, "learning_rate": 1.88921764302383e-05, "loss": 0.5242, "step": 14349 }, { "epoch": 0.30434137133888994, "grad_norm": 0.35923951864242554, "learning_rate": 1.8892023856382518e-05, "loss": 0.5161, "step": 14350 }, { "epoch": 0.30436257979682296, "grad_norm": 0.3510819971561432, "learning_rate": 1.889187127263711e-05, "loss": 0.529, "step": 14351 }, { "epoch": 0.304383788254756, "grad_norm": 0.346169650554657, "learning_rate": 1.8891718679002234e-05, "loss": 0.5379, "step": 14352 }, { "epoch": 0.304404996712689, "grad_norm": 0.3499356806278229, "learning_rate": 1.889156607547807e-05, "loss": 0.4657, "step": 14353 }, { "epoch": 0.304426205170622, "grad_norm": 0.4242857098579407, "learning_rate": 1.8891413462064783e-05, "loss": 0.4663, "step": 14354 }, { "epoch": 0.30444741362855504, "grad_norm": 0.46290597319602966, "learning_rate": 1.8891260838762542e-05, "loss": 0.4727, "step": 14355 }, { "epoch": 0.3044686220864881, "grad_norm": 0.4184563457965851, "learning_rate": 1.8891108205571516e-05, "loss": 0.5899, "step": 14356 }, { "epoch": 0.30448983054442114, "grad_norm": 0.39826688170433044, "learning_rate": 1.8890955562491883e-05, "loss": 0.4963, "step": 14357 }, { "epoch": 0.30451103900235416, "grad_norm": 0.34799402952194214, "learning_rate": 1.8890802909523803e-05, "loss": 0.4609, "step": 14358 }, { "epoch": 0.3045322474602872, "grad_norm": 0.36398953199386597, "learning_rate": 1.8890650246667448e-05, "loss": 0.5469, "step": 14359 }, { "epoch": 0.3045534559182202, "grad_norm": 0.32460853457450867, "learning_rate": 1.889049757392299e-05, "loss": 0.5436, "step": 14360 }, { "epoch": 0.3045746643761532, "grad_norm": 0.37469759583473206, "learning_rate": 1.8890344891290596e-05, "loss": 0.5316, "step": 14361 }, { "epoch": 0.30459587283408623, "grad_norm": 0.3741942346096039, "learning_rate": 1.889019219877044e-05, "loss": 0.4885, "step": 14362 }, { "epoch": 0.30461708129201925, "grad_norm": 0.35979342460632324, "learning_rate": 1.889003949636269e-05, "loss": 0.5535, "step": 14363 }, { "epoch": 0.3046382897499523, "grad_norm": 0.32793888449668884, "learning_rate": 1.8889886784067515e-05, "loss": 0.5288, "step": 14364 }, { "epoch": 0.3046594982078853, "grad_norm": 0.31123220920562744, "learning_rate": 1.8889734061885084e-05, "loss": 0.5097, "step": 14365 }, { "epoch": 0.3046807066658183, "grad_norm": 0.30550259351730347, "learning_rate": 1.888958132981557e-05, "loss": 0.5, "step": 14366 }, { "epoch": 0.30470191512375133, "grad_norm": 0.3880537748336792, "learning_rate": 1.8889428587859137e-05, "loss": 0.5509, "step": 14367 }, { "epoch": 0.30472312358168435, "grad_norm": 0.3306138813495636, "learning_rate": 1.8889275836015963e-05, "loss": 0.4837, "step": 14368 }, { "epoch": 0.3047443320396174, "grad_norm": 0.3586258888244629, "learning_rate": 1.8889123074286214e-05, "loss": 0.6051, "step": 14369 }, { "epoch": 0.30476554049755045, "grad_norm": 0.36872047185897827, "learning_rate": 1.8888970302670056e-05, "loss": 0.5047, "step": 14370 }, { "epoch": 0.30478674895548347, "grad_norm": 0.3684108257293701, "learning_rate": 1.8888817521167664e-05, "loss": 0.5262, "step": 14371 }, { "epoch": 0.3048079574134165, "grad_norm": 0.34745872020721436, "learning_rate": 1.8888664729779205e-05, "loss": 0.4122, "step": 14372 }, { "epoch": 0.3048291658713495, "grad_norm": 0.3245472013950348, "learning_rate": 1.8888511928504853e-05, "loss": 0.4632, "step": 14373 }, { "epoch": 0.3048503743292825, "grad_norm": 0.40132153034210205, "learning_rate": 1.888835911734477e-05, "loss": 0.4783, "step": 14374 }, { "epoch": 0.30487158278721554, "grad_norm": 0.40030813217163086, "learning_rate": 1.8888206296299138e-05, "loss": 0.5735, "step": 14375 }, { "epoch": 0.30489279124514856, "grad_norm": 0.3912424147129059, "learning_rate": 1.8888053465368115e-05, "loss": 0.5552, "step": 14376 }, { "epoch": 0.3049139997030816, "grad_norm": 0.3111282289028168, "learning_rate": 1.888790062455188e-05, "loss": 0.4761, "step": 14377 }, { "epoch": 0.3049352081610146, "grad_norm": 0.3692459762096405, "learning_rate": 1.88877477738506e-05, "loss": 0.4905, "step": 14378 }, { "epoch": 0.3049564166189476, "grad_norm": 0.3699910640716553, "learning_rate": 1.8887594913264438e-05, "loss": 0.5325, "step": 14379 }, { "epoch": 0.30497762507688064, "grad_norm": 0.4175933301448822, "learning_rate": 1.888744204279357e-05, "loss": 0.5014, "step": 14380 }, { "epoch": 0.30499883353481366, "grad_norm": 0.3133202791213989, "learning_rate": 1.8887289162438173e-05, "loss": 0.4857, "step": 14381 }, { "epoch": 0.3050200419927467, "grad_norm": 0.37783628702163696, "learning_rate": 1.8887136272198408e-05, "loss": 0.4797, "step": 14382 }, { "epoch": 0.30504125045067976, "grad_norm": 0.32580119371414185, "learning_rate": 1.8886983372074446e-05, "loss": 0.5213, "step": 14383 }, { "epoch": 0.3050624589086128, "grad_norm": 0.6853193044662476, "learning_rate": 1.8886830462066456e-05, "loss": 0.6088, "step": 14384 }, { "epoch": 0.3050836673665458, "grad_norm": 0.36050811409950256, "learning_rate": 1.8886677542174612e-05, "loss": 0.5201, "step": 14385 }, { "epoch": 0.3051048758244788, "grad_norm": 0.37000566720962524, "learning_rate": 1.8886524612399087e-05, "loss": 0.4754, "step": 14386 }, { "epoch": 0.30512608428241184, "grad_norm": 0.3290918469429016, "learning_rate": 1.888637167274004e-05, "loss": 0.6084, "step": 14387 }, { "epoch": 0.30514729274034486, "grad_norm": 0.30554714798927307, "learning_rate": 1.888621872319765e-05, "loss": 0.4798, "step": 14388 }, { "epoch": 0.3051685011982779, "grad_norm": 0.3734087646007538, "learning_rate": 1.8886065763772085e-05, "loss": 0.5532, "step": 14389 }, { "epoch": 0.3051897096562109, "grad_norm": 0.34533894062042236, "learning_rate": 1.8885912794463515e-05, "loss": 0.4366, "step": 14390 }, { "epoch": 0.3052109181141439, "grad_norm": 0.34719133377075195, "learning_rate": 1.888575981527211e-05, "loss": 0.566, "step": 14391 }, { "epoch": 0.30523212657207693, "grad_norm": 0.3478187918663025, "learning_rate": 1.888560682619804e-05, "loss": 0.5144, "step": 14392 }, { "epoch": 0.30525333503000995, "grad_norm": 0.3254358768463135, "learning_rate": 1.888545382724148e-05, "loss": 0.5652, "step": 14393 }, { "epoch": 0.305274543487943, "grad_norm": 0.35043033957481384, "learning_rate": 1.8885300818402587e-05, "loss": 0.4628, "step": 14394 }, { "epoch": 0.305295751945876, "grad_norm": 0.2785240113735199, "learning_rate": 1.8885147799681544e-05, "loss": 0.477, "step": 14395 }, { "epoch": 0.305316960403809, "grad_norm": 0.3367585241794586, "learning_rate": 1.8884994771078517e-05, "loss": 0.5418, "step": 14396 }, { "epoch": 0.3053381688617421, "grad_norm": 0.3640981614589691, "learning_rate": 1.8884841732593675e-05, "loss": 0.5479, "step": 14397 }, { "epoch": 0.3053593773196751, "grad_norm": 0.3327164351940155, "learning_rate": 1.8884688684227187e-05, "loss": 0.4728, "step": 14398 }, { "epoch": 0.3053805857776081, "grad_norm": 0.35773301124572754, "learning_rate": 1.888453562597923e-05, "loss": 0.5704, "step": 14399 }, { "epoch": 0.30540179423554115, "grad_norm": 0.3249501585960388, "learning_rate": 1.888438255784997e-05, "loss": 0.534, "step": 14400 }, { "epoch": 0.30542300269347417, "grad_norm": 0.3115755021572113, "learning_rate": 1.888422947983957e-05, "loss": 0.5085, "step": 14401 }, { "epoch": 0.3054442111514072, "grad_norm": 0.34257254004478455, "learning_rate": 1.8884076391948215e-05, "loss": 0.4988, "step": 14402 }, { "epoch": 0.3054654196093402, "grad_norm": 0.40185001492500305, "learning_rate": 1.8883923294176064e-05, "loss": 0.5936, "step": 14403 }, { "epoch": 0.3054866280672732, "grad_norm": 0.32731175422668457, "learning_rate": 1.888377018652329e-05, "loss": 0.5226, "step": 14404 }, { "epoch": 0.30550783652520624, "grad_norm": 0.33663684129714966, "learning_rate": 1.8883617068990064e-05, "loss": 0.492, "step": 14405 }, { "epoch": 0.30552904498313926, "grad_norm": 0.3254883289337158, "learning_rate": 1.8883463941576558e-05, "loss": 0.5262, "step": 14406 }, { "epoch": 0.3055502534410723, "grad_norm": 0.3141157627105713, "learning_rate": 1.888331080428294e-05, "loss": 0.4832, "step": 14407 }, { "epoch": 0.3055714618990053, "grad_norm": 0.3345930278301239, "learning_rate": 1.8883157657109378e-05, "loss": 0.5019, "step": 14408 }, { "epoch": 0.3055926703569383, "grad_norm": 0.4584118723869324, "learning_rate": 1.8883004500056047e-05, "loss": 0.5602, "step": 14409 }, { "epoch": 0.3056138788148714, "grad_norm": 0.3359057307243347, "learning_rate": 1.8882851333123118e-05, "loss": 0.5639, "step": 14410 }, { "epoch": 0.3056350872728044, "grad_norm": 0.37883129715919495, "learning_rate": 1.8882698156310758e-05, "loss": 0.4352, "step": 14411 }, { "epoch": 0.30565629573073744, "grad_norm": 0.414698988199234, "learning_rate": 1.8882544969619138e-05, "loss": 0.5322, "step": 14412 }, { "epoch": 0.30567750418867046, "grad_norm": 0.39808425307273865, "learning_rate": 1.8882391773048424e-05, "loss": 0.6015, "step": 14413 }, { "epoch": 0.3056987126466035, "grad_norm": 0.39152488112449646, "learning_rate": 1.8882238566598798e-05, "loss": 0.4002, "step": 14414 }, { "epoch": 0.3057199211045365, "grad_norm": 0.3684331178665161, "learning_rate": 1.888208535027042e-05, "loss": 0.5038, "step": 14415 }, { "epoch": 0.3057411295624695, "grad_norm": 0.3418152332305908, "learning_rate": 1.8881932124063466e-05, "loss": 0.5021, "step": 14416 }, { "epoch": 0.30576233802040254, "grad_norm": 0.37910494208335876, "learning_rate": 1.88817788879781e-05, "loss": 0.4344, "step": 14417 }, { "epoch": 0.30578354647833556, "grad_norm": 0.35366857051849365, "learning_rate": 1.88816256420145e-05, "loss": 0.5247, "step": 14418 }, { "epoch": 0.3058047549362686, "grad_norm": 0.3362092673778534, "learning_rate": 1.8881472386172833e-05, "loss": 0.4685, "step": 14419 }, { "epoch": 0.3058259633942016, "grad_norm": 0.45350098609924316, "learning_rate": 1.888131912045327e-05, "loss": 0.5512, "step": 14420 }, { "epoch": 0.3058471718521346, "grad_norm": 0.3557283580303192, "learning_rate": 1.8881165844855977e-05, "loss": 0.5344, "step": 14421 }, { "epoch": 0.30586838031006763, "grad_norm": 0.3346503674983978, "learning_rate": 1.8881012559381135e-05, "loss": 0.4604, "step": 14422 }, { "epoch": 0.30588958876800065, "grad_norm": 0.36152157187461853, "learning_rate": 1.8880859264028903e-05, "loss": 0.4834, "step": 14423 }, { "epoch": 0.30591079722593373, "grad_norm": 0.33916354179382324, "learning_rate": 1.8880705958799457e-05, "loss": 0.4935, "step": 14424 }, { "epoch": 0.30593200568386675, "grad_norm": 0.357072114944458, "learning_rate": 1.888055264369297e-05, "loss": 0.5529, "step": 14425 }, { "epoch": 0.30595321414179977, "grad_norm": 0.36915701627731323, "learning_rate": 1.8880399318709604e-05, "loss": 0.5428, "step": 14426 }, { "epoch": 0.3059744225997328, "grad_norm": 0.45044469833374023, "learning_rate": 1.888024598384954e-05, "loss": 0.5099, "step": 14427 }, { "epoch": 0.3059956310576658, "grad_norm": 0.4546239376068115, "learning_rate": 1.8880092639112942e-05, "loss": 0.5593, "step": 14428 }, { "epoch": 0.3060168395155988, "grad_norm": 0.3373487591743469, "learning_rate": 1.887993928449998e-05, "loss": 0.5296, "step": 14429 }, { "epoch": 0.30603804797353185, "grad_norm": 0.35129889845848083, "learning_rate": 1.887978592001083e-05, "loss": 0.5254, "step": 14430 }, { "epoch": 0.30605925643146487, "grad_norm": 0.35396265983581543, "learning_rate": 1.8879632545645657e-05, "loss": 0.5059, "step": 14431 }, { "epoch": 0.3060804648893979, "grad_norm": 0.37430357933044434, "learning_rate": 1.8879479161404636e-05, "loss": 0.534, "step": 14432 }, { "epoch": 0.3061016733473309, "grad_norm": 0.3250732719898224, "learning_rate": 1.8879325767287935e-05, "loss": 0.5349, "step": 14433 }, { "epoch": 0.3061228818052639, "grad_norm": 0.34940096735954285, "learning_rate": 1.8879172363295725e-05, "loss": 0.551, "step": 14434 }, { "epoch": 0.30614409026319694, "grad_norm": 0.3365161716938019, "learning_rate": 1.8879018949428174e-05, "loss": 0.5129, "step": 14435 }, { "epoch": 0.30616529872112996, "grad_norm": 0.3422030806541443, "learning_rate": 1.8878865525685456e-05, "loss": 0.5715, "step": 14436 }, { "epoch": 0.306186507179063, "grad_norm": 0.36315372586250305, "learning_rate": 1.8878712092067744e-05, "loss": 0.5561, "step": 14437 }, { "epoch": 0.30620771563699606, "grad_norm": 0.3586938679218292, "learning_rate": 1.8878558648575202e-05, "loss": 0.5461, "step": 14438 }, { "epoch": 0.3062289240949291, "grad_norm": 0.34357887506484985, "learning_rate": 1.8878405195208008e-05, "loss": 0.4719, "step": 14439 }, { "epoch": 0.3062501325528621, "grad_norm": 0.3313578963279724, "learning_rate": 1.8878251731966324e-05, "loss": 0.5395, "step": 14440 }, { "epoch": 0.3062713410107951, "grad_norm": 0.3083745837211609, "learning_rate": 1.8878098258850327e-05, "loss": 0.4196, "step": 14441 }, { "epoch": 0.30629254946872814, "grad_norm": 0.361602246761322, "learning_rate": 1.887794477586019e-05, "loss": 0.4978, "step": 14442 }, { "epoch": 0.30631375792666116, "grad_norm": 0.3480170667171478, "learning_rate": 1.8877791282996075e-05, "loss": 0.5353, "step": 14443 }, { "epoch": 0.3063349663845942, "grad_norm": 0.3478875160217285, "learning_rate": 1.8877637780258164e-05, "loss": 0.4973, "step": 14444 }, { "epoch": 0.3063561748425272, "grad_norm": 0.3295903503894806, "learning_rate": 1.887748426764662e-05, "loss": 0.5368, "step": 14445 }, { "epoch": 0.3063773833004602, "grad_norm": 0.32981565594673157, "learning_rate": 1.8877330745161608e-05, "loss": 0.4193, "step": 14446 }, { "epoch": 0.30639859175839324, "grad_norm": 0.3831578195095062, "learning_rate": 1.887717721280331e-05, "loss": 0.5642, "step": 14447 }, { "epoch": 0.30641980021632625, "grad_norm": 0.3504890203475952, "learning_rate": 1.8877023670571893e-05, "loss": 0.5596, "step": 14448 }, { "epoch": 0.3064410086742593, "grad_norm": 0.317255437374115, "learning_rate": 1.887687011846753e-05, "loss": 0.467, "step": 14449 }, { "epoch": 0.3064622171321923, "grad_norm": 0.38003283739089966, "learning_rate": 1.8876716556490386e-05, "loss": 0.4872, "step": 14450 }, { "epoch": 0.30648342559012537, "grad_norm": 0.34148886799812317, "learning_rate": 1.8876562984640633e-05, "loss": 0.5022, "step": 14451 }, { "epoch": 0.3065046340480584, "grad_norm": 0.3558676540851593, "learning_rate": 1.8876409402918447e-05, "loss": 0.5785, "step": 14452 }, { "epoch": 0.3065258425059914, "grad_norm": 0.3347914218902588, "learning_rate": 1.8876255811323994e-05, "loss": 0.4625, "step": 14453 }, { "epoch": 0.30654705096392443, "grad_norm": 0.3726557791233063, "learning_rate": 1.887610220985745e-05, "loss": 0.5627, "step": 14454 }, { "epoch": 0.30656825942185745, "grad_norm": 0.33222517371177673, "learning_rate": 1.8875948598518977e-05, "loss": 0.5206, "step": 14455 }, { "epoch": 0.30658946787979047, "grad_norm": 0.33872687816619873, "learning_rate": 1.8875794977308756e-05, "loss": 0.4902, "step": 14456 }, { "epoch": 0.3066106763377235, "grad_norm": 0.2979024648666382, "learning_rate": 1.8875641346226948e-05, "loss": 0.4661, "step": 14457 }, { "epoch": 0.3066318847956565, "grad_norm": 0.34322938323020935, "learning_rate": 1.8875487705273734e-05, "loss": 0.5298, "step": 14458 }, { "epoch": 0.3066530932535895, "grad_norm": 0.3925175070762634, "learning_rate": 1.8875334054449275e-05, "loss": 0.5018, "step": 14459 }, { "epoch": 0.30667430171152255, "grad_norm": 0.3706107437610626, "learning_rate": 1.8875180393753745e-05, "loss": 0.5233, "step": 14460 }, { "epoch": 0.30669551016945557, "grad_norm": 0.3644236922264099, "learning_rate": 1.8875026723187324e-05, "loss": 0.4832, "step": 14461 }, { "epoch": 0.3067167186273886, "grad_norm": 0.4104584753513336, "learning_rate": 1.887487304275017e-05, "loss": 0.509, "step": 14462 }, { "epoch": 0.3067379270853216, "grad_norm": 0.3543602228164673, "learning_rate": 1.8874719352442464e-05, "loss": 0.4552, "step": 14463 }, { "epoch": 0.3067591355432546, "grad_norm": 0.3477959632873535, "learning_rate": 1.8874565652264367e-05, "loss": 0.5444, "step": 14464 }, { "epoch": 0.3067803440011877, "grad_norm": 0.37672311067581177, "learning_rate": 1.8874411942216056e-05, "loss": 0.5391, "step": 14465 }, { "epoch": 0.3068015524591207, "grad_norm": 0.3624055087566376, "learning_rate": 1.8874258222297703e-05, "loss": 0.5024, "step": 14466 }, { "epoch": 0.30682276091705374, "grad_norm": 0.32673999667167664, "learning_rate": 1.8874104492509476e-05, "loss": 0.4287, "step": 14467 }, { "epoch": 0.30684396937498676, "grad_norm": 0.3388892412185669, "learning_rate": 1.887395075285155e-05, "loss": 0.5686, "step": 14468 }, { "epoch": 0.3068651778329198, "grad_norm": 0.38744935393333435, "learning_rate": 1.8873797003324087e-05, "loss": 0.5446, "step": 14469 }, { "epoch": 0.3068863862908528, "grad_norm": 0.39285707473754883, "learning_rate": 1.887364324392727e-05, "loss": 0.5253, "step": 14470 }, { "epoch": 0.3069075947487858, "grad_norm": 0.3538581430912018, "learning_rate": 1.887348947466126e-05, "loss": 0.5168, "step": 14471 }, { "epoch": 0.30692880320671884, "grad_norm": 0.3026984632015228, "learning_rate": 1.8873335695526234e-05, "loss": 0.5819, "step": 14472 }, { "epoch": 0.30695001166465186, "grad_norm": 0.4110077917575836, "learning_rate": 1.887318190652236e-05, "loss": 0.4407, "step": 14473 }, { "epoch": 0.3069712201225849, "grad_norm": 0.37438201904296875, "learning_rate": 1.887302810764981e-05, "loss": 0.5027, "step": 14474 }, { "epoch": 0.3069924285805179, "grad_norm": 0.34674742817878723, "learning_rate": 1.8872874298908758e-05, "loss": 0.5054, "step": 14475 }, { "epoch": 0.3070136370384509, "grad_norm": 0.3005047142505646, "learning_rate": 1.8872720480299368e-05, "loss": 0.5559, "step": 14476 }, { "epoch": 0.30703484549638393, "grad_norm": 0.35598528385162354, "learning_rate": 1.887256665182182e-05, "loss": 0.526, "step": 14477 }, { "epoch": 0.30705605395431695, "grad_norm": 0.39544591307640076, "learning_rate": 1.887241281347628e-05, "loss": 0.447, "step": 14478 }, { "epoch": 0.30707726241225003, "grad_norm": 0.3306322693824768, "learning_rate": 1.8872258965262915e-05, "loss": 0.5813, "step": 14479 }, { "epoch": 0.30709847087018305, "grad_norm": 0.38249871134757996, "learning_rate": 1.8872105107181905e-05, "loss": 0.5595, "step": 14480 }, { "epoch": 0.30711967932811607, "grad_norm": 0.3313012719154358, "learning_rate": 1.8871951239233415e-05, "loss": 0.5218, "step": 14481 }, { "epoch": 0.3071408877860491, "grad_norm": 0.35731345415115356, "learning_rate": 1.8871797361417618e-05, "loss": 0.5137, "step": 14482 }, { "epoch": 0.3071620962439821, "grad_norm": 0.3235286474227905, "learning_rate": 1.8871643473734684e-05, "loss": 0.4898, "step": 14483 }, { "epoch": 0.3071833047019151, "grad_norm": 0.3562163710594177, "learning_rate": 1.8871489576184788e-05, "loss": 0.5058, "step": 14484 }, { "epoch": 0.30720451315984815, "grad_norm": 0.34067410230636597, "learning_rate": 1.8871335668768097e-05, "loss": 0.4841, "step": 14485 }, { "epoch": 0.30722572161778117, "grad_norm": 0.37545058131217957, "learning_rate": 1.8871181751484783e-05, "loss": 0.4587, "step": 14486 }, { "epoch": 0.3072469300757142, "grad_norm": 0.340101957321167, "learning_rate": 1.887102782433502e-05, "loss": 0.4859, "step": 14487 }, { "epoch": 0.3072681385336472, "grad_norm": 0.2956654131412506, "learning_rate": 1.887087388731897e-05, "loss": 0.4342, "step": 14488 }, { "epoch": 0.3072893469915802, "grad_norm": 0.39408883452415466, "learning_rate": 1.887071994043682e-05, "loss": 0.4691, "step": 14489 }, { "epoch": 0.30731055544951325, "grad_norm": 0.32039526104927063, "learning_rate": 1.8870565983688726e-05, "loss": 0.5115, "step": 14490 }, { "epoch": 0.30733176390744626, "grad_norm": 0.3132452666759491, "learning_rate": 1.8870412017074867e-05, "loss": 0.5459, "step": 14491 }, { "epoch": 0.30735297236537934, "grad_norm": 0.36626604199409485, "learning_rate": 1.8870258040595416e-05, "loss": 0.3964, "step": 14492 }, { "epoch": 0.30737418082331236, "grad_norm": 0.35887953639030457, "learning_rate": 1.8870104054250538e-05, "loss": 0.5488, "step": 14493 }, { "epoch": 0.3073953892812454, "grad_norm": 0.3199739158153534, "learning_rate": 1.8869950058040406e-05, "loss": 0.5054, "step": 14494 }, { "epoch": 0.3074165977391784, "grad_norm": 0.35809072852134705, "learning_rate": 1.8869796051965196e-05, "loss": 0.5587, "step": 14495 }, { "epoch": 0.3074378061971114, "grad_norm": 0.6193200945854187, "learning_rate": 1.8869642036025075e-05, "loss": 0.5318, "step": 14496 }, { "epoch": 0.30745901465504444, "grad_norm": 0.33916330337524414, "learning_rate": 1.8869488010220215e-05, "loss": 0.4518, "step": 14497 }, { "epoch": 0.30748022311297746, "grad_norm": 0.31013354659080505, "learning_rate": 1.886933397455079e-05, "loss": 0.4737, "step": 14498 }, { "epoch": 0.3075014315709105, "grad_norm": 0.3413242995738983, "learning_rate": 1.8869179929016963e-05, "loss": 0.4987, "step": 14499 }, { "epoch": 0.3075226400288435, "grad_norm": 0.3161473870277405, "learning_rate": 1.8869025873618913e-05, "loss": 0.5283, "step": 14500 }, { "epoch": 0.3075438484867765, "grad_norm": 0.3368251323699951, "learning_rate": 1.8868871808356812e-05, "loss": 0.4691, "step": 14501 }, { "epoch": 0.30756505694470954, "grad_norm": 0.3423457443714142, "learning_rate": 1.886871773323083e-05, "loss": 0.5119, "step": 14502 }, { "epoch": 0.30758626540264256, "grad_norm": 0.4480579197406769, "learning_rate": 1.886856364824113e-05, "loss": 0.5553, "step": 14503 }, { "epoch": 0.3076074738605756, "grad_norm": 0.30928945541381836, "learning_rate": 1.8868409553387897e-05, "loss": 0.4422, "step": 14504 }, { "epoch": 0.3076286823185086, "grad_norm": 0.35716015100479126, "learning_rate": 1.8868255448671293e-05, "loss": 0.6178, "step": 14505 }, { "epoch": 0.30764989077644167, "grad_norm": 0.604253888130188, "learning_rate": 1.8868101334091495e-05, "loss": 0.525, "step": 14506 }, { "epoch": 0.3076710992343747, "grad_norm": 1.0133415460586548, "learning_rate": 1.8867947209648672e-05, "loss": 0.5752, "step": 14507 }, { "epoch": 0.3076923076923077, "grad_norm": 0.31676778197288513, "learning_rate": 1.886779307534299e-05, "loss": 0.5181, "step": 14508 }, { "epoch": 0.30771351615024073, "grad_norm": 0.35558101534843445, "learning_rate": 1.886763893117463e-05, "loss": 0.5182, "step": 14509 }, { "epoch": 0.30773472460817375, "grad_norm": 0.4184972047805786, "learning_rate": 1.886748477714376e-05, "loss": 0.5339, "step": 14510 }, { "epoch": 0.30775593306610677, "grad_norm": 0.6708607077598572, "learning_rate": 1.8867330613250546e-05, "loss": 0.5753, "step": 14511 }, { "epoch": 0.3077771415240398, "grad_norm": 0.34967806935310364, "learning_rate": 1.8867176439495166e-05, "loss": 0.6267, "step": 14512 }, { "epoch": 0.3077983499819728, "grad_norm": 0.3494112491607666, "learning_rate": 1.8867022255877792e-05, "loss": 0.5256, "step": 14513 }, { "epoch": 0.3078195584399058, "grad_norm": 0.35462746024131775, "learning_rate": 1.8866868062398593e-05, "loss": 0.4692, "step": 14514 }, { "epoch": 0.30784076689783885, "grad_norm": 0.3228527307510376, "learning_rate": 1.8866713859057734e-05, "loss": 0.462, "step": 14515 }, { "epoch": 0.30786197535577187, "grad_norm": 0.4636370539665222, "learning_rate": 1.8866559645855397e-05, "loss": 0.5531, "step": 14516 }, { "epoch": 0.3078831838137049, "grad_norm": 0.35452690720558167, "learning_rate": 1.886640542279175e-05, "loss": 0.5641, "step": 14517 }, { "epoch": 0.3079043922716379, "grad_norm": 0.8572847247123718, "learning_rate": 1.8866251189866964e-05, "loss": 0.5503, "step": 14518 }, { "epoch": 0.3079256007295709, "grad_norm": 0.3243367075920105, "learning_rate": 1.886609694708121e-05, "loss": 0.453, "step": 14519 }, { "epoch": 0.307946809187504, "grad_norm": 0.3573552966117859, "learning_rate": 1.886594269443466e-05, "loss": 0.5518, "step": 14520 }, { "epoch": 0.307968017645437, "grad_norm": 0.3415551483631134, "learning_rate": 1.8865788431927482e-05, "loss": 0.4833, "step": 14521 }, { "epoch": 0.30798922610337004, "grad_norm": 0.45771920680999756, "learning_rate": 1.8865634159559856e-05, "loss": 0.5031, "step": 14522 }, { "epoch": 0.30801043456130306, "grad_norm": 0.4050944447517395, "learning_rate": 1.8865479877331946e-05, "loss": 0.4881, "step": 14523 }, { "epoch": 0.3080316430192361, "grad_norm": 0.3396764099597931, "learning_rate": 1.886532558524393e-05, "loss": 0.5492, "step": 14524 }, { "epoch": 0.3080528514771691, "grad_norm": 0.3222036361694336, "learning_rate": 1.886517128329597e-05, "loss": 0.5737, "step": 14525 }, { "epoch": 0.3080740599351021, "grad_norm": 0.34504857659339905, "learning_rate": 1.8865016971488247e-05, "loss": 0.4959, "step": 14526 }, { "epoch": 0.30809526839303514, "grad_norm": 0.3456345200538635, "learning_rate": 1.8864862649820928e-05, "loss": 0.4996, "step": 14527 }, { "epoch": 0.30811647685096816, "grad_norm": 0.39088863134384155, "learning_rate": 1.886470831829419e-05, "loss": 0.4942, "step": 14528 }, { "epoch": 0.3081376853089012, "grad_norm": 0.33690983057022095, "learning_rate": 1.8864553976908194e-05, "loss": 0.5153, "step": 14529 }, { "epoch": 0.3081588937668342, "grad_norm": 0.332075834274292, "learning_rate": 1.8864399625663122e-05, "loss": 0.6033, "step": 14530 }, { "epoch": 0.3081801022247672, "grad_norm": 0.333763986825943, "learning_rate": 1.886424526455914e-05, "loss": 0.5493, "step": 14531 }, { "epoch": 0.30820131068270024, "grad_norm": 0.3199920356273651, "learning_rate": 1.8864090893596426e-05, "loss": 0.473, "step": 14532 }, { "epoch": 0.3082225191406333, "grad_norm": 0.3300851583480835, "learning_rate": 1.886393651277514e-05, "loss": 0.5182, "step": 14533 }, { "epoch": 0.30824372759856633, "grad_norm": 0.3414529860019684, "learning_rate": 1.8863782122095467e-05, "loss": 0.478, "step": 14534 }, { "epoch": 0.30826493605649935, "grad_norm": 0.43747156858444214, "learning_rate": 1.886362772155757e-05, "loss": 0.5006, "step": 14535 }, { "epoch": 0.30828614451443237, "grad_norm": 0.3325398564338684, "learning_rate": 1.8863473311161618e-05, "loss": 0.5298, "step": 14536 }, { "epoch": 0.3083073529723654, "grad_norm": 0.33527976274490356, "learning_rate": 1.8863318890907796e-05, "loss": 0.5706, "step": 14537 }, { "epoch": 0.3083285614302984, "grad_norm": 0.35920006036758423, "learning_rate": 1.8863164460796265e-05, "loss": 0.4874, "step": 14538 }, { "epoch": 0.30834976988823143, "grad_norm": 0.38296541571617126, "learning_rate": 1.8863010020827198e-05, "loss": 0.5834, "step": 14539 }, { "epoch": 0.30837097834616445, "grad_norm": 0.3943960964679718, "learning_rate": 1.886285557100077e-05, "loss": 0.5084, "step": 14540 }, { "epoch": 0.30839218680409747, "grad_norm": 0.3483371138572693, "learning_rate": 1.886270111131715e-05, "loss": 0.4291, "step": 14541 }, { "epoch": 0.3084133952620305, "grad_norm": 0.34658658504486084, "learning_rate": 1.8862546641776512e-05, "loss": 0.4817, "step": 14542 }, { "epoch": 0.3084346037199635, "grad_norm": 0.2964179515838623, "learning_rate": 1.8862392162379027e-05, "loss": 0.507, "step": 14543 }, { "epoch": 0.3084558121778965, "grad_norm": 0.3003751337528229, "learning_rate": 1.8862237673124864e-05, "loss": 0.4569, "step": 14544 }, { "epoch": 0.30847702063582955, "grad_norm": 0.33543309569358826, "learning_rate": 1.88620831740142e-05, "loss": 0.4931, "step": 14545 }, { "epoch": 0.30849822909376257, "grad_norm": 0.3271726369857788, "learning_rate": 1.88619286650472e-05, "loss": 0.5072, "step": 14546 }, { "epoch": 0.30851943755169564, "grad_norm": 0.3656357526779175, "learning_rate": 1.8861774146224045e-05, "loss": 0.5624, "step": 14547 }, { "epoch": 0.30854064600962866, "grad_norm": 0.30800938606262207, "learning_rate": 1.8861619617544896e-05, "loss": 0.4912, "step": 14548 }, { "epoch": 0.3085618544675617, "grad_norm": 0.35482415556907654, "learning_rate": 1.8861465079009936e-05, "loss": 0.4993, "step": 14549 }, { "epoch": 0.3085830629254947, "grad_norm": 0.35994210839271545, "learning_rate": 1.886131053061933e-05, "loss": 0.5612, "step": 14550 }, { "epoch": 0.3086042713834277, "grad_norm": 0.503848135471344, "learning_rate": 1.886115597237325e-05, "loss": 0.5414, "step": 14551 }, { "epoch": 0.30862547984136074, "grad_norm": 0.30438295006752014, "learning_rate": 1.886100140427187e-05, "loss": 0.506, "step": 14552 }, { "epoch": 0.30864668829929376, "grad_norm": 0.30192580819129944, "learning_rate": 1.886084682631536e-05, "loss": 0.4402, "step": 14553 }, { "epoch": 0.3086678967572268, "grad_norm": 0.3701073229312897, "learning_rate": 1.8860692238503894e-05, "loss": 0.5221, "step": 14554 }, { "epoch": 0.3086891052151598, "grad_norm": 0.33310580253601074, "learning_rate": 1.8860537640837645e-05, "loss": 0.4457, "step": 14555 }, { "epoch": 0.3087103136730928, "grad_norm": 0.3384035527706146, "learning_rate": 1.8860383033316784e-05, "loss": 0.4878, "step": 14556 }, { "epoch": 0.30873152213102584, "grad_norm": 0.3635580241680145, "learning_rate": 1.8860228415941478e-05, "loss": 0.5635, "step": 14557 }, { "epoch": 0.30875273058895886, "grad_norm": 0.35472217202186584, "learning_rate": 1.8860073788711902e-05, "loss": 0.5147, "step": 14558 }, { "epoch": 0.3087739390468919, "grad_norm": 0.35968759655952454, "learning_rate": 1.8859919151628233e-05, "loss": 0.5036, "step": 14559 }, { "epoch": 0.30879514750482495, "grad_norm": 0.31931766867637634, "learning_rate": 1.885976450469064e-05, "loss": 0.5173, "step": 14560 }, { "epoch": 0.30881635596275797, "grad_norm": 0.3184349834918976, "learning_rate": 1.885960984789929e-05, "loss": 0.5189, "step": 14561 }, { "epoch": 0.308837564420691, "grad_norm": 0.38141730427742004, "learning_rate": 1.885945518125436e-05, "loss": 0.6506, "step": 14562 }, { "epoch": 0.308858772878624, "grad_norm": 0.4188999831676483, "learning_rate": 1.885930050475602e-05, "loss": 0.5037, "step": 14563 }, { "epoch": 0.30887998133655703, "grad_norm": 0.37069082260131836, "learning_rate": 1.8859145818404447e-05, "loss": 0.5199, "step": 14564 }, { "epoch": 0.30890118979449005, "grad_norm": 0.3004796504974365, "learning_rate": 1.8858991122199802e-05, "loss": 0.4791, "step": 14565 }, { "epoch": 0.30892239825242307, "grad_norm": 0.531197726726532, "learning_rate": 1.885883641614227e-05, "loss": 0.4018, "step": 14566 }, { "epoch": 0.3089436067103561, "grad_norm": 0.2914358079433441, "learning_rate": 1.8858681700232015e-05, "loss": 0.4633, "step": 14567 }, { "epoch": 0.3089648151682891, "grad_norm": 0.37659528851509094, "learning_rate": 1.885852697446921e-05, "loss": 0.5163, "step": 14568 }, { "epoch": 0.30898602362622213, "grad_norm": 0.3487325608730316, "learning_rate": 1.8858372238854027e-05, "loss": 0.5581, "step": 14569 }, { "epoch": 0.30900723208415515, "grad_norm": 0.3977675437927246, "learning_rate": 1.8858217493386644e-05, "loss": 0.5857, "step": 14570 }, { "epoch": 0.30902844054208817, "grad_norm": 0.42167773842811584, "learning_rate": 1.8858062738067228e-05, "loss": 0.6126, "step": 14571 }, { "epoch": 0.3090496490000212, "grad_norm": 1.0397439002990723, "learning_rate": 1.8857907972895948e-05, "loss": 0.5502, "step": 14572 }, { "epoch": 0.3090708574579542, "grad_norm": 0.3636747896671295, "learning_rate": 1.885775319787298e-05, "loss": 0.532, "step": 14573 }, { "epoch": 0.3090920659158873, "grad_norm": 0.4501069486141205, "learning_rate": 1.8857598412998498e-05, "loss": 0.5552, "step": 14574 }, { "epoch": 0.3091132743738203, "grad_norm": 0.3841038942337036, "learning_rate": 1.8857443618272673e-05, "loss": 0.5139, "step": 14575 }, { "epoch": 0.3091344828317533, "grad_norm": 0.3897817134857178, "learning_rate": 1.8857288813695672e-05, "loss": 0.5727, "step": 14576 }, { "epoch": 0.30915569128968634, "grad_norm": 0.3778780698776245, "learning_rate": 1.8857133999267675e-05, "loss": 0.5091, "step": 14577 }, { "epoch": 0.30917689974761936, "grad_norm": 0.3340212106704712, "learning_rate": 1.885697917498885e-05, "loss": 0.4258, "step": 14578 }, { "epoch": 0.3091981082055524, "grad_norm": 0.31621354818344116, "learning_rate": 1.8856824340859368e-05, "loss": 0.4729, "step": 14579 }, { "epoch": 0.3092193166634854, "grad_norm": 0.3336848020553589, "learning_rate": 1.8856669496879404e-05, "loss": 0.5088, "step": 14580 }, { "epoch": 0.3092405251214184, "grad_norm": 0.3808107376098633, "learning_rate": 1.885651464304913e-05, "loss": 0.5342, "step": 14581 }, { "epoch": 0.30926173357935144, "grad_norm": 0.32146742939949036, "learning_rate": 1.8856359779368715e-05, "loss": 0.4982, "step": 14582 }, { "epoch": 0.30928294203728446, "grad_norm": 0.3147730827331543, "learning_rate": 1.8856204905838337e-05, "loss": 0.5369, "step": 14583 }, { "epoch": 0.3093041504952175, "grad_norm": 0.3022198975086212, "learning_rate": 1.885605002245816e-05, "loss": 0.47, "step": 14584 }, { "epoch": 0.3093253589531505, "grad_norm": 0.35511937737464905, "learning_rate": 1.8855895129228364e-05, "loss": 0.5111, "step": 14585 }, { "epoch": 0.3093465674110835, "grad_norm": 0.3396582305431366, "learning_rate": 1.885574022614912e-05, "loss": 0.5088, "step": 14586 }, { "epoch": 0.30936777586901654, "grad_norm": 0.35912585258483887, "learning_rate": 1.88555853132206e-05, "loss": 0.5139, "step": 14587 }, { "epoch": 0.3093889843269496, "grad_norm": 0.3376428484916687, "learning_rate": 1.885543039044297e-05, "loss": 0.4929, "step": 14588 }, { "epoch": 0.30941019278488263, "grad_norm": 0.35488709807395935, "learning_rate": 1.885527545781641e-05, "loss": 0.5588, "step": 14589 }, { "epoch": 0.30943140124281565, "grad_norm": 0.3087560832500458, "learning_rate": 1.8855120515341086e-05, "loss": 0.4716, "step": 14590 }, { "epoch": 0.30945260970074867, "grad_norm": 0.3637963831424713, "learning_rate": 1.8854965563017175e-05, "loss": 0.4945, "step": 14591 }, { "epoch": 0.3094738181586817, "grad_norm": 0.33148080110549927, "learning_rate": 1.8854810600844852e-05, "loss": 0.5465, "step": 14592 }, { "epoch": 0.3094950266166147, "grad_norm": 0.3470924496650696, "learning_rate": 1.8854655628824285e-05, "loss": 0.5085, "step": 14593 }, { "epoch": 0.30951623507454773, "grad_norm": 0.3615637421607971, "learning_rate": 1.885450064695564e-05, "loss": 0.5216, "step": 14594 }, { "epoch": 0.30953744353248075, "grad_norm": 0.31551089882850647, "learning_rate": 1.8854345655239107e-05, "loss": 0.5357, "step": 14595 }, { "epoch": 0.30955865199041377, "grad_norm": 0.355682373046875, "learning_rate": 1.8854190653674844e-05, "loss": 0.5603, "step": 14596 }, { "epoch": 0.3095798604483468, "grad_norm": 0.46719300746917725, "learning_rate": 1.8854035642263022e-05, "loss": 0.4505, "step": 14597 }, { "epoch": 0.3096010689062798, "grad_norm": 0.3337452709674835, "learning_rate": 1.8853880621003823e-05, "loss": 0.4746, "step": 14598 }, { "epoch": 0.30962227736421283, "grad_norm": 0.36688271164894104, "learning_rate": 1.885372558989741e-05, "loss": 0.597, "step": 14599 }, { "epoch": 0.30964348582214585, "grad_norm": 0.3352097272872925, "learning_rate": 1.8853570548943967e-05, "loss": 0.6109, "step": 14600 }, { "epoch": 0.3096646942800789, "grad_norm": 0.3360934257507324, "learning_rate": 1.8853415498143657e-05, "loss": 0.387, "step": 14601 }, { "epoch": 0.30968590273801194, "grad_norm": 0.3352397382259369, "learning_rate": 1.8853260437496656e-05, "loss": 0.4896, "step": 14602 }, { "epoch": 0.30970711119594496, "grad_norm": 0.34387195110321045, "learning_rate": 1.885310536700313e-05, "loss": 0.4701, "step": 14603 }, { "epoch": 0.309728319653878, "grad_norm": 0.3522907495498657, "learning_rate": 1.8852950286663265e-05, "loss": 0.4707, "step": 14604 }, { "epoch": 0.309749528111811, "grad_norm": 0.3754212558269501, "learning_rate": 1.8852795196477224e-05, "loss": 0.5661, "step": 14605 }, { "epoch": 0.309770736569744, "grad_norm": 0.3258441984653473, "learning_rate": 1.885264009644518e-05, "loss": 0.4725, "step": 14606 }, { "epoch": 0.30979194502767704, "grad_norm": 0.898348867893219, "learning_rate": 1.8852484986567306e-05, "loss": 0.5226, "step": 14607 }, { "epoch": 0.30981315348561006, "grad_norm": 0.34638577699661255, "learning_rate": 1.8852329866843772e-05, "loss": 0.5773, "step": 14608 }, { "epoch": 0.3098343619435431, "grad_norm": 0.3599030673503876, "learning_rate": 1.8852174737274758e-05, "loss": 0.5745, "step": 14609 }, { "epoch": 0.3098555704014761, "grad_norm": 0.3197329640388489, "learning_rate": 1.8852019597860432e-05, "loss": 0.5699, "step": 14610 }, { "epoch": 0.3098767788594091, "grad_norm": 0.35484498739242554, "learning_rate": 1.8851864448600964e-05, "loss": 0.496, "step": 14611 }, { "epoch": 0.30989798731734214, "grad_norm": 0.32682138681411743, "learning_rate": 1.885170928949653e-05, "loss": 0.5121, "step": 14612 }, { "epoch": 0.30991919577527516, "grad_norm": 0.3646218478679657, "learning_rate": 1.8851554120547305e-05, "loss": 0.5718, "step": 14613 }, { "epoch": 0.3099404042332082, "grad_norm": 0.36950406432151794, "learning_rate": 1.8851398941753455e-05, "loss": 0.5379, "step": 14614 }, { "epoch": 0.30996161269114125, "grad_norm": 0.33257752656936646, "learning_rate": 1.8851243753115155e-05, "loss": 0.4688, "step": 14615 }, { "epoch": 0.3099828211490743, "grad_norm": 0.35576099157333374, "learning_rate": 1.8851088554632583e-05, "loss": 0.5435, "step": 14616 }, { "epoch": 0.3100040296070073, "grad_norm": 1.8307815790176392, "learning_rate": 1.8850933346305903e-05, "loss": 0.4835, "step": 14617 }, { "epoch": 0.3100252380649403, "grad_norm": 0.34662047028541565, "learning_rate": 1.8850778128135294e-05, "loss": 0.4924, "step": 14618 }, { "epoch": 0.31004644652287333, "grad_norm": 0.30585548281669617, "learning_rate": 1.8850622900120926e-05, "loss": 0.5804, "step": 14619 }, { "epoch": 0.31006765498080635, "grad_norm": 0.34099265933036804, "learning_rate": 1.885046766226297e-05, "loss": 0.4961, "step": 14620 }, { "epoch": 0.31008886343873937, "grad_norm": 0.3520852029323578, "learning_rate": 1.885031241456161e-05, "loss": 0.5161, "step": 14621 }, { "epoch": 0.3101100718966724, "grad_norm": 0.33949342370033264, "learning_rate": 1.8850157157016998e-05, "loss": 0.5123, "step": 14622 }, { "epoch": 0.3101312803546054, "grad_norm": 0.33241400122642517, "learning_rate": 1.8850001889629323e-05, "loss": 0.4754, "step": 14623 }, { "epoch": 0.31015248881253843, "grad_norm": 0.33306270837783813, "learning_rate": 1.8849846612398752e-05, "loss": 0.5063, "step": 14624 }, { "epoch": 0.31017369727047145, "grad_norm": 0.33270785212516785, "learning_rate": 1.884969132532546e-05, "loss": 0.4816, "step": 14625 }, { "epoch": 0.31019490572840447, "grad_norm": 0.39685872197151184, "learning_rate": 1.8849536028409615e-05, "loss": 0.6101, "step": 14626 }, { "epoch": 0.3102161141863375, "grad_norm": 0.299716591835022, "learning_rate": 1.8849380721651398e-05, "loss": 0.4326, "step": 14627 }, { "epoch": 0.3102373226442705, "grad_norm": 0.4122583568096161, "learning_rate": 1.8849225405050973e-05, "loss": 0.5246, "step": 14628 }, { "epoch": 0.3102585311022036, "grad_norm": 0.37462151050567627, "learning_rate": 1.8849070078608517e-05, "loss": 0.5552, "step": 14629 }, { "epoch": 0.3102797395601366, "grad_norm": 0.3268653154373169, "learning_rate": 1.88489147423242e-05, "loss": 0.5146, "step": 14630 }, { "epoch": 0.3103009480180696, "grad_norm": 0.33741769194602966, "learning_rate": 1.88487593961982e-05, "loss": 0.4757, "step": 14631 }, { "epoch": 0.31032215647600264, "grad_norm": 0.6919705271720886, "learning_rate": 1.8848604040230686e-05, "loss": 0.4666, "step": 14632 }, { "epoch": 0.31034336493393566, "grad_norm": 0.3644726276397705, "learning_rate": 1.8848448674421827e-05, "loss": 0.4822, "step": 14633 }, { "epoch": 0.3103645733918687, "grad_norm": 0.31307029724121094, "learning_rate": 1.884829329877181e-05, "loss": 0.4713, "step": 14634 }, { "epoch": 0.3103857818498017, "grad_norm": 0.3725956380367279, "learning_rate": 1.884813791328079e-05, "loss": 0.5364, "step": 14635 }, { "epoch": 0.3104069903077347, "grad_norm": 0.3165268003940582, "learning_rate": 1.8847982517948952e-05, "loss": 0.5081, "step": 14636 }, { "epoch": 0.31042819876566774, "grad_norm": 0.37288087606430054, "learning_rate": 1.8847827112776464e-05, "loss": 0.3572, "step": 14637 }, { "epoch": 0.31044940722360076, "grad_norm": 0.3682188093662262, "learning_rate": 1.8847671697763495e-05, "loss": 0.532, "step": 14638 }, { "epoch": 0.3104706156815338, "grad_norm": 0.3431658446788788, "learning_rate": 1.8847516272910223e-05, "loss": 0.5446, "step": 14639 }, { "epoch": 0.3104918241394668, "grad_norm": 0.4062819182872772, "learning_rate": 1.8847360838216828e-05, "loss": 0.5581, "step": 14640 }, { "epoch": 0.3105130325973998, "grad_norm": 0.3234120309352875, "learning_rate": 1.8847205393683467e-05, "loss": 0.4787, "step": 14641 }, { "epoch": 0.3105342410553329, "grad_norm": 0.3193095624446869, "learning_rate": 1.8847049939310328e-05, "loss": 0.5645, "step": 14642 }, { "epoch": 0.3105554495132659, "grad_norm": 0.2807515263557434, "learning_rate": 1.8846894475097573e-05, "loss": 0.3642, "step": 14643 }, { "epoch": 0.31057665797119893, "grad_norm": 0.339108407497406, "learning_rate": 1.8846739001045378e-05, "loss": 0.5271, "step": 14644 }, { "epoch": 0.31059786642913195, "grad_norm": 0.3617912828922272, "learning_rate": 1.8846583517153916e-05, "loss": 0.4877, "step": 14645 }, { "epoch": 0.31061907488706497, "grad_norm": 0.35194212198257446, "learning_rate": 1.884642802342336e-05, "loss": 0.4967, "step": 14646 }, { "epoch": 0.310640283344998, "grad_norm": 0.5985755324363708, "learning_rate": 1.884627251985389e-05, "loss": 0.4753, "step": 14647 }, { "epoch": 0.310661491802931, "grad_norm": 0.40872302651405334, "learning_rate": 1.884611700644567e-05, "loss": 0.5357, "step": 14648 }, { "epoch": 0.31068270026086403, "grad_norm": 0.34024113416671753, "learning_rate": 1.884596148319887e-05, "loss": 0.5718, "step": 14649 }, { "epoch": 0.31070390871879705, "grad_norm": 0.3380354344844818, "learning_rate": 1.8845805950113674e-05, "loss": 0.482, "step": 14650 }, { "epoch": 0.31072511717673007, "grad_norm": 0.3239508867263794, "learning_rate": 1.8845650407190247e-05, "loss": 0.4783, "step": 14651 }, { "epoch": 0.3107463256346631, "grad_norm": 0.6211571097373962, "learning_rate": 1.8845494854428766e-05, "loss": 0.5253, "step": 14652 }, { "epoch": 0.3107675340925961, "grad_norm": 0.34697121381759644, "learning_rate": 1.8845339291829404e-05, "loss": 0.5416, "step": 14653 }, { "epoch": 0.31078874255052913, "grad_norm": 0.332123726606369, "learning_rate": 1.8845183719392327e-05, "loss": 0.5085, "step": 14654 }, { "epoch": 0.31080995100846215, "grad_norm": 0.5229790210723877, "learning_rate": 1.8845028137117717e-05, "loss": 0.5933, "step": 14655 }, { "epoch": 0.3108311594663952, "grad_norm": 0.3212500214576721, "learning_rate": 1.8844872545005745e-05, "loss": 0.4879, "step": 14656 }, { "epoch": 0.31085236792432824, "grad_norm": 0.3805592358112335, "learning_rate": 1.884471694305658e-05, "loss": 0.5487, "step": 14657 }, { "epoch": 0.31087357638226126, "grad_norm": 0.3313358724117279, "learning_rate": 1.88445613312704e-05, "loss": 0.486, "step": 14658 }, { "epoch": 0.3108947848401943, "grad_norm": 0.461789608001709, "learning_rate": 1.8844405709647377e-05, "loss": 0.5551, "step": 14659 }, { "epoch": 0.3109159932981273, "grad_norm": 0.3665784001350403, "learning_rate": 1.884425007818768e-05, "loss": 0.5914, "step": 14660 }, { "epoch": 0.3109372017560603, "grad_norm": 0.331924170255661, "learning_rate": 1.8844094436891485e-05, "loss": 0.4936, "step": 14661 }, { "epoch": 0.31095841021399334, "grad_norm": 0.32223790884017944, "learning_rate": 1.8843938785758965e-05, "loss": 0.4947, "step": 14662 }, { "epoch": 0.31097961867192636, "grad_norm": 0.3642667829990387, "learning_rate": 1.8843783124790292e-05, "loss": 0.5306, "step": 14663 }, { "epoch": 0.3110008271298594, "grad_norm": 0.6586233973503113, "learning_rate": 1.8843627453985642e-05, "loss": 0.4914, "step": 14664 }, { "epoch": 0.3110220355877924, "grad_norm": 0.3280273675918579, "learning_rate": 1.8843471773345185e-05, "loss": 0.5755, "step": 14665 }, { "epoch": 0.3110432440457254, "grad_norm": 0.3597758412361145, "learning_rate": 1.8843316082869096e-05, "loss": 0.4838, "step": 14666 }, { "epoch": 0.31106445250365844, "grad_norm": 0.32460305094718933, "learning_rate": 1.884316038255755e-05, "loss": 0.4467, "step": 14667 }, { "epoch": 0.31108566096159146, "grad_norm": 0.4339667558670044, "learning_rate": 1.8843004672410715e-05, "loss": 0.5192, "step": 14668 }, { "epoch": 0.3111068694195245, "grad_norm": 0.3715800940990448, "learning_rate": 1.884284895242877e-05, "loss": 0.4484, "step": 14669 }, { "epoch": 0.31112807787745755, "grad_norm": 0.4098343849182129, "learning_rate": 1.884269322261188e-05, "loss": 0.6006, "step": 14670 }, { "epoch": 0.3111492863353906, "grad_norm": 0.3841528594493866, "learning_rate": 1.8842537482960225e-05, "loss": 0.5675, "step": 14671 }, { "epoch": 0.3111704947933236, "grad_norm": 0.3109731674194336, "learning_rate": 1.884238173347398e-05, "loss": 0.4408, "step": 14672 }, { "epoch": 0.3111917032512566, "grad_norm": 0.383707195520401, "learning_rate": 1.8842225974153313e-05, "loss": 0.5637, "step": 14673 }, { "epoch": 0.31121291170918963, "grad_norm": 0.4567091464996338, "learning_rate": 1.8842070204998397e-05, "loss": 0.5906, "step": 14674 }, { "epoch": 0.31123412016712265, "grad_norm": 0.3087098300457001, "learning_rate": 1.8841914426009412e-05, "loss": 0.5046, "step": 14675 }, { "epoch": 0.31125532862505567, "grad_norm": 0.4010385274887085, "learning_rate": 1.8841758637186523e-05, "loss": 0.4928, "step": 14676 }, { "epoch": 0.3112765370829887, "grad_norm": 0.3318479657173157, "learning_rate": 1.8841602838529907e-05, "loss": 0.4274, "step": 14677 }, { "epoch": 0.3112977455409217, "grad_norm": 0.33343368768692017, "learning_rate": 1.884144703003974e-05, "loss": 0.4767, "step": 14678 }, { "epoch": 0.31131895399885473, "grad_norm": 0.3670431077480316, "learning_rate": 1.8841291211716187e-05, "loss": 0.4471, "step": 14679 }, { "epoch": 0.31134016245678775, "grad_norm": 0.35516294836997986, "learning_rate": 1.8841135383559433e-05, "loss": 0.4648, "step": 14680 }, { "epoch": 0.31136137091472077, "grad_norm": 0.3085872530937195, "learning_rate": 1.8840979545569643e-05, "loss": 0.5192, "step": 14681 }, { "epoch": 0.3113825793726538, "grad_norm": 0.34799253940582275, "learning_rate": 1.884082369774699e-05, "loss": 0.4761, "step": 14682 }, { "epoch": 0.31140378783058686, "grad_norm": 0.3446964621543884, "learning_rate": 1.884066784009165e-05, "loss": 0.5993, "step": 14683 }, { "epoch": 0.3114249962885199, "grad_norm": 0.37682193517684937, "learning_rate": 1.88405119726038e-05, "loss": 0.5749, "step": 14684 }, { "epoch": 0.3114462047464529, "grad_norm": 0.3568458557128906, "learning_rate": 1.8840356095283607e-05, "loss": 0.609, "step": 14685 }, { "epoch": 0.3114674132043859, "grad_norm": 0.7161781787872314, "learning_rate": 1.884020020813124e-05, "loss": 0.6312, "step": 14686 }, { "epoch": 0.31148862166231894, "grad_norm": 0.38089531660079956, "learning_rate": 1.884004431114689e-05, "loss": 0.4539, "step": 14687 }, { "epoch": 0.31150983012025196, "grad_norm": 0.30123868584632874, "learning_rate": 1.8839888404330716e-05, "loss": 0.4683, "step": 14688 }, { "epoch": 0.311531038578185, "grad_norm": 0.31997552514076233, "learning_rate": 1.8839732487682895e-05, "loss": 0.567, "step": 14689 }, { "epoch": 0.311552247036118, "grad_norm": 0.4031039774417877, "learning_rate": 1.8839576561203596e-05, "loss": 0.5927, "step": 14690 }, { "epoch": 0.311573455494051, "grad_norm": 0.4267408847808838, "learning_rate": 1.8839420624893e-05, "loss": 0.5443, "step": 14691 }, { "epoch": 0.31159466395198404, "grad_norm": 0.4012182056903839, "learning_rate": 1.8839264678751278e-05, "loss": 0.5365, "step": 14692 }, { "epoch": 0.31161587240991706, "grad_norm": 0.3671874403953552, "learning_rate": 1.88391087227786e-05, "loss": 0.523, "step": 14693 }, { "epoch": 0.3116370808678501, "grad_norm": 0.3255866765975952, "learning_rate": 1.8838952756975146e-05, "loss": 0.4568, "step": 14694 }, { "epoch": 0.3116582893257831, "grad_norm": 0.36019396781921387, "learning_rate": 1.8838796781341086e-05, "loss": 0.5661, "step": 14695 }, { "epoch": 0.3116794977837161, "grad_norm": 0.3570806682109833, "learning_rate": 1.8838640795876588e-05, "loss": 0.5519, "step": 14696 }, { "epoch": 0.3117007062416492, "grad_norm": 0.3652600646018982, "learning_rate": 1.8838484800581832e-05, "loss": 0.5836, "step": 14697 }, { "epoch": 0.3117219146995822, "grad_norm": 0.35332441329956055, "learning_rate": 1.883832879545699e-05, "loss": 0.5927, "step": 14698 }, { "epoch": 0.31174312315751523, "grad_norm": 0.3069678246974945, "learning_rate": 1.8838172780502238e-05, "loss": 0.5489, "step": 14699 }, { "epoch": 0.31176433161544825, "grad_norm": 0.6869966387748718, "learning_rate": 1.8838016755717742e-05, "loss": 0.5623, "step": 14700 }, { "epoch": 0.3117855400733813, "grad_norm": 0.32362014055252075, "learning_rate": 1.8837860721103687e-05, "loss": 0.4661, "step": 14701 }, { "epoch": 0.3118067485313143, "grad_norm": 0.32870057225227356, "learning_rate": 1.8837704676660237e-05, "loss": 0.5156, "step": 14702 }, { "epoch": 0.3118279569892473, "grad_norm": 0.32131779193878174, "learning_rate": 1.8837548622387567e-05, "loss": 0.514, "step": 14703 }, { "epoch": 0.31184916544718033, "grad_norm": 0.4220614731311798, "learning_rate": 1.8837392558285855e-05, "loss": 0.5835, "step": 14704 }, { "epoch": 0.31187037390511335, "grad_norm": 0.33262473344802856, "learning_rate": 1.883723648435527e-05, "loss": 0.5456, "step": 14705 }, { "epoch": 0.31189158236304637, "grad_norm": 0.41170400381088257, "learning_rate": 1.8837080400595987e-05, "loss": 0.5349, "step": 14706 }, { "epoch": 0.3119127908209794, "grad_norm": 0.3645467758178711, "learning_rate": 1.883692430700818e-05, "loss": 0.6095, "step": 14707 }, { "epoch": 0.3119339992789124, "grad_norm": 0.41192784905433655, "learning_rate": 1.8836768203592024e-05, "loss": 0.4664, "step": 14708 }, { "epoch": 0.31195520773684543, "grad_norm": 0.3183664381504059, "learning_rate": 1.8836612090347693e-05, "loss": 0.4425, "step": 14709 }, { "epoch": 0.3119764161947785, "grad_norm": 0.3096064031124115, "learning_rate": 1.8836455967275354e-05, "loss": 0.4716, "step": 14710 }, { "epoch": 0.3119976246527115, "grad_norm": 0.3341454267501831, "learning_rate": 1.883629983437519e-05, "loss": 0.4586, "step": 14711 }, { "epoch": 0.31201883311064454, "grad_norm": 0.31035974621772766, "learning_rate": 1.8836143691647367e-05, "loss": 0.5062, "step": 14712 }, { "epoch": 0.31204004156857756, "grad_norm": 0.37148913741111755, "learning_rate": 1.883598753909206e-05, "loss": 0.5625, "step": 14713 }, { "epoch": 0.3120612500265106, "grad_norm": 0.390727162361145, "learning_rate": 1.8835831376709448e-05, "loss": 0.5506, "step": 14714 }, { "epoch": 0.3120824584844436, "grad_norm": 0.3475368320941925, "learning_rate": 1.88356752044997e-05, "loss": 0.4281, "step": 14715 }, { "epoch": 0.3121036669423766, "grad_norm": 0.35728919506073, "learning_rate": 1.8835519022462993e-05, "loss": 0.5697, "step": 14716 }, { "epoch": 0.31212487540030964, "grad_norm": 0.2919284999370575, "learning_rate": 1.8835362830599497e-05, "loss": 0.4696, "step": 14717 }, { "epoch": 0.31214608385824266, "grad_norm": 0.2900061309337616, "learning_rate": 1.8835206628909385e-05, "loss": 0.4925, "step": 14718 }, { "epoch": 0.3121672923161757, "grad_norm": 0.39003899693489075, "learning_rate": 1.8835050417392834e-05, "loss": 0.5602, "step": 14719 }, { "epoch": 0.3121885007741087, "grad_norm": 0.3606496751308441, "learning_rate": 1.8834894196050016e-05, "loss": 0.5317, "step": 14720 }, { "epoch": 0.3122097092320417, "grad_norm": 0.3509896993637085, "learning_rate": 1.8834737964881108e-05, "loss": 0.5101, "step": 14721 }, { "epoch": 0.31223091768997474, "grad_norm": 0.33812206983566284, "learning_rate": 1.8834581723886282e-05, "loss": 0.5264, "step": 14722 }, { "epoch": 0.31225212614790776, "grad_norm": 0.33246511220932007, "learning_rate": 1.8834425473065708e-05, "loss": 0.4708, "step": 14723 }, { "epoch": 0.31227333460584084, "grad_norm": 0.3176616132259369, "learning_rate": 1.883426921241956e-05, "loss": 0.4792, "step": 14724 }, { "epoch": 0.31229454306377386, "grad_norm": 0.37826094031333923, "learning_rate": 1.8834112941948023e-05, "loss": 0.5662, "step": 14725 }, { "epoch": 0.3123157515217069, "grad_norm": 0.334617555141449, "learning_rate": 1.8833956661651255e-05, "loss": 0.4828, "step": 14726 }, { "epoch": 0.3123369599796399, "grad_norm": 0.3571913242340088, "learning_rate": 1.883380037152944e-05, "loss": 0.4468, "step": 14727 }, { "epoch": 0.3123581684375729, "grad_norm": 0.34870320558547974, "learning_rate": 1.883364407158275e-05, "loss": 0.5508, "step": 14728 }, { "epoch": 0.31237937689550593, "grad_norm": 0.3354801833629608, "learning_rate": 1.8833487761811353e-05, "loss": 0.5725, "step": 14729 }, { "epoch": 0.31240058535343895, "grad_norm": 0.3718727231025696, "learning_rate": 1.8833331442215434e-05, "loss": 0.5077, "step": 14730 }, { "epoch": 0.312421793811372, "grad_norm": 0.35356372594833374, "learning_rate": 1.8833175112795155e-05, "loss": 0.5647, "step": 14731 }, { "epoch": 0.312443002269305, "grad_norm": 0.34071552753448486, "learning_rate": 1.88330187735507e-05, "loss": 0.4649, "step": 14732 }, { "epoch": 0.312464210727238, "grad_norm": 0.4244593381881714, "learning_rate": 1.8832862424482233e-05, "loss": 0.6135, "step": 14733 }, { "epoch": 0.31248541918517103, "grad_norm": 0.28456664085388184, "learning_rate": 1.883270606558994e-05, "loss": 0.4138, "step": 14734 }, { "epoch": 0.31250662764310405, "grad_norm": 0.30845242738723755, "learning_rate": 1.8832549696873983e-05, "loss": 0.4496, "step": 14735 }, { "epoch": 0.31252783610103707, "grad_norm": 0.2878059446811676, "learning_rate": 1.883239331833454e-05, "loss": 0.4884, "step": 14736 }, { "epoch": 0.3125490445589701, "grad_norm": 0.32285264134407043, "learning_rate": 1.883223692997179e-05, "loss": 0.5767, "step": 14737 }, { "epoch": 0.31257025301690317, "grad_norm": 0.31794872879981995, "learning_rate": 1.8832080531785903e-05, "loss": 0.5084, "step": 14738 }, { "epoch": 0.3125914614748362, "grad_norm": 0.3161827325820923, "learning_rate": 1.8831924123777047e-05, "loss": 0.5179, "step": 14739 }, { "epoch": 0.3126126699327692, "grad_norm": 0.6691029667854309, "learning_rate": 1.8831767705945408e-05, "loss": 0.4958, "step": 14740 }, { "epoch": 0.3126338783907022, "grad_norm": 0.37813082337379456, "learning_rate": 1.8831611278291148e-05, "loss": 0.6151, "step": 14741 }, { "epoch": 0.31265508684863524, "grad_norm": 0.4132533073425293, "learning_rate": 1.8831454840814453e-05, "loss": 0.6075, "step": 14742 }, { "epoch": 0.31267629530656826, "grad_norm": 0.3247573971748352, "learning_rate": 1.8831298393515486e-05, "loss": 0.4817, "step": 14743 }, { "epoch": 0.3126975037645013, "grad_norm": 0.34705638885498047, "learning_rate": 1.8831141936394427e-05, "loss": 0.5306, "step": 14744 }, { "epoch": 0.3127187122224343, "grad_norm": 0.3297845423221588, "learning_rate": 1.883098546945145e-05, "loss": 0.4331, "step": 14745 }, { "epoch": 0.3127399206803673, "grad_norm": 0.3195938766002655, "learning_rate": 1.8830828992686726e-05, "loss": 0.4642, "step": 14746 }, { "epoch": 0.31276112913830034, "grad_norm": 0.38685935735702515, "learning_rate": 1.883067250610043e-05, "loss": 0.5235, "step": 14747 }, { "epoch": 0.31278233759623336, "grad_norm": 0.44689685106277466, "learning_rate": 1.8830516009692736e-05, "loss": 0.52, "step": 14748 }, { "epoch": 0.3128035460541664, "grad_norm": 0.3373579978942871, "learning_rate": 1.8830359503463824e-05, "loss": 0.5639, "step": 14749 }, { "epoch": 0.3128247545120994, "grad_norm": 0.34155189990997314, "learning_rate": 1.8830202987413856e-05, "loss": 0.5365, "step": 14750 }, { "epoch": 0.3128459629700325, "grad_norm": 0.3869876563549042, "learning_rate": 1.883004646154302e-05, "loss": 0.4985, "step": 14751 }, { "epoch": 0.3128671714279655, "grad_norm": 0.3069954216480255, "learning_rate": 1.8829889925851478e-05, "loss": 0.4914, "step": 14752 }, { "epoch": 0.3128883798858985, "grad_norm": 0.3512239456176758, "learning_rate": 1.8829733380339414e-05, "loss": 0.4951, "step": 14753 }, { "epoch": 0.31290958834383154, "grad_norm": 0.3378005623817444, "learning_rate": 1.8829576825006988e-05, "loss": 0.5011, "step": 14754 }, { "epoch": 0.31293079680176455, "grad_norm": 0.3416680097579956, "learning_rate": 1.882942025985439e-05, "loss": 0.4914, "step": 14755 }, { "epoch": 0.3129520052596976, "grad_norm": 0.34934094548225403, "learning_rate": 1.882926368488179e-05, "loss": 0.5353, "step": 14756 }, { "epoch": 0.3129732137176306, "grad_norm": 0.3230714499950409, "learning_rate": 1.8829107100089355e-05, "loss": 0.4823, "step": 14757 }, { "epoch": 0.3129944221755636, "grad_norm": 0.35846611857414246, "learning_rate": 1.8828950505477267e-05, "loss": 0.5688, "step": 14758 }, { "epoch": 0.31301563063349663, "grad_norm": 0.3347609043121338, "learning_rate": 1.8828793901045693e-05, "loss": 0.594, "step": 14759 }, { "epoch": 0.31303683909142965, "grad_norm": 0.34632864594459534, "learning_rate": 1.8828637286794815e-05, "loss": 0.5539, "step": 14760 }, { "epoch": 0.3130580475493627, "grad_norm": 0.35948142409324646, "learning_rate": 1.8828480662724798e-05, "loss": 0.492, "step": 14761 }, { "epoch": 0.3130792560072957, "grad_norm": 0.39793860912323, "learning_rate": 1.8828324028835827e-05, "loss": 0.6644, "step": 14762 }, { "epoch": 0.3131004644652287, "grad_norm": 0.4817101061344147, "learning_rate": 1.8828167385128066e-05, "loss": 0.5235, "step": 14763 }, { "epoch": 0.31312167292316173, "grad_norm": 0.353384405374527, "learning_rate": 1.8828010731601697e-05, "loss": 0.4929, "step": 14764 }, { "epoch": 0.3131428813810948, "grad_norm": 1.2734850645065308, "learning_rate": 1.8827854068256894e-05, "loss": 0.5156, "step": 14765 }, { "epoch": 0.3131640898390278, "grad_norm": 0.34500226378440857, "learning_rate": 1.8827697395093823e-05, "loss": 0.5163, "step": 14766 }, { "epoch": 0.31318529829696085, "grad_norm": 0.3634709119796753, "learning_rate": 1.8827540712112666e-05, "loss": 0.5276, "step": 14767 }, { "epoch": 0.31320650675489387, "grad_norm": 0.3282209634780884, "learning_rate": 1.8827384019313596e-05, "loss": 0.5801, "step": 14768 }, { "epoch": 0.3132277152128269, "grad_norm": 0.34102165699005127, "learning_rate": 1.8827227316696785e-05, "loss": 0.5099, "step": 14769 }, { "epoch": 0.3132489236707599, "grad_norm": 0.341590940952301, "learning_rate": 1.8827070604262408e-05, "loss": 0.5079, "step": 14770 }, { "epoch": 0.3132701321286929, "grad_norm": 0.30643534660339355, "learning_rate": 1.8826913882010637e-05, "loss": 0.5333, "step": 14771 }, { "epoch": 0.31329134058662594, "grad_norm": 0.3050604462623596, "learning_rate": 1.8826757149941654e-05, "loss": 0.5091, "step": 14772 }, { "epoch": 0.31331254904455896, "grad_norm": 0.3752039968967438, "learning_rate": 1.8826600408055626e-05, "loss": 0.5856, "step": 14773 }, { "epoch": 0.313333757502492, "grad_norm": 0.32545679807662964, "learning_rate": 1.882644365635273e-05, "loss": 0.4955, "step": 14774 }, { "epoch": 0.313354965960425, "grad_norm": 0.39444664120674133, "learning_rate": 1.8826286894833142e-05, "loss": 0.6484, "step": 14775 }, { "epoch": 0.313376174418358, "grad_norm": 0.43502938747406006, "learning_rate": 1.8826130123497032e-05, "loss": 0.4722, "step": 14776 }, { "epoch": 0.31339738287629104, "grad_norm": 0.34878766536712646, "learning_rate": 1.8825973342344576e-05, "loss": 0.5679, "step": 14777 }, { "epoch": 0.31341859133422406, "grad_norm": 0.3175309896469116, "learning_rate": 1.882581655137595e-05, "loss": 0.488, "step": 14778 }, { "epoch": 0.31343979979215714, "grad_norm": 0.3923036456108093, "learning_rate": 1.882565975059133e-05, "loss": 0.5206, "step": 14779 }, { "epoch": 0.31346100825009016, "grad_norm": 0.33207079768180847, "learning_rate": 1.8825502939990886e-05, "loss": 0.4152, "step": 14780 }, { "epoch": 0.3134822167080232, "grad_norm": 0.3628751337528229, "learning_rate": 1.8825346119574794e-05, "loss": 0.4883, "step": 14781 }, { "epoch": 0.3135034251659562, "grad_norm": 0.330778568983078, "learning_rate": 1.882518928934323e-05, "loss": 0.4983, "step": 14782 }, { "epoch": 0.3135246336238892, "grad_norm": 0.3300822377204895, "learning_rate": 1.8825032449296366e-05, "loss": 0.5042, "step": 14783 }, { "epoch": 0.31354584208182223, "grad_norm": 0.36981189250946045, "learning_rate": 1.8824875599434376e-05, "loss": 0.4918, "step": 14784 }, { "epoch": 0.31356705053975525, "grad_norm": 0.3509962558746338, "learning_rate": 1.8824718739757436e-05, "loss": 0.531, "step": 14785 }, { "epoch": 0.3135882589976883, "grad_norm": 0.4068848788738251, "learning_rate": 1.8824561870265725e-05, "loss": 0.4963, "step": 14786 }, { "epoch": 0.3136094674556213, "grad_norm": 0.3414326310157776, "learning_rate": 1.882440499095941e-05, "loss": 0.5461, "step": 14787 }, { "epoch": 0.3136306759135543, "grad_norm": 0.36781278252601624, "learning_rate": 1.882424810183867e-05, "loss": 0.5424, "step": 14788 }, { "epoch": 0.31365188437148733, "grad_norm": 0.34135890007019043, "learning_rate": 1.8824091202903677e-05, "loss": 0.5624, "step": 14789 }, { "epoch": 0.31367309282942035, "grad_norm": 0.37597885727882385, "learning_rate": 1.88239342941546e-05, "loss": 0.5412, "step": 14790 }, { "epoch": 0.31369430128735337, "grad_norm": 0.34534963965415955, "learning_rate": 1.882377737559163e-05, "loss": 0.5775, "step": 14791 }, { "epoch": 0.31371550974528645, "grad_norm": 0.3168695867061615, "learning_rate": 1.8823620447214926e-05, "loss": 0.5296, "step": 14792 }, { "epoch": 0.31373671820321947, "grad_norm": 0.3354501724243164, "learning_rate": 1.882346350902467e-05, "loss": 0.5554, "step": 14793 }, { "epoch": 0.3137579266611525, "grad_norm": 0.34566792845726013, "learning_rate": 1.8823306561021034e-05, "loss": 0.4888, "step": 14794 }, { "epoch": 0.3137791351190855, "grad_norm": 0.3172933757305145, "learning_rate": 1.8823149603204193e-05, "loss": 0.4179, "step": 14795 }, { "epoch": 0.3138003435770185, "grad_norm": 0.3074877858161926, "learning_rate": 1.8822992635574324e-05, "loss": 0.4565, "step": 14796 }, { "epoch": 0.31382155203495155, "grad_norm": 0.32831329107284546, "learning_rate": 1.8822835658131594e-05, "loss": 0.5424, "step": 14797 }, { "epoch": 0.31384276049288456, "grad_norm": 0.3868049085140228, "learning_rate": 1.8822678670876185e-05, "loss": 0.5038, "step": 14798 }, { "epoch": 0.3138639689508176, "grad_norm": 0.37336549162864685, "learning_rate": 1.882252167380827e-05, "loss": 0.5231, "step": 14799 }, { "epoch": 0.3138851774087506, "grad_norm": 0.40463924407958984, "learning_rate": 1.8822364666928023e-05, "loss": 0.4747, "step": 14800 }, { "epoch": 0.3139063858666836, "grad_norm": 0.37888815999031067, "learning_rate": 1.8822207650235616e-05, "loss": 0.4955, "step": 14801 }, { "epoch": 0.31392759432461664, "grad_norm": 0.4529998004436493, "learning_rate": 1.882205062373123e-05, "loss": 0.468, "step": 14802 }, { "epoch": 0.31394880278254966, "grad_norm": 0.40408486127853394, "learning_rate": 1.8821893587415036e-05, "loss": 0.5322, "step": 14803 }, { "epoch": 0.3139700112404827, "grad_norm": 0.35217297077178955, "learning_rate": 1.882173654128721e-05, "loss": 0.4577, "step": 14804 }, { "epoch": 0.3139912196984157, "grad_norm": 0.3289611339569092, "learning_rate": 1.882157948534792e-05, "loss": 0.5137, "step": 14805 }, { "epoch": 0.3140124281563488, "grad_norm": 0.37244781851768494, "learning_rate": 1.882142241959735e-05, "loss": 0.5668, "step": 14806 }, { "epoch": 0.3140336366142818, "grad_norm": 0.3467157781124115, "learning_rate": 1.882126534403567e-05, "loss": 0.5211, "step": 14807 }, { "epoch": 0.3140548450722148, "grad_norm": 0.3788485825061798, "learning_rate": 1.8821108258663056e-05, "loss": 0.5697, "step": 14808 }, { "epoch": 0.31407605353014784, "grad_norm": 0.33197981119155884, "learning_rate": 1.882095116347968e-05, "loss": 0.5614, "step": 14809 }, { "epoch": 0.31409726198808086, "grad_norm": 0.36916273832321167, "learning_rate": 1.882079405848572e-05, "loss": 0.6055, "step": 14810 }, { "epoch": 0.3141184704460139, "grad_norm": 0.3152063488960266, "learning_rate": 1.8820636943681347e-05, "loss": 0.5775, "step": 14811 }, { "epoch": 0.3141396789039469, "grad_norm": 0.31462621688842773, "learning_rate": 1.8820479819066738e-05, "loss": 0.457, "step": 14812 }, { "epoch": 0.3141608873618799, "grad_norm": 0.3468925952911377, "learning_rate": 1.882032268464207e-05, "loss": 0.5554, "step": 14813 }, { "epoch": 0.31418209581981293, "grad_norm": 0.3139709532260895, "learning_rate": 1.882016554040752e-05, "loss": 0.4561, "step": 14814 }, { "epoch": 0.31420330427774595, "grad_norm": 0.3310871124267578, "learning_rate": 1.8820008386363252e-05, "loss": 0.4987, "step": 14815 }, { "epoch": 0.314224512735679, "grad_norm": 0.457941472530365, "learning_rate": 1.8819851222509448e-05, "loss": 0.6009, "step": 14816 }, { "epoch": 0.314245721193612, "grad_norm": 0.34555986523628235, "learning_rate": 1.8819694048846285e-05, "loss": 0.503, "step": 14817 }, { "epoch": 0.314266929651545, "grad_norm": 0.3317420184612274, "learning_rate": 1.8819536865373935e-05, "loss": 0.4805, "step": 14818 }, { "epoch": 0.31428813810947803, "grad_norm": 0.38610896468162537, "learning_rate": 1.881937967209257e-05, "loss": 0.5332, "step": 14819 }, { "epoch": 0.3143093465674111, "grad_norm": 0.3150603473186493, "learning_rate": 1.881922246900237e-05, "loss": 0.4895, "step": 14820 }, { "epoch": 0.3143305550253441, "grad_norm": 0.3249044716358185, "learning_rate": 1.8819065256103507e-05, "loss": 0.4971, "step": 14821 }, { "epoch": 0.31435176348327715, "grad_norm": 0.3253948986530304, "learning_rate": 1.8818908033396153e-05, "loss": 0.45, "step": 14822 }, { "epoch": 0.31437297194121017, "grad_norm": 0.3727678656578064, "learning_rate": 1.881875080088049e-05, "loss": 0.5264, "step": 14823 }, { "epoch": 0.3143941803991432, "grad_norm": 0.3492974042892456, "learning_rate": 1.8818593558556687e-05, "loss": 0.4844, "step": 14824 }, { "epoch": 0.3144153888570762, "grad_norm": 0.5999301671981812, "learning_rate": 1.881843630642492e-05, "loss": 0.5128, "step": 14825 }, { "epoch": 0.3144365973150092, "grad_norm": 0.34264618158340454, "learning_rate": 1.881827904448537e-05, "loss": 0.4716, "step": 14826 }, { "epoch": 0.31445780577294224, "grad_norm": 0.4158271253108978, "learning_rate": 1.88181217727382e-05, "loss": 0.5512, "step": 14827 }, { "epoch": 0.31447901423087526, "grad_norm": 0.4476342499256134, "learning_rate": 1.8817964491183595e-05, "loss": 0.5292, "step": 14828 }, { "epoch": 0.3145002226888083, "grad_norm": 0.3606869876384735, "learning_rate": 1.8817807199821727e-05, "loss": 0.5465, "step": 14829 }, { "epoch": 0.3145214311467413, "grad_norm": 0.3322782814502716, "learning_rate": 1.8817649898652766e-05, "loss": 0.4448, "step": 14830 }, { "epoch": 0.3145426396046743, "grad_norm": 0.36347973346710205, "learning_rate": 1.88174925876769e-05, "loss": 0.5168, "step": 14831 }, { "epoch": 0.31456384806260734, "grad_norm": 0.3096655011177063, "learning_rate": 1.8817335266894287e-05, "loss": 0.4701, "step": 14832 }, { "epoch": 0.3145850565205404, "grad_norm": 0.34702685475349426, "learning_rate": 1.8817177936305114e-05, "loss": 0.5846, "step": 14833 }, { "epoch": 0.31460626497847344, "grad_norm": 0.34901100397109985, "learning_rate": 1.8817020595909553e-05, "loss": 0.5035, "step": 14834 }, { "epoch": 0.31462747343640646, "grad_norm": 0.33765745162963867, "learning_rate": 1.8816863245707773e-05, "loss": 0.5068, "step": 14835 }, { "epoch": 0.3146486818943395, "grad_norm": 0.311172753572464, "learning_rate": 1.881670588569996e-05, "loss": 0.5358, "step": 14836 }, { "epoch": 0.3146698903522725, "grad_norm": 0.3502238690853119, "learning_rate": 1.881654851588628e-05, "loss": 0.5851, "step": 14837 }, { "epoch": 0.3146910988102055, "grad_norm": 0.3289620280265808, "learning_rate": 1.8816391136266914e-05, "loss": 0.508, "step": 14838 }, { "epoch": 0.31471230726813854, "grad_norm": 0.3188985288143158, "learning_rate": 1.8816233746842033e-05, "loss": 0.5085, "step": 14839 }, { "epoch": 0.31473351572607156, "grad_norm": 0.36688730120658875, "learning_rate": 1.8816076347611814e-05, "loss": 0.4312, "step": 14840 }, { "epoch": 0.3147547241840046, "grad_norm": 0.6729917526245117, "learning_rate": 1.881591893857643e-05, "loss": 0.577, "step": 14841 }, { "epoch": 0.3147759326419376, "grad_norm": 0.3659748136997223, "learning_rate": 1.881576151973606e-05, "loss": 0.6302, "step": 14842 }, { "epoch": 0.3147971410998706, "grad_norm": 0.319021075963974, "learning_rate": 1.8815604091090876e-05, "loss": 0.5354, "step": 14843 }, { "epoch": 0.31481834955780363, "grad_norm": 0.3890163004398346, "learning_rate": 1.8815446652641053e-05, "loss": 0.4858, "step": 14844 }, { "epoch": 0.31483955801573665, "grad_norm": 0.3894961178302765, "learning_rate": 1.8815289204386766e-05, "loss": 0.4292, "step": 14845 }, { "epoch": 0.3148607664736697, "grad_norm": 0.3400386571884155, "learning_rate": 1.881513174632819e-05, "loss": 0.5341, "step": 14846 }, { "epoch": 0.31488197493160275, "grad_norm": 0.473365843296051, "learning_rate": 1.8814974278465504e-05, "loss": 0.5675, "step": 14847 }, { "epoch": 0.31490318338953577, "grad_norm": 0.34098541736602783, "learning_rate": 1.881481680079888e-05, "loss": 0.4511, "step": 14848 }, { "epoch": 0.3149243918474688, "grad_norm": 0.3523411750793457, "learning_rate": 1.881465931332849e-05, "loss": 0.5714, "step": 14849 }, { "epoch": 0.3149456003054018, "grad_norm": 0.3300052583217621, "learning_rate": 1.8814501816054517e-05, "loss": 0.493, "step": 14850 }, { "epoch": 0.3149668087633348, "grad_norm": 0.37130793929100037, "learning_rate": 1.881434430897713e-05, "loss": 0.5361, "step": 14851 }, { "epoch": 0.31498801722126785, "grad_norm": 0.542046070098877, "learning_rate": 1.8814186792096503e-05, "loss": 0.5618, "step": 14852 }, { "epoch": 0.31500922567920087, "grad_norm": 0.4072694182395935, "learning_rate": 1.8814029265412816e-05, "loss": 0.6095, "step": 14853 }, { "epoch": 0.3150304341371339, "grad_norm": 0.34567493200302124, "learning_rate": 1.8813871728926244e-05, "loss": 0.5745, "step": 14854 }, { "epoch": 0.3150516425950669, "grad_norm": 0.354142963886261, "learning_rate": 1.8813714182636957e-05, "loss": 0.5539, "step": 14855 }, { "epoch": 0.3150728510529999, "grad_norm": 0.318406343460083, "learning_rate": 1.8813556626545137e-05, "loss": 0.4678, "step": 14856 }, { "epoch": 0.31509405951093294, "grad_norm": 0.31834569573402405, "learning_rate": 1.8813399060650953e-05, "loss": 0.4987, "step": 14857 }, { "epoch": 0.31511526796886596, "grad_norm": 0.40348020195961, "learning_rate": 1.8813241484954586e-05, "loss": 0.5539, "step": 14858 }, { "epoch": 0.315136476426799, "grad_norm": 0.35670891404151917, "learning_rate": 1.8813083899456207e-05, "loss": 0.5333, "step": 14859 }, { "epoch": 0.31515768488473206, "grad_norm": 0.32197996973991394, "learning_rate": 1.881292630415599e-05, "loss": 0.5211, "step": 14860 }, { "epoch": 0.3151788933426651, "grad_norm": 0.3371613025665283, "learning_rate": 1.8812768699054116e-05, "loss": 0.479, "step": 14861 }, { "epoch": 0.3152001018005981, "grad_norm": 0.32222700119018555, "learning_rate": 1.881261108415076e-05, "loss": 0.4741, "step": 14862 }, { "epoch": 0.3152213102585311, "grad_norm": 0.4033253490924835, "learning_rate": 1.8812453459446088e-05, "loss": 0.5249, "step": 14863 }, { "epoch": 0.31524251871646414, "grad_norm": 0.3807774782180786, "learning_rate": 1.8812295824940284e-05, "loss": 0.5253, "step": 14864 }, { "epoch": 0.31526372717439716, "grad_norm": 0.33052942156791687, "learning_rate": 1.8812138180633526e-05, "loss": 0.6014, "step": 14865 }, { "epoch": 0.3152849356323302, "grad_norm": 0.3421396017074585, "learning_rate": 1.881198052652598e-05, "loss": 0.4639, "step": 14866 }, { "epoch": 0.3153061440902632, "grad_norm": 0.32647430896759033, "learning_rate": 1.8811822862617827e-05, "loss": 0.4929, "step": 14867 }, { "epoch": 0.3153273525481962, "grad_norm": 0.3778642416000366, "learning_rate": 1.881166518890924e-05, "loss": 0.5635, "step": 14868 }, { "epoch": 0.31534856100612924, "grad_norm": 0.3291819989681244, "learning_rate": 1.8811507505400396e-05, "loss": 0.4691, "step": 14869 }, { "epoch": 0.31536976946406226, "grad_norm": 0.38643452525138855, "learning_rate": 1.8811349812091467e-05, "loss": 0.5156, "step": 14870 }, { "epoch": 0.3153909779219953, "grad_norm": 0.3832428455352783, "learning_rate": 1.8811192108982635e-05, "loss": 0.498, "step": 14871 }, { "epoch": 0.3154121863799283, "grad_norm": 0.3803185224533081, "learning_rate": 1.8811034396074073e-05, "loss": 0.5235, "step": 14872 }, { "epoch": 0.3154333948378613, "grad_norm": 0.30623480677604675, "learning_rate": 1.8810876673365952e-05, "loss": 0.4547, "step": 14873 }, { "epoch": 0.3154546032957944, "grad_norm": 0.405254602432251, "learning_rate": 1.8810718940858453e-05, "loss": 0.5643, "step": 14874 }, { "epoch": 0.3154758117537274, "grad_norm": 0.322938472032547, "learning_rate": 1.8810561198551745e-05, "loss": 0.5857, "step": 14875 }, { "epoch": 0.31549702021166043, "grad_norm": 0.3134182095527649, "learning_rate": 1.881040344644601e-05, "loss": 0.4728, "step": 14876 }, { "epoch": 0.31551822866959345, "grad_norm": 0.32172641158103943, "learning_rate": 1.881024568454142e-05, "loss": 0.4453, "step": 14877 }, { "epoch": 0.31553943712752647, "grad_norm": 0.3312892019748688, "learning_rate": 1.8810087912838157e-05, "loss": 0.5089, "step": 14878 }, { "epoch": 0.3155606455854595, "grad_norm": 0.34885382652282715, "learning_rate": 1.8809930131336385e-05, "loss": 0.4248, "step": 14879 }, { "epoch": 0.3155818540433925, "grad_norm": 0.3499591648578644, "learning_rate": 1.8809772340036282e-05, "loss": 0.4923, "step": 14880 }, { "epoch": 0.3156030625013255, "grad_norm": 0.33114081621170044, "learning_rate": 1.8809614538938033e-05, "loss": 0.4522, "step": 14881 }, { "epoch": 0.31562427095925855, "grad_norm": 0.3489353358745575, "learning_rate": 1.8809456728041806e-05, "loss": 0.5595, "step": 14882 }, { "epoch": 0.31564547941719157, "grad_norm": 0.3462553024291992, "learning_rate": 1.8809298907347776e-05, "loss": 0.476, "step": 14883 }, { "epoch": 0.3156666878751246, "grad_norm": 0.399662047624588, "learning_rate": 1.880914107685612e-05, "loss": 0.5501, "step": 14884 }, { "epoch": 0.3156878963330576, "grad_norm": 0.30842822790145874, "learning_rate": 1.8808983236567014e-05, "loss": 0.4539, "step": 14885 }, { "epoch": 0.3157091047909906, "grad_norm": 0.31421101093292236, "learning_rate": 1.8808825386480636e-05, "loss": 0.511, "step": 14886 }, { "epoch": 0.31573031324892364, "grad_norm": 0.34995391964912415, "learning_rate": 1.8808667526597156e-05, "loss": 0.6002, "step": 14887 }, { "epoch": 0.3157515217068567, "grad_norm": 0.2999500036239624, "learning_rate": 1.8808509656916754e-05, "loss": 0.5114, "step": 14888 }, { "epoch": 0.31577273016478974, "grad_norm": 0.37673836946487427, "learning_rate": 1.8808351777439603e-05, "loss": 0.5253, "step": 14889 }, { "epoch": 0.31579393862272276, "grad_norm": 0.40571504831314087, "learning_rate": 1.8808193888165882e-05, "loss": 0.5065, "step": 14890 }, { "epoch": 0.3158151470806558, "grad_norm": 0.37463411688804626, "learning_rate": 1.880803598909576e-05, "loss": 0.5361, "step": 14891 }, { "epoch": 0.3158363555385888, "grad_norm": 0.325910747051239, "learning_rate": 1.8807878080229422e-05, "loss": 0.4472, "step": 14892 }, { "epoch": 0.3158575639965218, "grad_norm": 0.33011993765830994, "learning_rate": 1.8807720161567036e-05, "loss": 0.4507, "step": 14893 }, { "epoch": 0.31587877245445484, "grad_norm": 0.33213183283805847, "learning_rate": 1.8807562233108782e-05, "loss": 0.5461, "step": 14894 }, { "epoch": 0.31589998091238786, "grad_norm": 0.34358125925064087, "learning_rate": 1.880740429485483e-05, "loss": 0.4957, "step": 14895 }, { "epoch": 0.3159211893703209, "grad_norm": 0.3429197072982788, "learning_rate": 1.8807246346805363e-05, "loss": 0.5275, "step": 14896 }, { "epoch": 0.3159423978282539, "grad_norm": 0.44271260499954224, "learning_rate": 1.8807088388960547e-05, "loss": 0.5692, "step": 14897 }, { "epoch": 0.3159636062861869, "grad_norm": 0.35217711329460144, "learning_rate": 1.880693042132057e-05, "loss": 0.482, "step": 14898 }, { "epoch": 0.31598481474411994, "grad_norm": 0.32301583886146545, "learning_rate": 1.88067724438856e-05, "loss": 0.5107, "step": 14899 }, { "epoch": 0.31600602320205295, "grad_norm": 0.37687671184539795, "learning_rate": 1.8806614456655813e-05, "loss": 0.5456, "step": 14900 }, { "epoch": 0.31602723165998603, "grad_norm": 0.3623543083667755, "learning_rate": 1.880645645963139e-05, "loss": 0.5354, "step": 14901 }, { "epoch": 0.31604844011791905, "grad_norm": 0.3734891712665558, "learning_rate": 1.8806298452812498e-05, "loss": 0.4985, "step": 14902 }, { "epoch": 0.31606964857585207, "grad_norm": 0.3096937835216522, "learning_rate": 1.8806140436199317e-05, "loss": 0.4954, "step": 14903 }, { "epoch": 0.3160908570337851, "grad_norm": 0.31801286339759827, "learning_rate": 1.8805982409792026e-05, "loss": 0.5294, "step": 14904 }, { "epoch": 0.3161120654917181, "grad_norm": 0.3262699246406555, "learning_rate": 1.8805824373590798e-05, "loss": 0.4471, "step": 14905 }, { "epoch": 0.31613327394965113, "grad_norm": 0.3529696464538574, "learning_rate": 1.880566632759581e-05, "loss": 0.509, "step": 14906 }, { "epoch": 0.31615448240758415, "grad_norm": 0.34998464584350586, "learning_rate": 1.880550827180723e-05, "loss": 0.5248, "step": 14907 }, { "epoch": 0.31617569086551717, "grad_norm": 0.35325953364372253, "learning_rate": 1.880535020622525e-05, "loss": 0.5951, "step": 14908 }, { "epoch": 0.3161968993234502, "grad_norm": 0.3321007192134857, "learning_rate": 1.8805192130850028e-05, "loss": 0.5127, "step": 14909 }, { "epoch": 0.3162181077813832, "grad_norm": 0.3395330011844635, "learning_rate": 1.8805034045681747e-05, "loss": 0.4888, "step": 14910 }, { "epoch": 0.3162393162393162, "grad_norm": 0.33970320224761963, "learning_rate": 1.880487595072059e-05, "loss": 0.5642, "step": 14911 }, { "epoch": 0.31626052469724925, "grad_norm": 0.32615038752555847, "learning_rate": 1.8804717845966722e-05, "loss": 0.4591, "step": 14912 }, { "epoch": 0.31628173315518227, "grad_norm": 0.424841970205307, "learning_rate": 1.8804559731420323e-05, "loss": 0.5338, "step": 14913 }, { "epoch": 0.3163029416131153, "grad_norm": 0.3678802251815796, "learning_rate": 1.880440160708157e-05, "loss": 0.5353, "step": 14914 }, { "epoch": 0.31632415007104836, "grad_norm": 0.35184869170188904, "learning_rate": 1.880424347295064e-05, "loss": 0.5244, "step": 14915 }, { "epoch": 0.3163453585289814, "grad_norm": 0.3759511709213257, "learning_rate": 1.8804085329027706e-05, "loss": 0.5568, "step": 14916 }, { "epoch": 0.3163665669869144, "grad_norm": 0.37474751472473145, "learning_rate": 1.8803927175312942e-05, "loss": 0.4431, "step": 14917 }, { "epoch": 0.3163877754448474, "grad_norm": 0.5053343772888184, "learning_rate": 1.880376901180653e-05, "loss": 0.4948, "step": 14918 }, { "epoch": 0.31640898390278044, "grad_norm": 0.3099506199359894, "learning_rate": 1.8803610838508643e-05, "loss": 0.4646, "step": 14919 }, { "epoch": 0.31643019236071346, "grad_norm": 0.30551883578300476, "learning_rate": 1.880345265541945e-05, "loss": 0.4822, "step": 14920 }, { "epoch": 0.3164514008186465, "grad_norm": 0.34858545660972595, "learning_rate": 1.880329446253914e-05, "loss": 0.5554, "step": 14921 }, { "epoch": 0.3164726092765795, "grad_norm": 0.35550621151924133, "learning_rate": 1.8803136259867883e-05, "loss": 0.5281, "step": 14922 }, { "epoch": 0.3164938177345125, "grad_norm": 0.5426180958747864, "learning_rate": 1.880297804740585e-05, "loss": 0.5461, "step": 14923 }, { "epoch": 0.31651502619244554, "grad_norm": 0.3682413697242737, "learning_rate": 1.8802819825153223e-05, "loss": 0.6052, "step": 14924 }, { "epoch": 0.31653623465037856, "grad_norm": 0.32985636591911316, "learning_rate": 1.8802661593110178e-05, "loss": 0.5143, "step": 14925 }, { "epoch": 0.3165574431083116, "grad_norm": 0.3481671214103699, "learning_rate": 1.880250335127689e-05, "loss": 0.5717, "step": 14926 }, { "epoch": 0.3165786515662446, "grad_norm": 0.32838743925094604, "learning_rate": 1.880234509965353e-05, "loss": 0.5124, "step": 14927 }, { "epoch": 0.3165998600241776, "grad_norm": 0.32191553711891174, "learning_rate": 1.880218683824028e-05, "loss": 0.4977, "step": 14928 }, { "epoch": 0.3166210684821107, "grad_norm": 0.34151774644851685, "learning_rate": 1.8802028567037313e-05, "loss": 0.5909, "step": 14929 }, { "epoch": 0.3166422769400437, "grad_norm": 0.3483063876628876, "learning_rate": 1.880187028604481e-05, "loss": 0.6198, "step": 14930 }, { "epoch": 0.31666348539797673, "grad_norm": 0.33737459778785706, "learning_rate": 1.880171199526294e-05, "loss": 0.5161, "step": 14931 }, { "epoch": 0.31668469385590975, "grad_norm": 0.3526340126991272, "learning_rate": 1.8801553694691884e-05, "loss": 0.5424, "step": 14932 }, { "epoch": 0.31670590231384277, "grad_norm": 0.339531272649765, "learning_rate": 1.8801395384331817e-05, "loss": 0.5062, "step": 14933 }, { "epoch": 0.3167271107717758, "grad_norm": 0.5178717970848083, "learning_rate": 1.8801237064182912e-05, "loss": 0.458, "step": 14934 }, { "epoch": 0.3167483192297088, "grad_norm": 0.3179045617580414, "learning_rate": 1.8801078734245352e-05, "loss": 0.459, "step": 14935 }, { "epoch": 0.3167695276876418, "grad_norm": 0.3519578278064728, "learning_rate": 1.8800920394519303e-05, "loss": 0.5341, "step": 14936 }, { "epoch": 0.31679073614557485, "grad_norm": 0.3405393064022064, "learning_rate": 1.880076204500495e-05, "loss": 0.4163, "step": 14937 }, { "epoch": 0.31681194460350787, "grad_norm": 0.3786376416683197, "learning_rate": 1.8800603685702467e-05, "loss": 0.457, "step": 14938 }, { "epoch": 0.3168331530614409, "grad_norm": 0.3353278636932373, "learning_rate": 1.8800445316612025e-05, "loss": 0.5439, "step": 14939 }, { "epoch": 0.3168543615193739, "grad_norm": 0.4541058838367462, "learning_rate": 1.880028693773381e-05, "loss": 0.5176, "step": 14940 }, { "epoch": 0.3168755699773069, "grad_norm": 0.3199479281902313, "learning_rate": 1.8800128549067986e-05, "loss": 0.4743, "step": 14941 }, { "epoch": 0.31689677843524, "grad_norm": 0.39966511726379395, "learning_rate": 1.879997015061474e-05, "loss": 0.4487, "step": 14942 }, { "epoch": 0.316917986893173, "grad_norm": 0.42097434401512146, "learning_rate": 1.8799811742374243e-05, "loss": 0.4759, "step": 14943 }, { "epoch": 0.31693919535110604, "grad_norm": 0.36637383699417114, "learning_rate": 1.879965332434667e-05, "loss": 0.6665, "step": 14944 }, { "epoch": 0.31696040380903906, "grad_norm": 0.46800097823143005, "learning_rate": 1.8799494896532203e-05, "loss": 0.4573, "step": 14945 }, { "epoch": 0.3169816122669721, "grad_norm": 0.3607635200023651, "learning_rate": 1.879933645893101e-05, "loss": 0.4844, "step": 14946 }, { "epoch": 0.3170028207249051, "grad_norm": 0.36727264523506165, "learning_rate": 1.8799178011543275e-05, "loss": 0.5236, "step": 14947 }, { "epoch": 0.3170240291828381, "grad_norm": 0.35642048716545105, "learning_rate": 1.8799019554369165e-05, "loss": 0.4365, "step": 14948 }, { "epoch": 0.31704523764077114, "grad_norm": 0.38271915912628174, "learning_rate": 1.8798861087408868e-05, "loss": 0.5758, "step": 14949 }, { "epoch": 0.31706644609870416, "grad_norm": 0.40863385796546936, "learning_rate": 1.8798702610662548e-05, "loss": 0.4619, "step": 14950 }, { "epoch": 0.3170876545566372, "grad_norm": 0.39098060131073, "learning_rate": 1.879854412413039e-05, "loss": 0.561, "step": 14951 }, { "epoch": 0.3171088630145702, "grad_norm": 0.32456204295158386, "learning_rate": 1.879838562781257e-05, "loss": 0.5851, "step": 14952 }, { "epoch": 0.3171300714725032, "grad_norm": 0.3621031939983368, "learning_rate": 1.879822712170926e-05, "loss": 0.5544, "step": 14953 }, { "epoch": 0.31715127993043624, "grad_norm": 0.3072042763233185, "learning_rate": 1.879806860582064e-05, "loss": 0.4709, "step": 14954 }, { "epoch": 0.31717248838836926, "grad_norm": 0.4181075096130371, "learning_rate": 1.8797910080146883e-05, "loss": 0.5045, "step": 14955 }, { "epoch": 0.31719369684630233, "grad_norm": 0.3158456087112427, "learning_rate": 1.8797751544688166e-05, "loss": 0.5274, "step": 14956 }, { "epoch": 0.31721490530423535, "grad_norm": 0.39605629444122314, "learning_rate": 1.8797592999444667e-05, "loss": 0.4787, "step": 14957 }, { "epoch": 0.31723611376216837, "grad_norm": 0.33685949444770813, "learning_rate": 1.879743444441656e-05, "loss": 0.5363, "step": 14958 }, { "epoch": 0.3172573222201014, "grad_norm": 0.31663447618484497, "learning_rate": 1.8797275879604026e-05, "loss": 0.498, "step": 14959 }, { "epoch": 0.3172785306780344, "grad_norm": 0.3276090621948242, "learning_rate": 1.8797117305007237e-05, "loss": 0.4416, "step": 14960 }, { "epoch": 0.31729973913596743, "grad_norm": 0.3487277030944824, "learning_rate": 1.879695872062637e-05, "loss": 0.4181, "step": 14961 }, { "epoch": 0.31732094759390045, "grad_norm": 0.37288525700569153, "learning_rate": 1.87968001264616e-05, "loss": 0.5397, "step": 14962 }, { "epoch": 0.31734215605183347, "grad_norm": 0.3321439027786255, "learning_rate": 1.8796641522513108e-05, "loss": 0.5282, "step": 14963 }, { "epoch": 0.3173633645097665, "grad_norm": 0.32022610306739807, "learning_rate": 1.879648290878107e-05, "loss": 0.4909, "step": 14964 }, { "epoch": 0.3173845729676995, "grad_norm": 0.37566429376602173, "learning_rate": 1.8796324285265654e-05, "loss": 0.5995, "step": 14965 }, { "epoch": 0.3174057814256325, "grad_norm": 0.3442583680152893, "learning_rate": 1.8796165651967045e-05, "loss": 0.5706, "step": 14966 }, { "epoch": 0.31742698988356555, "grad_norm": 0.33724984526634216, "learning_rate": 1.879600700888542e-05, "loss": 0.586, "step": 14967 }, { "epoch": 0.31744819834149857, "grad_norm": 0.42117953300476074, "learning_rate": 1.879584835602095e-05, "loss": 0.5419, "step": 14968 }, { "epoch": 0.3174694067994316, "grad_norm": 0.3242699205875397, "learning_rate": 1.8795689693373815e-05, "loss": 0.4707, "step": 14969 }, { "epoch": 0.31749061525736466, "grad_norm": 0.3029822111129761, "learning_rate": 1.879553102094419e-05, "loss": 0.5185, "step": 14970 }, { "epoch": 0.3175118237152977, "grad_norm": 0.36279457807540894, "learning_rate": 1.879537233873225e-05, "loss": 0.5338, "step": 14971 }, { "epoch": 0.3175330321732307, "grad_norm": 0.33695006370544434, "learning_rate": 1.8795213646738175e-05, "loss": 0.5043, "step": 14972 }, { "epoch": 0.3175542406311637, "grad_norm": 0.32900115847587585, "learning_rate": 1.879505494496214e-05, "loss": 0.5547, "step": 14973 }, { "epoch": 0.31757544908909674, "grad_norm": 0.3568917214870453, "learning_rate": 1.8794896233404317e-05, "loss": 0.5269, "step": 14974 }, { "epoch": 0.31759665754702976, "grad_norm": 0.3376561403274536, "learning_rate": 1.879473751206489e-05, "loss": 0.4712, "step": 14975 }, { "epoch": 0.3176178660049628, "grad_norm": 0.3763905465602875, "learning_rate": 1.8794578780944036e-05, "loss": 0.5085, "step": 14976 }, { "epoch": 0.3176390744628958, "grad_norm": 0.3176664710044861, "learning_rate": 1.8794420040041923e-05, "loss": 0.4917, "step": 14977 }, { "epoch": 0.3176602829208288, "grad_norm": 0.4898325800895691, "learning_rate": 1.8794261289358734e-05, "loss": 0.4752, "step": 14978 }, { "epoch": 0.31768149137876184, "grad_norm": 0.35044506192207336, "learning_rate": 1.8794102528894646e-05, "loss": 0.4923, "step": 14979 }, { "epoch": 0.31770269983669486, "grad_norm": 0.4194522798061371, "learning_rate": 1.8793943758649827e-05, "loss": 0.4667, "step": 14980 }, { "epoch": 0.3177239082946279, "grad_norm": 0.4186162054538727, "learning_rate": 1.8793784978624468e-05, "loss": 0.5336, "step": 14981 }, { "epoch": 0.3177451167525609, "grad_norm": 0.513692319393158, "learning_rate": 1.8793626188818735e-05, "loss": 0.5437, "step": 14982 }, { "epoch": 0.31776632521049397, "grad_norm": 0.8020551800727844, "learning_rate": 1.8793467389232804e-05, "loss": 0.5015, "step": 14983 }, { "epoch": 0.317787533668427, "grad_norm": 0.32430413365364075, "learning_rate": 1.879330857986686e-05, "loss": 0.4944, "step": 14984 }, { "epoch": 0.31780874212636, "grad_norm": 0.43918243050575256, "learning_rate": 1.879314976072107e-05, "loss": 0.6174, "step": 14985 }, { "epoch": 0.31782995058429303, "grad_norm": 0.36389240622520447, "learning_rate": 1.879299093179562e-05, "loss": 0.5058, "step": 14986 }, { "epoch": 0.31785115904222605, "grad_norm": 0.3651590049266815, "learning_rate": 1.879283209309068e-05, "loss": 0.5688, "step": 14987 }, { "epoch": 0.31787236750015907, "grad_norm": 0.31299054622650146, "learning_rate": 1.879267324460643e-05, "loss": 0.5276, "step": 14988 }, { "epoch": 0.3178935759580921, "grad_norm": 0.3180477023124695, "learning_rate": 1.8792514386343042e-05, "loss": 0.5211, "step": 14989 }, { "epoch": 0.3179147844160251, "grad_norm": 0.3640363812446594, "learning_rate": 1.8792355518300697e-05, "loss": 0.5527, "step": 14990 }, { "epoch": 0.31793599287395813, "grad_norm": 0.3613026440143585, "learning_rate": 1.879219664047957e-05, "loss": 0.542, "step": 14991 }, { "epoch": 0.31795720133189115, "grad_norm": 0.32715335488319397, "learning_rate": 1.8792037752879838e-05, "loss": 0.5077, "step": 14992 }, { "epoch": 0.31797840978982417, "grad_norm": 0.39250630140304565, "learning_rate": 1.879187885550168e-05, "loss": 0.5812, "step": 14993 }, { "epoch": 0.3179996182477572, "grad_norm": 0.3377425968647003, "learning_rate": 1.8791719948345268e-05, "loss": 0.5362, "step": 14994 }, { "epoch": 0.3180208267056902, "grad_norm": 0.3309858441352844, "learning_rate": 1.8791561031410782e-05, "loss": 0.4723, "step": 14995 }, { "epoch": 0.3180420351636232, "grad_norm": 0.32889747619628906, "learning_rate": 1.87914021046984e-05, "loss": 0.4951, "step": 14996 }, { "epoch": 0.3180632436215563, "grad_norm": 0.32032620906829834, "learning_rate": 1.8791243168208297e-05, "loss": 0.5135, "step": 14997 }, { "epoch": 0.3180844520794893, "grad_norm": 0.3493930995464325, "learning_rate": 1.8791084221940647e-05, "loss": 0.5756, "step": 14998 }, { "epoch": 0.31810566053742234, "grad_norm": 0.3554675281047821, "learning_rate": 1.879092526589563e-05, "loss": 0.5134, "step": 14999 }, { "epoch": 0.31812686899535536, "grad_norm": 0.3829890191555023, "learning_rate": 1.8790766300073426e-05, "loss": 0.5152, "step": 15000 }, { "epoch": 0.3181480774532884, "grad_norm": 0.3372992277145386, "learning_rate": 1.8790607324474207e-05, "loss": 0.5095, "step": 15001 }, { "epoch": 0.3181692859112214, "grad_norm": 0.34586167335510254, "learning_rate": 1.879044833909815e-05, "loss": 0.5577, "step": 15002 }, { "epoch": 0.3181904943691544, "grad_norm": 0.3392535150051117, "learning_rate": 1.879028934394543e-05, "loss": 0.5587, "step": 15003 }, { "epoch": 0.31821170282708744, "grad_norm": 0.33086690306663513, "learning_rate": 1.879013033901623e-05, "loss": 0.4913, "step": 15004 }, { "epoch": 0.31823291128502046, "grad_norm": 0.34371018409729004, "learning_rate": 1.8789971324310723e-05, "loss": 0.4777, "step": 15005 }, { "epoch": 0.3182541197429535, "grad_norm": 0.47611039876937866, "learning_rate": 1.8789812299829084e-05, "loss": 0.5195, "step": 15006 }, { "epoch": 0.3182753282008865, "grad_norm": 0.3500717580318451, "learning_rate": 1.8789653265571496e-05, "loss": 0.5682, "step": 15007 }, { "epoch": 0.3182965366588195, "grad_norm": 0.353721022605896, "learning_rate": 1.878949422153813e-05, "loss": 0.5395, "step": 15008 }, { "epoch": 0.31831774511675254, "grad_norm": 0.3562602400779724, "learning_rate": 1.8789335167729165e-05, "loss": 0.4843, "step": 15009 }, { "epoch": 0.31833895357468556, "grad_norm": 0.792291522026062, "learning_rate": 1.878917610414478e-05, "loss": 0.4911, "step": 15010 }, { "epoch": 0.31836016203261863, "grad_norm": 0.32581135630607605, "learning_rate": 1.878901703078514e-05, "loss": 0.4736, "step": 15011 }, { "epoch": 0.31838137049055165, "grad_norm": 0.34590962529182434, "learning_rate": 1.878885794765044e-05, "loss": 0.5302, "step": 15012 }, { "epoch": 0.31840257894848467, "grad_norm": 0.42311185598373413, "learning_rate": 1.878869885474085e-05, "loss": 0.5872, "step": 15013 }, { "epoch": 0.3184237874064177, "grad_norm": 0.3978460133075714, "learning_rate": 1.878853975205654e-05, "loss": 0.4953, "step": 15014 }, { "epoch": 0.3184449958643507, "grad_norm": 0.3984394967556, "learning_rate": 1.8788380639597695e-05, "loss": 0.4326, "step": 15015 }, { "epoch": 0.31846620432228373, "grad_norm": 0.37987732887268066, "learning_rate": 1.878822151736449e-05, "loss": 0.5681, "step": 15016 }, { "epoch": 0.31848741278021675, "grad_norm": 0.34228527545928955, "learning_rate": 1.87880623853571e-05, "loss": 0.5653, "step": 15017 }, { "epoch": 0.31850862123814977, "grad_norm": 0.3685334622859955, "learning_rate": 1.8787903243575706e-05, "loss": 0.4931, "step": 15018 }, { "epoch": 0.3185298296960828, "grad_norm": 0.556223452091217, "learning_rate": 1.878774409202048e-05, "loss": 0.5018, "step": 15019 }, { "epoch": 0.3185510381540158, "grad_norm": 0.3498789668083191, "learning_rate": 1.8787584930691602e-05, "loss": 0.5214, "step": 15020 }, { "epoch": 0.31857224661194883, "grad_norm": 0.33932530879974365, "learning_rate": 1.878742575958925e-05, "loss": 0.5231, "step": 15021 }, { "epoch": 0.31859345506988185, "grad_norm": 0.3302895724773407, "learning_rate": 1.8787266578713597e-05, "loss": 0.4926, "step": 15022 }, { "epoch": 0.31861466352781487, "grad_norm": 0.48967140913009644, "learning_rate": 1.8787107388064822e-05, "loss": 0.4136, "step": 15023 }, { "epoch": 0.31863587198574794, "grad_norm": 0.32170334458351135, "learning_rate": 1.8786948187643106e-05, "loss": 0.5134, "step": 15024 }, { "epoch": 0.31865708044368096, "grad_norm": 0.3283942639827728, "learning_rate": 1.878678897744862e-05, "loss": 0.5518, "step": 15025 }, { "epoch": 0.318678288901614, "grad_norm": 0.3506799340248108, "learning_rate": 1.8786629757481544e-05, "loss": 0.5318, "step": 15026 }, { "epoch": 0.318699497359547, "grad_norm": 0.37035468220710754, "learning_rate": 1.8786470527742053e-05, "loss": 0.5566, "step": 15027 }, { "epoch": 0.31872070581748, "grad_norm": 0.4484946131706238, "learning_rate": 1.8786311288230326e-05, "loss": 0.4418, "step": 15028 }, { "epoch": 0.31874191427541304, "grad_norm": 0.3501281440258026, "learning_rate": 1.8786152038946543e-05, "loss": 0.5126, "step": 15029 }, { "epoch": 0.31876312273334606, "grad_norm": 0.3316620886325836, "learning_rate": 1.878599277989088e-05, "loss": 0.4726, "step": 15030 }, { "epoch": 0.3187843311912791, "grad_norm": 0.38647815585136414, "learning_rate": 1.8785833511063504e-05, "loss": 0.5659, "step": 15031 }, { "epoch": 0.3188055396492121, "grad_norm": 0.29956385493278503, "learning_rate": 1.8785674232464605e-05, "loss": 0.4966, "step": 15032 }, { "epoch": 0.3188267481071451, "grad_norm": 0.3950347602367401, "learning_rate": 1.8785514944094358e-05, "loss": 0.6397, "step": 15033 }, { "epoch": 0.31884795656507814, "grad_norm": 0.40084102749824524, "learning_rate": 1.878535564595293e-05, "loss": 0.5396, "step": 15034 }, { "epoch": 0.31886916502301116, "grad_norm": 0.35732337832450867, "learning_rate": 1.8785196338040515e-05, "loss": 0.4996, "step": 15035 }, { "epoch": 0.3188903734809442, "grad_norm": 0.3396296203136444, "learning_rate": 1.8785037020357277e-05, "loss": 0.5103, "step": 15036 }, { "epoch": 0.3189115819388772, "grad_norm": 0.367866188287735, "learning_rate": 1.8784877692903394e-05, "loss": 0.546, "step": 15037 }, { "epoch": 0.3189327903968103, "grad_norm": 0.34302613139152527, "learning_rate": 1.878471835567905e-05, "loss": 0.4834, "step": 15038 }, { "epoch": 0.3189539988547433, "grad_norm": 0.30773764848709106, "learning_rate": 1.8784559008684416e-05, "loss": 0.5062, "step": 15039 }, { "epoch": 0.3189752073126763, "grad_norm": 0.36198288202285767, "learning_rate": 1.8784399651919677e-05, "loss": 0.4724, "step": 15040 }, { "epoch": 0.31899641577060933, "grad_norm": 0.31949281692504883, "learning_rate": 1.8784240285385002e-05, "loss": 0.5438, "step": 15041 }, { "epoch": 0.31901762422854235, "grad_norm": 0.36470311880111694, "learning_rate": 1.8784080909080568e-05, "loss": 0.4545, "step": 15042 }, { "epoch": 0.31903883268647537, "grad_norm": 0.3342129588127136, "learning_rate": 1.8783921523006557e-05, "loss": 0.5174, "step": 15043 }, { "epoch": 0.3190600411444084, "grad_norm": 0.3055360019207001, "learning_rate": 1.8783762127163146e-05, "loss": 0.4953, "step": 15044 }, { "epoch": 0.3190812496023414, "grad_norm": 0.36538031697273254, "learning_rate": 1.8783602721550515e-05, "loss": 0.5815, "step": 15045 }, { "epoch": 0.31910245806027443, "grad_norm": 0.36412855982780457, "learning_rate": 1.8783443306168832e-05, "loss": 0.4132, "step": 15046 }, { "epoch": 0.31912366651820745, "grad_norm": 0.616692066192627, "learning_rate": 1.8783283881018278e-05, "loss": 0.5372, "step": 15047 }, { "epoch": 0.31914487497614047, "grad_norm": 0.3385674059391022, "learning_rate": 1.8783124446099038e-05, "loss": 0.5394, "step": 15048 }, { "epoch": 0.3191660834340735, "grad_norm": 0.341758668422699, "learning_rate": 1.878296500141128e-05, "loss": 0.4955, "step": 15049 }, { "epoch": 0.3191872918920065, "grad_norm": 0.32999667525291443, "learning_rate": 1.8782805546955184e-05, "loss": 0.5202, "step": 15050 }, { "epoch": 0.3192085003499396, "grad_norm": 0.2991560101509094, "learning_rate": 1.878264608273093e-05, "loss": 0.4685, "step": 15051 }, { "epoch": 0.3192297088078726, "grad_norm": 0.4550394117832184, "learning_rate": 1.8782486608738693e-05, "loss": 0.5221, "step": 15052 }, { "epoch": 0.3192509172658056, "grad_norm": 0.3344469964504242, "learning_rate": 1.8782327124978646e-05, "loss": 0.5244, "step": 15053 }, { "epoch": 0.31927212572373864, "grad_norm": 0.3602267801761627, "learning_rate": 1.8782167631450978e-05, "loss": 0.5077, "step": 15054 }, { "epoch": 0.31929333418167166, "grad_norm": 0.34254562854766846, "learning_rate": 1.8782008128155854e-05, "loss": 0.5277, "step": 15055 }, { "epoch": 0.3193145426396047, "grad_norm": 0.3321376144886017, "learning_rate": 1.878184861509346e-05, "loss": 0.4815, "step": 15056 }, { "epoch": 0.3193357510975377, "grad_norm": 0.3376004099845886, "learning_rate": 1.8781689092263972e-05, "loss": 0.5437, "step": 15057 }, { "epoch": 0.3193569595554707, "grad_norm": 0.3294471502304077, "learning_rate": 1.8781529559667563e-05, "loss": 0.4462, "step": 15058 }, { "epoch": 0.31937816801340374, "grad_norm": 0.3222852051258087, "learning_rate": 1.8781370017304413e-05, "loss": 0.5494, "step": 15059 }, { "epoch": 0.31939937647133676, "grad_norm": 0.3501560091972351, "learning_rate": 1.87812104651747e-05, "loss": 0.5541, "step": 15060 }, { "epoch": 0.3194205849292698, "grad_norm": 0.37267154455184937, "learning_rate": 1.87810509032786e-05, "loss": 0.577, "step": 15061 }, { "epoch": 0.3194417933872028, "grad_norm": 0.3457246422767639, "learning_rate": 1.8780891331616293e-05, "loss": 0.5413, "step": 15062 }, { "epoch": 0.3194630018451358, "grad_norm": 0.31528016924858093, "learning_rate": 1.8780731750187953e-05, "loss": 0.4917, "step": 15063 }, { "epoch": 0.31948421030306884, "grad_norm": 0.3972959816455841, "learning_rate": 1.878057215899376e-05, "loss": 0.5277, "step": 15064 }, { "epoch": 0.3195054187610019, "grad_norm": 0.3116827607154846, "learning_rate": 1.8780412558033895e-05, "loss": 0.4706, "step": 15065 }, { "epoch": 0.31952662721893493, "grad_norm": 0.37879839539527893, "learning_rate": 1.8780252947308525e-05, "loss": 0.4862, "step": 15066 }, { "epoch": 0.31954783567686795, "grad_norm": 0.3767910301685333, "learning_rate": 1.8780093326817838e-05, "loss": 0.53, "step": 15067 }, { "epoch": 0.319569044134801, "grad_norm": 0.35714811086654663, "learning_rate": 1.8779933696562006e-05, "loss": 0.5091, "step": 15068 }, { "epoch": 0.319590252592734, "grad_norm": 0.33220627903938293, "learning_rate": 1.877977405654121e-05, "loss": 0.5801, "step": 15069 }, { "epoch": 0.319611461050667, "grad_norm": 0.316813200712204, "learning_rate": 1.8779614406755623e-05, "loss": 0.5004, "step": 15070 }, { "epoch": 0.31963266950860003, "grad_norm": 0.3360535502433777, "learning_rate": 1.8779454747205425e-05, "loss": 0.5704, "step": 15071 }, { "epoch": 0.31965387796653305, "grad_norm": 0.3406011760234833, "learning_rate": 1.8779295077890796e-05, "loss": 0.5305, "step": 15072 }, { "epoch": 0.31967508642446607, "grad_norm": 0.4956694543361664, "learning_rate": 1.877913539881191e-05, "loss": 0.5176, "step": 15073 }, { "epoch": 0.3196962948823991, "grad_norm": 0.33109548687934875, "learning_rate": 1.8778975709968948e-05, "loss": 0.4728, "step": 15074 }, { "epoch": 0.3197175033403321, "grad_norm": 0.3271072208881378, "learning_rate": 1.877881601136208e-05, "loss": 0.4908, "step": 15075 }, { "epoch": 0.31973871179826513, "grad_norm": 0.3077583909034729, "learning_rate": 1.8778656302991495e-05, "loss": 0.5101, "step": 15076 }, { "epoch": 0.31975992025619815, "grad_norm": 0.46224847435951233, "learning_rate": 1.8778496584857364e-05, "loss": 0.476, "step": 15077 }, { "epoch": 0.31978112871413117, "grad_norm": 0.39793065190315247, "learning_rate": 1.8778336856959863e-05, "loss": 0.5444, "step": 15078 }, { "epoch": 0.31980233717206424, "grad_norm": 0.3758009076118469, "learning_rate": 1.8778177119299173e-05, "loss": 0.5502, "step": 15079 }, { "epoch": 0.31982354562999726, "grad_norm": 0.32726597785949707, "learning_rate": 1.877801737187547e-05, "loss": 0.531, "step": 15080 }, { "epoch": 0.3198447540879303, "grad_norm": 0.3276914358139038, "learning_rate": 1.877785761468893e-05, "loss": 0.5173, "step": 15081 }, { "epoch": 0.3198659625458633, "grad_norm": 0.3261708617210388, "learning_rate": 1.877769784773974e-05, "loss": 0.5349, "step": 15082 }, { "epoch": 0.3198871710037963, "grad_norm": 0.3320753574371338, "learning_rate": 1.8777538071028065e-05, "loss": 0.5096, "step": 15083 }, { "epoch": 0.31990837946172934, "grad_norm": 0.3599316477775574, "learning_rate": 1.877737828455409e-05, "loss": 0.5707, "step": 15084 }, { "epoch": 0.31992958791966236, "grad_norm": 0.3200497627258301, "learning_rate": 1.877721848831799e-05, "loss": 0.4764, "step": 15085 }, { "epoch": 0.3199507963775954, "grad_norm": 0.33363965153694153, "learning_rate": 1.8777058682319946e-05, "loss": 0.5103, "step": 15086 }, { "epoch": 0.3199720048355284, "grad_norm": 0.3236689269542694, "learning_rate": 1.877689886656013e-05, "loss": 0.4877, "step": 15087 }, { "epoch": 0.3199932132934614, "grad_norm": 0.3528499901294708, "learning_rate": 1.8776739041038725e-05, "loss": 0.448, "step": 15088 }, { "epoch": 0.32001442175139444, "grad_norm": 0.3519243896007538, "learning_rate": 1.8776579205755906e-05, "loss": 0.5692, "step": 15089 }, { "epoch": 0.32003563020932746, "grad_norm": 0.3325144052505493, "learning_rate": 1.8776419360711858e-05, "loss": 0.4558, "step": 15090 }, { "epoch": 0.3200568386672605, "grad_norm": 0.34041106700897217, "learning_rate": 1.8776259505906748e-05, "loss": 0.5161, "step": 15091 }, { "epoch": 0.32007804712519355, "grad_norm": 0.35933735966682434, "learning_rate": 1.8776099641340756e-05, "loss": 0.586, "step": 15092 }, { "epoch": 0.3200992555831266, "grad_norm": 0.32586678862571716, "learning_rate": 1.8775939767014068e-05, "loss": 0.5328, "step": 15093 }, { "epoch": 0.3201204640410596, "grad_norm": 0.32678836584091187, "learning_rate": 1.8775779882926853e-05, "loss": 0.5371, "step": 15094 }, { "epoch": 0.3201416724989926, "grad_norm": 0.406329482793808, "learning_rate": 1.877561998907929e-05, "loss": 0.5006, "step": 15095 }, { "epoch": 0.32016288095692563, "grad_norm": 0.3519558608531952, "learning_rate": 1.877546008547156e-05, "loss": 0.55, "step": 15096 }, { "epoch": 0.32018408941485865, "grad_norm": 0.5349828004837036, "learning_rate": 1.877530017210384e-05, "loss": 0.532, "step": 15097 }, { "epoch": 0.32020529787279167, "grad_norm": 0.34137943387031555, "learning_rate": 1.8775140248976307e-05, "loss": 0.5009, "step": 15098 }, { "epoch": 0.3202265063307247, "grad_norm": 0.38796302676200867, "learning_rate": 1.877498031608914e-05, "loss": 0.5466, "step": 15099 }, { "epoch": 0.3202477147886577, "grad_norm": 0.304596483707428, "learning_rate": 1.8774820373442515e-05, "loss": 0.4692, "step": 15100 }, { "epoch": 0.32026892324659073, "grad_norm": 0.3926909267902374, "learning_rate": 1.877466042103661e-05, "loss": 0.4843, "step": 15101 }, { "epoch": 0.32029013170452375, "grad_norm": 0.3308022916316986, "learning_rate": 1.877450045887161e-05, "loss": 0.5351, "step": 15102 }, { "epoch": 0.32031134016245677, "grad_norm": 0.3095861077308655, "learning_rate": 1.877434048694768e-05, "loss": 0.5032, "step": 15103 }, { "epoch": 0.3203325486203898, "grad_norm": 0.3281556963920593, "learning_rate": 1.877418050526501e-05, "loss": 0.5251, "step": 15104 }, { "epoch": 0.3203537570783228, "grad_norm": 0.336160272359848, "learning_rate": 1.8774020513823767e-05, "loss": 0.5138, "step": 15105 }, { "epoch": 0.3203749655362559, "grad_norm": 0.3290254771709442, "learning_rate": 1.877386051262414e-05, "loss": 0.4663, "step": 15106 }, { "epoch": 0.3203961739941889, "grad_norm": 0.49030786752700806, "learning_rate": 1.8773700501666298e-05, "loss": 0.5562, "step": 15107 }, { "epoch": 0.3204173824521219, "grad_norm": 0.3438678979873657, "learning_rate": 1.8773540480950422e-05, "loss": 0.4157, "step": 15108 }, { "epoch": 0.32043859091005494, "grad_norm": 0.3672598600387573, "learning_rate": 1.8773380450476693e-05, "loss": 0.5485, "step": 15109 }, { "epoch": 0.32045979936798796, "grad_norm": 0.38444983959198, "learning_rate": 1.8773220410245287e-05, "loss": 0.4359, "step": 15110 }, { "epoch": 0.320481007825921, "grad_norm": 0.4919538199901581, "learning_rate": 1.877306036025638e-05, "loss": 0.4644, "step": 15111 }, { "epoch": 0.320502216283854, "grad_norm": 0.3575858771800995, "learning_rate": 1.877290030051015e-05, "loss": 0.537, "step": 15112 }, { "epoch": 0.320523424741787, "grad_norm": 0.31591036915779114, "learning_rate": 1.877274023100678e-05, "loss": 0.519, "step": 15113 }, { "epoch": 0.32054463319972004, "grad_norm": 0.37134382128715515, "learning_rate": 1.8772580151746443e-05, "loss": 0.4983, "step": 15114 }, { "epoch": 0.32056584165765306, "grad_norm": 0.31011638045310974, "learning_rate": 1.8772420062729317e-05, "loss": 0.4942, "step": 15115 }, { "epoch": 0.3205870501155861, "grad_norm": 0.34399187564849854, "learning_rate": 1.877225996395558e-05, "loss": 0.578, "step": 15116 }, { "epoch": 0.3206082585735191, "grad_norm": 0.3128587305545807, "learning_rate": 1.8772099855425417e-05, "loss": 0.4284, "step": 15117 }, { "epoch": 0.3206294670314521, "grad_norm": 0.31450754404067993, "learning_rate": 1.8771939737138997e-05, "loss": 0.4936, "step": 15118 }, { "epoch": 0.32065067548938514, "grad_norm": 0.3791358172893524, "learning_rate": 1.8771779609096505e-05, "loss": 0.5668, "step": 15119 }, { "epoch": 0.3206718839473182, "grad_norm": 0.339419960975647, "learning_rate": 1.8771619471298114e-05, "loss": 0.5218, "step": 15120 }, { "epoch": 0.32069309240525123, "grad_norm": 0.3393198847770691, "learning_rate": 1.8771459323744e-05, "loss": 0.5902, "step": 15121 }, { "epoch": 0.32071430086318425, "grad_norm": 0.3795250952243805, "learning_rate": 1.8771299166434352e-05, "loss": 0.5911, "step": 15122 }, { "epoch": 0.3207355093211173, "grad_norm": 0.3317467272281647, "learning_rate": 1.8771138999369337e-05, "loss": 0.464, "step": 15123 }, { "epoch": 0.3207567177790503, "grad_norm": 0.3232945501804352, "learning_rate": 1.8770978822549138e-05, "loss": 0.4662, "step": 15124 }, { "epoch": 0.3207779262369833, "grad_norm": 0.3311139643192291, "learning_rate": 1.8770818635973932e-05, "loss": 0.5335, "step": 15125 }, { "epoch": 0.32079913469491633, "grad_norm": 0.34254828095436096, "learning_rate": 1.87706584396439e-05, "loss": 0.505, "step": 15126 }, { "epoch": 0.32082034315284935, "grad_norm": 0.32561996579170227, "learning_rate": 1.877049823355922e-05, "loss": 0.546, "step": 15127 }, { "epoch": 0.32084155161078237, "grad_norm": 0.3650988042354584, "learning_rate": 1.877033801772006e-05, "loss": 0.4313, "step": 15128 }, { "epoch": 0.3208627600687154, "grad_norm": 0.47173964977264404, "learning_rate": 1.8770177792126614e-05, "loss": 0.4923, "step": 15129 }, { "epoch": 0.3208839685266484, "grad_norm": 0.3461157977581024, "learning_rate": 1.8770017556779047e-05, "loss": 0.5699, "step": 15130 }, { "epoch": 0.32090517698458143, "grad_norm": 0.326844185590744, "learning_rate": 1.8769857311677547e-05, "loss": 0.512, "step": 15131 }, { "epoch": 0.32092638544251445, "grad_norm": 0.32993248105049133, "learning_rate": 1.8769697056822286e-05, "loss": 0.5191, "step": 15132 }, { "epoch": 0.3209475939004475, "grad_norm": 0.3128845691680908, "learning_rate": 1.8769536792213445e-05, "loss": 0.3852, "step": 15133 }, { "epoch": 0.32096880235838054, "grad_norm": 0.393643856048584, "learning_rate": 1.8769376517851202e-05, "loss": 0.4472, "step": 15134 }, { "epoch": 0.32099001081631356, "grad_norm": 0.3062538206577301, "learning_rate": 1.8769216233735733e-05, "loss": 0.4327, "step": 15135 }, { "epoch": 0.3210112192742466, "grad_norm": 0.3520749509334564, "learning_rate": 1.8769055939867216e-05, "loss": 0.4886, "step": 15136 }, { "epoch": 0.3210324277321796, "grad_norm": 0.3303820788860321, "learning_rate": 1.8768895636245833e-05, "loss": 0.5045, "step": 15137 }, { "epoch": 0.3210536361901126, "grad_norm": 0.33103975653648376, "learning_rate": 1.876873532287176e-05, "loss": 0.4959, "step": 15138 }, { "epoch": 0.32107484464804564, "grad_norm": 0.3982977271080017, "learning_rate": 1.876857499974518e-05, "loss": 0.5522, "step": 15139 }, { "epoch": 0.32109605310597866, "grad_norm": 0.31984615325927734, "learning_rate": 1.876841466686626e-05, "loss": 0.543, "step": 15140 }, { "epoch": 0.3211172615639117, "grad_norm": 0.4302436411380768, "learning_rate": 1.876825432423519e-05, "loss": 0.558, "step": 15141 }, { "epoch": 0.3211384700218447, "grad_norm": 0.3690064251422882, "learning_rate": 1.8768093971852144e-05, "loss": 0.4945, "step": 15142 }, { "epoch": 0.3211596784797777, "grad_norm": 0.33709684014320374, "learning_rate": 1.8767933609717298e-05, "loss": 0.5372, "step": 15143 }, { "epoch": 0.32118088693771074, "grad_norm": 0.3909838795661926, "learning_rate": 1.8767773237830832e-05, "loss": 0.5512, "step": 15144 }, { "epoch": 0.32120209539564376, "grad_norm": 0.33179762959480286, "learning_rate": 1.8767612856192923e-05, "loss": 0.5032, "step": 15145 }, { "epoch": 0.3212233038535768, "grad_norm": 0.34468021988868713, "learning_rate": 1.8767452464803758e-05, "loss": 0.4172, "step": 15146 }, { "epoch": 0.32124451231150986, "grad_norm": 0.36668986082077026, "learning_rate": 1.8767292063663505e-05, "loss": 0.5241, "step": 15147 }, { "epoch": 0.3212657207694429, "grad_norm": 0.31277453899383545, "learning_rate": 1.8767131652772344e-05, "loss": 0.4704, "step": 15148 }, { "epoch": 0.3212869292273759, "grad_norm": 0.37719520926475525, "learning_rate": 1.876697123213046e-05, "loss": 0.4856, "step": 15149 }, { "epoch": 0.3213081376853089, "grad_norm": 0.3254184126853943, "learning_rate": 1.876681080173802e-05, "loss": 0.5309, "step": 15150 }, { "epoch": 0.32132934614324193, "grad_norm": 0.46660998463630676, "learning_rate": 1.8766650361595212e-05, "loss": 0.4878, "step": 15151 }, { "epoch": 0.32135055460117495, "grad_norm": 0.4891326427459717, "learning_rate": 1.8766489911702212e-05, "loss": 0.4836, "step": 15152 }, { "epoch": 0.321371763059108, "grad_norm": 0.3474266827106476, "learning_rate": 1.8766329452059196e-05, "loss": 0.587, "step": 15153 }, { "epoch": 0.321392971517041, "grad_norm": 0.36867770552635193, "learning_rate": 1.876616898266635e-05, "loss": 0.5791, "step": 15154 }, { "epoch": 0.321414179974974, "grad_norm": 0.4516209363937378, "learning_rate": 1.8766008503523846e-05, "loss": 0.511, "step": 15155 }, { "epoch": 0.32143538843290703, "grad_norm": 0.3633602559566498, "learning_rate": 1.876584801463186e-05, "loss": 0.613, "step": 15156 }, { "epoch": 0.32145659689084005, "grad_norm": 0.379257470369339, "learning_rate": 1.8765687515990577e-05, "loss": 0.517, "step": 15157 }, { "epoch": 0.32147780534877307, "grad_norm": 0.35523542761802673, "learning_rate": 1.876552700760017e-05, "loss": 0.4793, "step": 15158 }, { "epoch": 0.3214990138067061, "grad_norm": 0.9754561185836792, "learning_rate": 1.876536648946082e-05, "loss": 0.4153, "step": 15159 }, { "epoch": 0.3215202222646391, "grad_norm": 0.36462244391441345, "learning_rate": 1.8765205961572705e-05, "loss": 0.4883, "step": 15160 }, { "epoch": 0.3215414307225722, "grad_norm": 0.3403186798095703, "learning_rate": 1.8765045423936006e-05, "loss": 0.4675, "step": 15161 }, { "epoch": 0.3215626391805052, "grad_norm": 0.35481756925582886, "learning_rate": 1.87648848765509e-05, "loss": 0.5239, "step": 15162 }, { "epoch": 0.3215838476384382, "grad_norm": 0.3420104384422302, "learning_rate": 1.8764724319417563e-05, "loss": 0.4228, "step": 15163 }, { "epoch": 0.32160505609637124, "grad_norm": 0.4054378271102905, "learning_rate": 1.876456375253618e-05, "loss": 0.5163, "step": 15164 }, { "epoch": 0.32162626455430426, "grad_norm": 0.3332284688949585, "learning_rate": 1.876440317590692e-05, "loss": 0.4902, "step": 15165 }, { "epoch": 0.3216474730122373, "grad_norm": 0.42705222964286804, "learning_rate": 1.876424258952997e-05, "loss": 0.4941, "step": 15166 }, { "epoch": 0.3216686814701703, "grad_norm": 0.43452927470207214, "learning_rate": 1.8764081993405506e-05, "loss": 0.5213, "step": 15167 }, { "epoch": 0.3216898899281033, "grad_norm": 0.33380982279777527, "learning_rate": 1.8763921387533703e-05, "loss": 0.5222, "step": 15168 }, { "epoch": 0.32171109838603634, "grad_norm": 0.33297958970069885, "learning_rate": 1.8763760771914747e-05, "loss": 0.5742, "step": 15169 }, { "epoch": 0.32173230684396936, "grad_norm": 0.3420264720916748, "learning_rate": 1.876360014654881e-05, "loss": 0.5034, "step": 15170 }, { "epoch": 0.3217535153019024, "grad_norm": 0.6396236419677734, "learning_rate": 1.876343951143607e-05, "loss": 0.585, "step": 15171 }, { "epoch": 0.3217747237598354, "grad_norm": 0.46463802456855774, "learning_rate": 1.876327886657671e-05, "loss": 0.5496, "step": 15172 }, { "epoch": 0.3217959322177684, "grad_norm": 0.3728456199169159, "learning_rate": 1.8763118211970912e-05, "loss": 0.5513, "step": 15173 }, { "epoch": 0.3218171406757015, "grad_norm": 0.38400816917419434, "learning_rate": 1.876295754761885e-05, "loss": 0.4375, "step": 15174 }, { "epoch": 0.3218383491336345, "grad_norm": 0.34103691577911377, "learning_rate": 1.87627968735207e-05, "loss": 0.5379, "step": 15175 }, { "epoch": 0.32185955759156754, "grad_norm": 0.3213117718696594, "learning_rate": 1.876263618967664e-05, "loss": 0.5812, "step": 15176 }, { "epoch": 0.32188076604950056, "grad_norm": 0.34299367666244507, "learning_rate": 1.876247549608686e-05, "loss": 0.4968, "step": 15177 }, { "epoch": 0.3219019745074336, "grad_norm": 0.7335833311080933, "learning_rate": 1.8762314792751525e-05, "loss": 0.5673, "step": 15178 }, { "epoch": 0.3219231829653666, "grad_norm": 0.3939598798751831, "learning_rate": 1.876215407967082e-05, "loss": 0.4457, "step": 15179 }, { "epoch": 0.3219443914232996, "grad_norm": 0.3191312551498413, "learning_rate": 1.8761993356844927e-05, "loss": 0.4843, "step": 15180 }, { "epoch": 0.32196559988123263, "grad_norm": 0.3250025808811188, "learning_rate": 1.8761832624274017e-05, "loss": 0.4998, "step": 15181 }, { "epoch": 0.32198680833916565, "grad_norm": 0.34144213795661926, "learning_rate": 1.8761671881958275e-05, "loss": 0.5662, "step": 15182 }, { "epoch": 0.3220080167970987, "grad_norm": 0.37573012709617615, "learning_rate": 1.8761511129897876e-05, "loss": 0.5279, "step": 15183 }, { "epoch": 0.3220292252550317, "grad_norm": 0.347930908203125, "learning_rate": 1.8761350368093003e-05, "loss": 0.4701, "step": 15184 }, { "epoch": 0.3220504337129647, "grad_norm": 0.28656914830207825, "learning_rate": 1.876118959654383e-05, "loss": 0.4643, "step": 15185 }, { "epoch": 0.32207164217089773, "grad_norm": 0.39235854148864746, "learning_rate": 1.876102881525054e-05, "loss": 0.5482, "step": 15186 }, { "epoch": 0.32209285062883075, "grad_norm": 0.354500412940979, "learning_rate": 1.876086802421331e-05, "loss": 0.5266, "step": 15187 }, { "epoch": 0.3221140590867638, "grad_norm": 0.3811352252960205, "learning_rate": 1.8760707223432317e-05, "loss": 0.5439, "step": 15188 }, { "epoch": 0.32213526754469685, "grad_norm": 0.3384605348110199, "learning_rate": 1.876054641290774e-05, "loss": 0.5409, "step": 15189 }, { "epoch": 0.32215647600262987, "grad_norm": 0.39828649163246155, "learning_rate": 1.876038559263976e-05, "loss": 0.5747, "step": 15190 }, { "epoch": 0.3221776844605629, "grad_norm": 0.3395746648311615, "learning_rate": 1.876022476262856e-05, "loss": 0.5572, "step": 15191 }, { "epoch": 0.3221988929184959, "grad_norm": 0.30416786670684814, "learning_rate": 1.8760063922874308e-05, "loss": 0.4992, "step": 15192 }, { "epoch": 0.3222201013764289, "grad_norm": 0.34726110100746155, "learning_rate": 1.875990307337719e-05, "loss": 0.5308, "step": 15193 }, { "epoch": 0.32224130983436194, "grad_norm": 0.3627351224422455, "learning_rate": 1.8759742214137387e-05, "loss": 0.5021, "step": 15194 }, { "epoch": 0.32226251829229496, "grad_norm": 0.3676275312900543, "learning_rate": 1.8759581345155074e-05, "loss": 0.5395, "step": 15195 }, { "epoch": 0.322283726750228, "grad_norm": 0.357607901096344, "learning_rate": 1.875942046643043e-05, "loss": 0.5074, "step": 15196 }, { "epoch": 0.322304935208161, "grad_norm": 0.4015852212905884, "learning_rate": 1.8759259577963634e-05, "loss": 0.5274, "step": 15197 }, { "epoch": 0.322326143666094, "grad_norm": 0.32168421149253845, "learning_rate": 1.875909867975487e-05, "loss": 0.5169, "step": 15198 }, { "epoch": 0.32234735212402704, "grad_norm": 0.8981680274009705, "learning_rate": 1.8758937771804304e-05, "loss": 0.5791, "step": 15199 }, { "epoch": 0.32236856058196006, "grad_norm": 0.37878113985061646, "learning_rate": 1.875877685411213e-05, "loss": 0.5001, "step": 15200 }, { "epoch": 0.32238976903989314, "grad_norm": 0.3036942183971405, "learning_rate": 1.8758615926678518e-05, "loss": 0.4052, "step": 15201 }, { "epoch": 0.32241097749782616, "grad_norm": 0.35385897755622864, "learning_rate": 1.8758454989503653e-05, "loss": 0.5358, "step": 15202 }, { "epoch": 0.3224321859557592, "grad_norm": 0.36192312836647034, "learning_rate": 1.8758294042587705e-05, "loss": 0.4578, "step": 15203 }, { "epoch": 0.3224533944136922, "grad_norm": 0.3667859733104706, "learning_rate": 1.8758133085930862e-05, "loss": 0.5186, "step": 15204 }, { "epoch": 0.3224746028716252, "grad_norm": 0.704515814781189, "learning_rate": 1.87579721195333e-05, "loss": 0.4959, "step": 15205 }, { "epoch": 0.32249581132955824, "grad_norm": 0.34522148966789246, "learning_rate": 1.8757811143395196e-05, "loss": 0.5006, "step": 15206 }, { "epoch": 0.32251701978749125, "grad_norm": 0.3474617898464203, "learning_rate": 1.875765015751673e-05, "loss": 0.5443, "step": 15207 }, { "epoch": 0.3225382282454243, "grad_norm": 0.3498622179031372, "learning_rate": 1.875748916189808e-05, "loss": 0.5782, "step": 15208 }, { "epoch": 0.3225594367033573, "grad_norm": 0.3777402937412262, "learning_rate": 1.8757328156539428e-05, "loss": 0.5552, "step": 15209 }, { "epoch": 0.3225806451612903, "grad_norm": 0.48036065697669983, "learning_rate": 1.8757167141440957e-05, "loss": 0.5229, "step": 15210 }, { "epoch": 0.32260185361922333, "grad_norm": 0.33952897787094116, "learning_rate": 1.8757006116602837e-05, "loss": 0.5512, "step": 15211 }, { "epoch": 0.32262306207715635, "grad_norm": 0.3236570656299591, "learning_rate": 1.8756845082025247e-05, "loss": 0.4601, "step": 15212 }, { "epoch": 0.3226442705350894, "grad_norm": 0.3855893909931183, "learning_rate": 1.8756684037708374e-05, "loss": 0.4958, "step": 15213 }, { "epoch": 0.3226654789930224, "grad_norm": 0.3729800283908844, "learning_rate": 1.875652298365239e-05, "loss": 0.5332, "step": 15214 }, { "epoch": 0.32268668745095547, "grad_norm": 0.3290991187095642, "learning_rate": 1.875636191985748e-05, "loss": 0.4726, "step": 15215 }, { "epoch": 0.3227078959088885, "grad_norm": 0.3763262927532196, "learning_rate": 1.8756200846323823e-05, "loss": 0.4984, "step": 15216 }, { "epoch": 0.3227291043668215, "grad_norm": 0.3141031861305237, "learning_rate": 1.875603976305159e-05, "loss": 0.4311, "step": 15217 }, { "epoch": 0.3227503128247545, "grad_norm": 0.38424745202064514, "learning_rate": 1.875587867004097e-05, "loss": 0.5172, "step": 15218 }, { "epoch": 0.32277152128268755, "grad_norm": 0.3465031385421753, "learning_rate": 1.8755717567292135e-05, "loss": 0.538, "step": 15219 }, { "epoch": 0.32279272974062057, "grad_norm": 0.31932276487350464, "learning_rate": 1.8755556454805268e-05, "loss": 0.5027, "step": 15220 }, { "epoch": 0.3228139381985536, "grad_norm": 0.3620815873146057, "learning_rate": 1.8755395332580548e-05, "loss": 0.5095, "step": 15221 }, { "epoch": 0.3228351466564866, "grad_norm": 0.35738605260849, "learning_rate": 1.875523420061815e-05, "loss": 0.5419, "step": 15222 }, { "epoch": 0.3228563551144196, "grad_norm": 0.37046921253204346, "learning_rate": 1.8755073058918262e-05, "loss": 0.4641, "step": 15223 }, { "epoch": 0.32287756357235264, "grad_norm": 0.3900904357433319, "learning_rate": 1.8754911907481053e-05, "loss": 0.4945, "step": 15224 }, { "epoch": 0.32289877203028566, "grad_norm": 0.37842297554016113, "learning_rate": 1.875475074630671e-05, "loss": 0.5173, "step": 15225 }, { "epoch": 0.3229199804882187, "grad_norm": 0.3443904519081116, "learning_rate": 1.8754589575395405e-05, "loss": 0.5004, "step": 15226 }, { "epoch": 0.3229411889461517, "grad_norm": 0.3408009111881256, "learning_rate": 1.8754428394747326e-05, "loss": 0.4981, "step": 15227 }, { "epoch": 0.3229623974040847, "grad_norm": 0.4139483571052551, "learning_rate": 1.8754267204362647e-05, "loss": 0.4955, "step": 15228 }, { "epoch": 0.3229836058620178, "grad_norm": 0.3025578558444977, "learning_rate": 1.8754106004241545e-05, "loss": 0.4481, "step": 15229 }, { "epoch": 0.3230048143199508, "grad_norm": 0.42572712898254395, "learning_rate": 1.8753944794384205e-05, "loss": 0.4887, "step": 15230 }, { "epoch": 0.32302602277788384, "grad_norm": 0.3774119019508362, "learning_rate": 1.87537835747908e-05, "loss": 0.5585, "step": 15231 }, { "epoch": 0.32304723123581686, "grad_norm": 0.38352760672569275, "learning_rate": 1.875362234546152e-05, "loss": 0.5107, "step": 15232 }, { "epoch": 0.3230684396937499, "grad_norm": 0.3448503017425537, "learning_rate": 1.8753461106396533e-05, "loss": 0.5724, "step": 15233 }, { "epoch": 0.3230896481516829, "grad_norm": 0.34315699338912964, "learning_rate": 1.8753299857596024e-05, "loss": 0.5596, "step": 15234 }, { "epoch": 0.3231108566096159, "grad_norm": 0.31282734870910645, "learning_rate": 1.875313859906017e-05, "loss": 0.5106, "step": 15235 }, { "epoch": 0.32313206506754893, "grad_norm": 0.3705679476261139, "learning_rate": 1.875297733078915e-05, "loss": 0.574, "step": 15236 }, { "epoch": 0.32315327352548195, "grad_norm": 0.4161047637462616, "learning_rate": 1.8752816052783148e-05, "loss": 0.4248, "step": 15237 }, { "epoch": 0.323174481983415, "grad_norm": 0.3320813775062561, "learning_rate": 1.8752654765042338e-05, "loss": 0.5313, "step": 15238 }, { "epoch": 0.323195690441348, "grad_norm": 0.3611794114112854, "learning_rate": 1.8752493467566898e-05, "loss": 0.5427, "step": 15239 }, { "epoch": 0.323216898899281, "grad_norm": 0.43779414892196655, "learning_rate": 1.8752332160357016e-05, "loss": 0.5435, "step": 15240 }, { "epoch": 0.32323810735721403, "grad_norm": 0.31557920575141907, "learning_rate": 1.875217084341287e-05, "loss": 0.4818, "step": 15241 }, { "epoch": 0.3232593158151471, "grad_norm": 0.3583986759185791, "learning_rate": 1.8752009516734625e-05, "loss": 0.5225, "step": 15242 }, { "epoch": 0.3232805242730801, "grad_norm": 0.36398592591285706, "learning_rate": 1.8751848180322476e-05, "loss": 0.4962, "step": 15243 }, { "epoch": 0.32330173273101315, "grad_norm": 0.3301679193973541, "learning_rate": 1.87516868341766e-05, "loss": 0.514, "step": 15244 }, { "epoch": 0.32332294118894617, "grad_norm": 0.32269594073295593, "learning_rate": 1.875152547829717e-05, "loss": 0.5967, "step": 15245 }, { "epoch": 0.3233441496468792, "grad_norm": 0.35372138023376465, "learning_rate": 1.875136411268437e-05, "loss": 0.583, "step": 15246 }, { "epoch": 0.3233653581048122, "grad_norm": 0.5125793814659119, "learning_rate": 1.8751202737338382e-05, "loss": 0.5756, "step": 15247 }, { "epoch": 0.3233865665627452, "grad_norm": 0.3440326452255249, "learning_rate": 1.875104135225938e-05, "loss": 0.5936, "step": 15248 }, { "epoch": 0.32340777502067825, "grad_norm": 0.4662989377975464, "learning_rate": 1.8750879957447544e-05, "loss": 0.4768, "step": 15249 }, { "epoch": 0.32342898347861126, "grad_norm": 0.35889479517936707, "learning_rate": 1.875071855290306e-05, "loss": 0.6174, "step": 15250 }, { "epoch": 0.3234501919365443, "grad_norm": 0.3307650089263916, "learning_rate": 1.87505571386261e-05, "loss": 0.5694, "step": 15251 }, { "epoch": 0.3234714003944773, "grad_norm": 0.34463655948638916, "learning_rate": 1.8750395714616845e-05, "loss": 0.5269, "step": 15252 }, { "epoch": 0.3234926088524103, "grad_norm": 0.354798287153244, "learning_rate": 1.8750234280875477e-05, "loss": 0.4559, "step": 15253 }, { "epoch": 0.32351381731034334, "grad_norm": 0.33695605397224426, "learning_rate": 1.8750072837402175e-05, "loss": 0.5102, "step": 15254 }, { "epoch": 0.32353502576827636, "grad_norm": 0.34384822845458984, "learning_rate": 1.8749911384197118e-05, "loss": 0.5507, "step": 15255 }, { "epoch": 0.32355623422620944, "grad_norm": 0.37315312027931213, "learning_rate": 1.8749749921260486e-05, "loss": 0.5408, "step": 15256 }, { "epoch": 0.32357744268414246, "grad_norm": 0.35953691601753235, "learning_rate": 1.874958844859246e-05, "loss": 0.5486, "step": 15257 }, { "epoch": 0.3235986511420755, "grad_norm": 0.3849295377731323, "learning_rate": 1.8749426966193212e-05, "loss": 0.6028, "step": 15258 }, { "epoch": 0.3236198596000085, "grad_norm": 0.36136406660079956, "learning_rate": 1.8749265474062932e-05, "loss": 0.5027, "step": 15259 }, { "epoch": 0.3236410680579415, "grad_norm": 0.5130009651184082, "learning_rate": 1.8749103972201792e-05, "loss": 0.5631, "step": 15260 }, { "epoch": 0.32366227651587454, "grad_norm": 0.3760469853878021, "learning_rate": 1.8748942460609975e-05, "loss": 0.483, "step": 15261 }, { "epoch": 0.32368348497380756, "grad_norm": 0.3300389349460602, "learning_rate": 1.8748780939287664e-05, "loss": 0.507, "step": 15262 }, { "epoch": 0.3237046934317406, "grad_norm": 0.35342422127723694, "learning_rate": 1.874861940823503e-05, "loss": 0.4828, "step": 15263 }, { "epoch": 0.3237259018896736, "grad_norm": 0.35556164383888245, "learning_rate": 1.874845786745226e-05, "loss": 0.4781, "step": 15264 }, { "epoch": 0.3237471103476066, "grad_norm": 0.32988500595092773, "learning_rate": 1.8748296316939533e-05, "loss": 0.5324, "step": 15265 }, { "epoch": 0.32376831880553963, "grad_norm": 0.47472018003463745, "learning_rate": 1.8748134756697023e-05, "loss": 0.5105, "step": 15266 }, { "epoch": 0.32378952726347265, "grad_norm": 0.3097167909145355, "learning_rate": 1.8747973186724917e-05, "loss": 0.534, "step": 15267 }, { "epoch": 0.3238107357214057, "grad_norm": 0.2886657118797302, "learning_rate": 1.8747811607023387e-05, "loss": 0.4766, "step": 15268 }, { "epoch": 0.3238319441793387, "grad_norm": 0.31583720445632935, "learning_rate": 1.874765001759262e-05, "loss": 0.5593, "step": 15269 }, { "epoch": 0.32385315263727177, "grad_norm": 0.382521390914917, "learning_rate": 1.8747488418432792e-05, "loss": 0.4836, "step": 15270 }, { "epoch": 0.3238743610952048, "grad_norm": 0.35367900133132935, "learning_rate": 1.8747326809544083e-05, "loss": 0.544, "step": 15271 }, { "epoch": 0.3238955695531378, "grad_norm": 0.32365500926971436, "learning_rate": 1.8747165190926674e-05, "loss": 0.5807, "step": 15272 }, { "epoch": 0.3239167780110708, "grad_norm": 0.40339961647987366, "learning_rate": 1.8747003562580744e-05, "loss": 0.5222, "step": 15273 }, { "epoch": 0.32393798646900385, "grad_norm": 0.34815311431884766, "learning_rate": 1.874684192450647e-05, "loss": 0.4886, "step": 15274 }, { "epoch": 0.32395919492693687, "grad_norm": 0.3585927486419678, "learning_rate": 1.8746680276704037e-05, "loss": 0.5509, "step": 15275 }, { "epoch": 0.3239804033848699, "grad_norm": 0.3698681592941284, "learning_rate": 1.874651861917362e-05, "loss": 0.5506, "step": 15276 }, { "epoch": 0.3240016118428029, "grad_norm": 0.35411474108695984, "learning_rate": 1.8746356951915406e-05, "loss": 0.4751, "step": 15277 }, { "epoch": 0.3240228203007359, "grad_norm": 0.47561877965927124, "learning_rate": 1.8746195274929564e-05, "loss": 0.576, "step": 15278 }, { "epoch": 0.32404402875866894, "grad_norm": 0.399094820022583, "learning_rate": 1.874603358821628e-05, "loss": 0.5801, "step": 15279 }, { "epoch": 0.32406523721660196, "grad_norm": 0.34863147139549255, "learning_rate": 1.874587189177574e-05, "loss": 0.5603, "step": 15280 }, { "epoch": 0.324086445674535, "grad_norm": 0.35180339217185974, "learning_rate": 1.874571018560811e-05, "loss": 0.509, "step": 15281 }, { "epoch": 0.324107654132468, "grad_norm": 0.3278665244579315, "learning_rate": 1.874554846971358e-05, "loss": 0.5099, "step": 15282 }, { "epoch": 0.3241288625904011, "grad_norm": 0.3445322811603546, "learning_rate": 1.8745386744092325e-05, "loss": 0.4946, "step": 15283 }, { "epoch": 0.3241500710483341, "grad_norm": 0.3434787094593048, "learning_rate": 1.874522500874453e-05, "loss": 0.5183, "step": 15284 }, { "epoch": 0.3241712795062671, "grad_norm": 0.3542397916316986, "learning_rate": 1.874506326367037e-05, "loss": 0.5561, "step": 15285 }, { "epoch": 0.32419248796420014, "grad_norm": 0.34784919023513794, "learning_rate": 1.874490150887003e-05, "loss": 0.6049, "step": 15286 }, { "epoch": 0.32421369642213316, "grad_norm": 0.3131870925426483, "learning_rate": 1.874473974434368e-05, "loss": 0.4738, "step": 15287 }, { "epoch": 0.3242349048800662, "grad_norm": 0.45200565457344055, "learning_rate": 1.8744577970091507e-05, "loss": 0.5691, "step": 15288 }, { "epoch": 0.3242561133379992, "grad_norm": 0.308637410402298, "learning_rate": 1.8744416186113698e-05, "loss": 0.5243, "step": 15289 }, { "epoch": 0.3242773217959322, "grad_norm": 0.3332352042198181, "learning_rate": 1.874425439241042e-05, "loss": 0.4694, "step": 15290 }, { "epoch": 0.32429853025386524, "grad_norm": 0.45892515778541565, "learning_rate": 1.8744092588981856e-05, "loss": 0.6086, "step": 15291 }, { "epoch": 0.32431973871179826, "grad_norm": 0.43702641129493713, "learning_rate": 1.874393077582819e-05, "loss": 0.5, "step": 15292 }, { "epoch": 0.3243409471697313, "grad_norm": 0.3307531774044037, "learning_rate": 1.8743768952949604e-05, "loss": 0.4512, "step": 15293 }, { "epoch": 0.3243621556276643, "grad_norm": 0.33349525928497314, "learning_rate": 1.874360712034627e-05, "loss": 0.494, "step": 15294 }, { "epoch": 0.3243833640855973, "grad_norm": 0.3327687680721283, "learning_rate": 1.8743445278018372e-05, "loss": 0.4142, "step": 15295 }, { "epoch": 0.32440457254353033, "grad_norm": 0.31738612055778503, "learning_rate": 1.8743283425966094e-05, "loss": 0.5097, "step": 15296 }, { "epoch": 0.3244257810014634, "grad_norm": 0.34199023246765137, "learning_rate": 1.874312156418961e-05, "loss": 0.519, "step": 15297 }, { "epoch": 0.32444698945939643, "grad_norm": 0.7791659832000732, "learning_rate": 1.87429596926891e-05, "loss": 0.585, "step": 15298 }, { "epoch": 0.32446819791732945, "grad_norm": 0.34172555804252625, "learning_rate": 1.8742797811464748e-05, "loss": 0.5487, "step": 15299 }, { "epoch": 0.32448940637526247, "grad_norm": 0.38139617443084717, "learning_rate": 1.8742635920516733e-05, "loss": 0.4896, "step": 15300 }, { "epoch": 0.3245106148331955, "grad_norm": 0.33405008912086487, "learning_rate": 1.874247401984523e-05, "loss": 0.5511, "step": 15301 }, { "epoch": 0.3245318232911285, "grad_norm": 0.3645188510417938, "learning_rate": 1.8742312109450427e-05, "loss": 0.4683, "step": 15302 }, { "epoch": 0.3245530317490615, "grad_norm": 0.3489912152290344, "learning_rate": 1.87421501893325e-05, "loss": 0.4997, "step": 15303 }, { "epoch": 0.32457424020699455, "grad_norm": 0.32515284419059753, "learning_rate": 1.874198825949163e-05, "loss": 0.4785, "step": 15304 }, { "epoch": 0.32459544866492757, "grad_norm": 0.3750901222229004, "learning_rate": 1.8741826319927995e-05, "loss": 0.5176, "step": 15305 }, { "epoch": 0.3246166571228606, "grad_norm": 0.3277263939380646, "learning_rate": 1.8741664370641775e-05, "loss": 0.5339, "step": 15306 }, { "epoch": 0.3246378655807936, "grad_norm": 0.3268624544143677, "learning_rate": 1.874150241163316e-05, "loss": 0.5235, "step": 15307 }, { "epoch": 0.3246590740387266, "grad_norm": 0.3720344603061676, "learning_rate": 1.8741340442902312e-05, "loss": 0.4842, "step": 15308 }, { "epoch": 0.32468028249665964, "grad_norm": 0.4332703649997711, "learning_rate": 1.8741178464449428e-05, "loss": 0.4871, "step": 15309 }, { "epoch": 0.32470149095459266, "grad_norm": 0.667585015296936, "learning_rate": 1.874101647627468e-05, "loss": 0.5133, "step": 15310 }, { "epoch": 0.32472269941252574, "grad_norm": 0.32904112339019775, "learning_rate": 1.874085447837825e-05, "loss": 0.4746, "step": 15311 }, { "epoch": 0.32474390787045876, "grad_norm": 0.35967838764190674, "learning_rate": 1.8740692470760312e-05, "loss": 0.5032, "step": 15312 }, { "epoch": 0.3247651163283918, "grad_norm": 0.3623490333557129, "learning_rate": 1.8740530453421056e-05, "loss": 0.5264, "step": 15313 }, { "epoch": 0.3247863247863248, "grad_norm": 0.30707770586013794, "learning_rate": 1.8740368426360658e-05, "loss": 0.3789, "step": 15314 }, { "epoch": 0.3248075332442578, "grad_norm": 0.3429834544658661, "learning_rate": 1.87402063895793e-05, "loss": 0.5679, "step": 15315 }, { "epoch": 0.32482874170219084, "grad_norm": 0.3299559950828552, "learning_rate": 1.8740044343077153e-05, "loss": 0.5131, "step": 15316 }, { "epoch": 0.32484995016012386, "grad_norm": 0.3335150480270386, "learning_rate": 1.8739882286854413e-05, "loss": 0.4442, "step": 15317 }, { "epoch": 0.3248711586180569, "grad_norm": 0.4911975562572479, "learning_rate": 1.873972022091125e-05, "loss": 0.5665, "step": 15318 }, { "epoch": 0.3248923670759899, "grad_norm": 0.3338552415370941, "learning_rate": 1.8739558145247842e-05, "loss": 0.4106, "step": 15319 }, { "epoch": 0.3249135755339229, "grad_norm": 0.33228370547294617, "learning_rate": 1.8739396059864377e-05, "loss": 0.4704, "step": 15320 }, { "epoch": 0.32493478399185594, "grad_norm": 0.31668782234191895, "learning_rate": 1.8739233964761032e-05, "loss": 0.4116, "step": 15321 }, { "epoch": 0.32495599244978896, "grad_norm": 0.395893931388855, "learning_rate": 1.8739071859937984e-05, "loss": 0.5249, "step": 15322 }, { "epoch": 0.324977200907722, "grad_norm": 0.39868584275245667, "learning_rate": 1.873890974539542e-05, "loss": 0.4668, "step": 15323 }, { "epoch": 0.32499840936565505, "grad_norm": 0.36269375681877136, "learning_rate": 1.8738747621133515e-05, "loss": 0.5108, "step": 15324 }, { "epoch": 0.32501961782358807, "grad_norm": 0.3291354179382324, "learning_rate": 1.873858548715245e-05, "loss": 0.4694, "step": 15325 }, { "epoch": 0.3250408262815211, "grad_norm": 0.3811652362346649, "learning_rate": 1.87384233434524e-05, "loss": 0.526, "step": 15326 }, { "epoch": 0.3250620347394541, "grad_norm": 0.32176539301872253, "learning_rate": 1.8738261190033563e-05, "loss": 0.5567, "step": 15327 }, { "epoch": 0.32508324319738713, "grad_norm": 0.37516093254089355, "learning_rate": 1.87380990268961e-05, "loss": 0.5184, "step": 15328 }, { "epoch": 0.32510445165532015, "grad_norm": 0.3131962716579437, "learning_rate": 1.8737936854040204e-05, "loss": 0.49, "step": 15329 }, { "epoch": 0.32512566011325317, "grad_norm": 0.37453189492225647, "learning_rate": 1.8737774671466047e-05, "loss": 0.522, "step": 15330 }, { "epoch": 0.3251468685711862, "grad_norm": 0.32429254055023193, "learning_rate": 1.8737612479173814e-05, "loss": 0.4967, "step": 15331 }, { "epoch": 0.3251680770291192, "grad_norm": 0.3641565442085266, "learning_rate": 1.8737450277163686e-05, "loss": 0.4768, "step": 15332 }, { "epoch": 0.3251892854870522, "grad_norm": 0.35819724202156067, "learning_rate": 1.8737288065435838e-05, "loss": 0.5505, "step": 15333 }, { "epoch": 0.32521049394498525, "grad_norm": 0.3800778090953827, "learning_rate": 1.8737125843990455e-05, "loss": 0.5798, "step": 15334 }, { "epoch": 0.32523170240291827, "grad_norm": 0.3811907470226288, "learning_rate": 1.873696361282772e-05, "loss": 0.583, "step": 15335 }, { "epoch": 0.3252529108608513, "grad_norm": 0.3274528682231903, "learning_rate": 1.873680137194781e-05, "loss": 0.542, "step": 15336 }, { "epoch": 0.3252741193187843, "grad_norm": 0.3958398997783661, "learning_rate": 1.87366391213509e-05, "loss": 0.4874, "step": 15337 }, { "epoch": 0.3252953277767174, "grad_norm": 0.31766751408576965, "learning_rate": 1.873647686103718e-05, "loss": 0.4854, "step": 15338 }, { "epoch": 0.3253165362346504, "grad_norm": 0.32342439889907837, "learning_rate": 1.8736314591006824e-05, "loss": 0.4563, "step": 15339 }, { "epoch": 0.3253377446925834, "grad_norm": 0.3205049932003021, "learning_rate": 1.8736152311260018e-05, "loss": 0.447, "step": 15340 }, { "epoch": 0.32535895315051644, "grad_norm": 0.4280956983566284, "learning_rate": 1.8735990021796937e-05, "loss": 0.5039, "step": 15341 }, { "epoch": 0.32538016160844946, "grad_norm": 0.4092792272567749, "learning_rate": 1.8735827722617765e-05, "loss": 0.5034, "step": 15342 }, { "epoch": 0.3254013700663825, "grad_norm": 0.3521115183830261, "learning_rate": 1.8735665413722677e-05, "loss": 0.4831, "step": 15343 }, { "epoch": 0.3254225785243155, "grad_norm": 0.3189781606197357, "learning_rate": 1.8735503095111865e-05, "loss": 0.534, "step": 15344 }, { "epoch": 0.3254437869822485, "grad_norm": 0.3409486413002014, "learning_rate": 1.8735340766785496e-05, "loss": 0.5188, "step": 15345 }, { "epoch": 0.32546499544018154, "grad_norm": 0.34786471724510193, "learning_rate": 1.8735178428743764e-05, "loss": 0.4923, "step": 15346 }, { "epoch": 0.32548620389811456, "grad_norm": 0.34215688705444336, "learning_rate": 1.8735016080986836e-05, "loss": 0.5156, "step": 15347 }, { "epoch": 0.3255074123560476, "grad_norm": 0.33845388889312744, "learning_rate": 1.87348537235149e-05, "loss": 0.4826, "step": 15348 }, { "epoch": 0.3255286208139806, "grad_norm": 0.43003714084625244, "learning_rate": 1.8734691356328137e-05, "loss": 0.496, "step": 15349 }, { "epoch": 0.3255498292719136, "grad_norm": 0.4945802092552185, "learning_rate": 1.8734528979426726e-05, "loss": 0.5739, "step": 15350 }, { "epoch": 0.32557103772984664, "grad_norm": 0.31152963638305664, "learning_rate": 1.873436659281085e-05, "loss": 0.4876, "step": 15351 }, { "epoch": 0.3255922461877797, "grad_norm": 0.37079358100891113, "learning_rate": 1.873420419648068e-05, "loss": 0.481, "step": 15352 }, { "epoch": 0.32561345464571273, "grad_norm": 0.49143186211586, "learning_rate": 1.8734041790436414e-05, "loss": 0.5795, "step": 15353 }, { "epoch": 0.32563466310364575, "grad_norm": 0.31726768612861633, "learning_rate": 1.8733879374678214e-05, "loss": 0.5011, "step": 15354 }, { "epoch": 0.32565587156157877, "grad_norm": 0.3225482404232025, "learning_rate": 1.8733716949206273e-05, "loss": 0.4747, "step": 15355 }, { "epoch": 0.3256770800195118, "grad_norm": 0.3583533465862274, "learning_rate": 1.873355451402077e-05, "loss": 0.4617, "step": 15356 }, { "epoch": 0.3256982884774448, "grad_norm": 0.38432779908180237, "learning_rate": 1.8733392069121877e-05, "loss": 0.5301, "step": 15357 }, { "epoch": 0.32571949693537783, "grad_norm": 0.32983025908470154, "learning_rate": 1.8733229614509787e-05, "loss": 0.5625, "step": 15358 }, { "epoch": 0.32574070539331085, "grad_norm": 0.3864728808403015, "learning_rate": 1.8733067150184673e-05, "loss": 0.5408, "step": 15359 }, { "epoch": 0.32576191385124387, "grad_norm": 0.364852637052536, "learning_rate": 1.873290467614672e-05, "loss": 0.4059, "step": 15360 }, { "epoch": 0.3257831223091769, "grad_norm": 0.35997527837753296, "learning_rate": 1.87327421923961e-05, "loss": 0.501, "step": 15361 }, { "epoch": 0.3258043307671099, "grad_norm": 0.35124266147613525, "learning_rate": 1.8732579698933007e-05, "loss": 0.4615, "step": 15362 }, { "epoch": 0.3258255392250429, "grad_norm": 0.3773459196090698, "learning_rate": 1.8732417195757612e-05, "loss": 0.5075, "step": 15363 }, { "epoch": 0.32584674768297595, "grad_norm": 0.3005220293998718, "learning_rate": 1.8732254682870096e-05, "loss": 0.4417, "step": 15364 }, { "epoch": 0.325867956140909, "grad_norm": 0.3268164098262787, "learning_rate": 1.8732092160270646e-05, "loss": 0.4974, "step": 15365 }, { "epoch": 0.32588916459884204, "grad_norm": 0.36390814185142517, "learning_rate": 1.8731929627959434e-05, "loss": 0.4587, "step": 15366 }, { "epoch": 0.32591037305677506, "grad_norm": 0.36372116208076477, "learning_rate": 1.873176708593665e-05, "loss": 0.5698, "step": 15367 }, { "epoch": 0.3259315815147081, "grad_norm": 0.3372388780117035, "learning_rate": 1.8731604534202473e-05, "loss": 0.4503, "step": 15368 }, { "epoch": 0.3259527899726411, "grad_norm": 0.3357803523540497, "learning_rate": 1.8731441972757076e-05, "loss": 0.5187, "step": 15369 }, { "epoch": 0.3259739984305741, "grad_norm": 0.3377930819988251, "learning_rate": 1.873127940160065e-05, "loss": 0.4767, "step": 15370 }, { "epoch": 0.32599520688850714, "grad_norm": 0.3216579556465149, "learning_rate": 1.8731116820733365e-05, "loss": 0.4457, "step": 15371 }, { "epoch": 0.32601641534644016, "grad_norm": 0.3625694513320923, "learning_rate": 1.873095423015541e-05, "loss": 0.5225, "step": 15372 }, { "epoch": 0.3260376238043732, "grad_norm": 0.38668695092201233, "learning_rate": 1.8730791629866963e-05, "loss": 0.584, "step": 15373 }, { "epoch": 0.3260588322623062, "grad_norm": 0.2930186986923218, "learning_rate": 1.8730629019868207e-05, "loss": 0.47, "step": 15374 }, { "epoch": 0.3260800407202392, "grad_norm": 0.3211124837398529, "learning_rate": 1.8730466400159323e-05, "loss": 0.5358, "step": 15375 }, { "epoch": 0.32610124917817224, "grad_norm": 0.3360491394996643, "learning_rate": 1.8730303770740487e-05, "loss": 0.4318, "step": 15376 }, { "epoch": 0.32612245763610526, "grad_norm": 0.40369394421577454, "learning_rate": 1.8730141131611882e-05, "loss": 0.4801, "step": 15377 }, { "epoch": 0.3261436660940383, "grad_norm": 0.4469979405403137, "learning_rate": 1.872997848277369e-05, "loss": 0.5431, "step": 15378 }, { "epoch": 0.32616487455197135, "grad_norm": 0.36043137311935425, "learning_rate": 1.8729815824226096e-05, "loss": 0.5729, "step": 15379 }, { "epoch": 0.32618608300990437, "grad_norm": 0.3298783302307129, "learning_rate": 1.8729653155969275e-05, "loss": 0.5304, "step": 15380 }, { "epoch": 0.3262072914678374, "grad_norm": 0.3484845757484436, "learning_rate": 1.872949047800341e-05, "loss": 0.497, "step": 15381 }, { "epoch": 0.3262284999257704, "grad_norm": 0.34264740347862244, "learning_rate": 1.8729327790328677e-05, "loss": 0.5305, "step": 15382 }, { "epoch": 0.32624970838370343, "grad_norm": 0.39663854241371155, "learning_rate": 1.872916509294527e-05, "loss": 0.4589, "step": 15383 }, { "epoch": 0.32627091684163645, "grad_norm": 0.44961902499198914, "learning_rate": 1.8729002385853352e-05, "loss": 0.5909, "step": 15384 }, { "epoch": 0.32629212529956947, "grad_norm": 0.3505796790122986, "learning_rate": 1.8728839669053117e-05, "loss": 0.5162, "step": 15385 }, { "epoch": 0.3263133337575025, "grad_norm": 0.33387118577957153, "learning_rate": 1.8728676942544745e-05, "loss": 0.5879, "step": 15386 }, { "epoch": 0.3263345422154355, "grad_norm": 0.42526182532310486, "learning_rate": 1.872851420632841e-05, "loss": 0.5679, "step": 15387 }, { "epoch": 0.3263557506733685, "grad_norm": 0.3406195342540741, "learning_rate": 1.87283514604043e-05, "loss": 0.4797, "step": 15388 }, { "epoch": 0.32637695913130155, "grad_norm": 0.3234194815158844, "learning_rate": 1.8728188704772593e-05, "loss": 0.4873, "step": 15389 }, { "epoch": 0.32639816758923457, "grad_norm": 0.5304234027862549, "learning_rate": 1.872802593943347e-05, "loss": 0.5173, "step": 15390 }, { "epoch": 0.3264193760471676, "grad_norm": 0.3509116768836975, "learning_rate": 1.8727863164387116e-05, "loss": 0.6761, "step": 15391 }, { "epoch": 0.32644058450510066, "grad_norm": 0.38833343982696533, "learning_rate": 1.8727700379633703e-05, "loss": 0.5622, "step": 15392 }, { "epoch": 0.3264617929630337, "grad_norm": 0.3412812650203705, "learning_rate": 1.8727537585173417e-05, "loss": 0.4966, "step": 15393 }, { "epoch": 0.3264830014209667, "grad_norm": 0.29547443985939026, "learning_rate": 1.8727374781006443e-05, "loss": 0.4348, "step": 15394 }, { "epoch": 0.3265042098788997, "grad_norm": 0.35047125816345215, "learning_rate": 1.872721196713296e-05, "loss": 0.4794, "step": 15395 }, { "epoch": 0.32652541833683274, "grad_norm": 0.3483443260192871, "learning_rate": 1.8727049143553144e-05, "loss": 0.5411, "step": 15396 }, { "epoch": 0.32654662679476576, "grad_norm": 0.36875078082084656, "learning_rate": 1.872688631026718e-05, "loss": 0.5446, "step": 15397 }, { "epoch": 0.3265678352526988, "grad_norm": 0.3609996438026428, "learning_rate": 1.872672346727525e-05, "loss": 0.5245, "step": 15398 }, { "epoch": 0.3265890437106318, "grad_norm": 0.31025415658950806, "learning_rate": 1.8726560614577533e-05, "loss": 0.4322, "step": 15399 }, { "epoch": 0.3266102521685648, "grad_norm": 0.34660184383392334, "learning_rate": 1.8726397752174215e-05, "loss": 0.4474, "step": 15400 }, { "epoch": 0.32663146062649784, "grad_norm": 0.35594314336776733, "learning_rate": 1.8726234880065467e-05, "loss": 0.4953, "step": 15401 }, { "epoch": 0.32665266908443086, "grad_norm": 0.3388861119747162, "learning_rate": 1.872607199825148e-05, "loss": 0.5674, "step": 15402 }, { "epoch": 0.3266738775423639, "grad_norm": 0.3509675860404968, "learning_rate": 1.872590910673243e-05, "loss": 0.5456, "step": 15403 }, { "epoch": 0.3266950860002969, "grad_norm": 0.3460319936275482, "learning_rate": 1.87257462055085e-05, "loss": 0.5543, "step": 15404 }, { "epoch": 0.3267162944582299, "grad_norm": 0.3623066842556, "learning_rate": 1.8725583294579874e-05, "loss": 0.5192, "step": 15405 }, { "epoch": 0.326737502916163, "grad_norm": 0.3309136927127838, "learning_rate": 1.8725420373946727e-05, "loss": 0.5188, "step": 15406 }, { "epoch": 0.326758711374096, "grad_norm": 0.3941529095172882, "learning_rate": 1.8725257443609245e-05, "loss": 0.5443, "step": 15407 }, { "epoch": 0.32677991983202903, "grad_norm": 0.3811119496822357, "learning_rate": 1.8725094503567604e-05, "loss": 0.4669, "step": 15408 }, { "epoch": 0.32680112828996205, "grad_norm": 0.3687022924423218, "learning_rate": 1.8724931553821992e-05, "loss": 0.5196, "step": 15409 }, { "epoch": 0.32682233674789507, "grad_norm": 0.33870574831962585, "learning_rate": 1.8724768594372587e-05, "loss": 0.4744, "step": 15410 }, { "epoch": 0.3268435452058281, "grad_norm": 0.3564106822013855, "learning_rate": 1.872460562521957e-05, "loss": 0.5927, "step": 15411 }, { "epoch": 0.3268647536637611, "grad_norm": 0.3247068226337433, "learning_rate": 1.872444264636312e-05, "loss": 0.4689, "step": 15412 }, { "epoch": 0.32688596212169413, "grad_norm": 0.3079060912132263, "learning_rate": 1.872427965780342e-05, "loss": 0.4544, "step": 15413 }, { "epoch": 0.32690717057962715, "grad_norm": 0.31935441493988037, "learning_rate": 1.872411665954066e-05, "loss": 0.5477, "step": 15414 }, { "epoch": 0.32692837903756017, "grad_norm": 0.3210993707180023, "learning_rate": 1.8723953651575003e-05, "loss": 0.4947, "step": 15415 }, { "epoch": 0.3269495874954932, "grad_norm": 0.31003281474113464, "learning_rate": 1.8723790633906645e-05, "loss": 0.4763, "step": 15416 }, { "epoch": 0.3269707959534262, "grad_norm": 0.3678622543811798, "learning_rate": 1.8723627606535765e-05, "loss": 0.4688, "step": 15417 }, { "epoch": 0.3269920044113592, "grad_norm": 0.32861483097076416, "learning_rate": 1.872346456946254e-05, "loss": 0.5203, "step": 15418 }, { "epoch": 0.32701321286929225, "grad_norm": 0.5501909852027893, "learning_rate": 1.8723301522687152e-05, "loss": 0.5545, "step": 15419 }, { "epoch": 0.3270344213272253, "grad_norm": 0.34660571813583374, "learning_rate": 1.8723138466209785e-05, "loss": 0.5394, "step": 15420 }, { "epoch": 0.32705562978515834, "grad_norm": 0.3185959756374359, "learning_rate": 1.872297540003062e-05, "loss": 0.5372, "step": 15421 }, { "epoch": 0.32707683824309136, "grad_norm": 0.3505292236804962, "learning_rate": 1.872281232414984e-05, "loss": 0.4882, "step": 15422 }, { "epoch": 0.3270980467010244, "grad_norm": 0.37198406457901, "learning_rate": 1.872264923856762e-05, "loss": 0.5436, "step": 15423 }, { "epoch": 0.3271192551589574, "grad_norm": 0.37342971563339233, "learning_rate": 1.8722486143284145e-05, "loss": 0.6034, "step": 15424 }, { "epoch": 0.3271404636168904, "grad_norm": 0.38330891728401184, "learning_rate": 1.8722323038299596e-05, "loss": 0.4839, "step": 15425 }, { "epoch": 0.32716167207482344, "grad_norm": 0.3107757568359375, "learning_rate": 1.8722159923614158e-05, "loss": 0.4726, "step": 15426 }, { "epoch": 0.32718288053275646, "grad_norm": 0.3726630210876465, "learning_rate": 1.8721996799228007e-05, "loss": 0.5139, "step": 15427 }, { "epoch": 0.3272040889906895, "grad_norm": 0.35987675189971924, "learning_rate": 1.872183366514133e-05, "loss": 0.4545, "step": 15428 }, { "epoch": 0.3272252974486225, "grad_norm": 0.5015622973442078, "learning_rate": 1.8721670521354304e-05, "loss": 0.4727, "step": 15429 }, { "epoch": 0.3272465059065555, "grad_norm": 0.33281490206718445, "learning_rate": 1.872150736786711e-05, "loss": 0.4955, "step": 15430 }, { "epoch": 0.32726771436448854, "grad_norm": 0.37181493639945984, "learning_rate": 1.8721344204679935e-05, "loss": 0.5253, "step": 15431 }, { "epoch": 0.32728892282242156, "grad_norm": 0.3421989679336548, "learning_rate": 1.8721181031792956e-05, "loss": 0.4712, "step": 15432 }, { "epoch": 0.32731013128035463, "grad_norm": 0.32473957538604736, "learning_rate": 1.872101784920635e-05, "loss": 0.4198, "step": 15433 }, { "epoch": 0.32733133973828765, "grad_norm": 0.3326236307621002, "learning_rate": 1.8720854656920312e-05, "loss": 0.5, "step": 15434 }, { "epoch": 0.32735254819622067, "grad_norm": 0.34958893060684204, "learning_rate": 1.8720691454935007e-05, "loss": 0.4399, "step": 15435 }, { "epoch": 0.3273737566541537, "grad_norm": 0.32264164090156555, "learning_rate": 1.872052824325063e-05, "loss": 0.5409, "step": 15436 }, { "epoch": 0.3273949651120867, "grad_norm": 0.33123546838760376, "learning_rate": 1.8720365021867354e-05, "loss": 0.5257, "step": 15437 }, { "epoch": 0.32741617357001973, "grad_norm": 0.45960789918899536, "learning_rate": 1.8720201790785365e-05, "loss": 0.5264, "step": 15438 }, { "epoch": 0.32743738202795275, "grad_norm": 0.31496986746788025, "learning_rate": 1.8720038550004843e-05, "loss": 0.5466, "step": 15439 }, { "epoch": 0.32745859048588577, "grad_norm": 0.356123149394989, "learning_rate": 1.8719875299525973e-05, "loss": 0.6032, "step": 15440 }, { "epoch": 0.3274797989438188, "grad_norm": 0.5703307390213013, "learning_rate": 1.8719712039348928e-05, "loss": 0.5495, "step": 15441 }, { "epoch": 0.3275010074017518, "grad_norm": 0.35665419697761536, "learning_rate": 1.87195487694739e-05, "loss": 0.5243, "step": 15442 }, { "epoch": 0.32752221585968483, "grad_norm": 0.4021994471549988, "learning_rate": 1.8719385489901066e-05, "loss": 0.5412, "step": 15443 }, { "epoch": 0.32754342431761785, "grad_norm": 0.31888914108276367, "learning_rate": 1.8719222200630605e-05, "loss": 0.4686, "step": 15444 }, { "epoch": 0.32756463277555087, "grad_norm": 0.37331265211105347, "learning_rate": 1.87190589016627e-05, "loss": 0.5753, "step": 15445 }, { "epoch": 0.3275858412334839, "grad_norm": 0.37248268723487854, "learning_rate": 1.8718895592997532e-05, "loss": 0.4909, "step": 15446 }, { "epoch": 0.32760704969141696, "grad_norm": 0.3255220055580139, "learning_rate": 1.8718732274635287e-05, "loss": 0.4518, "step": 15447 }, { "epoch": 0.32762825814935, "grad_norm": 0.33814939856529236, "learning_rate": 1.8718568946576144e-05, "loss": 0.5104, "step": 15448 }, { "epoch": 0.327649466607283, "grad_norm": 0.3609732985496521, "learning_rate": 1.8718405608820283e-05, "loss": 0.5276, "step": 15449 }, { "epoch": 0.327670675065216, "grad_norm": 0.34537339210510254, "learning_rate": 1.8718242261367887e-05, "loss": 0.4969, "step": 15450 }, { "epoch": 0.32769188352314904, "grad_norm": 0.38115617632865906, "learning_rate": 1.8718078904219136e-05, "loss": 0.4821, "step": 15451 }, { "epoch": 0.32771309198108206, "grad_norm": 0.3582843244075775, "learning_rate": 1.871791553737422e-05, "loss": 0.4632, "step": 15452 }, { "epoch": 0.3277343004390151, "grad_norm": 0.3625064790248871, "learning_rate": 1.8717752160833306e-05, "loss": 0.5148, "step": 15453 }, { "epoch": 0.3277555088969481, "grad_norm": 0.3192813992500305, "learning_rate": 1.8717588774596586e-05, "loss": 0.4846, "step": 15454 }, { "epoch": 0.3277767173548811, "grad_norm": 0.36124908924102783, "learning_rate": 1.8717425378664242e-05, "loss": 0.5644, "step": 15455 }, { "epoch": 0.32779792581281414, "grad_norm": 0.3490932285785675, "learning_rate": 1.8717261973036455e-05, "loss": 0.4714, "step": 15456 }, { "epoch": 0.32781913427074716, "grad_norm": 0.3874531090259552, "learning_rate": 1.8717098557713396e-05, "loss": 0.5087, "step": 15457 }, { "epoch": 0.3278403427286802, "grad_norm": 0.37226635217666626, "learning_rate": 1.8716935132695266e-05, "loss": 0.4834, "step": 15458 }, { "epoch": 0.3278615511866132, "grad_norm": 0.33772316575050354, "learning_rate": 1.871677169798223e-05, "loss": 0.5296, "step": 15459 }, { "epoch": 0.3278827596445462, "grad_norm": 0.3137437701225281, "learning_rate": 1.871660825357448e-05, "loss": 0.5533, "step": 15460 }, { "epoch": 0.3279039681024793, "grad_norm": 0.3371139466762543, "learning_rate": 1.8716444799472193e-05, "loss": 0.5731, "step": 15461 }, { "epoch": 0.3279251765604123, "grad_norm": 0.3256511986255646, "learning_rate": 1.871628133567555e-05, "loss": 0.5025, "step": 15462 }, { "epoch": 0.32794638501834533, "grad_norm": 0.36567142605781555, "learning_rate": 1.871611786218474e-05, "loss": 0.5319, "step": 15463 }, { "epoch": 0.32796759347627835, "grad_norm": 0.3660370409488678, "learning_rate": 1.871595437899993e-05, "loss": 0.5183, "step": 15464 }, { "epoch": 0.32798880193421137, "grad_norm": 0.34129735827445984, "learning_rate": 1.8715790886121316e-05, "loss": 0.5115, "step": 15465 }, { "epoch": 0.3280100103921444, "grad_norm": 0.3175702393054962, "learning_rate": 1.8715627383549076e-05, "loss": 0.4914, "step": 15466 }, { "epoch": 0.3280312188500774, "grad_norm": 0.3612567186355591, "learning_rate": 1.871546387128339e-05, "loss": 0.4942, "step": 15467 }, { "epoch": 0.32805242730801043, "grad_norm": 0.33707720041275024, "learning_rate": 1.8715300349324445e-05, "loss": 0.504, "step": 15468 }, { "epoch": 0.32807363576594345, "grad_norm": 0.35069382190704346, "learning_rate": 1.8715136817672412e-05, "loss": 0.4686, "step": 15469 }, { "epoch": 0.32809484422387647, "grad_norm": 0.31936296820640564, "learning_rate": 1.8714973276327483e-05, "loss": 0.5075, "step": 15470 }, { "epoch": 0.3281160526818095, "grad_norm": 0.3555285632610321, "learning_rate": 1.8714809725289837e-05, "loss": 0.4476, "step": 15471 }, { "epoch": 0.3281372611397425, "grad_norm": 0.3548116385936737, "learning_rate": 1.8714646164559655e-05, "loss": 0.4461, "step": 15472 }, { "epoch": 0.32815846959767553, "grad_norm": 0.3337320387363434, "learning_rate": 1.8714482594137115e-05, "loss": 0.4907, "step": 15473 }, { "epoch": 0.3281796780556086, "grad_norm": 0.3858185112476349, "learning_rate": 1.8714319014022407e-05, "loss": 0.5516, "step": 15474 }, { "epoch": 0.3282008865135416, "grad_norm": 0.386852890253067, "learning_rate": 1.871415542421571e-05, "loss": 0.5558, "step": 15475 }, { "epoch": 0.32822209497147464, "grad_norm": 0.33327117562294006, "learning_rate": 1.8713991824717203e-05, "loss": 0.5485, "step": 15476 }, { "epoch": 0.32824330342940766, "grad_norm": 0.33793357014656067, "learning_rate": 1.8713828215527073e-05, "loss": 0.4606, "step": 15477 }, { "epoch": 0.3282645118873407, "grad_norm": 0.3031880855560303, "learning_rate": 1.8713664596645497e-05, "loss": 0.4721, "step": 15478 }, { "epoch": 0.3282857203452737, "grad_norm": 0.3558495044708252, "learning_rate": 1.8713500968072657e-05, "loss": 0.468, "step": 15479 }, { "epoch": 0.3283069288032067, "grad_norm": 0.44676896929740906, "learning_rate": 1.8713337329808738e-05, "loss": 0.576, "step": 15480 }, { "epoch": 0.32832813726113974, "grad_norm": 0.63784259557724, "learning_rate": 1.871317368185392e-05, "loss": 0.5017, "step": 15481 }, { "epoch": 0.32834934571907276, "grad_norm": 0.3167605698108673, "learning_rate": 1.871301002420839e-05, "loss": 0.5177, "step": 15482 }, { "epoch": 0.3283705541770058, "grad_norm": 0.32476574182510376, "learning_rate": 1.8712846356872322e-05, "loss": 0.5447, "step": 15483 }, { "epoch": 0.3283917626349388, "grad_norm": 0.3100467622280121, "learning_rate": 1.8712682679845905e-05, "loss": 0.4891, "step": 15484 }, { "epoch": 0.3284129710928718, "grad_norm": 0.39327844977378845, "learning_rate": 1.8712518993129315e-05, "loss": 0.4769, "step": 15485 }, { "epoch": 0.32843417955080484, "grad_norm": 0.3520410358905792, "learning_rate": 1.871235529672274e-05, "loss": 0.439, "step": 15486 }, { "epoch": 0.32845538800873786, "grad_norm": 0.36583203077316284, "learning_rate": 1.8712191590626356e-05, "loss": 0.5437, "step": 15487 }, { "epoch": 0.32847659646667093, "grad_norm": 0.36485546827316284, "learning_rate": 1.871202787484035e-05, "loss": 0.5049, "step": 15488 }, { "epoch": 0.32849780492460395, "grad_norm": 0.30063024163246155, "learning_rate": 1.8711864149364903e-05, "loss": 0.3872, "step": 15489 }, { "epoch": 0.328519013382537, "grad_norm": 0.3259310722351074, "learning_rate": 1.8711700414200194e-05, "loss": 0.3958, "step": 15490 }, { "epoch": 0.32854022184047, "grad_norm": 0.35516357421875, "learning_rate": 1.871153666934641e-05, "loss": 0.5294, "step": 15491 }, { "epoch": 0.328561430298403, "grad_norm": 0.3320390582084656, "learning_rate": 1.871137291480373e-05, "loss": 0.5563, "step": 15492 }, { "epoch": 0.32858263875633603, "grad_norm": 0.3289867341518402, "learning_rate": 1.8711209150572337e-05, "loss": 0.4802, "step": 15493 }, { "epoch": 0.32860384721426905, "grad_norm": 0.31058308482170105, "learning_rate": 1.871104537665241e-05, "loss": 0.4708, "step": 15494 }, { "epoch": 0.32862505567220207, "grad_norm": 0.34964096546173096, "learning_rate": 1.8710881593044137e-05, "loss": 0.5116, "step": 15495 }, { "epoch": 0.3286462641301351, "grad_norm": 0.3829614520072937, "learning_rate": 1.8710717799747696e-05, "loss": 0.5462, "step": 15496 }, { "epoch": 0.3286674725880681, "grad_norm": 0.33592328429222107, "learning_rate": 1.871055399676327e-05, "loss": 0.4703, "step": 15497 }, { "epoch": 0.32868868104600113, "grad_norm": 0.3490123152732849, "learning_rate": 1.8710390184091043e-05, "loss": 0.553, "step": 15498 }, { "epoch": 0.32870988950393415, "grad_norm": 0.49920883774757385, "learning_rate": 1.8710226361731194e-05, "loss": 0.4929, "step": 15499 }, { "epoch": 0.32873109796186717, "grad_norm": 0.3729996085166931, "learning_rate": 1.8710062529683907e-05, "loss": 0.5545, "step": 15500 }, { "epoch": 0.3287523064198002, "grad_norm": 0.3486555218696594, "learning_rate": 1.8709898687949365e-05, "loss": 0.5009, "step": 15501 }, { "epoch": 0.32877351487773326, "grad_norm": 0.3421284258365631, "learning_rate": 1.870973483652775e-05, "loss": 0.5091, "step": 15502 }, { "epoch": 0.3287947233356663, "grad_norm": 0.4386492371559143, "learning_rate": 1.870957097541924e-05, "loss": 0.5324, "step": 15503 }, { "epoch": 0.3288159317935993, "grad_norm": 0.31201496720314026, "learning_rate": 1.8709407104624026e-05, "loss": 0.5135, "step": 15504 }, { "epoch": 0.3288371402515323, "grad_norm": 0.33408623933792114, "learning_rate": 1.870924322414228e-05, "loss": 0.434, "step": 15505 }, { "epoch": 0.32885834870946534, "grad_norm": 0.39623114466667175, "learning_rate": 1.8709079333974194e-05, "loss": 0.5474, "step": 15506 }, { "epoch": 0.32887955716739836, "grad_norm": 0.3253840506076813, "learning_rate": 1.8708915434119945e-05, "loss": 0.5313, "step": 15507 }, { "epoch": 0.3289007656253314, "grad_norm": 0.4377323389053345, "learning_rate": 1.8708751524579714e-05, "loss": 0.5691, "step": 15508 }, { "epoch": 0.3289219740832644, "grad_norm": 0.3760583698749542, "learning_rate": 1.8708587605353685e-05, "loss": 0.533, "step": 15509 }, { "epoch": 0.3289431825411974, "grad_norm": 0.42189982533454895, "learning_rate": 1.870842367644204e-05, "loss": 0.4792, "step": 15510 }, { "epoch": 0.32896439099913044, "grad_norm": 0.4537586271762848, "learning_rate": 1.8708259737844965e-05, "loss": 0.5946, "step": 15511 }, { "epoch": 0.32898559945706346, "grad_norm": 0.33288341760635376, "learning_rate": 1.8708095789562634e-05, "loss": 0.4381, "step": 15512 }, { "epoch": 0.3290068079149965, "grad_norm": 0.364614874124527, "learning_rate": 1.8707931831595242e-05, "loss": 0.4308, "step": 15513 }, { "epoch": 0.3290280163729295, "grad_norm": 0.3222218453884125, "learning_rate": 1.8707767863942958e-05, "loss": 0.4612, "step": 15514 }, { "epoch": 0.3290492248308626, "grad_norm": 0.3391931354999542, "learning_rate": 1.8707603886605973e-05, "loss": 0.5297, "step": 15515 }, { "epoch": 0.3290704332887956, "grad_norm": 0.3206750154495239, "learning_rate": 1.8707439899584467e-05, "loss": 0.4751, "step": 15516 }, { "epoch": 0.3290916417467286, "grad_norm": 0.3342782258987427, "learning_rate": 1.870727590287862e-05, "loss": 0.4757, "step": 15517 }, { "epoch": 0.32911285020466163, "grad_norm": 0.34079089760780334, "learning_rate": 1.8707111896488615e-05, "loss": 0.5278, "step": 15518 }, { "epoch": 0.32913405866259465, "grad_norm": 0.3067828416824341, "learning_rate": 1.8706947880414635e-05, "loss": 0.5002, "step": 15519 }, { "epoch": 0.3291552671205277, "grad_norm": 0.31119173765182495, "learning_rate": 1.870678385465687e-05, "loss": 0.4437, "step": 15520 }, { "epoch": 0.3291764755784607, "grad_norm": 0.3325352966785431, "learning_rate": 1.8706619819215487e-05, "loss": 0.4654, "step": 15521 }, { "epoch": 0.3291976840363937, "grad_norm": 0.33293989300727844, "learning_rate": 1.8706455774090683e-05, "loss": 0.4181, "step": 15522 }, { "epoch": 0.32921889249432673, "grad_norm": 0.34470635652542114, "learning_rate": 1.8706291719282635e-05, "loss": 0.573, "step": 15523 }, { "epoch": 0.32924010095225975, "grad_norm": 0.35085710883140564, "learning_rate": 1.8706127654791523e-05, "loss": 0.5228, "step": 15524 }, { "epoch": 0.32926130941019277, "grad_norm": 0.4149112403392792, "learning_rate": 1.870596358061753e-05, "loss": 0.5322, "step": 15525 }, { "epoch": 0.3292825178681258, "grad_norm": 0.2976658046245575, "learning_rate": 1.8705799496760843e-05, "loss": 0.5087, "step": 15526 }, { "epoch": 0.3293037263260588, "grad_norm": 0.2951149046421051, "learning_rate": 1.8705635403221637e-05, "loss": 0.4207, "step": 15527 }, { "epoch": 0.32932493478399183, "grad_norm": 0.4220399260520935, "learning_rate": 1.8705471300000103e-05, "loss": 0.5688, "step": 15528 }, { "epoch": 0.3293461432419249, "grad_norm": 0.38365888595581055, "learning_rate": 1.8705307187096416e-05, "loss": 0.5055, "step": 15529 }, { "epoch": 0.3293673516998579, "grad_norm": 0.34621211886405945, "learning_rate": 1.8705143064510767e-05, "loss": 0.5216, "step": 15530 }, { "epoch": 0.32938856015779094, "grad_norm": 0.3310956656932831, "learning_rate": 1.8704978932243328e-05, "loss": 0.4526, "step": 15531 }, { "epoch": 0.32940976861572396, "grad_norm": 0.34214097261428833, "learning_rate": 1.870481479029429e-05, "loss": 0.5272, "step": 15532 }, { "epoch": 0.329430977073657, "grad_norm": 0.35658127069473267, "learning_rate": 1.8704650638663832e-05, "loss": 0.5593, "step": 15533 }, { "epoch": 0.32945218553159, "grad_norm": 0.3348313271999359, "learning_rate": 1.8704486477352138e-05, "loss": 0.5045, "step": 15534 }, { "epoch": 0.329473393989523, "grad_norm": 0.39018628001213074, "learning_rate": 1.870432230635939e-05, "loss": 0.5354, "step": 15535 }, { "epoch": 0.32949460244745604, "grad_norm": 0.3377828896045685, "learning_rate": 1.870415812568577e-05, "loss": 0.5251, "step": 15536 }, { "epoch": 0.32951581090538906, "grad_norm": 0.364585280418396, "learning_rate": 1.870399393533146e-05, "loss": 0.5542, "step": 15537 }, { "epoch": 0.3295370193633221, "grad_norm": 0.36744198203086853, "learning_rate": 1.8703829735296645e-05, "loss": 0.5376, "step": 15538 }, { "epoch": 0.3295582278212551, "grad_norm": 0.3764069378376007, "learning_rate": 1.8703665525581504e-05, "loss": 0.5256, "step": 15539 }, { "epoch": 0.3295794362791881, "grad_norm": 0.42204639315605164, "learning_rate": 1.8703501306186226e-05, "loss": 0.4319, "step": 15540 }, { "epoch": 0.32960064473712114, "grad_norm": 0.435489684343338, "learning_rate": 1.8703337077110985e-05, "loss": 0.5162, "step": 15541 }, { "epoch": 0.3296218531950542, "grad_norm": 0.31638801097869873, "learning_rate": 1.870317283835597e-05, "loss": 0.5111, "step": 15542 }, { "epoch": 0.32964306165298723, "grad_norm": 0.30671653151512146, "learning_rate": 1.870300858992136e-05, "loss": 0.5552, "step": 15543 }, { "epoch": 0.32966427011092025, "grad_norm": 0.32411059737205505, "learning_rate": 1.8702844331807343e-05, "loss": 0.5367, "step": 15544 }, { "epoch": 0.3296854785688533, "grad_norm": 0.35209426283836365, "learning_rate": 1.8702680064014097e-05, "loss": 0.5458, "step": 15545 }, { "epoch": 0.3297066870267863, "grad_norm": 0.6312811374664307, "learning_rate": 1.8702515786541805e-05, "loss": 0.5686, "step": 15546 }, { "epoch": 0.3297278954847193, "grad_norm": 1.4692282676696777, "learning_rate": 1.8702351499390648e-05, "loss": 0.6611, "step": 15547 }, { "epoch": 0.32974910394265233, "grad_norm": 0.3617665767669678, "learning_rate": 1.8702187202560814e-05, "loss": 0.5794, "step": 15548 }, { "epoch": 0.32977031240058535, "grad_norm": 0.35121580958366394, "learning_rate": 1.8702022896052484e-05, "loss": 0.4827, "step": 15549 }, { "epoch": 0.32979152085851837, "grad_norm": 0.35274118185043335, "learning_rate": 1.8701858579865836e-05, "loss": 0.4993, "step": 15550 }, { "epoch": 0.3298127293164514, "grad_norm": 0.36615726351737976, "learning_rate": 1.870169425400106e-05, "loss": 0.5214, "step": 15551 }, { "epoch": 0.3298339377743844, "grad_norm": 0.3484419882297516, "learning_rate": 1.8701529918458333e-05, "loss": 0.4895, "step": 15552 }, { "epoch": 0.32985514623231743, "grad_norm": 0.345748633146286, "learning_rate": 1.8701365573237842e-05, "loss": 0.4912, "step": 15553 }, { "epoch": 0.32987635469025045, "grad_norm": 0.3372252285480499, "learning_rate": 1.8701201218339767e-05, "loss": 0.4998, "step": 15554 }, { "epoch": 0.32989756314818347, "grad_norm": 0.34891584515571594, "learning_rate": 1.870103685376429e-05, "loss": 0.5556, "step": 15555 }, { "epoch": 0.32991877160611655, "grad_norm": 0.3207956850528717, "learning_rate": 1.87008724795116e-05, "loss": 0.5476, "step": 15556 }, { "epoch": 0.32993998006404957, "grad_norm": 0.3332701325416565, "learning_rate": 1.8700708095581868e-05, "loss": 0.5448, "step": 15557 }, { "epoch": 0.3299611885219826, "grad_norm": 0.3585776388645172, "learning_rate": 1.8700543701975288e-05, "loss": 0.5662, "step": 15558 }, { "epoch": 0.3299823969799156, "grad_norm": 0.4371817111968994, "learning_rate": 1.870037929869204e-05, "loss": 0.5554, "step": 15559 }, { "epoch": 0.3300036054378486, "grad_norm": 0.36030977964401245, "learning_rate": 1.8700214885732303e-05, "loss": 0.5411, "step": 15560 }, { "epoch": 0.33002481389578164, "grad_norm": 0.3904927372932434, "learning_rate": 1.8700050463096267e-05, "loss": 0.5444, "step": 15561 }, { "epoch": 0.33004602235371466, "grad_norm": 0.3988293707370758, "learning_rate": 1.8699886030784107e-05, "loss": 0.563, "step": 15562 }, { "epoch": 0.3300672308116477, "grad_norm": 0.3237605690956116, "learning_rate": 1.869972158879601e-05, "loss": 0.5505, "step": 15563 }, { "epoch": 0.3300884392695807, "grad_norm": 0.33636483550071716, "learning_rate": 1.869955713713216e-05, "loss": 0.5141, "step": 15564 }, { "epoch": 0.3301096477275137, "grad_norm": 0.36363667249679565, "learning_rate": 1.8699392675792735e-05, "loss": 0.5371, "step": 15565 }, { "epoch": 0.33013085618544674, "grad_norm": 0.3547343611717224, "learning_rate": 1.8699228204777925e-05, "loss": 0.523, "step": 15566 }, { "epoch": 0.33015206464337976, "grad_norm": 0.30409926176071167, "learning_rate": 1.8699063724087905e-05, "loss": 0.424, "step": 15567 }, { "epoch": 0.3301732731013128, "grad_norm": 0.35621827840805054, "learning_rate": 1.8698899233722864e-05, "loss": 0.4434, "step": 15568 }, { "epoch": 0.3301944815592458, "grad_norm": 0.34594282507896423, "learning_rate": 1.8698734733682982e-05, "loss": 0.4752, "step": 15569 }, { "epoch": 0.3302156900171789, "grad_norm": 0.33473506569862366, "learning_rate": 1.8698570223968443e-05, "loss": 0.472, "step": 15570 }, { "epoch": 0.3302368984751119, "grad_norm": 0.32430246472358704, "learning_rate": 1.869840570457943e-05, "loss": 0.4655, "step": 15571 }, { "epoch": 0.3302581069330449, "grad_norm": 0.5038649439811707, "learning_rate": 1.8698241175516125e-05, "loss": 0.4523, "step": 15572 }, { "epoch": 0.33027931539097793, "grad_norm": 0.4002748727798462, "learning_rate": 1.8698076636778713e-05, "loss": 0.5612, "step": 15573 }, { "epoch": 0.33030052384891095, "grad_norm": 0.3667100667953491, "learning_rate": 1.8697912088367375e-05, "loss": 0.5976, "step": 15574 }, { "epoch": 0.330321732306844, "grad_norm": 0.3533385694026947, "learning_rate": 1.8697747530282294e-05, "loss": 0.5042, "step": 15575 }, { "epoch": 0.330342940764777, "grad_norm": 0.36253705620765686, "learning_rate": 1.8697582962523656e-05, "loss": 0.3866, "step": 15576 }, { "epoch": 0.33036414922271, "grad_norm": 0.31766650080680847, "learning_rate": 1.8697418385091638e-05, "loss": 0.5532, "step": 15577 }, { "epoch": 0.33038535768064303, "grad_norm": 0.3808611333370209, "learning_rate": 1.869725379798643e-05, "loss": 0.5508, "step": 15578 }, { "epoch": 0.33040656613857605, "grad_norm": 0.32125529646873474, "learning_rate": 1.869708920120821e-05, "loss": 0.5334, "step": 15579 }, { "epoch": 0.33042777459650907, "grad_norm": 0.33772704005241394, "learning_rate": 1.8696924594757163e-05, "loss": 0.4676, "step": 15580 }, { "epoch": 0.3304489830544421, "grad_norm": 0.31524336338043213, "learning_rate": 1.8696759978633473e-05, "loss": 0.4559, "step": 15581 }, { "epoch": 0.3304701915123751, "grad_norm": 0.35720914602279663, "learning_rate": 1.869659535283732e-05, "loss": 0.5776, "step": 15582 }, { "epoch": 0.3304913999703082, "grad_norm": 0.38584014773368835, "learning_rate": 1.8696430717368894e-05, "loss": 0.4214, "step": 15583 }, { "epoch": 0.3305126084282412, "grad_norm": 0.3382594883441925, "learning_rate": 1.869626607222837e-05, "loss": 0.4866, "step": 15584 }, { "epoch": 0.3305338168861742, "grad_norm": 0.3578680455684662, "learning_rate": 1.8696101417415934e-05, "loss": 0.4794, "step": 15585 }, { "epoch": 0.33055502534410725, "grad_norm": 0.3182416260242462, "learning_rate": 1.869593675293177e-05, "loss": 0.491, "step": 15586 }, { "epoch": 0.33057623380204026, "grad_norm": 0.34197163581848145, "learning_rate": 1.8695772078776065e-05, "loss": 0.4723, "step": 15587 }, { "epoch": 0.3305974422599733, "grad_norm": 0.3467932641506195, "learning_rate": 1.869560739494899e-05, "loss": 0.4645, "step": 15588 }, { "epoch": 0.3306186507179063, "grad_norm": 0.3577336370944977, "learning_rate": 1.8695442701450744e-05, "loss": 0.5652, "step": 15589 }, { "epoch": 0.3306398591758393, "grad_norm": 0.36997857689857483, "learning_rate": 1.8695277998281496e-05, "loss": 0.5466, "step": 15590 }, { "epoch": 0.33066106763377234, "grad_norm": 0.39887160062789917, "learning_rate": 1.869511328544144e-05, "loss": 0.5045, "step": 15591 }, { "epoch": 0.33068227609170536, "grad_norm": 0.34877803921699524, "learning_rate": 1.869494856293075e-05, "loss": 0.4844, "step": 15592 }, { "epoch": 0.3307034845496384, "grad_norm": 0.33902162313461304, "learning_rate": 1.8694783830749616e-05, "loss": 0.4907, "step": 15593 }, { "epoch": 0.3307246930075714, "grad_norm": 0.4290148615837097, "learning_rate": 1.869461908889822e-05, "loss": 0.5191, "step": 15594 }, { "epoch": 0.3307459014655044, "grad_norm": 0.3268964886665344, "learning_rate": 1.8694454337376743e-05, "loss": 0.4551, "step": 15595 }, { "epoch": 0.33076710992343744, "grad_norm": 0.33086729049682617, "learning_rate": 1.8694289576185368e-05, "loss": 0.469, "step": 15596 }, { "epoch": 0.3307883183813705, "grad_norm": 0.35510072112083435, "learning_rate": 1.869412480532428e-05, "loss": 0.5451, "step": 15597 }, { "epoch": 0.33080952683930354, "grad_norm": 0.4821704626083374, "learning_rate": 1.8693960024793662e-05, "loss": 0.5226, "step": 15598 }, { "epoch": 0.33083073529723656, "grad_norm": 0.35950833559036255, "learning_rate": 1.86937952345937e-05, "loss": 0.4669, "step": 15599 }, { "epoch": 0.3308519437551696, "grad_norm": 0.3206005096435547, "learning_rate": 1.869363043472457e-05, "loss": 0.4542, "step": 15600 }, { "epoch": 0.3308731522131026, "grad_norm": 0.3615425229072571, "learning_rate": 1.869346562518646e-05, "loss": 0.6489, "step": 15601 }, { "epoch": 0.3308943606710356, "grad_norm": 0.41462525725364685, "learning_rate": 1.869330080597956e-05, "loss": 0.5802, "step": 15602 }, { "epoch": 0.33091556912896863, "grad_norm": 0.335997611284256, "learning_rate": 1.869313597710404e-05, "loss": 0.509, "step": 15603 }, { "epoch": 0.33093677758690165, "grad_norm": 0.3675176501274109, "learning_rate": 1.8692971138560092e-05, "loss": 0.5717, "step": 15604 }, { "epoch": 0.3309579860448347, "grad_norm": 0.3473203480243683, "learning_rate": 1.8692806290347896e-05, "loss": 0.5269, "step": 15605 }, { "epoch": 0.3309791945027677, "grad_norm": 0.6603637337684631, "learning_rate": 1.8692641432467637e-05, "loss": 0.6535, "step": 15606 }, { "epoch": 0.3310004029607007, "grad_norm": 0.3697628378868103, "learning_rate": 1.86924765649195e-05, "loss": 0.5882, "step": 15607 }, { "epoch": 0.33102161141863373, "grad_norm": 0.3125987946987152, "learning_rate": 1.8692311687703662e-05, "loss": 0.494, "step": 15608 }, { "epoch": 0.33104281987656675, "grad_norm": 0.34236404299736023, "learning_rate": 1.8692146800820313e-05, "loss": 0.4255, "step": 15609 }, { "epoch": 0.33106402833449977, "grad_norm": 0.31406691670417786, "learning_rate": 1.869198190426963e-05, "loss": 0.4077, "step": 15610 }, { "epoch": 0.33108523679243285, "grad_norm": 0.3994399607181549, "learning_rate": 1.8691816998051803e-05, "loss": 0.5603, "step": 15611 }, { "epoch": 0.33110644525036587, "grad_norm": 0.35529643297195435, "learning_rate": 1.8691652082167015e-05, "loss": 0.4734, "step": 15612 }, { "epoch": 0.3311276537082989, "grad_norm": 0.38167569041252136, "learning_rate": 1.8691487156615443e-05, "loss": 0.5255, "step": 15613 }, { "epoch": 0.3311488621662319, "grad_norm": 0.33216387033462524, "learning_rate": 1.8691322221397277e-05, "loss": 0.4431, "step": 15614 }, { "epoch": 0.3311700706241649, "grad_norm": 0.4219847619533539, "learning_rate": 1.86911572765127e-05, "loss": 0.5962, "step": 15615 }, { "epoch": 0.33119127908209794, "grad_norm": 0.36271870136260986, "learning_rate": 1.8690992321961888e-05, "loss": 0.5426, "step": 15616 }, { "epoch": 0.33121248754003096, "grad_norm": 0.4256376028060913, "learning_rate": 1.8690827357745034e-05, "loss": 0.5979, "step": 15617 }, { "epoch": 0.331233695997964, "grad_norm": 0.30806663632392883, "learning_rate": 1.8690662383862314e-05, "loss": 0.4392, "step": 15618 }, { "epoch": 0.331254904455897, "grad_norm": 0.3199750483036041, "learning_rate": 1.869049740031392e-05, "loss": 0.467, "step": 15619 }, { "epoch": 0.33127611291383, "grad_norm": 0.30808642506599426, "learning_rate": 1.8690332407100024e-05, "loss": 0.4905, "step": 15620 }, { "epoch": 0.33129732137176304, "grad_norm": 0.352718710899353, "learning_rate": 1.8690167404220818e-05, "loss": 0.4566, "step": 15621 }, { "epoch": 0.33131852982969606, "grad_norm": 0.3587479591369629, "learning_rate": 1.8690002391676483e-05, "loss": 0.5476, "step": 15622 }, { "epoch": 0.3313397382876291, "grad_norm": 0.32890844345092773, "learning_rate": 1.8689837369467206e-05, "loss": 0.5154, "step": 15623 }, { "epoch": 0.33136094674556216, "grad_norm": 0.3408973813056946, "learning_rate": 1.8689672337593165e-05, "loss": 0.4818, "step": 15624 }, { "epoch": 0.3313821552034952, "grad_norm": 0.37603428959846497, "learning_rate": 1.8689507296054546e-05, "loss": 0.5146, "step": 15625 }, { "epoch": 0.3314033636614282, "grad_norm": 0.44854092597961426, "learning_rate": 1.8689342244851532e-05, "loss": 0.4898, "step": 15626 }, { "epoch": 0.3314245721193612, "grad_norm": 0.3233552575111389, "learning_rate": 1.868917718398431e-05, "loss": 0.483, "step": 15627 }, { "epoch": 0.33144578057729424, "grad_norm": 0.38040879368782043, "learning_rate": 1.8689012113453056e-05, "loss": 0.4969, "step": 15628 }, { "epoch": 0.33146698903522726, "grad_norm": 0.36611905694007874, "learning_rate": 1.868884703325796e-05, "loss": 0.4965, "step": 15629 }, { "epoch": 0.3314881974931603, "grad_norm": 0.3158598244190216, "learning_rate": 1.8688681943399202e-05, "loss": 0.4975, "step": 15630 }, { "epoch": 0.3315094059510933, "grad_norm": 0.3659948408603668, "learning_rate": 1.8688516843876973e-05, "loss": 0.5092, "step": 15631 }, { "epoch": 0.3315306144090263, "grad_norm": 0.39089435338974, "learning_rate": 1.8688351734691447e-05, "loss": 0.5488, "step": 15632 }, { "epoch": 0.33155182286695933, "grad_norm": 0.2979213297367096, "learning_rate": 1.8688186615842813e-05, "loss": 0.5138, "step": 15633 }, { "epoch": 0.33157303132489235, "grad_norm": 0.44315657019615173, "learning_rate": 1.868802148733125e-05, "loss": 0.5087, "step": 15634 }, { "epoch": 0.3315942397828254, "grad_norm": 0.3715904653072357, "learning_rate": 1.8687856349156946e-05, "loss": 0.5482, "step": 15635 }, { "epoch": 0.3316154482407584, "grad_norm": 0.32944679260253906, "learning_rate": 1.868769120132009e-05, "loss": 0.5203, "step": 15636 }, { "epoch": 0.3316366566986914, "grad_norm": 0.3452852964401245, "learning_rate": 1.868752604382085e-05, "loss": 0.46, "step": 15637 }, { "epoch": 0.3316578651566245, "grad_norm": 0.3479387164115906, "learning_rate": 1.8687360876659426e-05, "loss": 0.4351, "step": 15638 }, { "epoch": 0.3316790736145575, "grad_norm": 0.4057804346084595, "learning_rate": 1.868719569983599e-05, "loss": 0.5657, "step": 15639 }, { "epoch": 0.3317002820724905, "grad_norm": 0.3358915150165558, "learning_rate": 1.8687030513350736e-05, "loss": 0.5028, "step": 15640 }, { "epoch": 0.33172149053042355, "grad_norm": 0.36068883538246155, "learning_rate": 1.8686865317203837e-05, "loss": 0.5391, "step": 15641 }, { "epoch": 0.33174269898835657, "grad_norm": 0.320101797580719, "learning_rate": 1.8686700111395483e-05, "loss": 0.5029, "step": 15642 }, { "epoch": 0.3317639074462896, "grad_norm": 0.3360586166381836, "learning_rate": 1.868653489592586e-05, "loss": 0.4762, "step": 15643 }, { "epoch": 0.3317851159042226, "grad_norm": 0.36053305864334106, "learning_rate": 1.8686369670795145e-05, "loss": 0.4353, "step": 15644 }, { "epoch": 0.3318063243621556, "grad_norm": 0.35290834307670593, "learning_rate": 1.8686204436003527e-05, "loss": 0.4553, "step": 15645 }, { "epoch": 0.33182753282008864, "grad_norm": 0.34313642978668213, "learning_rate": 1.8686039191551185e-05, "loss": 0.5143, "step": 15646 }, { "epoch": 0.33184874127802166, "grad_norm": 0.3643028140068054, "learning_rate": 1.8685873937438308e-05, "loss": 0.5382, "step": 15647 }, { "epoch": 0.3318699497359547, "grad_norm": 0.35095447301864624, "learning_rate": 1.8685708673665078e-05, "loss": 0.4624, "step": 15648 }, { "epoch": 0.3318911581938877, "grad_norm": 0.3393642008304596, "learning_rate": 1.8685543400231677e-05, "loss": 0.505, "step": 15649 }, { "epoch": 0.3319123666518207, "grad_norm": 0.37464073300361633, "learning_rate": 1.8685378117138286e-05, "loss": 0.4711, "step": 15650 }, { "epoch": 0.33193357510975374, "grad_norm": 0.3641524612903595, "learning_rate": 1.86852128243851e-05, "loss": 0.5108, "step": 15651 }, { "epoch": 0.3319547835676868, "grad_norm": 0.32083410024642944, "learning_rate": 1.8685047521972294e-05, "loss": 0.4648, "step": 15652 }, { "epoch": 0.33197599202561984, "grad_norm": 0.33310240507125854, "learning_rate": 1.8684882209900052e-05, "loss": 0.4464, "step": 15653 }, { "epoch": 0.33199720048355286, "grad_norm": 0.3495844900608063, "learning_rate": 1.8684716888168558e-05, "loss": 0.5076, "step": 15654 }, { "epoch": 0.3320184089414859, "grad_norm": 0.33301472663879395, "learning_rate": 1.8684551556778e-05, "loss": 0.5322, "step": 15655 }, { "epoch": 0.3320396173994189, "grad_norm": 0.560871422290802, "learning_rate": 1.8684386215728557e-05, "loss": 0.4828, "step": 15656 }, { "epoch": 0.3320608258573519, "grad_norm": 0.5904308557510376, "learning_rate": 1.8684220865020418e-05, "loss": 0.5215, "step": 15657 }, { "epoch": 0.33208203431528494, "grad_norm": 0.3389133810997009, "learning_rate": 1.8684055504653763e-05, "loss": 0.5581, "step": 15658 }, { "epoch": 0.33210324277321795, "grad_norm": 0.37024107575416565, "learning_rate": 1.8683890134628774e-05, "loss": 0.535, "step": 15659 }, { "epoch": 0.332124451231151, "grad_norm": 0.38817906379699707, "learning_rate": 1.868372475494564e-05, "loss": 0.5284, "step": 15660 }, { "epoch": 0.332145659689084, "grad_norm": 0.32589444518089294, "learning_rate": 1.8683559365604546e-05, "loss": 0.5783, "step": 15661 }, { "epoch": 0.332166868147017, "grad_norm": 0.395054429769516, "learning_rate": 1.8683393966605667e-05, "loss": 0.6121, "step": 15662 }, { "epoch": 0.33218807660495003, "grad_norm": 0.3253616690635681, "learning_rate": 1.8683228557949196e-05, "loss": 0.5401, "step": 15663 }, { "epoch": 0.33220928506288305, "grad_norm": 0.36335229873657227, "learning_rate": 1.8683063139635312e-05, "loss": 0.5217, "step": 15664 }, { "epoch": 0.33223049352081613, "grad_norm": 0.3281175494194031, "learning_rate": 1.86828977116642e-05, "loss": 0.4412, "step": 15665 }, { "epoch": 0.33225170197874915, "grad_norm": 0.30035173892974854, "learning_rate": 1.8682732274036047e-05, "loss": 0.4812, "step": 15666 }, { "epoch": 0.33227291043668217, "grad_norm": 0.38010120391845703, "learning_rate": 1.8682566826751035e-05, "loss": 0.3992, "step": 15667 }, { "epoch": 0.3322941188946152, "grad_norm": 0.34567829966545105, "learning_rate": 1.8682401369809345e-05, "loss": 0.5825, "step": 15668 }, { "epoch": 0.3323153273525482, "grad_norm": 0.33545494079589844, "learning_rate": 1.8682235903211165e-05, "loss": 0.527, "step": 15669 }, { "epoch": 0.3323365358104812, "grad_norm": 0.42152121663093567, "learning_rate": 1.8682070426956677e-05, "loss": 0.4731, "step": 15670 }, { "epoch": 0.33235774426841425, "grad_norm": 0.3099266290664673, "learning_rate": 1.8681904941046068e-05, "loss": 0.4245, "step": 15671 }, { "epoch": 0.33237895272634727, "grad_norm": 0.33461642265319824, "learning_rate": 1.8681739445479517e-05, "loss": 0.5535, "step": 15672 }, { "epoch": 0.3324001611842803, "grad_norm": 0.3823310434818268, "learning_rate": 1.868157394025721e-05, "loss": 0.4944, "step": 15673 }, { "epoch": 0.3324213696422133, "grad_norm": 0.336136132478714, "learning_rate": 1.868140842537934e-05, "loss": 0.6229, "step": 15674 }, { "epoch": 0.3324425781001463, "grad_norm": 0.3797528147697449, "learning_rate": 1.8681242900846073e-05, "loss": 0.6139, "step": 15675 }, { "epoch": 0.33246378655807934, "grad_norm": 0.35106202960014343, "learning_rate": 1.868107736665761e-05, "loss": 0.5105, "step": 15676 }, { "epoch": 0.33248499501601236, "grad_norm": 0.34541410207748413, "learning_rate": 1.868091182281412e-05, "loss": 0.5147, "step": 15677 }, { "epoch": 0.3325062034739454, "grad_norm": 0.32700416445732117, "learning_rate": 1.86807462693158e-05, "loss": 0.5136, "step": 15678 }, { "epoch": 0.33252741193187846, "grad_norm": 0.38378089666366577, "learning_rate": 1.868058070616283e-05, "loss": 0.5187, "step": 15679 }, { "epoch": 0.3325486203898115, "grad_norm": 0.3657163679599762, "learning_rate": 1.8680415133355393e-05, "loss": 0.5285, "step": 15680 }, { "epoch": 0.3325698288477445, "grad_norm": 0.3392346203327179, "learning_rate": 1.8680249550893673e-05, "loss": 0.4819, "step": 15681 }, { "epoch": 0.3325910373056775, "grad_norm": 0.665254533290863, "learning_rate": 1.8680083958777858e-05, "loss": 0.5194, "step": 15682 }, { "epoch": 0.33261224576361054, "grad_norm": 0.3487618565559387, "learning_rate": 1.8679918357008127e-05, "loss": 0.4923, "step": 15683 }, { "epoch": 0.33263345422154356, "grad_norm": 0.3315548598766327, "learning_rate": 1.8679752745584664e-05, "loss": 0.5226, "step": 15684 }, { "epoch": 0.3326546626794766, "grad_norm": 0.39658522605895996, "learning_rate": 1.8679587124507657e-05, "loss": 0.5269, "step": 15685 }, { "epoch": 0.3326758711374096, "grad_norm": 0.3666675090789795, "learning_rate": 1.867942149377729e-05, "loss": 0.5167, "step": 15686 }, { "epoch": 0.3326970795953426, "grad_norm": 0.3380924165248871, "learning_rate": 1.8679255853393744e-05, "loss": 0.4516, "step": 15687 }, { "epoch": 0.33271828805327563, "grad_norm": 0.3579999804496765, "learning_rate": 1.8679090203357206e-05, "loss": 0.5198, "step": 15688 }, { "epoch": 0.33273949651120865, "grad_norm": 0.3267574906349182, "learning_rate": 1.867892454366786e-05, "loss": 0.4923, "step": 15689 }, { "epoch": 0.3327607049691417, "grad_norm": 0.3768194615840912, "learning_rate": 1.8678758874325887e-05, "loss": 0.6158, "step": 15690 }, { "epoch": 0.3327819134270747, "grad_norm": 0.45941898226737976, "learning_rate": 1.8678593195331478e-05, "loss": 0.4719, "step": 15691 }, { "epoch": 0.33280312188500777, "grad_norm": 0.3037830591201782, "learning_rate": 1.867842750668481e-05, "loss": 0.4906, "step": 15692 }, { "epoch": 0.3328243303429408, "grad_norm": 0.4256945848464966, "learning_rate": 1.867826180838607e-05, "loss": 0.5763, "step": 15693 }, { "epoch": 0.3328455388008738, "grad_norm": 0.3312288522720337, "learning_rate": 1.8678096100435443e-05, "loss": 0.4664, "step": 15694 }, { "epoch": 0.3328667472588068, "grad_norm": 0.35397347807884216, "learning_rate": 1.867793038283311e-05, "loss": 0.6152, "step": 15695 }, { "epoch": 0.33288795571673985, "grad_norm": 0.3162258267402649, "learning_rate": 1.8677764655579263e-05, "loss": 0.4255, "step": 15696 }, { "epoch": 0.33290916417467287, "grad_norm": 0.3126877248287201, "learning_rate": 1.867759891867408e-05, "loss": 0.4582, "step": 15697 }, { "epoch": 0.3329303726326059, "grad_norm": 0.3353196680545807, "learning_rate": 1.8677433172117747e-05, "loss": 0.5586, "step": 15698 }, { "epoch": 0.3329515810905389, "grad_norm": 0.39395710825920105, "learning_rate": 1.867726741591045e-05, "loss": 0.5258, "step": 15699 }, { "epoch": 0.3329727895484719, "grad_norm": 0.38187816739082336, "learning_rate": 1.867710165005237e-05, "loss": 0.5582, "step": 15700 }, { "epoch": 0.33299399800640495, "grad_norm": 0.2987762987613678, "learning_rate": 1.8676935874543693e-05, "loss": 0.445, "step": 15701 }, { "epoch": 0.33301520646433797, "grad_norm": 0.34535297751426697, "learning_rate": 1.86767700893846e-05, "loss": 0.4873, "step": 15702 }, { "epoch": 0.333036414922271, "grad_norm": 0.396567165851593, "learning_rate": 1.867660429457528e-05, "loss": 0.5093, "step": 15703 }, { "epoch": 0.333057623380204, "grad_norm": 0.3546283543109894, "learning_rate": 1.867643849011592e-05, "loss": 0.4979, "step": 15704 }, { "epoch": 0.333078831838137, "grad_norm": 0.33587872982025146, "learning_rate": 1.8676272676006698e-05, "loss": 0.4713, "step": 15705 }, { "epoch": 0.3331000402960701, "grad_norm": 0.351478636264801, "learning_rate": 1.8676106852247803e-05, "loss": 0.6048, "step": 15706 }, { "epoch": 0.3331212487540031, "grad_norm": 0.3351350426673889, "learning_rate": 1.8675941018839413e-05, "loss": 0.4344, "step": 15707 }, { "epoch": 0.33314245721193614, "grad_norm": 0.3423381447792053, "learning_rate": 1.867577517578172e-05, "loss": 0.5098, "step": 15708 }, { "epoch": 0.33316366566986916, "grad_norm": 0.3318840563297272, "learning_rate": 1.8675609323074903e-05, "loss": 0.4844, "step": 15709 }, { "epoch": 0.3331848741278022, "grad_norm": 0.354552298784256, "learning_rate": 1.867544346071915e-05, "loss": 0.5878, "step": 15710 }, { "epoch": 0.3332060825857352, "grad_norm": 0.35689303278923035, "learning_rate": 1.8675277588714645e-05, "loss": 0.4986, "step": 15711 }, { "epoch": 0.3332272910436682, "grad_norm": 0.3162367641925812, "learning_rate": 1.867511170706157e-05, "loss": 0.4416, "step": 15712 }, { "epoch": 0.33324849950160124, "grad_norm": 0.3140241503715515, "learning_rate": 1.8674945815760114e-05, "loss": 0.546, "step": 15713 }, { "epoch": 0.33326970795953426, "grad_norm": 0.3900092542171478, "learning_rate": 1.8674779914810456e-05, "loss": 0.5416, "step": 15714 }, { "epoch": 0.3332909164174673, "grad_norm": 0.46179115772247314, "learning_rate": 1.867461400421278e-05, "loss": 0.525, "step": 15715 }, { "epoch": 0.3333121248754003, "grad_norm": 0.35139569640159607, "learning_rate": 1.8674448083967277e-05, "loss": 0.5191, "step": 15716 }, { "epoch": 0.3333333333333333, "grad_norm": 0.4718974232673645, "learning_rate": 1.867428215407413e-05, "loss": 0.5731, "step": 15717 }, { "epoch": 0.33335454179126633, "grad_norm": 0.34592190384864807, "learning_rate": 1.8674116214533522e-05, "loss": 0.4995, "step": 15718 }, { "epoch": 0.33337575024919935, "grad_norm": 0.32471033930778503, "learning_rate": 1.8673950265345635e-05, "loss": 0.425, "step": 15719 }, { "epoch": 0.33339695870713243, "grad_norm": 0.37404513359069824, "learning_rate": 1.8673784306510656e-05, "loss": 0.5401, "step": 15720 }, { "epoch": 0.33341816716506545, "grad_norm": 0.3378911316394806, "learning_rate": 1.8673618338028768e-05, "loss": 0.5726, "step": 15721 }, { "epoch": 0.33343937562299847, "grad_norm": 0.316854327917099, "learning_rate": 1.8673452359900158e-05, "loss": 0.4694, "step": 15722 }, { "epoch": 0.3334605840809315, "grad_norm": 0.4244421124458313, "learning_rate": 1.867328637212501e-05, "loss": 0.4966, "step": 15723 }, { "epoch": 0.3334817925388645, "grad_norm": 0.4027610123157501, "learning_rate": 1.867312037470351e-05, "loss": 0.5931, "step": 15724 }, { "epoch": 0.3335030009967975, "grad_norm": 0.3393353819847107, "learning_rate": 1.867295436763584e-05, "loss": 0.5212, "step": 15725 }, { "epoch": 0.33352420945473055, "grad_norm": 0.3728764057159424, "learning_rate": 1.8672788350922185e-05, "loss": 0.4748, "step": 15726 }, { "epoch": 0.33354541791266357, "grad_norm": 0.4347614049911499, "learning_rate": 1.8672622324562732e-05, "loss": 0.6483, "step": 15727 }, { "epoch": 0.3335666263705966, "grad_norm": 0.34820449352264404, "learning_rate": 1.867245628855766e-05, "loss": 0.4934, "step": 15728 }, { "epoch": 0.3335878348285296, "grad_norm": 0.3826774060726166, "learning_rate": 1.8672290242907162e-05, "loss": 0.546, "step": 15729 }, { "epoch": 0.3336090432864626, "grad_norm": 0.3321819305419922, "learning_rate": 1.8672124187611414e-05, "loss": 0.5524, "step": 15730 }, { "epoch": 0.33363025174439565, "grad_norm": 0.30204302072525024, "learning_rate": 1.8671958122670607e-05, "loss": 0.545, "step": 15731 }, { "epoch": 0.33365146020232866, "grad_norm": 0.42245957255363464, "learning_rate": 1.8671792048084923e-05, "loss": 0.4874, "step": 15732 }, { "epoch": 0.33367266866026174, "grad_norm": 0.3385056257247925, "learning_rate": 1.8671625963854546e-05, "loss": 0.5285, "step": 15733 }, { "epoch": 0.33369387711819476, "grad_norm": 0.3280540108680725, "learning_rate": 1.8671459869979663e-05, "loss": 0.5328, "step": 15734 }, { "epoch": 0.3337150855761278, "grad_norm": 0.33076211810112, "learning_rate": 1.867129376646046e-05, "loss": 0.5363, "step": 15735 }, { "epoch": 0.3337362940340608, "grad_norm": 0.33148840069770813, "learning_rate": 1.8671127653297114e-05, "loss": 0.4912, "step": 15736 }, { "epoch": 0.3337575024919938, "grad_norm": 0.32869791984558105, "learning_rate": 1.867096153048982e-05, "loss": 0.531, "step": 15737 }, { "epoch": 0.33377871094992684, "grad_norm": 0.31801140308380127, "learning_rate": 1.8670795398038753e-05, "loss": 0.5617, "step": 15738 }, { "epoch": 0.33379991940785986, "grad_norm": 0.3444298505783081, "learning_rate": 1.8670629255944107e-05, "loss": 0.511, "step": 15739 }, { "epoch": 0.3338211278657929, "grad_norm": 0.33539846539497375, "learning_rate": 1.8670463104206065e-05, "loss": 0.5268, "step": 15740 }, { "epoch": 0.3338423363237259, "grad_norm": 0.326394259929657, "learning_rate": 1.8670296942824803e-05, "loss": 0.5297, "step": 15741 }, { "epoch": 0.3338635447816589, "grad_norm": 0.371049702167511, "learning_rate": 1.8670130771800516e-05, "loss": 0.4918, "step": 15742 }, { "epoch": 0.33388475323959194, "grad_norm": 0.2940967082977295, "learning_rate": 1.866996459113338e-05, "loss": 0.4708, "step": 15743 }, { "epoch": 0.33390596169752496, "grad_norm": 0.35309934616088867, "learning_rate": 1.866979840082359e-05, "loss": 0.4638, "step": 15744 }, { "epoch": 0.333927170155458, "grad_norm": 0.368509441614151, "learning_rate": 1.8669632200871325e-05, "loss": 0.5001, "step": 15745 }, { "epoch": 0.333948378613391, "grad_norm": 0.3351716101169586, "learning_rate": 1.866946599127677e-05, "loss": 0.5118, "step": 15746 }, { "epoch": 0.33396958707132407, "grad_norm": 0.3274342715740204, "learning_rate": 1.8669299772040108e-05, "loss": 0.5499, "step": 15747 }, { "epoch": 0.3339907955292571, "grad_norm": 0.3668433427810669, "learning_rate": 1.8669133543161528e-05, "loss": 0.5738, "step": 15748 }, { "epoch": 0.3340120039871901, "grad_norm": 0.33198845386505127, "learning_rate": 1.866896730464121e-05, "loss": 0.5054, "step": 15749 }, { "epoch": 0.33403321244512313, "grad_norm": 0.3696742057800293, "learning_rate": 1.8668801056479346e-05, "loss": 0.4866, "step": 15750 }, { "epoch": 0.33405442090305615, "grad_norm": 0.42591482400894165, "learning_rate": 1.8668634798676117e-05, "loss": 0.4568, "step": 15751 }, { "epoch": 0.33407562936098917, "grad_norm": 0.3678995966911316, "learning_rate": 1.8668468531231704e-05, "loss": 0.4472, "step": 15752 }, { "epoch": 0.3340968378189222, "grad_norm": 0.33880388736724854, "learning_rate": 1.8668302254146297e-05, "loss": 0.5155, "step": 15753 }, { "epoch": 0.3341180462768552, "grad_norm": 0.41745245456695557, "learning_rate": 1.866813596742008e-05, "loss": 0.5023, "step": 15754 }, { "epoch": 0.3341392547347882, "grad_norm": 0.3363135755062103, "learning_rate": 1.866796967105324e-05, "loss": 0.5097, "step": 15755 }, { "epoch": 0.33416046319272125, "grad_norm": 0.34839361906051636, "learning_rate": 1.8667803365045955e-05, "loss": 0.5488, "step": 15756 }, { "epoch": 0.33418167165065427, "grad_norm": 0.3978619873523712, "learning_rate": 1.8667637049398414e-05, "loss": 0.6109, "step": 15757 }, { "epoch": 0.3342028801085873, "grad_norm": 0.333812952041626, "learning_rate": 1.8667470724110806e-05, "loss": 0.5498, "step": 15758 }, { "epoch": 0.3342240885665203, "grad_norm": 0.31952229142189026, "learning_rate": 1.8667304389183312e-05, "loss": 0.4682, "step": 15759 }, { "epoch": 0.3342452970244533, "grad_norm": 0.3036927580833435, "learning_rate": 1.8667138044616116e-05, "loss": 0.4832, "step": 15760 }, { "epoch": 0.3342665054823864, "grad_norm": 0.32127290964126587, "learning_rate": 1.8666971690409403e-05, "loss": 0.5534, "step": 15761 }, { "epoch": 0.3342877139403194, "grad_norm": 0.3825950622558594, "learning_rate": 1.866680532656336e-05, "loss": 0.4769, "step": 15762 }, { "epoch": 0.33430892239825244, "grad_norm": 0.32335272431373596, "learning_rate": 1.8666638953078175e-05, "loss": 0.4062, "step": 15763 }, { "epoch": 0.33433013085618546, "grad_norm": 0.35290324687957764, "learning_rate": 1.8666472569954028e-05, "loss": 0.5523, "step": 15764 }, { "epoch": 0.3343513393141185, "grad_norm": 0.33058425784111023, "learning_rate": 1.86663061771911e-05, "loss": 0.4858, "step": 15765 }, { "epoch": 0.3343725477720515, "grad_norm": 0.6305566430091858, "learning_rate": 1.866613977478959e-05, "loss": 0.4622, "step": 15766 }, { "epoch": 0.3343937562299845, "grad_norm": 0.3115113377571106, "learning_rate": 1.866597336274967e-05, "loss": 0.4592, "step": 15767 }, { "epoch": 0.33441496468791754, "grad_norm": 0.32998713850975037, "learning_rate": 1.866580694107153e-05, "loss": 0.5476, "step": 15768 }, { "epoch": 0.33443617314585056, "grad_norm": 0.33538365364074707, "learning_rate": 1.8665640509755353e-05, "loss": 0.5207, "step": 15769 }, { "epoch": 0.3344573816037836, "grad_norm": 0.35282084345817566, "learning_rate": 1.866547406880133e-05, "loss": 0.5085, "step": 15770 }, { "epoch": 0.3344785900617166, "grad_norm": 0.32128384709358215, "learning_rate": 1.8665307618209637e-05, "loss": 0.4584, "step": 15771 }, { "epoch": 0.3344997985196496, "grad_norm": 0.333349347114563, "learning_rate": 1.8665141157980468e-05, "loss": 0.5053, "step": 15772 }, { "epoch": 0.33452100697758264, "grad_norm": 0.3485155403614044, "learning_rate": 1.8664974688114005e-05, "loss": 0.5537, "step": 15773 }, { "epoch": 0.3345422154355157, "grad_norm": 0.42296916246414185, "learning_rate": 1.866480820861043e-05, "loss": 0.557, "step": 15774 }, { "epoch": 0.33456342389344873, "grad_norm": 0.372341513633728, "learning_rate": 1.8664641719469934e-05, "loss": 0.5196, "step": 15775 }, { "epoch": 0.33458463235138175, "grad_norm": 0.31165212392807007, "learning_rate": 1.8664475220692692e-05, "loss": 0.3981, "step": 15776 }, { "epoch": 0.33460584080931477, "grad_norm": 0.3774346113204956, "learning_rate": 1.86643087122789e-05, "loss": 0.619, "step": 15777 }, { "epoch": 0.3346270492672478, "grad_norm": 0.34558892250061035, "learning_rate": 1.866414219422874e-05, "loss": 0.4479, "step": 15778 }, { "epoch": 0.3346482577251808, "grad_norm": 0.3394763469696045, "learning_rate": 1.8663975666542398e-05, "loss": 0.4461, "step": 15779 }, { "epoch": 0.33466946618311383, "grad_norm": 0.3340565264225006, "learning_rate": 1.8663809129220054e-05, "loss": 0.5724, "step": 15780 }, { "epoch": 0.33469067464104685, "grad_norm": 0.33888235688209534, "learning_rate": 1.8663642582261897e-05, "loss": 0.512, "step": 15781 }, { "epoch": 0.33471188309897987, "grad_norm": 0.3351908326148987, "learning_rate": 1.8663476025668113e-05, "loss": 0.5251, "step": 15782 }, { "epoch": 0.3347330915569129, "grad_norm": 0.3386439383029938, "learning_rate": 1.8663309459438886e-05, "loss": 0.5264, "step": 15783 }, { "epoch": 0.3347543000148459, "grad_norm": 0.31446635723114014, "learning_rate": 1.8663142883574402e-05, "loss": 0.5242, "step": 15784 }, { "epoch": 0.3347755084727789, "grad_norm": 0.3627711534500122, "learning_rate": 1.8662976298074843e-05, "loss": 0.4995, "step": 15785 }, { "epoch": 0.33479671693071195, "grad_norm": 0.36691808700561523, "learning_rate": 1.86628097029404e-05, "loss": 0.4694, "step": 15786 }, { "epoch": 0.33481792538864497, "grad_norm": 0.3265528976917267, "learning_rate": 1.8662643098171255e-05, "loss": 0.517, "step": 15787 }, { "epoch": 0.33483913384657804, "grad_norm": 0.334456205368042, "learning_rate": 1.8662476483767595e-05, "loss": 0.4735, "step": 15788 }, { "epoch": 0.33486034230451106, "grad_norm": 0.3636225461959839, "learning_rate": 1.8662309859729602e-05, "loss": 0.5564, "step": 15789 }, { "epoch": 0.3348815507624441, "grad_norm": 0.39837557077407837, "learning_rate": 1.8662143226057464e-05, "loss": 0.5454, "step": 15790 }, { "epoch": 0.3349027592203771, "grad_norm": 0.35811519622802734, "learning_rate": 1.8661976582751366e-05, "loss": 0.4756, "step": 15791 }, { "epoch": 0.3349239676783101, "grad_norm": 0.37123531103134155, "learning_rate": 1.866180992981149e-05, "loss": 0.5174, "step": 15792 }, { "epoch": 0.33494517613624314, "grad_norm": 0.8305818438529968, "learning_rate": 1.8661643267238026e-05, "loss": 0.4683, "step": 15793 }, { "epoch": 0.33496638459417616, "grad_norm": 0.3153611719608307, "learning_rate": 1.8661476595031157e-05, "loss": 0.4796, "step": 15794 }, { "epoch": 0.3349875930521092, "grad_norm": 0.448594331741333, "learning_rate": 1.866130991319107e-05, "loss": 0.5102, "step": 15795 }, { "epoch": 0.3350088015100422, "grad_norm": 0.3763335645198822, "learning_rate": 1.866114322171795e-05, "loss": 0.53, "step": 15796 }, { "epoch": 0.3350300099679752, "grad_norm": 0.3194774389266968, "learning_rate": 1.866097652061198e-05, "loss": 0.486, "step": 15797 }, { "epoch": 0.33505121842590824, "grad_norm": 0.33390888571739197, "learning_rate": 1.8660809809873348e-05, "loss": 0.4739, "step": 15798 }, { "epoch": 0.33507242688384126, "grad_norm": 0.4231976866722107, "learning_rate": 1.8660643089502238e-05, "loss": 0.4774, "step": 15799 }, { "epoch": 0.3350936353417743, "grad_norm": 0.35326120257377625, "learning_rate": 1.8660476359498834e-05, "loss": 0.5134, "step": 15800 }, { "epoch": 0.3351148437997073, "grad_norm": 0.3605075180530548, "learning_rate": 1.866030961986333e-05, "loss": 0.5846, "step": 15801 }, { "epoch": 0.33513605225764037, "grad_norm": 0.6439855098724365, "learning_rate": 1.86601428705959e-05, "loss": 0.6237, "step": 15802 }, { "epoch": 0.3351572607155734, "grad_norm": 0.3558928370475769, "learning_rate": 1.8659976111696732e-05, "loss": 0.5537, "step": 15803 }, { "epoch": 0.3351784691735064, "grad_norm": 0.3307577967643738, "learning_rate": 1.8659809343166015e-05, "loss": 0.4752, "step": 15804 }, { "epoch": 0.33519967763143943, "grad_norm": 0.33140459656715393, "learning_rate": 1.865964256500394e-05, "loss": 0.5392, "step": 15805 }, { "epoch": 0.33522088608937245, "grad_norm": 0.29201745986938477, "learning_rate": 1.8659475777210678e-05, "loss": 0.4928, "step": 15806 }, { "epoch": 0.33524209454730547, "grad_norm": 0.3537725806236267, "learning_rate": 1.8659308979786423e-05, "loss": 0.4297, "step": 15807 }, { "epoch": 0.3352633030052385, "grad_norm": 0.43132123351097107, "learning_rate": 1.8659142172731367e-05, "loss": 0.5226, "step": 15808 }, { "epoch": 0.3352845114631715, "grad_norm": 0.35763296484947205, "learning_rate": 1.8658975356045682e-05, "loss": 0.5819, "step": 15809 }, { "epoch": 0.33530571992110453, "grad_norm": 0.3310958445072174, "learning_rate": 1.8658808529729557e-05, "loss": 0.4903, "step": 15810 }, { "epoch": 0.33532692837903755, "grad_norm": 0.3194032311439514, "learning_rate": 1.8658641693783186e-05, "loss": 0.4008, "step": 15811 }, { "epoch": 0.33534813683697057, "grad_norm": 0.32664260268211365, "learning_rate": 1.865847484820675e-05, "loss": 0.497, "step": 15812 }, { "epoch": 0.3353693452949036, "grad_norm": 0.3437666893005371, "learning_rate": 1.8658307993000427e-05, "loss": 0.577, "step": 15813 }, { "epoch": 0.3353905537528366, "grad_norm": 0.33162257075309753, "learning_rate": 1.8658141128164415e-05, "loss": 0.5327, "step": 15814 }, { "epoch": 0.3354117622107697, "grad_norm": 0.3324296772480011, "learning_rate": 1.8657974253698892e-05, "loss": 0.5096, "step": 15815 }, { "epoch": 0.3354329706687027, "grad_norm": 0.34900954365730286, "learning_rate": 1.8657807369604045e-05, "loss": 0.502, "step": 15816 }, { "epoch": 0.3354541791266357, "grad_norm": 0.3589875102043152, "learning_rate": 1.8657640475880056e-05, "loss": 0.4296, "step": 15817 }, { "epoch": 0.33547538758456874, "grad_norm": 0.3620738387107849, "learning_rate": 1.865747357252712e-05, "loss": 0.528, "step": 15818 }, { "epoch": 0.33549659604250176, "grad_norm": 0.3421610891819, "learning_rate": 1.8657306659545414e-05, "loss": 0.53, "step": 15819 }, { "epoch": 0.3355178045004348, "grad_norm": 0.3596511781215668, "learning_rate": 1.8657139736935132e-05, "loss": 0.5261, "step": 15820 }, { "epoch": 0.3355390129583678, "grad_norm": 0.33744165301322937, "learning_rate": 1.8656972804696446e-05, "loss": 0.4762, "step": 15821 }, { "epoch": 0.3355602214163008, "grad_norm": 0.37603241205215454, "learning_rate": 1.8656805862829557e-05, "loss": 0.5463, "step": 15822 }, { "epoch": 0.33558142987423384, "grad_norm": 0.30660703778266907, "learning_rate": 1.865663891133464e-05, "loss": 0.4897, "step": 15823 }, { "epoch": 0.33560263833216686, "grad_norm": 0.34625986218452454, "learning_rate": 1.8656471950211885e-05, "loss": 0.4273, "step": 15824 }, { "epoch": 0.3356238467900999, "grad_norm": 0.347609281539917, "learning_rate": 1.865630497946148e-05, "loss": 0.5121, "step": 15825 }, { "epoch": 0.3356450552480329, "grad_norm": 0.3246251344680786, "learning_rate": 1.8656137999083605e-05, "loss": 0.5128, "step": 15826 }, { "epoch": 0.3356662637059659, "grad_norm": 0.3428887724876404, "learning_rate": 1.8655971009078448e-05, "loss": 0.4741, "step": 15827 }, { "epoch": 0.33568747216389894, "grad_norm": 0.35160452127456665, "learning_rate": 1.8655804009446196e-05, "loss": 0.5391, "step": 15828 }, { "epoch": 0.335708680621832, "grad_norm": 0.33176958560943604, "learning_rate": 1.8655637000187034e-05, "loss": 0.5363, "step": 15829 }, { "epoch": 0.33572988907976503, "grad_norm": 0.3689132332801819, "learning_rate": 1.8655469981301147e-05, "loss": 0.4868, "step": 15830 }, { "epoch": 0.33575109753769805, "grad_norm": 0.3656662702560425, "learning_rate": 1.8655302952788724e-05, "loss": 0.5933, "step": 15831 }, { "epoch": 0.33577230599563107, "grad_norm": 0.342069149017334, "learning_rate": 1.8655135914649944e-05, "loss": 0.4573, "step": 15832 }, { "epoch": 0.3357935144535641, "grad_norm": 0.31170132756233215, "learning_rate": 1.8654968866885e-05, "loss": 0.5037, "step": 15833 }, { "epoch": 0.3358147229114971, "grad_norm": 0.3663029372692108, "learning_rate": 1.8654801809494076e-05, "loss": 0.5025, "step": 15834 }, { "epoch": 0.33583593136943013, "grad_norm": 0.4201262295246124, "learning_rate": 1.8654634742477354e-05, "loss": 0.4996, "step": 15835 }, { "epoch": 0.33585713982736315, "grad_norm": 0.40965917706489563, "learning_rate": 1.8654467665835024e-05, "loss": 0.5797, "step": 15836 }, { "epoch": 0.33587834828529617, "grad_norm": 0.31750890612602234, "learning_rate": 1.865430057956727e-05, "loss": 0.507, "step": 15837 }, { "epoch": 0.3358995567432292, "grad_norm": 0.427892804145813, "learning_rate": 1.8654133483674277e-05, "loss": 0.5221, "step": 15838 }, { "epoch": 0.3359207652011622, "grad_norm": 0.3288205564022064, "learning_rate": 1.865396637815623e-05, "loss": 0.4688, "step": 15839 }, { "epoch": 0.3359419736590952, "grad_norm": 0.334308922290802, "learning_rate": 1.865379926301332e-05, "loss": 0.5173, "step": 15840 }, { "epoch": 0.33596318211702825, "grad_norm": 0.3550758361816406, "learning_rate": 1.8653632138245733e-05, "loss": 0.5295, "step": 15841 }, { "epoch": 0.33598439057496127, "grad_norm": 0.33326205611228943, "learning_rate": 1.8653465003853645e-05, "loss": 0.5283, "step": 15842 }, { "epoch": 0.33600559903289434, "grad_norm": 0.3358033299446106, "learning_rate": 1.865329785983725e-05, "loss": 0.4644, "step": 15843 }, { "epoch": 0.33602680749082736, "grad_norm": 0.3534330129623413, "learning_rate": 1.8653130706196733e-05, "loss": 0.5151, "step": 15844 }, { "epoch": 0.3360480159487604, "grad_norm": 0.33730795979499817, "learning_rate": 1.865296354293228e-05, "loss": 0.496, "step": 15845 }, { "epoch": 0.3360692244066934, "grad_norm": 0.3446764349937439, "learning_rate": 1.8652796370044078e-05, "loss": 0.5305, "step": 15846 }, { "epoch": 0.3360904328646264, "grad_norm": 0.358590692281723, "learning_rate": 1.8652629187532306e-05, "loss": 0.5711, "step": 15847 }, { "epoch": 0.33611164132255944, "grad_norm": 0.3196130394935608, "learning_rate": 1.865246199539716e-05, "loss": 0.4725, "step": 15848 }, { "epoch": 0.33613284978049246, "grad_norm": 0.37053370475769043, "learning_rate": 1.8652294793638817e-05, "loss": 0.5139, "step": 15849 }, { "epoch": 0.3361540582384255, "grad_norm": 0.3380984663963318, "learning_rate": 1.865212758225747e-05, "loss": 0.5056, "step": 15850 }, { "epoch": 0.3361752666963585, "grad_norm": 0.34717097878456116, "learning_rate": 1.8651960361253302e-05, "loss": 0.5147, "step": 15851 }, { "epoch": 0.3361964751542915, "grad_norm": 0.33595073223114014, "learning_rate": 1.8651793130626495e-05, "loss": 0.4734, "step": 15852 }, { "epoch": 0.33621768361222454, "grad_norm": 0.31822893023490906, "learning_rate": 1.865162589037724e-05, "loss": 0.4849, "step": 15853 }, { "epoch": 0.33623889207015756, "grad_norm": 0.3303995728492737, "learning_rate": 1.8651458640505723e-05, "loss": 0.4527, "step": 15854 }, { "epoch": 0.3362601005280906, "grad_norm": 0.3476575016975403, "learning_rate": 1.8651291381012127e-05, "loss": 0.5295, "step": 15855 }, { "epoch": 0.33628130898602365, "grad_norm": 0.36107560992240906, "learning_rate": 1.8651124111896644e-05, "loss": 0.5745, "step": 15856 }, { "epoch": 0.33630251744395667, "grad_norm": 0.36831820011138916, "learning_rate": 1.8650956833159453e-05, "loss": 0.5874, "step": 15857 }, { "epoch": 0.3363237259018897, "grad_norm": 0.3459356427192688, "learning_rate": 1.8650789544800744e-05, "loss": 0.5231, "step": 15858 }, { "epoch": 0.3363449343598227, "grad_norm": 0.34550267457962036, "learning_rate": 1.86506222468207e-05, "loss": 0.5965, "step": 15859 }, { "epoch": 0.33636614281775573, "grad_norm": 0.48023003339767456, "learning_rate": 1.865045493921951e-05, "loss": 0.4964, "step": 15860 }, { "epoch": 0.33638735127568875, "grad_norm": 0.34739404916763306, "learning_rate": 1.865028762199736e-05, "loss": 0.5915, "step": 15861 }, { "epoch": 0.33640855973362177, "grad_norm": 0.29712894558906555, "learning_rate": 1.8650120295154435e-05, "loss": 0.4851, "step": 15862 }, { "epoch": 0.3364297681915548, "grad_norm": 0.3330915570259094, "learning_rate": 1.8649952958690926e-05, "loss": 0.5497, "step": 15863 }, { "epoch": 0.3364509766494878, "grad_norm": 0.3532017767429352, "learning_rate": 1.8649785612607005e-05, "loss": 0.5042, "step": 15864 }, { "epoch": 0.33647218510742083, "grad_norm": 0.34293144941329956, "learning_rate": 1.8649618256902875e-05, "loss": 0.4587, "step": 15865 }, { "epoch": 0.33649339356535385, "grad_norm": 0.3249993622303009, "learning_rate": 1.864945089157871e-05, "loss": 0.4506, "step": 15866 }, { "epoch": 0.33651460202328687, "grad_norm": 0.3656443953514099, "learning_rate": 1.8649283516634703e-05, "loss": 0.4959, "step": 15867 }, { "epoch": 0.3365358104812199, "grad_norm": 0.32676151394844055, "learning_rate": 1.864911613207104e-05, "loss": 0.5463, "step": 15868 }, { "epoch": 0.3365570189391529, "grad_norm": 0.3213042616844177, "learning_rate": 1.8648948737887902e-05, "loss": 0.4763, "step": 15869 }, { "epoch": 0.336578227397086, "grad_norm": 0.36188098788261414, "learning_rate": 1.864878133408548e-05, "loss": 0.478, "step": 15870 }, { "epoch": 0.336599435855019, "grad_norm": 0.35018977522850037, "learning_rate": 1.8648613920663958e-05, "loss": 0.5527, "step": 15871 }, { "epoch": 0.336620644312952, "grad_norm": 0.37381771206855774, "learning_rate": 1.8648446497623523e-05, "loss": 0.4645, "step": 15872 }, { "epoch": 0.33664185277088504, "grad_norm": 0.32898494601249695, "learning_rate": 1.864827906496436e-05, "loss": 0.5197, "step": 15873 }, { "epoch": 0.33666306122881806, "grad_norm": 0.43818700313568115, "learning_rate": 1.8648111622686658e-05, "loss": 0.5252, "step": 15874 }, { "epoch": 0.3366842696867511, "grad_norm": 0.5289339423179626, "learning_rate": 1.8647944170790596e-05, "loss": 0.5538, "step": 15875 }, { "epoch": 0.3367054781446841, "grad_norm": 0.38764625787734985, "learning_rate": 1.8647776709276373e-05, "loss": 0.5328, "step": 15876 }, { "epoch": 0.3367266866026171, "grad_norm": 0.37649860978126526, "learning_rate": 1.8647609238144166e-05, "loss": 0.5614, "step": 15877 }, { "epoch": 0.33674789506055014, "grad_norm": 0.39628151059150696, "learning_rate": 1.864744175739416e-05, "loss": 0.4842, "step": 15878 }, { "epoch": 0.33676910351848316, "grad_norm": 0.31599995493888855, "learning_rate": 1.8647274267026546e-05, "loss": 0.5427, "step": 15879 }, { "epoch": 0.3367903119764162, "grad_norm": 0.3189188539981842, "learning_rate": 1.8647106767041507e-05, "loss": 0.5072, "step": 15880 }, { "epoch": 0.3368115204343492, "grad_norm": 0.3322168290615082, "learning_rate": 1.8646939257439235e-05, "loss": 0.5373, "step": 15881 }, { "epoch": 0.3368327288922822, "grad_norm": 0.3019641041755676, "learning_rate": 1.864677173821991e-05, "loss": 0.4408, "step": 15882 }, { "epoch": 0.3368539373502153, "grad_norm": 0.32712048292160034, "learning_rate": 1.8646604209383722e-05, "loss": 0.5432, "step": 15883 }, { "epoch": 0.3368751458081483, "grad_norm": 0.33298397064208984, "learning_rate": 1.8646436670930854e-05, "loss": 0.539, "step": 15884 }, { "epoch": 0.33689635426608133, "grad_norm": 0.33555150032043457, "learning_rate": 1.8646269122861492e-05, "loss": 0.4722, "step": 15885 }, { "epoch": 0.33691756272401435, "grad_norm": 0.3078297972679138, "learning_rate": 1.864610156517583e-05, "loss": 0.464, "step": 15886 }, { "epoch": 0.33693877118194737, "grad_norm": 0.42780694365501404, "learning_rate": 1.8645933997874046e-05, "loss": 0.4812, "step": 15887 }, { "epoch": 0.3369599796398804, "grad_norm": 0.3213708996772766, "learning_rate": 1.8645766420956327e-05, "loss": 0.4143, "step": 15888 }, { "epoch": 0.3369811880978134, "grad_norm": 0.3577035367488861, "learning_rate": 1.8645598834422867e-05, "loss": 0.5575, "step": 15889 }, { "epoch": 0.33700239655574643, "grad_norm": 0.38659659028053284, "learning_rate": 1.8645431238273842e-05, "loss": 0.6107, "step": 15890 }, { "epoch": 0.33702360501367945, "grad_norm": 0.3818279504776001, "learning_rate": 1.8645263632509447e-05, "loss": 0.5839, "step": 15891 }, { "epoch": 0.33704481347161247, "grad_norm": 0.31913402676582336, "learning_rate": 1.8645096017129867e-05, "loss": 0.4642, "step": 15892 }, { "epoch": 0.3370660219295455, "grad_norm": 0.36771735548973083, "learning_rate": 1.864492839213528e-05, "loss": 0.5665, "step": 15893 }, { "epoch": 0.3370872303874785, "grad_norm": 0.30363473296165466, "learning_rate": 1.8644760757525883e-05, "loss": 0.4697, "step": 15894 }, { "epoch": 0.33710843884541153, "grad_norm": 0.3710314631462097, "learning_rate": 1.864459311330186e-05, "loss": 0.6016, "step": 15895 }, { "epoch": 0.33712964730334455, "grad_norm": 0.3130257725715637, "learning_rate": 1.864442545946339e-05, "loss": 0.5034, "step": 15896 }, { "epoch": 0.3371508557612776, "grad_norm": 0.3403584659099579, "learning_rate": 1.8644257796010667e-05, "loss": 0.5272, "step": 15897 }, { "epoch": 0.33717206421921064, "grad_norm": 0.3837134540081024, "learning_rate": 1.8644090122943877e-05, "loss": 0.546, "step": 15898 }, { "epoch": 0.33719327267714366, "grad_norm": 0.4245477020740509, "learning_rate": 1.8643922440263206e-05, "loss": 0.55, "step": 15899 }, { "epoch": 0.3372144811350767, "grad_norm": 0.36688509583473206, "learning_rate": 1.8643754747968836e-05, "loss": 0.5333, "step": 15900 }, { "epoch": 0.3372356895930097, "grad_norm": 0.3766104578971863, "learning_rate": 1.864358704606096e-05, "loss": 0.5969, "step": 15901 }, { "epoch": 0.3372568980509427, "grad_norm": 0.3726844787597656, "learning_rate": 1.864341933453976e-05, "loss": 0.5206, "step": 15902 }, { "epoch": 0.33727810650887574, "grad_norm": 0.4395492672920227, "learning_rate": 1.8643251613405425e-05, "loss": 0.4733, "step": 15903 }, { "epoch": 0.33729931496680876, "grad_norm": 0.3513907790184021, "learning_rate": 1.864308388265814e-05, "loss": 0.5416, "step": 15904 }, { "epoch": 0.3373205234247418, "grad_norm": 0.30332687497138977, "learning_rate": 1.8642916142298094e-05, "loss": 0.4602, "step": 15905 }, { "epoch": 0.3373417318826748, "grad_norm": 0.32360270619392395, "learning_rate": 1.864274839232547e-05, "loss": 0.4646, "step": 15906 }, { "epoch": 0.3373629403406078, "grad_norm": 0.3283666670322418, "learning_rate": 1.8642580632740458e-05, "loss": 0.4432, "step": 15907 }, { "epoch": 0.33738414879854084, "grad_norm": 0.34419432282447815, "learning_rate": 1.864241286354324e-05, "loss": 0.4967, "step": 15908 }, { "epoch": 0.33740535725647386, "grad_norm": 0.41755953431129456, "learning_rate": 1.8642245084734004e-05, "loss": 0.5122, "step": 15909 }, { "epoch": 0.3374265657144069, "grad_norm": 0.367394357919693, "learning_rate": 1.8642077296312944e-05, "loss": 0.5807, "step": 15910 }, { "epoch": 0.33744777417233995, "grad_norm": 0.3689134418964386, "learning_rate": 1.8641909498280236e-05, "loss": 0.5134, "step": 15911 }, { "epoch": 0.337468982630273, "grad_norm": 0.32255634665489197, "learning_rate": 1.8641741690636076e-05, "loss": 0.5266, "step": 15912 }, { "epoch": 0.337490191088206, "grad_norm": 0.34086382389068604, "learning_rate": 1.864157387338064e-05, "loss": 0.4868, "step": 15913 }, { "epoch": 0.337511399546139, "grad_norm": 0.2997492551803589, "learning_rate": 1.8641406046514127e-05, "loss": 0.4339, "step": 15914 }, { "epoch": 0.33753260800407203, "grad_norm": 0.36482924222946167, "learning_rate": 1.8641238210036714e-05, "loss": 0.5168, "step": 15915 }, { "epoch": 0.33755381646200505, "grad_norm": 0.37547796964645386, "learning_rate": 1.864107036394859e-05, "loss": 0.5197, "step": 15916 }, { "epoch": 0.33757502491993807, "grad_norm": 0.3610260784626007, "learning_rate": 1.864090250824994e-05, "loss": 0.4879, "step": 15917 }, { "epoch": 0.3375962333778711, "grad_norm": 0.3385845422744751, "learning_rate": 1.864073464294096e-05, "loss": 0.5157, "step": 15918 }, { "epoch": 0.3376174418358041, "grad_norm": 0.3679233193397522, "learning_rate": 1.8640566768021825e-05, "loss": 0.4729, "step": 15919 }, { "epoch": 0.33763865029373713, "grad_norm": 0.4616340100765228, "learning_rate": 1.8640398883492728e-05, "loss": 0.4085, "step": 15920 }, { "epoch": 0.33765985875167015, "grad_norm": 0.33132562041282654, "learning_rate": 1.864023098935386e-05, "loss": 0.4437, "step": 15921 }, { "epoch": 0.33768106720960317, "grad_norm": 0.36734339594841003, "learning_rate": 1.864006308560539e-05, "loss": 0.535, "step": 15922 }, { "epoch": 0.3377022756675362, "grad_norm": 0.3355112373828888, "learning_rate": 1.863989517224753e-05, "loss": 0.5796, "step": 15923 }, { "epoch": 0.33772348412546926, "grad_norm": 0.4246169328689575, "learning_rate": 1.8639727249280445e-05, "loss": 0.4922, "step": 15924 }, { "epoch": 0.3377446925834023, "grad_norm": 0.3470969498157501, "learning_rate": 1.8639559316704333e-05, "loss": 0.4843, "step": 15925 }, { "epoch": 0.3377659010413353, "grad_norm": 0.3462640345096588, "learning_rate": 1.863939137451938e-05, "loss": 0.518, "step": 15926 }, { "epoch": 0.3377871094992683, "grad_norm": 0.35228589177131653, "learning_rate": 1.8639223422725767e-05, "loss": 0.5301, "step": 15927 }, { "epoch": 0.33780831795720134, "grad_norm": 0.3613194227218628, "learning_rate": 1.8639055461323687e-05, "loss": 0.5442, "step": 15928 }, { "epoch": 0.33782952641513436, "grad_norm": 0.32664352655410767, "learning_rate": 1.8638887490313326e-05, "loss": 0.4833, "step": 15929 }, { "epoch": 0.3378507348730674, "grad_norm": 0.2897016704082489, "learning_rate": 1.863871950969487e-05, "loss": 0.4719, "step": 15930 }, { "epoch": 0.3378719433310004, "grad_norm": 0.3430297076702118, "learning_rate": 1.8638551519468502e-05, "loss": 0.4634, "step": 15931 }, { "epoch": 0.3378931517889334, "grad_norm": 0.428493469953537, "learning_rate": 1.8638383519634413e-05, "loss": 0.4245, "step": 15932 }, { "epoch": 0.33791436024686644, "grad_norm": 0.35677987337112427, "learning_rate": 1.863821551019279e-05, "loss": 0.4767, "step": 15933 }, { "epoch": 0.33793556870479946, "grad_norm": 0.40752825140953064, "learning_rate": 1.8638047491143817e-05, "loss": 0.5649, "step": 15934 }, { "epoch": 0.3379567771627325, "grad_norm": 0.35547390580177307, "learning_rate": 1.8637879462487683e-05, "loss": 0.4993, "step": 15935 }, { "epoch": 0.3379779856206655, "grad_norm": 0.33387118577957153, "learning_rate": 1.8637711424224572e-05, "loss": 0.6008, "step": 15936 }, { "epoch": 0.3379991940785985, "grad_norm": 0.3163522481918335, "learning_rate": 1.863754337635468e-05, "loss": 0.4529, "step": 15937 }, { "epoch": 0.3380204025365316, "grad_norm": 0.3407413065433502, "learning_rate": 1.8637375318878184e-05, "loss": 0.4856, "step": 15938 }, { "epoch": 0.3380416109944646, "grad_norm": 0.3562939167022705, "learning_rate": 1.8637207251795273e-05, "loss": 0.5403, "step": 15939 }, { "epoch": 0.33806281945239763, "grad_norm": 0.31898263096809387, "learning_rate": 1.8637039175106137e-05, "loss": 0.424, "step": 15940 }, { "epoch": 0.33808402791033065, "grad_norm": 0.4018423855304718, "learning_rate": 1.8636871088810956e-05, "loss": 0.5637, "step": 15941 }, { "epoch": 0.3381052363682637, "grad_norm": 0.33188802003860474, "learning_rate": 1.863670299290993e-05, "loss": 0.4747, "step": 15942 }, { "epoch": 0.3381264448261967, "grad_norm": 0.31962576508522034, "learning_rate": 1.8636534887403232e-05, "loss": 0.4722, "step": 15943 }, { "epoch": 0.3381476532841297, "grad_norm": 0.2824797034263611, "learning_rate": 1.863636677229106e-05, "loss": 0.4008, "step": 15944 }, { "epoch": 0.33816886174206273, "grad_norm": 0.4077494442462921, "learning_rate": 1.8636198647573592e-05, "loss": 0.5356, "step": 15945 }, { "epoch": 0.33819007019999575, "grad_norm": 0.34888461232185364, "learning_rate": 1.8636030513251017e-05, "loss": 0.568, "step": 15946 }, { "epoch": 0.33821127865792877, "grad_norm": 0.3192535936832428, "learning_rate": 1.8635862369323528e-05, "loss": 0.4917, "step": 15947 }, { "epoch": 0.3382324871158618, "grad_norm": 0.3382388949394226, "learning_rate": 1.8635694215791305e-05, "loss": 0.4581, "step": 15948 }, { "epoch": 0.3382536955737948, "grad_norm": 0.3611908257007599, "learning_rate": 1.863552605265454e-05, "loss": 0.516, "step": 15949 }, { "epoch": 0.33827490403172783, "grad_norm": 0.3469105660915375, "learning_rate": 1.8635357879913418e-05, "loss": 0.4755, "step": 15950 }, { "epoch": 0.33829611248966085, "grad_norm": 0.36089184880256653, "learning_rate": 1.8635189697568123e-05, "loss": 0.5391, "step": 15951 }, { "epoch": 0.3383173209475939, "grad_norm": 0.37862443923950195, "learning_rate": 1.863502150561885e-05, "loss": 0.5229, "step": 15952 }, { "epoch": 0.33833852940552694, "grad_norm": 0.3476252853870392, "learning_rate": 1.8634853304065775e-05, "loss": 0.5125, "step": 15953 }, { "epoch": 0.33835973786345996, "grad_norm": 0.3213273286819458, "learning_rate": 1.8634685092909098e-05, "loss": 0.4984, "step": 15954 }, { "epoch": 0.338380946321393, "grad_norm": 0.3532714247703552, "learning_rate": 1.863451687214899e-05, "loss": 0.539, "step": 15955 }, { "epoch": 0.338402154779326, "grad_norm": 0.40080755949020386, "learning_rate": 1.8634348641785656e-05, "loss": 0.4856, "step": 15956 }, { "epoch": 0.338423363237259, "grad_norm": 0.3460719883441925, "learning_rate": 1.863418040181927e-05, "loss": 0.5231, "step": 15957 }, { "epoch": 0.33844457169519204, "grad_norm": 0.341549813747406, "learning_rate": 1.863401215225003e-05, "loss": 0.4521, "step": 15958 }, { "epoch": 0.33846578015312506, "grad_norm": 0.3849910795688629, "learning_rate": 1.863384389307811e-05, "loss": 0.561, "step": 15959 }, { "epoch": 0.3384869886110581, "grad_norm": 0.3187967538833618, "learning_rate": 1.8633675624303705e-05, "loss": 0.434, "step": 15960 }, { "epoch": 0.3385081970689911, "grad_norm": 0.3148365020751953, "learning_rate": 1.8633507345927e-05, "loss": 0.4331, "step": 15961 }, { "epoch": 0.3385294055269241, "grad_norm": 0.3043437898159027, "learning_rate": 1.8633339057948187e-05, "loss": 0.4899, "step": 15962 }, { "epoch": 0.33855061398485714, "grad_norm": 0.32215607166290283, "learning_rate": 1.8633170760367448e-05, "loss": 0.5051, "step": 15963 }, { "epoch": 0.33857182244279016, "grad_norm": 0.5116187334060669, "learning_rate": 1.863300245318497e-05, "loss": 0.559, "step": 15964 }, { "epoch": 0.33859303090072324, "grad_norm": 0.34936246275901794, "learning_rate": 1.8632834136400943e-05, "loss": 0.5014, "step": 15965 }, { "epoch": 0.33861423935865625, "grad_norm": 0.2981308698654175, "learning_rate": 1.863266581001555e-05, "loss": 0.4999, "step": 15966 }, { "epoch": 0.3386354478165893, "grad_norm": 0.3681807518005371, "learning_rate": 1.8632497474028985e-05, "loss": 0.4886, "step": 15967 }, { "epoch": 0.3386566562745223, "grad_norm": 0.3503590226173401, "learning_rate": 1.8632329128441427e-05, "loss": 0.5438, "step": 15968 }, { "epoch": 0.3386778647324553, "grad_norm": 0.3329276144504547, "learning_rate": 1.8632160773253072e-05, "loss": 0.4779, "step": 15969 }, { "epoch": 0.33869907319038833, "grad_norm": 0.31343740224838257, "learning_rate": 1.86319924084641e-05, "loss": 0.4324, "step": 15970 }, { "epoch": 0.33872028164832135, "grad_norm": 0.38493362069129944, "learning_rate": 1.8631824034074705e-05, "loss": 0.445, "step": 15971 }, { "epoch": 0.3387414901062544, "grad_norm": 0.3164670765399933, "learning_rate": 1.8631655650085065e-05, "loss": 0.5002, "step": 15972 }, { "epoch": 0.3387626985641874, "grad_norm": 0.3197978138923645, "learning_rate": 1.8631487256495373e-05, "loss": 0.4287, "step": 15973 }, { "epoch": 0.3387839070221204, "grad_norm": 0.36684906482696533, "learning_rate": 1.863131885330582e-05, "loss": 0.4962, "step": 15974 }, { "epoch": 0.33880511548005343, "grad_norm": 0.34766826033592224, "learning_rate": 1.8631150440516586e-05, "loss": 0.518, "step": 15975 }, { "epoch": 0.33882632393798645, "grad_norm": 0.35279229283332825, "learning_rate": 1.8630982018127863e-05, "loss": 0.477, "step": 15976 }, { "epoch": 0.33884753239591947, "grad_norm": 0.33504021167755127, "learning_rate": 1.8630813586139834e-05, "loss": 0.5528, "step": 15977 }, { "epoch": 0.3388687408538525, "grad_norm": 0.35717177391052246, "learning_rate": 1.8630645144552692e-05, "loss": 0.6537, "step": 15978 }, { "epoch": 0.33888994931178557, "grad_norm": 0.31697770953178406, "learning_rate": 1.863047669336662e-05, "loss": 0.5245, "step": 15979 }, { "epoch": 0.3389111577697186, "grad_norm": 0.4528850018978119, "learning_rate": 1.8630308232581805e-05, "loss": 0.4607, "step": 15980 }, { "epoch": 0.3389323662276516, "grad_norm": 0.30750083923339844, "learning_rate": 1.863013976219844e-05, "loss": 0.4461, "step": 15981 }, { "epoch": 0.3389535746855846, "grad_norm": 0.352934330701828, "learning_rate": 1.8629971282216708e-05, "loss": 0.4955, "step": 15982 }, { "epoch": 0.33897478314351764, "grad_norm": 0.42613378167152405, "learning_rate": 1.8629802792636794e-05, "loss": 0.6611, "step": 15983 }, { "epoch": 0.33899599160145066, "grad_norm": 0.3291517198085785, "learning_rate": 1.8629634293458893e-05, "loss": 0.5362, "step": 15984 }, { "epoch": 0.3390172000593837, "grad_norm": 0.3768286108970642, "learning_rate": 1.8629465784683183e-05, "loss": 0.5564, "step": 15985 }, { "epoch": 0.3390384085173167, "grad_norm": 0.3423900902271271, "learning_rate": 1.862929726630986e-05, "loss": 0.5321, "step": 15986 }, { "epoch": 0.3390596169752497, "grad_norm": 0.32933709025382996, "learning_rate": 1.8629128738339105e-05, "loss": 0.5018, "step": 15987 }, { "epoch": 0.33908082543318274, "grad_norm": 0.3224600851535797, "learning_rate": 1.862896020077111e-05, "loss": 0.4675, "step": 15988 }, { "epoch": 0.33910203389111576, "grad_norm": 0.40796321630477905, "learning_rate": 1.8628791653606058e-05, "loss": 0.4779, "step": 15989 }, { "epoch": 0.3391232423490488, "grad_norm": 0.3210550844669342, "learning_rate": 1.862862309684414e-05, "loss": 0.5018, "step": 15990 }, { "epoch": 0.3391444508069818, "grad_norm": 0.3623600900173187, "learning_rate": 1.862845453048554e-05, "loss": 0.4927, "step": 15991 }, { "epoch": 0.3391656592649148, "grad_norm": 0.37403953075408936, "learning_rate": 1.8628285954530452e-05, "loss": 0.4911, "step": 15992 }, { "epoch": 0.3391868677228479, "grad_norm": 0.3185684084892273, "learning_rate": 1.8628117368979056e-05, "loss": 0.4506, "step": 15993 }, { "epoch": 0.3392080761807809, "grad_norm": 0.32693251967430115, "learning_rate": 1.8627948773831543e-05, "loss": 0.5834, "step": 15994 }, { "epoch": 0.33922928463871393, "grad_norm": 0.3562147319316864, "learning_rate": 1.8627780169088102e-05, "loss": 0.5341, "step": 15995 }, { "epoch": 0.33925049309664695, "grad_norm": 0.37914615869522095, "learning_rate": 1.8627611554748916e-05, "loss": 0.5056, "step": 15996 }, { "epoch": 0.33927170155458, "grad_norm": 0.3292858600616455, "learning_rate": 1.862744293081418e-05, "loss": 0.5526, "step": 15997 }, { "epoch": 0.339292910012513, "grad_norm": 0.49587127566337585, "learning_rate": 1.8627274297284074e-05, "loss": 0.5491, "step": 15998 }, { "epoch": 0.339314118470446, "grad_norm": 0.32619109749794006, "learning_rate": 1.8627105654158792e-05, "loss": 0.508, "step": 15999 }, { "epoch": 0.33933532692837903, "grad_norm": 0.36510780453681946, "learning_rate": 1.8626937001438513e-05, "loss": 0.4838, "step": 16000 }, { "epoch": 0.33935653538631205, "grad_norm": 0.38311025500297546, "learning_rate": 1.8626768339123432e-05, "loss": 0.5406, "step": 16001 }, { "epoch": 0.33937774384424507, "grad_norm": 0.2968701124191284, "learning_rate": 1.862659966721373e-05, "loss": 0.4408, "step": 16002 }, { "epoch": 0.3393989523021781, "grad_norm": 0.34343239665031433, "learning_rate": 1.8626430985709605e-05, "loss": 0.4902, "step": 16003 }, { "epoch": 0.3394201607601111, "grad_norm": 0.3079932928085327, "learning_rate": 1.8626262294611236e-05, "loss": 0.5041, "step": 16004 }, { "epoch": 0.33944136921804413, "grad_norm": 0.3647944927215576, "learning_rate": 1.8626093593918813e-05, "loss": 0.5085, "step": 16005 }, { "epoch": 0.3394625776759772, "grad_norm": 0.8983440399169922, "learning_rate": 1.8625924883632526e-05, "loss": 0.4686, "step": 16006 }, { "epoch": 0.3394837861339102, "grad_norm": 0.3692573010921478, "learning_rate": 1.8625756163752557e-05, "loss": 0.5178, "step": 16007 }, { "epoch": 0.33950499459184325, "grad_norm": 0.33368390798568726, "learning_rate": 1.86255874342791e-05, "loss": 0.5338, "step": 16008 }, { "epoch": 0.33952620304977627, "grad_norm": 0.37508314847946167, "learning_rate": 1.8625418695212336e-05, "loss": 0.4822, "step": 16009 }, { "epoch": 0.3395474115077093, "grad_norm": 0.3531281650066376, "learning_rate": 1.8625249946552457e-05, "loss": 0.5033, "step": 16010 }, { "epoch": 0.3395686199656423, "grad_norm": 0.32678332924842834, "learning_rate": 1.8625081188299653e-05, "loss": 0.523, "step": 16011 }, { "epoch": 0.3395898284235753, "grad_norm": 0.4404621124267578, "learning_rate": 1.8624912420454105e-05, "loss": 0.5375, "step": 16012 }, { "epoch": 0.33961103688150834, "grad_norm": 0.3494909405708313, "learning_rate": 1.8624743643016004e-05, "loss": 0.5158, "step": 16013 }, { "epoch": 0.33963224533944136, "grad_norm": 0.336273729801178, "learning_rate": 1.862457485598554e-05, "loss": 0.5558, "step": 16014 }, { "epoch": 0.3396534537973744, "grad_norm": 0.32410967350006104, "learning_rate": 1.8624406059362902e-05, "loss": 0.5508, "step": 16015 }, { "epoch": 0.3396746622553074, "grad_norm": 0.3348417282104492, "learning_rate": 1.8624237253148266e-05, "loss": 0.5318, "step": 16016 }, { "epoch": 0.3396958707132404, "grad_norm": 0.35561370849609375, "learning_rate": 1.8624068437341836e-05, "loss": 0.4772, "step": 16017 }, { "epoch": 0.33971707917117344, "grad_norm": 0.35677292943000793, "learning_rate": 1.8623899611943788e-05, "loss": 0.5107, "step": 16018 }, { "epoch": 0.33973828762910646, "grad_norm": 0.3637221157550812, "learning_rate": 1.8623730776954315e-05, "loss": 0.5578, "step": 16019 }, { "epoch": 0.33975949608703954, "grad_norm": 0.3582439422607422, "learning_rate": 1.8623561932373603e-05, "loss": 0.6155, "step": 16020 }, { "epoch": 0.33978070454497256, "grad_norm": 0.3748016655445099, "learning_rate": 1.8623393078201842e-05, "loss": 0.5455, "step": 16021 }, { "epoch": 0.3398019130029056, "grad_norm": 0.36528947949409485, "learning_rate": 1.862322421443922e-05, "loss": 0.5478, "step": 16022 }, { "epoch": 0.3398231214608386, "grad_norm": 0.32880619168281555, "learning_rate": 1.862305534108592e-05, "loss": 0.4955, "step": 16023 }, { "epoch": 0.3398443299187716, "grad_norm": 0.3760678172111511, "learning_rate": 1.862288645814213e-05, "loss": 0.5059, "step": 16024 }, { "epoch": 0.33986553837670463, "grad_norm": 0.37916454672813416, "learning_rate": 1.8622717565608043e-05, "loss": 0.5473, "step": 16025 }, { "epoch": 0.33988674683463765, "grad_norm": 0.3438335359096527, "learning_rate": 1.8622548663483843e-05, "loss": 0.5529, "step": 16026 }, { "epoch": 0.3399079552925707, "grad_norm": 0.3558514416217804, "learning_rate": 1.8622379751769722e-05, "loss": 0.5128, "step": 16027 }, { "epoch": 0.3399291637505037, "grad_norm": 0.35374772548675537, "learning_rate": 1.8622210830465867e-05, "loss": 0.4989, "step": 16028 }, { "epoch": 0.3399503722084367, "grad_norm": 0.3836172819137573, "learning_rate": 1.8622041899572457e-05, "loss": 0.5644, "step": 16029 }, { "epoch": 0.33997158066636973, "grad_norm": 0.3667658269405365, "learning_rate": 1.8621872959089696e-05, "loss": 0.5339, "step": 16030 }, { "epoch": 0.33999278912430275, "grad_norm": 0.3352007269859314, "learning_rate": 1.8621704009017756e-05, "loss": 0.5158, "step": 16031 }, { "epoch": 0.34001399758223577, "grad_norm": 0.32509845495224, "learning_rate": 1.8621535049356834e-05, "loss": 0.4764, "step": 16032 }, { "epoch": 0.34003520604016885, "grad_norm": 0.3840211033821106, "learning_rate": 1.8621366080107112e-05, "loss": 0.4576, "step": 16033 }, { "epoch": 0.34005641449810187, "grad_norm": 0.3968386650085449, "learning_rate": 1.8621197101268785e-05, "loss": 0.4484, "step": 16034 }, { "epoch": 0.3400776229560349, "grad_norm": 0.37326908111572266, "learning_rate": 1.8621028112842038e-05, "loss": 0.5475, "step": 16035 }, { "epoch": 0.3400988314139679, "grad_norm": 0.3556537330150604, "learning_rate": 1.862085911482706e-05, "loss": 0.5852, "step": 16036 }, { "epoch": 0.3401200398719009, "grad_norm": 0.6448017358779907, "learning_rate": 1.8620690107224036e-05, "loss": 0.6045, "step": 16037 }, { "epoch": 0.34014124832983395, "grad_norm": 0.3581996560096741, "learning_rate": 1.862052109003315e-05, "loss": 0.5837, "step": 16038 }, { "epoch": 0.34016245678776696, "grad_norm": 0.3293403089046478, "learning_rate": 1.8620352063254602e-05, "loss": 0.4706, "step": 16039 }, { "epoch": 0.3401836652457, "grad_norm": 0.34128662943840027, "learning_rate": 1.862018302688857e-05, "loss": 0.5125, "step": 16040 }, { "epoch": 0.340204873703633, "grad_norm": 0.3493114113807678, "learning_rate": 1.8620013980935246e-05, "loss": 0.4057, "step": 16041 }, { "epoch": 0.340226082161566, "grad_norm": 0.3344544470310211, "learning_rate": 1.861984492539482e-05, "loss": 0.5468, "step": 16042 }, { "epoch": 0.34024729061949904, "grad_norm": 0.32733747363090515, "learning_rate": 1.8619675860267473e-05, "loss": 0.5528, "step": 16043 }, { "epoch": 0.34026849907743206, "grad_norm": 0.33577287197113037, "learning_rate": 1.86195067855534e-05, "loss": 0.523, "step": 16044 }, { "epoch": 0.3402897075353651, "grad_norm": 0.34496697783470154, "learning_rate": 1.8619337701252788e-05, "loss": 0.5555, "step": 16045 }, { "epoch": 0.3403109159932981, "grad_norm": 0.3612270951271057, "learning_rate": 1.861916860736582e-05, "loss": 0.5256, "step": 16046 }, { "epoch": 0.3403321244512312, "grad_norm": 0.341758668422699, "learning_rate": 1.861899950389269e-05, "loss": 0.5601, "step": 16047 }, { "epoch": 0.3403533329091642, "grad_norm": 0.3535599112510681, "learning_rate": 1.8618830390833583e-05, "loss": 0.552, "step": 16048 }, { "epoch": 0.3403745413670972, "grad_norm": 0.3415224254131317, "learning_rate": 1.8618661268188685e-05, "loss": 0.5227, "step": 16049 }, { "epoch": 0.34039574982503024, "grad_norm": 0.3558695614337921, "learning_rate": 1.861849213595819e-05, "loss": 0.6232, "step": 16050 }, { "epoch": 0.34041695828296326, "grad_norm": 0.558341383934021, "learning_rate": 1.861832299414228e-05, "loss": 0.5621, "step": 16051 }, { "epoch": 0.3404381667408963, "grad_norm": 0.31879982352256775, "learning_rate": 1.861815384274115e-05, "loss": 0.5465, "step": 16052 }, { "epoch": 0.3404593751988293, "grad_norm": 0.3501763641834259, "learning_rate": 1.8617984681754983e-05, "loss": 0.5103, "step": 16053 }, { "epoch": 0.3404805836567623, "grad_norm": 0.317920982837677, "learning_rate": 1.8617815511183964e-05, "loss": 0.4681, "step": 16054 }, { "epoch": 0.34050179211469533, "grad_norm": 0.3190000653266907, "learning_rate": 1.861764633102829e-05, "loss": 0.5426, "step": 16055 }, { "epoch": 0.34052300057262835, "grad_norm": 0.419537216424942, "learning_rate": 1.861747714128814e-05, "loss": 0.6031, "step": 16056 }, { "epoch": 0.3405442090305614, "grad_norm": 1.0091021060943604, "learning_rate": 1.8617307941963712e-05, "loss": 0.5124, "step": 16057 }, { "epoch": 0.3405654174884944, "grad_norm": 0.36243775486946106, "learning_rate": 1.861713873305519e-05, "loss": 0.5793, "step": 16058 }, { "epoch": 0.3405866259464274, "grad_norm": 0.3958154618740082, "learning_rate": 1.861696951456276e-05, "loss": 0.4038, "step": 16059 }, { "epoch": 0.34060783440436043, "grad_norm": 0.406933456659317, "learning_rate": 1.861680028648661e-05, "loss": 0.5227, "step": 16060 }, { "epoch": 0.3406290428622935, "grad_norm": 0.5894989371299744, "learning_rate": 1.8616631048826925e-05, "loss": 0.5001, "step": 16061 }, { "epoch": 0.3406502513202265, "grad_norm": 0.30866560339927673, "learning_rate": 1.8616461801583904e-05, "loss": 0.5161, "step": 16062 }, { "epoch": 0.34067145977815955, "grad_norm": 0.3605561852455139, "learning_rate": 1.861629254475773e-05, "loss": 0.5221, "step": 16063 }, { "epoch": 0.34069266823609257, "grad_norm": 0.32011091709136963, "learning_rate": 1.8616123278348585e-05, "loss": 0.5263, "step": 16064 }, { "epoch": 0.3407138766940256, "grad_norm": 0.346761554479599, "learning_rate": 1.8615954002356663e-05, "loss": 0.53, "step": 16065 }, { "epoch": 0.3407350851519586, "grad_norm": 0.31311964988708496, "learning_rate": 1.8615784716782155e-05, "loss": 0.501, "step": 16066 }, { "epoch": 0.3407562936098916, "grad_norm": 0.3363652229309082, "learning_rate": 1.8615615421625245e-05, "loss": 0.5367, "step": 16067 }, { "epoch": 0.34077750206782464, "grad_norm": 0.36290523409843445, "learning_rate": 1.8615446116886122e-05, "loss": 0.469, "step": 16068 }, { "epoch": 0.34079871052575766, "grad_norm": 0.5898116827011108, "learning_rate": 1.8615276802564977e-05, "loss": 0.5329, "step": 16069 }, { "epoch": 0.3408199189836907, "grad_norm": 0.3577705919742584, "learning_rate": 1.8615107478661994e-05, "loss": 0.4764, "step": 16070 }, { "epoch": 0.3408411274416237, "grad_norm": 0.33005020022392273, "learning_rate": 1.8614938145177362e-05, "loss": 0.4787, "step": 16071 }, { "epoch": 0.3408623358995567, "grad_norm": 0.36886805295944214, "learning_rate": 1.8614768802111272e-05, "loss": 0.5516, "step": 16072 }, { "epoch": 0.34088354435748974, "grad_norm": 0.340236634016037, "learning_rate": 1.8614599449463912e-05, "loss": 0.4689, "step": 16073 }, { "epoch": 0.3409047528154228, "grad_norm": 0.35500216484069824, "learning_rate": 1.8614430087235467e-05, "loss": 0.4928, "step": 16074 }, { "epoch": 0.34092596127335584, "grad_norm": 0.357657790184021, "learning_rate": 1.8614260715426128e-05, "loss": 0.5331, "step": 16075 }, { "epoch": 0.34094716973128886, "grad_norm": 0.34306105971336365, "learning_rate": 1.8614091334036083e-05, "loss": 0.5073, "step": 16076 }, { "epoch": 0.3409683781892219, "grad_norm": 0.3572175204753876, "learning_rate": 1.8613921943065523e-05, "loss": 0.5825, "step": 16077 }, { "epoch": 0.3409895866471549, "grad_norm": 0.3925155997276306, "learning_rate": 1.8613752542514635e-05, "loss": 0.4866, "step": 16078 }, { "epoch": 0.3410107951050879, "grad_norm": 0.3521174490451813, "learning_rate": 1.86135831323836e-05, "loss": 0.4285, "step": 16079 }, { "epoch": 0.34103200356302094, "grad_norm": 0.3671831488609314, "learning_rate": 1.8613413712672616e-05, "loss": 0.5691, "step": 16080 }, { "epoch": 0.34105321202095396, "grad_norm": 0.3466515839099884, "learning_rate": 1.861324428338187e-05, "loss": 0.4772, "step": 16081 }, { "epoch": 0.341074420478887, "grad_norm": 0.3313645124435425, "learning_rate": 1.8613074844511546e-05, "loss": 0.5012, "step": 16082 }, { "epoch": 0.34109562893682, "grad_norm": 0.3326382339000702, "learning_rate": 1.8612905396061835e-05, "loss": 0.6135, "step": 16083 }, { "epoch": 0.341116837394753, "grad_norm": 0.3864630162715912, "learning_rate": 1.8612735938032927e-05, "loss": 0.4847, "step": 16084 }, { "epoch": 0.34113804585268603, "grad_norm": 0.3500828146934509, "learning_rate": 1.8612566470425005e-05, "loss": 0.441, "step": 16085 }, { "epoch": 0.34115925431061905, "grad_norm": 0.2805517911911011, "learning_rate": 1.8612396993238268e-05, "loss": 0.4232, "step": 16086 }, { "epoch": 0.3411804627685521, "grad_norm": 0.35397711396217346, "learning_rate": 1.8612227506472897e-05, "loss": 0.4993, "step": 16087 }, { "epoch": 0.34120167122648515, "grad_norm": 0.3662963807582855, "learning_rate": 1.8612058010129077e-05, "loss": 0.5263, "step": 16088 }, { "epoch": 0.34122287968441817, "grad_norm": 0.3483835458755493, "learning_rate": 1.8611888504207e-05, "loss": 0.4645, "step": 16089 }, { "epoch": 0.3412440881423512, "grad_norm": 0.29811784625053406, "learning_rate": 1.861171898870686e-05, "loss": 0.4966, "step": 16090 }, { "epoch": 0.3412652966002842, "grad_norm": 0.328332781791687, "learning_rate": 1.8611549463628837e-05, "loss": 0.4631, "step": 16091 }, { "epoch": 0.3412865050582172, "grad_norm": 0.41856956481933594, "learning_rate": 1.8611379928973128e-05, "loss": 0.6258, "step": 16092 }, { "epoch": 0.34130771351615025, "grad_norm": 0.37275204062461853, "learning_rate": 1.8611210384739915e-05, "loss": 0.4817, "step": 16093 }, { "epoch": 0.34132892197408327, "grad_norm": 0.3264578580856323, "learning_rate": 1.8611040830929385e-05, "loss": 0.487, "step": 16094 }, { "epoch": 0.3413501304320163, "grad_norm": 0.30360957980155945, "learning_rate": 1.8610871267541735e-05, "loss": 0.5107, "step": 16095 }, { "epoch": 0.3413713388899493, "grad_norm": 0.46111422777175903, "learning_rate": 1.8610701694577148e-05, "loss": 0.4985, "step": 16096 }, { "epoch": 0.3413925473478823, "grad_norm": 1.498536467552185, "learning_rate": 1.861053211203581e-05, "loss": 0.5414, "step": 16097 }, { "epoch": 0.34141375580581534, "grad_norm": 0.3613855540752411, "learning_rate": 1.8610362519917913e-05, "loss": 0.5349, "step": 16098 }, { "epoch": 0.34143496426374836, "grad_norm": 0.31256335973739624, "learning_rate": 1.8610192918223652e-05, "loss": 0.509, "step": 16099 }, { "epoch": 0.3414561727216814, "grad_norm": 0.3511403799057007, "learning_rate": 1.8610023306953206e-05, "loss": 0.5352, "step": 16100 }, { "epoch": 0.3414773811796144, "grad_norm": 0.3907538652420044, "learning_rate": 1.8609853686106768e-05, "loss": 0.5271, "step": 16101 }, { "epoch": 0.3414985896375475, "grad_norm": 0.3361329734325409, "learning_rate": 1.860968405568452e-05, "loss": 0.503, "step": 16102 }, { "epoch": 0.3415197980954805, "grad_norm": 0.32967010140419006, "learning_rate": 1.8609514415686664e-05, "loss": 0.4257, "step": 16103 }, { "epoch": 0.3415410065534135, "grad_norm": 0.3113279938697815, "learning_rate": 1.8609344766113374e-05, "loss": 0.5329, "step": 16104 }, { "epoch": 0.34156221501134654, "grad_norm": 0.3337821364402771, "learning_rate": 1.8609175106964848e-05, "loss": 0.3923, "step": 16105 }, { "epoch": 0.34158342346927956, "grad_norm": 0.38808098435401917, "learning_rate": 1.8609005438241272e-05, "loss": 0.5901, "step": 16106 }, { "epoch": 0.3416046319272126, "grad_norm": 0.35154154896736145, "learning_rate": 1.860883575994284e-05, "loss": 0.5166, "step": 16107 }, { "epoch": 0.3416258403851456, "grad_norm": 0.3554617166519165, "learning_rate": 1.8608666072069728e-05, "loss": 0.5147, "step": 16108 }, { "epoch": 0.3416470488430786, "grad_norm": 0.3284420669078827, "learning_rate": 1.8608496374622133e-05, "loss": 0.4954, "step": 16109 }, { "epoch": 0.34166825730101164, "grad_norm": 0.32875919342041016, "learning_rate": 1.860832666760025e-05, "loss": 0.5249, "step": 16110 }, { "epoch": 0.34168946575894465, "grad_norm": 0.45320284366607666, "learning_rate": 1.8608156951004255e-05, "loss": 0.5921, "step": 16111 }, { "epoch": 0.3417106742168777, "grad_norm": 0.35636216402053833, "learning_rate": 1.860798722483434e-05, "loss": 0.5937, "step": 16112 }, { "epoch": 0.3417318826748107, "grad_norm": 0.3815547227859497, "learning_rate": 1.8607817489090703e-05, "loss": 0.5437, "step": 16113 }, { "epoch": 0.3417530911327437, "grad_norm": 0.3862484097480774, "learning_rate": 1.8607647743773525e-05, "loss": 0.453, "step": 16114 }, { "epoch": 0.3417742995906768, "grad_norm": 0.30758652091026306, "learning_rate": 1.8607477988882995e-05, "loss": 0.5706, "step": 16115 }, { "epoch": 0.3417955080486098, "grad_norm": 0.31097763776779175, "learning_rate": 1.86073082244193e-05, "loss": 0.4406, "step": 16116 }, { "epoch": 0.34181671650654283, "grad_norm": 0.3489823341369629, "learning_rate": 1.8607138450382635e-05, "loss": 0.5088, "step": 16117 }, { "epoch": 0.34183792496447585, "grad_norm": 0.4820445775985718, "learning_rate": 1.8606968666773185e-05, "loss": 0.5494, "step": 16118 }, { "epoch": 0.34185913342240887, "grad_norm": 0.5070381164550781, "learning_rate": 1.8606798873591137e-05, "loss": 0.5293, "step": 16119 }, { "epoch": 0.3418803418803419, "grad_norm": 0.3071277439594269, "learning_rate": 1.8606629070836683e-05, "loss": 0.4896, "step": 16120 }, { "epoch": 0.3419015503382749, "grad_norm": 0.3719393014907837, "learning_rate": 1.8606459258510014e-05, "loss": 0.5396, "step": 16121 }, { "epoch": 0.3419227587962079, "grad_norm": 0.3375779688358307, "learning_rate": 1.8606289436611313e-05, "loss": 0.6129, "step": 16122 }, { "epoch": 0.34194396725414095, "grad_norm": 0.31201937794685364, "learning_rate": 1.8606119605140775e-05, "loss": 0.5045, "step": 16123 }, { "epoch": 0.34196517571207397, "grad_norm": 0.40369996428489685, "learning_rate": 1.8605949764098577e-05, "loss": 0.5198, "step": 16124 }, { "epoch": 0.341986384170007, "grad_norm": 0.343045711517334, "learning_rate": 1.8605779913484925e-05, "loss": 0.5035, "step": 16125 }, { "epoch": 0.34200759262794, "grad_norm": 0.343231737613678, "learning_rate": 1.8605610053299993e-05, "loss": 0.4797, "step": 16126 }, { "epoch": 0.342028801085873, "grad_norm": 0.45026925206184387, "learning_rate": 1.8605440183543982e-05, "loss": 0.561, "step": 16127 }, { "epoch": 0.34205000954380604, "grad_norm": 0.36649149656295776, "learning_rate": 1.8605270304217074e-05, "loss": 0.5032, "step": 16128 }, { "epoch": 0.3420712180017391, "grad_norm": 0.34852656722068787, "learning_rate": 1.8605100415319457e-05, "loss": 0.5655, "step": 16129 }, { "epoch": 0.34209242645967214, "grad_norm": 0.35990026593208313, "learning_rate": 1.8604930516851323e-05, "loss": 0.6165, "step": 16130 }, { "epoch": 0.34211363491760516, "grad_norm": 0.343305379152298, "learning_rate": 1.8604760608812858e-05, "loss": 0.5285, "step": 16131 }, { "epoch": 0.3421348433755382, "grad_norm": 0.3936571478843689, "learning_rate": 1.8604590691204256e-05, "loss": 0.519, "step": 16132 }, { "epoch": 0.3421560518334712, "grad_norm": 0.3279413878917694, "learning_rate": 1.8604420764025703e-05, "loss": 0.434, "step": 16133 }, { "epoch": 0.3421772602914042, "grad_norm": 0.3566490411758423, "learning_rate": 1.8604250827277385e-05, "loss": 0.4392, "step": 16134 }, { "epoch": 0.34219846874933724, "grad_norm": 0.3160434663295746, "learning_rate": 1.86040808809595e-05, "loss": 0.5921, "step": 16135 }, { "epoch": 0.34221967720727026, "grad_norm": 0.3471141457557678, "learning_rate": 1.8603910925072224e-05, "loss": 0.4996, "step": 16136 }, { "epoch": 0.3422408856652033, "grad_norm": 0.7843260765075684, "learning_rate": 1.8603740959615754e-05, "loss": 0.5194, "step": 16137 }, { "epoch": 0.3422620941231363, "grad_norm": 0.33249422907829285, "learning_rate": 1.8603570984590282e-05, "loss": 0.4689, "step": 16138 }, { "epoch": 0.3422833025810693, "grad_norm": 0.40011587738990784, "learning_rate": 1.860340099999599e-05, "loss": 0.527, "step": 16139 }, { "epoch": 0.34230451103900233, "grad_norm": 0.3698779344558716, "learning_rate": 1.8603231005833072e-05, "loss": 0.4932, "step": 16140 }, { "epoch": 0.34232571949693535, "grad_norm": 0.31923002004623413, "learning_rate": 1.8603061002101714e-05, "loss": 0.492, "step": 16141 }, { "epoch": 0.3423469279548684, "grad_norm": 0.36269888281822205, "learning_rate": 1.8602890988802106e-05, "loss": 0.5523, "step": 16142 }, { "epoch": 0.34236813641280145, "grad_norm": 0.32945114374160767, "learning_rate": 1.8602720965934437e-05, "loss": 0.4763, "step": 16143 }, { "epoch": 0.34238934487073447, "grad_norm": 0.44236087799072266, "learning_rate": 1.86025509334989e-05, "loss": 0.5127, "step": 16144 }, { "epoch": 0.3424105533286675, "grad_norm": 0.38466498255729675, "learning_rate": 1.8602380891495677e-05, "loss": 0.4771, "step": 16145 }, { "epoch": 0.3424317617866005, "grad_norm": 0.35012421011924744, "learning_rate": 1.8602210839924962e-05, "loss": 0.5587, "step": 16146 }, { "epoch": 0.3424529702445335, "grad_norm": 0.35535964369773865, "learning_rate": 1.8602040778786942e-05, "loss": 0.6009, "step": 16147 }, { "epoch": 0.34247417870246655, "grad_norm": 0.466436505317688, "learning_rate": 1.8601870708081807e-05, "loss": 0.4901, "step": 16148 }, { "epoch": 0.34249538716039957, "grad_norm": 0.3325722813606262, "learning_rate": 1.860170062780975e-05, "loss": 0.5205, "step": 16149 }, { "epoch": 0.3425165956183326, "grad_norm": 0.33897167444229126, "learning_rate": 1.860153053797095e-05, "loss": 0.5091, "step": 16150 }, { "epoch": 0.3425378040762656, "grad_norm": 0.37960949540138245, "learning_rate": 1.8601360438565605e-05, "loss": 0.4827, "step": 16151 }, { "epoch": 0.3425590125341986, "grad_norm": 0.32433345913887024, "learning_rate": 1.8601190329593906e-05, "loss": 0.5293, "step": 16152 }, { "epoch": 0.34258022099213165, "grad_norm": 0.342957079410553, "learning_rate": 1.860102021105603e-05, "loss": 0.5282, "step": 16153 }, { "epoch": 0.34260142945006467, "grad_norm": 0.4359630048274994, "learning_rate": 1.860085008295218e-05, "loss": 0.5272, "step": 16154 }, { "epoch": 0.3426226379079977, "grad_norm": 0.4449823200702667, "learning_rate": 1.8600679945282537e-05, "loss": 0.5936, "step": 16155 }, { "epoch": 0.34264384636593076, "grad_norm": 0.39373520016670227, "learning_rate": 1.8600509798047294e-05, "loss": 0.5152, "step": 16156 }, { "epoch": 0.3426650548238638, "grad_norm": 0.3613375723361969, "learning_rate": 1.8600339641246634e-05, "loss": 0.52, "step": 16157 }, { "epoch": 0.3426862632817968, "grad_norm": 0.353406697511673, "learning_rate": 1.8600169474880755e-05, "loss": 0.5661, "step": 16158 }, { "epoch": 0.3427074717397298, "grad_norm": 0.35013309121131897, "learning_rate": 1.8599999298949842e-05, "loss": 0.5329, "step": 16159 }, { "epoch": 0.34272868019766284, "grad_norm": 0.3804348409175873, "learning_rate": 1.8599829113454087e-05, "loss": 0.5625, "step": 16160 }, { "epoch": 0.34274988865559586, "grad_norm": 0.3513917028903961, "learning_rate": 1.8599658918393673e-05, "loss": 0.5126, "step": 16161 }, { "epoch": 0.3427710971135289, "grad_norm": 0.3160937428474426, "learning_rate": 1.8599488713768794e-05, "loss": 0.4837, "step": 16162 }, { "epoch": 0.3427923055714619, "grad_norm": 0.34566202759742737, "learning_rate": 1.859931849957964e-05, "loss": 0.4549, "step": 16163 }, { "epoch": 0.3428135140293949, "grad_norm": 0.3544215261936188, "learning_rate": 1.8599148275826394e-05, "loss": 0.5896, "step": 16164 }, { "epoch": 0.34283472248732794, "grad_norm": 0.328635573387146, "learning_rate": 1.8598978042509252e-05, "loss": 0.4617, "step": 16165 }, { "epoch": 0.34285593094526096, "grad_norm": 0.381370484828949, "learning_rate": 1.85988077996284e-05, "loss": 0.4651, "step": 16166 }, { "epoch": 0.342877139403194, "grad_norm": 0.32524141669273376, "learning_rate": 1.8598637547184032e-05, "loss": 0.5132, "step": 16167 }, { "epoch": 0.342898347861127, "grad_norm": 0.3148390054702759, "learning_rate": 1.8598467285176337e-05, "loss": 0.496, "step": 16168 }, { "epoch": 0.34291955631906, "grad_norm": 0.33537837862968445, "learning_rate": 1.8598297013605497e-05, "loss": 0.4489, "step": 16169 }, { "epoch": 0.3429407647769931, "grad_norm": 0.3145514130592346, "learning_rate": 1.8598126732471706e-05, "loss": 0.5278, "step": 16170 }, { "epoch": 0.3429619732349261, "grad_norm": 0.3385774791240692, "learning_rate": 1.859795644177515e-05, "loss": 0.5553, "step": 16171 }, { "epoch": 0.34298318169285913, "grad_norm": 0.42688778042793274, "learning_rate": 1.8597786141516026e-05, "loss": 0.5606, "step": 16172 }, { "epoch": 0.34300439015079215, "grad_norm": 0.34633269906044006, "learning_rate": 1.8597615831694516e-05, "loss": 0.4526, "step": 16173 }, { "epoch": 0.34302559860872517, "grad_norm": 0.2953563332557678, "learning_rate": 1.8597445512310818e-05, "loss": 0.4617, "step": 16174 }, { "epoch": 0.3430468070666582, "grad_norm": 0.3996964991092682, "learning_rate": 1.859727518336511e-05, "loss": 0.4134, "step": 16175 }, { "epoch": 0.3430680155245912, "grad_norm": 0.4253106713294983, "learning_rate": 1.859710484485759e-05, "loss": 0.5211, "step": 16176 }, { "epoch": 0.3430892239825242, "grad_norm": 0.3410726487636566, "learning_rate": 1.859693449678844e-05, "loss": 0.5264, "step": 16177 }, { "epoch": 0.34311043244045725, "grad_norm": 0.33521366119384766, "learning_rate": 1.859676413915786e-05, "loss": 0.5269, "step": 16178 }, { "epoch": 0.34313164089839027, "grad_norm": 0.35234901309013367, "learning_rate": 1.8596593771966033e-05, "loss": 0.5324, "step": 16179 }, { "epoch": 0.3431528493563233, "grad_norm": 0.3918198049068451, "learning_rate": 1.859642339521315e-05, "loss": 0.5321, "step": 16180 }, { "epoch": 0.3431740578142563, "grad_norm": 0.33473873138427734, "learning_rate": 1.8596253008899394e-05, "loss": 0.4253, "step": 16181 }, { "epoch": 0.3431952662721893, "grad_norm": 0.46871018409729004, "learning_rate": 1.8596082613024965e-05, "loss": 0.6361, "step": 16182 }, { "epoch": 0.34321647473012235, "grad_norm": 0.353575736284256, "learning_rate": 1.8595912207590044e-05, "loss": 0.5119, "step": 16183 }, { "epoch": 0.3432376831880554, "grad_norm": 0.45088931918144226, "learning_rate": 1.8595741792594825e-05, "loss": 0.5116, "step": 16184 }, { "epoch": 0.34325889164598844, "grad_norm": 0.3326956033706665, "learning_rate": 1.8595571368039498e-05, "loss": 0.483, "step": 16185 }, { "epoch": 0.34328010010392146, "grad_norm": 0.33314400911331177, "learning_rate": 1.859540093392425e-05, "loss": 0.5021, "step": 16186 }, { "epoch": 0.3433013085618545, "grad_norm": 0.3623732328414917, "learning_rate": 1.8595230490249273e-05, "loss": 0.5749, "step": 16187 }, { "epoch": 0.3433225170197875, "grad_norm": 0.3300328552722931, "learning_rate": 1.8595060037014755e-05, "loss": 0.4742, "step": 16188 }, { "epoch": 0.3433437254777205, "grad_norm": 0.3422521948814392, "learning_rate": 1.8594889574220888e-05, "loss": 0.5041, "step": 16189 }, { "epoch": 0.34336493393565354, "grad_norm": 0.34251299500465393, "learning_rate": 1.8594719101867854e-05, "loss": 0.5102, "step": 16190 }, { "epoch": 0.34338614239358656, "grad_norm": 0.3555324673652649, "learning_rate": 1.8594548619955853e-05, "loss": 0.5053, "step": 16191 }, { "epoch": 0.3434073508515196, "grad_norm": 0.4423947334289551, "learning_rate": 1.859437812848507e-05, "loss": 0.6068, "step": 16192 }, { "epoch": 0.3434285593094526, "grad_norm": 0.41717463731765747, "learning_rate": 1.8594207627455693e-05, "loss": 0.6132, "step": 16193 }, { "epoch": 0.3434497677673856, "grad_norm": 0.322690486907959, "learning_rate": 1.859403711686791e-05, "loss": 0.4785, "step": 16194 }, { "epoch": 0.34347097622531864, "grad_norm": 0.32796192169189453, "learning_rate": 1.8593866596721913e-05, "loss": 0.5625, "step": 16195 }, { "epoch": 0.34349218468325166, "grad_norm": 0.35689568519592285, "learning_rate": 1.8593696067017898e-05, "loss": 0.4597, "step": 16196 }, { "epoch": 0.34351339314118473, "grad_norm": 0.36683446168899536, "learning_rate": 1.8593525527756044e-05, "loss": 0.5706, "step": 16197 }, { "epoch": 0.34353460159911775, "grad_norm": 0.3709820806980133, "learning_rate": 1.8593354978936548e-05, "loss": 0.4931, "step": 16198 }, { "epoch": 0.34355581005705077, "grad_norm": 0.31621384620666504, "learning_rate": 1.8593184420559596e-05, "loss": 0.4521, "step": 16199 }, { "epoch": 0.3435770185149838, "grad_norm": 0.3472774624824524, "learning_rate": 1.8593013852625383e-05, "loss": 0.5702, "step": 16200 }, { "epoch": 0.3435982269729168, "grad_norm": 0.36905574798583984, "learning_rate": 1.859284327513409e-05, "loss": 0.5333, "step": 16201 }, { "epoch": 0.34361943543084983, "grad_norm": 0.41780102252960205, "learning_rate": 1.859267268808591e-05, "loss": 0.463, "step": 16202 }, { "epoch": 0.34364064388878285, "grad_norm": 0.34148988127708435, "learning_rate": 1.8592502091481036e-05, "loss": 0.5277, "step": 16203 }, { "epoch": 0.34366185234671587, "grad_norm": 0.34270647168159485, "learning_rate": 1.859233148531966e-05, "loss": 0.504, "step": 16204 }, { "epoch": 0.3436830608046489, "grad_norm": 0.3242352604866028, "learning_rate": 1.8592160869601962e-05, "loss": 0.5676, "step": 16205 }, { "epoch": 0.3437042692625819, "grad_norm": 0.32844841480255127, "learning_rate": 1.859199024432814e-05, "loss": 0.4755, "step": 16206 }, { "epoch": 0.3437254777205149, "grad_norm": 0.3408708870410919, "learning_rate": 1.859181960949838e-05, "loss": 0.4864, "step": 16207 }, { "epoch": 0.34374668617844795, "grad_norm": 0.4302821755409241, "learning_rate": 1.8591648965112875e-05, "loss": 0.4995, "step": 16208 }, { "epoch": 0.34376789463638097, "grad_norm": 0.40295690298080444, "learning_rate": 1.859147831117181e-05, "loss": 0.6356, "step": 16209 }, { "epoch": 0.343789103094314, "grad_norm": 0.3421338200569153, "learning_rate": 1.859130764767538e-05, "loss": 0.5012, "step": 16210 }, { "epoch": 0.34381031155224706, "grad_norm": 0.6557652354240417, "learning_rate": 1.859113697462377e-05, "loss": 0.5202, "step": 16211 }, { "epoch": 0.3438315200101801, "grad_norm": 0.3845294117927551, "learning_rate": 1.8590966292017172e-05, "loss": 0.5246, "step": 16212 }, { "epoch": 0.3438527284681131, "grad_norm": 0.3382973372936249, "learning_rate": 1.859079559985578e-05, "loss": 0.561, "step": 16213 }, { "epoch": 0.3438739369260461, "grad_norm": 0.3615536391735077, "learning_rate": 1.8590624898139775e-05, "loss": 0.5005, "step": 16214 }, { "epoch": 0.34389514538397914, "grad_norm": 0.3925306797027588, "learning_rate": 1.859045418686935e-05, "loss": 0.4839, "step": 16215 }, { "epoch": 0.34391635384191216, "grad_norm": 0.3360649347305298, "learning_rate": 1.85902834660447e-05, "loss": 0.5993, "step": 16216 }, { "epoch": 0.3439375622998452, "grad_norm": 0.3571457266807556, "learning_rate": 1.8590112735666014e-05, "loss": 0.4695, "step": 16217 }, { "epoch": 0.3439587707577782, "grad_norm": 0.34811991453170776, "learning_rate": 1.8589941995733473e-05, "loss": 0.5576, "step": 16218 }, { "epoch": 0.3439799792157112, "grad_norm": 0.31117329001426697, "learning_rate": 1.858977124624728e-05, "loss": 0.5344, "step": 16219 }, { "epoch": 0.34400118767364424, "grad_norm": 0.2938377857208252, "learning_rate": 1.8589600487207614e-05, "loss": 0.489, "step": 16220 }, { "epoch": 0.34402239613157726, "grad_norm": 0.33116012811660767, "learning_rate": 1.858942971861467e-05, "loss": 0.5554, "step": 16221 }, { "epoch": 0.3440436045895103, "grad_norm": 0.45532262325286865, "learning_rate": 1.8589258940468637e-05, "loss": 0.4886, "step": 16222 }, { "epoch": 0.3440648130474433, "grad_norm": 0.32117822766304016, "learning_rate": 1.8589088152769702e-05, "loss": 0.4507, "step": 16223 }, { "epoch": 0.34408602150537637, "grad_norm": 0.4137192964553833, "learning_rate": 1.858891735551806e-05, "loss": 0.5704, "step": 16224 }, { "epoch": 0.3441072299633094, "grad_norm": 0.3223254084587097, "learning_rate": 1.85887465487139e-05, "loss": 0.5815, "step": 16225 }, { "epoch": 0.3441284384212424, "grad_norm": 0.33657944202423096, "learning_rate": 1.8588575732357407e-05, "loss": 0.4681, "step": 16226 }, { "epoch": 0.34414964687917543, "grad_norm": 0.3350260257720947, "learning_rate": 1.8588404906448775e-05, "loss": 0.4763, "step": 16227 }, { "epoch": 0.34417085533710845, "grad_norm": 0.3366566002368927, "learning_rate": 1.8588234070988197e-05, "loss": 0.6112, "step": 16228 }, { "epoch": 0.34419206379504147, "grad_norm": 0.3572109043598175, "learning_rate": 1.858806322597586e-05, "loss": 0.4551, "step": 16229 }, { "epoch": 0.3442132722529745, "grad_norm": 0.5221603512763977, "learning_rate": 1.858789237141195e-05, "loss": 0.5826, "step": 16230 }, { "epoch": 0.3442344807109075, "grad_norm": 0.35889318585395813, "learning_rate": 1.8587721507296664e-05, "loss": 0.4532, "step": 16231 }, { "epoch": 0.34425568916884053, "grad_norm": 0.329764723777771, "learning_rate": 1.8587550633630187e-05, "loss": 0.562, "step": 16232 }, { "epoch": 0.34427689762677355, "grad_norm": 0.44100627303123474, "learning_rate": 1.8587379750412712e-05, "loss": 0.5446, "step": 16233 }, { "epoch": 0.34429810608470657, "grad_norm": 0.3756105601787567, "learning_rate": 1.8587208857644426e-05, "loss": 0.5156, "step": 16234 }, { "epoch": 0.3443193145426396, "grad_norm": 0.3187378942966461, "learning_rate": 1.858703795532552e-05, "loss": 0.5058, "step": 16235 }, { "epoch": 0.3443405230005726, "grad_norm": 0.3404030501842499, "learning_rate": 1.8586867043456188e-05, "loss": 0.4105, "step": 16236 }, { "epoch": 0.3443617314585056, "grad_norm": 0.4069378972053528, "learning_rate": 1.8586696122036616e-05, "loss": 0.5313, "step": 16237 }, { "epoch": 0.3443829399164387, "grad_norm": 0.3820429742336273, "learning_rate": 1.8586525191066998e-05, "loss": 0.5236, "step": 16238 }, { "epoch": 0.3444041483743717, "grad_norm": 0.38801291584968567, "learning_rate": 1.8586354250547517e-05, "loss": 0.6367, "step": 16239 }, { "epoch": 0.34442535683230474, "grad_norm": 0.3448656499385834, "learning_rate": 1.858618330047837e-05, "loss": 0.5481, "step": 16240 }, { "epoch": 0.34444656529023776, "grad_norm": 0.3224967122077942, "learning_rate": 1.8586012340859743e-05, "loss": 0.5437, "step": 16241 }, { "epoch": 0.3444677737481708, "grad_norm": 0.3614450991153717, "learning_rate": 1.858584137169183e-05, "loss": 0.519, "step": 16242 }, { "epoch": 0.3444889822061038, "grad_norm": 0.3155477046966553, "learning_rate": 1.8585670392974812e-05, "loss": 0.497, "step": 16243 }, { "epoch": 0.3445101906640368, "grad_norm": 0.3468155562877655, "learning_rate": 1.8585499404708894e-05, "loss": 0.46, "step": 16244 }, { "epoch": 0.34453139912196984, "grad_norm": 0.541151225566864, "learning_rate": 1.8585328406894255e-05, "loss": 0.4519, "step": 16245 }, { "epoch": 0.34455260757990286, "grad_norm": 0.36578482389450073, "learning_rate": 1.8585157399531088e-05, "loss": 0.4757, "step": 16246 }, { "epoch": 0.3445738160378359, "grad_norm": 0.3650214374065399, "learning_rate": 1.8584986382619582e-05, "loss": 0.5693, "step": 16247 }, { "epoch": 0.3445950244957689, "grad_norm": 0.3451751470565796, "learning_rate": 1.8584815356159932e-05, "loss": 0.5117, "step": 16248 }, { "epoch": 0.3446162329537019, "grad_norm": 0.53196120262146, "learning_rate": 1.8584644320152323e-05, "loss": 0.5698, "step": 16249 }, { "epoch": 0.34463744141163494, "grad_norm": 0.5101512670516968, "learning_rate": 1.8584473274596946e-05, "loss": 0.5392, "step": 16250 }, { "epoch": 0.34465864986956796, "grad_norm": 0.3482319414615631, "learning_rate": 1.8584302219494e-05, "loss": 0.6147, "step": 16251 }, { "epoch": 0.34467985832750103, "grad_norm": 0.3254554867744446, "learning_rate": 1.8584131154843656e-05, "loss": 0.4767, "step": 16252 }, { "epoch": 0.34470106678543405, "grad_norm": 0.304684579372406, "learning_rate": 1.8583960080646123e-05, "loss": 0.5545, "step": 16253 }, { "epoch": 0.34472227524336707, "grad_norm": 0.3144783675670624, "learning_rate": 1.858378899690158e-05, "loss": 0.4753, "step": 16254 }, { "epoch": 0.3447434837013001, "grad_norm": 0.3550488352775574, "learning_rate": 1.8583617903610228e-05, "loss": 0.5043, "step": 16255 }, { "epoch": 0.3447646921592331, "grad_norm": 0.3119584023952484, "learning_rate": 1.8583446800772248e-05, "loss": 0.4825, "step": 16256 }, { "epoch": 0.34478590061716613, "grad_norm": 0.34654539823532104, "learning_rate": 1.858327568838783e-05, "loss": 0.5657, "step": 16257 }, { "epoch": 0.34480710907509915, "grad_norm": 0.3316742479801178, "learning_rate": 1.858310456645717e-05, "loss": 0.5168, "step": 16258 }, { "epoch": 0.34482831753303217, "grad_norm": 0.36210566759109497, "learning_rate": 1.8582933434980456e-05, "loss": 0.5508, "step": 16259 }, { "epoch": 0.3448495259909652, "grad_norm": 0.35562562942504883, "learning_rate": 1.8582762293957875e-05, "loss": 0.53, "step": 16260 }, { "epoch": 0.3448707344488982, "grad_norm": 0.3720264136791229, "learning_rate": 1.8582591143389623e-05, "loss": 0.3915, "step": 16261 }, { "epoch": 0.34489194290683123, "grad_norm": 0.3580392301082611, "learning_rate": 1.8582419983275885e-05, "loss": 0.6102, "step": 16262 }, { "epoch": 0.34491315136476425, "grad_norm": 0.36134400963783264, "learning_rate": 1.8582248813616858e-05, "loss": 0.5823, "step": 16263 }, { "epoch": 0.34493435982269727, "grad_norm": 0.35444366931915283, "learning_rate": 1.8582077634412726e-05, "loss": 0.5714, "step": 16264 }, { "epoch": 0.34495556828063034, "grad_norm": 0.33078819513320923, "learning_rate": 1.858190644566368e-05, "loss": 0.5425, "step": 16265 }, { "epoch": 0.34497677673856336, "grad_norm": 0.3371104598045349, "learning_rate": 1.8581735247369915e-05, "loss": 0.5066, "step": 16266 }, { "epoch": 0.3449979851964964, "grad_norm": 0.3563084304332733, "learning_rate": 1.8581564039531617e-05, "loss": 0.4906, "step": 16267 }, { "epoch": 0.3450191936544294, "grad_norm": 0.3485009968280792, "learning_rate": 1.8581392822148982e-05, "loss": 0.5024, "step": 16268 }, { "epoch": 0.3450404021123624, "grad_norm": 0.33653268218040466, "learning_rate": 1.8581221595222193e-05, "loss": 0.5159, "step": 16269 }, { "epoch": 0.34506161057029544, "grad_norm": 0.36002060770988464, "learning_rate": 1.8581050358751444e-05, "loss": 0.5536, "step": 16270 }, { "epoch": 0.34508281902822846, "grad_norm": 0.32018813490867615, "learning_rate": 1.8580879112736924e-05, "loss": 0.5708, "step": 16271 }, { "epoch": 0.3451040274861615, "grad_norm": 0.3452092707157135, "learning_rate": 1.858070785717883e-05, "loss": 0.5261, "step": 16272 }, { "epoch": 0.3451252359440945, "grad_norm": 0.33557358384132385, "learning_rate": 1.8580536592077343e-05, "loss": 0.4677, "step": 16273 }, { "epoch": 0.3451464444020275, "grad_norm": 0.3549337387084961, "learning_rate": 1.8580365317432658e-05, "loss": 0.4728, "step": 16274 }, { "epoch": 0.34516765285996054, "grad_norm": 0.35121846199035645, "learning_rate": 1.8580194033244968e-05, "loss": 0.4696, "step": 16275 }, { "epoch": 0.34518886131789356, "grad_norm": 0.3195476531982422, "learning_rate": 1.8580022739514456e-05, "loss": 0.448, "step": 16276 }, { "epoch": 0.3452100697758266, "grad_norm": 0.3409576117992401, "learning_rate": 1.857985143624132e-05, "loss": 0.5482, "step": 16277 }, { "epoch": 0.3452312782337596, "grad_norm": 0.33620744943618774, "learning_rate": 1.8579680123425748e-05, "loss": 0.5082, "step": 16278 }, { "epoch": 0.3452524866916927, "grad_norm": 0.3771924078464508, "learning_rate": 1.8579508801067928e-05, "loss": 0.4782, "step": 16279 }, { "epoch": 0.3452736951496257, "grad_norm": 0.3315405547618866, "learning_rate": 1.8579337469168054e-05, "loss": 0.556, "step": 16280 }, { "epoch": 0.3452949036075587, "grad_norm": 0.3682345747947693, "learning_rate": 1.8579166127726318e-05, "loss": 0.4962, "step": 16281 }, { "epoch": 0.34531611206549173, "grad_norm": 0.3310946524143219, "learning_rate": 1.8578994776742907e-05, "loss": 0.5133, "step": 16282 }, { "epoch": 0.34533732052342475, "grad_norm": 0.3798237144947052, "learning_rate": 1.8578823416218013e-05, "loss": 0.5259, "step": 16283 }, { "epoch": 0.34535852898135777, "grad_norm": 0.398214191198349, "learning_rate": 1.857865204615182e-05, "loss": 0.4329, "step": 16284 }, { "epoch": 0.3453797374392908, "grad_norm": 0.33897924423217773, "learning_rate": 1.857848066654453e-05, "loss": 0.4788, "step": 16285 }, { "epoch": 0.3454009458972238, "grad_norm": 0.3550310432910919, "learning_rate": 1.857830927739633e-05, "loss": 0.5652, "step": 16286 }, { "epoch": 0.34542215435515683, "grad_norm": 0.3285444378852844, "learning_rate": 1.8578137878707404e-05, "loss": 0.443, "step": 16287 }, { "epoch": 0.34544336281308985, "grad_norm": 0.360996276140213, "learning_rate": 1.8577966470477953e-05, "loss": 0.5155, "step": 16288 }, { "epoch": 0.34546457127102287, "grad_norm": 0.37046387791633606, "learning_rate": 1.857779505270816e-05, "loss": 0.5782, "step": 16289 }, { "epoch": 0.3454857797289559, "grad_norm": 0.34837204217910767, "learning_rate": 1.8577623625398214e-05, "loss": 0.5234, "step": 16290 }, { "epoch": 0.3455069881868889, "grad_norm": 0.34545499086380005, "learning_rate": 1.8577452188548316e-05, "loss": 0.5947, "step": 16291 }, { "epoch": 0.3455281966448219, "grad_norm": 0.3663697838783264, "learning_rate": 1.8577280742158645e-05, "loss": 0.4826, "step": 16292 }, { "epoch": 0.345549405102755, "grad_norm": 0.3458205759525299, "learning_rate": 1.85771092862294e-05, "loss": 0.4707, "step": 16293 }, { "epoch": 0.345570613560688, "grad_norm": 0.36500465869903564, "learning_rate": 1.8576937820760766e-05, "loss": 0.5187, "step": 16294 }, { "epoch": 0.34559182201862104, "grad_norm": 0.36828669905662537, "learning_rate": 1.857676634575294e-05, "loss": 0.5304, "step": 16295 }, { "epoch": 0.34561303047655406, "grad_norm": 0.33283841609954834, "learning_rate": 1.8576594861206107e-05, "loss": 0.4999, "step": 16296 }, { "epoch": 0.3456342389344871, "grad_norm": 0.35747620463371277, "learning_rate": 1.8576423367120458e-05, "loss": 0.5524, "step": 16297 }, { "epoch": 0.3456554473924201, "grad_norm": 0.4133688807487488, "learning_rate": 1.8576251863496188e-05, "loss": 0.5208, "step": 16298 }, { "epoch": 0.3456766558503531, "grad_norm": 0.3301793336868286, "learning_rate": 1.8576080350333483e-05, "loss": 0.4672, "step": 16299 }, { "epoch": 0.34569786430828614, "grad_norm": 0.3175887167453766, "learning_rate": 1.857590882763254e-05, "loss": 0.4873, "step": 16300 }, { "epoch": 0.34571907276621916, "grad_norm": 0.3952030539512634, "learning_rate": 1.857573729539354e-05, "loss": 0.4698, "step": 16301 }, { "epoch": 0.3457402812241522, "grad_norm": 0.416818231344223, "learning_rate": 1.8575565753616682e-05, "loss": 0.5368, "step": 16302 }, { "epoch": 0.3457614896820852, "grad_norm": 0.3540634214878082, "learning_rate": 1.8575394202302156e-05, "loss": 0.5546, "step": 16303 }, { "epoch": 0.3457826981400182, "grad_norm": 0.3569357693195343, "learning_rate": 1.8575222641450148e-05, "loss": 0.5474, "step": 16304 }, { "epoch": 0.34580390659795124, "grad_norm": 0.3272886872291565, "learning_rate": 1.8575051071060854e-05, "loss": 0.4289, "step": 16305 }, { "epoch": 0.3458251150558843, "grad_norm": 0.32487916946411133, "learning_rate": 1.857487949113446e-05, "loss": 0.4823, "step": 16306 }, { "epoch": 0.34584632351381733, "grad_norm": 0.3727525770664215, "learning_rate": 1.8574707901671163e-05, "loss": 0.5483, "step": 16307 }, { "epoch": 0.34586753197175035, "grad_norm": 0.30747759342193604, "learning_rate": 1.857453630267115e-05, "loss": 0.4592, "step": 16308 }, { "epoch": 0.3458887404296834, "grad_norm": 0.3125345706939697, "learning_rate": 1.857436469413461e-05, "loss": 0.4922, "step": 16309 }, { "epoch": 0.3459099488876164, "grad_norm": 0.35978177189826965, "learning_rate": 1.8574193076061736e-05, "loss": 0.503, "step": 16310 }, { "epoch": 0.3459311573455494, "grad_norm": 0.3283540904521942, "learning_rate": 1.857402144845272e-05, "loss": 0.472, "step": 16311 }, { "epoch": 0.34595236580348243, "grad_norm": 0.43213123083114624, "learning_rate": 1.8573849811307753e-05, "loss": 0.5064, "step": 16312 }, { "epoch": 0.34597357426141545, "grad_norm": 0.3862159848213196, "learning_rate": 1.8573678164627024e-05, "loss": 0.467, "step": 16313 }, { "epoch": 0.34599478271934847, "grad_norm": 0.3341182768344879, "learning_rate": 1.8573506508410724e-05, "loss": 0.5125, "step": 16314 }, { "epoch": 0.3460159911772815, "grad_norm": 0.3628038465976715, "learning_rate": 1.8573334842659046e-05, "loss": 0.4773, "step": 16315 }, { "epoch": 0.3460371996352145, "grad_norm": 0.3305360674858093, "learning_rate": 1.8573163167372178e-05, "loss": 0.4442, "step": 16316 }, { "epoch": 0.34605840809314753, "grad_norm": 0.3457777798175812, "learning_rate": 1.8572991482550315e-05, "loss": 0.4295, "step": 16317 }, { "epoch": 0.34607961655108055, "grad_norm": 0.4281184673309326, "learning_rate": 1.857281978819364e-05, "loss": 0.552, "step": 16318 }, { "epoch": 0.34610082500901357, "grad_norm": 0.3914679288864136, "learning_rate": 1.857264808430235e-05, "loss": 0.6037, "step": 16319 }, { "epoch": 0.34612203346694664, "grad_norm": 0.3260173797607422, "learning_rate": 1.857247637087664e-05, "loss": 0.4888, "step": 16320 }, { "epoch": 0.34614324192487966, "grad_norm": 0.35516467690467834, "learning_rate": 1.8572304647916693e-05, "loss": 0.5714, "step": 16321 }, { "epoch": 0.3461644503828127, "grad_norm": 0.342573344707489, "learning_rate": 1.8572132915422705e-05, "loss": 0.5139, "step": 16322 }, { "epoch": 0.3461856588407457, "grad_norm": 0.3287472128868103, "learning_rate": 1.8571961173394862e-05, "loss": 0.4793, "step": 16323 }, { "epoch": 0.3462068672986787, "grad_norm": 0.39845630526542664, "learning_rate": 1.8571789421833362e-05, "loss": 0.5439, "step": 16324 }, { "epoch": 0.34622807575661174, "grad_norm": 0.35925185680389404, "learning_rate": 1.857161766073839e-05, "loss": 0.5987, "step": 16325 }, { "epoch": 0.34624928421454476, "grad_norm": 0.32522693276405334, "learning_rate": 1.8571445890110142e-05, "loss": 0.5078, "step": 16326 }, { "epoch": 0.3462704926724778, "grad_norm": 0.381696879863739, "learning_rate": 1.8571274109948805e-05, "loss": 0.5257, "step": 16327 }, { "epoch": 0.3462917011304108, "grad_norm": 0.3286169171333313, "learning_rate": 1.857110232025457e-05, "loss": 0.5439, "step": 16328 }, { "epoch": 0.3463129095883438, "grad_norm": 0.4275953471660614, "learning_rate": 1.857093052102763e-05, "loss": 0.553, "step": 16329 }, { "epoch": 0.34633411804627684, "grad_norm": 0.3630782961845398, "learning_rate": 1.8570758712268176e-05, "loss": 0.536, "step": 16330 }, { "epoch": 0.34635532650420986, "grad_norm": 0.32924896478652954, "learning_rate": 1.8570586893976397e-05, "loss": 0.4576, "step": 16331 }, { "epoch": 0.3463765349621429, "grad_norm": 0.32065173983573914, "learning_rate": 1.857041506615249e-05, "loss": 0.4765, "step": 16332 }, { "epoch": 0.3463977434200759, "grad_norm": 0.3258814513683319, "learning_rate": 1.8570243228796638e-05, "loss": 0.54, "step": 16333 }, { "epoch": 0.346418951878009, "grad_norm": 0.34415188431739807, "learning_rate": 1.8570071381909037e-05, "loss": 0.5595, "step": 16334 }, { "epoch": 0.346440160335942, "grad_norm": 0.3819315433502197, "learning_rate": 1.856989952548988e-05, "loss": 0.4829, "step": 16335 }, { "epoch": 0.346461368793875, "grad_norm": 0.38263049721717834, "learning_rate": 1.8569727659539348e-05, "loss": 0.5426, "step": 16336 }, { "epoch": 0.34648257725180803, "grad_norm": 0.5516663193702698, "learning_rate": 1.8569555784057646e-05, "loss": 0.5007, "step": 16337 }, { "epoch": 0.34650378570974105, "grad_norm": 0.37799322605133057, "learning_rate": 1.8569383899044953e-05, "loss": 0.5664, "step": 16338 }, { "epoch": 0.34652499416767407, "grad_norm": 0.33385729789733887, "learning_rate": 1.856921200450147e-05, "loss": 0.4552, "step": 16339 }, { "epoch": 0.3465462026256071, "grad_norm": 0.3108939826488495, "learning_rate": 1.8569040100427385e-05, "loss": 0.4332, "step": 16340 }, { "epoch": 0.3465674110835401, "grad_norm": 0.37203526496887207, "learning_rate": 1.8568868186822885e-05, "loss": 0.5701, "step": 16341 }, { "epoch": 0.34658861954147313, "grad_norm": 0.3452445864677429, "learning_rate": 1.8568696263688163e-05, "loss": 0.5764, "step": 16342 }, { "epoch": 0.34660982799940615, "grad_norm": 0.3111940920352936, "learning_rate": 1.8568524331023416e-05, "loss": 0.4822, "step": 16343 }, { "epoch": 0.34663103645733917, "grad_norm": 0.36672911047935486, "learning_rate": 1.8568352388828826e-05, "loss": 0.4585, "step": 16344 }, { "epoch": 0.3466522449152722, "grad_norm": 0.3960036337375641, "learning_rate": 1.856818043710459e-05, "loss": 0.5003, "step": 16345 }, { "epoch": 0.3466734533732052, "grad_norm": 0.3477890193462372, "learning_rate": 1.85680084758509e-05, "loss": 0.5717, "step": 16346 }, { "epoch": 0.3466946618311383, "grad_norm": 0.34669288992881775, "learning_rate": 1.8567836505067946e-05, "loss": 0.4423, "step": 16347 }, { "epoch": 0.3467158702890713, "grad_norm": 0.42441266775131226, "learning_rate": 1.8567664524755918e-05, "loss": 0.4385, "step": 16348 }, { "epoch": 0.3467370787470043, "grad_norm": 0.34452563524246216, "learning_rate": 1.8567492534915007e-05, "loss": 0.5198, "step": 16349 }, { "epoch": 0.34675828720493734, "grad_norm": 0.33047911524772644, "learning_rate": 1.8567320535545403e-05, "loss": 0.5591, "step": 16350 }, { "epoch": 0.34677949566287036, "grad_norm": 0.3190203011035919, "learning_rate": 1.8567148526647304e-05, "loss": 0.5154, "step": 16351 }, { "epoch": 0.3468007041208034, "grad_norm": 0.3557947278022766, "learning_rate": 1.8566976508220895e-05, "loss": 0.5072, "step": 16352 }, { "epoch": 0.3468219125787364, "grad_norm": 0.37226343154907227, "learning_rate": 1.8566804480266367e-05, "loss": 0.4594, "step": 16353 }, { "epoch": 0.3468431210366694, "grad_norm": 0.359723299741745, "learning_rate": 1.8566632442783915e-05, "loss": 0.496, "step": 16354 }, { "epoch": 0.34686432949460244, "grad_norm": 0.37177684903144836, "learning_rate": 1.856646039577373e-05, "loss": 0.5323, "step": 16355 }, { "epoch": 0.34688553795253546, "grad_norm": 0.38508057594299316, "learning_rate": 1.8566288339236e-05, "loss": 0.414, "step": 16356 }, { "epoch": 0.3469067464104685, "grad_norm": 0.7867081761360168, "learning_rate": 1.856611627317092e-05, "loss": 0.5113, "step": 16357 }, { "epoch": 0.3469279548684015, "grad_norm": 0.33767586946487427, "learning_rate": 1.856594419757868e-05, "loss": 0.5361, "step": 16358 }, { "epoch": 0.3469491633263345, "grad_norm": 0.3546638786792755, "learning_rate": 1.856577211245947e-05, "loss": 0.5836, "step": 16359 }, { "epoch": 0.34697037178426754, "grad_norm": 0.32209640741348267, "learning_rate": 1.8565600017813486e-05, "loss": 0.5265, "step": 16360 }, { "epoch": 0.3469915802422006, "grad_norm": 0.38649046421051025, "learning_rate": 1.8565427913640912e-05, "loss": 0.5417, "step": 16361 }, { "epoch": 0.34701278870013363, "grad_norm": 0.35057082772254944, "learning_rate": 1.8565255799941947e-05, "loss": 0.4994, "step": 16362 }, { "epoch": 0.34703399715806665, "grad_norm": 0.34803131222724915, "learning_rate": 1.8565083676716776e-05, "loss": 0.5164, "step": 16363 }, { "epoch": 0.3470552056159997, "grad_norm": 0.34389007091522217, "learning_rate": 1.8564911543965595e-05, "loss": 0.5495, "step": 16364 }, { "epoch": 0.3470764140739327, "grad_norm": 0.3953778147697449, "learning_rate": 1.8564739401688592e-05, "loss": 0.5167, "step": 16365 }, { "epoch": 0.3470976225318657, "grad_norm": 0.3677655756473541, "learning_rate": 1.856456724988596e-05, "loss": 0.4489, "step": 16366 }, { "epoch": 0.34711883098979873, "grad_norm": 0.3331690728664398, "learning_rate": 1.8564395088557895e-05, "loss": 0.5464, "step": 16367 }, { "epoch": 0.34714003944773175, "grad_norm": 0.34668803215026855, "learning_rate": 1.856422291770458e-05, "loss": 0.5979, "step": 16368 }, { "epoch": 0.34716124790566477, "grad_norm": 0.31852349638938904, "learning_rate": 1.8564050737326213e-05, "loss": 0.5852, "step": 16369 }, { "epoch": 0.3471824563635978, "grad_norm": 0.3661556839942932, "learning_rate": 1.856387854742298e-05, "loss": 0.4325, "step": 16370 }, { "epoch": 0.3472036648215308, "grad_norm": 0.3332092761993408, "learning_rate": 1.856370634799508e-05, "loss": 0.5601, "step": 16371 }, { "epoch": 0.34722487327946383, "grad_norm": 0.3670710623264313, "learning_rate": 1.8563534139042697e-05, "loss": 0.5075, "step": 16372 }, { "epoch": 0.34724608173739685, "grad_norm": 0.3241429328918457, "learning_rate": 1.8563361920566028e-05, "loss": 0.515, "step": 16373 }, { "epoch": 0.3472672901953299, "grad_norm": 0.3608141243457794, "learning_rate": 1.856318969256526e-05, "loss": 0.5435, "step": 16374 }, { "epoch": 0.34728849865326294, "grad_norm": 0.4600600004196167, "learning_rate": 1.856301745504059e-05, "loss": 0.5096, "step": 16375 }, { "epoch": 0.34730970711119596, "grad_norm": 0.3291374444961548, "learning_rate": 1.8562845207992204e-05, "loss": 0.5083, "step": 16376 }, { "epoch": 0.347330915569129, "grad_norm": 0.3165062963962555, "learning_rate": 1.8562672951420296e-05, "loss": 0.5459, "step": 16377 }, { "epoch": 0.347352124027062, "grad_norm": 0.3262198865413666, "learning_rate": 1.8562500685325058e-05, "loss": 0.5757, "step": 16378 }, { "epoch": 0.347373332484995, "grad_norm": 0.3578813374042511, "learning_rate": 1.856232840970668e-05, "loss": 0.5369, "step": 16379 }, { "epoch": 0.34739454094292804, "grad_norm": 0.3596988022327423, "learning_rate": 1.8562156124565356e-05, "loss": 0.5488, "step": 16380 }, { "epoch": 0.34741574940086106, "grad_norm": 0.3434455096721649, "learning_rate": 1.8561983829901272e-05, "loss": 0.5249, "step": 16381 }, { "epoch": 0.3474369578587941, "grad_norm": 0.39043569564819336, "learning_rate": 1.856181152571463e-05, "loss": 0.4842, "step": 16382 }, { "epoch": 0.3474581663167271, "grad_norm": 0.43805959820747375, "learning_rate": 1.8561639212005613e-05, "loss": 0.5406, "step": 16383 }, { "epoch": 0.3474793747746601, "grad_norm": 0.33110475540161133, "learning_rate": 1.8561466888774418e-05, "loss": 0.4012, "step": 16384 }, { "epoch": 0.34750058323259314, "grad_norm": 0.34625840187072754, "learning_rate": 1.856129455602123e-05, "loss": 0.5074, "step": 16385 }, { "epoch": 0.34752179169052616, "grad_norm": 0.3030008375644684, "learning_rate": 1.8561122213746245e-05, "loss": 0.4847, "step": 16386 }, { "epoch": 0.3475430001484592, "grad_norm": 0.32682672142982483, "learning_rate": 1.8560949861949656e-05, "loss": 0.5115, "step": 16387 }, { "epoch": 0.34756420860639226, "grad_norm": 0.3691292703151703, "learning_rate": 1.856077750063165e-05, "loss": 0.503, "step": 16388 }, { "epoch": 0.3475854170643253, "grad_norm": 0.3574189245700836, "learning_rate": 1.8560605129792427e-05, "loss": 0.5134, "step": 16389 }, { "epoch": 0.3476066255222583, "grad_norm": 0.31491193175315857, "learning_rate": 1.856043274943217e-05, "loss": 0.5239, "step": 16390 }, { "epoch": 0.3476278339801913, "grad_norm": 0.5360805988311768, "learning_rate": 1.8560260359551073e-05, "loss": 0.4852, "step": 16391 }, { "epoch": 0.34764904243812433, "grad_norm": 0.34874898195266724, "learning_rate": 1.8560087960149332e-05, "loss": 0.512, "step": 16392 }, { "epoch": 0.34767025089605735, "grad_norm": 0.40309152007102966, "learning_rate": 1.8559915551227134e-05, "loss": 0.5325, "step": 16393 }, { "epoch": 0.3476914593539904, "grad_norm": 0.3224269449710846, "learning_rate": 1.855974313278467e-05, "loss": 0.4658, "step": 16394 }, { "epoch": 0.3477126678119234, "grad_norm": 0.4258963465690613, "learning_rate": 1.8559570704822135e-05, "loss": 0.5426, "step": 16395 }, { "epoch": 0.3477338762698564, "grad_norm": 0.34796011447906494, "learning_rate": 1.8559398267339724e-05, "loss": 0.5147, "step": 16396 }, { "epoch": 0.34775508472778943, "grad_norm": 0.3061751425266266, "learning_rate": 1.855922582033762e-05, "loss": 0.4359, "step": 16397 }, { "epoch": 0.34777629318572245, "grad_norm": 0.3634217381477356, "learning_rate": 1.855905336381602e-05, "loss": 0.4722, "step": 16398 }, { "epoch": 0.34779750164365547, "grad_norm": 0.33453312516212463, "learning_rate": 1.8558880897775118e-05, "loss": 0.4149, "step": 16399 }, { "epoch": 0.3478187101015885, "grad_norm": 0.35032206773757935, "learning_rate": 1.85587084222151e-05, "loss": 0.478, "step": 16400 }, { "epoch": 0.3478399185595215, "grad_norm": 0.3257806897163391, "learning_rate": 1.8558535937136163e-05, "loss": 0.481, "step": 16401 }, { "epoch": 0.3478611270174546, "grad_norm": 0.3811480402946472, "learning_rate": 1.8558363442538496e-05, "loss": 0.5471, "step": 16402 }, { "epoch": 0.3478823354753876, "grad_norm": 0.3286282420158386, "learning_rate": 1.855819093842229e-05, "loss": 0.5245, "step": 16403 }, { "epoch": 0.3479035439333206, "grad_norm": 0.34004104137420654, "learning_rate": 1.855801842478774e-05, "loss": 0.4865, "step": 16404 }, { "epoch": 0.34792475239125364, "grad_norm": 0.35039395093917847, "learning_rate": 1.8557845901635036e-05, "loss": 0.5102, "step": 16405 }, { "epoch": 0.34794596084918666, "grad_norm": 0.33767518401145935, "learning_rate": 1.855767336896437e-05, "loss": 0.4859, "step": 16406 }, { "epoch": 0.3479671693071197, "grad_norm": 0.36374571919441223, "learning_rate": 1.8557500826775934e-05, "loss": 0.3847, "step": 16407 }, { "epoch": 0.3479883777650527, "grad_norm": 0.38418683409690857, "learning_rate": 1.855732827506992e-05, "loss": 0.5011, "step": 16408 }, { "epoch": 0.3480095862229857, "grad_norm": 0.3873670697212219, "learning_rate": 1.8557155713846516e-05, "loss": 0.5388, "step": 16409 }, { "epoch": 0.34803079468091874, "grad_norm": 0.40667593479156494, "learning_rate": 1.8556983143105924e-05, "loss": 0.4936, "step": 16410 }, { "epoch": 0.34805200313885176, "grad_norm": 0.3556683659553528, "learning_rate": 1.8556810562848327e-05, "loss": 0.5163, "step": 16411 }, { "epoch": 0.3480732115967848, "grad_norm": 0.33183228969573975, "learning_rate": 1.8556637973073922e-05, "loss": 0.503, "step": 16412 }, { "epoch": 0.3480944200547178, "grad_norm": 0.3705321252346039, "learning_rate": 1.8556465373782892e-05, "loss": 0.5271, "step": 16413 }, { "epoch": 0.3481156285126508, "grad_norm": 0.31453317403793335, "learning_rate": 1.855629276497544e-05, "loss": 0.5744, "step": 16414 }, { "epoch": 0.3481368369705839, "grad_norm": 0.33395105600357056, "learning_rate": 1.8556120146651756e-05, "loss": 0.4809, "step": 16415 }, { "epoch": 0.3481580454285169, "grad_norm": 0.35569649934768677, "learning_rate": 1.855594751881203e-05, "loss": 0.5584, "step": 16416 }, { "epoch": 0.34817925388644994, "grad_norm": 0.3438997268676758, "learning_rate": 1.855577488145645e-05, "loss": 0.5384, "step": 16417 }, { "epoch": 0.34820046234438295, "grad_norm": 0.3535493314266205, "learning_rate": 1.8555602234585207e-05, "loss": 0.4723, "step": 16418 }, { "epoch": 0.348221670802316, "grad_norm": 0.5344881415367126, "learning_rate": 1.8555429578198504e-05, "loss": 0.5478, "step": 16419 }, { "epoch": 0.348242879260249, "grad_norm": 0.3323422372341156, "learning_rate": 1.8555256912296525e-05, "loss": 0.4912, "step": 16420 }, { "epoch": 0.348264087718182, "grad_norm": 0.3646000623703003, "learning_rate": 1.8555084236879463e-05, "loss": 0.4839, "step": 16421 }, { "epoch": 0.34828529617611503, "grad_norm": 0.3668035864830017, "learning_rate": 1.8554911551947513e-05, "loss": 0.539, "step": 16422 }, { "epoch": 0.34830650463404805, "grad_norm": 0.4079335331916809, "learning_rate": 1.8554738857500863e-05, "loss": 0.5371, "step": 16423 }, { "epoch": 0.3483277130919811, "grad_norm": 0.3965241312980652, "learning_rate": 1.8554566153539705e-05, "loss": 0.5423, "step": 16424 }, { "epoch": 0.3483489215499141, "grad_norm": 0.366696834564209, "learning_rate": 1.8554393440064235e-05, "loss": 0.5093, "step": 16425 }, { "epoch": 0.3483701300078471, "grad_norm": 0.329888254404068, "learning_rate": 1.855422071707464e-05, "loss": 0.5128, "step": 16426 }, { "epoch": 0.34839133846578013, "grad_norm": 0.3652731776237488, "learning_rate": 1.855404798457112e-05, "loss": 0.4775, "step": 16427 }, { "epoch": 0.34841254692371315, "grad_norm": 0.3671902120113373, "learning_rate": 1.8553875242553858e-05, "loss": 0.4777, "step": 16428 }, { "epoch": 0.3484337553816462, "grad_norm": 0.31014904379844666, "learning_rate": 1.8553702491023052e-05, "loss": 0.4082, "step": 16429 }, { "epoch": 0.34845496383957925, "grad_norm": 0.3146221935749054, "learning_rate": 1.8553529729978893e-05, "loss": 0.4262, "step": 16430 }, { "epoch": 0.34847617229751227, "grad_norm": 0.3356243968009949, "learning_rate": 1.8553356959421572e-05, "loss": 0.5919, "step": 16431 }, { "epoch": 0.3484973807554453, "grad_norm": 0.3228286802768707, "learning_rate": 1.855318417935128e-05, "loss": 0.4819, "step": 16432 }, { "epoch": 0.3485185892133783, "grad_norm": 0.3728927671909332, "learning_rate": 1.855301138976821e-05, "loss": 0.5453, "step": 16433 }, { "epoch": 0.3485397976713113, "grad_norm": 0.32774847745895386, "learning_rate": 1.8552838590672558e-05, "loss": 0.4903, "step": 16434 }, { "epoch": 0.34856100612924434, "grad_norm": 0.34318476915359497, "learning_rate": 1.8552665782064514e-05, "loss": 0.5538, "step": 16435 }, { "epoch": 0.34858221458717736, "grad_norm": 0.3353271484375, "learning_rate": 1.8552492963944267e-05, "loss": 0.5569, "step": 16436 }, { "epoch": 0.3486034230451104, "grad_norm": 0.34079065918922424, "learning_rate": 1.8552320136312013e-05, "loss": 0.576, "step": 16437 }, { "epoch": 0.3486246315030434, "grad_norm": 0.35076603293418884, "learning_rate": 1.8552147299167944e-05, "loss": 0.5743, "step": 16438 }, { "epoch": 0.3486458399609764, "grad_norm": 0.37002015113830566, "learning_rate": 1.8551974452512248e-05, "loss": 0.4922, "step": 16439 }, { "epoch": 0.34866704841890944, "grad_norm": 0.3650074005126953, "learning_rate": 1.8551801596345124e-05, "loss": 0.4583, "step": 16440 }, { "epoch": 0.34868825687684246, "grad_norm": 0.35285577178001404, "learning_rate": 1.8551628730666755e-05, "loss": 0.5827, "step": 16441 }, { "epoch": 0.3487094653347755, "grad_norm": 0.4403162896633148, "learning_rate": 1.8551455855477344e-05, "loss": 0.4093, "step": 16442 }, { "epoch": 0.34873067379270856, "grad_norm": 0.34958159923553467, "learning_rate": 1.8551282970777075e-05, "loss": 0.5676, "step": 16443 }, { "epoch": 0.3487518822506416, "grad_norm": 0.3254145681858063, "learning_rate": 1.8551110076566144e-05, "loss": 0.4893, "step": 16444 }, { "epoch": 0.3487730907085746, "grad_norm": 0.3226659595966339, "learning_rate": 1.8550937172844744e-05, "loss": 0.5015, "step": 16445 }, { "epoch": 0.3487942991665076, "grad_norm": 0.32902565598487854, "learning_rate": 1.8550764259613064e-05, "loss": 0.524, "step": 16446 }, { "epoch": 0.34881550762444063, "grad_norm": 0.3795097768306732, "learning_rate": 1.8550591336871302e-05, "loss": 0.5693, "step": 16447 }, { "epoch": 0.34883671608237365, "grad_norm": 0.31733524799346924, "learning_rate": 1.8550418404619643e-05, "loss": 0.4287, "step": 16448 }, { "epoch": 0.3488579245403067, "grad_norm": 0.3403230309486389, "learning_rate": 1.8550245462858285e-05, "loss": 0.4548, "step": 16449 }, { "epoch": 0.3488791329982397, "grad_norm": 0.44659683108329773, "learning_rate": 1.8550072511587416e-05, "loss": 0.504, "step": 16450 }, { "epoch": 0.3489003414561727, "grad_norm": 0.3666961193084717, "learning_rate": 1.8549899550807234e-05, "loss": 0.4996, "step": 16451 }, { "epoch": 0.34892154991410573, "grad_norm": 0.33713439106941223, "learning_rate": 1.8549726580517924e-05, "loss": 0.4586, "step": 16452 }, { "epoch": 0.34894275837203875, "grad_norm": 0.32828518748283386, "learning_rate": 1.8549553600719684e-05, "loss": 0.5356, "step": 16453 }, { "epoch": 0.3489639668299718, "grad_norm": 0.3778937757015228, "learning_rate": 1.8549380611412706e-05, "loss": 0.4887, "step": 16454 }, { "epoch": 0.3489851752879048, "grad_norm": 0.32095837593078613, "learning_rate": 1.854920761259718e-05, "loss": 0.4695, "step": 16455 }, { "epoch": 0.34900638374583787, "grad_norm": 0.3669074475765228, "learning_rate": 1.85490346042733e-05, "loss": 0.5676, "step": 16456 }, { "epoch": 0.3490275922037709, "grad_norm": 0.34390729665756226, "learning_rate": 1.8548861586441256e-05, "loss": 0.4751, "step": 16457 }, { "epoch": 0.3490488006617039, "grad_norm": 0.35596755146980286, "learning_rate": 1.8548688559101245e-05, "loss": 0.5428, "step": 16458 }, { "epoch": 0.3490700091196369, "grad_norm": 0.35114601254463196, "learning_rate": 1.8548515522253456e-05, "loss": 0.5113, "step": 16459 }, { "epoch": 0.34909121757756995, "grad_norm": 0.34958526492118835, "learning_rate": 1.8548342475898084e-05, "loss": 0.4829, "step": 16460 }, { "epoch": 0.34911242603550297, "grad_norm": 0.3477688729763031, "learning_rate": 1.854816942003532e-05, "loss": 0.5222, "step": 16461 }, { "epoch": 0.349133634493436, "grad_norm": 0.3306107521057129, "learning_rate": 1.8547996354665348e-05, "loss": 0.5078, "step": 16462 }, { "epoch": 0.349154842951369, "grad_norm": 0.369157612323761, "learning_rate": 1.8547823279788378e-05, "loss": 0.5538, "step": 16463 }, { "epoch": 0.349176051409302, "grad_norm": 0.3633299469947815, "learning_rate": 1.854765019540459e-05, "loss": 0.6046, "step": 16464 }, { "epoch": 0.34919725986723504, "grad_norm": 0.3256736099720001, "learning_rate": 1.854747710151418e-05, "loss": 0.4961, "step": 16465 }, { "epoch": 0.34921846832516806, "grad_norm": 0.3702717423439026, "learning_rate": 1.854730399811734e-05, "loss": 0.5824, "step": 16466 }, { "epoch": 0.3492396767831011, "grad_norm": 0.35743457078933716, "learning_rate": 1.854713088521426e-05, "loss": 0.5451, "step": 16467 }, { "epoch": 0.3492608852410341, "grad_norm": 0.3259965181350708, "learning_rate": 1.8546957762805142e-05, "loss": 0.5447, "step": 16468 }, { "epoch": 0.3492820936989671, "grad_norm": 0.34824925661087036, "learning_rate": 1.8546784630890166e-05, "loss": 0.5281, "step": 16469 }, { "epoch": 0.3493033021569002, "grad_norm": 0.40798068046569824, "learning_rate": 1.854661148946953e-05, "loss": 0.5636, "step": 16470 }, { "epoch": 0.3493245106148332, "grad_norm": 0.35156309604644775, "learning_rate": 1.8546438338543433e-05, "loss": 0.4804, "step": 16471 }, { "epoch": 0.34934571907276624, "grad_norm": 0.36075836420059204, "learning_rate": 1.854626517811206e-05, "loss": 0.5027, "step": 16472 }, { "epoch": 0.34936692753069926, "grad_norm": 0.36928391456604004, "learning_rate": 1.85460920081756e-05, "loss": 0.4866, "step": 16473 }, { "epoch": 0.3493881359886323, "grad_norm": 0.3487272262573242, "learning_rate": 1.854591882873425e-05, "loss": 0.5843, "step": 16474 }, { "epoch": 0.3494093444465653, "grad_norm": 0.39304617047309875, "learning_rate": 1.854574563978821e-05, "loss": 0.5368, "step": 16475 }, { "epoch": 0.3494305529044983, "grad_norm": 0.34001457691192627, "learning_rate": 1.8545572441337665e-05, "loss": 0.4564, "step": 16476 }, { "epoch": 0.34945176136243133, "grad_norm": 0.4052288234233856, "learning_rate": 1.8545399233382806e-05, "loss": 0.5115, "step": 16477 }, { "epoch": 0.34947296982036435, "grad_norm": 0.3743536174297333, "learning_rate": 1.8545226015923827e-05, "loss": 0.6023, "step": 16478 }, { "epoch": 0.3494941782782974, "grad_norm": 0.33753514289855957, "learning_rate": 1.8545052788960926e-05, "loss": 0.5046, "step": 16479 }, { "epoch": 0.3495153867362304, "grad_norm": 0.31703025102615356, "learning_rate": 1.8544879552494287e-05, "loss": 0.491, "step": 16480 }, { "epoch": 0.3495365951941634, "grad_norm": 0.3206477761268616, "learning_rate": 1.854470630652411e-05, "loss": 0.5103, "step": 16481 }, { "epoch": 0.34955780365209643, "grad_norm": 0.31992727518081665, "learning_rate": 1.8544533051050583e-05, "loss": 0.5619, "step": 16482 }, { "epoch": 0.34957901211002945, "grad_norm": 0.29798629879951477, "learning_rate": 1.85443597860739e-05, "loss": 0.4727, "step": 16483 }, { "epoch": 0.3496002205679625, "grad_norm": 0.3847893178462982, "learning_rate": 1.854418651159426e-05, "loss": 0.4647, "step": 16484 }, { "epoch": 0.34962142902589555, "grad_norm": 0.35550257563591003, "learning_rate": 1.8544013227611844e-05, "loss": 0.5442, "step": 16485 }, { "epoch": 0.34964263748382857, "grad_norm": 0.3423953056335449, "learning_rate": 1.854383993412685e-05, "loss": 0.5417, "step": 16486 }, { "epoch": 0.3496638459417616, "grad_norm": 0.3150022327899933, "learning_rate": 1.8543666631139476e-05, "loss": 0.5237, "step": 16487 }, { "epoch": 0.3496850543996946, "grad_norm": 0.32562652230262756, "learning_rate": 1.854349331864991e-05, "loss": 0.535, "step": 16488 }, { "epoch": 0.3497062628576276, "grad_norm": 0.31212592124938965, "learning_rate": 1.8543319996658344e-05, "loss": 0.4417, "step": 16489 }, { "epoch": 0.34972747131556065, "grad_norm": 0.34660857915878296, "learning_rate": 1.854314666516497e-05, "loss": 0.4782, "step": 16490 }, { "epoch": 0.34974867977349366, "grad_norm": 0.42996102571487427, "learning_rate": 1.8542973324169983e-05, "loss": 0.4632, "step": 16491 }, { "epoch": 0.3497698882314267, "grad_norm": 0.321764200925827, "learning_rate": 1.8542799973673574e-05, "loss": 0.5292, "step": 16492 }, { "epoch": 0.3497910966893597, "grad_norm": 0.5510227680206299, "learning_rate": 1.854262661367594e-05, "loss": 0.4984, "step": 16493 }, { "epoch": 0.3498123051472927, "grad_norm": 0.33044207096099854, "learning_rate": 1.8542453244177266e-05, "loss": 0.5052, "step": 16494 }, { "epoch": 0.34983351360522574, "grad_norm": 0.438080757856369, "learning_rate": 1.8542279865177758e-05, "loss": 0.5071, "step": 16495 }, { "epoch": 0.34985472206315876, "grad_norm": 0.3663436770439148, "learning_rate": 1.8542106476677593e-05, "loss": 0.5471, "step": 16496 }, { "epoch": 0.34987593052109184, "grad_norm": 0.3450045585632324, "learning_rate": 1.8541933078676977e-05, "loss": 0.5148, "step": 16497 }, { "epoch": 0.34989713897902486, "grad_norm": 0.31033340096473694, "learning_rate": 1.8541759671176094e-05, "loss": 0.4831, "step": 16498 }, { "epoch": 0.3499183474369579, "grad_norm": 0.3342774510383606, "learning_rate": 1.854158625417514e-05, "loss": 0.4889, "step": 16499 }, { "epoch": 0.3499395558948909, "grad_norm": 0.3514500856399536, "learning_rate": 1.854141282767431e-05, "loss": 0.5196, "step": 16500 }, { "epoch": 0.3499607643528239, "grad_norm": 0.5980899930000305, "learning_rate": 1.8541239391673793e-05, "loss": 0.5503, "step": 16501 }, { "epoch": 0.34998197281075694, "grad_norm": 0.5414139628410339, "learning_rate": 1.8541065946173785e-05, "loss": 0.5094, "step": 16502 }, { "epoch": 0.35000318126868996, "grad_norm": 0.34048208594322205, "learning_rate": 1.8540892491174472e-05, "loss": 0.5079, "step": 16503 }, { "epoch": 0.350024389726623, "grad_norm": 0.3645816147327423, "learning_rate": 1.8540719026676058e-05, "loss": 0.5191, "step": 16504 }, { "epoch": 0.350045598184556, "grad_norm": 0.32447758316993713, "learning_rate": 1.854054555267873e-05, "loss": 0.4229, "step": 16505 }, { "epoch": 0.350066806642489, "grad_norm": 0.3183075785636902, "learning_rate": 1.8540372069182683e-05, "loss": 0.4996, "step": 16506 }, { "epoch": 0.35008801510042203, "grad_norm": 0.30409669876098633, "learning_rate": 1.8540198576188108e-05, "loss": 0.5047, "step": 16507 }, { "epoch": 0.35010922355835505, "grad_norm": 0.33754071593284607, "learning_rate": 1.8540025073695197e-05, "loss": 0.49, "step": 16508 }, { "epoch": 0.3501304320162881, "grad_norm": 0.37496495246887207, "learning_rate": 1.8539851561704144e-05, "loss": 0.5535, "step": 16509 }, { "epoch": 0.3501516404742211, "grad_norm": 0.36066123843193054, "learning_rate": 1.8539678040215143e-05, "loss": 0.5116, "step": 16510 }, { "epoch": 0.35017284893215417, "grad_norm": 0.33746641874313354, "learning_rate": 1.8539504509228387e-05, "loss": 0.5344, "step": 16511 }, { "epoch": 0.3501940573900872, "grad_norm": 0.33469679951667786, "learning_rate": 1.8539330968744066e-05, "loss": 0.538, "step": 16512 }, { "epoch": 0.3502152658480202, "grad_norm": 0.29529574513435364, "learning_rate": 1.8539157418762375e-05, "loss": 0.4767, "step": 16513 }, { "epoch": 0.3502364743059532, "grad_norm": 0.37136217951774597, "learning_rate": 1.8538983859283507e-05, "loss": 0.5521, "step": 16514 }, { "epoch": 0.35025768276388625, "grad_norm": 0.37785544991493225, "learning_rate": 1.8538810290307658e-05, "loss": 0.5276, "step": 16515 }, { "epoch": 0.35027889122181927, "grad_norm": 0.34562569856643677, "learning_rate": 1.8538636711835017e-05, "loss": 0.4752, "step": 16516 }, { "epoch": 0.3503000996797523, "grad_norm": 0.40377184748649597, "learning_rate": 1.853846312386578e-05, "loss": 0.5574, "step": 16517 }, { "epoch": 0.3503213081376853, "grad_norm": 0.3368249535560608, "learning_rate": 1.8538289526400136e-05, "loss": 0.5222, "step": 16518 }, { "epoch": 0.3503425165956183, "grad_norm": 0.5945877432823181, "learning_rate": 1.8538115919438285e-05, "loss": 0.4759, "step": 16519 }, { "epoch": 0.35036372505355134, "grad_norm": 0.3506881594657898, "learning_rate": 1.853794230298041e-05, "loss": 0.5088, "step": 16520 }, { "epoch": 0.35038493351148436, "grad_norm": 0.7418190240859985, "learning_rate": 1.8537768677026716e-05, "loss": 0.517, "step": 16521 }, { "epoch": 0.3504061419694174, "grad_norm": 0.36042341589927673, "learning_rate": 1.8537595041577385e-05, "loss": 0.5929, "step": 16522 }, { "epoch": 0.3504273504273504, "grad_norm": 0.38710927963256836, "learning_rate": 1.8537421396632614e-05, "loss": 0.577, "step": 16523 }, { "epoch": 0.3504485588852835, "grad_norm": 0.5448984503746033, "learning_rate": 1.85372477421926e-05, "loss": 0.4864, "step": 16524 }, { "epoch": 0.3504697673432165, "grad_norm": 0.38981103897094727, "learning_rate": 1.8537074078257535e-05, "loss": 0.5155, "step": 16525 }, { "epoch": 0.3504909758011495, "grad_norm": 0.364285945892334, "learning_rate": 1.853690040482761e-05, "loss": 0.4872, "step": 16526 }, { "epoch": 0.35051218425908254, "grad_norm": 0.3975963592529297, "learning_rate": 1.8536726721903012e-05, "loss": 0.5055, "step": 16527 }, { "epoch": 0.35053339271701556, "grad_norm": 0.3947313725948334, "learning_rate": 1.8536553029483942e-05, "loss": 0.5174, "step": 16528 }, { "epoch": 0.3505546011749486, "grad_norm": 0.3572193682193756, "learning_rate": 1.85363793275706e-05, "loss": 0.5494, "step": 16529 }, { "epoch": 0.3505758096328816, "grad_norm": 0.38189053535461426, "learning_rate": 1.8536205616163165e-05, "loss": 0.5793, "step": 16530 }, { "epoch": 0.3505970180908146, "grad_norm": 0.33293166756629944, "learning_rate": 1.8536031895261836e-05, "loss": 0.5259, "step": 16531 }, { "epoch": 0.35061822654874764, "grad_norm": 0.3091276288032532, "learning_rate": 1.853585816486681e-05, "loss": 0.4684, "step": 16532 }, { "epoch": 0.35063943500668066, "grad_norm": 0.3247547447681427, "learning_rate": 1.8535684424978272e-05, "loss": 0.4887, "step": 16533 }, { "epoch": 0.3506606434646137, "grad_norm": 0.4241567552089691, "learning_rate": 1.8535510675596423e-05, "loss": 0.5827, "step": 16534 }, { "epoch": 0.3506818519225467, "grad_norm": 0.3509781062602997, "learning_rate": 1.8535336916721452e-05, "loss": 0.4767, "step": 16535 }, { "epoch": 0.3507030603804797, "grad_norm": 0.34783339500427246, "learning_rate": 1.8535163148353556e-05, "loss": 0.5282, "step": 16536 }, { "epoch": 0.35072426883841273, "grad_norm": 0.3464084267616272, "learning_rate": 1.853498937049292e-05, "loss": 0.4725, "step": 16537 }, { "epoch": 0.3507454772963458, "grad_norm": 0.46639445424079895, "learning_rate": 1.8534815583139746e-05, "loss": 0.5287, "step": 16538 }, { "epoch": 0.35076668575427883, "grad_norm": 0.35786473751068115, "learning_rate": 1.8534641786294226e-05, "loss": 0.4991, "step": 16539 }, { "epoch": 0.35078789421221185, "grad_norm": 0.372668981552124, "learning_rate": 1.853446797995655e-05, "loss": 0.619, "step": 16540 }, { "epoch": 0.35080910267014487, "grad_norm": 0.36811941862106323, "learning_rate": 1.8534294164126914e-05, "loss": 0.525, "step": 16541 }, { "epoch": 0.3508303111280779, "grad_norm": 0.31255948543548584, "learning_rate": 1.853412033880551e-05, "loss": 0.4841, "step": 16542 }, { "epoch": 0.3508515195860109, "grad_norm": 0.31213876605033875, "learning_rate": 1.853394650399253e-05, "loss": 0.483, "step": 16543 }, { "epoch": 0.3508727280439439, "grad_norm": 0.4037107229232788, "learning_rate": 1.853377265968817e-05, "loss": 0.4898, "step": 16544 }, { "epoch": 0.35089393650187695, "grad_norm": 0.36033543944358826, "learning_rate": 1.8533598805892624e-05, "loss": 0.4956, "step": 16545 }, { "epoch": 0.35091514495980997, "grad_norm": 0.3532625138759613, "learning_rate": 1.853342494260608e-05, "loss": 0.4789, "step": 16546 }, { "epoch": 0.350936353417743, "grad_norm": 0.32168683409690857, "learning_rate": 1.8533251069828736e-05, "loss": 0.4586, "step": 16547 }, { "epoch": 0.350957561875676, "grad_norm": 0.39997100830078125, "learning_rate": 1.8533077187560783e-05, "loss": 0.5394, "step": 16548 }, { "epoch": 0.350978770333609, "grad_norm": 0.37168607115745544, "learning_rate": 1.853290329580242e-05, "loss": 0.562, "step": 16549 }, { "epoch": 0.35099997879154204, "grad_norm": 0.4021874964237213, "learning_rate": 1.8532729394553834e-05, "loss": 0.5671, "step": 16550 }, { "epoch": 0.35102118724947506, "grad_norm": 0.3785378634929657, "learning_rate": 1.853255548381522e-05, "loss": 0.5541, "step": 16551 }, { "epoch": 0.35104239570740814, "grad_norm": 0.39654773473739624, "learning_rate": 1.853238156358677e-05, "loss": 0.5642, "step": 16552 }, { "epoch": 0.35106360416534116, "grad_norm": 0.38594022393226624, "learning_rate": 1.853220763386868e-05, "loss": 0.4928, "step": 16553 }, { "epoch": 0.3510848126232742, "grad_norm": 0.38982293009757996, "learning_rate": 1.8532033694661144e-05, "loss": 0.477, "step": 16554 }, { "epoch": 0.3511060210812072, "grad_norm": 0.36562982201576233, "learning_rate": 1.8531859745964355e-05, "loss": 0.527, "step": 16555 }, { "epoch": 0.3511272295391402, "grad_norm": 0.37797966599464417, "learning_rate": 1.8531685787778506e-05, "loss": 0.5326, "step": 16556 }, { "epoch": 0.35114843799707324, "grad_norm": 0.33254146575927734, "learning_rate": 1.8531511820103788e-05, "loss": 0.5297, "step": 16557 }, { "epoch": 0.35116964645500626, "grad_norm": 0.311133474111557, "learning_rate": 1.8531337842940397e-05, "loss": 0.4892, "step": 16558 }, { "epoch": 0.3511908549129393, "grad_norm": 0.3371540606021881, "learning_rate": 1.853116385628853e-05, "loss": 0.5694, "step": 16559 }, { "epoch": 0.3512120633708723, "grad_norm": 0.3723086416721344, "learning_rate": 1.853098986014837e-05, "loss": 0.5269, "step": 16560 }, { "epoch": 0.3512332718288053, "grad_norm": 0.35737019777297974, "learning_rate": 1.8530815854520122e-05, "loss": 0.5946, "step": 16561 }, { "epoch": 0.35125448028673834, "grad_norm": 0.48495325446128845, "learning_rate": 1.8530641839403976e-05, "loss": 0.5741, "step": 16562 }, { "epoch": 0.35127568874467135, "grad_norm": 0.3871772885322571, "learning_rate": 1.853046781480012e-05, "loss": 0.5495, "step": 16563 }, { "epoch": 0.3512968972026044, "grad_norm": 0.35560256242752075, "learning_rate": 1.8530293780708755e-05, "loss": 0.584, "step": 16564 }, { "epoch": 0.35131810566053745, "grad_norm": 0.3294101357460022, "learning_rate": 1.853011973713007e-05, "loss": 0.4818, "step": 16565 }, { "epoch": 0.35133931411847047, "grad_norm": 0.4212946891784668, "learning_rate": 1.852994568406426e-05, "loss": 0.5543, "step": 16566 }, { "epoch": 0.3513605225764035, "grad_norm": 0.3091772794723511, "learning_rate": 1.852977162151152e-05, "loss": 0.5005, "step": 16567 }, { "epoch": 0.3513817310343365, "grad_norm": 0.29696574807167053, "learning_rate": 1.852959754947204e-05, "loss": 0.407, "step": 16568 }, { "epoch": 0.35140293949226953, "grad_norm": 0.35427823662757874, "learning_rate": 1.8529423467946014e-05, "loss": 0.5593, "step": 16569 }, { "epoch": 0.35142414795020255, "grad_norm": 0.33174070715904236, "learning_rate": 1.852924937693364e-05, "loss": 0.4749, "step": 16570 }, { "epoch": 0.35144535640813557, "grad_norm": 0.33175796270370483, "learning_rate": 1.8529075276435108e-05, "loss": 0.4669, "step": 16571 }, { "epoch": 0.3514665648660686, "grad_norm": 0.3897297978401184, "learning_rate": 1.852890116645061e-05, "loss": 0.5296, "step": 16572 }, { "epoch": 0.3514877733240016, "grad_norm": 0.3623468279838562, "learning_rate": 1.852872704698035e-05, "loss": 0.4883, "step": 16573 }, { "epoch": 0.3515089817819346, "grad_norm": 0.3525467813014984, "learning_rate": 1.8528552918024506e-05, "loss": 0.5121, "step": 16574 }, { "epoch": 0.35153019023986765, "grad_norm": 0.3416791558265686, "learning_rate": 1.8528378779583284e-05, "loss": 0.5237, "step": 16575 }, { "epoch": 0.35155139869780067, "grad_norm": 0.342275470495224, "learning_rate": 1.852820463165687e-05, "loss": 0.5314, "step": 16576 }, { "epoch": 0.3515726071557337, "grad_norm": 0.31019964814186096, "learning_rate": 1.8528030474245465e-05, "loss": 0.4965, "step": 16577 }, { "epoch": 0.3515938156136667, "grad_norm": 0.3949147164821625, "learning_rate": 1.8527856307349258e-05, "loss": 0.5891, "step": 16578 }, { "epoch": 0.3516150240715998, "grad_norm": 0.3307103216648102, "learning_rate": 1.8527682130968437e-05, "loss": 0.4206, "step": 16579 }, { "epoch": 0.3516362325295328, "grad_norm": 0.48260989785194397, "learning_rate": 1.852750794510321e-05, "loss": 0.557, "step": 16580 }, { "epoch": 0.3516574409874658, "grad_norm": 0.5806102752685547, "learning_rate": 1.852733374975376e-05, "loss": 0.5279, "step": 16581 }, { "epoch": 0.35167864944539884, "grad_norm": 0.33261948823928833, "learning_rate": 1.8527159544920282e-05, "loss": 0.4928, "step": 16582 }, { "epoch": 0.35169985790333186, "grad_norm": 0.372261643409729, "learning_rate": 1.8526985330602973e-05, "loss": 0.5296, "step": 16583 }, { "epoch": 0.3517210663612649, "grad_norm": 0.33254411816596985, "learning_rate": 1.8526811106802022e-05, "loss": 0.5399, "step": 16584 }, { "epoch": 0.3517422748191979, "grad_norm": 0.3530178368091583, "learning_rate": 1.852663687351763e-05, "loss": 0.4197, "step": 16585 }, { "epoch": 0.3517634832771309, "grad_norm": 0.3158283233642578, "learning_rate": 1.852646263074998e-05, "loss": 0.52, "step": 16586 }, { "epoch": 0.35178469173506394, "grad_norm": 0.35251742601394653, "learning_rate": 1.852628837849928e-05, "loss": 0.5058, "step": 16587 }, { "epoch": 0.35180590019299696, "grad_norm": 0.3496459722518921, "learning_rate": 1.852611411676571e-05, "loss": 0.4906, "step": 16588 }, { "epoch": 0.35182710865093, "grad_norm": 0.3412778079509735, "learning_rate": 1.8525939845549476e-05, "loss": 0.5406, "step": 16589 }, { "epoch": 0.351848317108863, "grad_norm": 0.40877699851989746, "learning_rate": 1.852576556485076e-05, "loss": 0.554, "step": 16590 }, { "epoch": 0.351869525566796, "grad_norm": 0.32958248257637024, "learning_rate": 1.8525591274669766e-05, "loss": 0.5244, "step": 16591 }, { "epoch": 0.35189073402472903, "grad_norm": 0.3323136270046234, "learning_rate": 1.852541697500668e-05, "loss": 0.4566, "step": 16592 }, { "epoch": 0.3519119424826621, "grad_norm": 0.3638823330402374, "learning_rate": 1.85252426658617e-05, "loss": 0.5297, "step": 16593 }, { "epoch": 0.35193315094059513, "grad_norm": 0.35943564772605896, "learning_rate": 1.8525068347235023e-05, "loss": 0.5145, "step": 16594 }, { "epoch": 0.35195435939852815, "grad_norm": 0.3862694203853607, "learning_rate": 1.8524894019126837e-05, "loss": 0.4579, "step": 16595 }, { "epoch": 0.35197556785646117, "grad_norm": 0.3236648440361023, "learning_rate": 1.8524719681537334e-05, "loss": 0.509, "step": 16596 }, { "epoch": 0.3519967763143942, "grad_norm": 0.356794536113739, "learning_rate": 1.8524545334466716e-05, "loss": 0.4882, "step": 16597 }, { "epoch": 0.3520179847723272, "grad_norm": 0.35478538274765015, "learning_rate": 1.852437097791517e-05, "loss": 0.5335, "step": 16598 }, { "epoch": 0.35203919323026023, "grad_norm": 0.4271060526371002, "learning_rate": 1.8524196611882893e-05, "loss": 0.4916, "step": 16599 }, { "epoch": 0.35206040168819325, "grad_norm": 0.35500195622444153, "learning_rate": 1.852402223637008e-05, "loss": 0.4925, "step": 16600 }, { "epoch": 0.35208161014612627, "grad_norm": 0.3859754204750061, "learning_rate": 1.8523847851376928e-05, "loss": 0.4674, "step": 16601 }, { "epoch": 0.3521028186040593, "grad_norm": 0.3332514464855194, "learning_rate": 1.852367345690362e-05, "loss": 0.544, "step": 16602 }, { "epoch": 0.3521240270619923, "grad_norm": 0.3719957172870636, "learning_rate": 1.8523499052950358e-05, "loss": 0.4496, "step": 16603 }, { "epoch": 0.3521452355199253, "grad_norm": 0.4001338481903076, "learning_rate": 1.8523324639517334e-05, "loss": 0.4661, "step": 16604 }, { "epoch": 0.35216644397785835, "grad_norm": 0.3152814209461212, "learning_rate": 1.8523150216604745e-05, "loss": 0.5527, "step": 16605 }, { "epoch": 0.3521876524357914, "grad_norm": 0.4165874123573303, "learning_rate": 1.8522975784212776e-05, "loss": 0.4963, "step": 16606 }, { "epoch": 0.35220886089372444, "grad_norm": 0.3294958770275116, "learning_rate": 1.8522801342341635e-05, "loss": 0.5736, "step": 16607 }, { "epoch": 0.35223006935165746, "grad_norm": 0.37558454275131226, "learning_rate": 1.8522626890991503e-05, "loss": 0.5105, "step": 16608 }, { "epoch": 0.3522512778095905, "grad_norm": 0.39333438873291016, "learning_rate": 1.8522452430162582e-05, "loss": 0.5498, "step": 16609 }, { "epoch": 0.3522724862675235, "grad_norm": 0.34856486320495605, "learning_rate": 1.8522277959855064e-05, "loss": 0.4797, "step": 16610 }, { "epoch": 0.3522936947254565, "grad_norm": 0.3279895484447479, "learning_rate": 1.852210348006914e-05, "loss": 0.4977, "step": 16611 }, { "epoch": 0.35231490318338954, "grad_norm": 0.3206353485584259, "learning_rate": 1.852192899080501e-05, "loss": 0.5056, "step": 16612 }, { "epoch": 0.35233611164132256, "grad_norm": 0.3228399157524109, "learning_rate": 1.8521754492062862e-05, "loss": 0.5587, "step": 16613 }, { "epoch": 0.3523573200992556, "grad_norm": 0.352136492729187, "learning_rate": 1.852157998384289e-05, "loss": 0.5313, "step": 16614 }, { "epoch": 0.3523785285571886, "grad_norm": 0.33023345470428467, "learning_rate": 1.8521405466145296e-05, "loss": 0.456, "step": 16615 }, { "epoch": 0.3523997370151216, "grad_norm": 0.3748193681240082, "learning_rate": 1.852123093897027e-05, "loss": 0.5636, "step": 16616 }, { "epoch": 0.35242094547305464, "grad_norm": 0.3091582655906677, "learning_rate": 1.8521056402318e-05, "loss": 0.4853, "step": 16617 }, { "epoch": 0.35244215393098766, "grad_norm": 0.3947593569755554, "learning_rate": 1.8520881856188684e-05, "loss": 0.5815, "step": 16618 }, { "epoch": 0.3524633623889207, "grad_norm": 0.36297816038131714, "learning_rate": 1.8520707300582523e-05, "loss": 0.488, "step": 16619 }, { "epoch": 0.35248457084685375, "grad_norm": 0.624189555644989, "learning_rate": 1.8520532735499702e-05, "loss": 0.4857, "step": 16620 }, { "epoch": 0.35250577930478677, "grad_norm": 0.3745920658111572, "learning_rate": 1.8520358160940422e-05, "loss": 0.4868, "step": 16621 }, { "epoch": 0.3525269877627198, "grad_norm": 0.4163629710674286, "learning_rate": 1.852018357690487e-05, "loss": 0.5406, "step": 16622 }, { "epoch": 0.3525481962206528, "grad_norm": 0.3212692439556122, "learning_rate": 1.852000898339324e-05, "loss": 0.4716, "step": 16623 }, { "epoch": 0.35256940467858583, "grad_norm": 0.38850972056388855, "learning_rate": 1.851983438040574e-05, "loss": 0.5995, "step": 16624 }, { "epoch": 0.35259061313651885, "grad_norm": 0.350342720746994, "learning_rate": 1.851965976794255e-05, "loss": 0.5418, "step": 16625 }, { "epoch": 0.35261182159445187, "grad_norm": 0.31881317496299744, "learning_rate": 1.8519485146003865e-05, "loss": 0.4971, "step": 16626 }, { "epoch": 0.3526330300523849, "grad_norm": 0.36925947666168213, "learning_rate": 1.8519310514589884e-05, "loss": 0.4723, "step": 16627 }, { "epoch": 0.3526542385103179, "grad_norm": 0.3427758812904358, "learning_rate": 1.85191358737008e-05, "loss": 0.4962, "step": 16628 }, { "epoch": 0.3526754469682509, "grad_norm": 0.3858240246772766, "learning_rate": 1.8518961223336808e-05, "loss": 0.4926, "step": 16629 }, { "epoch": 0.35269665542618395, "grad_norm": 0.43310120701789856, "learning_rate": 1.8518786563498102e-05, "loss": 0.46, "step": 16630 }, { "epoch": 0.35271786388411697, "grad_norm": 0.7497323751449585, "learning_rate": 1.8518611894184872e-05, "loss": 0.4911, "step": 16631 }, { "epoch": 0.35273907234205, "grad_norm": 0.46245890855789185, "learning_rate": 1.851843721539732e-05, "loss": 0.4957, "step": 16632 }, { "epoch": 0.352760280799983, "grad_norm": 0.40226060152053833, "learning_rate": 1.8518262527135635e-05, "loss": 0.5449, "step": 16633 }, { "epoch": 0.3527814892579161, "grad_norm": 0.3468063771724701, "learning_rate": 1.851808782940001e-05, "loss": 0.5192, "step": 16634 }, { "epoch": 0.3528026977158491, "grad_norm": 0.3336213231086731, "learning_rate": 1.851791312219065e-05, "loss": 0.4281, "step": 16635 }, { "epoch": 0.3528239061737821, "grad_norm": 0.393619567155838, "learning_rate": 1.851773840550773e-05, "loss": 0.5375, "step": 16636 }, { "epoch": 0.35284511463171514, "grad_norm": 0.37451115250587463, "learning_rate": 1.851756367935146e-05, "loss": 0.5427, "step": 16637 }, { "epoch": 0.35286632308964816, "grad_norm": 0.3378828167915344, "learning_rate": 1.8517388943722033e-05, "loss": 0.5459, "step": 16638 }, { "epoch": 0.3528875315475812, "grad_norm": 0.3605993092060089, "learning_rate": 1.8517214198619635e-05, "loss": 0.5989, "step": 16639 }, { "epoch": 0.3529087400055142, "grad_norm": 0.9310094714164734, "learning_rate": 1.851703944404447e-05, "loss": 0.481, "step": 16640 }, { "epoch": 0.3529299484634472, "grad_norm": 0.3221697211265564, "learning_rate": 1.8516864679996722e-05, "loss": 0.5216, "step": 16641 }, { "epoch": 0.35295115692138024, "grad_norm": 0.3910825848579407, "learning_rate": 1.8516689906476596e-05, "loss": 0.5827, "step": 16642 }, { "epoch": 0.35297236537931326, "grad_norm": 0.38270971179008484, "learning_rate": 1.8516515123484277e-05, "loss": 0.5766, "step": 16643 }, { "epoch": 0.3529935738372463, "grad_norm": 0.3619801104068756, "learning_rate": 1.8516340331019968e-05, "loss": 0.5073, "step": 16644 }, { "epoch": 0.3530147822951793, "grad_norm": 0.3715972602367401, "learning_rate": 1.851616552908386e-05, "loss": 0.5364, "step": 16645 }, { "epoch": 0.3530359907531123, "grad_norm": 0.3445431888103485, "learning_rate": 1.8515990717676142e-05, "loss": 0.4895, "step": 16646 }, { "epoch": 0.3530571992110454, "grad_norm": 0.3234294652938843, "learning_rate": 1.8515815896797015e-05, "loss": 0.4567, "step": 16647 }, { "epoch": 0.3530784076689784, "grad_norm": 0.33470484614372253, "learning_rate": 1.8515641066446673e-05, "loss": 0.5321, "step": 16648 }, { "epoch": 0.35309961612691143, "grad_norm": 0.33797091245651245, "learning_rate": 1.8515466226625308e-05, "loss": 0.5026, "step": 16649 }, { "epoch": 0.35312082458484445, "grad_norm": 0.38562291860580444, "learning_rate": 1.8515291377333114e-05, "loss": 0.4536, "step": 16650 }, { "epoch": 0.35314203304277747, "grad_norm": 0.36995941400527954, "learning_rate": 1.851511651857029e-05, "loss": 0.5262, "step": 16651 }, { "epoch": 0.3531632415007105, "grad_norm": 0.3923221230506897, "learning_rate": 1.8514941650337026e-05, "loss": 0.564, "step": 16652 }, { "epoch": 0.3531844499586435, "grad_norm": 0.35765400528907776, "learning_rate": 1.8514766772633518e-05, "loss": 0.5495, "step": 16653 }, { "epoch": 0.35320565841657653, "grad_norm": 0.33728769421577454, "learning_rate": 1.8514591885459958e-05, "loss": 0.4658, "step": 16654 }, { "epoch": 0.35322686687450955, "grad_norm": 0.3418625593185425, "learning_rate": 1.8514416988816543e-05, "loss": 0.4767, "step": 16655 }, { "epoch": 0.35324807533244257, "grad_norm": 0.34724855422973633, "learning_rate": 1.851424208270347e-05, "loss": 0.4565, "step": 16656 }, { "epoch": 0.3532692837903756, "grad_norm": 0.3677084743976593, "learning_rate": 1.851406716712093e-05, "loss": 0.568, "step": 16657 }, { "epoch": 0.3532904922483086, "grad_norm": 0.3319052755832672, "learning_rate": 1.851389224206912e-05, "loss": 0.5579, "step": 16658 }, { "epoch": 0.3533117007062416, "grad_norm": 0.38956159353256226, "learning_rate": 1.8513717307548232e-05, "loss": 0.4942, "step": 16659 }, { "epoch": 0.35333290916417465, "grad_norm": 0.34999313950538635, "learning_rate": 1.851354236355846e-05, "loss": 0.4797, "step": 16660 }, { "epoch": 0.3533541176221077, "grad_norm": 0.49277085065841675, "learning_rate": 1.85133674101e-05, "loss": 0.5612, "step": 16661 }, { "epoch": 0.35337532608004074, "grad_norm": 0.3497299551963806, "learning_rate": 1.851319244717305e-05, "loss": 0.5335, "step": 16662 }, { "epoch": 0.35339653453797376, "grad_norm": 0.40429413318634033, "learning_rate": 1.85130174747778e-05, "loss": 0.44, "step": 16663 }, { "epoch": 0.3534177429959068, "grad_norm": 0.34273210167884827, "learning_rate": 1.8512842492914444e-05, "loss": 0.536, "step": 16664 }, { "epoch": 0.3534389514538398, "grad_norm": 0.3397967219352722, "learning_rate": 1.851266750158318e-05, "loss": 0.4782, "step": 16665 }, { "epoch": 0.3534601599117728, "grad_norm": 0.33934664726257324, "learning_rate": 1.85124925007842e-05, "loss": 0.5139, "step": 16666 }, { "epoch": 0.35348136836970584, "grad_norm": 0.33810609579086304, "learning_rate": 1.85123174905177e-05, "loss": 0.459, "step": 16667 }, { "epoch": 0.35350257682763886, "grad_norm": 0.38000473380088806, "learning_rate": 1.8512142470783877e-05, "loss": 0.4838, "step": 16668 }, { "epoch": 0.3535237852855719, "grad_norm": 0.35475748777389526, "learning_rate": 1.8511967441582922e-05, "loss": 0.4747, "step": 16669 }, { "epoch": 0.3535449937435049, "grad_norm": 0.3105151653289795, "learning_rate": 1.851179240291503e-05, "loss": 0.5288, "step": 16670 }, { "epoch": 0.3535662022014379, "grad_norm": 0.3779343366622925, "learning_rate": 1.8511617354780394e-05, "loss": 0.5278, "step": 16671 }, { "epoch": 0.35358741065937094, "grad_norm": 0.39073994755744934, "learning_rate": 1.8511442297179214e-05, "loss": 0.5773, "step": 16672 }, { "epoch": 0.35360861911730396, "grad_norm": 0.3274768590927124, "learning_rate": 1.8511267230111682e-05, "loss": 0.4302, "step": 16673 }, { "epoch": 0.353629827575237, "grad_norm": 0.3903936743736267, "learning_rate": 1.851109215357799e-05, "loss": 0.5323, "step": 16674 }, { "epoch": 0.35365103603317005, "grad_norm": 0.34018582105636597, "learning_rate": 1.851091706757834e-05, "loss": 0.4634, "step": 16675 }, { "epoch": 0.35367224449110307, "grad_norm": 0.3373004198074341, "learning_rate": 1.851074197211292e-05, "loss": 0.5018, "step": 16676 }, { "epoch": 0.3536934529490361, "grad_norm": 0.3869844973087311, "learning_rate": 1.8510566867181925e-05, "loss": 0.5048, "step": 16677 }, { "epoch": 0.3537146614069691, "grad_norm": 0.3442310690879822, "learning_rate": 1.851039175278555e-05, "loss": 0.4805, "step": 16678 }, { "epoch": 0.35373586986490213, "grad_norm": 0.3849005401134491, "learning_rate": 1.8510216628923994e-05, "loss": 0.385, "step": 16679 }, { "epoch": 0.35375707832283515, "grad_norm": 0.48637598752975464, "learning_rate": 1.851004149559745e-05, "loss": 0.5137, "step": 16680 }, { "epoch": 0.35377828678076817, "grad_norm": 0.3490608036518097, "learning_rate": 1.850986635280611e-05, "loss": 0.5202, "step": 16681 }, { "epoch": 0.3537994952387012, "grad_norm": 0.3643234074115753, "learning_rate": 1.850969120055017e-05, "loss": 0.451, "step": 16682 }, { "epoch": 0.3538207036966342, "grad_norm": 0.3274242877960205, "learning_rate": 1.8509516038829827e-05, "loss": 0.4494, "step": 16683 }, { "epoch": 0.35384191215456723, "grad_norm": 0.370274156332016, "learning_rate": 1.8509340867645272e-05, "loss": 0.5177, "step": 16684 }, { "epoch": 0.35386312061250025, "grad_norm": 0.3714214265346527, "learning_rate": 1.8509165686996704e-05, "loss": 0.5222, "step": 16685 }, { "epoch": 0.35388432907043327, "grad_norm": 0.33228516578674316, "learning_rate": 1.8508990496884314e-05, "loss": 0.5006, "step": 16686 }, { "epoch": 0.3539055375283663, "grad_norm": 0.3568190336227417, "learning_rate": 1.8508815297308303e-05, "loss": 0.5257, "step": 16687 }, { "epoch": 0.35392674598629936, "grad_norm": 0.3789653778076172, "learning_rate": 1.8508640088268857e-05, "loss": 0.4965, "step": 16688 }, { "epoch": 0.3539479544442324, "grad_norm": 0.35781317949295044, "learning_rate": 1.8508464869766175e-05, "loss": 0.5726, "step": 16689 }, { "epoch": 0.3539691629021654, "grad_norm": 0.36790144443511963, "learning_rate": 1.8508289641800456e-05, "loss": 0.5246, "step": 16690 }, { "epoch": 0.3539903713600984, "grad_norm": 0.33230286836624146, "learning_rate": 1.8508114404371887e-05, "loss": 0.4073, "step": 16691 }, { "epoch": 0.35401157981803144, "grad_norm": 0.3176261782646179, "learning_rate": 1.8507939157480673e-05, "loss": 0.4894, "step": 16692 }, { "epoch": 0.35403278827596446, "grad_norm": 0.47708040475845337, "learning_rate": 1.8507763901126997e-05, "loss": 0.5624, "step": 16693 }, { "epoch": 0.3540539967338975, "grad_norm": 0.7436766028404236, "learning_rate": 1.850758863531106e-05, "loss": 0.5195, "step": 16694 }, { "epoch": 0.3540752051918305, "grad_norm": 0.4876982569694519, "learning_rate": 1.8507413360033062e-05, "loss": 0.4652, "step": 16695 }, { "epoch": 0.3540964136497635, "grad_norm": 0.3562956750392914, "learning_rate": 1.850723807529319e-05, "loss": 0.5005, "step": 16696 }, { "epoch": 0.35411762210769654, "grad_norm": 0.3468625247478485, "learning_rate": 1.8507062781091643e-05, "loss": 0.5247, "step": 16697 }, { "epoch": 0.35413883056562956, "grad_norm": 0.3390912711620331, "learning_rate": 1.850688747742861e-05, "loss": 0.5452, "step": 16698 }, { "epoch": 0.3541600390235626, "grad_norm": 0.32321274280548096, "learning_rate": 1.8506712164304293e-05, "loss": 0.5768, "step": 16699 }, { "epoch": 0.3541812474814956, "grad_norm": 0.32064899802207947, "learning_rate": 1.850653684171889e-05, "loss": 0.5035, "step": 16700 }, { "epoch": 0.3542024559394286, "grad_norm": 0.3653370141983032, "learning_rate": 1.8506361509672585e-05, "loss": 0.5273, "step": 16701 }, { "epoch": 0.3542236643973617, "grad_norm": 0.3712261915206909, "learning_rate": 1.850618616816558e-05, "loss": 0.5129, "step": 16702 }, { "epoch": 0.3542448728552947, "grad_norm": 0.3359024226665497, "learning_rate": 1.8506010817198068e-05, "loss": 0.5626, "step": 16703 }, { "epoch": 0.35426608131322773, "grad_norm": 0.35328397154808044, "learning_rate": 1.8505835456770246e-05, "loss": 0.5403, "step": 16704 }, { "epoch": 0.35428728977116075, "grad_norm": 0.38027718663215637, "learning_rate": 1.8505660086882306e-05, "loss": 0.5494, "step": 16705 }, { "epoch": 0.35430849822909377, "grad_norm": 0.3503448963165283, "learning_rate": 1.8505484707534448e-05, "loss": 0.5651, "step": 16706 }, { "epoch": 0.3543297066870268, "grad_norm": 0.338439404964447, "learning_rate": 1.8505309318726858e-05, "loss": 0.5173, "step": 16707 }, { "epoch": 0.3543509151449598, "grad_norm": 0.3635404109954834, "learning_rate": 1.850513392045974e-05, "loss": 0.4584, "step": 16708 }, { "epoch": 0.35437212360289283, "grad_norm": 0.8340663313865662, "learning_rate": 1.8504958512733287e-05, "loss": 0.5124, "step": 16709 }, { "epoch": 0.35439333206082585, "grad_norm": 0.3522525727748871, "learning_rate": 1.8504783095547695e-05, "loss": 0.5427, "step": 16710 }, { "epoch": 0.35441454051875887, "grad_norm": 0.345083624124527, "learning_rate": 1.8504607668903153e-05, "loss": 0.5184, "step": 16711 }, { "epoch": 0.3544357489766919, "grad_norm": 0.36239245533943176, "learning_rate": 1.850443223279986e-05, "loss": 0.575, "step": 16712 }, { "epoch": 0.3544569574346249, "grad_norm": 0.38114428520202637, "learning_rate": 1.8504256787238013e-05, "loss": 0.5646, "step": 16713 }, { "epoch": 0.35447816589255793, "grad_norm": 0.34657609462738037, "learning_rate": 1.8504081332217804e-05, "loss": 0.4594, "step": 16714 }, { "epoch": 0.354499374350491, "grad_norm": 0.42314788699150085, "learning_rate": 1.8503905867739435e-05, "loss": 0.5008, "step": 16715 }, { "epoch": 0.354520582808424, "grad_norm": 0.3135046660900116, "learning_rate": 1.850373039380309e-05, "loss": 0.502, "step": 16716 }, { "epoch": 0.35454179126635704, "grad_norm": 0.3369363844394684, "learning_rate": 1.850355491040897e-05, "loss": 0.4808, "step": 16717 }, { "epoch": 0.35456299972429006, "grad_norm": 0.3250705897808075, "learning_rate": 1.8503379417557274e-05, "loss": 0.5183, "step": 16718 }, { "epoch": 0.3545842081822231, "grad_norm": 0.3075212240219116, "learning_rate": 1.8503203915248187e-05, "loss": 0.5175, "step": 16719 }, { "epoch": 0.3546054166401561, "grad_norm": 0.35927191376686096, "learning_rate": 1.8503028403481917e-05, "loss": 0.5193, "step": 16720 }, { "epoch": 0.3546266250980891, "grad_norm": 0.44283750653266907, "learning_rate": 1.850285288225865e-05, "loss": 0.4857, "step": 16721 }, { "epoch": 0.35464783355602214, "grad_norm": 0.6750427484512329, "learning_rate": 1.8502677351578584e-05, "loss": 0.5274, "step": 16722 }, { "epoch": 0.35466904201395516, "grad_norm": 0.3488800823688507, "learning_rate": 1.8502501811441914e-05, "loss": 0.5567, "step": 16723 }, { "epoch": 0.3546902504718882, "grad_norm": 0.5126579999923706, "learning_rate": 1.8502326261848836e-05, "loss": 0.6533, "step": 16724 }, { "epoch": 0.3547114589298212, "grad_norm": 0.38964974880218506, "learning_rate": 1.8502150702799544e-05, "loss": 0.4271, "step": 16725 }, { "epoch": 0.3547326673877542, "grad_norm": 0.3549775183200836, "learning_rate": 1.8501975134294233e-05, "loss": 0.532, "step": 16726 }, { "epoch": 0.35475387584568724, "grad_norm": 0.34778884053230286, "learning_rate": 1.85017995563331e-05, "loss": 0.5001, "step": 16727 }, { "epoch": 0.35477508430362026, "grad_norm": 0.32067322731018066, "learning_rate": 1.850162396891634e-05, "loss": 0.5045, "step": 16728 }, { "epoch": 0.35479629276155333, "grad_norm": 0.3400460183620453, "learning_rate": 1.8501448372044145e-05, "loss": 0.4418, "step": 16729 }, { "epoch": 0.35481750121948635, "grad_norm": 0.33152657747268677, "learning_rate": 1.8501272765716716e-05, "loss": 0.5721, "step": 16730 }, { "epoch": 0.3548387096774194, "grad_norm": 0.34869685769081116, "learning_rate": 1.8501097149934247e-05, "loss": 0.4779, "step": 16731 }, { "epoch": 0.3548599181353524, "grad_norm": 0.335517555475235, "learning_rate": 1.8500921524696926e-05, "loss": 0.5929, "step": 16732 }, { "epoch": 0.3548811265932854, "grad_norm": 0.4007965326309204, "learning_rate": 1.850074589000496e-05, "loss": 0.5832, "step": 16733 }, { "epoch": 0.35490233505121843, "grad_norm": 0.3254159688949585, "learning_rate": 1.8500570245858535e-05, "loss": 0.564, "step": 16734 }, { "epoch": 0.35492354350915145, "grad_norm": 0.33814823627471924, "learning_rate": 1.8500394592257848e-05, "loss": 0.5, "step": 16735 }, { "epoch": 0.35494475196708447, "grad_norm": 0.3317605257034302, "learning_rate": 1.8500218929203097e-05, "loss": 0.5366, "step": 16736 }, { "epoch": 0.3549659604250175, "grad_norm": 0.386400043964386, "learning_rate": 1.8500043256694476e-05, "loss": 0.512, "step": 16737 }, { "epoch": 0.3549871688829505, "grad_norm": 0.3431074619293213, "learning_rate": 1.8499867574732185e-05, "loss": 0.4557, "step": 16738 }, { "epoch": 0.35500837734088353, "grad_norm": 0.40133658051490784, "learning_rate": 1.8499691883316414e-05, "loss": 0.5102, "step": 16739 }, { "epoch": 0.35502958579881655, "grad_norm": 0.3468307554721832, "learning_rate": 1.8499516182447355e-05, "loss": 0.4732, "step": 16740 }, { "epoch": 0.35505079425674957, "grad_norm": 0.3048466145992279, "learning_rate": 1.8499340472125208e-05, "loss": 0.4947, "step": 16741 }, { "epoch": 0.3550720027146826, "grad_norm": 0.3584098517894745, "learning_rate": 1.8499164752350173e-05, "loss": 0.5172, "step": 16742 }, { "epoch": 0.35509321117261566, "grad_norm": 0.3300006687641144, "learning_rate": 1.8498989023122438e-05, "loss": 0.5627, "step": 16743 }, { "epoch": 0.3551144196305487, "grad_norm": 0.3362843096256256, "learning_rate": 1.8498813284442204e-05, "loss": 0.5184, "step": 16744 }, { "epoch": 0.3551356280884817, "grad_norm": 0.3753701150417328, "learning_rate": 1.8498637536309662e-05, "loss": 0.5171, "step": 16745 }, { "epoch": 0.3551568365464147, "grad_norm": 0.3167239725589752, "learning_rate": 1.8498461778725008e-05, "loss": 0.5364, "step": 16746 }, { "epoch": 0.35517804500434774, "grad_norm": 0.3975321054458618, "learning_rate": 1.849828601168844e-05, "loss": 0.5651, "step": 16747 }, { "epoch": 0.35519925346228076, "grad_norm": 0.3336603343486786, "learning_rate": 1.8498110235200147e-05, "loss": 0.4651, "step": 16748 }, { "epoch": 0.3552204619202138, "grad_norm": 0.32493114471435547, "learning_rate": 1.8497934449260337e-05, "loss": 0.5084, "step": 16749 }, { "epoch": 0.3552416703781468, "grad_norm": 0.4792845547199249, "learning_rate": 1.8497758653869194e-05, "loss": 0.539, "step": 16750 }, { "epoch": 0.3552628788360798, "grad_norm": 0.3514062166213989, "learning_rate": 1.849758284902692e-05, "loss": 0.4621, "step": 16751 }, { "epoch": 0.35528408729401284, "grad_norm": 0.32730263471603394, "learning_rate": 1.8497407034733707e-05, "loss": 0.5347, "step": 16752 }, { "epoch": 0.35530529575194586, "grad_norm": 0.3287620544433594, "learning_rate": 1.849723121098975e-05, "loss": 0.5838, "step": 16753 }, { "epoch": 0.3553265042098789, "grad_norm": 0.38694968819618225, "learning_rate": 1.8497055377795248e-05, "loss": 0.507, "step": 16754 }, { "epoch": 0.3553477126678119, "grad_norm": 0.412242591381073, "learning_rate": 1.8496879535150396e-05, "loss": 0.5359, "step": 16755 }, { "epoch": 0.355368921125745, "grad_norm": 0.3417617678642273, "learning_rate": 1.849670368305539e-05, "loss": 0.5489, "step": 16756 }, { "epoch": 0.355390129583678, "grad_norm": 0.4002702236175537, "learning_rate": 1.8496527821510417e-05, "loss": 0.6212, "step": 16757 }, { "epoch": 0.355411338041611, "grad_norm": 0.3487143814563751, "learning_rate": 1.8496351950515685e-05, "loss": 0.5089, "step": 16758 }, { "epoch": 0.35543254649954403, "grad_norm": 0.33554762601852417, "learning_rate": 1.8496176070071382e-05, "loss": 0.4592, "step": 16759 }, { "epoch": 0.35545375495747705, "grad_norm": 0.35309046506881714, "learning_rate": 1.8496000180177708e-05, "loss": 0.4573, "step": 16760 }, { "epoch": 0.3554749634154101, "grad_norm": 0.3955470025539398, "learning_rate": 1.8495824280834857e-05, "loss": 0.4458, "step": 16761 }, { "epoch": 0.3554961718733431, "grad_norm": 0.33572623133659363, "learning_rate": 1.849564837204302e-05, "loss": 0.5344, "step": 16762 }, { "epoch": 0.3555173803312761, "grad_norm": 0.30498963594436646, "learning_rate": 1.84954724538024e-05, "loss": 0.441, "step": 16763 }, { "epoch": 0.35553858878920913, "grad_norm": 0.35695427656173706, "learning_rate": 1.8495296526113188e-05, "loss": 0.5834, "step": 16764 }, { "epoch": 0.35555979724714215, "grad_norm": 0.3795369267463684, "learning_rate": 1.8495120588975578e-05, "loss": 0.4722, "step": 16765 }, { "epoch": 0.35558100570507517, "grad_norm": 0.38707274198532104, "learning_rate": 1.8494944642389775e-05, "loss": 0.6315, "step": 16766 }, { "epoch": 0.3556022141630082, "grad_norm": 0.7343683838844299, "learning_rate": 1.8494768686355968e-05, "loss": 0.5258, "step": 16767 }, { "epoch": 0.3556234226209412, "grad_norm": 0.4271816611289978, "learning_rate": 1.8494592720874348e-05, "loss": 0.4914, "step": 16768 }, { "epoch": 0.35564463107887423, "grad_norm": 0.35756567120552063, "learning_rate": 1.8494416745945116e-05, "loss": 0.4988, "step": 16769 }, { "epoch": 0.3556658395368073, "grad_norm": 0.4149862229824066, "learning_rate": 1.849424076156847e-05, "loss": 0.5585, "step": 16770 }, { "epoch": 0.3556870479947403, "grad_norm": 0.3617144525051117, "learning_rate": 1.8494064767744603e-05, "loss": 0.5132, "step": 16771 }, { "epoch": 0.35570825645267334, "grad_norm": 0.3432309925556183, "learning_rate": 1.849388876447371e-05, "loss": 0.5382, "step": 16772 }, { "epoch": 0.35572946491060636, "grad_norm": 0.3792441785335541, "learning_rate": 1.849371275175599e-05, "loss": 0.5125, "step": 16773 }, { "epoch": 0.3557506733685394, "grad_norm": 0.4261591136455536, "learning_rate": 1.8493536729591636e-05, "loss": 0.5337, "step": 16774 }, { "epoch": 0.3557718818264724, "grad_norm": 0.36782875657081604, "learning_rate": 1.849336069798084e-05, "loss": 0.4817, "step": 16775 }, { "epoch": 0.3557930902844054, "grad_norm": 0.3607642650604248, "learning_rate": 1.8493184656923806e-05, "loss": 0.5101, "step": 16776 }, { "epoch": 0.35581429874233844, "grad_norm": 0.31912219524383545, "learning_rate": 1.8493008606420725e-05, "loss": 0.5104, "step": 16777 }, { "epoch": 0.35583550720027146, "grad_norm": 0.3257085680961609, "learning_rate": 1.8492832546471794e-05, "loss": 0.4738, "step": 16778 }, { "epoch": 0.3558567156582045, "grad_norm": 0.3435305058956146, "learning_rate": 1.849265647707721e-05, "loss": 0.5519, "step": 16779 }, { "epoch": 0.3558779241161375, "grad_norm": 0.4043879210948944, "learning_rate": 1.8492480398237163e-05, "loss": 0.4947, "step": 16780 }, { "epoch": 0.3558991325740705, "grad_norm": 0.34133878350257874, "learning_rate": 1.8492304309951853e-05, "loss": 0.5421, "step": 16781 }, { "epoch": 0.35592034103200354, "grad_norm": 0.3485604226589203, "learning_rate": 1.8492128212221478e-05, "loss": 0.5475, "step": 16782 }, { "epoch": 0.35594154948993656, "grad_norm": 0.4542514979839325, "learning_rate": 1.8491952105046235e-05, "loss": 0.431, "step": 16783 }, { "epoch": 0.35596275794786963, "grad_norm": 0.3546672761440277, "learning_rate": 1.8491775988426314e-05, "loss": 0.4664, "step": 16784 }, { "epoch": 0.35598396640580265, "grad_norm": 0.4200841784477234, "learning_rate": 1.8491599862361914e-05, "loss": 0.446, "step": 16785 }, { "epoch": 0.3560051748637357, "grad_norm": 0.3004336655139923, "learning_rate": 1.849142372685323e-05, "loss": 0.4818, "step": 16786 }, { "epoch": 0.3560263833216687, "grad_norm": 0.33494922518730164, "learning_rate": 1.8491247581900456e-05, "loss": 0.5557, "step": 16787 }, { "epoch": 0.3560475917796017, "grad_norm": 0.3873303234577179, "learning_rate": 1.849107142750379e-05, "loss": 0.5713, "step": 16788 }, { "epoch": 0.35606880023753473, "grad_norm": 0.29803648591041565, "learning_rate": 1.8490895263663432e-05, "loss": 0.4312, "step": 16789 }, { "epoch": 0.35609000869546775, "grad_norm": 0.3518669009208679, "learning_rate": 1.8490719090379572e-05, "loss": 0.5265, "step": 16790 }, { "epoch": 0.35611121715340077, "grad_norm": 0.3447668254375458, "learning_rate": 1.8490542907652405e-05, "loss": 0.5415, "step": 16791 }, { "epoch": 0.3561324256113338, "grad_norm": 0.3174189031124115, "learning_rate": 1.8490366715482136e-05, "loss": 0.4863, "step": 16792 }, { "epoch": 0.3561536340692668, "grad_norm": 0.4028388559818268, "learning_rate": 1.849019051386895e-05, "loss": 0.5388, "step": 16793 }, { "epoch": 0.35617484252719983, "grad_norm": 0.3252061605453491, "learning_rate": 1.8490014302813055e-05, "loss": 0.4969, "step": 16794 }, { "epoch": 0.35619605098513285, "grad_norm": 0.3735564947128296, "learning_rate": 1.8489838082314632e-05, "loss": 0.4829, "step": 16795 }, { "epoch": 0.35621725944306587, "grad_norm": 0.3553514778614044, "learning_rate": 1.8489661852373887e-05, "loss": 0.6181, "step": 16796 }, { "epoch": 0.35623846790099895, "grad_norm": 0.43055853247642517, "learning_rate": 1.8489485612991017e-05, "loss": 0.4772, "step": 16797 }, { "epoch": 0.35625967635893196, "grad_norm": 0.3606749176979065, "learning_rate": 1.848930936416621e-05, "loss": 0.4671, "step": 16798 }, { "epoch": 0.356280884816865, "grad_norm": 0.3855985999107361, "learning_rate": 1.8489133105899667e-05, "loss": 0.5675, "step": 16799 }, { "epoch": 0.356302093274798, "grad_norm": 0.32455363869667053, "learning_rate": 1.848895683819159e-05, "loss": 0.5092, "step": 16800 }, { "epoch": 0.356323301732731, "grad_norm": 0.34913843870162964, "learning_rate": 1.8488780561042163e-05, "loss": 0.5147, "step": 16801 }, { "epoch": 0.35634451019066404, "grad_norm": 0.33727243542671204, "learning_rate": 1.848860427445159e-05, "loss": 0.5127, "step": 16802 }, { "epoch": 0.35636571864859706, "grad_norm": 0.39575162529945374, "learning_rate": 1.8488427978420064e-05, "loss": 0.476, "step": 16803 }, { "epoch": 0.3563869271065301, "grad_norm": 0.44308775663375854, "learning_rate": 1.8488251672947785e-05, "loss": 0.5053, "step": 16804 }, { "epoch": 0.3564081355644631, "grad_norm": 0.3320169150829315, "learning_rate": 1.8488075358034943e-05, "loss": 0.4876, "step": 16805 }, { "epoch": 0.3564293440223961, "grad_norm": 0.3459865152835846, "learning_rate": 1.8487899033681737e-05, "loss": 0.5482, "step": 16806 }, { "epoch": 0.35645055248032914, "grad_norm": 0.34964442253112793, "learning_rate": 1.8487722699888365e-05, "loss": 0.5819, "step": 16807 }, { "epoch": 0.35647176093826216, "grad_norm": 0.3279361426830292, "learning_rate": 1.8487546356655025e-05, "loss": 0.3826, "step": 16808 }, { "epoch": 0.3564929693961952, "grad_norm": 0.29793035984039307, "learning_rate": 1.8487370003981905e-05, "loss": 0.4496, "step": 16809 }, { "epoch": 0.3565141778541282, "grad_norm": 0.39084866642951965, "learning_rate": 1.8487193641869206e-05, "loss": 0.5494, "step": 16810 }, { "epoch": 0.3565353863120613, "grad_norm": 0.34109413623809814, "learning_rate": 1.8487017270317125e-05, "loss": 0.5478, "step": 16811 }, { "epoch": 0.3565565947699943, "grad_norm": 0.3291741907596588, "learning_rate": 1.8486840889325858e-05, "loss": 0.4924, "step": 16812 }, { "epoch": 0.3565778032279273, "grad_norm": 0.3457718789577484, "learning_rate": 1.84866644988956e-05, "loss": 0.4985, "step": 16813 }, { "epoch": 0.35659901168586033, "grad_norm": 0.3330865502357483, "learning_rate": 1.8486488099026548e-05, "loss": 0.4648, "step": 16814 }, { "epoch": 0.35662022014379335, "grad_norm": 0.31323307752609253, "learning_rate": 1.8486311689718898e-05, "loss": 0.4527, "step": 16815 }, { "epoch": 0.3566414286017264, "grad_norm": 0.46255773305892944, "learning_rate": 1.848613527097284e-05, "loss": 0.5216, "step": 16816 }, { "epoch": 0.3566626370596594, "grad_norm": 0.31507956981658936, "learning_rate": 1.8485958842788584e-05, "loss": 0.5064, "step": 16817 }, { "epoch": 0.3566838455175924, "grad_norm": 0.3690911829471588, "learning_rate": 1.8485782405166314e-05, "loss": 0.4394, "step": 16818 }, { "epoch": 0.35670505397552543, "grad_norm": 0.38669294118881226, "learning_rate": 1.8485605958106233e-05, "loss": 0.5228, "step": 16819 }, { "epoch": 0.35672626243345845, "grad_norm": 0.36780819296836853, "learning_rate": 1.848542950160853e-05, "loss": 0.4517, "step": 16820 }, { "epoch": 0.35674747089139147, "grad_norm": 0.3498172461986542, "learning_rate": 1.848525303567341e-05, "loss": 0.5071, "step": 16821 }, { "epoch": 0.3567686793493245, "grad_norm": 0.3178677558898926, "learning_rate": 1.8485076560301064e-05, "loss": 0.4922, "step": 16822 }, { "epoch": 0.3567898878072575, "grad_norm": 0.37820902466773987, "learning_rate": 1.8484900075491693e-05, "loss": 0.4807, "step": 16823 }, { "epoch": 0.35681109626519053, "grad_norm": 0.3866864740848541, "learning_rate": 1.8484723581245486e-05, "loss": 0.5384, "step": 16824 }, { "epoch": 0.3568323047231236, "grad_norm": 0.3746243119239807, "learning_rate": 1.8484547077562645e-05, "loss": 0.6067, "step": 16825 }, { "epoch": 0.3568535131810566, "grad_norm": 0.3741171360015869, "learning_rate": 1.8484370564443362e-05, "loss": 0.482, "step": 16826 }, { "epoch": 0.35687472163898964, "grad_norm": 0.3309343457221985, "learning_rate": 1.8484194041887838e-05, "loss": 0.4648, "step": 16827 }, { "epoch": 0.35689593009692266, "grad_norm": 0.3573768138885498, "learning_rate": 1.8484017509896264e-05, "loss": 0.5082, "step": 16828 }, { "epoch": 0.3569171385548557, "grad_norm": 0.3477606177330017, "learning_rate": 1.8483840968468842e-05, "loss": 0.5414, "step": 16829 }, { "epoch": 0.3569383470127887, "grad_norm": 0.3540812134742737, "learning_rate": 1.8483664417605768e-05, "loss": 0.6256, "step": 16830 }, { "epoch": 0.3569595554707217, "grad_norm": 0.34950587153434753, "learning_rate": 1.8483487857307235e-05, "loss": 0.5345, "step": 16831 }, { "epoch": 0.35698076392865474, "grad_norm": 0.3483435809612274, "learning_rate": 1.848331128757344e-05, "loss": 0.5064, "step": 16832 }, { "epoch": 0.35700197238658776, "grad_norm": 0.34831503033638, "learning_rate": 1.8483134708404576e-05, "loss": 0.5659, "step": 16833 }, { "epoch": 0.3570231808445208, "grad_norm": 0.31949499249458313, "learning_rate": 1.8482958119800848e-05, "loss": 0.5023, "step": 16834 }, { "epoch": 0.3570443893024538, "grad_norm": 0.3480820655822754, "learning_rate": 1.8482781521762446e-05, "loss": 0.5023, "step": 16835 }, { "epoch": 0.3570655977603868, "grad_norm": 0.3335203230381012, "learning_rate": 1.8482604914289565e-05, "loss": 0.4913, "step": 16836 }, { "epoch": 0.35708680621831984, "grad_norm": 0.34960103034973145, "learning_rate": 1.848242829738241e-05, "loss": 0.4876, "step": 16837 }, { "epoch": 0.3571080146762529, "grad_norm": 0.336427241563797, "learning_rate": 1.8482251671041168e-05, "loss": 0.5301, "step": 16838 }, { "epoch": 0.35712922313418594, "grad_norm": 0.31676071882247925, "learning_rate": 1.848207503526604e-05, "loss": 0.5386, "step": 16839 }, { "epoch": 0.35715043159211896, "grad_norm": 0.3333855867385864, "learning_rate": 1.8481898390057223e-05, "loss": 0.468, "step": 16840 }, { "epoch": 0.357171640050052, "grad_norm": 0.34939926862716675, "learning_rate": 1.8481721735414913e-05, "loss": 0.4761, "step": 16841 }, { "epoch": 0.357192848507985, "grad_norm": 0.3256758451461792, "learning_rate": 1.8481545071339306e-05, "loss": 0.3793, "step": 16842 }, { "epoch": 0.357214056965918, "grad_norm": 0.35757017135620117, "learning_rate": 1.8481368397830594e-05, "loss": 0.5168, "step": 16843 }, { "epoch": 0.35723526542385103, "grad_norm": 0.3484618663787842, "learning_rate": 1.8481191714888984e-05, "loss": 0.4707, "step": 16844 }, { "epoch": 0.35725647388178405, "grad_norm": 0.3674219846725464, "learning_rate": 1.848101502251466e-05, "loss": 0.5509, "step": 16845 }, { "epoch": 0.3572776823397171, "grad_norm": 0.3050861954689026, "learning_rate": 1.848083832070783e-05, "loss": 0.4497, "step": 16846 }, { "epoch": 0.3572988907976501, "grad_norm": 0.3867933452129364, "learning_rate": 1.8480661609468684e-05, "loss": 0.5001, "step": 16847 }, { "epoch": 0.3573200992555831, "grad_norm": 0.34507906436920166, "learning_rate": 1.8480484888797418e-05, "loss": 0.5526, "step": 16848 }, { "epoch": 0.35734130771351613, "grad_norm": 0.3211387097835541, "learning_rate": 1.848030815869423e-05, "loss": 0.4865, "step": 16849 }, { "epoch": 0.35736251617144915, "grad_norm": 0.33242809772491455, "learning_rate": 1.848013141915932e-05, "loss": 0.4857, "step": 16850 }, { "epoch": 0.35738372462938217, "grad_norm": 0.31764355301856995, "learning_rate": 1.8479954670192878e-05, "loss": 0.5321, "step": 16851 }, { "epoch": 0.35740493308731525, "grad_norm": 0.3385099470615387, "learning_rate": 1.8479777911795107e-05, "loss": 0.5516, "step": 16852 }, { "epoch": 0.35742614154524827, "grad_norm": 0.3534304201602936, "learning_rate": 1.84796011439662e-05, "loss": 0.4568, "step": 16853 }, { "epoch": 0.3574473500031813, "grad_norm": 0.35421884059906006, "learning_rate": 1.8479424366706354e-05, "loss": 0.5237, "step": 16854 }, { "epoch": 0.3574685584611143, "grad_norm": 0.3791976869106293, "learning_rate": 1.8479247580015768e-05, "loss": 0.666, "step": 16855 }, { "epoch": 0.3574897669190473, "grad_norm": 0.33003222942352295, "learning_rate": 1.8479070783894632e-05, "loss": 0.5084, "step": 16856 }, { "epoch": 0.35751097537698034, "grad_norm": 0.4524397552013397, "learning_rate": 1.847889397834315e-05, "loss": 0.5615, "step": 16857 }, { "epoch": 0.35753218383491336, "grad_norm": 0.7517585158348083, "learning_rate": 1.8478717163361515e-05, "loss": 0.591, "step": 16858 }, { "epoch": 0.3575533922928464, "grad_norm": 0.35075879096984863, "learning_rate": 1.8478540338949923e-05, "loss": 0.5266, "step": 16859 }, { "epoch": 0.3575746007507794, "grad_norm": 0.35971972346305847, "learning_rate": 1.8478363505108574e-05, "loss": 0.5217, "step": 16860 }, { "epoch": 0.3575958092087124, "grad_norm": 0.33785372972488403, "learning_rate": 1.8478186661837664e-05, "loss": 0.536, "step": 16861 }, { "epoch": 0.35761701766664544, "grad_norm": 0.5279076099395752, "learning_rate": 1.8478009809137384e-05, "loss": 0.4978, "step": 16862 }, { "epoch": 0.35763822612457846, "grad_norm": 0.3415590822696686, "learning_rate": 1.847783294700794e-05, "loss": 0.4629, "step": 16863 }, { "epoch": 0.3576594345825115, "grad_norm": 0.3426077365875244, "learning_rate": 1.847765607544952e-05, "loss": 0.5046, "step": 16864 }, { "epoch": 0.35768064304044456, "grad_norm": 0.33430030941963196, "learning_rate": 1.8477479194462325e-05, "loss": 0.4955, "step": 16865 }, { "epoch": 0.3577018514983776, "grad_norm": 0.3410528302192688, "learning_rate": 1.8477302304046555e-05, "loss": 0.5012, "step": 16866 }, { "epoch": 0.3577230599563106, "grad_norm": 0.33727526664733887, "learning_rate": 1.84771254042024e-05, "loss": 0.465, "step": 16867 }, { "epoch": 0.3577442684142436, "grad_norm": 0.3561154305934906, "learning_rate": 1.847694849493006e-05, "loss": 0.541, "step": 16868 }, { "epoch": 0.35776547687217664, "grad_norm": 0.34219086170196533, "learning_rate": 1.8476771576229734e-05, "loss": 0.4838, "step": 16869 }, { "epoch": 0.35778668533010966, "grad_norm": 0.3667137324810028, "learning_rate": 1.8476594648101614e-05, "loss": 0.4731, "step": 16870 }, { "epoch": 0.3578078937880427, "grad_norm": 0.3890675902366638, "learning_rate": 1.8476417710545898e-05, "loss": 0.5096, "step": 16871 }, { "epoch": 0.3578291022459757, "grad_norm": 0.35594844818115234, "learning_rate": 1.8476240763562784e-05, "loss": 0.4753, "step": 16872 }, { "epoch": 0.3578503107039087, "grad_norm": 0.367235004901886, "learning_rate": 1.847606380715247e-05, "loss": 0.5634, "step": 16873 }, { "epoch": 0.35787151916184173, "grad_norm": 0.3536052107810974, "learning_rate": 1.8475886841315152e-05, "loss": 0.5227, "step": 16874 }, { "epoch": 0.35789272761977475, "grad_norm": 0.3447902202606201, "learning_rate": 1.8475709866051027e-05, "loss": 0.4769, "step": 16875 }, { "epoch": 0.3579139360777078, "grad_norm": 0.3914262652397156, "learning_rate": 1.847553288136029e-05, "loss": 0.5558, "step": 16876 }, { "epoch": 0.3579351445356408, "grad_norm": 0.3483523428440094, "learning_rate": 1.8475355887243136e-05, "loss": 0.491, "step": 16877 }, { "epoch": 0.3579563529935738, "grad_norm": 0.3749096095561981, "learning_rate": 1.8475178883699766e-05, "loss": 0.4833, "step": 16878 }, { "epoch": 0.3579775614515069, "grad_norm": 0.3855377733707428, "learning_rate": 1.847500187073038e-05, "loss": 0.49, "step": 16879 }, { "epoch": 0.3579987699094399, "grad_norm": 0.33898380398750305, "learning_rate": 1.847482484833517e-05, "loss": 0.5352, "step": 16880 }, { "epoch": 0.3580199783673729, "grad_norm": 0.35389140248298645, "learning_rate": 1.847464781651433e-05, "loss": 0.487, "step": 16881 }, { "epoch": 0.35804118682530595, "grad_norm": 0.31543463468551636, "learning_rate": 1.847447077526806e-05, "loss": 0.5277, "step": 16882 }, { "epoch": 0.35806239528323897, "grad_norm": 0.3589022755622864, "learning_rate": 1.847429372459656e-05, "loss": 0.6243, "step": 16883 }, { "epoch": 0.358083603741172, "grad_norm": 0.3648255169391632, "learning_rate": 1.847411666450002e-05, "loss": 0.5623, "step": 16884 }, { "epoch": 0.358104812199105, "grad_norm": 0.3349919319152832, "learning_rate": 1.8473939594978645e-05, "loss": 0.4286, "step": 16885 }, { "epoch": 0.358126020657038, "grad_norm": 0.34384968876838684, "learning_rate": 1.8473762516032626e-05, "loss": 0.5418, "step": 16886 }, { "epoch": 0.35814722911497104, "grad_norm": 0.33255431056022644, "learning_rate": 1.8473585427662166e-05, "loss": 0.549, "step": 16887 }, { "epoch": 0.35816843757290406, "grad_norm": 0.2879052460193634, "learning_rate": 1.8473408329867454e-05, "loss": 0.4394, "step": 16888 }, { "epoch": 0.3581896460308371, "grad_norm": 0.3616712987422943, "learning_rate": 1.8473231222648688e-05, "loss": 0.5966, "step": 16889 }, { "epoch": 0.3582108544887701, "grad_norm": 0.3114815056324005, "learning_rate": 1.8473054106006074e-05, "loss": 0.4259, "step": 16890 }, { "epoch": 0.3582320629467031, "grad_norm": 0.31993064284324646, "learning_rate": 1.8472876979939798e-05, "loss": 0.4188, "step": 16891 }, { "epoch": 0.35825327140463614, "grad_norm": 0.3554445505142212, "learning_rate": 1.8472699844450063e-05, "loss": 0.466, "step": 16892 }, { "epoch": 0.3582744798625692, "grad_norm": 0.3194480836391449, "learning_rate": 1.8472522699537067e-05, "loss": 0.4655, "step": 16893 }, { "epoch": 0.35829568832050224, "grad_norm": 0.345427930355072, "learning_rate": 1.8472345545201006e-05, "loss": 0.6029, "step": 16894 }, { "epoch": 0.35831689677843526, "grad_norm": 0.36836618185043335, "learning_rate": 1.8472168381442072e-05, "loss": 0.5622, "step": 16895 }, { "epoch": 0.3583381052363683, "grad_norm": 0.3569222092628479, "learning_rate": 1.847199120826047e-05, "loss": 0.5966, "step": 16896 }, { "epoch": 0.3583593136943013, "grad_norm": 0.36704468727111816, "learning_rate": 1.8471814025656387e-05, "loss": 0.5165, "step": 16897 }, { "epoch": 0.3583805221522343, "grad_norm": 0.3638158142566681, "learning_rate": 1.847163683363003e-05, "loss": 0.4638, "step": 16898 }, { "epoch": 0.35840173061016734, "grad_norm": 0.30954429507255554, "learning_rate": 1.8471459632181596e-05, "loss": 0.4679, "step": 16899 }, { "epoch": 0.35842293906810035, "grad_norm": 0.3347557783126831, "learning_rate": 1.847128242131127e-05, "loss": 0.54, "step": 16900 }, { "epoch": 0.3584441475260334, "grad_norm": 0.36626946926116943, "learning_rate": 1.8471105201019262e-05, "loss": 0.5054, "step": 16901 }, { "epoch": 0.3584653559839664, "grad_norm": 0.3351796269416809, "learning_rate": 1.8470927971305762e-05, "loss": 0.4986, "step": 16902 }, { "epoch": 0.3584865644418994, "grad_norm": 0.8800338506698608, "learning_rate": 1.8470750732170973e-05, "loss": 0.4814, "step": 16903 }, { "epoch": 0.35850777289983243, "grad_norm": 0.3364466726779938, "learning_rate": 1.8470573483615086e-05, "loss": 0.57, "step": 16904 }, { "epoch": 0.35852898135776545, "grad_norm": 0.3448542654514313, "learning_rate": 1.84703962256383e-05, "loss": 0.5933, "step": 16905 }, { "epoch": 0.35855018981569853, "grad_norm": 0.3230378329753876, "learning_rate": 1.8470218958240815e-05, "loss": 0.5257, "step": 16906 }, { "epoch": 0.35857139827363155, "grad_norm": 0.40099433064460754, "learning_rate": 1.8470041681422827e-05, "loss": 0.4167, "step": 16907 }, { "epoch": 0.35859260673156457, "grad_norm": 0.3382734954357147, "learning_rate": 1.846986439518453e-05, "loss": 0.4645, "step": 16908 }, { "epoch": 0.3586138151894976, "grad_norm": 0.37138456106185913, "learning_rate": 1.8469687099526122e-05, "loss": 0.4602, "step": 16909 }, { "epoch": 0.3586350236474306, "grad_norm": 0.33730196952819824, "learning_rate": 1.8469509794447805e-05, "loss": 0.4545, "step": 16910 }, { "epoch": 0.3586562321053636, "grad_norm": 0.4124041497707367, "learning_rate": 1.846933247994977e-05, "loss": 0.5005, "step": 16911 }, { "epoch": 0.35867744056329665, "grad_norm": 0.3265816867351532, "learning_rate": 1.846915515603222e-05, "loss": 0.4362, "step": 16912 }, { "epoch": 0.35869864902122967, "grad_norm": 0.32623565196990967, "learning_rate": 1.846897782269535e-05, "loss": 0.4361, "step": 16913 }, { "epoch": 0.3587198574791627, "grad_norm": 0.4019855558872223, "learning_rate": 1.846880047993935e-05, "loss": 0.6009, "step": 16914 }, { "epoch": 0.3587410659370957, "grad_norm": 0.3354824483394623, "learning_rate": 1.8468623127764428e-05, "loss": 0.4994, "step": 16915 }, { "epoch": 0.3587622743950287, "grad_norm": 0.30817997455596924, "learning_rate": 1.8468445766170778e-05, "loss": 0.5169, "step": 16916 }, { "epoch": 0.35878348285296174, "grad_norm": 0.31733137369155884, "learning_rate": 1.8468268395158593e-05, "loss": 0.4833, "step": 16917 }, { "epoch": 0.35880469131089476, "grad_norm": 0.3278380036354065, "learning_rate": 1.8468091014728076e-05, "loss": 0.5127, "step": 16918 }, { "epoch": 0.3588258997688278, "grad_norm": 0.3699551224708557, "learning_rate": 1.8467913624879423e-05, "loss": 0.5403, "step": 16919 }, { "epoch": 0.35884710822676086, "grad_norm": 0.3058226406574249, "learning_rate": 1.8467736225612824e-05, "loss": 0.4462, "step": 16920 }, { "epoch": 0.3588683166846939, "grad_norm": 0.4075412154197693, "learning_rate": 1.846755881692849e-05, "loss": 0.5258, "step": 16921 }, { "epoch": 0.3588895251426269, "grad_norm": 0.3478969633579254, "learning_rate": 1.8467381398826605e-05, "loss": 0.5548, "step": 16922 }, { "epoch": 0.3589107336005599, "grad_norm": 0.37409183382987976, "learning_rate": 1.846720397130737e-05, "loss": 0.5765, "step": 16923 }, { "epoch": 0.35893194205849294, "grad_norm": 0.33605316281318665, "learning_rate": 1.8467026534370995e-05, "loss": 0.4753, "step": 16924 }, { "epoch": 0.35895315051642596, "grad_norm": 0.3774681091308594, "learning_rate": 1.8466849088017658e-05, "loss": 0.4991, "step": 16925 }, { "epoch": 0.358974358974359, "grad_norm": 0.3821834921836853, "learning_rate": 1.8466671632247565e-05, "loss": 0.5094, "step": 16926 }, { "epoch": 0.358995567432292, "grad_norm": 0.34136661887168884, "learning_rate": 1.846649416706092e-05, "loss": 0.5648, "step": 16927 }, { "epoch": 0.359016775890225, "grad_norm": 0.3222203850746155, "learning_rate": 1.8466316692457905e-05, "loss": 0.4289, "step": 16928 }, { "epoch": 0.35903798434815803, "grad_norm": 0.3649762272834778, "learning_rate": 1.8466139208438728e-05, "loss": 0.4696, "step": 16929 }, { "epoch": 0.35905919280609105, "grad_norm": 0.384084552526474, "learning_rate": 1.846596171500359e-05, "loss": 0.4787, "step": 16930 }, { "epoch": 0.3590804012640241, "grad_norm": 0.34640344977378845, "learning_rate": 1.8465784212152678e-05, "loss": 0.5131, "step": 16931 }, { "epoch": 0.3591016097219571, "grad_norm": 0.34614041447639465, "learning_rate": 1.8465606699886195e-05, "loss": 0.5237, "step": 16932 }, { "epoch": 0.3591228181798901, "grad_norm": 0.32188546657562256, "learning_rate": 1.8465429178204338e-05, "loss": 0.5305, "step": 16933 }, { "epoch": 0.3591440266378232, "grad_norm": 0.36817076802253723, "learning_rate": 1.8465251647107307e-05, "loss": 0.5726, "step": 16934 }, { "epoch": 0.3591652350957562, "grad_norm": 0.40040281414985657, "learning_rate": 1.8465074106595294e-05, "loss": 0.6193, "step": 16935 }, { "epoch": 0.3591864435536892, "grad_norm": 0.32194986939430237, "learning_rate": 1.84648965566685e-05, "loss": 0.4535, "step": 16936 }, { "epoch": 0.35920765201162225, "grad_norm": 0.42489778995513916, "learning_rate": 1.846471899732712e-05, "loss": 0.5635, "step": 16937 }, { "epoch": 0.35922886046955527, "grad_norm": 0.3507263660430908, "learning_rate": 1.8464541428571358e-05, "loss": 0.4988, "step": 16938 }, { "epoch": 0.3592500689274883, "grad_norm": 0.32471713423728943, "learning_rate": 1.84643638504014e-05, "loss": 0.5003, "step": 16939 }, { "epoch": 0.3592712773854213, "grad_norm": 0.3562885522842407, "learning_rate": 1.846418626281745e-05, "loss": 0.4617, "step": 16940 }, { "epoch": 0.3592924858433543, "grad_norm": 0.32854363322257996, "learning_rate": 1.846400866581971e-05, "loss": 0.572, "step": 16941 }, { "epoch": 0.35931369430128735, "grad_norm": 0.33287519216537476, "learning_rate": 1.846383105940837e-05, "loss": 0.471, "step": 16942 }, { "epoch": 0.35933490275922036, "grad_norm": 0.31081122159957886, "learning_rate": 1.8463653443583635e-05, "loss": 0.5108, "step": 16943 }, { "epoch": 0.3593561112171534, "grad_norm": 0.39630311727523804, "learning_rate": 1.846347581834569e-05, "loss": 0.4751, "step": 16944 }, { "epoch": 0.3593773196750864, "grad_norm": 0.3588181734085083, "learning_rate": 1.846329818369475e-05, "loss": 0.4976, "step": 16945 }, { "epoch": 0.3593985281330194, "grad_norm": 0.3727039694786072, "learning_rate": 1.8463120539631e-05, "loss": 0.4432, "step": 16946 }, { "epoch": 0.3594197365909525, "grad_norm": 0.34162530303001404, "learning_rate": 1.846294288615464e-05, "loss": 0.5037, "step": 16947 }, { "epoch": 0.3594409450488855, "grad_norm": 0.3604586124420166, "learning_rate": 1.8462765223265867e-05, "loss": 0.5804, "step": 16948 }, { "epoch": 0.35946215350681854, "grad_norm": 0.34805554151535034, "learning_rate": 1.8462587550964882e-05, "loss": 0.5332, "step": 16949 }, { "epoch": 0.35948336196475156, "grad_norm": 0.32949143648147583, "learning_rate": 1.846240986925188e-05, "loss": 0.5437, "step": 16950 }, { "epoch": 0.3595045704226846, "grad_norm": 0.34668201208114624, "learning_rate": 1.846223217812706e-05, "loss": 0.503, "step": 16951 }, { "epoch": 0.3595257788806176, "grad_norm": 0.36016523838043213, "learning_rate": 1.8462054477590618e-05, "loss": 0.4565, "step": 16952 }, { "epoch": 0.3595469873385506, "grad_norm": 0.33876052498817444, "learning_rate": 1.8461876767642756e-05, "loss": 0.5086, "step": 16953 }, { "epoch": 0.35956819579648364, "grad_norm": 0.40461617708206177, "learning_rate": 1.8461699048283663e-05, "loss": 0.5312, "step": 16954 }, { "epoch": 0.35958940425441666, "grad_norm": 0.35891276597976685, "learning_rate": 1.8461521319513543e-05, "loss": 0.5102, "step": 16955 }, { "epoch": 0.3596106127123497, "grad_norm": 0.3801726996898651, "learning_rate": 1.8461343581332593e-05, "loss": 0.5669, "step": 16956 }, { "epoch": 0.3596318211702827, "grad_norm": 0.40802252292633057, "learning_rate": 1.846116583374101e-05, "loss": 0.5419, "step": 16957 }, { "epoch": 0.3596530296282157, "grad_norm": 0.400597482919693, "learning_rate": 1.8460988076738995e-05, "loss": 0.5459, "step": 16958 }, { "epoch": 0.35967423808614873, "grad_norm": 0.33859682083129883, "learning_rate": 1.846081031032674e-05, "loss": 0.439, "step": 16959 }, { "epoch": 0.35969544654408175, "grad_norm": 0.4959884285926819, "learning_rate": 1.8460632534504442e-05, "loss": 0.4777, "step": 16960 }, { "epoch": 0.35971665500201483, "grad_norm": 0.34762394428253174, "learning_rate": 1.846045474927231e-05, "loss": 0.4667, "step": 16961 }, { "epoch": 0.35973786345994785, "grad_norm": 0.3480904698371887, "learning_rate": 1.8460276954630527e-05, "loss": 0.5139, "step": 16962 }, { "epoch": 0.35975907191788087, "grad_norm": 0.3727750778198242, "learning_rate": 1.84600991505793e-05, "loss": 0.4498, "step": 16963 }, { "epoch": 0.3597802803758139, "grad_norm": 0.3628807067871094, "learning_rate": 1.845992133711882e-05, "loss": 0.5385, "step": 16964 }, { "epoch": 0.3598014888337469, "grad_norm": 0.3598943054676056, "learning_rate": 1.8459743514249294e-05, "loss": 0.4736, "step": 16965 }, { "epoch": 0.3598226972916799, "grad_norm": 0.3539235293865204, "learning_rate": 1.8459565681970915e-05, "loss": 0.5822, "step": 16966 }, { "epoch": 0.35984390574961295, "grad_norm": 0.312157541513443, "learning_rate": 1.845938784028388e-05, "loss": 0.4548, "step": 16967 }, { "epoch": 0.35986511420754597, "grad_norm": 0.371187686920166, "learning_rate": 1.8459209989188385e-05, "loss": 0.4917, "step": 16968 }, { "epoch": 0.359886322665479, "grad_norm": 0.3612973988056183, "learning_rate": 1.845903212868463e-05, "loss": 0.4904, "step": 16969 }, { "epoch": 0.359907531123412, "grad_norm": 0.3661154806613922, "learning_rate": 1.8458854258772815e-05, "loss": 0.6286, "step": 16970 }, { "epoch": 0.359928739581345, "grad_norm": 0.3088376522064209, "learning_rate": 1.8458676379453136e-05, "loss": 0.438, "step": 16971 }, { "epoch": 0.35994994803927804, "grad_norm": 0.3225156366825104, "learning_rate": 1.8458498490725788e-05, "loss": 0.4035, "step": 16972 }, { "epoch": 0.35997115649721106, "grad_norm": 0.3099547326564789, "learning_rate": 1.8458320592590976e-05, "loss": 0.4295, "step": 16973 }, { "epoch": 0.3599923649551441, "grad_norm": 0.3450694680213928, "learning_rate": 1.845814268504889e-05, "loss": 0.5257, "step": 16974 }, { "epoch": 0.36001357341307716, "grad_norm": 0.5350041389465332, "learning_rate": 1.8457964768099732e-05, "loss": 0.5703, "step": 16975 }, { "epoch": 0.3600347818710102, "grad_norm": 0.37004414200782776, "learning_rate": 1.84577868417437e-05, "loss": 0.5155, "step": 16976 }, { "epoch": 0.3600559903289432, "grad_norm": 0.40326324105262756, "learning_rate": 1.845760890598099e-05, "loss": 0.553, "step": 16977 }, { "epoch": 0.3600771987868762, "grad_norm": 0.31229424476623535, "learning_rate": 1.84574309608118e-05, "loss": 0.484, "step": 16978 }, { "epoch": 0.36009840724480924, "grad_norm": 0.3542760908603668, "learning_rate": 1.8457253006236326e-05, "loss": 0.5191, "step": 16979 }, { "epoch": 0.36011961570274226, "grad_norm": 0.3740803897380829, "learning_rate": 1.8457075042254775e-05, "loss": 0.52, "step": 16980 }, { "epoch": 0.3601408241606753, "grad_norm": 0.40210863947868347, "learning_rate": 1.8456897068867334e-05, "loss": 0.5204, "step": 16981 }, { "epoch": 0.3601620326186083, "grad_norm": 0.36931461095809937, "learning_rate": 1.845671908607421e-05, "loss": 0.5177, "step": 16982 }, { "epoch": 0.3601832410765413, "grad_norm": 0.35138484835624695, "learning_rate": 1.845654109387559e-05, "loss": 0.4855, "step": 16983 }, { "epoch": 0.36020444953447434, "grad_norm": 0.4362279176712036, "learning_rate": 1.8456363092271684e-05, "loss": 0.5245, "step": 16984 }, { "epoch": 0.36022565799240736, "grad_norm": 0.3328821361064911, "learning_rate": 1.8456185081262684e-05, "loss": 0.5077, "step": 16985 }, { "epoch": 0.3602468664503404, "grad_norm": 0.4956260621547699, "learning_rate": 1.8456007060848784e-05, "loss": 0.4318, "step": 16986 }, { "epoch": 0.3602680749082734, "grad_norm": 0.33310726284980774, "learning_rate": 1.845582903103019e-05, "loss": 0.538, "step": 16987 }, { "epoch": 0.36028928336620647, "grad_norm": 0.3192608952522278, "learning_rate": 1.8455650991807097e-05, "loss": 0.5163, "step": 16988 }, { "epoch": 0.3603104918241395, "grad_norm": 0.3779379427433014, "learning_rate": 1.84554729431797e-05, "loss": 0.4874, "step": 16989 }, { "epoch": 0.3603317002820725, "grad_norm": 0.35810789465904236, "learning_rate": 1.84552948851482e-05, "loss": 0.4405, "step": 16990 }, { "epoch": 0.36035290874000553, "grad_norm": 0.3299301266670227, "learning_rate": 1.8455116817712796e-05, "loss": 0.507, "step": 16991 }, { "epoch": 0.36037411719793855, "grad_norm": 0.40169939398765564, "learning_rate": 1.8454938740873684e-05, "loss": 0.4452, "step": 16992 }, { "epoch": 0.36039532565587157, "grad_norm": 0.33724740147590637, "learning_rate": 1.8454760654631062e-05, "loss": 0.4825, "step": 16993 }, { "epoch": 0.3604165341138046, "grad_norm": 0.3512168526649475, "learning_rate": 1.8454582558985128e-05, "loss": 0.556, "step": 16994 }, { "epoch": 0.3604377425717376, "grad_norm": 0.36146363615989685, "learning_rate": 1.8454404453936085e-05, "loss": 0.5272, "step": 16995 }, { "epoch": 0.3604589510296706, "grad_norm": 0.33505773544311523, "learning_rate": 1.8454226339484122e-05, "loss": 0.4518, "step": 16996 }, { "epoch": 0.36048015948760365, "grad_norm": 0.36277636885643005, "learning_rate": 1.8454048215629443e-05, "loss": 0.5069, "step": 16997 }, { "epoch": 0.36050136794553667, "grad_norm": 0.3648465871810913, "learning_rate": 1.8453870082372244e-05, "loss": 0.5871, "step": 16998 }, { "epoch": 0.3605225764034697, "grad_norm": 0.3840145468711853, "learning_rate": 1.8453691939712725e-05, "loss": 0.5163, "step": 16999 }, { "epoch": 0.3605437848614027, "grad_norm": 0.35620519518852234, "learning_rate": 1.8453513787651083e-05, "loss": 0.5007, "step": 17000 }, { "epoch": 0.3605649933193357, "grad_norm": 0.3654842674732208, "learning_rate": 1.8453335626187518e-05, "loss": 0.5534, "step": 17001 }, { "epoch": 0.3605862017772688, "grad_norm": 0.3427562117576599, "learning_rate": 1.8453157455322226e-05, "loss": 0.4726, "step": 17002 }, { "epoch": 0.3606074102352018, "grad_norm": 0.3599609136581421, "learning_rate": 1.8452979275055403e-05, "loss": 0.4956, "step": 17003 }, { "epoch": 0.36062861869313484, "grad_norm": 0.33680328726768494, "learning_rate": 1.8452801085387257e-05, "loss": 0.532, "step": 17004 }, { "epoch": 0.36064982715106786, "grad_norm": 0.5357456803321838, "learning_rate": 1.8452622886317973e-05, "loss": 0.5256, "step": 17005 }, { "epoch": 0.3606710356090009, "grad_norm": 0.3210676908493042, "learning_rate": 1.8452444677847755e-05, "loss": 0.4786, "step": 17006 }, { "epoch": 0.3606922440669339, "grad_norm": 0.33010172843933105, "learning_rate": 1.8452266459976803e-05, "loss": 0.5412, "step": 17007 }, { "epoch": 0.3607134525248669, "grad_norm": 0.4045872688293457, "learning_rate": 1.8452088232705314e-05, "loss": 0.5397, "step": 17008 }, { "epoch": 0.36073466098279994, "grad_norm": 0.31250110268592834, "learning_rate": 1.8451909996033486e-05, "loss": 0.4299, "step": 17009 }, { "epoch": 0.36075586944073296, "grad_norm": 0.3417510986328125, "learning_rate": 1.8451731749961518e-05, "loss": 0.5168, "step": 17010 }, { "epoch": 0.360777077898666, "grad_norm": 0.3075961768627167, "learning_rate": 1.8451553494489604e-05, "loss": 0.4838, "step": 17011 }, { "epoch": 0.360798286356599, "grad_norm": 0.4509100019931793, "learning_rate": 1.8451375229617946e-05, "loss": 0.5833, "step": 17012 }, { "epoch": 0.360819494814532, "grad_norm": 0.3221625089645386, "learning_rate": 1.8451196955346746e-05, "loss": 0.5089, "step": 17013 }, { "epoch": 0.36084070327246504, "grad_norm": 0.35040798783302307, "learning_rate": 1.8451018671676195e-05, "loss": 0.5534, "step": 17014 }, { "epoch": 0.36086191173039806, "grad_norm": 0.31052711606025696, "learning_rate": 1.8450840378606494e-05, "loss": 0.4972, "step": 17015 }, { "epoch": 0.36088312018833113, "grad_norm": 0.38348791003227234, "learning_rate": 1.8450662076137846e-05, "loss": 0.5238, "step": 17016 }, { "epoch": 0.36090432864626415, "grad_norm": 0.3534412682056427, "learning_rate": 1.845048376427044e-05, "loss": 0.4795, "step": 17017 }, { "epoch": 0.36092553710419717, "grad_norm": 0.31250521540641785, "learning_rate": 1.8450305443004485e-05, "loss": 0.4631, "step": 17018 }, { "epoch": 0.3609467455621302, "grad_norm": 0.3204275369644165, "learning_rate": 1.845012711234017e-05, "loss": 0.4077, "step": 17019 }, { "epoch": 0.3609679540200632, "grad_norm": 0.36383184790611267, "learning_rate": 1.8449948772277694e-05, "loss": 0.4921, "step": 17020 }, { "epoch": 0.36098916247799623, "grad_norm": 0.34631457924842834, "learning_rate": 1.844977042281726e-05, "loss": 0.4757, "step": 17021 }, { "epoch": 0.36101037093592925, "grad_norm": 0.3290431499481201, "learning_rate": 1.8449592063959067e-05, "loss": 0.5386, "step": 17022 }, { "epoch": 0.36103157939386227, "grad_norm": 0.31403177976608276, "learning_rate": 1.844941369570331e-05, "loss": 0.4495, "step": 17023 }, { "epoch": 0.3610527878517953, "grad_norm": 0.48183107376098633, "learning_rate": 1.844923531805019e-05, "loss": 0.5462, "step": 17024 }, { "epoch": 0.3610739963097283, "grad_norm": 0.35433295369148254, "learning_rate": 1.84490569309999e-05, "loss": 0.5137, "step": 17025 }, { "epoch": 0.3610952047676613, "grad_norm": 0.3904920816421509, "learning_rate": 1.844887853455265e-05, "loss": 0.5719, "step": 17026 }, { "epoch": 0.36111641322559435, "grad_norm": 0.3441784977912903, "learning_rate": 1.8448700128708623e-05, "loss": 0.4271, "step": 17027 }, { "epoch": 0.36113762168352737, "grad_norm": 0.362629771232605, "learning_rate": 1.8448521713468027e-05, "loss": 0.5206, "step": 17028 }, { "epoch": 0.36115883014146044, "grad_norm": 0.32137760519981384, "learning_rate": 1.8448343288831057e-05, "loss": 0.4872, "step": 17029 }, { "epoch": 0.36118003859939346, "grad_norm": 1.484617829322815, "learning_rate": 1.8448164854797917e-05, "loss": 0.4631, "step": 17030 }, { "epoch": 0.3612012470573265, "grad_norm": 0.37516847252845764, "learning_rate": 1.84479864113688e-05, "loss": 0.4309, "step": 17031 }, { "epoch": 0.3612224555152595, "grad_norm": 0.39171573519706726, "learning_rate": 1.8447807958543905e-05, "loss": 0.4925, "step": 17032 }, { "epoch": 0.3612436639731925, "grad_norm": 0.42945003509521484, "learning_rate": 1.8447629496323433e-05, "loss": 0.4766, "step": 17033 }, { "epoch": 0.36126487243112554, "grad_norm": 0.30166709423065186, "learning_rate": 1.844745102470758e-05, "loss": 0.4492, "step": 17034 }, { "epoch": 0.36128608088905856, "grad_norm": 0.3214300274848938, "learning_rate": 1.8447272543696546e-05, "loss": 0.4406, "step": 17035 }, { "epoch": 0.3613072893469916, "grad_norm": 0.36500486731529236, "learning_rate": 1.8447094053290528e-05, "loss": 0.5915, "step": 17036 }, { "epoch": 0.3613284978049246, "grad_norm": 0.3358963131904602, "learning_rate": 1.8446915553489724e-05, "loss": 0.4676, "step": 17037 }, { "epoch": 0.3613497062628576, "grad_norm": 0.45852723717689514, "learning_rate": 1.844673704429434e-05, "loss": 0.4995, "step": 17038 }, { "epoch": 0.36137091472079064, "grad_norm": 0.5273903608322144, "learning_rate": 1.844655852570456e-05, "loss": 0.4555, "step": 17039 }, { "epoch": 0.36139212317872366, "grad_norm": 0.35544928908348083, "learning_rate": 1.8446379997720596e-05, "loss": 0.4632, "step": 17040 }, { "epoch": 0.3614133316366567, "grad_norm": 0.28957635164260864, "learning_rate": 1.844620146034264e-05, "loss": 0.4095, "step": 17041 }, { "epoch": 0.3614345400945897, "grad_norm": 0.39821329712867737, "learning_rate": 1.8446022913570895e-05, "loss": 0.5935, "step": 17042 }, { "epoch": 0.36145574855252277, "grad_norm": 0.38450366258621216, "learning_rate": 1.844584435740555e-05, "loss": 0.543, "step": 17043 }, { "epoch": 0.3614769570104558, "grad_norm": 0.3159148097038269, "learning_rate": 1.8445665791846816e-05, "loss": 0.4958, "step": 17044 }, { "epoch": 0.3614981654683888, "grad_norm": 0.37474140524864197, "learning_rate": 1.8445487216894884e-05, "loss": 0.4883, "step": 17045 }, { "epoch": 0.36151937392632183, "grad_norm": 0.3350070118904114, "learning_rate": 1.8445308632549954e-05, "loss": 0.5087, "step": 17046 }, { "epoch": 0.36154058238425485, "grad_norm": 0.32642245292663574, "learning_rate": 1.8445130038812228e-05, "loss": 0.5181, "step": 17047 }, { "epoch": 0.36156179084218787, "grad_norm": 0.3083662688732147, "learning_rate": 1.8444951435681896e-05, "loss": 0.538, "step": 17048 }, { "epoch": 0.3615829993001209, "grad_norm": 0.32703858613967896, "learning_rate": 1.8444772823159168e-05, "loss": 0.4996, "step": 17049 }, { "epoch": 0.3616042077580539, "grad_norm": 0.33510109782218933, "learning_rate": 1.8444594201244233e-05, "loss": 0.4863, "step": 17050 }, { "epoch": 0.36162541621598693, "grad_norm": 0.3892994225025177, "learning_rate": 1.8444415569937296e-05, "loss": 0.4783, "step": 17051 }, { "epoch": 0.36164662467391995, "grad_norm": 0.3094751238822937, "learning_rate": 1.8444236929238556e-05, "loss": 0.4788, "step": 17052 }, { "epoch": 0.36166783313185297, "grad_norm": 0.35698196291923523, "learning_rate": 1.8444058279148205e-05, "loss": 0.4955, "step": 17053 }, { "epoch": 0.361689041589786, "grad_norm": 0.3448968231678009, "learning_rate": 1.844387961966645e-05, "loss": 0.5006, "step": 17054 }, { "epoch": 0.361710250047719, "grad_norm": 0.3605106770992279, "learning_rate": 1.8443700950793478e-05, "loss": 0.5492, "step": 17055 }, { "epoch": 0.3617314585056521, "grad_norm": 0.3633250594139099, "learning_rate": 1.84435222725295e-05, "loss": 0.6078, "step": 17056 }, { "epoch": 0.3617526669635851, "grad_norm": 0.3351028859615326, "learning_rate": 1.844334358487471e-05, "loss": 0.4478, "step": 17057 }, { "epoch": 0.3617738754215181, "grad_norm": 0.3487391769886017, "learning_rate": 1.8443164887829305e-05, "loss": 0.4594, "step": 17058 }, { "epoch": 0.36179508387945114, "grad_norm": 0.3676568269729614, "learning_rate": 1.8442986181393486e-05, "loss": 0.5647, "step": 17059 }, { "epoch": 0.36181629233738416, "grad_norm": 0.3299049735069275, "learning_rate": 1.8442807465567453e-05, "loss": 0.4998, "step": 17060 }, { "epoch": 0.3618375007953172, "grad_norm": 0.3564964532852173, "learning_rate": 1.84426287403514e-05, "loss": 0.4913, "step": 17061 }, { "epoch": 0.3618587092532502, "grad_norm": 0.4470377564430237, "learning_rate": 1.8442450005745528e-05, "loss": 0.5701, "step": 17062 }, { "epoch": 0.3618799177111832, "grad_norm": 0.37713831663131714, "learning_rate": 1.844227126175004e-05, "loss": 0.5528, "step": 17063 }, { "epoch": 0.36190112616911624, "grad_norm": 0.3990801274776459, "learning_rate": 1.844209250836513e-05, "loss": 0.4809, "step": 17064 }, { "epoch": 0.36192233462704926, "grad_norm": 0.3834560513496399, "learning_rate": 1.8441913745590997e-05, "loss": 0.5754, "step": 17065 }, { "epoch": 0.3619435430849823, "grad_norm": 0.3296085000038147, "learning_rate": 1.8441734973427844e-05, "loss": 0.5532, "step": 17066 }, { "epoch": 0.3619647515429153, "grad_norm": 0.3242809772491455, "learning_rate": 1.8441556191875862e-05, "loss": 0.4511, "step": 17067 }, { "epoch": 0.3619859600008483, "grad_norm": 0.42422476410865784, "learning_rate": 1.8441377400935256e-05, "loss": 0.5306, "step": 17068 }, { "epoch": 0.36200716845878134, "grad_norm": 0.40672415494918823, "learning_rate": 1.8441198600606225e-05, "loss": 0.6477, "step": 17069 }, { "epoch": 0.3620283769167144, "grad_norm": 0.354887455701828, "learning_rate": 1.8441019790888965e-05, "loss": 0.5351, "step": 17070 }, { "epoch": 0.36204958537464743, "grad_norm": 0.3414323627948761, "learning_rate": 1.844084097178368e-05, "loss": 0.4855, "step": 17071 }, { "epoch": 0.36207079383258045, "grad_norm": 0.3238109350204468, "learning_rate": 1.844066214329056e-05, "loss": 0.5968, "step": 17072 }, { "epoch": 0.36209200229051347, "grad_norm": 0.31783080101013184, "learning_rate": 1.844048330540981e-05, "loss": 0.5245, "step": 17073 }, { "epoch": 0.3621132107484465, "grad_norm": 0.36413565278053284, "learning_rate": 1.8440304458141627e-05, "loss": 0.5454, "step": 17074 }, { "epoch": 0.3621344192063795, "grad_norm": 0.3721500337123871, "learning_rate": 1.8440125601486212e-05, "loss": 0.6444, "step": 17075 }, { "epoch": 0.36215562766431253, "grad_norm": 0.33190011978149414, "learning_rate": 1.8439946735443765e-05, "loss": 0.4988, "step": 17076 }, { "epoch": 0.36217683612224555, "grad_norm": 0.3801925480365753, "learning_rate": 1.8439767860014478e-05, "loss": 0.5754, "step": 17077 }, { "epoch": 0.36219804458017857, "grad_norm": 0.3921225368976593, "learning_rate": 1.8439588975198555e-05, "loss": 0.5424, "step": 17078 }, { "epoch": 0.3622192530381116, "grad_norm": 0.3579495847225189, "learning_rate": 1.84394100809962e-05, "loss": 0.524, "step": 17079 }, { "epoch": 0.3622404614960446, "grad_norm": 0.3172661364078522, "learning_rate": 1.84392311774076e-05, "loss": 0.4576, "step": 17080 }, { "epoch": 0.3622616699539776, "grad_norm": 0.3334307372570038, "learning_rate": 1.8439052264432962e-05, "loss": 0.5332, "step": 17081 }, { "epoch": 0.36228287841191065, "grad_norm": 0.3292711675167084, "learning_rate": 1.8438873342072484e-05, "loss": 0.5421, "step": 17082 }, { "epoch": 0.36230408686984367, "grad_norm": 0.3596668839454651, "learning_rate": 1.8438694410326365e-05, "loss": 0.4848, "step": 17083 }, { "epoch": 0.36232529532777674, "grad_norm": 0.4215928018093109, "learning_rate": 1.8438515469194803e-05, "loss": 0.5466, "step": 17084 }, { "epoch": 0.36234650378570976, "grad_norm": 0.3322880268096924, "learning_rate": 1.8438336518678e-05, "loss": 0.5386, "step": 17085 }, { "epoch": 0.3623677122436428, "grad_norm": 0.34956419467926025, "learning_rate": 1.8438157558776148e-05, "loss": 0.5195, "step": 17086 }, { "epoch": 0.3623889207015758, "grad_norm": 0.3689426779747009, "learning_rate": 1.843797858948945e-05, "loss": 0.6667, "step": 17087 }, { "epoch": 0.3624101291595088, "grad_norm": 0.3403904139995575, "learning_rate": 1.8437799610818107e-05, "loss": 0.4795, "step": 17088 }, { "epoch": 0.36243133761744184, "grad_norm": 0.3473533093929291, "learning_rate": 1.843762062276232e-05, "loss": 0.506, "step": 17089 }, { "epoch": 0.36245254607537486, "grad_norm": 0.3606411814689636, "learning_rate": 1.8437441625322283e-05, "loss": 0.5424, "step": 17090 }, { "epoch": 0.3624737545333079, "grad_norm": 0.38860049843788147, "learning_rate": 1.8437262618498196e-05, "loss": 0.6026, "step": 17091 }, { "epoch": 0.3624949629912409, "grad_norm": 0.3223314583301544, "learning_rate": 1.843708360229026e-05, "loss": 0.4823, "step": 17092 }, { "epoch": 0.3625161714491739, "grad_norm": 0.30893781781196594, "learning_rate": 1.843690457669867e-05, "loss": 0.503, "step": 17093 }, { "epoch": 0.36253737990710694, "grad_norm": 0.35522735118865967, "learning_rate": 1.843672554172363e-05, "loss": 0.4911, "step": 17094 }, { "epoch": 0.36255858836503996, "grad_norm": 0.3297460377216339, "learning_rate": 1.8436546497365334e-05, "loss": 0.5777, "step": 17095 }, { "epoch": 0.362579796822973, "grad_norm": 0.3220798075199127, "learning_rate": 1.8436367443623987e-05, "loss": 0.4565, "step": 17096 }, { "epoch": 0.36260100528090605, "grad_norm": 0.3014848828315735, "learning_rate": 1.8436188380499785e-05, "loss": 0.5208, "step": 17097 }, { "epoch": 0.36262221373883907, "grad_norm": 0.3634660840034485, "learning_rate": 1.843600930799293e-05, "loss": 0.5365, "step": 17098 }, { "epoch": 0.3626434221967721, "grad_norm": 0.34145253896713257, "learning_rate": 1.8435830226103613e-05, "loss": 0.5742, "step": 17099 }, { "epoch": 0.3626646306547051, "grad_norm": 0.3348310589790344, "learning_rate": 1.8435651134832046e-05, "loss": 0.55, "step": 17100 }, { "epoch": 0.36268583911263813, "grad_norm": 0.3432115316390991, "learning_rate": 1.8435472034178416e-05, "loss": 0.5896, "step": 17101 }, { "epoch": 0.36270704757057115, "grad_norm": 0.3862287998199463, "learning_rate": 1.8435292924142928e-05, "loss": 0.4883, "step": 17102 }, { "epoch": 0.36272825602850417, "grad_norm": 0.3414907157421112, "learning_rate": 1.8435113804725784e-05, "loss": 0.5852, "step": 17103 }, { "epoch": 0.3627494644864372, "grad_norm": 0.3190632164478302, "learning_rate": 1.8434934675927177e-05, "loss": 0.4715, "step": 17104 }, { "epoch": 0.3627706729443702, "grad_norm": 0.39184844493865967, "learning_rate": 1.843475553774731e-05, "loss": 0.5488, "step": 17105 }, { "epoch": 0.36279188140230323, "grad_norm": 0.3921510577201843, "learning_rate": 1.8434576390186377e-05, "loss": 0.5104, "step": 17106 }, { "epoch": 0.36281308986023625, "grad_norm": 1.3250815868377686, "learning_rate": 1.8434397233244586e-05, "loss": 0.4836, "step": 17107 }, { "epoch": 0.36283429831816927, "grad_norm": 0.3115392327308655, "learning_rate": 1.8434218066922128e-05, "loss": 0.5073, "step": 17108 }, { "epoch": 0.3628555067761023, "grad_norm": 0.3244062066078186, "learning_rate": 1.843403889121921e-05, "loss": 0.4711, "step": 17109 }, { "epoch": 0.3628767152340353, "grad_norm": 0.5638554096221924, "learning_rate": 1.8433859706136024e-05, "loss": 0.4657, "step": 17110 }, { "epoch": 0.3628979236919684, "grad_norm": 0.727583646774292, "learning_rate": 1.8433680511672774e-05, "loss": 0.5161, "step": 17111 }, { "epoch": 0.3629191321499014, "grad_norm": 0.34525755047798157, "learning_rate": 1.843350130782966e-05, "loss": 0.4968, "step": 17112 }, { "epoch": 0.3629403406078344, "grad_norm": 0.33664441108703613, "learning_rate": 1.8433322094606877e-05, "loss": 0.4467, "step": 17113 }, { "epoch": 0.36296154906576744, "grad_norm": 0.29788461327552795, "learning_rate": 1.8433142872004627e-05, "loss": 0.436, "step": 17114 }, { "epoch": 0.36298275752370046, "grad_norm": 0.3628034293651581, "learning_rate": 1.8432963640023106e-05, "loss": 0.4954, "step": 17115 }, { "epoch": 0.3630039659816335, "grad_norm": 0.35113978385925293, "learning_rate": 1.8432784398662518e-05, "loss": 0.5425, "step": 17116 }, { "epoch": 0.3630251744395665, "grad_norm": 0.34871160984039307, "learning_rate": 1.8432605147923063e-05, "loss": 0.5111, "step": 17117 }, { "epoch": 0.3630463828974995, "grad_norm": 0.38171201944351196, "learning_rate": 1.843242588780493e-05, "loss": 0.488, "step": 17118 }, { "epoch": 0.36306759135543254, "grad_norm": 0.3334727883338928, "learning_rate": 1.8432246618308338e-05, "loss": 0.5374, "step": 17119 }, { "epoch": 0.36308879981336556, "grad_norm": 0.34839358925819397, "learning_rate": 1.8432067339433467e-05, "loss": 0.471, "step": 17120 }, { "epoch": 0.3631100082712986, "grad_norm": 0.4655212163925171, "learning_rate": 1.8431888051180522e-05, "loss": 0.5619, "step": 17121 }, { "epoch": 0.3631312167292316, "grad_norm": 0.348212867975235, "learning_rate": 1.8431708753549706e-05, "loss": 0.5875, "step": 17122 }, { "epoch": 0.3631524251871646, "grad_norm": 0.3283397853374481, "learning_rate": 1.8431529446541222e-05, "loss": 0.5487, "step": 17123 }, { "epoch": 0.36317363364509764, "grad_norm": 0.36180251836776733, "learning_rate": 1.843135013015526e-05, "loss": 0.5178, "step": 17124 }, { "epoch": 0.3631948421030307, "grad_norm": 0.3898181617259979, "learning_rate": 1.8431170804392023e-05, "loss": 0.6509, "step": 17125 }, { "epoch": 0.36321605056096373, "grad_norm": 0.3270586133003235, "learning_rate": 1.843099146925171e-05, "loss": 0.4827, "step": 17126 }, { "epoch": 0.36323725901889675, "grad_norm": 0.3335094153881073, "learning_rate": 1.8430812124734525e-05, "loss": 0.5717, "step": 17127 }, { "epoch": 0.36325846747682977, "grad_norm": 0.355783075094223, "learning_rate": 1.843063277084066e-05, "loss": 0.4257, "step": 17128 }, { "epoch": 0.3632796759347628, "grad_norm": 0.3927319049835205, "learning_rate": 1.8430453407570323e-05, "loss": 0.6227, "step": 17129 }, { "epoch": 0.3633008843926958, "grad_norm": 0.40804269909858704, "learning_rate": 1.8430274034923708e-05, "loss": 0.4411, "step": 17130 }, { "epoch": 0.36332209285062883, "grad_norm": 0.3563334345817566, "learning_rate": 1.8430094652901014e-05, "loss": 0.5458, "step": 17131 }, { "epoch": 0.36334330130856185, "grad_norm": 0.33977997303009033, "learning_rate": 1.842991526150244e-05, "loss": 0.4667, "step": 17132 }, { "epoch": 0.36336450976649487, "grad_norm": 0.33854928612709045, "learning_rate": 1.8429735860728193e-05, "loss": 0.4717, "step": 17133 }, { "epoch": 0.3633857182244279, "grad_norm": 0.3067173957824707, "learning_rate": 1.842955645057846e-05, "loss": 0.6548, "step": 17134 }, { "epoch": 0.3634069266823609, "grad_norm": 0.325789213180542, "learning_rate": 1.8429377031053455e-05, "loss": 0.4781, "step": 17135 }, { "epoch": 0.36342813514029393, "grad_norm": 0.34126588702201843, "learning_rate": 1.8429197602153366e-05, "loss": 0.4582, "step": 17136 }, { "epoch": 0.36344934359822695, "grad_norm": 0.37562209367752075, "learning_rate": 1.8429018163878397e-05, "loss": 0.5111, "step": 17137 }, { "epoch": 0.36347055205616, "grad_norm": 0.3529178500175476, "learning_rate": 1.842883871622875e-05, "loss": 0.5382, "step": 17138 }, { "epoch": 0.36349176051409304, "grad_norm": 0.365818053483963, "learning_rate": 1.8428659259204614e-05, "loss": 0.5105, "step": 17139 }, { "epoch": 0.36351296897202606, "grad_norm": 0.3860878050327301, "learning_rate": 1.8428479792806207e-05, "loss": 0.5361, "step": 17140 }, { "epoch": 0.3635341774299591, "grad_norm": 0.45057886838912964, "learning_rate": 1.842830031703371e-05, "loss": 0.6084, "step": 17141 }, { "epoch": 0.3635553858878921, "grad_norm": 0.4018448293209076, "learning_rate": 1.8428120831887333e-05, "loss": 0.5351, "step": 17142 }, { "epoch": 0.3635765943458251, "grad_norm": 0.3374621570110321, "learning_rate": 1.8427941337367272e-05, "loss": 0.4761, "step": 17143 }, { "epoch": 0.36359780280375814, "grad_norm": 0.36101803183555603, "learning_rate": 1.8427761833473732e-05, "loss": 0.6338, "step": 17144 }, { "epoch": 0.36361901126169116, "grad_norm": 0.33170193433761597, "learning_rate": 1.8427582320206905e-05, "loss": 0.5282, "step": 17145 }, { "epoch": 0.3636402197196242, "grad_norm": 0.3958534896373749, "learning_rate": 1.8427402797566994e-05, "loss": 0.5225, "step": 17146 }, { "epoch": 0.3636614281775572, "grad_norm": 0.4579853117465973, "learning_rate": 1.84272232655542e-05, "loss": 0.4848, "step": 17147 }, { "epoch": 0.3636826366354902, "grad_norm": 0.33325356245040894, "learning_rate": 1.842704372416872e-05, "loss": 0.4953, "step": 17148 }, { "epoch": 0.36370384509342324, "grad_norm": 0.32549914717674255, "learning_rate": 1.8426864173410758e-05, "loss": 0.5077, "step": 17149 }, { "epoch": 0.36372505355135626, "grad_norm": 0.3560425341129303, "learning_rate": 1.842668461328051e-05, "loss": 0.487, "step": 17150 }, { "epoch": 0.3637462620092893, "grad_norm": 0.32530373334884644, "learning_rate": 1.8426505043778177e-05, "loss": 0.5128, "step": 17151 }, { "epoch": 0.36376747046722235, "grad_norm": 0.3200506865978241, "learning_rate": 1.8426325464903955e-05, "loss": 0.5543, "step": 17152 }, { "epoch": 0.3637886789251554, "grad_norm": 0.3493921160697937, "learning_rate": 1.842614587665805e-05, "loss": 0.4595, "step": 17153 }, { "epoch": 0.3638098873830884, "grad_norm": 0.31325045228004456, "learning_rate": 1.8425966279040655e-05, "loss": 0.5031, "step": 17154 }, { "epoch": 0.3638310958410214, "grad_norm": 0.33702191710472107, "learning_rate": 1.842578667205198e-05, "loss": 0.4482, "step": 17155 }, { "epoch": 0.36385230429895443, "grad_norm": 0.3402796685695648, "learning_rate": 1.8425607055692214e-05, "loss": 0.4636, "step": 17156 }, { "epoch": 0.36387351275688745, "grad_norm": 0.37427908182144165, "learning_rate": 1.8425427429961563e-05, "loss": 0.5489, "step": 17157 }, { "epoch": 0.36389472121482047, "grad_norm": 0.33420446515083313, "learning_rate": 1.842524779486022e-05, "loss": 0.4824, "step": 17158 }, { "epoch": 0.3639159296727535, "grad_norm": 0.373590350151062, "learning_rate": 1.8425068150388396e-05, "loss": 0.52, "step": 17159 }, { "epoch": 0.3639371381306865, "grad_norm": 0.6270990371704102, "learning_rate": 1.842488849654628e-05, "loss": 0.5601, "step": 17160 }, { "epoch": 0.36395834658861953, "grad_norm": 0.3266480267047882, "learning_rate": 1.842470883333408e-05, "loss": 0.5347, "step": 17161 }, { "epoch": 0.36397955504655255, "grad_norm": 0.34981250762939453, "learning_rate": 1.842452916075199e-05, "loss": 0.5528, "step": 17162 }, { "epoch": 0.36400076350448557, "grad_norm": 0.3059384524822235, "learning_rate": 1.842434947880021e-05, "loss": 0.4466, "step": 17163 }, { "epoch": 0.3640219719624186, "grad_norm": 0.35460060834884644, "learning_rate": 1.8424169787478946e-05, "loss": 0.5408, "step": 17164 }, { "epoch": 0.3640431804203516, "grad_norm": 0.48295408487319946, "learning_rate": 1.842399008678839e-05, "loss": 0.6867, "step": 17165 }, { "epoch": 0.3640643888782847, "grad_norm": 0.34215304255485535, "learning_rate": 1.8423810376728744e-05, "loss": 0.4733, "step": 17166 }, { "epoch": 0.3640855973362177, "grad_norm": 0.35511139035224915, "learning_rate": 1.8423630657300213e-05, "loss": 0.5059, "step": 17167 }, { "epoch": 0.3641068057941507, "grad_norm": 0.33994147181510925, "learning_rate": 1.842345092850299e-05, "loss": 0.4573, "step": 17168 }, { "epoch": 0.36412801425208374, "grad_norm": 0.39749523997306824, "learning_rate": 1.842327119033728e-05, "loss": 0.5701, "step": 17169 }, { "epoch": 0.36414922271001676, "grad_norm": 0.33187076449394226, "learning_rate": 1.842309144280328e-05, "loss": 0.5034, "step": 17170 }, { "epoch": 0.3641704311679498, "grad_norm": 0.3146049380302429, "learning_rate": 1.842291168590119e-05, "loss": 0.524, "step": 17171 }, { "epoch": 0.3641916396258828, "grad_norm": 0.347905695438385, "learning_rate": 1.8422731919631217e-05, "loss": 0.4638, "step": 17172 }, { "epoch": 0.3642128480838158, "grad_norm": 0.3366910219192505, "learning_rate": 1.8422552143993545e-05, "loss": 0.5016, "step": 17173 }, { "epoch": 0.36423405654174884, "grad_norm": 0.35271576046943665, "learning_rate": 1.842237235898839e-05, "loss": 0.5868, "step": 17174 }, { "epoch": 0.36425526499968186, "grad_norm": 0.34745270013809204, "learning_rate": 1.8422192564615946e-05, "loss": 0.5183, "step": 17175 }, { "epoch": 0.3642764734576149, "grad_norm": 0.3746395707130432, "learning_rate": 1.8422012760876412e-05, "loss": 0.5383, "step": 17176 }, { "epoch": 0.3642976819155479, "grad_norm": 0.33535316586494446, "learning_rate": 1.8421832947769987e-05, "loss": 0.4031, "step": 17177 }, { "epoch": 0.3643188903734809, "grad_norm": 6.009862422943115, "learning_rate": 1.8421653125296873e-05, "loss": 0.5114, "step": 17178 }, { "epoch": 0.364340098831414, "grad_norm": 0.38616499304771423, "learning_rate": 1.842147329345727e-05, "loss": 0.5696, "step": 17179 }, { "epoch": 0.364361307289347, "grad_norm": 0.34216636419296265, "learning_rate": 1.8421293452251375e-05, "loss": 0.4533, "step": 17180 }, { "epoch": 0.36438251574728003, "grad_norm": 0.3249475359916687, "learning_rate": 1.8421113601679396e-05, "loss": 0.5245, "step": 17181 }, { "epoch": 0.36440372420521305, "grad_norm": 0.4291870892047882, "learning_rate": 1.8420933741741523e-05, "loss": 0.4595, "step": 17182 }, { "epoch": 0.3644249326631461, "grad_norm": 0.3432180881500244, "learning_rate": 1.8420753872437962e-05, "loss": 0.5972, "step": 17183 }, { "epoch": 0.3644461411210791, "grad_norm": 0.34713679552078247, "learning_rate": 1.842057399376891e-05, "loss": 0.5187, "step": 17184 }, { "epoch": 0.3644673495790121, "grad_norm": 0.41238802671432495, "learning_rate": 1.8420394105734575e-05, "loss": 0.5151, "step": 17185 }, { "epoch": 0.36448855803694513, "grad_norm": 0.34037041664123535, "learning_rate": 1.8420214208335145e-05, "loss": 0.5468, "step": 17186 }, { "epoch": 0.36450976649487815, "grad_norm": 0.3253506124019623, "learning_rate": 1.8420034301570827e-05, "loss": 0.4126, "step": 17187 }, { "epoch": 0.36453097495281117, "grad_norm": 0.3782762885093689, "learning_rate": 1.8419854385441818e-05, "loss": 0.5458, "step": 17188 }, { "epoch": 0.3645521834107442, "grad_norm": 0.3874988853931427, "learning_rate": 1.841967445994832e-05, "loss": 0.5003, "step": 17189 }, { "epoch": 0.3645733918686772, "grad_norm": 0.31704026460647583, "learning_rate": 1.841949452509054e-05, "loss": 0.5235, "step": 17190 }, { "epoch": 0.36459460032661023, "grad_norm": 0.32431355118751526, "learning_rate": 1.8419314580868666e-05, "loss": 0.541, "step": 17191 }, { "epoch": 0.36461580878454325, "grad_norm": 0.4741685092449188, "learning_rate": 1.8419134627282905e-05, "loss": 0.4978, "step": 17192 }, { "epoch": 0.3646370172424763, "grad_norm": 0.35224345326423645, "learning_rate": 1.8418954664333455e-05, "loss": 0.5107, "step": 17193 }, { "epoch": 0.36465822570040934, "grad_norm": 0.361546128988266, "learning_rate": 1.8418774692020513e-05, "loss": 0.4969, "step": 17194 }, { "epoch": 0.36467943415834236, "grad_norm": 0.40190213918685913, "learning_rate": 1.841859471034429e-05, "loss": 0.4906, "step": 17195 }, { "epoch": 0.3647006426162754, "grad_norm": 0.326635479927063, "learning_rate": 1.8418414719304974e-05, "loss": 0.4845, "step": 17196 }, { "epoch": 0.3647218510742084, "grad_norm": 0.3470793664455414, "learning_rate": 1.8418234718902774e-05, "loss": 0.5176, "step": 17197 }, { "epoch": 0.3647430595321414, "grad_norm": 0.36236876249313354, "learning_rate": 1.8418054709137884e-05, "loss": 0.4971, "step": 17198 }, { "epoch": 0.36476426799007444, "grad_norm": 0.34701791405677795, "learning_rate": 1.8417874690010506e-05, "loss": 0.4806, "step": 17199 }, { "epoch": 0.36478547644800746, "grad_norm": 0.324929803609848, "learning_rate": 1.841769466152084e-05, "loss": 0.4762, "step": 17200 }, { "epoch": 0.3648066849059405, "grad_norm": 0.3894655108451843, "learning_rate": 1.841751462366909e-05, "loss": 0.5086, "step": 17201 }, { "epoch": 0.3648278933638735, "grad_norm": 0.3393324911594391, "learning_rate": 1.8417334576455452e-05, "loss": 0.5157, "step": 17202 }, { "epoch": 0.3648491018218065, "grad_norm": 0.3478423058986664, "learning_rate": 1.841715451988013e-05, "loss": 0.475, "step": 17203 }, { "epoch": 0.36487031027973954, "grad_norm": 0.6368734836578369, "learning_rate": 1.841697445394332e-05, "loss": 0.5566, "step": 17204 }, { "epoch": 0.36489151873767256, "grad_norm": 0.32632195949554443, "learning_rate": 1.8416794378645225e-05, "loss": 0.4048, "step": 17205 }, { "epoch": 0.36491272719560564, "grad_norm": 0.3285840153694153, "learning_rate": 1.841661429398604e-05, "loss": 0.5028, "step": 17206 }, { "epoch": 0.36493393565353865, "grad_norm": 0.3772524297237396, "learning_rate": 1.8416434199965975e-05, "loss": 0.4529, "step": 17207 }, { "epoch": 0.3649551441114717, "grad_norm": 0.33700692653656006, "learning_rate": 1.8416254096585225e-05, "loss": 0.5135, "step": 17208 }, { "epoch": 0.3649763525694047, "grad_norm": 0.41211116313934326, "learning_rate": 1.841607398384399e-05, "loss": 0.5438, "step": 17209 }, { "epoch": 0.3649975610273377, "grad_norm": 0.3744714558124542, "learning_rate": 1.8415893861742466e-05, "loss": 0.5508, "step": 17210 }, { "epoch": 0.36501876948527073, "grad_norm": 0.36055856943130493, "learning_rate": 1.841571373028086e-05, "loss": 0.4877, "step": 17211 }, { "epoch": 0.36503997794320375, "grad_norm": 0.3329427242279053, "learning_rate": 1.8415533589459373e-05, "loss": 0.5908, "step": 17212 }, { "epoch": 0.3650611864011368, "grad_norm": 0.36548948287963867, "learning_rate": 1.84153534392782e-05, "loss": 0.5456, "step": 17213 }, { "epoch": 0.3650823948590698, "grad_norm": 0.32997193932533264, "learning_rate": 1.8415173279737546e-05, "loss": 0.4994, "step": 17214 }, { "epoch": 0.3651036033170028, "grad_norm": 0.3852109909057617, "learning_rate": 1.841499311083761e-05, "loss": 0.5783, "step": 17215 }, { "epoch": 0.36512481177493583, "grad_norm": 0.3049403727054596, "learning_rate": 1.841481293257859e-05, "loss": 0.4468, "step": 17216 }, { "epoch": 0.36514602023286885, "grad_norm": 0.41646748781204224, "learning_rate": 1.841463274496069e-05, "loss": 0.4739, "step": 17217 }, { "epoch": 0.36516722869080187, "grad_norm": 0.33922460675239563, "learning_rate": 1.8414452547984103e-05, "loss": 0.4844, "step": 17218 }, { "epoch": 0.3651884371487349, "grad_norm": 0.32495033740997314, "learning_rate": 1.841427234164904e-05, "loss": 0.5022, "step": 17219 }, { "epoch": 0.36520964560666797, "grad_norm": 0.3725239038467407, "learning_rate": 1.8414092125955695e-05, "loss": 0.5256, "step": 17220 }, { "epoch": 0.365230854064601, "grad_norm": 0.32854852080345154, "learning_rate": 1.841391190090427e-05, "loss": 0.5232, "step": 17221 }, { "epoch": 0.365252062522534, "grad_norm": 0.39962130784988403, "learning_rate": 1.8413731666494964e-05, "loss": 0.5263, "step": 17222 }, { "epoch": 0.365273270980467, "grad_norm": 0.3616395890712738, "learning_rate": 1.8413551422727983e-05, "loss": 0.5092, "step": 17223 }, { "epoch": 0.36529447943840004, "grad_norm": 0.3596882224082947, "learning_rate": 1.841337116960352e-05, "loss": 0.545, "step": 17224 }, { "epoch": 0.36531568789633306, "grad_norm": 0.33278948068618774, "learning_rate": 1.8413190907121777e-05, "loss": 0.4697, "step": 17225 }, { "epoch": 0.3653368963542661, "grad_norm": 0.33626994490623474, "learning_rate": 1.841301063528296e-05, "loss": 0.4692, "step": 17226 }, { "epoch": 0.3653581048121991, "grad_norm": 0.32843586802482605, "learning_rate": 1.841283035408726e-05, "loss": 0.4473, "step": 17227 }, { "epoch": 0.3653793132701321, "grad_norm": 0.3468654155731201, "learning_rate": 1.841265006353489e-05, "loss": 0.5277, "step": 17228 }, { "epoch": 0.36540052172806514, "grad_norm": 0.3820589482784271, "learning_rate": 1.8412469763626042e-05, "loss": 0.5529, "step": 17229 }, { "epoch": 0.36542173018599816, "grad_norm": 0.32458025217056274, "learning_rate": 1.8412289454360914e-05, "loss": 0.41, "step": 17230 }, { "epoch": 0.3654429386439312, "grad_norm": 0.3079545795917511, "learning_rate": 1.8412109135739714e-05, "loss": 0.4755, "step": 17231 }, { "epoch": 0.3654641471018642, "grad_norm": 0.4119429588317871, "learning_rate": 1.841192880776264e-05, "loss": 0.552, "step": 17232 }, { "epoch": 0.3654853555597972, "grad_norm": 0.47306329011917114, "learning_rate": 1.841174847042989e-05, "loss": 0.4517, "step": 17233 }, { "epoch": 0.3655065640177303, "grad_norm": 0.5170331001281738, "learning_rate": 1.841156812374167e-05, "loss": 0.5796, "step": 17234 }, { "epoch": 0.3655277724756633, "grad_norm": 0.35862696170806885, "learning_rate": 1.8411387767698173e-05, "loss": 0.5514, "step": 17235 }, { "epoch": 0.36554898093359633, "grad_norm": 0.3494683802127838, "learning_rate": 1.8411207402299606e-05, "loss": 0.5276, "step": 17236 }, { "epoch": 0.36557018939152935, "grad_norm": 0.34976986050605774, "learning_rate": 1.8411027027546163e-05, "loss": 0.5139, "step": 17237 }, { "epoch": 0.3655913978494624, "grad_norm": 0.4085763394832611, "learning_rate": 1.8410846643438054e-05, "loss": 0.5627, "step": 17238 }, { "epoch": 0.3656126063073954, "grad_norm": 0.33770641684532166, "learning_rate": 1.841066624997547e-05, "loss": 0.5936, "step": 17239 }, { "epoch": 0.3656338147653284, "grad_norm": 0.37008896470069885, "learning_rate": 1.8410485847158618e-05, "loss": 0.5534, "step": 17240 }, { "epoch": 0.36565502322326143, "grad_norm": 0.3299458920955658, "learning_rate": 1.8410305434987698e-05, "loss": 0.4626, "step": 17241 }, { "epoch": 0.36567623168119445, "grad_norm": 0.3165341019630432, "learning_rate": 1.8410125013462908e-05, "loss": 0.5449, "step": 17242 }, { "epoch": 0.36569744013912747, "grad_norm": 0.35717764496803284, "learning_rate": 1.840994458258445e-05, "loss": 0.5473, "step": 17243 }, { "epoch": 0.3657186485970605, "grad_norm": 0.49335166811943054, "learning_rate": 1.8409764142352526e-05, "loss": 0.5488, "step": 17244 }, { "epoch": 0.3657398570549935, "grad_norm": 0.34809592366218567, "learning_rate": 1.8409583692767334e-05, "loss": 0.4869, "step": 17245 }, { "epoch": 0.36576106551292653, "grad_norm": 0.3681071996688843, "learning_rate": 1.8409403233829075e-05, "loss": 0.5431, "step": 17246 }, { "epoch": 0.3657822739708596, "grad_norm": 0.3759100139141083, "learning_rate": 1.8409222765537956e-05, "loss": 0.4684, "step": 17247 }, { "epoch": 0.3658034824287926, "grad_norm": 0.368639200925827, "learning_rate": 1.8409042287894167e-05, "loss": 0.5504, "step": 17248 }, { "epoch": 0.36582469088672565, "grad_norm": 0.32864946126937866, "learning_rate": 1.8408861800897915e-05, "loss": 0.5121, "step": 17249 }, { "epoch": 0.36584589934465866, "grad_norm": 0.34508058428764343, "learning_rate": 1.8408681304549402e-05, "loss": 0.5672, "step": 17250 }, { "epoch": 0.3658671078025917, "grad_norm": 0.3282720446586609, "learning_rate": 1.8408500798848826e-05, "loss": 0.4988, "step": 17251 }, { "epoch": 0.3658883162605247, "grad_norm": 0.3292428255081177, "learning_rate": 1.8408320283796386e-05, "loss": 0.4793, "step": 17252 }, { "epoch": 0.3659095247184577, "grad_norm": 0.3233153223991394, "learning_rate": 1.840813975939229e-05, "loss": 0.5107, "step": 17253 }, { "epoch": 0.36593073317639074, "grad_norm": 0.346265584230423, "learning_rate": 1.840795922563673e-05, "loss": 0.5384, "step": 17254 }, { "epoch": 0.36595194163432376, "grad_norm": 0.35758456587791443, "learning_rate": 1.840777868252991e-05, "loss": 0.504, "step": 17255 }, { "epoch": 0.3659731500922568, "grad_norm": 0.35841667652130127, "learning_rate": 1.8407598130072037e-05, "loss": 0.5983, "step": 17256 }, { "epoch": 0.3659943585501898, "grad_norm": 0.35269832611083984, "learning_rate": 1.84074175682633e-05, "loss": 0.4867, "step": 17257 }, { "epoch": 0.3660155670081228, "grad_norm": 0.30782997608184814, "learning_rate": 1.8407236997103913e-05, "loss": 0.5436, "step": 17258 }, { "epoch": 0.36603677546605584, "grad_norm": 0.3219403922557831, "learning_rate": 1.8407056416594065e-05, "loss": 0.4758, "step": 17259 }, { "epoch": 0.36605798392398886, "grad_norm": 0.38459277153015137, "learning_rate": 1.8406875826733962e-05, "loss": 0.5959, "step": 17260 }, { "epoch": 0.36607919238192194, "grad_norm": 0.30291685461997986, "learning_rate": 1.8406695227523808e-05, "loss": 0.4635, "step": 17261 }, { "epoch": 0.36610040083985496, "grad_norm": 0.36515921354293823, "learning_rate": 1.84065146189638e-05, "loss": 0.5099, "step": 17262 }, { "epoch": 0.366121609297788, "grad_norm": 0.3588484823703766, "learning_rate": 1.8406334001054136e-05, "loss": 0.5418, "step": 17263 }, { "epoch": 0.366142817755721, "grad_norm": 0.30069923400878906, "learning_rate": 1.840615337379502e-05, "loss": 0.4499, "step": 17264 }, { "epoch": 0.366164026213654, "grad_norm": 0.4153019189834595, "learning_rate": 1.8405972737186658e-05, "loss": 0.5532, "step": 17265 }, { "epoch": 0.36618523467158703, "grad_norm": 0.3987670838832855, "learning_rate": 1.8405792091229244e-05, "loss": 0.6247, "step": 17266 }, { "epoch": 0.36620644312952005, "grad_norm": 0.42664802074432373, "learning_rate": 1.8405611435922978e-05, "loss": 0.5458, "step": 17267 }, { "epoch": 0.3662276515874531, "grad_norm": 0.3611326813697815, "learning_rate": 1.840543077126807e-05, "loss": 0.53, "step": 17268 }, { "epoch": 0.3662488600453861, "grad_norm": 0.3676089942455292, "learning_rate": 1.840525009726471e-05, "loss": 0.5336, "step": 17269 }, { "epoch": 0.3662700685033191, "grad_norm": 0.3336450159549713, "learning_rate": 1.840506941391311e-05, "loss": 0.4762, "step": 17270 }, { "epoch": 0.36629127696125213, "grad_norm": 0.3372197449207306, "learning_rate": 1.8404888721213458e-05, "loss": 0.5303, "step": 17271 }, { "epoch": 0.36631248541918515, "grad_norm": 0.3270708918571472, "learning_rate": 1.8404708019165962e-05, "loss": 0.4423, "step": 17272 }, { "epoch": 0.36633369387711817, "grad_norm": 0.3692069947719574, "learning_rate": 1.8404527307770824e-05, "loss": 0.504, "step": 17273 }, { "epoch": 0.3663549023350512, "grad_norm": 2.210897207260132, "learning_rate": 1.8404346587028244e-05, "loss": 0.4817, "step": 17274 }, { "epoch": 0.36637611079298427, "grad_norm": 0.3770029842853546, "learning_rate": 1.8404165856938425e-05, "loss": 0.4657, "step": 17275 }, { "epoch": 0.3663973192509173, "grad_norm": 0.4074745178222656, "learning_rate": 1.8403985117501567e-05, "loss": 0.5182, "step": 17276 }, { "epoch": 0.3664185277088503, "grad_norm": 0.357532799243927, "learning_rate": 1.8403804368717866e-05, "loss": 0.5004, "step": 17277 }, { "epoch": 0.3664397361667833, "grad_norm": 0.3450948894023895, "learning_rate": 1.8403623610587526e-05, "loss": 0.5193, "step": 17278 }, { "epoch": 0.36646094462471634, "grad_norm": 0.35879600048065186, "learning_rate": 1.8403442843110753e-05, "loss": 0.539, "step": 17279 }, { "epoch": 0.36648215308264936, "grad_norm": 0.33955520391464233, "learning_rate": 1.8403262066287738e-05, "loss": 0.4609, "step": 17280 }, { "epoch": 0.3665033615405824, "grad_norm": 0.3394424021244049, "learning_rate": 1.8403081280118693e-05, "loss": 0.4921, "step": 17281 }, { "epoch": 0.3665245699985154, "grad_norm": 0.33788689970970154, "learning_rate": 1.8402900484603812e-05, "loss": 0.4589, "step": 17282 }, { "epoch": 0.3665457784564484, "grad_norm": 0.3649468421936035, "learning_rate": 1.8402719679743298e-05, "loss": 0.5138, "step": 17283 }, { "epoch": 0.36656698691438144, "grad_norm": 0.35003677010536194, "learning_rate": 1.8402538865537354e-05, "loss": 0.4946, "step": 17284 }, { "epoch": 0.36658819537231446, "grad_norm": 0.36625027656555176, "learning_rate": 1.8402358041986177e-05, "loss": 0.515, "step": 17285 }, { "epoch": 0.3666094038302475, "grad_norm": 0.3120925724506378, "learning_rate": 1.840217720908997e-05, "loss": 0.6352, "step": 17286 }, { "epoch": 0.3666306122881805, "grad_norm": 0.3788808286190033, "learning_rate": 1.8401996366848936e-05, "loss": 0.5406, "step": 17287 }, { "epoch": 0.3666518207461136, "grad_norm": 0.3933340311050415, "learning_rate": 1.8401815515263276e-05, "loss": 0.5512, "step": 17288 }, { "epoch": 0.3666730292040466, "grad_norm": 0.34334880113601685, "learning_rate": 1.8401634654333185e-05, "loss": 0.5235, "step": 17289 }, { "epoch": 0.3666942376619796, "grad_norm": 0.33419525623321533, "learning_rate": 1.8401453784058875e-05, "loss": 0.4932, "step": 17290 }, { "epoch": 0.36671544611991264, "grad_norm": 0.35530370473861694, "learning_rate": 1.8401272904440537e-05, "loss": 0.5439, "step": 17291 }, { "epoch": 0.36673665457784566, "grad_norm": 0.3176223933696747, "learning_rate": 1.8401092015478376e-05, "loss": 0.4971, "step": 17292 }, { "epoch": 0.3667578630357787, "grad_norm": 0.333243191242218, "learning_rate": 1.8400911117172597e-05, "loss": 0.4449, "step": 17293 }, { "epoch": 0.3667790714937117, "grad_norm": 0.42154115438461304, "learning_rate": 1.8400730209523395e-05, "loss": 0.5765, "step": 17294 }, { "epoch": 0.3668002799516447, "grad_norm": 0.3257576525211334, "learning_rate": 1.8400549292530975e-05, "loss": 0.4629, "step": 17295 }, { "epoch": 0.36682148840957773, "grad_norm": 0.31920164823532104, "learning_rate": 1.8400368366195535e-05, "loss": 0.4238, "step": 17296 }, { "epoch": 0.36684269686751075, "grad_norm": 0.3454520106315613, "learning_rate": 1.840018743051728e-05, "loss": 0.5175, "step": 17297 }, { "epoch": 0.3668639053254438, "grad_norm": 0.4103304147720337, "learning_rate": 1.8400006485496412e-05, "loss": 0.5503, "step": 17298 }, { "epoch": 0.3668851137833768, "grad_norm": 0.3459112048149109, "learning_rate": 1.8399825531133126e-05, "loss": 0.5146, "step": 17299 }, { "epoch": 0.3669063222413098, "grad_norm": 0.3243209421634674, "learning_rate": 1.8399644567427625e-05, "loss": 0.5681, "step": 17300 }, { "epoch": 0.36692753069924283, "grad_norm": 0.35276684165000916, "learning_rate": 1.8399463594380117e-05, "loss": 0.57, "step": 17301 }, { "epoch": 0.3669487391571759, "grad_norm": 0.33709028363227844, "learning_rate": 1.8399282611990797e-05, "loss": 0.487, "step": 17302 }, { "epoch": 0.3669699476151089, "grad_norm": 0.3240196406841278, "learning_rate": 1.8399101620259866e-05, "loss": 0.4121, "step": 17303 }, { "epoch": 0.36699115607304195, "grad_norm": 0.3290698528289795, "learning_rate": 1.8398920619187528e-05, "loss": 0.4799, "step": 17304 }, { "epoch": 0.36701236453097497, "grad_norm": 0.34169939160346985, "learning_rate": 1.8398739608773984e-05, "loss": 0.5164, "step": 17305 }, { "epoch": 0.367033572988908, "grad_norm": 0.36531469225883484, "learning_rate": 1.8398558589019436e-05, "loss": 0.56, "step": 17306 }, { "epoch": 0.367054781446841, "grad_norm": 0.2735997438430786, "learning_rate": 1.8398377559924083e-05, "loss": 0.5032, "step": 17307 }, { "epoch": 0.367075989904774, "grad_norm": 0.32282623648643494, "learning_rate": 1.8398196521488128e-05, "loss": 0.4564, "step": 17308 }, { "epoch": 0.36709719836270704, "grad_norm": 0.36270585656166077, "learning_rate": 1.839801547371177e-05, "loss": 0.4993, "step": 17309 }, { "epoch": 0.36711840682064006, "grad_norm": 0.417367160320282, "learning_rate": 1.8397834416595217e-05, "loss": 0.5047, "step": 17310 }, { "epoch": 0.3671396152785731, "grad_norm": 0.35528627038002014, "learning_rate": 1.839765335013866e-05, "loss": 0.5429, "step": 17311 }, { "epoch": 0.3671608237365061, "grad_norm": 0.33926862478256226, "learning_rate": 1.8397472274342308e-05, "loss": 0.5038, "step": 17312 }, { "epoch": 0.3671820321944391, "grad_norm": 0.3081349730491638, "learning_rate": 1.839729118920636e-05, "loss": 0.4298, "step": 17313 }, { "epoch": 0.36720324065237214, "grad_norm": 0.3632284104824066, "learning_rate": 1.8397110094731017e-05, "loss": 0.5392, "step": 17314 }, { "epoch": 0.36722444911030516, "grad_norm": 0.3272380530834198, "learning_rate": 1.8396928990916482e-05, "loss": 0.5174, "step": 17315 }, { "epoch": 0.36724565756823824, "grad_norm": 0.32845744490623474, "learning_rate": 1.8396747877762955e-05, "loss": 0.4576, "step": 17316 }, { "epoch": 0.36726686602617126, "grad_norm": 0.34507012367248535, "learning_rate": 1.839656675527064e-05, "loss": 0.5222, "step": 17317 }, { "epoch": 0.3672880744841043, "grad_norm": 0.34328463673591614, "learning_rate": 1.8396385623439734e-05, "loss": 0.4914, "step": 17318 }, { "epoch": 0.3673092829420373, "grad_norm": 0.32554376125335693, "learning_rate": 1.8396204482270442e-05, "loss": 0.5435, "step": 17319 }, { "epoch": 0.3673304913999703, "grad_norm": 0.35481593012809753, "learning_rate": 1.8396023331762962e-05, "loss": 0.5343, "step": 17320 }, { "epoch": 0.36735169985790334, "grad_norm": 0.31675639748573303, "learning_rate": 1.83958421719175e-05, "loss": 0.3866, "step": 17321 }, { "epoch": 0.36737290831583636, "grad_norm": 0.3540748655796051, "learning_rate": 1.8395661002734257e-05, "loss": 0.6045, "step": 17322 }, { "epoch": 0.3673941167737694, "grad_norm": 0.33562955260276794, "learning_rate": 1.8395479824213432e-05, "loss": 0.5144, "step": 17323 }, { "epoch": 0.3674153252317024, "grad_norm": 0.3425280451774597, "learning_rate": 1.8395298636355224e-05, "loss": 0.498, "step": 17324 }, { "epoch": 0.3674365336896354, "grad_norm": 0.3114839792251587, "learning_rate": 1.839511743915984e-05, "loss": 0.4565, "step": 17325 }, { "epoch": 0.36745774214756843, "grad_norm": 0.3713701665401459, "learning_rate": 1.839493623262748e-05, "loss": 0.5205, "step": 17326 }, { "epoch": 0.36747895060550145, "grad_norm": 0.3312831223011017, "learning_rate": 1.8394755016758343e-05, "loss": 0.4738, "step": 17327 }, { "epoch": 0.3675001590634345, "grad_norm": 0.3315545916557312, "learning_rate": 1.8394573791552633e-05, "loss": 0.5189, "step": 17328 }, { "epoch": 0.36752136752136755, "grad_norm": 0.30986350774765015, "learning_rate": 1.8394392557010553e-05, "loss": 0.411, "step": 17329 }, { "epoch": 0.36754257597930057, "grad_norm": 0.3557683527469635, "learning_rate": 1.83942113131323e-05, "loss": 0.5183, "step": 17330 }, { "epoch": 0.3675637844372336, "grad_norm": 0.45752349495887756, "learning_rate": 1.8394030059918076e-05, "loss": 0.5066, "step": 17331 }, { "epoch": 0.3675849928951666, "grad_norm": 0.34186115860939026, "learning_rate": 1.839384879736809e-05, "loss": 0.5148, "step": 17332 }, { "epoch": 0.3676062013530996, "grad_norm": 0.34481313824653625, "learning_rate": 1.8393667525482536e-05, "loss": 0.5182, "step": 17333 }, { "epoch": 0.36762740981103265, "grad_norm": 0.34354516863822937, "learning_rate": 1.839348624426162e-05, "loss": 0.5031, "step": 17334 }, { "epoch": 0.36764861826896567, "grad_norm": 0.3889487385749817, "learning_rate": 1.8393304953705535e-05, "loss": 0.4717, "step": 17335 }, { "epoch": 0.3676698267268987, "grad_norm": 0.29227861762046814, "learning_rate": 1.8393123653814494e-05, "loss": 0.4756, "step": 17336 }, { "epoch": 0.3676910351848317, "grad_norm": 0.3403017520904541, "learning_rate": 1.8392942344588696e-05, "loss": 0.4957, "step": 17337 }, { "epoch": 0.3677122436427647, "grad_norm": 0.3853594958782196, "learning_rate": 1.8392761026028338e-05, "loss": 0.498, "step": 17338 }, { "epoch": 0.36773345210069774, "grad_norm": 0.3400297164916992, "learning_rate": 1.8392579698133622e-05, "loss": 0.4873, "step": 17339 }, { "epoch": 0.36775466055863076, "grad_norm": 0.37471169233322144, "learning_rate": 1.8392398360904752e-05, "loss": 0.5753, "step": 17340 }, { "epoch": 0.3677758690165638, "grad_norm": 0.45733582973480225, "learning_rate": 1.8392217014341934e-05, "loss": 0.4886, "step": 17341 }, { "epoch": 0.3677970774744968, "grad_norm": 0.34173446893692017, "learning_rate": 1.8392035658445363e-05, "loss": 0.5058, "step": 17342 }, { "epoch": 0.3678182859324299, "grad_norm": 0.3607371151447296, "learning_rate": 1.839185429321524e-05, "loss": 0.6237, "step": 17343 }, { "epoch": 0.3678394943903629, "grad_norm": 0.311780720949173, "learning_rate": 1.8391672918651774e-05, "loss": 0.4703, "step": 17344 }, { "epoch": 0.3678607028482959, "grad_norm": 0.34350892901420593, "learning_rate": 1.8391491534755156e-05, "loss": 0.5317, "step": 17345 }, { "epoch": 0.36788191130622894, "grad_norm": 0.43308696150779724, "learning_rate": 1.8391310141525597e-05, "loss": 0.5652, "step": 17346 }, { "epoch": 0.36790311976416196, "grad_norm": 0.36294013261795044, "learning_rate": 1.8391128738963297e-05, "loss": 0.5633, "step": 17347 }, { "epoch": 0.367924328222095, "grad_norm": 0.5094702839851379, "learning_rate": 1.839094732706846e-05, "loss": 0.4454, "step": 17348 }, { "epoch": 0.367945536680028, "grad_norm": 0.3369797468185425, "learning_rate": 1.839076590584128e-05, "loss": 0.5893, "step": 17349 }, { "epoch": 0.367966745137961, "grad_norm": 0.3048301041126251, "learning_rate": 1.839058447528196e-05, "loss": 0.4728, "step": 17350 }, { "epoch": 0.36798795359589404, "grad_norm": 0.3582462668418884, "learning_rate": 1.839040303539071e-05, "loss": 0.5351, "step": 17351 }, { "epoch": 0.36800916205382705, "grad_norm": 0.29956093430519104, "learning_rate": 1.839022158616772e-05, "loss": 0.5327, "step": 17352 }, { "epoch": 0.3680303705117601, "grad_norm": 0.3620423972606659, "learning_rate": 1.8390040127613206e-05, "loss": 0.5068, "step": 17353 }, { "epoch": 0.3680515789696931, "grad_norm": 0.4459543228149414, "learning_rate": 1.8389858659727362e-05, "loss": 0.4956, "step": 17354 }, { "epoch": 0.3680727874276261, "grad_norm": 0.33202993869781494, "learning_rate": 1.8389677182510387e-05, "loss": 0.4533, "step": 17355 }, { "epoch": 0.3680939958855592, "grad_norm": 0.3229755461215973, "learning_rate": 1.8389495695962487e-05, "loss": 0.5661, "step": 17356 }, { "epoch": 0.3681152043434922, "grad_norm": 1.0000613927841187, "learning_rate": 1.838931420008386e-05, "loss": 0.5097, "step": 17357 }, { "epoch": 0.36813641280142523, "grad_norm": 0.2782737910747528, "learning_rate": 1.8389132694874714e-05, "loss": 0.3481, "step": 17358 }, { "epoch": 0.36815762125935825, "grad_norm": 0.35277703404426575, "learning_rate": 1.8388951180335248e-05, "loss": 0.3885, "step": 17359 }, { "epoch": 0.36817882971729127, "grad_norm": 0.3537467420101166, "learning_rate": 1.838876965646566e-05, "loss": 0.5353, "step": 17360 }, { "epoch": 0.3682000381752243, "grad_norm": 0.33974194526672363, "learning_rate": 1.838858812326616e-05, "loss": 0.4526, "step": 17361 }, { "epoch": 0.3682212466331573, "grad_norm": 0.3751215934753418, "learning_rate": 1.838840658073694e-05, "loss": 0.5493, "step": 17362 }, { "epoch": 0.3682424550910903, "grad_norm": 0.3511461019515991, "learning_rate": 1.838822502887821e-05, "loss": 0.506, "step": 17363 }, { "epoch": 0.36826366354902335, "grad_norm": 0.34980159997940063, "learning_rate": 1.838804346769017e-05, "loss": 0.5607, "step": 17364 }, { "epoch": 0.36828487200695637, "grad_norm": 0.3520083427429199, "learning_rate": 1.838786189717302e-05, "loss": 0.4421, "step": 17365 }, { "epoch": 0.3683060804648894, "grad_norm": 0.3380628824234009, "learning_rate": 1.838768031732696e-05, "loss": 0.4818, "step": 17366 }, { "epoch": 0.3683272889228224, "grad_norm": 0.34564489126205444, "learning_rate": 1.83874987281522e-05, "loss": 0.4961, "step": 17367 }, { "epoch": 0.3683484973807554, "grad_norm": 0.3868577182292938, "learning_rate": 1.8387317129648932e-05, "loss": 0.5169, "step": 17368 }, { "epoch": 0.36836970583868844, "grad_norm": 0.3479347229003906, "learning_rate": 1.8387135521817362e-05, "loss": 0.5421, "step": 17369 }, { "epoch": 0.3683909142966215, "grad_norm": 0.349183589220047, "learning_rate": 1.8386953904657696e-05, "loss": 0.5149, "step": 17370 }, { "epoch": 0.36841212275455454, "grad_norm": 0.3547675609588623, "learning_rate": 1.838677227817013e-05, "loss": 0.561, "step": 17371 }, { "epoch": 0.36843333121248756, "grad_norm": 0.35703328251838684, "learning_rate": 1.8386590642354873e-05, "loss": 0.4947, "step": 17372 }, { "epoch": 0.3684545396704206, "grad_norm": 0.35650011897087097, "learning_rate": 1.838640899721212e-05, "loss": 0.4599, "step": 17373 }, { "epoch": 0.3684757481283536, "grad_norm": 0.32995155453681946, "learning_rate": 1.8386227342742078e-05, "loss": 0.5808, "step": 17374 }, { "epoch": 0.3684969565862866, "grad_norm": 0.49689796566963196, "learning_rate": 1.8386045678944944e-05, "loss": 0.4574, "step": 17375 }, { "epoch": 0.36851816504421964, "grad_norm": 0.3337807357311249, "learning_rate": 1.8385864005820923e-05, "loss": 0.4836, "step": 17376 }, { "epoch": 0.36853937350215266, "grad_norm": 0.3258473575115204, "learning_rate": 1.8385682323370216e-05, "loss": 0.5133, "step": 17377 }, { "epoch": 0.3685605819600857, "grad_norm": 0.3648819327354431, "learning_rate": 1.838550063159303e-05, "loss": 0.5916, "step": 17378 }, { "epoch": 0.3685817904180187, "grad_norm": 0.338316410779953, "learning_rate": 1.838531893048956e-05, "loss": 0.4813, "step": 17379 }, { "epoch": 0.3686029988759517, "grad_norm": 0.30868786573410034, "learning_rate": 1.8385137220060013e-05, "loss": 0.5062, "step": 17380 }, { "epoch": 0.36862420733388473, "grad_norm": 0.3575286567211151, "learning_rate": 1.8384955500304586e-05, "loss": 0.5585, "step": 17381 }, { "epoch": 0.36864541579181775, "grad_norm": 0.30832988023757935, "learning_rate": 1.838477377122349e-05, "loss": 0.4873, "step": 17382 }, { "epoch": 0.3686666242497508, "grad_norm": 0.34413033723831177, "learning_rate": 1.8384592032816914e-05, "loss": 0.4981, "step": 17383 }, { "epoch": 0.36868783270768385, "grad_norm": 0.3401290774345398, "learning_rate": 1.8384410285085073e-05, "loss": 0.5675, "step": 17384 }, { "epoch": 0.36870904116561687, "grad_norm": 0.4768248200416565, "learning_rate": 1.838422852802816e-05, "loss": 0.5305, "step": 17385 }, { "epoch": 0.3687302496235499, "grad_norm": 0.34479621052742004, "learning_rate": 1.8384046761646383e-05, "loss": 0.5264, "step": 17386 }, { "epoch": 0.3687514580814829, "grad_norm": 0.3410513699054718, "learning_rate": 1.8383864985939944e-05, "loss": 0.4898, "step": 17387 }, { "epoch": 0.3687726665394159, "grad_norm": 0.3414008319377899, "learning_rate": 1.838368320090904e-05, "loss": 0.4901, "step": 17388 }, { "epoch": 0.36879387499734895, "grad_norm": 0.3356870412826538, "learning_rate": 1.8383501406553877e-05, "loss": 0.4262, "step": 17389 }, { "epoch": 0.36881508345528197, "grad_norm": 0.36854180693626404, "learning_rate": 1.838331960287466e-05, "loss": 0.4825, "step": 17390 }, { "epoch": 0.368836291913215, "grad_norm": 0.4048865735530853, "learning_rate": 1.8383137789871584e-05, "loss": 0.5336, "step": 17391 }, { "epoch": 0.368857500371148, "grad_norm": 0.31860414147377014, "learning_rate": 1.8382955967544855e-05, "loss": 0.4836, "step": 17392 }, { "epoch": 0.368878708829081, "grad_norm": 0.32331719994544983, "learning_rate": 1.8382774135894678e-05, "loss": 0.5211, "step": 17393 }, { "epoch": 0.36889991728701405, "grad_norm": 0.38287705183029175, "learning_rate": 1.838259229492125e-05, "loss": 0.6013, "step": 17394 }, { "epoch": 0.36892112574494706, "grad_norm": 0.37574681639671326, "learning_rate": 1.8382410444624775e-05, "loss": 0.5292, "step": 17395 }, { "epoch": 0.3689423342028801, "grad_norm": 0.3487721085548401, "learning_rate": 1.8382228585005458e-05, "loss": 0.569, "step": 17396 }, { "epoch": 0.36896354266081316, "grad_norm": 0.43253853917121887, "learning_rate": 1.8382046716063497e-05, "loss": 0.554, "step": 17397 }, { "epoch": 0.3689847511187462, "grad_norm": 0.39659181237220764, "learning_rate": 1.8381864837799098e-05, "loss": 0.4069, "step": 17398 }, { "epoch": 0.3690059595766792, "grad_norm": 0.3337385654449463, "learning_rate": 1.838168295021246e-05, "loss": 0.6103, "step": 17399 }, { "epoch": 0.3690271680346122, "grad_norm": 0.338791161775589, "learning_rate": 1.838150105330379e-05, "loss": 0.4823, "step": 17400 }, { "epoch": 0.36904837649254524, "grad_norm": 0.37338531017303467, "learning_rate": 1.8381319147073287e-05, "loss": 0.5511, "step": 17401 }, { "epoch": 0.36906958495047826, "grad_norm": 0.3458690643310547, "learning_rate": 1.8381137231521152e-05, "loss": 0.421, "step": 17402 }, { "epoch": 0.3690907934084113, "grad_norm": 0.3608928322792053, "learning_rate": 1.838095530664759e-05, "loss": 0.4928, "step": 17403 }, { "epoch": 0.3691120018663443, "grad_norm": 0.40141257643699646, "learning_rate": 1.83807733724528e-05, "loss": 0.4922, "step": 17404 }, { "epoch": 0.3691332103242773, "grad_norm": 0.3452303409576416, "learning_rate": 1.838059142893699e-05, "loss": 0.4687, "step": 17405 }, { "epoch": 0.36915441878221034, "grad_norm": 0.30403512716293335, "learning_rate": 1.838040947610036e-05, "loss": 0.4259, "step": 17406 }, { "epoch": 0.36917562724014336, "grad_norm": 0.37190672755241394, "learning_rate": 1.8380227513943107e-05, "loss": 0.5143, "step": 17407 }, { "epoch": 0.3691968356980764, "grad_norm": 0.649815559387207, "learning_rate": 1.838004554246544e-05, "loss": 0.513, "step": 17408 }, { "epoch": 0.3692180441560094, "grad_norm": 0.308173269033432, "learning_rate": 1.8379863561667556e-05, "loss": 0.4748, "step": 17409 }, { "epoch": 0.3692392526139424, "grad_norm": 0.3515646159648895, "learning_rate": 1.8379681571549665e-05, "loss": 0.597, "step": 17410 }, { "epoch": 0.3692604610718755, "grad_norm": 0.34691160917282104, "learning_rate": 1.837949957211196e-05, "loss": 0.4607, "step": 17411 }, { "epoch": 0.3692816695298085, "grad_norm": 0.33217599987983704, "learning_rate": 1.8379317563354656e-05, "loss": 0.5026, "step": 17412 }, { "epoch": 0.36930287798774153, "grad_norm": 0.34477564692497253, "learning_rate": 1.837913554527794e-05, "loss": 0.5876, "step": 17413 }, { "epoch": 0.36932408644567455, "grad_norm": 0.34926074743270874, "learning_rate": 1.837895351788203e-05, "loss": 0.4768, "step": 17414 }, { "epoch": 0.36934529490360757, "grad_norm": 0.3647344410419464, "learning_rate": 1.8378771481167113e-05, "loss": 0.5341, "step": 17415 }, { "epoch": 0.3693665033615406, "grad_norm": 0.3187826871871948, "learning_rate": 1.8378589435133402e-05, "loss": 0.3972, "step": 17416 }, { "epoch": 0.3693877118194736, "grad_norm": 0.35714107751846313, "learning_rate": 1.83784073797811e-05, "loss": 0.5067, "step": 17417 }, { "epoch": 0.3694089202774066, "grad_norm": 0.3710620403289795, "learning_rate": 1.8378225315110403e-05, "loss": 0.4825, "step": 17418 }, { "epoch": 0.36943012873533965, "grad_norm": 0.34468281269073486, "learning_rate": 1.837804324112152e-05, "loss": 0.5048, "step": 17419 }, { "epoch": 0.36945133719327267, "grad_norm": 0.3393593430519104, "learning_rate": 1.837786115781464e-05, "loss": 0.5109, "step": 17420 }, { "epoch": 0.3694725456512057, "grad_norm": 0.3354395031929016, "learning_rate": 1.8377679065189988e-05, "loss": 0.5201, "step": 17421 }, { "epoch": 0.3694937541091387, "grad_norm": 0.3493984043598175, "learning_rate": 1.8377496963247745e-05, "loss": 0.4923, "step": 17422 }, { "epoch": 0.3695149625670717, "grad_norm": 0.3441295623779297, "learning_rate": 1.8377314851988127e-05, "loss": 0.4731, "step": 17423 }, { "epoch": 0.36953617102500474, "grad_norm": 0.30438491702079773, "learning_rate": 1.8377132731411332e-05, "loss": 0.4868, "step": 17424 }, { "epoch": 0.3695573794829378, "grad_norm": 0.39934924244880676, "learning_rate": 1.837695060151756e-05, "loss": 0.536, "step": 17425 }, { "epoch": 0.36957858794087084, "grad_norm": 0.3283061683177948, "learning_rate": 1.837676846230702e-05, "loss": 0.5002, "step": 17426 }, { "epoch": 0.36959979639880386, "grad_norm": 0.35421714186668396, "learning_rate": 1.837658631377991e-05, "loss": 0.5254, "step": 17427 }, { "epoch": 0.3696210048567369, "grad_norm": 0.33087900280952454, "learning_rate": 1.837640415593643e-05, "loss": 0.5158, "step": 17428 }, { "epoch": 0.3696422133146699, "grad_norm": 0.358560711145401, "learning_rate": 1.8376221988776792e-05, "loss": 0.5259, "step": 17429 }, { "epoch": 0.3696634217726029, "grad_norm": 0.37101292610168457, "learning_rate": 1.837603981230119e-05, "loss": 0.5092, "step": 17430 }, { "epoch": 0.36968463023053594, "grad_norm": 0.35697147250175476, "learning_rate": 1.8375857626509827e-05, "loss": 0.5143, "step": 17431 }, { "epoch": 0.36970583868846896, "grad_norm": 0.36016103625297546, "learning_rate": 1.8375675431402912e-05, "loss": 0.4915, "step": 17432 }, { "epoch": 0.369727047146402, "grad_norm": 0.3338002562522888, "learning_rate": 1.837549322698064e-05, "loss": 0.5322, "step": 17433 }, { "epoch": 0.369748255604335, "grad_norm": 0.3362402021884918, "learning_rate": 1.8375311013243218e-05, "loss": 0.6154, "step": 17434 }, { "epoch": 0.369769464062268, "grad_norm": 0.7348169684410095, "learning_rate": 1.837512879019085e-05, "loss": 0.522, "step": 17435 }, { "epoch": 0.36979067252020104, "grad_norm": 0.39629673957824707, "learning_rate": 1.8374946557823734e-05, "loss": 0.5313, "step": 17436 }, { "epoch": 0.36981188097813406, "grad_norm": 0.3290422558784485, "learning_rate": 1.837476431614208e-05, "loss": 0.6056, "step": 17437 }, { "epoch": 0.36983308943606713, "grad_norm": 0.3606659173965454, "learning_rate": 1.837458206514608e-05, "loss": 0.5595, "step": 17438 }, { "epoch": 0.36985429789400015, "grad_norm": 0.4678986370563507, "learning_rate": 1.8374399804835943e-05, "loss": 0.5059, "step": 17439 }, { "epoch": 0.36987550635193317, "grad_norm": 0.37950170040130615, "learning_rate": 1.8374217535211873e-05, "loss": 0.5342, "step": 17440 }, { "epoch": 0.3698967148098662, "grad_norm": 0.8739831447601318, "learning_rate": 1.8374035256274073e-05, "loss": 0.4103, "step": 17441 }, { "epoch": 0.3699179232677992, "grad_norm": 0.35239624977111816, "learning_rate": 1.837385296802274e-05, "loss": 0.4842, "step": 17442 }, { "epoch": 0.36993913172573223, "grad_norm": 0.3538740277290344, "learning_rate": 1.8373670670458082e-05, "loss": 0.5185, "step": 17443 }, { "epoch": 0.36996034018366525, "grad_norm": 0.34253719449043274, "learning_rate": 1.83734883635803e-05, "loss": 0.5595, "step": 17444 }, { "epoch": 0.36998154864159827, "grad_norm": 0.4792085886001587, "learning_rate": 1.8373306047389595e-05, "loss": 0.5678, "step": 17445 }, { "epoch": 0.3700027570995313, "grad_norm": 0.3770103454589844, "learning_rate": 1.8373123721886175e-05, "loss": 0.537, "step": 17446 }, { "epoch": 0.3700239655574643, "grad_norm": 0.3522387444972992, "learning_rate": 1.837294138707024e-05, "loss": 0.4158, "step": 17447 }, { "epoch": 0.3700451740153973, "grad_norm": 0.33000150322914124, "learning_rate": 1.837275904294199e-05, "loss": 0.5806, "step": 17448 }, { "epoch": 0.37006638247333035, "grad_norm": 0.3664209544658661, "learning_rate": 1.837257668950163e-05, "loss": 0.5249, "step": 17449 }, { "epoch": 0.37008759093126337, "grad_norm": 0.5727102756500244, "learning_rate": 1.8372394326749364e-05, "loss": 0.5383, "step": 17450 }, { "epoch": 0.3701087993891964, "grad_norm": 0.7779663801193237, "learning_rate": 1.8372211954685394e-05, "loss": 0.4999, "step": 17451 }, { "epoch": 0.37013000784712946, "grad_norm": 0.31051093339920044, "learning_rate": 1.8372029573309923e-05, "loss": 0.4402, "step": 17452 }, { "epoch": 0.3701512163050625, "grad_norm": 0.3791769742965698, "learning_rate": 1.8371847182623154e-05, "loss": 0.4905, "step": 17453 }, { "epoch": 0.3701724247629955, "grad_norm": 0.36395880579948425, "learning_rate": 1.8371664782625287e-05, "loss": 0.5459, "step": 17454 }, { "epoch": 0.3701936332209285, "grad_norm": 0.39391398429870605, "learning_rate": 1.8371482373316527e-05, "loss": 0.5306, "step": 17455 }, { "epoch": 0.37021484167886154, "grad_norm": 0.4332483410835266, "learning_rate": 1.837129995469708e-05, "loss": 0.4805, "step": 17456 }, { "epoch": 0.37023605013679456, "grad_norm": 0.34003451466560364, "learning_rate": 1.8371117526767145e-05, "loss": 0.5689, "step": 17457 }, { "epoch": 0.3702572585947276, "grad_norm": 0.36671632528305054, "learning_rate": 1.8370935089526923e-05, "loss": 0.5768, "step": 17458 }, { "epoch": 0.3702784670526606, "grad_norm": 0.34832048416137695, "learning_rate": 1.837075264297662e-05, "loss": 0.5052, "step": 17459 }, { "epoch": 0.3702996755105936, "grad_norm": 0.34315136075019836, "learning_rate": 1.8370570187116444e-05, "loss": 0.4628, "step": 17460 }, { "epoch": 0.37032088396852664, "grad_norm": 0.4088106155395508, "learning_rate": 1.8370387721946588e-05, "loss": 0.5542, "step": 17461 }, { "epoch": 0.37034209242645966, "grad_norm": 0.3347768783569336, "learning_rate": 1.837020524746726e-05, "loss": 0.4499, "step": 17462 }, { "epoch": 0.3703633008843927, "grad_norm": 0.35316845774650574, "learning_rate": 1.8370022763678663e-05, "loss": 0.582, "step": 17463 }, { "epoch": 0.3703845093423257, "grad_norm": 0.3313932716846466, "learning_rate": 1.8369840270581e-05, "loss": 0.5131, "step": 17464 }, { "epoch": 0.3704057178002587, "grad_norm": 0.3922014832496643, "learning_rate": 1.836965776817447e-05, "loss": 0.5815, "step": 17465 }, { "epoch": 0.3704269262581918, "grad_norm": 0.3645239472389221, "learning_rate": 1.8369475256459285e-05, "loss": 0.5281, "step": 17466 }, { "epoch": 0.3704481347161248, "grad_norm": 0.33789828419685364, "learning_rate": 1.8369292735435637e-05, "loss": 0.4917, "step": 17467 }, { "epoch": 0.37046934317405783, "grad_norm": 0.3189642131328583, "learning_rate": 1.8369110205103737e-05, "loss": 0.5772, "step": 17468 }, { "epoch": 0.37049055163199085, "grad_norm": 0.34270620346069336, "learning_rate": 1.8368927665463785e-05, "loss": 0.4542, "step": 17469 }, { "epoch": 0.37051176008992387, "grad_norm": 0.37871143221855164, "learning_rate": 1.8368745116515984e-05, "loss": 0.5417, "step": 17470 }, { "epoch": 0.3705329685478569, "grad_norm": 0.3523898124694824, "learning_rate": 1.836856255826054e-05, "loss": 0.529, "step": 17471 }, { "epoch": 0.3705541770057899, "grad_norm": 0.3871345520019531, "learning_rate": 1.8368379990697647e-05, "loss": 0.5492, "step": 17472 }, { "epoch": 0.37057538546372293, "grad_norm": 0.3274312913417816, "learning_rate": 1.836819741382752e-05, "loss": 0.5221, "step": 17473 }, { "epoch": 0.37059659392165595, "grad_norm": 0.3416988253593445, "learning_rate": 1.8368014827650356e-05, "loss": 0.5462, "step": 17474 }, { "epoch": 0.37061780237958897, "grad_norm": 0.38185063004493713, "learning_rate": 1.8367832232166355e-05, "loss": 0.503, "step": 17475 }, { "epoch": 0.370639010837522, "grad_norm": 0.31786608695983887, "learning_rate": 1.836764962737573e-05, "loss": 0.436, "step": 17476 }, { "epoch": 0.370660219295455, "grad_norm": 0.3499787449836731, "learning_rate": 1.836746701327867e-05, "loss": 0.5724, "step": 17477 }, { "epoch": 0.370681427753388, "grad_norm": 0.4384423494338989, "learning_rate": 1.8367284389875394e-05, "loss": 0.5124, "step": 17478 }, { "epoch": 0.3707026362113211, "grad_norm": 0.4085369110107422, "learning_rate": 1.836710175716609e-05, "loss": 0.5312, "step": 17479 }, { "epoch": 0.3707238446692541, "grad_norm": 0.3167789876461029, "learning_rate": 1.8366919115150972e-05, "loss": 0.4886, "step": 17480 }, { "epoch": 0.37074505312718714, "grad_norm": 0.2881491482257843, "learning_rate": 1.8366736463830238e-05, "loss": 0.4543, "step": 17481 }, { "epoch": 0.37076626158512016, "grad_norm": 0.37735217809677124, "learning_rate": 1.836655380320409e-05, "loss": 0.5045, "step": 17482 }, { "epoch": 0.3707874700430532, "grad_norm": 0.4454432427883148, "learning_rate": 1.836637113327274e-05, "loss": 0.4959, "step": 17483 }, { "epoch": 0.3708086785009862, "grad_norm": 0.3856060206890106, "learning_rate": 1.836618845403638e-05, "loss": 0.5505, "step": 17484 }, { "epoch": 0.3708298869589192, "grad_norm": 0.3360651135444641, "learning_rate": 1.8366005765495218e-05, "loss": 0.5469, "step": 17485 }, { "epoch": 0.37085109541685224, "grad_norm": 0.41664400696754456, "learning_rate": 1.836582306764946e-05, "loss": 0.5676, "step": 17486 }, { "epoch": 0.37087230387478526, "grad_norm": 0.3504303991794586, "learning_rate": 1.8365640360499305e-05, "loss": 0.4909, "step": 17487 }, { "epoch": 0.3708935123327183, "grad_norm": 0.37949085235595703, "learning_rate": 1.8365457644044955e-05, "loss": 0.5593, "step": 17488 }, { "epoch": 0.3709147207906513, "grad_norm": 0.348571240901947, "learning_rate": 1.8365274918286616e-05, "loss": 0.4126, "step": 17489 }, { "epoch": 0.3709359292485843, "grad_norm": 0.3090757429599762, "learning_rate": 1.836509218322449e-05, "loss": 0.455, "step": 17490 }, { "epoch": 0.37095713770651734, "grad_norm": 0.3198167383670807, "learning_rate": 1.8364909438858784e-05, "loss": 0.4857, "step": 17491 }, { "epoch": 0.37097834616445036, "grad_norm": 0.34408947825431824, "learning_rate": 1.8364726685189696e-05, "loss": 0.4899, "step": 17492 }, { "epoch": 0.37099955462238343, "grad_norm": 0.3523348271846771, "learning_rate": 1.8364543922217436e-05, "loss": 0.5257, "step": 17493 }, { "epoch": 0.37102076308031645, "grad_norm": 0.4074985980987549, "learning_rate": 1.83643611499422e-05, "loss": 0.3984, "step": 17494 }, { "epoch": 0.37104197153824947, "grad_norm": 0.36057254672050476, "learning_rate": 1.8364178368364193e-05, "loss": 0.5442, "step": 17495 }, { "epoch": 0.3710631799961825, "grad_norm": 0.37059488892555237, "learning_rate": 1.8363995577483623e-05, "loss": 0.5078, "step": 17496 }, { "epoch": 0.3710843884541155, "grad_norm": 0.3582591116428375, "learning_rate": 1.8363812777300687e-05, "loss": 0.5985, "step": 17497 }, { "epoch": 0.37110559691204853, "grad_norm": 0.37137487530708313, "learning_rate": 1.8363629967815594e-05, "loss": 0.6063, "step": 17498 }, { "epoch": 0.37112680536998155, "grad_norm": 0.4233648180961609, "learning_rate": 1.836344714902854e-05, "loss": 0.518, "step": 17499 }, { "epoch": 0.37114801382791457, "grad_norm": 0.40819603204727173, "learning_rate": 1.8363264320939736e-05, "loss": 0.5402, "step": 17500 }, { "epoch": 0.3711692222858476, "grad_norm": 0.3830934762954712, "learning_rate": 1.836308148354938e-05, "loss": 0.48, "step": 17501 }, { "epoch": 0.3711904307437806, "grad_norm": 0.3392208218574524, "learning_rate": 1.8362898636857677e-05, "loss": 0.6077, "step": 17502 }, { "epoch": 0.37121163920171363, "grad_norm": 0.349011093378067, "learning_rate": 1.836271578086483e-05, "loss": 0.4442, "step": 17503 }, { "epoch": 0.37123284765964665, "grad_norm": 0.3808929920196533, "learning_rate": 1.836253291557105e-05, "loss": 0.5538, "step": 17504 }, { "epoch": 0.37125405611757967, "grad_norm": 0.3688110113143921, "learning_rate": 1.836235004097653e-05, "loss": 0.5001, "step": 17505 }, { "epoch": 0.3712752645755127, "grad_norm": 0.33574458956718445, "learning_rate": 1.8362167157081476e-05, "loss": 0.4789, "step": 17506 }, { "epoch": 0.37129647303344576, "grad_norm": 0.3920050859451294, "learning_rate": 1.836198426388609e-05, "loss": 0.5047, "step": 17507 }, { "epoch": 0.3713176814913788, "grad_norm": 0.34987443685531616, "learning_rate": 1.836180136139058e-05, "loss": 0.5261, "step": 17508 }, { "epoch": 0.3713388899493118, "grad_norm": 0.3486422896385193, "learning_rate": 1.8361618449595147e-05, "loss": 0.5025, "step": 17509 }, { "epoch": 0.3713600984072448, "grad_norm": 0.34804484248161316, "learning_rate": 1.8361435528499996e-05, "loss": 0.5279, "step": 17510 }, { "epoch": 0.37138130686517784, "grad_norm": 0.44058820605278015, "learning_rate": 1.8361252598105328e-05, "loss": 0.4932, "step": 17511 }, { "epoch": 0.37140251532311086, "grad_norm": 0.39642319083213806, "learning_rate": 1.836106965841135e-05, "loss": 0.4331, "step": 17512 }, { "epoch": 0.3714237237810439, "grad_norm": 0.3741605579853058, "learning_rate": 1.8360886709418256e-05, "loss": 0.5982, "step": 17513 }, { "epoch": 0.3714449322389769, "grad_norm": 0.4018402397632599, "learning_rate": 1.836070375112626e-05, "loss": 0.4683, "step": 17514 }, { "epoch": 0.3714661406969099, "grad_norm": 0.3543853163719177, "learning_rate": 1.8360520783535562e-05, "loss": 0.4292, "step": 17515 }, { "epoch": 0.37148734915484294, "grad_norm": 0.7707119584083557, "learning_rate": 1.8360337806646368e-05, "loss": 0.512, "step": 17516 }, { "epoch": 0.37150855761277596, "grad_norm": 0.37328609824180603, "learning_rate": 1.8360154820458876e-05, "loss": 0.5891, "step": 17517 }, { "epoch": 0.371529766070709, "grad_norm": 0.3202957510948181, "learning_rate": 1.8359971824973293e-05, "loss": 0.4723, "step": 17518 }, { "epoch": 0.371550974528642, "grad_norm": 0.33387619256973267, "learning_rate": 1.8359788820189823e-05, "loss": 0.4956, "step": 17519 }, { "epoch": 0.3715721829865751, "grad_norm": 0.31648463010787964, "learning_rate": 1.8359605806108665e-05, "loss": 0.5003, "step": 17520 }, { "epoch": 0.3715933914445081, "grad_norm": 0.3744193911552429, "learning_rate": 1.835942278273003e-05, "loss": 0.5425, "step": 17521 }, { "epoch": 0.3716145999024411, "grad_norm": 0.364926815032959, "learning_rate": 1.8359239750054118e-05, "loss": 0.4882, "step": 17522 }, { "epoch": 0.37163580836037413, "grad_norm": 0.40638455748558044, "learning_rate": 1.835905670808113e-05, "loss": 0.5402, "step": 17523 }, { "epoch": 0.37165701681830715, "grad_norm": 0.38625505566596985, "learning_rate": 1.8358873656811273e-05, "loss": 0.6624, "step": 17524 }, { "epoch": 0.37167822527624017, "grad_norm": 0.32496437430381775, "learning_rate": 1.8358690596244747e-05, "loss": 0.5111, "step": 17525 }, { "epoch": 0.3716994337341732, "grad_norm": 0.35536956787109375, "learning_rate": 1.835850752638176e-05, "loss": 0.6029, "step": 17526 }, { "epoch": 0.3717206421921062, "grad_norm": 0.3378407955169678, "learning_rate": 1.8358324447222512e-05, "loss": 0.5506, "step": 17527 }, { "epoch": 0.37174185065003923, "grad_norm": 0.33694687485694885, "learning_rate": 1.8358141358767214e-05, "loss": 0.4412, "step": 17528 }, { "epoch": 0.37176305910797225, "grad_norm": 0.34329935908317566, "learning_rate": 1.8357958261016057e-05, "loss": 0.4916, "step": 17529 }, { "epoch": 0.37178426756590527, "grad_norm": 0.41805270314216614, "learning_rate": 1.8357775153969252e-05, "loss": 0.506, "step": 17530 }, { "epoch": 0.3718054760238383, "grad_norm": 0.36721333861351013, "learning_rate": 1.8357592037627003e-05, "loss": 0.5694, "step": 17531 }, { "epoch": 0.3718266844817713, "grad_norm": 0.3786144256591797, "learning_rate": 1.8357408911989515e-05, "loss": 0.5702, "step": 17532 }, { "epoch": 0.3718478929397043, "grad_norm": 0.3440401256084442, "learning_rate": 1.8357225777056987e-05, "loss": 0.5067, "step": 17533 }, { "epoch": 0.3718691013976374, "grad_norm": 0.40258416533470154, "learning_rate": 1.8357042632829623e-05, "loss": 0.5623, "step": 17534 }, { "epoch": 0.3718903098555704, "grad_norm": 0.3620758652687073, "learning_rate": 1.8356859479307635e-05, "loss": 0.462, "step": 17535 }, { "epoch": 0.37191151831350344, "grad_norm": 0.35582777857780457, "learning_rate": 1.8356676316491216e-05, "loss": 0.4761, "step": 17536 }, { "epoch": 0.37193272677143646, "grad_norm": 0.3717125654220581, "learning_rate": 1.8356493144380574e-05, "loss": 0.5019, "step": 17537 }, { "epoch": 0.3719539352293695, "grad_norm": 0.4914528429508209, "learning_rate": 1.8356309962975915e-05, "loss": 0.6695, "step": 17538 }, { "epoch": 0.3719751436873025, "grad_norm": 0.3714422285556793, "learning_rate": 1.835612677227744e-05, "loss": 0.5106, "step": 17539 }, { "epoch": 0.3719963521452355, "grad_norm": 0.3345373868942261, "learning_rate": 1.8355943572285352e-05, "loss": 0.5287, "step": 17540 }, { "epoch": 0.37201756060316854, "grad_norm": 0.33761683106422424, "learning_rate": 1.8355760362999858e-05, "loss": 0.6117, "step": 17541 }, { "epoch": 0.37203876906110156, "grad_norm": 0.37108880281448364, "learning_rate": 1.835557714442116e-05, "loss": 0.5143, "step": 17542 }, { "epoch": 0.3720599775190346, "grad_norm": 0.3382364511489868, "learning_rate": 1.835539391654946e-05, "loss": 0.5283, "step": 17543 }, { "epoch": 0.3720811859769676, "grad_norm": 0.31943610310554504, "learning_rate": 1.835521067938497e-05, "loss": 0.5248, "step": 17544 }, { "epoch": 0.3721023944349006, "grad_norm": 0.3829324543476105, "learning_rate": 1.8355027432927877e-05, "loss": 0.4767, "step": 17545 }, { "epoch": 0.37212360289283364, "grad_norm": 0.39464208483695984, "learning_rate": 1.83548441771784e-05, "loss": 0.4318, "step": 17546 }, { "epoch": 0.3721448113507667, "grad_norm": 0.36708834767341614, "learning_rate": 1.835466091213674e-05, "loss": 0.4253, "step": 17547 }, { "epoch": 0.37216601980869973, "grad_norm": 0.3299282491207123, "learning_rate": 1.8354477637803093e-05, "loss": 0.4379, "step": 17548 }, { "epoch": 0.37218722826663275, "grad_norm": 0.3478298485279083, "learning_rate": 1.8354294354177674e-05, "loss": 0.5147, "step": 17549 }, { "epoch": 0.37220843672456577, "grad_norm": 0.3635830879211426, "learning_rate": 1.8354111061260676e-05, "loss": 0.5075, "step": 17550 }, { "epoch": 0.3722296451824988, "grad_norm": 0.9181725382804871, "learning_rate": 1.8353927759052314e-05, "loss": 0.43, "step": 17551 }, { "epoch": 0.3722508536404318, "grad_norm": 0.361856609582901, "learning_rate": 1.8353744447552782e-05, "loss": 0.6047, "step": 17552 }, { "epoch": 0.37227206209836483, "grad_norm": 0.35662782192230225, "learning_rate": 1.8353561126762292e-05, "loss": 0.5315, "step": 17553 }, { "epoch": 0.37229327055629785, "grad_norm": 0.3967531621456146, "learning_rate": 1.835337779668104e-05, "loss": 0.6474, "step": 17554 }, { "epoch": 0.37231447901423087, "grad_norm": 0.3437215983867645, "learning_rate": 1.8353194457309235e-05, "loss": 0.5738, "step": 17555 }, { "epoch": 0.3723356874721639, "grad_norm": 0.3335077464580536, "learning_rate": 1.8353011108647082e-05, "loss": 0.4498, "step": 17556 }, { "epoch": 0.3723568959300969, "grad_norm": 0.3194672167301178, "learning_rate": 1.835282775069478e-05, "loss": 0.4523, "step": 17557 }, { "epoch": 0.37237810438802993, "grad_norm": 0.3894095718860626, "learning_rate": 1.8352644383452535e-05, "loss": 0.5104, "step": 17558 }, { "epoch": 0.37239931284596295, "grad_norm": 0.5472216010093689, "learning_rate": 1.8352461006920555e-05, "loss": 0.4745, "step": 17559 }, { "epoch": 0.37242052130389597, "grad_norm": 0.3520912230014801, "learning_rate": 1.8352277621099036e-05, "loss": 0.4524, "step": 17560 }, { "epoch": 0.37244172976182904, "grad_norm": 0.3624281883239746, "learning_rate": 1.8352094225988187e-05, "loss": 0.4753, "step": 17561 }, { "epoch": 0.37246293821976206, "grad_norm": 0.3930104970932007, "learning_rate": 1.8351910821588213e-05, "loss": 0.5122, "step": 17562 }, { "epoch": 0.3724841466776951, "grad_norm": 0.3367542624473572, "learning_rate": 1.8351727407899317e-05, "loss": 0.4289, "step": 17563 }, { "epoch": 0.3725053551356281, "grad_norm": 0.3470756411552429, "learning_rate": 1.83515439849217e-05, "loss": 0.5442, "step": 17564 }, { "epoch": 0.3725265635935611, "grad_norm": 0.3627586364746094, "learning_rate": 1.835136055265557e-05, "loss": 0.5758, "step": 17565 }, { "epoch": 0.37254777205149414, "grad_norm": 0.5161707401275635, "learning_rate": 1.835117711110113e-05, "loss": 0.5782, "step": 17566 }, { "epoch": 0.37256898050942716, "grad_norm": 0.3210003077983856, "learning_rate": 1.8350993660258583e-05, "loss": 0.486, "step": 17567 }, { "epoch": 0.3725901889673602, "grad_norm": 0.3631444573402405, "learning_rate": 1.8350810200128133e-05, "loss": 0.5154, "step": 17568 }, { "epoch": 0.3726113974252932, "grad_norm": 0.373085081577301, "learning_rate": 1.8350626730709985e-05, "loss": 0.5597, "step": 17569 }, { "epoch": 0.3726326058832262, "grad_norm": 0.35911253094673157, "learning_rate": 1.835044325200434e-05, "loss": 0.5372, "step": 17570 }, { "epoch": 0.37265381434115924, "grad_norm": 0.3512784540653229, "learning_rate": 1.835025976401141e-05, "loss": 0.4497, "step": 17571 }, { "epoch": 0.37267502279909226, "grad_norm": 0.3475765883922577, "learning_rate": 1.835007626673139e-05, "loss": 0.4805, "step": 17572 }, { "epoch": 0.3726962312570253, "grad_norm": 0.35525602102279663, "learning_rate": 1.8349892760164487e-05, "loss": 0.5643, "step": 17573 }, { "epoch": 0.3727174397149583, "grad_norm": 0.28748148679733276, "learning_rate": 1.8349709244310905e-05, "loss": 0.493, "step": 17574 }, { "epoch": 0.3727386481728914, "grad_norm": 0.3367275297641754, "learning_rate": 1.834952571917085e-05, "loss": 0.5122, "step": 17575 }, { "epoch": 0.3727598566308244, "grad_norm": 0.3458222448825836, "learning_rate": 1.834934218474453e-05, "loss": 0.502, "step": 17576 }, { "epoch": 0.3727810650887574, "grad_norm": 0.3519841134548187, "learning_rate": 1.8349158641032138e-05, "loss": 0.5705, "step": 17577 }, { "epoch": 0.37280227354669043, "grad_norm": 0.348525732755661, "learning_rate": 1.834897508803388e-05, "loss": 0.5073, "step": 17578 }, { "epoch": 0.37282348200462345, "grad_norm": 0.36235857009887695, "learning_rate": 1.8348791525749974e-05, "loss": 0.5585, "step": 17579 }, { "epoch": 0.37284469046255647, "grad_norm": 0.41092440485954285, "learning_rate": 1.8348607954180614e-05, "loss": 0.5691, "step": 17580 }, { "epoch": 0.3728658989204895, "grad_norm": 0.33481523394584656, "learning_rate": 1.8348424373325998e-05, "loss": 0.4745, "step": 17581 }, { "epoch": 0.3728871073784225, "grad_norm": 0.33181098103523254, "learning_rate": 1.8348240783186342e-05, "loss": 0.4692, "step": 17582 }, { "epoch": 0.37290831583635553, "grad_norm": 0.2995005249977112, "learning_rate": 1.8348057183761843e-05, "loss": 0.4714, "step": 17583 }, { "epoch": 0.37292952429428855, "grad_norm": 0.3474603593349457, "learning_rate": 1.8347873575052708e-05, "loss": 0.4783, "step": 17584 }, { "epoch": 0.37295073275222157, "grad_norm": 0.3996601104736328, "learning_rate": 1.8347689957059143e-05, "loss": 0.5285, "step": 17585 }, { "epoch": 0.3729719412101546, "grad_norm": 0.4926852285861969, "learning_rate": 1.8347506329781348e-05, "loss": 0.6088, "step": 17586 }, { "epoch": 0.3729931496680876, "grad_norm": 0.38106563687324524, "learning_rate": 1.8347322693219526e-05, "loss": 0.5458, "step": 17587 }, { "epoch": 0.3730143581260207, "grad_norm": 0.3961885869503021, "learning_rate": 1.8347139047373885e-05, "loss": 0.478, "step": 17588 }, { "epoch": 0.3730355665839537, "grad_norm": 0.3768078684806824, "learning_rate": 1.8346955392244633e-05, "loss": 0.5188, "step": 17589 }, { "epoch": 0.3730567750418867, "grad_norm": 0.35464924573898315, "learning_rate": 1.8346771727831965e-05, "loss": 0.5505, "step": 17590 }, { "epoch": 0.37307798349981974, "grad_norm": 0.38987961411476135, "learning_rate": 1.8346588054136094e-05, "loss": 0.4747, "step": 17591 }, { "epoch": 0.37309919195775276, "grad_norm": 0.3143449127674103, "learning_rate": 1.8346404371157215e-05, "loss": 0.6417, "step": 17592 }, { "epoch": 0.3731204004156858, "grad_norm": 3.604602575302124, "learning_rate": 1.834622067889554e-05, "loss": 0.4784, "step": 17593 }, { "epoch": 0.3731416088736188, "grad_norm": 0.3385852575302124, "learning_rate": 1.8346036977351272e-05, "loss": 0.5062, "step": 17594 }, { "epoch": 0.3731628173315518, "grad_norm": 0.44354110956192017, "learning_rate": 1.8345853266524614e-05, "loss": 0.4652, "step": 17595 }, { "epoch": 0.37318402578948484, "grad_norm": 0.3647176921367645, "learning_rate": 1.8345669546415772e-05, "loss": 0.4596, "step": 17596 }, { "epoch": 0.37320523424741786, "grad_norm": 0.37135034799575806, "learning_rate": 1.8345485817024946e-05, "loss": 0.4492, "step": 17597 }, { "epoch": 0.3732264427053509, "grad_norm": 0.366885781288147, "learning_rate": 1.8345302078352344e-05, "loss": 0.5514, "step": 17598 }, { "epoch": 0.3732476511632839, "grad_norm": 0.349029541015625, "learning_rate": 1.8345118330398168e-05, "loss": 0.4767, "step": 17599 }, { "epoch": 0.3732688596212169, "grad_norm": 0.3222316801548004, "learning_rate": 1.8344934573162626e-05, "loss": 0.5067, "step": 17600 }, { "epoch": 0.37329006807914994, "grad_norm": 0.4052874445915222, "learning_rate": 1.8344750806645923e-05, "loss": 0.4785, "step": 17601 }, { "epoch": 0.373311276537083, "grad_norm": 0.36438074707984924, "learning_rate": 1.8344567030848256e-05, "loss": 0.5059, "step": 17602 }, { "epoch": 0.37333248499501603, "grad_norm": 0.3510596454143524, "learning_rate": 1.8344383245769835e-05, "loss": 0.5258, "step": 17603 }, { "epoch": 0.37335369345294905, "grad_norm": 0.31298086047172546, "learning_rate": 1.8344199451410865e-05, "loss": 0.5287, "step": 17604 }, { "epoch": 0.3733749019108821, "grad_norm": 0.38775041699409485, "learning_rate": 1.8344015647771547e-05, "loss": 0.4964, "step": 17605 }, { "epoch": 0.3733961103688151, "grad_norm": 0.37633800506591797, "learning_rate": 1.834383183485209e-05, "loss": 0.4453, "step": 17606 }, { "epoch": 0.3734173188267481, "grad_norm": 0.324953556060791, "learning_rate": 1.8343648012652693e-05, "loss": 0.4916, "step": 17607 }, { "epoch": 0.37343852728468113, "grad_norm": 0.3154888153076172, "learning_rate": 1.8343464181173564e-05, "loss": 0.4586, "step": 17608 }, { "epoch": 0.37345973574261415, "grad_norm": 0.3236675560474396, "learning_rate": 1.8343280340414904e-05, "loss": 0.49, "step": 17609 }, { "epoch": 0.37348094420054717, "grad_norm": 0.3317698538303375, "learning_rate": 1.8343096490376924e-05, "loss": 0.5577, "step": 17610 }, { "epoch": 0.3735021526584802, "grad_norm": 0.3868717551231384, "learning_rate": 1.8342912631059823e-05, "loss": 0.5053, "step": 17611 }, { "epoch": 0.3735233611164132, "grad_norm": 0.3851463794708252, "learning_rate": 1.8342728762463805e-05, "loss": 0.4774, "step": 17612 }, { "epoch": 0.37354456957434623, "grad_norm": 0.3535124659538269, "learning_rate": 1.834254488458908e-05, "loss": 0.4828, "step": 17613 }, { "epoch": 0.37356577803227925, "grad_norm": 0.3613998293876648, "learning_rate": 1.8342360997435847e-05, "loss": 0.5521, "step": 17614 }, { "epoch": 0.37358698649021227, "grad_norm": 0.3185026943683624, "learning_rate": 1.8342177101004314e-05, "loss": 0.4492, "step": 17615 }, { "epoch": 0.37360819494814534, "grad_norm": 0.3290743827819824, "learning_rate": 1.8341993195294683e-05, "loss": 0.5226, "step": 17616 }, { "epoch": 0.37362940340607836, "grad_norm": 0.33228302001953125, "learning_rate": 1.8341809280307162e-05, "loss": 0.4816, "step": 17617 }, { "epoch": 0.3736506118640114, "grad_norm": 0.3399420380592346, "learning_rate": 1.834162535604195e-05, "loss": 0.4804, "step": 17618 }, { "epoch": 0.3736718203219444, "grad_norm": 0.35655683279037476, "learning_rate": 1.8341441422499254e-05, "loss": 0.5311, "step": 17619 }, { "epoch": 0.3736930287798774, "grad_norm": 0.34888625144958496, "learning_rate": 1.8341257479679284e-05, "loss": 0.4777, "step": 17620 }, { "epoch": 0.37371423723781044, "grad_norm": 0.3490223288536072, "learning_rate": 1.8341073527582236e-05, "loss": 0.5397, "step": 17621 }, { "epoch": 0.37373544569574346, "grad_norm": 0.3652636408805847, "learning_rate": 1.834088956620832e-05, "loss": 0.5007, "step": 17622 }, { "epoch": 0.3737566541536765, "grad_norm": 1.0096657276153564, "learning_rate": 1.834070559555774e-05, "loss": 0.5482, "step": 17623 }, { "epoch": 0.3737778626116095, "grad_norm": 0.3899471163749695, "learning_rate": 1.83405216156307e-05, "loss": 0.4852, "step": 17624 }, { "epoch": 0.3737990710695425, "grad_norm": 0.38894349336624146, "learning_rate": 1.83403376264274e-05, "loss": 0.5447, "step": 17625 }, { "epoch": 0.37382027952747554, "grad_norm": 0.3771725296974182, "learning_rate": 1.8340153627948054e-05, "loss": 0.53, "step": 17626 }, { "epoch": 0.37384148798540856, "grad_norm": 0.6027477383613586, "learning_rate": 1.8339969620192858e-05, "loss": 0.5263, "step": 17627 }, { "epoch": 0.3738626964433416, "grad_norm": 0.3239516615867615, "learning_rate": 1.833978560316202e-05, "loss": 0.5176, "step": 17628 }, { "epoch": 0.37388390490127466, "grad_norm": 0.34911954402923584, "learning_rate": 1.833960157685575e-05, "loss": 0.4426, "step": 17629 }, { "epoch": 0.3739051133592077, "grad_norm": 0.3324020504951477, "learning_rate": 1.8339417541274242e-05, "loss": 0.4917, "step": 17630 }, { "epoch": 0.3739263218171407, "grad_norm": 0.3722732365131378, "learning_rate": 1.833923349641771e-05, "loss": 0.5132, "step": 17631 }, { "epoch": 0.3739475302750737, "grad_norm": 0.32321780920028687, "learning_rate": 1.8339049442286354e-05, "loss": 0.4184, "step": 17632 }, { "epoch": 0.37396873873300673, "grad_norm": 0.3420597314834595, "learning_rate": 1.8338865378880378e-05, "loss": 0.551, "step": 17633 }, { "epoch": 0.37398994719093975, "grad_norm": 0.34408506751060486, "learning_rate": 1.8338681306199987e-05, "loss": 0.486, "step": 17634 }, { "epoch": 0.3740111556488728, "grad_norm": 0.33115914463996887, "learning_rate": 1.8338497224245392e-05, "loss": 0.439, "step": 17635 }, { "epoch": 0.3740323641068058, "grad_norm": 0.33064648509025574, "learning_rate": 1.833831313301679e-05, "loss": 0.4884, "step": 17636 }, { "epoch": 0.3740535725647388, "grad_norm": 0.4191490411758423, "learning_rate": 1.8338129032514385e-05, "loss": 0.5035, "step": 17637 }, { "epoch": 0.37407478102267183, "grad_norm": 0.36948323249816895, "learning_rate": 1.8337944922738394e-05, "loss": 0.5552, "step": 17638 }, { "epoch": 0.37409598948060485, "grad_norm": 0.33808526396751404, "learning_rate": 1.8337760803689007e-05, "loss": 0.491, "step": 17639 }, { "epoch": 0.37411719793853787, "grad_norm": 0.3193668723106384, "learning_rate": 1.8337576675366436e-05, "loss": 0.5064, "step": 17640 }, { "epoch": 0.3741384063964709, "grad_norm": 0.33250436186790466, "learning_rate": 1.8337392537770884e-05, "loss": 0.5613, "step": 17641 }, { "epoch": 0.3741596148544039, "grad_norm": 0.40647366642951965, "learning_rate": 1.8337208390902557e-05, "loss": 0.4768, "step": 17642 }, { "epoch": 0.374180823312337, "grad_norm": 0.3865910470485687, "learning_rate": 1.833702423476166e-05, "loss": 0.5367, "step": 17643 }, { "epoch": 0.37420203177027, "grad_norm": 0.34810367226600647, "learning_rate": 1.8336840069348396e-05, "loss": 0.5923, "step": 17644 }, { "epoch": 0.374223240228203, "grad_norm": 0.3666481077671051, "learning_rate": 1.8336655894662972e-05, "loss": 0.5593, "step": 17645 }, { "epoch": 0.37424444868613604, "grad_norm": 0.3381509482860565, "learning_rate": 1.8336471710705593e-05, "loss": 0.4841, "step": 17646 }, { "epoch": 0.37426565714406906, "grad_norm": 0.3390134572982788, "learning_rate": 1.8336287517476456e-05, "loss": 0.5289, "step": 17647 }, { "epoch": 0.3742868656020021, "grad_norm": 0.416198194026947, "learning_rate": 1.8336103314975778e-05, "loss": 0.508, "step": 17648 }, { "epoch": 0.3743080740599351, "grad_norm": 0.34305667877197266, "learning_rate": 1.833591910320376e-05, "loss": 0.4582, "step": 17649 }, { "epoch": 0.3743292825178681, "grad_norm": 0.3076455891132355, "learning_rate": 1.83357348821606e-05, "loss": 0.4871, "step": 17650 }, { "epoch": 0.37435049097580114, "grad_norm": 0.34604012966156006, "learning_rate": 1.833555065184651e-05, "loss": 0.6138, "step": 17651 }, { "epoch": 0.37437169943373416, "grad_norm": 0.3299308121204376, "learning_rate": 1.8335366412261697e-05, "loss": 0.5553, "step": 17652 }, { "epoch": 0.3743929078916672, "grad_norm": 0.33527255058288574, "learning_rate": 1.8335182163406358e-05, "loss": 0.4266, "step": 17653 }, { "epoch": 0.3744141163496002, "grad_norm": 0.4423547685146332, "learning_rate": 1.83349979052807e-05, "loss": 0.5709, "step": 17654 }, { "epoch": 0.3744353248075332, "grad_norm": 0.32314664125442505, "learning_rate": 1.8334813637884936e-05, "loss": 0.4822, "step": 17655 }, { "epoch": 0.37445653326546624, "grad_norm": 0.34577828645706177, "learning_rate": 1.833462936121926e-05, "loss": 0.5797, "step": 17656 }, { "epoch": 0.3744777417233993, "grad_norm": 0.33126911520957947, "learning_rate": 1.833444507528388e-05, "loss": 0.4964, "step": 17657 }, { "epoch": 0.37449895018133234, "grad_norm": 0.31677109003067017, "learning_rate": 1.8334260780079007e-05, "loss": 0.516, "step": 17658 }, { "epoch": 0.37452015863926535, "grad_norm": 0.3387901186943054, "learning_rate": 1.8334076475604838e-05, "loss": 0.5673, "step": 17659 }, { "epoch": 0.3745413670971984, "grad_norm": 0.34075507521629333, "learning_rate": 1.8333892161861585e-05, "loss": 0.4994, "step": 17660 }, { "epoch": 0.3745625755551314, "grad_norm": 0.3550882339477539, "learning_rate": 1.8333707838849446e-05, "loss": 0.4894, "step": 17661 }, { "epoch": 0.3745837840130644, "grad_norm": 0.31381091475486755, "learning_rate": 1.8333523506568633e-05, "loss": 0.4312, "step": 17662 }, { "epoch": 0.37460499247099743, "grad_norm": 0.312558650970459, "learning_rate": 1.8333339165019344e-05, "loss": 0.4937, "step": 17663 }, { "epoch": 0.37462620092893045, "grad_norm": 0.3458363711833954, "learning_rate": 1.833315481420179e-05, "loss": 0.4555, "step": 17664 }, { "epoch": 0.3746474093868635, "grad_norm": 0.348529577255249, "learning_rate": 1.8332970454116173e-05, "loss": 0.529, "step": 17665 }, { "epoch": 0.3746686178447965, "grad_norm": 0.3555266857147217, "learning_rate": 1.83327860847627e-05, "loss": 0.5172, "step": 17666 }, { "epoch": 0.3746898263027295, "grad_norm": 0.3301083743572235, "learning_rate": 1.8332601706141574e-05, "loss": 0.5265, "step": 17667 }, { "epoch": 0.37471103476066253, "grad_norm": 0.3685709238052368, "learning_rate": 1.8332417318252996e-05, "loss": 0.5062, "step": 17668 }, { "epoch": 0.37473224321859555, "grad_norm": 0.34191688895225525, "learning_rate": 1.833223292109718e-05, "loss": 0.5025, "step": 17669 }, { "epoch": 0.3747534516765286, "grad_norm": 0.34913018345832825, "learning_rate": 1.833204851467433e-05, "loss": 0.5219, "step": 17670 }, { "epoch": 0.37477466013446165, "grad_norm": 0.42400041222572327, "learning_rate": 1.833186409898464e-05, "loss": 0.5165, "step": 17671 }, { "epoch": 0.37479586859239467, "grad_norm": 0.34008538722991943, "learning_rate": 1.833167967402833e-05, "loss": 0.5466, "step": 17672 }, { "epoch": 0.3748170770503277, "grad_norm": 0.40044090151786804, "learning_rate": 1.8331495239805597e-05, "loss": 0.5156, "step": 17673 }, { "epoch": 0.3748382855082607, "grad_norm": 0.44492554664611816, "learning_rate": 1.833131079631665e-05, "loss": 0.5534, "step": 17674 }, { "epoch": 0.3748594939661937, "grad_norm": 0.32703444361686707, "learning_rate": 1.8331126343561685e-05, "loss": 0.5191, "step": 17675 }, { "epoch": 0.37488070242412674, "grad_norm": 0.3428128659725189, "learning_rate": 1.8330941881540917e-05, "loss": 0.4013, "step": 17676 }, { "epoch": 0.37490191088205976, "grad_norm": 0.3308044970035553, "learning_rate": 1.8330757410254546e-05, "loss": 0.4501, "step": 17677 }, { "epoch": 0.3749231193399928, "grad_norm": 0.370530366897583, "learning_rate": 1.833057292970278e-05, "loss": 0.546, "step": 17678 }, { "epoch": 0.3749443277979258, "grad_norm": 0.36070266366004944, "learning_rate": 1.8330388439885826e-05, "loss": 0.5683, "step": 17679 }, { "epoch": 0.3749655362558588, "grad_norm": 0.33042821288108826, "learning_rate": 1.8330203940803883e-05, "loss": 0.5915, "step": 17680 }, { "epoch": 0.37498674471379184, "grad_norm": 0.34421947598457336, "learning_rate": 1.833001943245716e-05, "loss": 0.5785, "step": 17681 }, { "epoch": 0.37500795317172486, "grad_norm": 0.34992367029190063, "learning_rate": 1.8329834914845865e-05, "loss": 0.5026, "step": 17682 }, { "epoch": 0.3750291616296579, "grad_norm": 0.48444679379463196, "learning_rate": 1.8329650387970196e-05, "loss": 0.519, "step": 17683 }, { "epoch": 0.37505037008759096, "grad_norm": 0.3305553197860718, "learning_rate": 1.8329465851830365e-05, "loss": 0.4616, "step": 17684 }, { "epoch": 0.375071578545524, "grad_norm": 0.33716586232185364, "learning_rate": 1.832928130642657e-05, "loss": 0.4593, "step": 17685 }, { "epoch": 0.375092787003457, "grad_norm": 0.3664289712905884, "learning_rate": 1.8329096751759023e-05, "loss": 0.4973, "step": 17686 }, { "epoch": 0.37511399546139, "grad_norm": 0.3491307199001312, "learning_rate": 1.832891218782793e-05, "loss": 0.5805, "step": 17687 }, { "epoch": 0.37513520391932303, "grad_norm": 0.3438473343849182, "learning_rate": 1.832872761463349e-05, "loss": 0.4791, "step": 17688 }, { "epoch": 0.37515641237725605, "grad_norm": 0.3286130428314209, "learning_rate": 1.8328543032175915e-05, "loss": 0.4744, "step": 17689 }, { "epoch": 0.3751776208351891, "grad_norm": 0.34214484691619873, "learning_rate": 1.8328358440455402e-05, "loss": 0.5777, "step": 17690 }, { "epoch": 0.3751988292931221, "grad_norm": 0.33932894468307495, "learning_rate": 1.8328173839472163e-05, "loss": 0.5013, "step": 17691 }, { "epoch": 0.3752200377510551, "grad_norm": 0.3496883511543274, "learning_rate": 1.8327989229226402e-05, "loss": 0.5068, "step": 17692 }, { "epoch": 0.37524124620898813, "grad_norm": 0.33328524231910706, "learning_rate": 1.8327804609718322e-05, "loss": 0.5167, "step": 17693 }, { "epoch": 0.37526245466692115, "grad_norm": 0.3240004777908325, "learning_rate": 1.8327619980948134e-05, "loss": 0.4614, "step": 17694 }, { "epoch": 0.37528366312485417, "grad_norm": 0.39023587107658386, "learning_rate": 1.8327435342916033e-05, "loss": 0.4907, "step": 17695 }, { "epoch": 0.3753048715827872, "grad_norm": 0.4552099406719208, "learning_rate": 1.8327250695622237e-05, "loss": 0.4972, "step": 17696 }, { "epoch": 0.37532608004072027, "grad_norm": 0.3897116780281067, "learning_rate": 1.832706603906694e-05, "loss": 0.4915, "step": 17697 }, { "epoch": 0.3753472884986533, "grad_norm": 0.33339792490005493, "learning_rate": 1.8326881373250357e-05, "loss": 0.5639, "step": 17698 }, { "epoch": 0.3753684969565863, "grad_norm": 0.38706108927726746, "learning_rate": 1.8326696698172685e-05, "loss": 0.4271, "step": 17699 }, { "epoch": 0.3753897054145193, "grad_norm": 0.3605453372001648, "learning_rate": 1.832651201383414e-05, "loss": 0.498, "step": 17700 }, { "epoch": 0.37541091387245235, "grad_norm": 0.39728108048439026, "learning_rate": 1.8326327320234912e-05, "loss": 0.4984, "step": 17701 }, { "epoch": 0.37543212233038536, "grad_norm": 0.32347533106803894, "learning_rate": 1.832614261737522e-05, "loss": 0.44, "step": 17702 }, { "epoch": 0.3754533307883184, "grad_norm": 0.36373648047447205, "learning_rate": 1.8325957905255264e-05, "loss": 0.4222, "step": 17703 }, { "epoch": 0.3754745392462514, "grad_norm": 0.32180288434028625, "learning_rate": 1.832577318387525e-05, "loss": 0.5103, "step": 17704 }, { "epoch": 0.3754957477041844, "grad_norm": 0.35310593247413635, "learning_rate": 1.832558845323538e-05, "loss": 0.4912, "step": 17705 }, { "epoch": 0.37551695616211744, "grad_norm": 0.3778524398803711, "learning_rate": 1.8325403713335868e-05, "loss": 0.5464, "step": 17706 }, { "epoch": 0.37553816462005046, "grad_norm": 0.3132433593273163, "learning_rate": 1.832521896417691e-05, "loss": 0.4783, "step": 17707 }, { "epoch": 0.3755593730779835, "grad_norm": 0.3459944427013397, "learning_rate": 1.832503420575872e-05, "loss": 0.4761, "step": 17708 }, { "epoch": 0.3755805815359165, "grad_norm": 0.3774588108062744, "learning_rate": 1.8324849438081495e-05, "loss": 0.4807, "step": 17709 }, { "epoch": 0.3756017899938495, "grad_norm": 0.3447909653186798, "learning_rate": 1.832466466114545e-05, "loss": 0.4497, "step": 17710 }, { "epoch": 0.3756229984517826, "grad_norm": 0.38305968046188354, "learning_rate": 1.832447987495078e-05, "loss": 0.4947, "step": 17711 }, { "epoch": 0.3756442069097156, "grad_norm": 0.3462856113910675, "learning_rate": 1.8324295079497698e-05, "loss": 0.5432, "step": 17712 }, { "epoch": 0.37566541536764864, "grad_norm": 0.3383631110191345, "learning_rate": 1.832411027478641e-05, "loss": 0.4739, "step": 17713 }, { "epoch": 0.37568662382558166, "grad_norm": 0.40510204434394836, "learning_rate": 1.8323925460817115e-05, "loss": 0.5454, "step": 17714 }, { "epoch": 0.3757078322835147, "grad_norm": 0.3461751937866211, "learning_rate": 1.8323740637590022e-05, "loss": 0.5137, "step": 17715 }, { "epoch": 0.3757290407414477, "grad_norm": 0.32859230041503906, "learning_rate": 1.8323555805105337e-05, "loss": 0.4802, "step": 17716 }, { "epoch": 0.3757502491993807, "grad_norm": 0.31612688302993774, "learning_rate": 1.832337096336327e-05, "loss": 0.4841, "step": 17717 }, { "epoch": 0.37577145765731373, "grad_norm": 0.31603389978408813, "learning_rate": 1.832318611236402e-05, "loss": 0.5107, "step": 17718 }, { "epoch": 0.37579266611524675, "grad_norm": 0.3747057616710663, "learning_rate": 1.8323001252107793e-05, "loss": 0.5296, "step": 17719 }, { "epoch": 0.3758138745731798, "grad_norm": 0.32591500878334045, "learning_rate": 1.8322816382594798e-05, "loss": 0.5023, "step": 17720 }, { "epoch": 0.3758350830311128, "grad_norm": 0.3217630088329315, "learning_rate": 1.8322631503825237e-05, "loss": 0.458, "step": 17721 }, { "epoch": 0.3758562914890458, "grad_norm": 0.3481956720352173, "learning_rate": 1.8322446615799317e-05, "loss": 0.469, "step": 17722 }, { "epoch": 0.37587749994697883, "grad_norm": 0.3522976040840149, "learning_rate": 1.832226171851725e-05, "loss": 0.5871, "step": 17723 }, { "epoch": 0.37589870840491185, "grad_norm": 0.4526412785053253, "learning_rate": 1.832207681197923e-05, "loss": 0.4926, "step": 17724 }, { "epoch": 0.3759199168628449, "grad_norm": 0.35960403084754944, "learning_rate": 1.832189189618547e-05, "loss": 0.4808, "step": 17725 }, { "epoch": 0.37594112532077795, "grad_norm": 0.35518044233322144, "learning_rate": 1.8321706971136174e-05, "loss": 0.598, "step": 17726 }, { "epoch": 0.37596233377871097, "grad_norm": 0.37197092175483704, "learning_rate": 1.8321522036831548e-05, "loss": 0.5424, "step": 17727 }, { "epoch": 0.375983542236644, "grad_norm": 0.3793211579322815, "learning_rate": 1.8321337093271797e-05, "loss": 0.5596, "step": 17728 }, { "epoch": 0.376004750694577, "grad_norm": 0.35186567902565, "learning_rate": 1.8321152140457126e-05, "loss": 0.5131, "step": 17729 }, { "epoch": 0.37602595915251, "grad_norm": 0.32568269968032837, "learning_rate": 1.8320967178387747e-05, "loss": 0.5053, "step": 17730 }, { "epoch": 0.37604716761044304, "grad_norm": 0.3380884528160095, "learning_rate": 1.8320782207063855e-05, "loss": 0.5698, "step": 17731 }, { "epoch": 0.37606837606837606, "grad_norm": 0.34455886483192444, "learning_rate": 1.8320597226485664e-05, "loss": 0.5307, "step": 17732 }, { "epoch": 0.3760895845263091, "grad_norm": 0.3703775405883789, "learning_rate": 1.8320412236653377e-05, "loss": 0.532, "step": 17733 }, { "epoch": 0.3761107929842421, "grad_norm": 0.3818678855895996, "learning_rate": 1.8320227237567197e-05, "loss": 0.48, "step": 17734 }, { "epoch": 0.3761320014421751, "grad_norm": 0.38258057832717896, "learning_rate": 1.8320042229227338e-05, "loss": 0.554, "step": 17735 }, { "epoch": 0.37615320990010814, "grad_norm": 0.3889527916908264, "learning_rate": 1.8319857211633997e-05, "loss": 0.4452, "step": 17736 }, { "epoch": 0.37617441835804116, "grad_norm": 0.3076905310153961, "learning_rate": 1.831967218478738e-05, "loss": 0.4099, "step": 17737 }, { "epoch": 0.37619562681597424, "grad_norm": 0.35707715153694153, "learning_rate": 1.83194871486877e-05, "loss": 0.4841, "step": 17738 }, { "epoch": 0.37621683527390726, "grad_norm": 0.3393799364566803, "learning_rate": 1.8319302103335158e-05, "loss": 0.5309, "step": 17739 }, { "epoch": 0.3762380437318403, "grad_norm": 0.3163858950138092, "learning_rate": 1.831911704872996e-05, "loss": 0.3864, "step": 17740 }, { "epoch": 0.3762592521897733, "grad_norm": 0.3338569402694702, "learning_rate": 1.8318931984872313e-05, "loss": 0.5284, "step": 17741 }, { "epoch": 0.3762804606477063, "grad_norm": 0.5971426963806152, "learning_rate": 1.8318746911762423e-05, "loss": 0.4844, "step": 17742 }, { "epoch": 0.37630166910563934, "grad_norm": 0.8150973916053772, "learning_rate": 1.831856182940049e-05, "loss": 0.4731, "step": 17743 }, { "epoch": 0.37632287756357236, "grad_norm": 0.35541480779647827, "learning_rate": 1.8318376737786732e-05, "loss": 0.5039, "step": 17744 }, { "epoch": 0.3763440860215054, "grad_norm": 0.3447939157485962, "learning_rate": 1.831819163692134e-05, "loss": 0.5404, "step": 17745 }, { "epoch": 0.3763652944794384, "grad_norm": 0.30551064014434814, "learning_rate": 1.8318006526804535e-05, "loss": 0.4859, "step": 17746 }, { "epoch": 0.3763865029373714, "grad_norm": 0.393708199262619, "learning_rate": 1.8317821407436512e-05, "loss": 0.5451, "step": 17747 }, { "epoch": 0.37640771139530443, "grad_norm": 0.4087156057357788, "learning_rate": 1.831763627881748e-05, "loss": 0.5019, "step": 17748 }, { "epoch": 0.37642891985323745, "grad_norm": 0.3269128203392029, "learning_rate": 1.8317451140947646e-05, "loss": 0.4486, "step": 17749 }, { "epoch": 0.3764501283111705, "grad_norm": 0.39858531951904297, "learning_rate": 1.8317265993827212e-05, "loss": 0.4761, "step": 17750 }, { "epoch": 0.3764713367691035, "grad_norm": 0.37626469135284424, "learning_rate": 1.8317080837456393e-05, "loss": 0.5045, "step": 17751 }, { "epoch": 0.37649254522703657, "grad_norm": 0.46413570642471313, "learning_rate": 1.8316895671835384e-05, "loss": 0.544, "step": 17752 }, { "epoch": 0.3765137536849696, "grad_norm": 0.38139763474464417, "learning_rate": 1.8316710496964396e-05, "loss": 0.5413, "step": 17753 }, { "epoch": 0.3765349621429026, "grad_norm": 0.3295089304447174, "learning_rate": 1.8316525312843638e-05, "loss": 0.4387, "step": 17754 }, { "epoch": 0.3765561706008356, "grad_norm": 0.3616689443588257, "learning_rate": 1.831634011947331e-05, "loss": 0.5507, "step": 17755 }, { "epoch": 0.37657737905876865, "grad_norm": 0.3337409198284149, "learning_rate": 1.8316154916853622e-05, "loss": 0.475, "step": 17756 }, { "epoch": 0.37659858751670167, "grad_norm": 0.3749192953109741, "learning_rate": 1.831596970498478e-05, "loss": 0.5347, "step": 17757 }, { "epoch": 0.3766197959746347, "grad_norm": 0.3299856185913086, "learning_rate": 1.8315784483866986e-05, "loss": 0.5308, "step": 17758 }, { "epoch": 0.3766410044325677, "grad_norm": 0.32368242740631104, "learning_rate": 1.8315599253500453e-05, "loss": 0.4272, "step": 17759 }, { "epoch": 0.3766622128905007, "grad_norm": 0.3712693750858307, "learning_rate": 1.8315414013885376e-05, "loss": 0.4267, "step": 17760 }, { "epoch": 0.37668342134843374, "grad_norm": 0.3551594913005829, "learning_rate": 1.8315228765021974e-05, "loss": 0.4936, "step": 17761 }, { "epoch": 0.37670462980636676, "grad_norm": 0.3493407070636749, "learning_rate": 1.8315043506910443e-05, "loss": 0.5292, "step": 17762 }, { "epoch": 0.3767258382642998, "grad_norm": 0.3170371949672699, "learning_rate": 1.8314858239550992e-05, "loss": 0.4661, "step": 17763 }, { "epoch": 0.3767470467222328, "grad_norm": 0.3416420817375183, "learning_rate": 1.8314672962943832e-05, "loss": 0.5027, "step": 17764 }, { "epoch": 0.3767682551801658, "grad_norm": 0.5686168670654297, "learning_rate": 1.8314487677089163e-05, "loss": 0.4987, "step": 17765 }, { "epoch": 0.3767894636380989, "grad_norm": 0.38412246108055115, "learning_rate": 1.831430238198719e-05, "loss": 0.5582, "step": 17766 }, { "epoch": 0.3768106720960319, "grad_norm": 0.31268593668937683, "learning_rate": 1.8314117077638126e-05, "loss": 0.4953, "step": 17767 }, { "epoch": 0.37683188055396494, "grad_norm": 0.3566116392612457, "learning_rate": 1.8313931764042173e-05, "loss": 0.474, "step": 17768 }, { "epoch": 0.37685308901189796, "grad_norm": 0.3582211136817932, "learning_rate": 1.831374644119954e-05, "loss": 0.4844, "step": 17769 }, { "epoch": 0.376874297469831, "grad_norm": 0.33859390020370483, "learning_rate": 1.8313561109110424e-05, "loss": 0.5573, "step": 17770 }, { "epoch": 0.376895505927764, "grad_norm": 0.35122665762901306, "learning_rate": 1.8313375767775042e-05, "loss": 0.566, "step": 17771 }, { "epoch": 0.376916714385697, "grad_norm": 0.3617309331893921, "learning_rate": 1.8313190417193595e-05, "loss": 0.5313, "step": 17772 }, { "epoch": 0.37693792284363004, "grad_norm": 0.35246095061302185, "learning_rate": 1.8313005057366288e-05, "loss": 0.4752, "step": 17773 }, { "epoch": 0.37695913130156306, "grad_norm": 0.35788366198539734, "learning_rate": 1.8312819688293332e-05, "loss": 0.548, "step": 17774 }, { "epoch": 0.3769803397594961, "grad_norm": 0.3440198302268982, "learning_rate": 1.8312634309974927e-05, "loss": 0.4033, "step": 17775 }, { "epoch": 0.3770015482174291, "grad_norm": 0.31652501225471497, "learning_rate": 1.8312448922411282e-05, "loss": 0.5036, "step": 17776 }, { "epoch": 0.3770227566753621, "grad_norm": 0.3249896764755249, "learning_rate": 1.831226352560261e-05, "loss": 0.5692, "step": 17777 }, { "epoch": 0.37704396513329513, "grad_norm": 0.40678611397743225, "learning_rate": 1.8312078119549102e-05, "loss": 0.5837, "step": 17778 }, { "epoch": 0.3770651735912282, "grad_norm": 0.3422250747680664, "learning_rate": 1.8311892704250976e-05, "loss": 0.5061, "step": 17779 }, { "epoch": 0.37708638204916123, "grad_norm": 0.32597988843917847, "learning_rate": 1.8311707279708438e-05, "loss": 0.5253, "step": 17780 }, { "epoch": 0.37710759050709425, "grad_norm": 0.3752358555793762, "learning_rate": 1.8311521845921687e-05, "loss": 0.5075, "step": 17781 }, { "epoch": 0.37712879896502727, "grad_norm": 0.36166033148765564, "learning_rate": 1.8311336402890937e-05, "loss": 0.4929, "step": 17782 }, { "epoch": 0.3771500074229603, "grad_norm": 0.35846593976020813, "learning_rate": 1.831115095061639e-05, "loss": 0.4844, "step": 17783 }, { "epoch": 0.3771712158808933, "grad_norm": 0.35941386222839355, "learning_rate": 1.8310965489098253e-05, "loss": 0.4606, "step": 17784 }, { "epoch": 0.3771924243388263, "grad_norm": 0.32032671570777893, "learning_rate": 1.831078001833673e-05, "loss": 0.4244, "step": 17785 }, { "epoch": 0.37721363279675935, "grad_norm": 0.4059169292449951, "learning_rate": 1.8310594538332034e-05, "loss": 0.5361, "step": 17786 }, { "epoch": 0.37723484125469237, "grad_norm": 0.3668544292449951, "learning_rate": 1.8310409049084365e-05, "loss": 0.4809, "step": 17787 }, { "epoch": 0.3772560497126254, "grad_norm": 0.3919675946235657, "learning_rate": 1.8310223550593932e-05, "loss": 0.5011, "step": 17788 }, { "epoch": 0.3772772581705584, "grad_norm": 0.32953327894210815, "learning_rate": 1.8310038042860938e-05, "loss": 0.566, "step": 17789 }, { "epoch": 0.3772984666284914, "grad_norm": 0.3205588459968567, "learning_rate": 1.8309852525885597e-05, "loss": 0.5784, "step": 17790 }, { "epoch": 0.37731967508642444, "grad_norm": 0.3882894515991211, "learning_rate": 1.8309666999668107e-05, "loss": 0.5416, "step": 17791 }, { "epoch": 0.37734088354435746, "grad_norm": 0.35686546564102173, "learning_rate": 1.8309481464208674e-05, "loss": 0.4897, "step": 17792 }, { "epoch": 0.37736209200229054, "grad_norm": 0.3333837687969208, "learning_rate": 1.8309295919507513e-05, "loss": 0.4732, "step": 17793 }, { "epoch": 0.37738330046022356, "grad_norm": 0.3856288194656372, "learning_rate": 1.8309110365564825e-05, "loss": 0.5603, "step": 17794 }, { "epoch": 0.3774045089181566, "grad_norm": 0.3426809012889862, "learning_rate": 1.8308924802380815e-05, "loss": 0.5321, "step": 17795 }, { "epoch": 0.3774257173760896, "grad_norm": 0.3751905560493469, "learning_rate": 1.830873922995569e-05, "loss": 0.5042, "step": 17796 }, { "epoch": 0.3774469258340226, "grad_norm": 0.338218092918396, "learning_rate": 1.8308553648289657e-05, "loss": 0.5192, "step": 17797 }, { "epoch": 0.37746813429195564, "grad_norm": 0.429928719997406, "learning_rate": 1.8308368057382923e-05, "loss": 0.4799, "step": 17798 }, { "epoch": 0.37748934274988866, "grad_norm": 0.3446367383003235, "learning_rate": 1.83081824572357e-05, "loss": 0.5506, "step": 17799 }, { "epoch": 0.3775105512078217, "grad_norm": 0.3750862181186676, "learning_rate": 1.8307996847848183e-05, "loss": 0.524, "step": 17800 }, { "epoch": 0.3775317596657547, "grad_norm": 0.34367504715919495, "learning_rate": 1.8307811229220585e-05, "loss": 0.5273, "step": 17801 }, { "epoch": 0.3775529681236877, "grad_norm": 0.33716440200805664, "learning_rate": 1.830762560135311e-05, "loss": 0.5184, "step": 17802 }, { "epoch": 0.37757417658162074, "grad_norm": 0.4316205382347107, "learning_rate": 1.8307439964245966e-05, "loss": 0.4191, "step": 17803 }, { "epoch": 0.37759538503955375, "grad_norm": 0.38088762760162354, "learning_rate": 1.8307254317899364e-05, "loss": 0.5645, "step": 17804 }, { "epoch": 0.3776165934974868, "grad_norm": 0.32348644733428955, "learning_rate": 1.8307068662313502e-05, "loss": 0.4795, "step": 17805 }, { "epoch": 0.3776378019554198, "grad_norm": 0.3516777753829956, "learning_rate": 1.8306882997488594e-05, "loss": 0.5398, "step": 17806 }, { "epoch": 0.37765901041335287, "grad_norm": 0.3932117223739624, "learning_rate": 1.830669732342484e-05, "loss": 0.5026, "step": 17807 }, { "epoch": 0.3776802188712859, "grad_norm": 0.302007794380188, "learning_rate": 1.8306511640122448e-05, "loss": 0.4779, "step": 17808 }, { "epoch": 0.3777014273292189, "grad_norm": 0.34917739033699036, "learning_rate": 1.830632594758163e-05, "loss": 0.5391, "step": 17809 }, { "epoch": 0.37772263578715193, "grad_norm": 0.33153247833251953, "learning_rate": 1.8306140245802585e-05, "loss": 0.4579, "step": 17810 }, { "epoch": 0.37774384424508495, "grad_norm": 0.34174203872680664, "learning_rate": 1.8305954534785525e-05, "loss": 0.5639, "step": 17811 }, { "epoch": 0.37776505270301797, "grad_norm": 0.5041338205337524, "learning_rate": 1.8305768814530652e-05, "loss": 0.5535, "step": 17812 }, { "epoch": 0.377786261160951, "grad_norm": 0.34170475602149963, "learning_rate": 1.8305583085038178e-05, "loss": 0.4867, "step": 17813 }, { "epoch": 0.377807469618884, "grad_norm": 0.3483791649341583, "learning_rate": 1.8305397346308303e-05, "loss": 0.4243, "step": 17814 }, { "epoch": 0.377828678076817, "grad_norm": 0.32514700293540955, "learning_rate": 1.830521159834124e-05, "loss": 0.4838, "step": 17815 }, { "epoch": 0.37784988653475005, "grad_norm": 0.3518363833427429, "learning_rate": 1.8305025841137195e-05, "loss": 0.4883, "step": 17816 }, { "epoch": 0.37787109499268307, "grad_norm": 0.40812060236930847, "learning_rate": 1.8304840074696367e-05, "loss": 0.4243, "step": 17817 }, { "epoch": 0.3778923034506161, "grad_norm": 0.3183193802833557, "learning_rate": 1.8304654299018973e-05, "loss": 0.4021, "step": 17818 }, { "epoch": 0.3779135119085491, "grad_norm": 0.36104458570480347, "learning_rate": 1.830446851410521e-05, "loss": 0.5015, "step": 17819 }, { "epoch": 0.3779347203664822, "grad_norm": 0.4149104952812195, "learning_rate": 1.8304282719955292e-05, "loss": 0.5864, "step": 17820 }, { "epoch": 0.3779559288244152, "grad_norm": 0.3236980140209198, "learning_rate": 1.8304096916569425e-05, "loss": 0.5668, "step": 17821 }, { "epoch": 0.3779771372823482, "grad_norm": 0.3465633988380432, "learning_rate": 1.830391110394781e-05, "loss": 0.5119, "step": 17822 }, { "epoch": 0.37799834574028124, "grad_norm": 0.3271797001361847, "learning_rate": 1.830372528209066e-05, "loss": 0.5148, "step": 17823 }, { "epoch": 0.37801955419821426, "grad_norm": 0.4127650558948517, "learning_rate": 1.8303539450998178e-05, "loss": 0.546, "step": 17824 }, { "epoch": 0.3780407626561473, "grad_norm": 0.4619815945625305, "learning_rate": 1.830335361067057e-05, "loss": 0.6111, "step": 17825 }, { "epoch": 0.3780619711140803, "grad_norm": 0.42490699887275696, "learning_rate": 1.8303167761108046e-05, "loss": 0.463, "step": 17826 }, { "epoch": 0.3780831795720133, "grad_norm": 0.3385868966579437, "learning_rate": 1.8302981902310807e-05, "loss": 0.5583, "step": 17827 }, { "epoch": 0.37810438802994634, "grad_norm": 0.3671911954879761, "learning_rate": 1.830279603427907e-05, "loss": 0.4856, "step": 17828 }, { "epoch": 0.37812559648787936, "grad_norm": 0.35960277915000916, "learning_rate": 1.830261015701303e-05, "loss": 0.4312, "step": 17829 }, { "epoch": 0.3781468049458124, "grad_norm": 0.3463091552257538, "learning_rate": 1.8302424270512903e-05, "loss": 0.485, "step": 17830 }, { "epoch": 0.3781680134037454, "grad_norm": 0.3596455454826355, "learning_rate": 1.830223837477889e-05, "loss": 0.4909, "step": 17831 }, { "epoch": 0.3781892218616784, "grad_norm": 0.34976688027381897, "learning_rate": 1.83020524698112e-05, "loss": 0.4597, "step": 17832 }, { "epoch": 0.37821043031961143, "grad_norm": 0.35530370473861694, "learning_rate": 1.8301866555610037e-05, "loss": 0.53, "step": 17833 }, { "epoch": 0.3782316387775445, "grad_norm": 0.3876510262489319, "learning_rate": 1.8301680632175613e-05, "loss": 0.5259, "step": 17834 }, { "epoch": 0.37825284723547753, "grad_norm": 0.35602688789367676, "learning_rate": 1.830149469950813e-05, "loss": 0.4499, "step": 17835 }, { "epoch": 0.37827405569341055, "grad_norm": 0.325145959854126, "learning_rate": 1.83013087576078e-05, "loss": 0.4714, "step": 17836 }, { "epoch": 0.37829526415134357, "grad_norm": 0.37641578912734985, "learning_rate": 1.8301122806474825e-05, "loss": 0.5224, "step": 17837 }, { "epoch": 0.3783164726092766, "grad_norm": 0.3672066628932953, "learning_rate": 1.830093684610941e-05, "loss": 0.4943, "step": 17838 }, { "epoch": 0.3783376810672096, "grad_norm": 0.36599189043045044, "learning_rate": 1.830075087651177e-05, "loss": 0.5238, "step": 17839 }, { "epoch": 0.3783588895251426, "grad_norm": 0.31993964314460754, "learning_rate": 1.8300564897682106e-05, "loss": 0.5164, "step": 17840 }, { "epoch": 0.37838009798307565, "grad_norm": 0.40164509415626526, "learning_rate": 1.8300378909620623e-05, "loss": 0.529, "step": 17841 }, { "epoch": 0.37840130644100867, "grad_norm": 0.33418068289756775, "learning_rate": 1.8300192912327532e-05, "loss": 0.5377, "step": 17842 }, { "epoch": 0.3784225148989417, "grad_norm": 0.6458132863044739, "learning_rate": 1.8300006905803038e-05, "loss": 0.5303, "step": 17843 }, { "epoch": 0.3784437233568747, "grad_norm": 0.33216550946235657, "learning_rate": 1.829982089004735e-05, "loss": 0.5693, "step": 17844 }, { "epoch": 0.3784649318148077, "grad_norm": 0.6411236524581909, "learning_rate": 1.829963486506067e-05, "loss": 0.4462, "step": 17845 }, { "epoch": 0.37848614027274075, "grad_norm": 0.3270358443260193, "learning_rate": 1.829944883084321e-05, "loss": 0.4974, "step": 17846 }, { "epoch": 0.37850734873067376, "grad_norm": 0.33199211955070496, "learning_rate": 1.829926278739518e-05, "loss": 0.5865, "step": 17847 }, { "epoch": 0.37852855718860684, "grad_norm": 0.3709627091884613, "learning_rate": 1.8299076734716775e-05, "loss": 0.469, "step": 17848 }, { "epoch": 0.37854976564653986, "grad_norm": 0.3375169336795807, "learning_rate": 1.8298890672808212e-05, "loss": 0.4828, "step": 17849 }, { "epoch": 0.3785709741044729, "grad_norm": 0.35955581068992615, "learning_rate": 1.8298704601669697e-05, "loss": 0.518, "step": 17850 }, { "epoch": 0.3785921825624059, "grad_norm": 0.41409799456596375, "learning_rate": 1.829851852130143e-05, "loss": 0.5574, "step": 17851 }, { "epoch": 0.3786133910203389, "grad_norm": 0.4850333034992218, "learning_rate": 1.8298332431703628e-05, "loss": 0.5434, "step": 17852 }, { "epoch": 0.37863459947827194, "grad_norm": 0.32065266370773315, "learning_rate": 1.829814633287649e-05, "loss": 0.5392, "step": 17853 }, { "epoch": 0.37865580793620496, "grad_norm": 0.42271316051483154, "learning_rate": 1.8297960224820227e-05, "loss": 0.5341, "step": 17854 }, { "epoch": 0.378677016394138, "grad_norm": 0.3577929437160492, "learning_rate": 1.8297774107535042e-05, "loss": 0.4856, "step": 17855 }, { "epoch": 0.378698224852071, "grad_norm": 0.3543040454387665, "learning_rate": 1.8297587981021146e-05, "loss": 0.4547, "step": 17856 }, { "epoch": 0.378719433310004, "grad_norm": 0.41181543469429016, "learning_rate": 1.8297401845278743e-05, "loss": 0.443, "step": 17857 }, { "epoch": 0.37874064176793704, "grad_norm": 0.35066068172454834, "learning_rate": 1.8297215700308046e-05, "loss": 0.5094, "step": 17858 }, { "epoch": 0.37876185022587006, "grad_norm": 0.3175925314426422, "learning_rate": 1.8297029546109253e-05, "loss": 0.4703, "step": 17859 }, { "epoch": 0.3787830586838031, "grad_norm": 0.36490750312805176, "learning_rate": 1.829684338268258e-05, "loss": 0.509, "step": 17860 }, { "epoch": 0.37880426714173615, "grad_norm": 0.33664461970329285, "learning_rate": 1.829665721002823e-05, "loss": 0.494, "step": 17861 }, { "epoch": 0.37882547559966917, "grad_norm": 0.31734180450439453, "learning_rate": 1.8296471028146404e-05, "loss": 0.4602, "step": 17862 }, { "epoch": 0.3788466840576022, "grad_norm": 0.4811606705188751, "learning_rate": 1.829628483703732e-05, "loss": 0.5441, "step": 17863 }, { "epoch": 0.3788678925155352, "grad_norm": 0.31579700112342834, "learning_rate": 1.8296098636701182e-05, "loss": 0.4451, "step": 17864 }, { "epoch": 0.37888910097346823, "grad_norm": 0.35919734835624695, "learning_rate": 1.829591242713819e-05, "loss": 0.4658, "step": 17865 }, { "epoch": 0.37891030943140125, "grad_norm": 0.3747636079788208, "learning_rate": 1.829572620834856e-05, "loss": 0.4758, "step": 17866 }, { "epoch": 0.37893151788933427, "grad_norm": 0.3872051537036896, "learning_rate": 1.8295539980332492e-05, "loss": 0.5318, "step": 17867 }, { "epoch": 0.3789527263472673, "grad_norm": 0.3224923610687256, "learning_rate": 1.82953537430902e-05, "loss": 0.4832, "step": 17868 }, { "epoch": 0.3789739348052003, "grad_norm": 0.31828659772872925, "learning_rate": 1.8295167496621885e-05, "loss": 0.4657, "step": 17869 }, { "epoch": 0.3789951432631333, "grad_norm": 0.33468544483184814, "learning_rate": 1.829498124092776e-05, "loss": 0.555, "step": 17870 }, { "epoch": 0.37901635172106635, "grad_norm": 0.3345382809638977, "learning_rate": 1.8294794976008025e-05, "loss": 0.54, "step": 17871 }, { "epoch": 0.37903756017899937, "grad_norm": 0.5442535877227783, "learning_rate": 1.8294608701862896e-05, "loss": 0.5025, "step": 17872 }, { "epoch": 0.3790587686369324, "grad_norm": 0.4826008677482605, "learning_rate": 1.829442241849257e-05, "loss": 0.5178, "step": 17873 }, { "epoch": 0.3790799770948654, "grad_norm": 0.6898119449615479, "learning_rate": 1.829423612589726e-05, "loss": 0.4867, "step": 17874 }, { "epoch": 0.3791011855527985, "grad_norm": 0.7598884105682373, "learning_rate": 1.8294049824077177e-05, "loss": 0.4676, "step": 17875 }, { "epoch": 0.3791223940107315, "grad_norm": 0.3594960868358612, "learning_rate": 1.8293863513032518e-05, "loss": 0.5409, "step": 17876 }, { "epoch": 0.3791436024686645, "grad_norm": 0.3701595067977905, "learning_rate": 1.82936771927635e-05, "loss": 0.52, "step": 17877 }, { "epoch": 0.37916481092659754, "grad_norm": 0.317391961812973, "learning_rate": 1.8293490863270328e-05, "loss": 0.4289, "step": 17878 }, { "epoch": 0.37918601938453056, "grad_norm": 0.40934643149375916, "learning_rate": 1.8293304524553204e-05, "loss": 0.485, "step": 17879 }, { "epoch": 0.3792072278424636, "grad_norm": 0.3553417921066284, "learning_rate": 1.8293118176612338e-05, "loss": 0.5468, "step": 17880 }, { "epoch": 0.3792284363003966, "grad_norm": 0.3418419361114502, "learning_rate": 1.829293181944794e-05, "loss": 0.5666, "step": 17881 }, { "epoch": 0.3792496447583296, "grad_norm": 0.35192349553108215, "learning_rate": 1.8292745453060215e-05, "loss": 0.5217, "step": 17882 }, { "epoch": 0.37927085321626264, "grad_norm": 0.35540109872817993, "learning_rate": 1.829255907744937e-05, "loss": 0.5514, "step": 17883 }, { "epoch": 0.37929206167419566, "grad_norm": 0.31480032205581665, "learning_rate": 1.8292372692615613e-05, "loss": 0.4726, "step": 17884 }, { "epoch": 0.3793132701321287, "grad_norm": 0.3477001488208771, "learning_rate": 1.8292186298559154e-05, "loss": 0.4432, "step": 17885 }, { "epoch": 0.3793344785900617, "grad_norm": 0.35491394996643066, "learning_rate": 1.8291999895280192e-05, "loss": 0.4805, "step": 17886 }, { "epoch": 0.3793556870479947, "grad_norm": 0.39496323466300964, "learning_rate": 1.8291813482778943e-05, "loss": 0.5262, "step": 17887 }, { "epoch": 0.3793768955059278, "grad_norm": 0.36837238073349, "learning_rate": 1.8291627061055612e-05, "loss": 0.4825, "step": 17888 }, { "epoch": 0.3793981039638608, "grad_norm": 0.327920526266098, "learning_rate": 1.8291440630110403e-05, "loss": 0.4527, "step": 17889 }, { "epoch": 0.37941931242179383, "grad_norm": 0.3467440605163574, "learning_rate": 1.829125418994353e-05, "loss": 0.4705, "step": 17890 }, { "epoch": 0.37944052087972685, "grad_norm": 0.3938547968864441, "learning_rate": 1.829106774055519e-05, "loss": 0.6067, "step": 17891 }, { "epoch": 0.37946172933765987, "grad_norm": 0.3588509261608124, "learning_rate": 1.8290881281945602e-05, "loss": 0.5557, "step": 17892 }, { "epoch": 0.3794829377955929, "grad_norm": 0.3453442454338074, "learning_rate": 1.8290694814114964e-05, "loss": 0.497, "step": 17893 }, { "epoch": 0.3795041462535259, "grad_norm": 0.3846176862716675, "learning_rate": 1.829050833706349e-05, "loss": 0.5293, "step": 17894 }, { "epoch": 0.37952535471145893, "grad_norm": 0.3319421410560608, "learning_rate": 1.8290321850791382e-05, "loss": 0.4439, "step": 17895 }, { "epoch": 0.37954656316939195, "grad_norm": 0.3911094069480896, "learning_rate": 1.829013535529885e-05, "loss": 0.5736, "step": 17896 }, { "epoch": 0.37956777162732497, "grad_norm": 0.4088082015514374, "learning_rate": 1.8289948850586102e-05, "loss": 0.4948, "step": 17897 }, { "epoch": 0.379588980085258, "grad_norm": 2.4198009967803955, "learning_rate": 1.8289762336653348e-05, "loss": 0.5461, "step": 17898 }, { "epoch": 0.379610188543191, "grad_norm": 0.3671683073043823, "learning_rate": 1.828957581350079e-05, "loss": 0.4745, "step": 17899 }, { "epoch": 0.379631397001124, "grad_norm": 0.3118511736392975, "learning_rate": 1.8289389281128637e-05, "loss": 0.4823, "step": 17900 }, { "epoch": 0.37965260545905705, "grad_norm": 0.3166651427745819, "learning_rate": 1.8289202739537097e-05, "loss": 0.5272, "step": 17901 }, { "epoch": 0.3796738139169901, "grad_norm": 0.4194013178348541, "learning_rate": 1.8289016188726382e-05, "loss": 0.5601, "step": 17902 }, { "epoch": 0.37969502237492314, "grad_norm": 0.399976909160614, "learning_rate": 1.8288829628696693e-05, "loss": 0.5038, "step": 17903 }, { "epoch": 0.37971623083285616, "grad_norm": 0.3215998411178589, "learning_rate": 1.8288643059448236e-05, "loss": 0.5356, "step": 17904 }, { "epoch": 0.3797374392907892, "grad_norm": 0.47127869725227356, "learning_rate": 1.8288456480981224e-05, "loss": 0.4756, "step": 17905 }, { "epoch": 0.3797586477487222, "grad_norm": 0.30966711044311523, "learning_rate": 1.8288269893295865e-05, "loss": 0.4683, "step": 17906 }, { "epoch": 0.3797798562066552, "grad_norm": 0.3253161907196045, "learning_rate": 1.828808329639236e-05, "loss": 0.5947, "step": 17907 }, { "epoch": 0.37980106466458824, "grad_norm": 0.42356008291244507, "learning_rate": 1.8287896690270924e-05, "loss": 0.495, "step": 17908 }, { "epoch": 0.37982227312252126, "grad_norm": 0.32178351283073425, "learning_rate": 1.8287710074931762e-05, "loss": 0.5002, "step": 17909 }, { "epoch": 0.3798434815804543, "grad_norm": 0.31934690475463867, "learning_rate": 1.828752345037508e-05, "loss": 0.5337, "step": 17910 }, { "epoch": 0.3798646900383873, "grad_norm": 0.3410917818546295, "learning_rate": 1.828733681660109e-05, "loss": 0.4721, "step": 17911 }, { "epoch": 0.3798858984963203, "grad_norm": 0.36005252599716187, "learning_rate": 1.828715017360999e-05, "loss": 0.5749, "step": 17912 }, { "epoch": 0.37990710695425334, "grad_norm": 0.3135894238948822, "learning_rate": 1.828696352140199e-05, "loss": 0.4615, "step": 17913 }, { "epoch": 0.37992831541218636, "grad_norm": 0.36397743225097656, "learning_rate": 1.8286776859977312e-05, "loss": 0.5881, "step": 17914 }, { "epoch": 0.3799495238701194, "grad_norm": 0.3320396840572357, "learning_rate": 1.8286590189336148e-05, "loss": 0.4683, "step": 17915 }, { "epoch": 0.37997073232805245, "grad_norm": 0.2970322072505951, "learning_rate": 1.828640350947871e-05, "loss": 0.4733, "step": 17916 }, { "epoch": 0.37999194078598547, "grad_norm": 0.44409245252609253, "learning_rate": 1.8286216820405204e-05, "loss": 0.5496, "step": 17917 }, { "epoch": 0.3800131492439185, "grad_norm": 0.5281069278717041, "learning_rate": 1.828603012211584e-05, "loss": 0.5118, "step": 17918 }, { "epoch": 0.3800343577018515, "grad_norm": 0.3665313124656677, "learning_rate": 1.8285843414610828e-05, "loss": 0.5744, "step": 17919 }, { "epoch": 0.38005556615978453, "grad_norm": 0.34171822667121887, "learning_rate": 1.8285656697890374e-05, "loss": 0.4714, "step": 17920 }, { "epoch": 0.38007677461771755, "grad_norm": 0.4173826277256012, "learning_rate": 1.8285469971954682e-05, "loss": 0.4977, "step": 17921 }, { "epoch": 0.38009798307565057, "grad_norm": 0.40146347880363464, "learning_rate": 1.8285283236803964e-05, "loss": 0.4789, "step": 17922 }, { "epoch": 0.3801191915335836, "grad_norm": 0.3474079370498657, "learning_rate": 1.8285096492438424e-05, "loss": 0.4862, "step": 17923 }, { "epoch": 0.3801403999915166, "grad_norm": 0.3165343701839447, "learning_rate": 1.828490973885827e-05, "loss": 0.459, "step": 17924 }, { "epoch": 0.38016160844944963, "grad_norm": 0.44921454787254333, "learning_rate": 1.8284722976063714e-05, "loss": 0.5614, "step": 17925 }, { "epoch": 0.38018281690738265, "grad_norm": 0.31357473134994507, "learning_rate": 1.8284536204054963e-05, "loss": 0.4921, "step": 17926 }, { "epoch": 0.38020402536531567, "grad_norm": 0.3113733232021332, "learning_rate": 1.8284349422832218e-05, "loss": 0.4651, "step": 17927 }, { "epoch": 0.3802252338232487, "grad_norm": 0.3594205677509308, "learning_rate": 1.8284162632395696e-05, "loss": 0.529, "step": 17928 }, { "epoch": 0.38024644228118176, "grad_norm": 0.3450044095516205, "learning_rate": 1.8283975832745597e-05, "loss": 0.5238, "step": 17929 }, { "epoch": 0.3802676507391148, "grad_norm": 0.33328527212142944, "learning_rate": 1.8283789023882134e-05, "loss": 0.5027, "step": 17930 }, { "epoch": 0.3802888591970478, "grad_norm": 0.30800577998161316, "learning_rate": 1.8283602205805514e-05, "loss": 0.4939, "step": 17931 }, { "epoch": 0.3803100676549808, "grad_norm": 0.36186301708221436, "learning_rate": 1.828341537851594e-05, "loss": 0.4932, "step": 17932 }, { "epoch": 0.38033127611291384, "grad_norm": 0.363004595041275, "learning_rate": 1.8283228542013626e-05, "loss": 0.4738, "step": 17933 }, { "epoch": 0.38035248457084686, "grad_norm": 0.33016929030418396, "learning_rate": 1.8283041696298775e-05, "loss": 0.5289, "step": 17934 }, { "epoch": 0.3803736930287799, "grad_norm": 0.3028290271759033, "learning_rate": 1.82828548413716e-05, "loss": 0.4628, "step": 17935 }, { "epoch": 0.3803949014867129, "grad_norm": 0.3808704912662506, "learning_rate": 1.8282667977232306e-05, "loss": 0.6049, "step": 17936 }, { "epoch": 0.3804161099446459, "grad_norm": 0.3208681344985962, "learning_rate": 1.8282481103881094e-05, "loss": 0.5461, "step": 17937 }, { "epoch": 0.38043731840257894, "grad_norm": 0.37630167603492737, "learning_rate": 1.8282294221318186e-05, "loss": 0.5126, "step": 17938 }, { "epoch": 0.38045852686051196, "grad_norm": 0.34510600566864014, "learning_rate": 1.8282107329543777e-05, "loss": 0.5392, "step": 17939 }, { "epoch": 0.380479735318445, "grad_norm": 0.39649713039398193, "learning_rate": 1.8281920428558085e-05, "loss": 0.421, "step": 17940 }, { "epoch": 0.380500943776378, "grad_norm": 0.38156479597091675, "learning_rate": 1.8281733518361313e-05, "loss": 0.4423, "step": 17941 }, { "epoch": 0.380522152234311, "grad_norm": 0.36627480387687683, "learning_rate": 1.8281546598953663e-05, "loss": 0.5126, "step": 17942 }, { "epoch": 0.3805433606922441, "grad_norm": 0.3886612057685852, "learning_rate": 1.828135967033535e-05, "loss": 0.4932, "step": 17943 }, { "epoch": 0.3805645691501771, "grad_norm": 0.38281089067459106, "learning_rate": 1.828117273250659e-05, "loss": 0.46, "step": 17944 }, { "epoch": 0.38058577760811013, "grad_norm": 0.3455203175544739, "learning_rate": 1.828098578546757e-05, "loss": 0.5856, "step": 17945 }, { "epoch": 0.38060698606604315, "grad_norm": 0.3246172070503235, "learning_rate": 1.8280798829218517e-05, "loss": 0.5178, "step": 17946 }, { "epoch": 0.38062819452397617, "grad_norm": 0.3425449728965759, "learning_rate": 1.8280611863759628e-05, "loss": 0.5064, "step": 17947 }, { "epoch": 0.3806494029819092, "grad_norm": 0.4096468985080719, "learning_rate": 1.8280424889091116e-05, "loss": 0.38, "step": 17948 }, { "epoch": 0.3806706114398422, "grad_norm": 0.3545084297657013, "learning_rate": 1.8280237905213183e-05, "loss": 0.5735, "step": 17949 }, { "epoch": 0.38069181989777523, "grad_norm": 0.31408777832984924, "learning_rate": 1.8280050912126048e-05, "loss": 0.5161, "step": 17950 }, { "epoch": 0.38071302835570825, "grad_norm": 0.39370453357696533, "learning_rate": 1.827986390982991e-05, "loss": 0.5967, "step": 17951 }, { "epoch": 0.38073423681364127, "grad_norm": 0.4689435362815857, "learning_rate": 1.8279676898324975e-05, "loss": 0.5527, "step": 17952 }, { "epoch": 0.3807554452715743, "grad_norm": 0.33647704124450684, "learning_rate": 1.827948987761146e-05, "loss": 0.4923, "step": 17953 }, { "epoch": 0.3807766537295073, "grad_norm": 0.39905431866645813, "learning_rate": 1.827930284768957e-05, "loss": 0.5231, "step": 17954 }, { "epoch": 0.38079786218744033, "grad_norm": 0.381672739982605, "learning_rate": 1.8279115808559508e-05, "loss": 0.4907, "step": 17955 }, { "epoch": 0.38081907064537335, "grad_norm": 0.3472031056880951, "learning_rate": 1.8278928760221484e-05, "loss": 0.5646, "step": 17956 }, { "epoch": 0.3808402791033064, "grad_norm": 0.32821789383888245, "learning_rate": 1.827874170267571e-05, "loss": 0.4283, "step": 17957 }, { "epoch": 0.38086148756123944, "grad_norm": 0.32499781250953674, "learning_rate": 1.827855463592239e-05, "loss": 0.4955, "step": 17958 }, { "epoch": 0.38088269601917246, "grad_norm": 0.34502115845680237, "learning_rate": 1.8278367559961732e-05, "loss": 0.5663, "step": 17959 }, { "epoch": 0.3809039044771055, "grad_norm": 0.372568279504776, "learning_rate": 1.827818047479395e-05, "loss": 0.475, "step": 17960 }, { "epoch": 0.3809251129350385, "grad_norm": 0.39915648102760315, "learning_rate": 1.8277993380419245e-05, "loss": 0.5937, "step": 17961 }, { "epoch": 0.3809463213929715, "grad_norm": 0.36383184790611267, "learning_rate": 1.8277806276837827e-05, "loss": 0.5225, "step": 17962 }, { "epoch": 0.38096752985090454, "grad_norm": 0.30791178345680237, "learning_rate": 1.8277619164049907e-05, "loss": 0.5067, "step": 17963 }, { "epoch": 0.38098873830883756, "grad_norm": 0.36532193422317505, "learning_rate": 1.827743204205569e-05, "loss": 0.4672, "step": 17964 }, { "epoch": 0.3810099467667706, "grad_norm": 0.3397628664970398, "learning_rate": 1.8277244910855384e-05, "loss": 0.5218, "step": 17965 }, { "epoch": 0.3810311552247036, "grad_norm": 0.3058258593082428, "learning_rate": 1.82770577704492e-05, "loss": 0.4453, "step": 17966 }, { "epoch": 0.3810523636826366, "grad_norm": 0.4221542775630951, "learning_rate": 1.8276870620837338e-05, "loss": 0.5562, "step": 17967 }, { "epoch": 0.38107357214056964, "grad_norm": 0.37483444809913635, "learning_rate": 1.827668346202002e-05, "loss": 0.4972, "step": 17968 }, { "epoch": 0.38109478059850266, "grad_norm": 0.33732667565345764, "learning_rate": 1.8276496293997444e-05, "loss": 0.5373, "step": 17969 }, { "epoch": 0.38111598905643573, "grad_norm": 0.33818185329437256, "learning_rate": 1.827630911676982e-05, "loss": 0.5153, "step": 17970 }, { "epoch": 0.38113719751436875, "grad_norm": 0.33443427085876465, "learning_rate": 1.8276121930337357e-05, "loss": 0.447, "step": 17971 }, { "epoch": 0.3811584059723018, "grad_norm": 0.7398560047149658, "learning_rate": 1.8275934734700263e-05, "loss": 0.5832, "step": 17972 }, { "epoch": 0.3811796144302348, "grad_norm": 0.35214489698410034, "learning_rate": 1.8275747529858747e-05, "loss": 0.461, "step": 17973 }, { "epoch": 0.3812008228881678, "grad_norm": 0.42199182510375977, "learning_rate": 1.8275560315813018e-05, "loss": 0.4941, "step": 17974 }, { "epoch": 0.38122203134610083, "grad_norm": 0.3440784811973572, "learning_rate": 1.8275373092563277e-05, "loss": 0.4841, "step": 17975 }, { "epoch": 0.38124323980403385, "grad_norm": 0.42022451758384705, "learning_rate": 1.8275185860109747e-05, "loss": 0.4879, "step": 17976 }, { "epoch": 0.38126444826196687, "grad_norm": 0.31644320487976074, "learning_rate": 1.827499861845262e-05, "loss": 0.5231, "step": 17977 }, { "epoch": 0.3812856567198999, "grad_norm": 0.31838107109069824, "learning_rate": 1.8274811367592113e-05, "loss": 0.416, "step": 17978 }, { "epoch": 0.3813068651778329, "grad_norm": 0.32669445872306824, "learning_rate": 1.8274624107528432e-05, "loss": 0.5032, "step": 17979 }, { "epoch": 0.38132807363576593, "grad_norm": 0.32084378600120544, "learning_rate": 1.8274436838261786e-05, "loss": 0.3752, "step": 17980 }, { "epoch": 0.38134928209369895, "grad_norm": 0.3865438401699066, "learning_rate": 1.827424955979239e-05, "loss": 0.5494, "step": 17981 }, { "epoch": 0.38137049055163197, "grad_norm": 0.37670302391052246, "learning_rate": 1.8274062272120435e-05, "loss": 0.4872, "step": 17982 }, { "epoch": 0.381391699009565, "grad_norm": 0.3122767508029938, "learning_rate": 1.8273874975246143e-05, "loss": 0.4661, "step": 17983 }, { "epoch": 0.38141290746749806, "grad_norm": 0.34865814447402954, "learning_rate": 1.8273687669169722e-05, "loss": 0.4468, "step": 17984 }, { "epoch": 0.3814341159254311, "grad_norm": 0.44430187344551086, "learning_rate": 1.8273500353891375e-05, "loss": 0.5522, "step": 17985 }, { "epoch": 0.3814553243833641, "grad_norm": 0.36749300360679626, "learning_rate": 1.8273313029411314e-05, "loss": 0.5225, "step": 17986 }, { "epoch": 0.3814765328412971, "grad_norm": 0.31901419162750244, "learning_rate": 1.8273125695729747e-05, "loss": 0.4464, "step": 17987 }, { "epoch": 0.38149774129923014, "grad_norm": 0.4538773000240326, "learning_rate": 1.827293835284688e-05, "loss": 0.5541, "step": 17988 }, { "epoch": 0.38151894975716316, "grad_norm": 0.3558531701564789, "learning_rate": 1.8272751000762924e-05, "loss": 0.5542, "step": 17989 }, { "epoch": 0.3815401582150962, "grad_norm": 0.4186020791530609, "learning_rate": 1.827256363947809e-05, "loss": 0.4405, "step": 17990 }, { "epoch": 0.3815613666730292, "grad_norm": 0.50315922498703, "learning_rate": 1.8272376268992574e-05, "loss": 0.6102, "step": 17991 }, { "epoch": 0.3815825751309622, "grad_norm": 0.36063405871391296, "learning_rate": 1.8272188889306598e-05, "loss": 0.5104, "step": 17992 }, { "epoch": 0.38160378358889524, "grad_norm": 0.35694295167922974, "learning_rate": 1.8272001500420363e-05, "loss": 0.5248, "step": 17993 }, { "epoch": 0.38162499204682826, "grad_norm": 0.3786812722682953, "learning_rate": 1.827181410233408e-05, "loss": 0.4559, "step": 17994 }, { "epoch": 0.3816462005047613, "grad_norm": 0.7726967334747314, "learning_rate": 1.8271626695047963e-05, "loss": 0.487, "step": 17995 }, { "epoch": 0.3816674089626943, "grad_norm": 0.37105846405029297, "learning_rate": 1.827143927856221e-05, "loss": 0.5326, "step": 17996 }, { "epoch": 0.3816886174206273, "grad_norm": 0.3367304503917694, "learning_rate": 1.8271251852877032e-05, "loss": 0.417, "step": 17997 }, { "epoch": 0.3817098258785604, "grad_norm": 0.3765218257904053, "learning_rate": 1.8271064417992643e-05, "loss": 0.4243, "step": 17998 }, { "epoch": 0.3817310343364934, "grad_norm": 0.33623945713043213, "learning_rate": 1.8270876973909246e-05, "loss": 0.5307, "step": 17999 }, { "epoch": 0.38175224279442643, "grad_norm": 0.3193589746952057, "learning_rate": 1.8270689520627054e-05, "loss": 0.5292, "step": 18000 }, { "epoch": 0.38177345125235945, "grad_norm": 0.34504976868629456, "learning_rate": 1.827050205814627e-05, "loss": 0.535, "step": 18001 }, { "epoch": 0.38179465971029247, "grad_norm": 0.35318076610565186, "learning_rate": 1.827031458646711e-05, "loss": 0.5292, "step": 18002 }, { "epoch": 0.3818158681682255, "grad_norm": 0.4492167532444, "learning_rate": 1.827012710558977e-05, "loss": 0.5204, "step": 18003 }, { "epoch": 0.3818370766261585, "grad_norm": 0.3466077744960785, "learning_rate": 1.8269939615514475e-05, "loss": 0.474, "step": 18004 }, { "epoch": 0.38185828508409153, "grad_norm": 0.4526090919971466, "learning_rate": 1.826975211624142e-05, "loss": 0.6156, "step": 18005 }, { "epoch": 0.38187949354202455, "grad_norm": 0.3288019299507141, "learning_rate": 1.826956460777082e-05, "loss": 0.5613, "step": 18006 }, { "epoch": 0.38190070199995757, "grad_norm": 0.3516997992992401, "learning_rate": 1.826937709010288e-05, "loss": 0.567, "step": 18007 }, { "epoch": 0.3819219104578906, "grad_norm": 0.33053961396217346, "learning_rate": 1.8269189563237812e-05, "loss": 0.4804, "step": 18008 }, { "epoch": 0.3819431189158236, "grad_norm": 0.34789615869522095, "learning_rate": 1.8269002027175825e-05, "loss": 0.5105, "step": 18009 }, { "epoch": 0.38196432737375663, "grad_norm": 0.3588578999042511, "learning_rate": 1.8268814481917124e-05, "loss": 0.4935, "step": 18010 }, { "epoch": 0.3819855358316897, "grad_norm": 0.3781544268131256, "learning_rate": 1.8268626927461918e-05, "loss": 0.5537, "step": 18011 }, { "epoch": 0.3820067442896227, "grad_norm": 0.3316567838191986, "learning_rate": 1.8268439363810418e-05, "loss": 0.465, "step": 18012 }, { "epoch": 0.38202795274755574, "grad_norm": 0.42625799775123596, "learning_rate": 1.8268251790962834e-05, "loss": 0.4995, "step": 18013 }, { "epoch": 0.38204916120548876, "grad_norm": 0.3910405933856964, "learning_rate": 1.826806420891937e-05, "loss": 0.5071, "step": 18014 }, { "epoch": 0.3820703696634218, "grad_norm": 0.28945013880729675, "learning_rate": 1.826787661768024e-05, "loss": 0.4518, "step": 18015 }, { "epoch": 0.3820915781213548, "grad_norm": 0.3739107549190521, "learning_rate": 1.8267689017245643e-05, "loss": 0.5737, "step": 18016 }, { "epoch": 0.3821127865792878, "grad_norm": 0.3844534754753113, "learning_rate": 1.82675014076158e-05, "loss": 0.4874, "step": 18017 }, { "epoch": 0.38213399503722084, "grad_norm": 0.3322194516658783, "learning_rate": 1.826731378879091e-05, "loss": 0.4933, "step": 18018 }, { "epoch": 0.38215520349515386, "grad_norm": 0.45418667793273926, "learning_rate": 1.8267126160771188e-05, "loss": 0.5394, "step": 18019 }, { "epoch": 0.3821764119530869, "grad_norm": 0.4147382378578186, "learning_rate": 1.826693852355684e-05, "loss": 0.5119, "step": 18020 }, { "epoch": 0.3821976204110199, "grad_norm": 0.3466275930404663, "learning_rate": 1.8266750877148075e-05, "loss": 0.582, "step": 18021 }, { "epoch": 0.3822188288689529, "grad_norm": 0.35023707151412964, "learning_rate": 1.8266563221545097e-05, "loss": 0.4099, "step": 18022 }, { "epoch": 0.38224003732688594, "grad_norm": 0.34424248337745667, "learning_rate": 1.8266375556748122e-05, "loss": 0.5449, "step": 18023 }, { "epoch": 0.38226124578481896, "grad_norm": 0.3361082077026367, "learning_rate": 1.8266187882757357e-05, "loss": 0.5185, "step": 18024 }, { "epoch": 0.38228245424275203, "grad_norm": 0.34369853138923645, "learning_rate": 1.826600019957301e-05, "loss": 0.505, "step": 18025 }, { "epoch": 0.38230366270068505, "grad_norm": 0.32784485816955566, "learning_rate": 1.826581250719529e-05, "loss": 0.4605, "step": 18026 }, { "epoch": 0.3823248711586181, "grad_norm": 0.3676629364490509, "learning_rate": 1.8265624805624402e-05, "loss": 0.5501, "step": 18027 }, { "epoch": 0.3823460796165511, "grad_norm": 0.32986757159233093, "learning_rate": 1.8265437094860562e-05, "loss": 0.5149, "step": 18028 }, { "epoch": 0.3823672880744841, "grad_norm": 0.35249194502830505, "learning_rate": 1.8265249374903972e-05, "loss": 0.553, "step": 18029 }, { "epoch": 0.38238849653241713, "grad_norm": 0.3720887005329132, "learning_rate": 1.826506164575484e-05, "loss": 0.488, "step": 18030 }, { "epoch": 0.38240970499035015, "grad_norm": 0.3881465792655945, "learning_rate": 1.8264873907413384e-05, "loss": 0.5443, "step": 18031 }, { "epoch": 0.38243091344828317, "grad_norm": 0.33648931980133057, "learning_rate": 1.8264686159879806e-05, "loss": 0.5248, "step": 18032 }, { "epoch": 0.3824521219062162, "grad_norm": 0.3912788927555084, "learning_rate": 1.8264498403154315e-05, "loss": 0.477, "step": 18033 }, { "epoch": 0.3824733303641492, "grad_norm": 0.35448187589645386, "learning_rate": 1.8264310637237118e-05, "loss": 0.5117, "step": 18034 }, { "epoch": 0.38249453882208223, "grad_norm": 0.31510722637176514, "learning_rate": 1.826412286212843e-05, "loss": 0.4448, "step": 18035 }, { "epoch": 0.38251574728001525, "grad_norm": 0.3557405173778534, "learning_rate": 1.8263935077828456e-05, "loss": 0.4684, "step": 18036 }, { "epoch": 0.38253695573794827, "grad_norm": 0.423130065202713, "learning_rate": 1.8263747284337405e-05, "loss": 0.5102, "step": 18037 }, { "epoch": 0.38255816419588135, "grad_norm": 0.352861225605011, "learning_rate": 1.8263559481655484e-05, "loss": 0.486, "step": 18038 }, { "epoch": 0.38257937265381436, "grad_norm": 0.33504873514175415, "learning_rate": 1.8263371669782907e-05, "loss": 0.5224, "step": 18039 }, { "epoch": 0.3826005811117474, "grad_norm": 0.32499170303344727, "learning_rate": 1.826318384871988e-05, "loss": 0.4672, "step": 18040 }, { "epoch": 0.3826217895696804, "grad_norm": 0.31734591722488403, "learning_rate": 1.826299601846661e-05, "loss": 0.5344, "step": 18041 }, { "epoch": 0.3826429980276134, "grad_norm": 0.33968013525009155, "learning_rate": 1.8262808179023306e-05, "loss": 0.5685, "step": 18042 }, { "epoch": 0.38266420648554644, "grad_norm": 0.33689120411872864, "learning_rate": 1.826262033039018e-05, "loss": 0.481, "step": 18043 }, { "epoch": 0.38268541494347946, "grad_norm": 0.3380907475948334, "learning_rate": 1.826243247256744e-05, "loss": 0.5136, "step": 18044 }, { "epoch": 0.3827066234014125, "grad_norm": 0.3156096935272217, "learning_rate": 1.8262244605555292e-05, "loss": 0.534, "step": 18045 }, { "epoch": 0.3827278318593455, "grad_norm": 0.3308057188987732, "learning_rate": 1.826205672935395e-05, "loss": 0.441, "step": 18046 }, { "epoch": 0.3827490403172785, "grad_norm": 0.3233731985092163, "learning_rate": 1.826186884396362e-05, "loss": 0.5689, "step": 18047 }, { "epoch": 0.38277024877521154, "grad_norm": 0.31640058755874634, "learning_rate": 1.826168094938451e-05, "loss": 0.4562, "step": 18048 }, { "epoch": 0.38279145723314456, "grad_norm": 0.3914038836956024, "learning_rate": 1.8261493045616833e-05, "loss": 0.515, "step": 18049 }, { "epoch": 0.3828126656910776, "grad_norm": 0.390163391828537, "learning_rate": 1.826130513266079e-05, "loss": 0.5773, "step": 18050 }, { "epoch": 0.3828338741490106, "grad_norm": 0.3392432928085327, "learning_rate": 1.82611172105166e-05, "loss": 0.5375, "step": 18051 }, { "epoch": 0.3828550826069437, "grad_norm": 0.34271660447120667, "learning_rate": 1.8260929279184468e-05, "loss": 0.5285, "step": 18052 }, { "epoch": 0.3828762910648767, "grad_norm": 0.34584909677505493, "learning_rate": 1.8260741338664597e-05, "loss": 0.4762, "step": 18053 }, { "epoch": 0.3828974995228097, "grad_norm": 0.3217257261276245, "learning_rate": 1.8260553388957205e-05, "loss": 0.5591, "step": 18054 }, { "epoch": 0.38291870798074273, "grad_norm": 0.37517932057380676, "learning_rate": 1.8260365430062496e-05, "loss": 0.472, "step": 18055 }, { "epoch": 0.38293991643867575, "grad_norm": 0.9284926056861877, "learning_rate": 1.8260177461980682e-05, "loss": 0.4771, "step": 18056 }, { "epoch": 0.3829611248966088, "grad_norm": 0.4917251765727997, "learning_rate": 1.825998948471197e-05, "loss": 0.3714, "step": 18057 }, { "epoch": 0.3829823333545418, "grad_norm": 0.38587817549705505, "learning_rate": 1.8259801498256567e-05, "loss": 0.5303, "step": 18058 }, { "epoch": 0.3830035418124748, "grad_norm": 0.3890042304992676, "learning_rate": 1.8259613502614686e-05, "loss": 0.6373, "step": 18059 }, { "epoch": 0.38302475027040783, "grad_norm": 0.31377676129341125, "learning_rate": 1.8259425497786533e-05, "loss": 0.4114, "step": 18060 }, { "epoch": 0.38304595872834085, "grad_norm": 0.34846946597099304, "learning_rate": 1.825923748377232e-05, "loss": 0.4995, "step": 18061 }, { "epoch": 0.38306716718627387, "grad_norm": 0.3399059474468231, "learning_rate": 1.8259049460572254e-05, "loss": 0.602, "step": 18062 }, { "epoch": 0.3830883756442069, "grad_norm": 0.3463153541088104, "learning_rate": 1.8258861428186548e-05, "loss": 0.5002, "step": 18063 }, { "epoch": 0.3831095841021399, "grad_norm": 0.3163056969642639, "learning_rate": 1.825867338661541e-05, "loss": 0.449, "step": 18064 }, { "epoch": 0.38313079256007293, "grad_norm": 0.35634860396385193, "learning_rate": 1.8258485335859043e-05, "loss": 0.5314, "step": 18065 }, { "epoch": 0.383152001018006, "grad_norm": 0.3174836337566376, "learning_rate": 1.8258297275917658e-05, "loss": 0.5973, "step": 18066 }, { "epoch": 0.383173209475939, "grad_norm": 0.3489793539047241, "learning_rate": 1.825810920679147e-05, "loss": 0.4915, "step": 18067 }, { "epoch": 0.38319441793387204, "grad_norm": 0.3377951681613922, "learning_rate": 1.8257921128480686e-05, "loss": 0.5896, "step": 18068 }, { "epoch": 0.38321562639180506, "grad_norm": 0.40372270345687866, "learning_rate": 1.825773304098551e-05, "loss": 0.5178, "step": 18069 }, { "epoch": 0.3832368348497381, "grad_norm": 0.35217031836509705, "learning_rate": 1.8257544944306156e-05, "loss": 0.4602, "step": 18070 }, { "epoch": 0.3832580433076711, "grad_norm": 0.3579886555671692, "learning_rate": 1.8257356838442836e-05, "loss": 0.543, "step": 18071 }, { "epoch": 0.3832792517656041, "grad_norm": 0.3202316462993622, "learning_rate": 1.825716872339575e-05, "loss": 0.503, "step": 18072 }, { "epoch": 0.38330046022353714, "grad_norm": 0.3351777195930481, "learning_rate": 1.8256980599165117e-05, "loss": 0.4718, "step": 18073 }, { "epoch": 0.38332166868147016, "grad_norm": 0.3089086413383484, "learning_rate": 1.8256792465751138e-05, "loss": 0.4943, "step": 18074 }, { "epoch": 0.3833428771394032, "grad_norm": 0.4270680546760559, "learning_rate": 1.8256604323154028e-05, "loss": 0.6194, "step": 18075 }, { "epoch": 0.3833640855973362, "grad_norm": 0.36053404211997986, "learning_rate": 1.8256416171373997e-05, "loss": 0.5351, "step": 18076 }, { "epoch": 0.3833852940552692, "grad_norm": 0.37031757831573486, "learning_rate": 1.8256228010411248e-05, "loss": 0.4757, "step": 18077 }, { "epoch": 0.38340650251320224, "grad_norm": 0.353414386510849, "learning_rate": 1.8256039840265997e-05, "loss": 0.5706, "step": 18078 }, { "epoch": 0.3834277109711353, "grad_norm": 0.44329360127449036, "learning_rate": 1.825585166093845e-05, "loss": 0.5025, "step": 18079 }, { "epoch": 0.38344891942906834, "grad_norm": 0.38609984517097473, "learning_rate": 1.8255663472428812e-05, "loss": 0.5037, "step": 18080 }, { "epoch": 0.38347012788700136, "grad_norm": 0.3606046736240387, "learning_rate": 1.82554752747373e-05, "loss": 0.5215, "step": 18081 }, { "epoch": 0.3834913363449344, "grad_norm": 0.43527576327323914, "learning_rate": 1.825528706786412e-05, "loss": 0.5839, "step": 18082 }, { "epoch": 0.3835125448028674, "grad_norm": 0.3823876678943634, "learning_rate": 1.825509885180948e-05, "loss": 0.4578, "step": 18083 }, { "epoch": 0.3835337532608004, "grad_norm": 0.3265143930912018, "learning_rate": 1.8254910626573593e-05, "loss": 0.4399, "step": 18084 }, { "epoch": 0.38355496171873343, "grad_norm": 0.3869345784187317, "learning_rate": 1.8254722392156664e-05, "loss": 0.55, "step": 18085 }, { "epoch": 0.38357617017666645, "grad_norm": 0.3786475360393524, "learning_rate": 1.8254534148558905e-05, "loss": 0.561, "step": 18086 }, { "epoch": 0.3835973786345995, "grad_norm": 0.39402714371681213, "learning_rate": 1.825434589578053e-05, "loss": 0.5311, "step": 18087 }, { "epoch": 0.3836185870925325, "grad_norm": 0.333848237991333, "learning_rate": 1.8254157633821735e-05, "loss": 0.5469, "step": 18088 }, { "epoch": 0.3836397955504655, "grad_norm": 0.336990624666214, "learning_rate": 1.825396936268274e-05, "loss": 0.4381, "step": 18089 }, { "epoch": 0.38366100400839853, "grad_norm": 0.3666157126426697, "learning_rate": 1.825378108236376e-05, "loss": 0.4987, "step": 18090 }, { "epoch": 0.38368221246633155, "grad_norm": 0.3275628983974457, "learning_rate": 1.8253592792864986e-05, "loss": 0.4394, "step": 18091 }, { "epoch": 0.38370342092426457, "grad_norm": 0.4082691967487335, "learning_rate": 1.8253404494186643e-05, "loss": 0.5025, "step": 18092 }, { "epoch": 0.38372462938219765, "grad_norm": 0.3697730004787445, "learning_rate": 1.8253216186328933e-05, "loss": 0.5005, "step": 18093 }, { "epoch": 0.38374583784013067, "grad_norm": 0.32948699593544006, "learning_rate": 1.8253027869292065e-05, "loss": 0.5621, "step": 18094 }, { "epoch": 0.3837670462980637, "grad_norm": 0.35739803314208984, "learning_rate": 1.8252839543076257e-05, "loss": 0.5026, "step": 18095 }, { "epoch": 0.3837882547559967, "grad_norm": 0.3219609558582306, "learning_rate": 1.825265120768171e-05, "loss": 0.4555, "step": 18096 }, { "epoch": 0.3838094632139297, "grad_norm": 0.32689109444618225, "learning_rate": 1.8252462863108634e-05, "loss": 0.4918, "step": 18097 }, { "epoch": 0.38383067167186274, "grad_norm": 0.3607683777809143, "learning_rate": 1.8252274509357242e-05, "loss": 0.5512, "step": 18098 }, { "epoch": 0.38385188012979576, "grad_norm": 0.3808072507381439, "learning_rate": 1.8252086146427747e-05, "loss": 0.5159, "step": 18099 }, { "epoch": 0.3838730885877288, "grad_norm": 0.3404443860054016, "learning_rate": 1.8251897774320345e-05, "loss": 0.5566, "step": 18100 }, { "epoch": 0.3838942970456618, "grad_norm": 0.38746362924575806, "learning_rate": 1.825170939303526e-05, "loss": 0.5743, "step": 18101 }, { "epoch": 0.3839155055035948, "grad_norm": 0.4101220667362213, "learning_rate": 1.8251521002572694e-05, "loss": 0.4387, "step": 18102 }, { "epoch": 0.38393671396152784, "grad_norm": 0.3121015429496765, "learning_rate": 1.8251332602932855e-05, "loss": 0.5298, "step": 18103 }, { "epoch": 0.38395792241946086, "grad_norm": 0.2997947335243225, "learning_rate": 1.825114419411596e-05, "loss": 0.4481, "step": 18104 }, { "epoch": 0.3839791308773939, "grad_norm": 0.38077834248542786, "learning_rate": 1.825095577612221e-05, "loss": 0.548, "step": 18105 }, { "epoch": 0.3840003393353269, "grad_norm": 0.3658290505409241, "learning_rate": 1.8250767348951822e-05, "loss": 0.5057, "step": 18106 }, { "epoch": 0.38402154779326, "grad_norm": 0.3693893849849701, "learning_rate": 1.8250578912605e-05, "loss": 0.5821, "step": 18107 }, { "epoch": 0.384042756251193, "grad_norm": 0.3400766849517822, "learning_rate": 1.825039046708196e-05, "loss": 0.5025, "step": 18108 }, { "epoch": 0.384063964709126, "grad_norm": 0.3483017683029175, "learning_rate": 1.8250202012382902e-05, "loss": 0.4889, "step": 18109 }, { "epoch": 0.38408517316705904, "grad_norm": 0.3092631697654724, "learning_rate": 1.8250013548508042e-05, "loss": 0.4386, "step": 18110 }, { "epoch": 0.38410638162499205, "grad_norm": 0.3270522356033325, "learning_rate": 1.8249825075457592e-05, "loss": 0.513, "step": 18111 }, { "epoch": 0.3841275900829251, "grad_norm": 0.39138758182525635, "learning_rate": 1.8249636593231756e-05, "loss": 0.5052, "step": 18112 }, { "epoch": 0.3841487985408581, "grad_norm": 0.3267434537410736, "learning_rate": 1.8249448101830745e-05, "loss": 0.5648, "step": 18113 }, { "epoch": 0.3841700069987911, "grad_norm": 0.3438819646835327, "learning_rate": 1.824925960125477e-05, "loss": 0.4374, "step": 18114 }, { "epoch": 0.38419121545672413, "grad_norm": 0.3858656883239746, "learning_rate": 1.824907109150404e-05, "loss": 0.5516, "step": 18115 }, { "epoch": 0.38421242391465715, "grad_norm": 0.3696232736110687, "learning_rate": 1.8248882572578766e-05, "loss": 0.5593, "step": 18116 }, { "epoch": 0.3842336323725902, "grad_norm": 0.33268487453460693, "learning_rate": 1.8248694044479158e-05, "loss": 0.5435, "step": 18117 }, { "epoch": 0.3842548408305232, "grad_norm": 0.34996524453163147, "learning_rate": 1.824850550720542e-05, "loss": 0.5499, "step": 18118 }, { "epoch": 0.3842760492884562, "grad_norm": 0.32904958724975586, "learning_rate": 1.824831696075777e-05, "loss": 0.508, "step": 18119 }, { "epoch": 0.3842972577463893, "grad_norm": 0.3492463231086731, "learning_rate": 1.824812840513641e-05, "loss": 0.5227, "step": 18120 }, { "epoch": 0.3843184662043223, "grad_norm": 0.522219181060791, "learning_rate": 1.824793984034155e-05, "loss": 0.5618, "step": 18121 }, { "epoch": 0.3843396746622553, "grad_norm": 0.3673518896102905, "learning_rate": 1.824775126637341e-05, "loss": 0.4892, "step": 18122 }, { "epoch": 0.38436088312018835, "grad_norm": 0.34427547454833984, "learning_rate": 1.8247562683232192e-05, "loss": 0.4873, "step": 18123 }, { "epoch": 0.38438209157812137, "grad_norm": 0.4160805642604828, "learning_rate": 1.8247374090918104e-05, "loss": 0.5745, "step": 18124 }, { "epoch": 0.3844033000360544, "grad_norm": 0.4697093963623047, "learning_rate": 1.8247185489431357e-05, "loss": 0.5628, "step": 18125 }, { "epoch": 0.3844245084939874, "grad_norm": 0.3356785178184509, "learning_rate": 1.8246996878772165e-05, "loss": 0.5051, "step": 18126 }, { "epoch": 0.3844457169519204, "grad_norm": 0.35753461718559265, "learning_rate": 1.8246808258940735e-05, "loss": 0.4855, "step": 18127 }, { "epoch": 0.38446692540985344, "grad_norm": 0.3215380012989044, "learning_rate": 1.8246619629937276e-05, "loss": 0.5216, "step": 18128 }, { "epoch": 0.38448813386778646, "grad_norm": 0.37570828199386597, "learning_rate": 1.8246430991761995e-05, "loss": 0.4591, "step": 18129 }, { "epoch": 0.3845093423257195, "grad_norm": 0.4231160283088684, "learning_rate": 1.824624234441511e-05, "loss": 0.5802, "step": 18130 }, { "epoch": 0.3845305507836525, "grad_norm": 0.34771737456321716, "learning_rate": 1.824605368789682e-05, "loss": 0.4755, "step": 18131 }, { "epoch": 0.3845517592415855, "grad_norm": 0.4829665720462799, "learning_rate": 1.8245865022207346e-05, "loss": 0.5414, "step": 18132 }, { "epoch": 0.38457296769951854, "grad_norm": 0.3507721424102783, "learning_rate": 1.824567634734689e-05, "loss": 0.5086, "step": 18133 }, { "epoch": 0.3845941761574516, "grad_norm": 0.3517869710922241, "learning_rate": 1.8245487663315663e-05, "loss": 0.4878, "step": 18134 }, { "epoch": 0.38461538461538464, "grad_norm": 0.31509166955947876, "learning_rate": 1.824529897011388e-05, "loss": 0.4531, "step": 18135 }, { "epoch": 0.38463659307331766, "grad_norm": 0.38086065649986267, "learning_rate": 1.8245110267741744e-05, "loss": 0.5074, "step": 18136 }, { "epoch": 0.3846578015312507, "grad_norm": 0.3411465585231781, "learning_rate": 1.824492155619947e-05, "loss": 0.4594, "step": 18137 }, { "epoch": 0.3846790099891837, "grad_norm": 0.6454589366912842, "learning_rate": 1.824473283548726e-05, "loss": 0.4329, "step": 18138 }, { "epoch": 0.3847002184471167, "grad_norm": 0.32961583137512207, "learning_rate": 1.824454410560534e-05, "loss": 0.5098, "step": 18139 }, { "epoch": 0.38472142690504973, "grad_norm": 0.8740726113319397, "learning_rate": 1.8244355366553902e-05, "loss": 0.5179, "step": 18140 }, { "epoch": 0.38474263536298275, "grad_norm": 0.39775943756103516, "learning_rate": 1.8244166618333165e-05, "loss": 0.5101, "step": 18141 }, { "epoch": 0.3847638438209158, "grad_norm": 0.37403804063796997, "learning_rate": 1.824397786094334e-05, "loss": 0.4321, "step": 18142 }, { "epoch": 0.3847850522788488, "grad_norm": 0.3492080271244049, "learning_rate": 1.8243789094384634e-05, "loss": 0.5559, "step": 18143 }, { "epoch": 0.3848062607367818, "grad_norm": 0.33164089918136597, "learning_rate": 1.8243600318657257e-05, "loss": 0.5285, "step": 18144 }, { "epoch": 0.38482746919471483, "grad_norm": 0.39178022742271423, "learning_rate": 1.8243411533761416e-05, "loss": 0.4299, "step": 18145 }, { "epoch": 0.38484867765264785, "grad_norm": 0.3729160726070404, "learning_rate": 1.8243222739697327e-05, "loss": 0.4304, "step": 18146 }, { "epoch": 0.38486988611058087, "grad_norm": 0.32562586665153503, "learning_rate": 1.82430339364652e-05, "loss": 0.4515, "step": 18147 }, { "epoch": 0.38489109456851395, "grad_norm": 0.37150293588638306, "learning_rate": 1.8242845124065242e-05, "loss": 0.5587, "step": 18148 }, { "epoch": 0.38491230302644697, "grad_norm": 0.3453338146209717, "learning_rate": 1.8242656302497658e-05, "loss": 0.4992, "step": 18149 }, { "epoch": 0.38493351148438, "grad_norm": 0.31555959582328796, "learning_rate": 1.8242467471762667e-05, "loss": 0.5869, "step": 18150 }, { "epoch": 0.384954719942313, "grad_norm": 0.3489416241645813, "learning_rate": 1.8242278631860473e-05, "loss": 0.5033, "step": 18151 }, { "epoch": 0.384975928400246, "grad_norm": 0.41567444801330566, "learning_rate": 1.824208978279129e-05, "loss": 0.5064, "step": 18152 }, { "epoch": 0.38499713685817905, "grad_norm": 0.3648994565010071, "learning_rate": 1.8241900924555326e-05, "loss": 0.5044, "step": 18153 }, { "epoch": 0.38501834531611207, "grad_norm": 0.3525589108467102, "learning_rate": 1.824171205715279e-05, "loss": 0.5034, "step": 18154 }, { "epoch": 0.3850395537740451, "grad_norm": 0.7009634971618652, "learning_rate": 1.82415231805839e-05, "loss": 0.4675, "step": 18155 }, { "epoch": 0.3850607622319781, "grad_norm": 0.3554793894290924, "learning_rate": 1.8241334294848853e-05, "loss": 0.4725, "step": 18156 }, { "epoch": 0.3850819706899111, "grad_norm": 0.32492974400520325, "learning_rate": 1.8241145399947867e-05, "loss": 0.5021, "step": 18157 }, { "epoch": 0.38510317914784414, "grad_norm": 0.34422922134399414, "learning_rate": 1.8240956495881148e-05, "loss": 0.4836, "step": 18158 }, { "epoch": 0.38512438760577716, "grad_norm": 0.34301242232322693, "learning_rate": 1.8240767582648912e-05, "loss": 0.5261, "step": 18159 }, { "epoch": 0.3851455960637102, "grad_norm": 0.348638117313385, "learning_rate": 1.8240578660251363e-05, "loss": 0.5436, "step": 18160 }, { "epoch": 0.38516680452164326, "grad_norm": 0.34568676352500916, "learning_rate": 1.8240389728688718e-05, "loss": 0.5152, "step": 18161 }, { "epoch": 0.3851880129795763, "grad_norm": 0.31868958473205566, "learning_rate": 1.8240200787961183e-05, "loss": 0.4451, "step": 18162 }, { "epoch": 0.3852092214375093, "grad_norm": 0.34861740469932556, "learning_rate": 1.8240011838068965e-05, "loss": 0.5654, "step": 18163 }, { "epoch": 0.3852304298954423, "grad_norm": 0.3262016475200653, "learning_rate": 1.8239822879012278e-05, "loss": 0.5506, "step": 18164 }, { "epoch": 0.38525163835337534, "grad_norm": 0.39082396030426025, "learning_rate": 1.8239633910791334e-05, "loss": 0.6133, "step": 18165 }, { "epoch": 0.38527284681130836, "grad_norm": 0.3699967563152313, "learning_rate": 1.823944493340634e-05, "loss": 0.4519, "step": 18166 }, { "epoch": 0.3852940552692414, "grad_norm": 0.3187629282474518, "learning_rate": 1.8239255946857505e-05, "loss": 0.4898, "step": 18167 }, { "epoch": 0.3853152637271744, "grad_norm": 0.3955283761024475, "learning_rate": 1.8239066951145043e-05, "loss": 0.5696, "step": 18168 }, { "epoch": 0.3853364721851074, "grad_norm": 0.35841992497444153, "learning_rate": 1.8238877946269165e-05, "loss": 0.5691, "step": 18169 }, { "epoch": 0.38535768064304043, "grad_norm": 0.3403528928756714, "learning_rate": 1.823868893223007e-05, "loss": 0.6095, "step": 18170 }, { "epoch": 0.38537888910097345, "grad_norm": 0.3369486927986145, "learning_rate": 1.8238499909027985e-05, "loss": 0.4623, "step": 18171 }, { "epoch": 0.3854000975589065, "grad_norm": 0.41985729336738586, "learning_rate": 1.823831087666311e-05, "loss": 0.4422, "step": 18172 }, { "epoch": 0.3854213060168395, "grad_norm": 0.33271029591560364, "learning_rate": 1.8238121835135656e-05, "loss": 0.4771, "step": 18173 }, { "epoch": 0.3854425144747725, "grad_norm": 0.36675843596458435, "learning_rate": 1.8237932784445833e-05, "loss": 0.525, "step": 18174 }, { "epoch": 0.3854637229327056, "grad_norm": 0.391148179769516, "learning_rate": 1.8237743724593854e-05, "loss": 0.4468, "step": 18175 }, { "epoch": 0.3854849313906386, "grad_norm": 0.8362887501716614, "learning_rate": 1.823755465557993e-05, "loss": 0.5608, "step": 18176 }, { "epoch": 0.3855061398485716, "grad_norm": 0.3151387870311737, "learning_rate": 1.8237365577404268e-05, "loss": 0.5007, "step": 18177 }, { "epoch": 0.38552734830650465, "grad_norm": 0.3418351709842682, "learning_rate": 1.823717649006708e-05, "loss": 0.5594, "step": 18178 }, { "epoch": 0.38554855676443767, "grad_norm": 0.3411295413970947, "learning_rate": 1.8236987393568576e-05, "loss": 0.4895, "step": 18179 }, { "epoch": 0.3855697652223707, "grad_norm": 0.3626096248626709, "learning_rate": 1.8236798287908964e-05, "loss": 0.469, "step": 18180 }, { "epoch": 0.3855909736803037, "grad_norm": 0.36727818846702576, "learning_rate": 1.8236609173088457e-05, "loss": 0.4254, "step": 18181 }, { "epoch": 0.3856121821382367, "grad_norm": 0.4463154673576355, "learning_rate": 1.8236420049107265e-05, "loss": 0.5175, "step": 18182 }, { "epoch": 0.38563339059616975, "grad_norm": 0.34861814975738525, "learning_rate": 1.82362309159656e-05, "loss": 0.4802, "step": 18183 }, { "epoch": 0.38565459905410276, "grad_norm": 0.37211206555366516, "learning_rate": 1.823604177366367e-05, "loss": 0.5966, "step": 18184 }, { "epoch": 0.3856758075120358, "grad_norm": 0.388674795627594, "learning_rate": 1.8235852622201683e-05, "loss": 0.4505, "step": 18185 }, { "epoch": 0.3856970159699688, "grad_norm": 0.3504346013069153, "learning_rate": 1.8235663461579855e-05, "loss": 0.521, "step": 18186 }, { "epoch": 0.3857182244279018, "grad_norm": 0.359552800655365, "learning_rate": 1.8235474291798396e-05, "loss": 0.4846, "step": 18187 }, { "epoch": 0.3857394328858349, "grad_norm": 0.36112675070762634, "learning_rate": 1.8235285112857508e-05, "loss": 0.4475, "step": 18188 }, { "epoch": 0.3857606413437679, "grad_norm": 0.35673096776008606, "learning_rate": 1.823509592475741e-05, "loss": 0.4944, "step": 18189 }, { "epoch": 0.38578184980170094, "grad_norm": 0.31999820470809937, "learning_rate": 1.8234906727498313e-05, "loss": 0.5279, "step": 18190 }, { "epoch": 0.38580305825963396, "grad_norm": 0.3070274591445923, "learning_rate": 1.8234717521080422e-05, "loss": 0.4505, "step": 18191 }, { "epoch": 0.385824266717567, "grad_norm": 0.35263749957084656, "learning_rate": 1.823452830550395e-05, "loss": 0.4078, "step": 18192 }, { "epoch": 0.3858454751755, "grad_norm": 0.32463470101356506, "learning_rate": 1.8234339080769104e-05, "loss": 0.491, "step": 18193 }, { "epoch": 0.385866683633433, "grad_norm": 0.3534872233867645, "learning_rate": 1.82341498468761e-05, "loss": 0.5445, "step": 18194 }, { "epoch": 0.38588789209136604, "grad_norm": 0.3949921727180481, "learning_rate": 1.823396060382515e-05, "loss": 0.4499, "step": 18195 }, { "epoch": 0.38590910054929906, "grad_norm": 0.3789861798286438, "learning_rate": 1.8233771351616453e-05, "loss": 0.5526, "step": 18196 }, { "epoch": 0.3859303090072321, "grad_norm": 0.3238266110420227, "learning_rate": 1.823358209025023e-05, "loss": 0.4618, "step": 18197 }, { "epoch": 0.3859515174651651, "grad_norm": 0.4624894857406616, "learning_rate": 1.823339281972669e-05, "loss": 0.5493, "step": 18198 }, { "epoch": 0.3859727259230981, "grad_norm": 0.34450310468673706, "learning_rate": 1.8233203540046044e-05, "loss": 0.5393, "step": 18199 }, { "epoch": 0.38599393438103113, "grad_norm": 0.34602394700050354, "learning_rate": 1.8233014251208496e-05, "loss": 0.4892, "step": 18200 }, { "epoch": 0.38601514283896415, "grad_norm": 0.347184419631958, "learning_rate": 1.823282495321426e-05, "loss": 0.4907, "step": 18201 }, { "epoch": 0.38603635129689723, "grad_norm": 0.34066709876060486, "learning_rate": 1.8232635646063553e-05, "loss": 0.5172, "step": 18202 }, { "epoch": 0.38605755975483025, "grad_norm": 0.4267389178276062, "learning_rate": 1.8232446329756578e-05, "loss": 0.494, "step": 18203 }, { "epoch": 0.38607876821276327, "grad_norm": 0.3450554311275482, "learning_rate": 1.8232257004293546e-05, "loss": 0.4865, "step": 18204 }, { "epoch": 0.3860999766706963, "grad_norm": 0.3273110091686249, "learning_rate": 1.8232067669674674e-05, "loss": 0.4555, "step": 18205 }, { "epoch": 0.3861211851286293, "grad_norm": 0.34101417660713196, "learning_rate": 1.8231878325900162e-05, "loss": 0.4736, "step": 18206 }, { "epoch": 0.3861423935865623, "grad_norm": 0.3256297707557678, "learning_rate": 1.823168897297023e-05, "loss": 0.5685, "step": 18207 }, { "epoch": 0.38616360204449535, "grad_norm": 0.34256643056869507, "learning_rate": 1.8231499610885085e-05, "loss": 0.5317, "step": 18208 }, { "epoch": 0.38618481050242837, "grad_norm": 0.36145976185798645, "learning_rate": 1.8231310239644937e-05, "loss": 0.5394, "step": 18209 }, { "epoch": 0.3862060189603614, "grad_norm": 0.3138416111469269, "learning_rate": 1.8231120859249995e-05, "loss": 0.3996, "step": 18210 }, { "epoch": 0.3862272274182944, "grad_norm": 0.38191932439804077, "learning_rate": 1.8230931469700474e-05, "loss": 0.468, "step": 18211 }, { "epoch": 0.3862484358762274, "grad_norm": 0.9313735365867615, "learning_rate": 1.823074207099658e-05, "loss": 0.5954, "step": 18212 }, { "epoch": 0.38626964433416044, "grad_norm": 0.3398679792881012, "learning_rate": 1.823055266313853e-05, "loss": 0.4202, "step": 18213 }, { "epoch": 0.38629085279209346, "grad_norm": 0.3587783873081207, "learning_rate": 1.823036324612653e-05, "loss": 0.5319, "step": 18214 }, { "epoch": 0.3863120612500265, "grad_norm": 0.32756975293159485, "learning_rate": 1.823017381996079e-05, "loss": 0.5537, "step": 18215 }, { "epoch": 0.38633326970795956, "grad_norm": 0.3337116241455078, "learning_rate": 1.8229984384641523e-05, "loss": 0.464, "step": 18216 }, { "epoch": 0.3863544781658926, "grad_norm": 0.3552756905555725, "learning_rate": 1.822979494016894e-05, "loss": 0.5264, "step": 18217 }, { "epoch": 0.3863756866238256, "grad_norm": 0.45276734232902527, "learning_rate": 1.822960548654325e-05, "loss": 0.4932, "step": 18218 }, { "epoch": 0.3863968950817586, "grad_norm": 0.4104849696159363, "learning_rate": 1.8229416023764664e-05, "loss": 0.4961, "step": 18219 }, { "epoch": 0.38641810353969164, "grad_norm": 0.41848984360694885, "learning_rate": 1.8229226551833392e-05, "loss": 0.6015, "step": 18220 }, { "epoch": 0.38643931199762466, "grad_norm": 0.477382630109787, "learning_rate": 1.8229037070749646e-05, "loss": 0.5176, "step": 18221 }, { "epoch": 0.3864605204555577, "grad_norm": 0.4394072890281677, "learning_rate": 1.8228847580513637e-05, "loss": 0.6108, "step": 18222 }, { "epoch": 0.3864817289134907, "grad_norm": 0.3334377110004425, "learning_rate": 1.8228658081125576e-05, "loss": 0.4614, "step": 18223 }, { "epoch": 0.3865029373714237, "grad_norm": 0.3492886424064636, "learning_rate": 1.8228468572585672e-05, "loss": 0.4618, "step": 18224 }, { "epoch": 0.38652414582935674, "grad_norm": 0.42074960470199585, "learning_rate": 1.8228279054894137e-05, "loss": 0.4914, "step": 18225 }, { "epoch": 0.38654535428728976, "grad_norm": 0.305004745721817, "learning_rate": 1.822808952805118e-05, "loss": 0.4729, "step": 18226 }, { "epoch": 0.3865665627452228, "grad_norm": 0.31326115131378174, "learning_rate": 1.8227899992057015e-05, "loss": 0.4814, "step": 18227 }, { "epoch": 0.3865877712031558, "grad_norm": 0.45156362652778625, "learning_rate": 1.822771044691185e-05, "loss": 0.5521, "step": 18228 }, { "epoch": 0.38660897966108887, "grad_norm": 0.3595138490200043, "learning_rate": 1.82275208926159e-05, "loss": 0.5944, "step": 18229 }, { "epoch": 0.3866301881190219, "grad_norm": 0.3557996451854706, "learning_rate": 1.822733132916937e-05, "loss": 0.4727, "step": 18230 }, { "epoch": 0.3866513965769549, "grad_norm": 0.3283630907535553, "learning_rate": 1.8227141756572474e-05, "loss": 0.4524, "step": 18231 }, { "epoch": 0.38667260503488793, "grad_norm": 0.38343024253845215, "learning_rate": 1.822695217482542e-05, "loss": 0.5329, "step": 18232 }, { "epoch": 0.38669381349282095, "grad_norm": 0.36523598432540894, "learning_rate": 1.8226762583928423e-05, "loss": 0.5773, "step": 18233 }, { "epoch": 0.38671502195075397, "grad_norm": 0.3287723660469055, "learning_rate": 1.8226572983881694e-05, "loss": 0.5125, "step": 18234 }, { "epoch": 0.386736230408687, "grad_norm": 0.37034571170806885, "learning_rate": 1.822638337468544e-05, "loss": 0.5093, "step": 18235 }, { "epoch": 0.38675743886662, "grad_norm": 0.3614319860935211, "learning_rate": 1.8226193756339875e-05, "loss": 0.5082, "step": 18236 }, { "epoch": 0.386778647324553, "grad_norm": 0.4063490629196167, "learning_rate": 1.822600412884521e-05, "loss": 0.5562, "step": 18237 }, { "epoch": 0.38679985578248605, "grad_norm": 0.3551768362522125, "learning_rate": 1.8225814492201653e-05, "loss": 0.4912, "step": 18238 }, { "epoch": 0.38682106424041907, "grad_norm": 0.32163500785827637, "learning_rate": 1.8225624846409415e-05, "loss": 0.5443, "step": 18239 }, { "epoch": 0.3868422726983521, "grad_norm": 0.3360048234462738, "learning_rate": 1.822543519146871e-05, "loss": 0.4938, "step": 18240 }, { "epoch": 0.3868634811562851, "grad_norm": 0.4025512635707855, "learning_rate": 1.8225245527379748e-05, "loss": 0.5088, "step": 18241 }, { "epoch": 0.3868846896142181, "grad_norm": 0.4488373398780823, "learning_rate": 1.8225055854142738e-05, "loss": 0.5061, "step": 18242 }, { "epoch": 0.3869058980721512, "grad_norm": 0.33481013774871826, "learning_rate": 1.8224866171757895e-05, "loss": 0.535, "step": 18243 }, { "epoch": 0.3869271065300842, "grad_norm": 0.3568243980407715, "learning_rate": 1.8224676480225424e-05, "loss": 0.4781, "step": 18244 }, { "epoch": 0.38694831498801724, "grad_norm": 0.40732041001319885, "learning_rate": 1.822448677954554e-05, "loss": 0.58, "step": 18245 }, { "epoch": 0.38696952344595026, "grad_norm": 0.37955397367477417, "learning_rate": 1.8224297069718458e-05, "loss": 0.5555, "step": 18246 }, { "epoch": 0.3869907319038833, "grad_norm": 0.34778299927711487, "learning_rate": 1.8224107350744376e-05, "loss": 0.4844, "step": 18247 }, { "epoch": 0.3870119403618163, "grad_norm": 0.32532304525375366, "learning_rate": 1.8223917622623518e-05, "loss": 0.5258, "step": 18248 }, { "epoch": 0.3870331488197493, "grad_norm": 0.4627380073070526, "learning_rate": 1.822372788535609e-05, "loss": 0.5751, "step": 18249 }, { "epoch": 0.38705435727768234, "grad_norm": 0.3348531424999237, "learning_rate": 1.8223538138942304e-05, "loss": 0.3673, "step": 18250 }, { "epoch": 0.38707556573561536, "grad_norm": 0.374689519405365, "learning_rate": 1.822334838338237e-05, "loss": 0.5765, "step": 18251 }, { "epoch": 0.3870967741935484, "grad_norm": 0.303440123796463, "learning_rate": 1.82231586186765e-05, "loss": 0.4467, "step": 18252 }, { "epoch": 0.3871179826514814, "grad_norm": 0.30650657415390015, "learning_rate": 1.8222968844824902e-05, "loss": 0.5283, "step": 18253 }, { "epoch": 0.3871391911094144, "grad_norm": 0.35246947407722473, "learning_rate": 1.8222779061827788e-05, "loss": 0.5364, "step": 18254 }, { "epoch": 0.38716039956734744, "grad_norm": 0.3610282838344574, "learning_rate": 1.8222589269685377e-05, "loss": 0.4794, "step": 18255 }, { "epoch": 0.38718160802528045, "grad_norm": 0.3187767267227173, "learning_rate": 1.822239946839787e-05, "loss": 0.4451, "step": 18256 }, { "epoch": 0.38720281648321353, "grad_norm": 0.31677597761154175, "learning_rate": 1.822220965796548e-05, "loss": 0.4606, "step": 18257 }, { "epoch": 0.38722402494114655, "grad_norm": 0.3289673328399658, "learning_rate": 1.8222019838388422e-05, "loss": 0.5773, "step": 18258 }, { "epoch": 0.38724523339907957, "grad_norm": 0.4041883051395416, "learning_rate": 1.8221830009666903e-05, "loss": 0.5435, "step": 18259 }, { "epoch": 0.3872664418570126, "grad_norm": 0.3655744791030884, "learning_rate": 1.822164017180114e-05, "loss": 0.4569, "step": 18260 }, { "epoch": 0.3872876503149456, "grad_norm": 0.41922909021377563, "learning_rate": 1.8221450324791336e-05, "loss": 0.5573, "step": 18261 }, { "epoch": 0.38730885877287863, "grad_norm": 0.3259885907173157, "learning_rate": 1.8221260468637707e-05, "loss": 0.4612, "step": 18262 }, { "epoch": 0.38733006723081165, "grad_norm": 0.37694498896598816, "learning_rate": 1.8221070603340466e-05, "loss": 0.559, "step": 18263 }, { "epoch": 0.38735127568874467, "grad_norm": 0.328380823135376, "learning_rate": 1.8220880728899822e-05, "loss": 0.4818, "step": 18264 }, { "epoch": 0.3873724841466777, "grad_norm": 0.33127760887145996, "learning_rate": 1.8220690845315984e-05, "loss": 0.5272, "step": 18265 }, { "epoch": 0.3873936926046107, "grad_norm": 0.3548092544078827, "learning_rate": 1.8220500952589163e-05, "loss": 0.5349, "step": 18266 }, { "epoch": 0.3874149010625437, "grad_norm": 0.3432936370372772, "learning_rate": 1.8220311050719573e-05, "loss": 0.513, "step": 18267 }, { "epoch": 0.38743610952047675, "grad_norm": 0.33501318097114563, "learning_rate": 1.822012113970743e-05, "loss": 0.4935, "step": 18268 }, { "epoch": 0.38745731797840977, "grad_norm": 0.3319379687309265, "learning_rate": 1.8219931219552932e-05, "loss": 0.4768, "step": 18269 }, { "epoch": 0.38747852643634284, "grad_norm": 0.37821701169013977, "learning_rate": 1.8219741290256304e-05, "loss": 0.4615, "step": 18270 }, { "epoch": 0.38749973489427586, "grad_norm": 0.3311539888381958, "learning_rate": 1.8219551351817747e-05, "loss": 0.4915, "step": 18271 }, { "epoch": 0.3875209433522089, "grad_norm": 0.5359045267105103, "learning_rate": 1.821936140423748e-05, "loss": 0.5993, "step": 18272 }, { "epoch": 0.3875421518101419, "grad_norm": 0.3117128312587738, "learning_rate": 1.821917144751571e-05, "loss": 0.4734, "step": 18273 }, { "epoch": 0.3875633602680749, "grad_norm": 0.38807252049446106, "learning_rate": 1.8218981481652645e-05, "loss": 0.4524, "step": 18274 }, { "epoch": 0.38758456872600794, "grad_norm": 0.3495355248451233, "learning_rate": 1.8218791506648503e-05, "loss": 0.5463, "step": 18275 }, { "epoch": 0.38760577718394096, "grad_norm": 2.923887252807617, "learning_rate": 1.8218601522503493e-05, "loss": 0.5121, "step": 18276 }, { "epoch": 0.387626985641874, "grad_norm": 0.36044347286224365, "learning_rate": 1.8218411529217825e-05, "loss": 0.4151, "step": 18277 }, { "epoch": 0.387648194099807, "grad_norm": 0.37097764015197754, "learning_rate": 1.821822152679171e-05, "loss": 0.5149, "step": 18278 }, { "epoch": 0.38766940255774, "grad_norm": 0.48778894543647766, "learning_rate": 1.821803151522536e-05, "loss": 0.4857, "step": 18279 }, { "epoch": 0.38769061101567304, "grad_norm": 0.3336310386657715, "learning_rate": 1.821784149451899e-05, "loss": 0.5103, "step": 18280 }, { "epoch": 0.38771181947360606, "grad_norm": 0.45020341873168945, "learning_rate": 1.8217651464672808e-05, "loss": 0.5354, "step": 18281 }, { "epoch": 0.3877330279315391, "grad_norm": 0.4138224422931671, "learning_rate": 1.8217461425687022e-05, "loss": 0.4894, "step": 18282 }, { "epoch": 0.3877542363894721, "grad_norm": 0.42281627655029297, "learning_rate": 1.821727137756185e-05, "loss": 0.5555, "step": 18283 }, { "epoch": 0.38777544484740517, "grad_norm": 0.32718509435653687, "learning_rate": 1.82170813202975e-05, "loss": 0.5165, "step": 18284 }, { "epoch": 0.3877966533053382, "grad_norm": 0.30083978176116943, "learning_rate": 1.821689125389418e-05, "loss": 0.4375, "step": 18285 }, { "epoch": 0.3878178617632712, "grad_norm": 0.33371856808662415, "learning_rate": 1.821670117835211e-05, "loss": 0.5702, "step": 18286 }, { "epoch": 0.38783907022120423, "grad_norm": 0.3452182710170746, "learning_rate": 1.821651109367149e-05, "loss": 0.4922, "step": 18287 }, { "epoch": 0.38786027867913725, "grad_norm": 0.3086915612220764, "learning_rate": 1.8216320999852543e-05, "loss": 0.4095, "step": 18288 }, { "epoch": 0.38788148713707027, "grad_norm": 0.3185839056968689, "learning_rate": 1.821613089689547e-05, "loss": 0.48, "step": 18289 }, { "epoch": 0.3879026955950033, "grad_norm": 0.352306067943573, "learning_rate": 1.8215940784800494e-05, "loss": 0.4823, "step": 18290 }, { "epoch": 0.3879239040529363, "grad_norm": 0.3570045828819275, "learning_rate": 1.8215750663567816e-05, "loss": 0.5408, "step": 18291 }, { "epoch": 0.3879451125108693, "grad_norm": 0.3681991398334503, "learning_rate": 1.8215560533197655e-05, "loss": 0.6057, "step": 18292 }, { "epoch": 0.38796632096880235, "grad_norm": 0.6051591634750366, "learning_rate": 1.8215370393690214e-05, "loss": 0.4478, "step": 18293 }, { "epoch": 0.38798752942673537, "grad_norm": 0.4207191467285156, "learning_rate": 1.8215180245045713e-05, "loss": 0.4414, "step": 18294 }, { "epoch": 0.3880087378846684, "grad_norm": 0.35252147912979126, "learning_rate": 1.821499008726436e-05, "loss": 0.4924, "step": 18295 }, { "epoch": 0.3880299463426014, "grad_norm": 0.48908233642578125, "learning_rate": 1.8214799920346364e-05, "loss": 0.496, "step": 18296 }, { "epoch": 0.3880511548005344, "grad_norm": 0.3485087454319, "learning_rate": 1.821460974429194e-05, "loss": 0.5378, "step": 18297 }, { "epoch": 0.3880723632584675, "grad_norm": 0.3210732936859131, "learning_rate": 1.82144195591013e-05, "loss": 0.4891, "step": 18298 }, { "epoch": 0.3880935717164005, "grad_norm": 0.3571699857711792, "learning_rate": 1.8214229364774653e-05, "loss": 0.553, "step": 18299 }, { "epoch": 0.38811478017433354, "grad_norm": 0.31447070837020874, "learning_rate": 1.821403916131221e-05, "loss": 0.5834, "step": 18300 }, { "epoch": 0.38813598863226656, "grad_norm": 0.3283742368221283, "learning_rate": 1.8213848948714187e-05, "loss": 0.5461, "step": 18301 }, { "epoch": 0.3881571970901996, "grad_norm": 0.43953558802604675, "learning_rate": 1.821365872698079e-05, "loss": 0.4809, "step": 18302 }, { "epoch": 0.3881784055481326, "grad_norm": 0.33886390924453735, "learning_rate": 1.821346849611223e-05, "loss": 0.5646, "step": 18303 }, { "epoch": 0.3881996140060656, "grad_norm": 0.30087220668792725, "learning_rate": 1.8213278256108726e-05, "loss": 0.3857, "step": 18304 }, { "epoch": 0.38822082246399864, "grad_norm": 0.3750661611557007, "learning_rate": 1.8213088006970486e-05, "loss": 0.4838, "step": 18305 }, { "epoch": 0.38824203092193166, "grad_norm": 0.3312169015407562, "learning_rate": 1.821289774869772e-05, "loss": 0.4727, "step": 18306 }, { "epoch": 0.3882632393798647, "grad_norm": 0.32983317971229553, "learning_rate": 1.8212707481290638e-05, "loss": 0.4957, "step": 18307 }, { "epoch": 0.3882844478377977, "grad_norm": 0.3418805003166199, "learning_rate": 1.8212517204749455e-05, "loss": 0.5116, "step": 18308 }, { "epoch": 0.3883056562957307, "grad_norm": 0.40544116497039795, "learning_rate": 1.8212326919074383e-05, "loss": 0.5309, "step": 18309 }, { "epoch": 0.38832686475366374, "grad_norm": 0.3477398455142975, "learning_rate": 1.8212136624265633e-05, "loss": 0.5312, "step": 18310 }, { "epoch": 0.3883480732115968, "grad_norm": 0.38873356580734253, "learning_rate": 1.8211946320323415e-05, "loss": 0.5749, "step": 18311 }, { "epoch": 0.38836928166952983, "grad_norm": 0.39870086312294006, "learning_rate": 1.8211756007247943e-05, "loss": 0.5216, "step": 18312 }, { "epoch": 0.38839049012746285, "grad_norm": 0.3782237768173218, "learning_rate": 1.8211565685039425e-05, "loss": 0.5079, "step": 18313 }, { "epoch": 0.38841169858539587, "grad_norm": 0.3581787347793579, "learning_rate": 1.8211375353698077e-05, "loss": 0.5572, "step": 18314 }, { "epoch": 0.3884329070433289, "grad_norm": 0.45357370376586914, "learning_rate": 1.8211185013224107e-05, "loss": 0.5225, "step": 18315 }, { "epoch": 0.3884541155012619, "grad_norm": 0.3887571394443512, "learning_rate": 1.8210994663617726e-05, "loss": 0.4303, "step": 18316 }, { "epoch": 0.38847532395919493, "grad_norm": 0.3149355947971344, "learning_rate": 1.821080430487915e-05, "loss": 0.5098, "step": 18317 }, { "epoch": 0.38849653241712795, "grad_norm": 0.36624088883399963, "learning_rate": 1.8210613937008592e-05, "loss": 0.5278, "step": 18318 }, { "epoch": 0.38851774087506097, "grad_norm": 0.32233551144599915, "learning_rate": 1.821042356000626e-05, "loss": 0.5026, "step": 18319 }, { "epoch": 0.388538949332994, "grad_norm": 0.3269723653793335, "learning_rate": 1.8210233173872365e-05, "loss": 0.4462, "step": 18320 }, { "epoch": 0.388560157790927, "grad_norm": 0.32952290773391724, "learning_rate": 1.8210042778607117e-05, "loss": 0.5093, "step": 18321 }, { "epoch": 0.38858136624886, "grad_norm": 0.3472285866737366, "learning_rate": 1.8209852374210735e-05, "loss": 0.4913, "step": 18322 }, { "epoch": 0.38860257470679305, "grad_norm": 0.3185048997402191, "learning_rate": 1.8209661960683423e-05, "loss": 0.5365, "step": 18323 }, { "epoch": 0.38862378316472607, "grad_norm": 0.3129807710647583, "learning_rate": 1.82094715380254e-05, "loss": 0.4841, "step": 18324 }, { "epoch": 0.38864499162265914, "grad_norm": 0.3226518929004669, "learning_rate": 1.820928110623687e-05, "loss": 0.4804, "step": 18325 }, { "epoch": 0.38866620008059216, "grad_norm": 0.3569714426994324, "learning_rate": 1.8209090665318052e-05, "loss": 0.5045, "step": 18326 }, { "epoch": 0.3886874085385252, "grad_norm": 0.3242435157299042, "learning_rate": 1.8208900215269156e-05, "loss": 0.4867, "step": 18327 }, { "epoch": 0.3887086169964582, "grad_norm": 0.34794774651527405, "learning_rate": 1.820870975609039e-05, "loss": 0.5293, "step": 18328 }, { "epoch": 0.3887298254543912, "grad_norm": 0.36145544052124023, "learning_rate": 1.8208519287781963e-05, "loss": 0.559, "step": 18329 }, { "epoch": 0.38875103391232424, "grad_norm": 0.5622318387031555, "learning_rate": 1.82083288103441e-05, "loss": 0.5828, "step": 18330 }, { "epoch": 0.38877224237025726, "grad_norm": 0.3156658709049225, "learning_rate": 1.8208138323777006e-05, "loss": 0.5017, "step": 18331 }, { "epoch": 0.3887934508281903, "grad_norm": 0.3666042685508728, "learning_rate": 1.8207947828080884e-05, "loss": 0.5103, "step": 18332 }, { "epoch": 0.3888146592861233, "grad_norm": 0.3538517951965332, "learning_rate": 1.820775732325596e-05, "loss": 0.5429, "step": 18333 }, { "epoch": 0.3888358677440563, "grad_norm": 0.3415152430534363, "learning_rate": 1.8207566809302436e-05, "loss": 0.5074, "step": 18334 }, { "epoch": 0.38885707620198934, "grad_norm": 0.6402155160903931, "learning_rate": 1.820737628622053e-05, "loss": 0.5848, "step": 18335 }, { "epoch": 0.38887828465992236, "grad_norm": 0.3455144166946411, "learning_rate": 1.8207185754010454e-05, "loss": 0.4577, "step": 18336 }, { "epoch": 0.3888994931178554, "grad_norm": 0.37937501072883606, "learning_rate": 1.8206995212672412e-05, "loss": 0.5519, "step": 18337 }, { "epoch": 0.3889207015757884, "grad_norm": 0.36193785071372986, "learning_rate": 1.8206804662206625e-05, "loss": 0.5118, "step": 18338 }, { "epoch": 0.38894191003372147, "grad_norm": 0.38118603825569153, "learning_rate": 1.82066141026133e-05, "loss": 0.5113, "step": 18339 }, { "epoch": 0.3889631184916545, "grad_norm": 0.3591383397579193, "learning_rate": 1.8206423533892648e-05, "loss": 0.4414, "step": 18340 }, { "epoch": 0.3889843269495875, "grad_norm": 0.6165856122970581, "learning_rate": 1.8206232956044884e-05, "loss": 0.4629, "step": 18341 }, { "epoch": 0.38900553540752053, "grad_norm": 0.34243062138557434, "learning_rate": 1.820604236907022e-05, "loss": 0.4847, "step": 18342 }, { "epoch": 0.38902674386545355, "grad_norm": 0.3033396303653717, "learning_rate": 1.8205851772968865e-05, "loss": 0.4521, "step": 18343 }, { "epoch": 0.38904795232338657, "grad_norm": 0.35935255885124207, "learning_rate": 1.8205661167741036e-05, "loss": 0.5117, "step": 18344 }, { "epoch": 0.3890691607813196, "grad_norm": 0.3656904399394989, "learning_rate": 1.820547055338694e-05, "loss": 0.4782, "step": 18345 }, { "epoch": 0.3890903692392526, "grad_norm": 0.35763999819755554, "learning_rate": 1.820527992990679e-05, "loss": 0.4716, "step": 18346 }, { "epoch": 0.38911157769718563, "grad_norm": 0.31080693006515503, "learning_rate": 1.82050892973008e-05, "loss": 0.4874, "step": 18347 }, { "epoch": 0.38913278615511865, "grad_norm": 0.34514954686164856, "learning_rate": 1.8204898655569183e-05, "loss": 0.5177, "step": 18348 }, { "epoch": 0.38915399461305167, "grad_norm": 0.32808974385261536, "learning_rate": 1.8204708004712147e-05, "loss": 0.4867, "step": 18349 }, { "epoch": 0.3891752030709847, "grad_norm": 0.340457022190094, "learning_rate": 1.8204517344729905e-05, "loss": 0.4811, "step": 18350 }, { "epoch": 0.3891964115289177, "grad_norm": 0.349171906709671, "learning_rate": 1.8204326675622673e-05, "loss": 0.502, "step": 18351 }, { "epoch": 0.3892176199868508, "grad_norm": 0.34052807092666626, "learning_rate": 1.820413599739066e-05, "loss": 0.4796, "step": 18352 }, { "epoch": 0.3892388284447838, "grad_norm": 0.3903140425682068, "learning_rate": 1.8203945310034076e-05, "loss": 0.5084, "step": 18353 }, { "epoch": 0.3892600369027168, "grad_norm": 0.36341938376426697, "learning_rate": 1.8203754613553134e-05, "loss": 0.5332, "step": 18354 }, { "epoch": 0.38928124536064984, "grad_norm": 0.36976802349090576, "learning_rate": 1.820356390794805e-05, "loss": 0.5505, "step": 18355 }, { "epoch": 0.38930245381858286, "grad_norm": 0.34555524587631226, "learning_rate": 1.8203373193219032e-05, "loss": 0.4979, "step": 18356 }, { "epoch": 0.3893236622765159, "grad_norm": 0.3515811264514923, "learning_rate": 1.8203182469366295e-05, "loss": 0.545, "step": 18357 }, { "epoch": 0.3893448707344489, "grad_norm": 0.3535092771053314, "learning_rate": 1.8202991736390048e-05, "loss": 0.4749, "step": 18358 }, { "epoch": 0.3893660791923819, "grad_norm": 0.3712409734725952, "learning_rate": 1.8202800994290507e-05, "loss": 0.4167, "step": 18359 }, { "epoch": 0.38938728765031494, "grad_norm": 0.5424154996871948, "learning_rate": 1.8202610243067883e-05, "loss": 0.4463, "step": 18360 }, { "epoch": 0.38940849610824796, "grad_norm": 0.35545507073402405, "learning_rate": 1.8202419482722385e-05, "loss": 0.5676, "step": 18361 }, { "epoch": 0.389429704566181, "grad_norm": 0.32150545716285706, "learning_rate": 1.8202228713254224e-05, "loss": 0.5232, "step": 18362 }, { "epoch": 0.389450913024114, "grad_norm": 0.36207714676856995, "learning_rate": 1.820203793466362e-05, "loss": 0.5373, "step": 18363 }, { "epoch": 0.389472121482047, "grad_norm": 0.3438878357410431, "learning_rate": 1.8201847146950778e-05, "loss": 0.5588, "step": 18364 }, { "epoch": 0.38949332993998004, "grad_norm": 0.5490902066230774, "learning_rate": 1.8201656350115917e-05, "loss": 0.6173, "step": 18365 }, { "epoch": 0.3895145383979131, "grad_norm": 0.3724428713321686, "learning_rate": 1.8201465544159244e-05, "loss": 0.5437, "step": 18366 }, { "epoch": 0.38953574685584613, "grad_norm": 0.3577810227870941, "learning_rate": 1.820127472908097e-05, "loss": 0.5105, "step": 18367 }, { "epoch": 0.38955695531377915, "grad_norm": 0.36696603894233704, "learning_rate": 1.8201083904881307e-05, "loss": 0.5322, "step": 18368 }, { "epoch": 0.38957816377171217, "grad_norm": 0.3424116373062134, "learning_rate": 1.8200893071560475e-05, "loss": 0.5661, "step": 18369 }, { "epoch": 0.3895993722296452, "grad_norm": 0.33176329731941223, "learning_rate": 1.8200702229118677e-05, "loss": 0.4786, "step": 18370 }, { "epoch": 0.3896205806875782, "grad_norm": 1.2825695276260376, "learning_rate": 1.820051137755613e-05, "loss": 0.4602, "step": 18371 }, { "epoch": 0.38964178914551123, "grad_norm": 0.3112931549549103, "learning_rate": 1.8200320516873047e-05, "loss": 0.4377, "step": 18372 }, { "epoch": 0.38966299760344425, "grad_norm": 0.324360728263855, "learning_rate": 1.820012964706964e-05, "loss": 0.53, "step": 18373 }, { "epoch": 0.38968420606137727, "grad_norm": 0.3394027650356293, "learning_rate": 1.8199938768146118e-05, "loss": 0.5021, "step": 18374 }, { "epoch": 0.3897054145193103, "grad_norm": 0.34097820520401, "learning_rate": 1.8199747880102696e-05, "loss": 0.5318, "step": 18375 }, { "epoch": 0.3897266229772433, "grad_norm": 0.3511081039905548, "learning_rate": 1.8199556982939584e-05, "loss": 0.5951, "step": 18376 }, { "epoch": 0.38974783143517633, "grad_norm": 0.36574167013168335, "learning_rate": 1.8199366076656998e-05, "loss": 0.476, "step": 18377 }, { "epoch": 0.38976903989310935, "grad_norm": 0.3386465311050415, "learning_rate": 1.819917516125515e-05, "loss": 0.4491, "step": 18378 }, { "epoch": 0.3897902483510424, "grad_norm": 0.3888372778892517, "learning_rate": 1.8198984236734246e-05, "loss": 0.5506, "step": 18379 }, { "epoch": 0.38981145680897544, "grad_norm": 0.3396448791027069, "learning_rate": 1.8198793303094508e-05, "loss": 0.5441, "step": 18380 }, { "epoch": 0.38983266526690846, "grad_norm": 0.4255518913269043, "learning_rate": 1.8198602360336138e-05, "loss": 0.6676, "step": 18381 }, { "epoch": 0.3898538737248415, "grad_norm": 0.3397933840751648, "learning_rate": 1.8198411408459357e-05, "loss": 0.5044, "step": 18382 }, { "epoch": 0.3898750821827745, "grad_norm": 0.35499444603919983, "learning_rate": 1.8198220447464375e-05, "loss": 0.5146, "step": 18383 }, { "epoch": 0.3898962906407075, "grad_norm": 0.3076530694961548, "learning_rate": 1.81980294773514e-05, "loss": 0.5132, "step": 18384 }, { "epoch": 0.38991749909864054, "grad_norm": 0.34737303853034973, "learning_rate": 1.819783849812065e-05, "loss": 0.5463, "step": 18385 }, { "epoch": 0.38993870755657356, "grad_norm": 0.43521684408187866, "learning_rate": 1.8197647509772334e-05, "loss": 0.5268, "step": 18386 }, { "epoch": 0.3899599160145066, "grad_norm": 0.3135181963443756, "learning_rate": 1.8197456512306665e-05, "loss": 0.5354, "step": 18387 }, { "epoch": 0.3899811244724396, "grad_norm": 0.3624984323978424, "learning_rate": 1.8197265505723857e-05, "loss": 0.4674, "step": 18388 }, { "epoch": 0.3900023329303726, "grad_norm": 0.3584742844104767, "learning_rate": 1.8197074490024126e-05, "loss": 0.4731, "step": 18389 }, { "epoch": 0.39002354138830564, "grad_norm": 0.33595120906829834, "learning_rate": 1.8196883465207675e-05, "loss": 0.4642, "step": 18390 }, { "epoch": 0.39004474984623866, "grad_norm": 0.34261611104011536, "learning_rate": 1.8196692431274722e-05, "loss": 0.5038, "step": 18391 }, { "epoch": 0.3900659583041717, "grad_norm": 0.33753156661987305, "learning_rate": 1.819650138822548e-05, "loss": 0.5305, "step": 18392 }, { "epoch": 0.39008716676210475, "grad_norm": 0.3389052748680115, "learning_rate": 1.819631033606016e-05, "loss": 0.4848, "step": 18393 }, { "epoch": 0.3901083752200378, "grad_norm": 0.3347790837287903, "learning_rate": 1.8196119274778974e-05, "loss": 0.5317, "step": 18394 }, { "epoch": 0.3901295836779708, "grad_norm": 0.33854588866233826, "learning_rate": 1.8195928204382135e-05, "loss": 0.4189, "step": 18395 }, { "epoch": 0.3901507921359038, "grad_norm": 0.325058251619339, "learning_rate": 1.8195737124869855e-05, "loss": 0.5325, "step": 18396 }, { "epoch": 0.39017200059383683, "grad_norm": 0.37874290347099304, "learning_rate": 1.8195546036242352e-05, "loss": 0.5042, "step": 18397 }, { "epoch": 0.39019320905176985, "grad_norm": 0.31630799174308777, "learning_rate": 1.819535493849983e-05, "loss": 0.4712, "step": 18398 }, { "epoch": 0.39021441750970287, "grad_norm": 0.34045863151550293, "learning_rate": 1.8195163831642505e-05, "loss": 0.5421, "step": 18399 }, { "epoch": 0.3902356259676359, "grad_norm": 0.37213948369026184, "learning_rate": 1.8194972715670593e-05, "loss": 0.5997, "step": 18400 }, { "epoch": 0.3902568344255689, "grad_norm": 0.3727538585662842, "learning_rate": 1.8194781590584302e-05, "loss": 0.5357, "step": 18401 }, { "epoch": 0.39027804288350193, "grad_norm": 0.3756076395511627, "learning_rate": 1.8194590456383845e-05, "loss": 0.5299, "step": 18402 }, { "epoch": 0.39029925134143495, "grad_norm": 0.3435829281806946, "learning_rate": 1.819439931306944e-05, "loss": 0.4576, "step": 18403 }, { "epoch": 0.39032045979936797, "grad_norm": 0.3499285578727722, "learning_rate": 1.819420816064129e-05, "loss": 0.5443, "step": 18404 }, { "epoch": 0.390341668257301, "grad_norm": 0.36523008346557617, "learning_rate": 1.8194016999099613e-05, "loss": 0.589, "step": 18405 }, { "epoch": 0.390362876715234, "grad_norm": 0.3287275433540344, "learning_rate": 1.8193825828444623e-05, "loss": 0.5105, "step": 18406 }, { "epoch": 0.3903840851731671, "grad_norm": 0.37222519516944885, "learning_rate": 1.819363464867653e-05, "loss": 0.5248, "step": 18407 }, { "epoch": 0.3904052936311001, "grad_norm": 0.3680386245250702, "learning_rate": 1.819344345979555e-05, "loss": 0.4953, "step": 18408 }, { "epoch": 0.3904265020890331, "grad_norm": 0.3436610698699951, "learning_rate": 1.8193252261801895e-05, "loss": 0.5283, "step": 18409 }, { "epoch": 0.39044771054696614, "grad_norm": 0.383563756942749, "learning_rate": 1.8193061054695774e-05, "loss": 0.4858, "step": 18410 }, { "epoch": 0.39046891900489916, "grad_norm": 0.4370286762714386, "learning_rate": 1.8192869838477398e-05, "loss": 0.5522, "step": 18411 }, { "epoch": 0.3904901274628322, "grad_norm": 0.32979056239128113, "learning_rate": 1.8192678613146987e-05, "loss": 0.4822, "step": 18412 }, { "epoch": 0.3905113359207652, "grad_norm": 0.3358859717845917, "learning_rate": 1.819248737870475e-05, "loss": 0.4891, "step": 18413 }, { "epoch": 0.3905325443786982, "grad_norm": 0.3327777683734894, "learning_rate": 1.81922961351509e-05, "loss": 0.4768, "step": 18414 }, { "epoch": 0.39055375283663124, "grad_norm": 0.41228675842285156, "learning_rate": 1.819210488248565e-05, "loss": 0.4997, "step": 18415 }, { "epoch": 0.39057496129456426, "grad_norm": 0.4138122498989105, "learning_rate": 1.819191362070921e-05, "loss": 0.457, "step": 18416 }, { "epoch": 0.3905961697524973, "grad_norm": 0.3209961950778961, "learning_rate": 1.8191722349821796e-05, "loss": 0.5225, "step": 18417 }, { "epoch": 0.3906173782104303, "grad_norm": 0.39185696840286255, "learning_rate": 1.8191531069823618e-05, "loss": 0.5294, "step": 18418 }, { "epoch": 0.3906385866683633, "grad_norm": 0.3284185230731964, "learning_rate": 1.8191339780714892e-05, "loss": 0.5415, "step": 18419 }, { "epoch": 0.3906597951262964, "grad_norm": 0.3953770399093628, "learning_rate": 1.8191148482495826e-05, "loss": 0.4663, "step": 18420 }, { "epoch": 0.3906810035842294, "grad_norm": 0.3299800455570221, "learning_rate": 1.819095717516664e-05, "loss": 0.5534, "step": 18421 }, { "epoch": 0.39070221204216243, "grad_norm": 0.3110548257827759, "learning_rate": 1.8190765858727538e-05, "loss": 0.4243, "step": 18422 }, { "epoch": 0.39072342050009545, "grad_norm": 0.3447894752025604, "learning_rate": 1.8190574533178743e-05, "loss": 0.4963, "step": 18423 }, { "epoch": 0.3907446289580285, "grad_norm": 0.4971342384815216, "learning_rate": 1.819038319852046e-05, "loss": 0.5045, "step": 18424 }, { "epoch": 0.3907658374159615, "grad_norm": 0.34164896607398987, "learning_rate": 1.8190191854752903e-05, "loss": 0.5158, "step": 18425 }, { "epoch": 0.3907870458738945, "grad_norm": 0.6574715971946716, "learning_rate": 1.8190000501876285e-05, "loss": 0.4812, "step": 18426 }, { "epoch": 0.39080825433182753, "grad_norm": 0.3266459107398987, "learning_rate": 1.8189809139890823e-05, "loss": 0.4824, "step": 18427 }, { "epoch": 0.39082946278976055, "grad_norm": 0.3550536632537842, "learning_rate": 1.818961776879672e-05, "loss": 0.4507, "step": 18428 }, { "epoch": 0.39085067124769357, "grad_norm": 0.3551878333091736, "learning_rate": 1.81894263885942e-05, "loss": 0.4736, "step": 18429 }, { "epoch": 0.3908718797056266, "grad_norm": 0.37320324778556824, "learning_rate": 1.8189234999283472e-05, "loss": 0.4321, "step": 18430 }, { "epoch": 0.3908930881635596, "grad_norm": 0.335456520318985, "learning_rate": 1.8189043600864745e-05, "loss": 0.4746, "step": 18431 }, { "epoch": 0.39091429662149263, "grad_norm": 0.33367326855659485, "learning_rate": 1.8188852193338238e-05, "loss": 0.5856, "step": 18432 }, { "epoch": 0.39093550507942565, "grad_norm": 0.305207759141922, "learning_rate": 1.818866077670416e-05, "loss": 0.4119, "step": 18433 }, { "epoch": 0.3909567135373587, "grad_norm": 0.36727243661880493, "learning_rate": 1.8188469350962724e-05, "loss": 0.5064, "step": 18434 }, { "epoch": 0.39097792199529174, "grad_norm": 0.3472985029220581, "learning_rate": 1.818827791611414e-05, "loss": 0.534, "step": 18435 }, { "epoch": 0.39099913045322476, "grad_norm": 0.31981828808784485, "learning_rate": 1.818808647215863e-05, "loss": 0.5132, "step": 18436 }, { "epoch": 0.3910203389111578, "grad_norm": 0.3575221598148346, "learning_rate": 1.8187895019096397e-05, "loss": 0.4987, "step": 18437 }, { "epoch": 0.3910415473690908, "grad_norm": 0.38226568698883057, "learning_rate": 1.8187703556927663e-05, "loss": 0.5835, "step": 18438 }, { "epoch": 0.3910627558270238, "grad_norm": 0.7002855539321899, "learning_rate": 1.8187512085652633e-05, "loss": 0.5937, "step": 18439 }, { "epoch": 0.39108396428495684, "grad_norm": 0.3395419418811798, "learning_rate": 1.8187320605271522e-05, "loss": 0.4743, "step": 18440 }, { "epoch": 0.39110517274288986, "grad_norm": 0.32984575629234314, "learning_rate": 1.818712911578455e-05, "loss": 0.4594, "step": 18441 }, { "epoch": 0.3911263812008229, "grad_norm": 0.36757728457450867, "learning_rate": 1.8186937617191914e-05, "loss": 0.5379, "step": 18442 }, { "epoch": 0.3911475896587559, "grad_norm": 0.33770009875297546, "learning_rate": 1.8186746109493844e-05, "loss": 0.4646, "step": 18443 }, { "epoch": 0.3911687981166889, "grad_norm": 0.44497042894363403, "learning_rate": 1.8186554592690546e-05, "loss": 0.423, "step": 18444 }, { "epoch": 0.39119000657462194, "grad_norm": 0.3496551215648651, "learning_rate": 1.8186363066782234e-05, "loss": 0.5239, "step": 18445 }, { "epoch": 0.39121121503255496, "grad_norm": 0.4043278992176056, "learning_rate": 1.8186171531769117e-05, "loss": 0.5641, "step": 18446 }, { "epoch": 0.391232423490488, "grad_norm": 0.38083234429359436, "learning_rate": 1.8185979987651412e-05, "loss": 0.5811, "step": 18447 }, { "epoch": 0.39125363194842105, "grad_norm": 0.3159337043762207, "learning_rate": 1.8185788434429333e-05, "loss": 0.5244, "step": 18448 }, { "epoch": 0.3912748404063541, "grad_norm": 0.36446860432624817, "learning_rate": 1.818559687210309e-05, "loss": 0.3857, "step": 18449 }, { "epoch": 0.3912960488642871, "grad_norm": 0.3891791105270386, "learning_rate": 1.8185405300672895e-05, "loss": 0.5028, "step": 18450 }, { "epoch": 0.3913172573222201, "grad_norm": 0.3406049907207489, "learning_rate": 1.8185213720138967e-05, "loss": 0.4219, "step": 18451 }, { "epoch": 0.39133846578015313, "grad_norm": 0.36106181144714355, "learning_rate": 1.8185022130501512e-05, "loss": 0.5552, "step": 18452 }, { "epoch": 0.39135967423808615, "grad_norm": 0.3791438937187195, "learning_rate": 1.818483053176075e-05, "loss": 0.5278, "step": 18453 }, { "epoch": 0.39138088269601917, "grad_norm": 0.40963926911354065, "learning_rate": 1.8184638923916887e-05, "loss": 0.4703, "step": 18454 }, { "epoch": 0.3914020911539522, "grad_norm": 0.3227815628051758, "learning_rate": 1.8184447306970144e-05, "loss": 0.5011, "step": 18455 }, { "epoch": 0.3914232996118852, "grad_norm": 0.3280605673789978, "learning_rate": 1.8184255680920725e-05, "loss": 0.458, "step": 18456 }, { "epoch": 0.39144450806981823, "grad_norm": 0.37861061096191406, "learning_rate": 1.8184064045768852e-05, "loss": 0.5733, "step": 18457 }, { "epoch": 0.39146571652775125, "grad_norm": 0.31455743312835693, "learning_rate": 1.8183872401514732e-05, "loss": 0.4427, "step": 18458 }, { "epoch": 0.39148692498568427, "grad_norm": 0.359011709690094, "learning_rate": 1.818368074815858e-05, "loss": 0.5642, "step": 18459 }, { "epoch": 0.3915081334436173, "grad_norm": 0.41414934396743774, "learning_rate": 1.818348908570061e-05, "loss": 0.5414, "step": 18460 }, { "epoch": 0.39152934190155037, "grad_norm": 0.29146188497543335, "learning_rate": 1.8183297414141033e-05, "loss": 0.5585, "step": 18461 }, { "epoch": 0.3915505503594834, "grad_norm": 0.34174126386642456, "learning_rate": 1.818310573348007e-05, "loss": 0.5572, "step": 18462 }, { "epoch": 0.3915717588174164, "grad_norm": 0.37674015760421753, "learning_rate": 1.818291404371792e-05, "loss": 0.5042, "step": 18463 }, { "epoch": 0.3915929672753494, "grad_norm": 0.35055866837501526, "learning_rate": 1.818272234485481e-05, "loss": 0.5079, "step": 18464 }, { "epoch": 0.39161417573328244, "grad_norm": 0.3254924416542053, "learning_rate": 1.8182530636890944e-05, "loss": 0.5637, "step": 18465 }, { "epoch": 0.39163538419121546, "grad_norm": 0.3291511833667755, "learning_rate": 1.818233891982654e-05, "loss": 0.4607, "step": 18466 }, { "epoch": 0.3916565926491485, "grad_norm": 0.3701605498790741, "learning_rate": 1.8182147193661807e-05, "loss": 0.6072, "step": 18467 }, { "epoch": 0.3916778011070815, "grad_norm": 0.3044237196445465, "learning_rate": 1.8181955458396962e-05, "loss": 0.4626, "step": 18468 }, { "epoch": 0.3916990095650145, "grad_norm": 0.35967308282852173, "learning_rate": 1.8181763714032218e-05, "loss": 0.4966, "step": 18469 }, { "epoch": 0.39172021802294754, "grad_norm": 0.33766892552375793, "learning_rate": 1.8181571960567786e-05, "loss": 0.5205, "step": 18470 }, { "epoch": 0.39174142648088056, "grad_norm": 0.3249579668045044, "learning_rate": 1.8181380198003882e-05, "loss": 0.4616, "step": 18471 }, { "epoch": 0.3917626349388136, "grad_norm": 0.3714074194431305, "learning_rate": 1.818118842634072e-05, "loss": 0.5383, "step": 18472 }, { "epoch": 0.3917838433967466, "grad_norm": 0.4426354467868805, "learning_rate": 1.818099664557851e-05, "loss": 0.5953, "step": 18473 }, { "epoch": 0.3918050518546796, "grad_norm": 0.3405808210372925, "learning_rate": 1.8180804855717466e-05, "loss": 0.5688, "step": 18474 }, { "epoch": 0.3918262603126127, "grad_norm": 0.32682251930236816, "learning_rate": 1.81806130567578e-05, "loss": 0.5099, "step": 18475 }, { "epoch": 0.3918474687705457, "grad_norm": 0.3314496576786041, "learning_rate": 1.8180421248699733e-05, "loss": 0.4436, "step": 18476 }, { "epoch": 0.39186867722847873, "grad_norm": 0.33328717947006226, "learning_rate": 1.8180229431543468e-05, "loss": 0.5336, "step": 18477 }, { "epoch": 0.39188988568641175, "grad_norm": 0.3406426012516022, "learning_rate": 1.8180037605289222e-05, "loss": 0.4441, "step": 18478 }, { "epoch": 0.3919110941443448, "grad_norm": 0.438660591840744, "learning_rate": 1.817984576993721e-05, "loss": 0.4982, "step": 18479 }, { "epoch": 0.3919323026022778, "grad_norm": 0.3181649446487427, "learning_rate": 1.817965392548765e-05, "loss": 0.5256, "step": 18480 }, { "epoch": 0.3919535110602108, "grad_norm": 0.3992560803890228, "learning_rate": 1.817946207194074e-05, "loss": 0.5152, "step": 18481 }, { "epoch": 0.39197471951814383, "grad_norm": 0.3631247580051422, "learning_rate": 1.817927020929671e-05, "loss": 0.3927, "step": 18482 }, { "epoch": 0.39199592797607685, "grad_norm": 0.41430985927581787, "learning_rate": 1.8179078337555765e-05, "loss": 0.5473, "step": 18483 }, { "epoch": 0.39201713643400987, "grad_norm": 0.3339102268218994, "learning_rate": 1.817888645671812e-05, "loss": 0.5123, "step": 18484 }, { "epoch": 0.3920383448919429, "grad_norm": 0.3662168085575104, "learning_rate": 1.8178694566783992e-05, "loss": 0.5172, "step": 18485 }, { "epoch": 0.3920595533498759, "grad_norm": 0.31897270679473877, "learning_rate": 1.8178502667753587e-05, "loss": 0.4865, "step": 18486 }, { "epoch": 0.39208076180780893, "grad_norm": 0.3692728877067566, "learning_rate": 1.8178310759627122e-05, "loss": 0.5242, "step": 18487 }, { "epoch": 0.39210197026574195, "grad_norm": 0.3450670838356018, "learning_rate": 1.8178118842404814e-05, "loss": 0.5495, "step": 18488 }, { "epoch": 0.392123178723675, "grad_norm": 0.3418545126914978, "learning_rate": 1.8177926916086873e-05, "loss": 0.4293, "step": 18489 }, { "epoch": 0.39214438718160805, "grad_norm": 0.3180062174797058, "learning_rate": 1.817773498067351e-05, "loss": 0.4911, "step": 18490 }, { "epoch": 0.39216559563954106, "grad_norm": 0.3364763855934143, "learning_rate": 1.8177543036164943e-05, "loss": 0.4983, "step": 18491 }, { "epoch": 0.3921868040974741, "grad_norm": 0.36173269152641296, "learning_rate": 1.8177351082561385e-05, "loss": 0.5143, "step": 18492 }, { "epoch": 0.3922080125554071, "grad_norm": 0.48976850509643555, "learning_rate": 1.8177159119863043e-05, "loss": 0.4757, "step": 18493 }, { "epoch": 0.3922292210133401, "grad_norm": 0.40531131625175476, "learning_rate": 1.817696714807014e-05, "loss": 0.5492, "step": 18494 }, { "epoch": 0.39225042947127314, "grad_norm": 0.3617261052131653, "learning_rate": 1.8176775167182884e-05, "loss": 0.5862, "step": 18495 }, { "epoch": 0.39227163792920616, "grad_norm": 0.3326921761035919, "learning_rate": 1.817658317720149e-05, "loss": 0.5063, "step": 18496 }, { "epoch": 0.3922928463871392, "grad_norm": 0.3095357120037079, "learning_rate": 1.817639117812617e-05, "loss": 0.433, "step": 18497 }, { "epoch": 0.3923140548450722, "grad_norm": 0.29833680391311646, "learning_rate": 1.8176199169957142e-05, "loss": 0.4622, "step": 18498 }, { "epoch": 0.3923352633030052, "grad_norm": 0.3392307758331299, "learning_rate": 1.8176007152694612e-05, "loss": 0.5457, "step": 18499 }, { "epoch": 0.39235647176093824, "grad_norm": 0.3292120695114136, "learning_rate": 1.81758151263388e-05, "loss": 0.5282, "step": 18500 }, { "epoch": 0.39237768021887126, "grad_norm": 0.42230933904647827, "learning_rate": 1.8175623090889917e-05, "loss": 0.561, "step": 18501 }, { "epoch": 0.39239888867680434, "grad_norm": 0.3376178443431854, "learning_rate": 1.817543104634818e-05, "loss": 0.5266, "step": 18502 }, { "epoch": 0.39242009713473736, "grad_norm": 0.35069429874420166, "learning_rate": 1.8175238992713797e-05, "loss": 0.565, "step": 18503 }, { "epoch": 0.3924413055926704, "grad_norm": 0.34084615111351013, "learning_rate": 1.8175046929986984e-05, "loss": 0.5504, "step": 18504 }, { "epoch": 0.3924625140506034, "grad_norm": 0.3622451722621918, "learning_rate": 1.8174854858167953e-05, "loss": 0.4985, "step": 18505 }, { "epoch": 0.3924837225085364, "grad_norm": 0.4043987989425659, "learning_rate": 1.8174662777256924e-05, "loss": 0.5108, "step": 18506 }, { "epoch": 0.39250493096646943, "grad_norm": 0.34484103322029114, "learning_rate": 1.8174470687254102e-05, "loss": 0.5151, "step": 18507 }, { "epoch": 0.39252613942440245, "grad_norm": 0.33331313729286194, "learning_rate": 1.817427858815971e-05, "loss": 0.4756, "step": 18508 }, { "epoch": 0.3925473478823355, "grad_norm": 0.4108937382698059, "learning_rate": 1.8174086479973948e-05, "loss": 0.4907, "step": 18509 }, { "epoch": 0.3925685563402685, "grad_norm": 0.3525039851665497, "learning_rate": 1.8173894362697044e-05, "loss": 0.449, "step": 18510 }, { "epoch": 0.3925897647982015, "grad_norm": 0.3459988832473755, "learning_rate": 1.8173702236329206e-05, "loss": 0.6294, "step": 18511 }, { "epoch": 0.39261097325613453, "grad_norm": 0.40629449486732483, "learning_rate": 1.8173510100870645e-05, "loss": 0.4295, "step": 18512 }, { "epoch": 0.39263218171406755, "grad_norm": 0.32557958364486694, "learning_rate": 1.8173317956321577e-05, "loss": 0.4147, "step": 18513 }, { "epoch": 0.39265339017200057, "grad_norm": 0.37973761558532715, "learning_rate": 1.817312580268222e-05, "loss": 0.5637, "step": 18514 }, { "epoch": 0.3926745986299336, "grad_norm": 0.36680108308792114, "learning_rate": 1.817293363995278e-05, "loss": 0.6335, "step": 18515 }, { "epoch": 0.39269580708786667, "grad_norm": 0.35080021619796753, "learning_rate": 1.8172741468133472e-05, "loss": 0.5507, "step": 18516 }, { "epoch": 0.3927170155457997, "grad_norm": 0.328226774930954, "learning_rate": 1.8172549287224515e-05, "loss": 0.4948, "step": 18517 }, { "epoch": 0.3927382240037327, "grad_norm": 0.3632957637310028, "learning_rate": 1.8172357097226116e-05, "loss": 0.4662, "step": 18518 }, { "epoch": 0.3927594324616657, "grad_norm": 0.3280056416988373, "learning_rate": 1.8172164898138497e-05, "loss": 0.5408, "step": 18519 }, { "epoch": 0.39278064091959874, "grad_norm": 0.4033341407775879, "learning_rate": 1.8171972689961866e-05, "loss": 0.6168, "step": 18520 }, { "epoch": 0.39280184937753176, "grad_norm": 0.33453765511512756, "learning_rate": 1.8171780472696436e-05, "loss": 0.4714, "step": 18521 }, { "epoch": 0.3928230578354648, "grad_norm": 0.3066631853580475, "learning_rate": 1.8171588246342427e-05, "loss": 0.4893, "step": 18522 }, { "epoch": 0.3928442662933978, "grad_norm": 0.3643658459186554, "learning_rate": 1.8171396010900046e-05, "loss": 0.5728, "step": 18523 }, { "epoch": 0.3928654747513308, "grad_norm": 0.3134988844394684, "learning_rate": 1.8171203766369506e-05, "loss": 0.5035, "step": 18524 }, { "epoch": 0.39288668320926384, "grad_norm": 0.33626168966293335, "learning_rate": 1.8171011512751028e-05, "loss": 0.4602, "step": 18525 }, { "epoch": 0.39290789166719686, "grad_norm": 0.38263455033302307, "learning_rate": 1.817081925004482e-05, "loss": 0.4727, "step": 18526 }, { "epoch": 0.3929291001251299, "grad_norm": 0.3559033274650574, "learning_rate": 1.8170626978251103e-05, "loss": 0.5046, "step": 18527 }, { "epoch": 0.3929503085830629, "grad_norm": 0.3183988928794861, "learning_rate": 1.817043469737008e-05, "loss": 0.4451, "step": 18528 }, { "epoch": 0.392971517040996, "grad_norm": 0.3625037968158722, "learning_rate": 1.8170242407401973e-05, "loss": 0.4583, "step": 18529 }, { "epoch": 0.392992725498929, "grad_norm": 0.3481336534023285, "learning_rate": 1.8170050108346994e-05, "loss": 0.4832, "step": 18530 }, { "epoch": 0.393013933956862, "grad_norm": 0.36007747054100037, "learning_rate": 1.8169857800205354e-05, "loss": 0.5126, "step": 18531 }, { "epoch": 0.39303514241479504, "grad_norm": 0.351347953081131, "learning_rate": 1.816966548297727e-05, "loss": 0.4443, "step": 18532 }, { "epoch": 0.39305635087272806, "grad_norm": 0.42055973410606384, "learning_rate": 1.8169473156662954e-05, "loss": 0.4872, "step": 18533 }, { "epoch": 0.3930775593306611, "grad_norm": 0.38173922896385193, "learning_rate": 1.816928082126262e-05, "loss": 0.5154, "step": 18534 }, { "epoch": 0.3930987677885941, "grad_norm": 0.3526952564716339, "learning_rate": 1.8169088476776488e-05, "loss": 0.5298, "step": 18535 }, { "epoch": 0.3931199762465271, "grad_norm": 0.3289611339569092, "learning_rate": 1.816889612320476e-05, "loss": 0.4735, "step": 18536 }, { "epoch": 0.39314118470446013, "grad_norm": 0.3726336658000946, "learning_rate": 1.8168703760547664e-05, "loss": 0.4885, "step": 18537 }, { "epoch": 0.39316239316239315, "grad_norm": 0.33147263526916504, "learning_rate": 1.8168511388805403e-05, "loss": 0.4517, "step": 18538 }, { "epoch": 0.3931836016203262, "grad_norm": 0.43848299980163574, "learning_rate": 1.8168319007978192e-05, "loss": 0.4783, "step": 18539 }, { "epoch": 0.3932048100782592, "grad_norm": 0.3703048825263977, "learning_rate": 1.816812661806625e-05, "loss": 0.4649, "step": 18540 }, { "epoch": 0.3932260185361922, "grad_norm": 0.35313940048217773, "learning_rate": 1.816793421906979e-05, "loss": 0.4745, "step": 18541 }, { "epoch": 0.39324722699412523, "grad_norm": 0.40262410044670105, "learning_rate": 1.8167741810989023e-05, "loss": 0.499, "step": 18542 }, { "epoch": 0.3932684354520583, "grad_norm": 0.3079303205013275, "learning_rate": 1.8167549393824163e-05, "loss": 0.4625, "step": 18543 }, { "epoch": 0.3932896439099913, "grad_norm": 0.7235628962516785, "learning_rate": 1.816735696757543e-05, "loss": 0.4539, "step": 18544 }, { "epoch": 0.39331085236792435, "grad_norm": 0.34445181488990784, "learning_rate": 1.8167164532243028e-05, "loss": 0.4839, "step": 18545 }, { "epoch": 0.39333206082585737, "grad_norm": 0.3476772606372833, "learning_rate": 1.816697208782718e-05, "loss": 0.5157, "step": 18546 }, { "epoch": 0.3933532692837904, "grad_norm": 0.34448951482772827, "learning_rate": 1.8166779634328096e-05, "loss": 0.4585, "step": 18547 }, { "epoch": 0.3933744777417234, "grad_norm": 0.3348029553890228, "learning_rate": 1.816658717174599e-05, "loss": 0.5244, "step": 18548 }, { "epoch": 0.3933956861996564, "grad_norm": 0.35153907537460327, "learning_rate": 1.816639470008108e-05, "loss": 0.5249, "step": 18549 }, { "epoch": 0.39341689465758944, "grad_norm": 0.3382830023765564, "learning_rate": 1.8166202219333572e-05, "loss": 0.5654, "step": 18550 }, { "epoch": 0.39343810311552246, "grad_norm": 0.4442650377750397, "learning_rate": 1.8166009729503687e-05, "loss": 0.4733, "step": 18551 }, { "epoch": 0.3934593115734555, "grad_norm": 0.33218052983283997, "learning_rate": 1.8165817230591636e-05, "loss": 0.5292, "step": 18552 }, { "epoch": 0.3934805200313885, "grad_norm": 0.3557497262954712, "learning_rate": 1.8165624722597634e-05, "loss": 0.4658, "step": 18553 }, { "epoch": 0.3935017284893215, "grad_norm": 0.34530898928642273, "learning_rate": 1.8165432205521897e-05, "loss": 0.5322, "step": 18554 }, { "epoch": 0.39352293694725454, "grad_norm": 0.3848607540130615, "learning_rate": 1.8165239679364636e-05, "loss": 0.4755, "step": 18555 }, { "epoch": 0.39354414540518756, "grad_norm": 0.3927476108074188, "learning_rate": 1.8165047144126066e-05, "loss": 0.5883, "step": 18556 }, { "epoch": 0.39356535386312064, "grad_norm": 0.37762778997421265, "learning_rate": 1.8164854599806402e-05, "loss": 0.5399, "step": 18557 }, { "epoch": 0.39358656232105366, "grad_norm": 0.33385521173477173, "learning_rate": 1.8164662046405857e-05, "loss": 0.4682, "step": 18558 }, { "epoch": 0.3936077707789867, "grad_norm": 0.3275448679924011, "learning_rate": 1.8164469483924648e-05, "loss": 0.487, "step": 18559 }, { "epoch": 0.3936289792369197, "grad_norm": 0.34937185049057007, "learning_rate": 1.8164276912362985e-05, "loss": 0.5844, "step": 18560 }, { "epoch": 0.3936501876948527, "grad_norm": 0.33636435866355896, "learning_rate": 1.8164084331721085e-05, "loss": 0.547, "step": 18561 }, { "epoch": 0.39367139615278574, "grad_norm": 0.32873618602752686, "learning_rate": 1.816389174199916e-05, "loss": 0.5346, "step": 18562 }, { "epoch": 0.39369260461071875, "grad_norm": 0.33096832036972046, "learning_rate": 1.8163699143197426e-05, "loss": 0.4592, "step": 18563 }, { "epoch": 0.3937138130686518, "grad_norm": 0.36830833554267883, "learning_rate": 1.8163506535316098e-05, "loss": 0.5801, "step": 18564 }, { "epoch": 0.3937350215265848, "grad_norm": 0.31688985228538513, "learning_rate": 1.8163313918355386e-05, "loss": 0.4593, "step": 18565 }, { "epoch": 0.3937562299845178, "grad_norm": 0.31850698590278625, "learning_rate": 1.8163121292315508e-05, "loss": 0.4417, "step": 18566 }, { "epoch": 0.39377743844245083, "grad_norm": 0.3559773564338684, "learning_rate": 1.816292865719668e-05, "loss": 0.5106, "step": 18567 }, { "epoch": 0.39379864690038385, "grad_norm": 0.3833904266357422, "learning_rate": 1.8162736012999113e-05, "loss": 0.4883, "step": 18568 }, { "epoch": 0.3938198553583169, "grad_norm": 0.3384212851524353, "learning_rate": 1.816254335972302e-05, "loss": 0.5184, "step": 18569 }, { "epoch": 0.39384106381624995, "grad_norm": 0.34512192010879517, "learning_rate": 1.816235069736862e-05, "loss": 0.5171, "step": 18570 }, { "epoch": 0.39386227227418297, "grad_norm": 0.38006195425987244, "learning_rate": 1.8162158025936124e-05, "loss": 0.4734, "step": 18571 }, { "epoch": 0.393883480732116, "grad_norm": 0.37718334794044495, "learning_rate": 1.8161965345425742e-05, "loss": 0.5476, "step": 18572 }, { "epoch": 0.393904689190049, "grad_norm": 0.3855457901954651, "learning_rate": 1.81617726558377e-05, "loss": 0.5383, "step": 18573 }, { "epoch": 0.393925897647982, "grad_norm": 0.33489781618118286, "learning_rate": 1.81615799571722e-05, "loss": 0.5568, "step": 18574 }, { "epoch": 0.39394710610591505, "grad_norm": 0.433918833732605, "learning_rate": 1.8161387249429464e-05, "loss": 0.5031, "step": 18575 }, { "epoch": 0.39396831456384807, "grad_norm": 0.37046459317207336, "learning_rate": 1.8161194532609704e-05, "loss": 0.4876, "step": 18576 }, { "epoch": 0.3939895230217811, "grad_norm": 0.33654531836509705, "learning_rate": 1.8161001806713133e-05, "loss": 0.5494, "step": 18577 }, { "epoch": 0.3940107314797141, "grad_norm": 0.3621451258659363, "learning_rate": 1.816080907173997e-05, "loss": 0.4695, "step": 18578 }, { "epoch": 0.3940319399376471, "grad_norm": 0.3370465338230133, "learning_rate": 1.816061632769042e-05, "loss": 0.4597, "step": 18579 }, { "epoch": 0.39405314839558014, "grad_norm": 0.3583730459213257, "learning_rate": 1.8160423574564708e-05, "loss": 0.5986, "step": 18580 }, { "epoch": 0.39407435685351316, "grad_norm": 0.3374437987804413, "learning_rate": 1.8160230812363043e-05, "loss": 0.5579, "step": 18581 }, { "epoch": 0.3940955653114462, "grad_norm": 0.3534492254257202, "learning_rate": 1.816003804108564e-05, "loss": 0.4986, "step": 18582 }, { "epoch": 0.3941167737693792, "grad_norm": 0.3452298939228058, "learning_rate": 1.8159845260732714e-05, "loss": 0.4507, "step": 18583 }, { "epoch": 0.3941379822273123, "grad_norm": 0.31997284293174744, "learning_rate": 1.8159652471304477e-05, "loss": 0.4503, "step": 18584 }, { "epoch": 0.3941591906852453, "grad_norm": 0.3700999915599823, "learning_rate": 1.815945967280115e-05, "loss": 0.5863, "step": 18585 }, { "epoch": 0.3941803991431783, "grad_norm": 0.3225530683994293, "learning_rate": 1.8159266865222937e-05, "loss": 0.423, "step": 18586 }, { "epoch": 0.39420160760111134, "grad_norm": 0.32751625776290894, "learning_rate": 1.815907404857006e-05, "loss": 0.49, "step": 18587 }, { "epoch": 0.39422281605904436, "grad_norm": 0.365305632352829, "learning_rate": 1.8158881222842735e-05, "loss": 0.4675, "step": 18588 }, { "epoch": 0.3942440245169774, "grad_norm": 0.5317932963371277, "learning_rate": 1.8158688388041172e-05, "loss": 0.4509, "step": 18589 }, { "epoch": 0.3942652329749104, "grad_norm": 0.3896840512752533, "learning_rate": 1.8158495544165585e-05, "loss": 0.4407, "step": 18590 }, { "epoch": 0.3942864414328434, "grad_norm": 0.3326132297515869, "learning_rate": 1.815830269121619e-05, "loss": 0.5494, "step": 18591 }, { "epoch": 0.39430764989077643, "grad_norm": 0.366614431142807, "learning_rate": 1.8158109829193203e-05, "loss": 0.4825, "step": 18592 }, { "epoch": 0.39432885834870945, "grad_norm": 0.31559842824935913, "learning_rate": 1.8157916958096837e-05, "loss": 0.4843, "step": 18593 }, { "epoch": 0.3943500668066425, "grad_norm": 0.4044174253940582, "learning_rate": 1.8157724077927302e-05, "loss": 0.4374, "step": 18594 }, { "epoch": 0.3943712752645755, "grad_norm": 0.3430556654930115, "learning_rate": 1.8157531188684824e-05, "loss": 0.4555, "step": 18595 }, { "epoch": 0.3943924837225085, "grad_norm": 0.32965147495269775, "learning_rate": 1.8157338290369607e-05, "loss": 0.5036, "step": 18596 }, { "epoch": 0.39441369218044153, "grad_norm": 0.3264870345592499, "learning_rate": 1.815714538298187e-05, "loss": 0.529, "step": 18597 }, { "epoch": 0.3944349006383746, "grad_norm": 0.42919814586639404, "learning_rate": 1.815695246652183e-05, "loss": 0.5372, "step": 18598 }, { "epoch": 0.3944561090963076, "grad_norm": 0.3491363823413849, "learning_rate": 1.8156759540989694e-05, "loss": 0.5249, "step": 18599 }, { "epoch": 0.39447731755424065, "grad_norm": 0.5057157278060913, "learning_rate": 1.815656660638568e-05, "loss": 0.4555, "step": 18600 }, { "epoch": 0.39449852601217367, "grad_norm": 0.319301962852478, "learning_rate": 1.8156373662710007e-05, "loss": 0.4602, "step": 18601 }, { "epoch": 0.3945197344701067, "grad_norm": 0.3637484908103943, "learning_rate": 1.8156180709962883e-05, "loss": 0.4914, "step": 18602 }, { "epoch": 0.3945409429280397, "grad_norm": 0.3571195602416992, "learning_rate": 1.815598774814453e-05, "loss": 0.5549, "step": 18603 }, { "epoch": 0.3945621513859727, "grad_norm": 0.3338549733161926, "learning_rate": 1.8155794777255156e-05, "loss": 0.4579, "step": 18604 }, { "epoch": 0.39458335984390575, "grad_norm": 0.3496800363063812, "learning_rate": 1.8155601797294978e-05, "loss": 0.4833, "step": 18605 }, { "epoch": 0.39460456830183877, "grad_norm": 0.4036101996898651, "learning_rate": 1.8155408808264206e-05, "loss": 0.555, "step": 18606 }, { "epoch": 0.3946257767597718, "grad_norm": 0.34957069158554077, "learning_rate": 1.8155215810163066e-05, "loss": 0.5467, "step": 18607 }, { "epoch": 0.3946469852177048, "grad_norm": 0.33171892166137695, "learning_rate": 1.8155022802991766e-05, "loss": 0.5191, "step": 18608 }, { "epoch": 0.3946681936756378, "grad_norm": 0.35417309403419495, "learning_rate": 1.8154829786750517e-05, "loss": 0.5526, "step": 18609 }, { "epoch": 0.39468940213357084, "grad_norm": 0.4979736804962158, "learning_rate": 1.815463676143954e-05, "loss": 0.462, "step": 18610 }, { "epoch": 0.3947106105915039, "grad_norm": 0.3480725586414337, "learning_rate": 1.815444372705904e-05, "loss": 0.6358, "step": 18611 }, { "epoch": 0.39473181904943694, "grad_norm": 0.3220300078392029, "learning_rate": 1.8154250683609246e-05, "loss": 0.5087, "step": 18612 }, { "epoch": 0.39475302750736996, "grad_norm": 0.3620046377182007, "learning_rate": 1.815405763109036e-05, "loss": 0.5417, "step": 18613 }, { "epoch": 0.394774235965303, "grad_norm": 0.3391886353492737, "learning_rate": 1.8153864569502603e-05, "loss": 0.4773, "step": 18614 }, { "epoch": 0.394795444423236, "grad_norm": 2.6903271675109863, "learning_rate": 1.8153671498846193e-05, "loss": 0.4307, "step": 18615 }, { "epoch": 0.394816652881169, "grad_norm": 0.33973458409309387, "learning_rate": 1.8153478419121336e-05, "loss": 0.5306, "step": 18616 }, { "epoch": 0.39483786133910204, "grad_norm": 0.348235160112381, "learning_rate": 1.8153285330328253e-05, "loss": 0.491, "step": 18617 }, { "epoch": 0.39485906979703506, "grad_norm": 0.3549402952194214, "learning_rate": 1.8153092232467154e-05, "loss": 0.4938, "step": 18618 }, { "epoch": 0.3948802782549681, "grad_norm": 0.40608659386634827, "learning_rate": 1.815289912553826e-05, "loss": 0.5983, "step": 18619 }, { "epoch": 0.3949014867129011, "grad_norm": 0.41649770736694336, "learning_rate": 1.8152706009541782e-05, "loss": 0.5988, "step": 18620 }, { "epoch": 0.3949226951708341, "grad_norm": 0.36334288120269775, "learning_rate": 1.815251288447793e-05, "loss": 0.5529, "step": 18621 }, { "epoch": 0.39494390362876713, "grad_norm": 0.3302008807659149, "learning_rate": 1.8152319750346933e-05, "loss": 0.5153, "step": 18622 }, { "epoch": 0.39496511208670015, "grad_norm": 0.34050315618515015, "learning_rate": 1.815212660714899e-05, "loss": 0.5632, "step": 18623 }, { "epoch": 0.3949863205446332, "grad_norm": 0.3616649806499481, "learning_rate": 1.8151933454884328e-05, "loss": 0.4461, "step": 18624 }, { "epoch": 0.39500752900256625, "grad_norm": 0.3757072687149048, "learning_rate": 1.815174029355315e-05, "loss": 0.6089, "step": 18625 }, { "epoch": 0.39502873746049927, "grad_norm": 0.3528469502925873, "learning_rate": 1.8151547123155683e-05, "loss": 0.5097, "step": 18626 }, { "epoch": 0.3950499459184323, "grad_norm": 0.37842240929603577, "learning_rate": 1.8151353943692134e-05, "loss": 0.4803, "step": 18627 }, { "epoch": 0.3950711543763653, "grad_norm": 0.38631659746170044, "learning_rate": 1.815116075516272e-05, "loss": 0.5915, "step": 18628 }, { "epoch": 0.3950923628342983, "grad_norm": 0.3559565842151642, "learning_rate": 1.8150967557567653e-05, "loss": 0.6215, "step": 18629 }, { "epoch": 0.39511357129223135, "grad_norm": 0.3311217725276947, "learning_rate": 1.8150774350907157e-05, "loss": 0.4794, "step": 18630 }, { "epoch": 0.39513477975016437, "grad_norm": 0.36412513256073, "learning_rate": 1.8150581135181434e-05, "loss": 0.4823, "step": 18631 }, { "epoch": 0.3951559882080974, "grad_norm": 0.3501480519771576, "learning_rate": 1.8150387910390706e-05, "loss": 0.4471, "step": 18632 }, { "epoch": 0.3951771966660304, "grad_norm": 0.29199814796447754, "learning_rate": 1.8150194676535194e-05, "loss": 0.377, "step": 18633 }, { "epoch": 0.3951984051239634, "grad_norm": 0.3948940336704254, "learning_rate": 1.8150001433615102e-05, "loss": 0.56, "step": 18634 }, { "epoch": 0.39521961358189645, "grad_norm": 0.4100300073623657, "learning_rate": 1.814980818163065e-05, "loss": 0.5847, "step": 18635 }, { "epoch": 0.39524082203982946, "grad_norm": 0.33686140179634094, "learning_rate": 1.814961492058205e-05, "loss": 0.5429, "step": 18636 }, { "epoch": 0.3952620304977625, "grad_norm": 0.3169732987880707, "learning_rate": 1.814942165046952e-05, "loss": 0.4642, "step": 18637 }, { "epoch": 0.3952832389556955, "grad_norm": 0.3586789071559906, "learning_rate": 1.8149228371293276e-05, "loss": 0.4401, "step": 18638 }, { "epoch": 0.3953044474136286, "grad_norm": 0.36303409934043884, "learning_rate": 1.8149035083053526e-05, "loss": 0.4839, "step": 18639 }, { "epoch": 0.3953256558715616, "grad_norm": 0.35900527238845825, "learning_rate": 1.8148841785750497e-05, "loss": 0.4788, "step": 18640 }, { "epoch": 0.3953468643294946, "grad_norm": 0.390165776014328, "learning_rate": 1.814864847938439e-05, "loss": 0.5267, "step": 18641 }, { "epoch": 0.39536807278742764, "grad_norm": 0.3107146620750427, "learning_rate": 1.814845516395543e-05, "loss": 0.5208, "step": 18642 }, { "epoch": 0.39538928124536066, "grad_norm": 0.30905774235725403, "learning_rate": 1.814826183946383e-05, "loss": 0.4904, "step": 18643 }, { "epoch": 0.3954104897032937, "grad_norm": 0.39000174403190613, "learning_rate": 1.8148068505909804e-05, "loss": 0.4775, "step": 18644 }, { "epoch": 0.3954316981612267, "grad_norm": 0.34818795323371887, "learning_rate": 1.8147875163293567e-05, "loss": 0.5005, "step": 18645 }, { "epoch": 0.3954529066191597, "grad_norm": 0.36055368185043335, "learning_rate": 1.8147681811615332e-05, "loss": 0.5568, "step": 18646 }, { "epoch": 0.39547411507709274, "grad_norm": 0.44002243876457214, "learning_rate": 1.814748845087532e-05, "loss": 0.5386, "step": 18647 }, { "epoch": 0.39549532353502576, "grad_norm": 0.5675560832023621, "learning_rate": 1.8147295081073736e-05, "loss": 0.5089, "step": 18648 }, { "epoch": 0.3955165319929588, "grad_norm": 0.670609712600708, "learning_rate": 1.8147101702210802e-05, "loss": 0.6006, "step": 18649 }, { "epoch": 0.3955377404508918, "grad_norm": 0.3096887767314911, "learning_rate": 1.8146908314286736e-05, "loss": 0.4245, "step": 18650 }, { "epoch": 0.3955589489088248, "grad_norm": 0.35865318775177, "learning_rate": 1.8146714917301752e-05, "loss": 0.5488, "step": 18651 }, { "epoch": 0.3955801573667579, "grad_norm": 0.3137015700340271, "learning_rate": 1.8146521511256057e-05, "loss": 0.4675, "step": 18652 }, { "epoch": 0.3956013658246909, "grad_norm": 0.3568035364151001, "learning_rate": 1.8146328096149874e-05, "loss": 0.5117, "step": 18653 }, { "epoch": 0.39562257428262393, "grad_norm": 0.5653820037841797, "learning_rate": 1.8146134671983413e-05, "loss": 0.6192, "step": 18654 }, { "epoch": 0.39564378274055695, "grad_norm": 0.3546592891216278, "learning_rate": 1.8145941238756894e-05, "loss": 0.4571, "step": 18655 }, { "epoch": 0.39566499119848997, "grad_norm": 0.36239105463027954, "learning_rate": 1.814574779647053e-05, "loss": 0.5229, "step": 18656 }, { "epoch": 0.395686199656423, "grad_norm": 0.35152000188827515, "learning_rate": 1.8145554345124535e-05, "loss": 0.4619, "step": 18657 }, { "epoch": 0.395707408114356, "grad_norm": 0.3457707464694977, "learning_rate": 1.8145360884719125e-05, "loss": 0.5672, "step": 18658 }, { "epoch": 0.395728616572289, "grad_norm": 0.30874884128570557, "learning_rate": 1.8145167415254518e-05, "loss": 0.4023, "step": 18659 }, { "epoch": 0.39574982503022205, "grad_norm": 0.35683995485305786, "learning_rate": 1.8144973936730924e-05, "loss": 0.6089, "step": 18660 }, { "epoch": 0.39577103348815507, "grad_norm": 0.3131248354911804, "learning_rate": 1.8144780449148563e-05, "loss": 0.4346, "step": 18661 }, { "epoch": 0.3957922419460881, "grad_norm": 0.3263886272907257, "learning_rate": 1.8144586952507642e-05, "loss": 0.5193, "step": 18662 }, { "epoch": 0.3958134504040211, "grad_norm": 0.3312259316444397, "learning_rate": 1.814439344680839e-05, "loss": 0.4976, "step": 18663 }, { "epoch": 0.3958346588619541, "grad_norm": 0.31306636333465576, "learning_rate": 1.814419993205101e-05, "loss": 0.5054, "step": 18664 }, { "epoch": 0.39585586731988714, "grad_norm": 0.3489832580089569, "learning_rate": 1.8144006408235722e-05, "loss": 0.5014, "step": 18665 }, { "epoch": 0.3958770757778202, "grad_norm": 0.37652984261512756, "learning_rate": 1.814381287536274e-05, "loss": 0.5255, "step": 18666 }, { "epoch": 0.39589828423575324, "grad_norm": 0.34933868050575256, "learning_rate": 1.8143619333432286e-05, "loss": 0.535, "step": 18667 }, { "epoch": 0.39591949269368626, "grad_norm": 0.35048559308052063, "learning_rate": 1.8143425782444562e-05, "loss": 0.4334, "step": 18668 }, { "epoch": 0.3959407011516193, "grad_norm": 0.3601462244987488, "learning_rate": 1.8143232222399793e-05, "loss": 0.5505, "step": 18669 }, { "epoch": 0.3959619096095523, "grad_norm": 0.343827486038208, "learning_rate": 1.8143038653298192e-05, "loss": 0.4949, "step": 18670 }, { "epoch": 0.3959831180674853, "grad_norm": 0.34918004274368286, "learning_rate": 1.8142845075139976e-05, "loss": 0.4821, "step": 18671 }, { "epoch": 0.39600432652541834, "grad_norm": 0.33040690422058105, "learning_rate": 1.8142651487925355e-05, "loss": 0.4812, "step": 18672 }, { "epoch": 0.39602553498335136, "grad_norm": 0.3020267188549042, "learning_rate": 1.814245789165455e-05, "loss": 0.4213, "step": 18673 }, { "epoch": 0.3960467434412844, "grad_norm": 0.4303995370864868, "learning_rate": 1.8142264286327774e-05, "loss": 0.4975, "step": 18674 }, { "epoch": 0.3960679518992174, "grad_norm": 0.33435729146003723, "learning_rate": 1.814207067194524e-05, "loss": 0.5268, "step": 18675 }, { "epoch": 0.3960891603571504, "grad_norm": 0.3933495283126831, "learning_rate": 1.814187704850717e-05, "loss": 0.4665, "step": 18676 }, { "epoch": 0.39611036881508344, "grad_norm": 0.38242191076278687, "learning_rate": 1.814168341601377e-05, "loss": 0.5756, "step": 18677 }, { "epoch": 0.39613157727301646, "grad_norm": 0.4451107680797577, "learning_rate": 1.8141489774465265e-05, "loss": 0.4657, "step": 18678 }, { "epoch": 0.39615278573094953, "grad_norm": 0.5487915277481079, "learning_rate": 1.8141296123861866e-05, "loss": 0.4592, "step": 18679 }, { "epoch": 0.39617399418888255, "grad_norm": 0.35223841667175293, "learning_rate": 1.8141102464203783e-05, "loss": 0.5681, "step": 18680 }, { "epoch": 0.39619520264681557, "grad_norm": 0.3449506163597107, "learning_rate": 1.8140908795491243e-05, "loss": 0.5146, "step": 18681 }, { "epoch": 0.3962164111047486, "grad_norm": 0.35795196890830994, "learning_rate": 1.814071511772445e-05, "loss": 0.4643, "step": 18682 }, { "epoch": 0.3962376195626816, "grad_norm": 0.31700414419174194, "learning_rate": 1.8140521430903625e-05, "loss": 0.5578, "step": 18683 }, { "epoch": 0.39625882802061463, "grad_norm": 0.32396164536476135, "learning_rate": 1.8140327735028985e-05, "loss": 0.4458, "step": 18684 }, { "epoch": 0.39628003647854765, "grad_norm": 0.3479122817516327, "learning_rate": 1.814013403010074e-05, "loss": 0.4927, "step": 18685 }, { "epoch": 0.39630124493648067, "grad_norm": 0.4033757746219635, "learning_rate": 1.813994031611911e-05, "loss": 0.5317, "step": 18686 }, { "epoch": 0.3963224533944137, "grad_norm": 0.34180906414985657, "learning_rate": 1.8139746593084312e-05, "loss": 0.5747, "step": 18687 }, { "epoch": 0.3963436618523467, "grad_norm": 0.3332549035549164, "learning_rate": 1.8139552860996557e-05, "loss": 0.4444, "step": 18688 }, { "epoch": 0.3963648703102797, "grad_norm": 0.30828335881233215, "learning_rate": 1.8139359119856062e-05, "loss": 0.5157, "step": 18689 }, { "epoch": 0.39638607876821275, "grad_norm": 0.3335855007171631, "learning_rate": 1.8139165369663042e-05, "loss": 0.4432, "step": 18690 }, { "epoch": 0.39640728722614577, "grad_norm": 0.3900514841079712, "learning_rate": 1.813897161041771e-05, "loss": 0.5842, "step": 18691 }, { "epoch": 0.3964284956840788, "grad_norm": 0.34636008739471436, "learning_rate": 1.813877784212029e-05, "loss": 0.4862, "step": 18692 }, { "epoch": 0.39644970414201186, "grad_norm": 0.3091670274734497, "learning_rate": 1.813858406477099e-05, "loss": 0.4611, "step": 18693 }, { "epoch": 0.3964709125999449, "grad_norm": 0.3964371979236603, "learning_rate": 1.8138390278370024e-05, "loss": 0.5616, "step": 18694 }, { "epoch": 0.3964921210578779, "grad_norm": 0.4559161365032196, "learning_rate": 1.8138196482917614e-05, "loss": 0.5714, "step": 18695 }, { "epoch": 0.3965133295158109, "grad_norm": 0.3511253595352173, "learning_rate": 1.8138002678413973e-05, "loss": 0.5325, "step": 18696 }, { "epoch": 0.39653453797374394, "grad_norm": 0.3812786340713501, "learning_rate": 1.8137808864859318e-05, "loss": 0.5889, "step": 18697 }, { "epoch": 0.39655574643167696, "grad_norm": 0.36096858978271484, "learning_rate": 1.8137615042253857e-05, "loss": 0.5055, "step": 18698 }, { "epoch": 0.39657695488961, "grad_norm": 0.34413841366767883, "learning_rate": 1.8137421210597813e-05, "loss": 0.5388, "step": 18699 }, { "epoch": 0.396598163347543, "grad_norm": 0.30282410979270935, "learning_rate": 1.8137227369891404e-05, "loss": 0.4905, "step": 18700 }, { "epoch": 0.396619371805476, "grad_norm": 0.5186243653297424, "learning_rate": 1.8137033520134838e-05, "loss": 0.5195, "step": 18701 }, { "epoch": 0.39664058026340904, "grad_norm": 0.335967481136322, "learning_rate": 1.8136839661328335e-05, "loss": 0.4538, "step": 18702 }, { "epoch": 0.39666178872134206, "grad_norm": 0.3609250783920288, "learning_rate": 1.813664579347211e-05, "loss": 0.4916, "step": 18703 }, { "epoch": 0.3966829971792751, "grad_norm": 0.5282996892929077, "learning_rate": 1.8136451916566374e-05, "loss": 0.4573, "step": 18704 }, { "epoch": 0.3967042056372081, "grad_norm": 0.3347100615501404, "learning_rate": 1.8136258030611354e-05, "loss": 0.4982, "step": 18705 }, { "epoch": 0.3967254140951411, "grad_norm": 0.2978985011577606, "learning_rate": 1.8136064135607253e-05, "loss": 0.4376, "step": 18706 }, { "epoch": 0.3967466225530742, "grad_norm": 0.34678295254707336, "learning_rate": 1.8135870231554296e-05, "loss": 0.4802, "step": 18707 }, { "epoch": 0.3967678310110072, "grad_norm": 0.39448297023773193, "learning_rate": 1.813567631845269e-05, "loss": 0.484, "step": 18708 }, { "epoch": 0.39678903946894023, "grad_norm": 0.35672423243522644, "learning_rate": 1.813548239630266e-05, "loss": 0.5298, "step": 18709 }, { "epoch": 0.39681024792687325, "grad_norm": 0.3485356569290161, "learning_rate": 1.813528846510442e-05, "loss": 0.5829, "step": 18710 }, { "epoch": 0.39683145638480627, "grad_norm": 0.5209095478057861, "learning_rate": 1.8135094524858174e-05, "loss": 0.4679, "step": 18711 }, { "epoch": 0.3968526648427393, "grad_norm": 0.3599136173725128, "learning_rate": 1.813490057556415e-05, "loss": 0.4969, "step": 18712 }, { "epoch": 0.3968738733006723, "grad_norm": 0.33571550250053406, "learning_rate": 1.8134706617222565e-05, "loss": 0.4982, "step": 18713 }, { "epoch": 0.39689508175860533, "grad_norm": 0.366146981716156, "learning_rate": 1.813451264983362e-05, "loss": 0.5565, "step": 18714 }, { "epoch": 0.39691629021653835, "grad_norm": 0.31863412261009216, "learning_rate": 1.813431867339755e-05, "loss": 0.4716, "step": 18715 }, { "epoch": 0.39693749867447137, "grad_norm": 0.5993453860282898, "learning_rate": 1.8134124687914558e-05, "loss": 0.4404, "step": 18716 }, { "epoch": 0.3969587071324044, "grad_norm": 0.33602243661880493, "learning_rate": 1.813393069338486e-05, "loss": 0.5519, "step": 18717 }, { "epoch": 0.3969799155903374, "grad_norm": 0.3674018383026123, "learning_rate": 1.813373668980868e-05, "loss": 0.5535, "step": 18718 }, { "epoch": 0.3970011240482704, "grad_norm": 0.3420493006706238, "learning_rate": 1.813354267718623e-05, "loss": 0.4656, "step": 18719 }, { "epoch": 0.3970223325062035, "grad_norm": 0.31570446491241455, "learning_rate": 1.813334865551772e-05, "loss": 0.4674, "step": 18720 }, { "epoch": 0.3970435409641365, "grad_norm": 0.40068885684013367, "learning_rate": 1.813315462480337e-05, "loss": 0.5154, "step": 18721 }, { "epoch": 0.39706474942206954, "grad_norm": 0.36181411147117615, "learning_rate": 1.81329605850434e-05, "loss": 0.5445, "step": 18722 }, { "epoch": 0.39708595788000256, "grad_norm": 0.32077130675315857, "learning_rate": 1.813276653623802e-05, "loss": 0.4829, "step": 18723 }, { "epoch": 0.3971071663379356, "grad_norm": 0.3994789123535156, "learning_rate": 1.8132572478387447e-05, "loss": 0.4427, "step": 18724 }, { "epoch": 0.3971283747958686, "grad_norm": 0.34055185317993164, "learning_rate": 1.8132378411491898e-05, "loss": 0.4714, "step": 18725 }, { "epoch": 0.3971495832538016, "grad_norm": 0.3736891448497772, "learning_rate": 1.813218433555159e-05, "loss": 0.441, "step": 18726 }, { "epoch": 0.39717079171173464, "grad_norm": 0.36407923698425293, "learning_rate": 1.8131990250566733e-05, "loss": 0.5751, "step": 18727 }, { "epoch": 0.39719200016966766, "grad_norm": 0.3598787188529968, "learning_rate": 1.813179615653755e-05, "loss": 0.4818, "step": 18728 }, { "epoch": 0.3972132086276007, "grad_norm": 0.36324194073677063, "learning_rate": 1.8131602053464255e-05, "loss": 0.6063, "step": 18729 }, { "epoch": 0.3972344170855337, "grad_norm": 0.36889541149139404, "learning_rate": 1.813140794134706e-05, "loss": 0.5427, "step": 18730 }, { "epoch": 0.3972556255434667, "grad_norm": 0.38705116510391235, "learning_rate": 1.8131213820186186e-05, "loss": 0.5353, "step": 18731 }, { "epoch": 0.39727683400139974, "grad_norm": 0.3189070522785187, "learning_rate": 1.8131019689981846e-05, "loss": 0.5125, "step": 18732 }, { "epoch": 0.39729804245933276, "grad_norm": 0.3904311954975128, "learning_rate": 1.8130825550734257e-05, "loss": 0.4477, "step": 18733 }, { "epoch": 0.39731925091726583, "grad_norm": 0.30658406019210815, "learning_rate": 1.8130631402443636e-05, "loss": 0.5032, "step": 18734 }, { "epoch": 0.39734045937519885, "grad_norm": 0.35130658745765686, "learning_rate": 1.8130437245110193e-05, "loss": 0.4734, "step": 18735 }, { "epoch": 0.39736166783313187, "grad_norm": 0.3581729829311371, "learning_rate": 1.813024307873415e-05, "loss": 0.5284, "step": 18736 }, { "epoch": 0.3973828762910649, "grad_norm": 0.33396318554878235, "learning_rate": 1.813004890331572e-05, "loss": 0.4354, "step": 18737 }, { "epoch": 0.3974040847489979, "grad_norm": 0.43690457940101624, "learning_rate": 1.8129854718855126e-05, "loss": 0.4869, "step": 18738 }, { "epoch": 0.39742529320693093, "grad_norm": 0.3296854794025421, "learning_rate": 1.8129660525352572e-05, "loss": 0.4913, "step": 18739 }, { "epoch": 0.39744650166486395, "grad_norm": 0.35857510566711426, "learning_rate": 1.8129466322808285e-05, "loss": 0.5544, "step": 18740 }, { "epoch": 0.39746771012279697, "grad_norm": 0.34350642561912537, "learning_rate": 1.8129272111222473e-05, "loss": 0.5808, "step": 18741 }, { "epoch": 0.39748891858073, "grad_norm": 0.3608661890029907, "learning_rate": 1.8129077890595353e-05, "loss": 0.5672, "step": 18742 }, { "epoch": 0.397510127038663, "grad_norm": 0.3698957562446594, "learning_rate": 1.812888366092715e-05, "loss": 0.4919, "step": 18743 }, { "epoch": 0.397531335496596, "grad_norm": 0.4155077040195465, "learning_rate": 1.8128689422218065e-05, "loss": 0.5023, "step": 18744 }, { "epoch": 0.39755254395452905, "grad_norm": 0.334780752658844, "learning_rate": 1.8128495174468327e-05, "loss": 0.5772, "step": 18745 }, { "epoch": 0.39757375241246207, "grad_norm": 0.36715957522392273, "learning_rate": 1.812830091767815e-05, "loss": 0.515, "step": 18746 }, { "epoch": 0.3975949608703951, "grad_norm": 0.31803637742996216, "learning_rate": 1.812810665184774e-05, "loss": 0.5048, "step": 18747 }, { "epoch": 0.39761616932832816, "grad_norm": 0.3532049059867859, "learning_rate": 1.8127912376977325e-05, "loss": 0.5116, "step": 18748 }, { "epoch": 0.3976373777862612, "grad_norm": 0.36397433280944824, "learning_rate": 1.8127718093067117e-05, "loss": 0.486, "step": 18749 }, { "epoch": 0.3976585862441942, "grad_norm": 0.41510921716690063, "learning_rate": 1.812752380011733e-05, "loss": 0.4592, "step": 18750 }, { "epoch": 0.3976797947021272, "grad_norm": 0.4162440001964569, "learning_rate": 1.812732949812818e-05, "loss": 0.4568, "step": 18751 }, { "epoch": 0.39770100316006024, "grad_norm": 0.3170779347419739, "learning_rate": 1.812713518709989e-05, "loss": 0.5011, "step": 18752 }, { "epoch": 0.39772221161799326, "grad_norm": 0.33831700682640076, "learning_rate": 1.8126940867032666e-05, "loss": 0.5525, "step": 18753 }, { "epoch": 0.3977434200759263, "grad_norm": 0.38069742918014526, "learning_rate": 1.812674653792673e-05, "loss": 0.5097, "step": 18754 }, { "epoch": 0.3977646285338593, "grad_norm": 0.5500456094741821, "learning_rate": 1.8126552199782298e-05, "loss": 0.4916, "step": 18755 }, { "epoch": 0.3977858369917923, "grad_norm": 0.3333398699760437, "learning_rate": 1.812635785259959e-05, "loss": 0.4582, "step": 18756 }, { "epoch": 0.39780704544972534, "grad_norm": 0.3459950387477875, "learning_rate": 1.8126163496378808e-05, "loss": 0.4906, "step": 18757 }, { "epoch": 0.39782825390765836, "grad_norm": 0.38676193356513977, "learning_rate": 1.8125969131120183e-05, "loss": 0.4382, "step": 18758 }, { "epoch": 0.3978494623655914, "grad_norm": 0.3494674563407898, "learning_rate": 1.8125774756823926e-05, "loss": 0.4601, "step": 18759 }, { "epoch": 0.3978706708235244, "grad_norm": 0.36108314990997314, "learning_rate": 1.812558037349025e-05, "loss": 0.496, "step": 18760 }, { "epoch": 0.3978918792814575, "grad_norm": 0.44225218892097473, "learning_rate": 1.8125385981119374e-05, "loss": 0.4967, "step": 18761 }, { "epoch": 0.3979130877393905, "grad_norm": 0.34202730655670166, "learning_rate": 1.812519157971152e-05, "loss": 0.4388, "step": 18762 }, { "epoch": 0.3979342961973235, "grad_norm": 0.3495181202888489, "learning_rate": 1.8124997169266892e-05, "loss": 0.4815, "step": 18763 }, { "epoch": 0.39795550465525653, "grad_norm": 0.38466310501098633, "learning_rate": 1.8124802749785716e-05, "loss": 0.6488, "step": 18764 }, { "epoch": 0.39797671311318955, "grad_norm": 0.3610190153121948, "learning_rate": 1.8124608321268204e-05, "loss": 0.4783, "step": 18765 }, { "epoch": 0.39799792157112257, "grad_norm": 0.3595198392868042, "learning_rate": 1.812441388371457e-05, "loss": 0.519, "step": 18766 }, { "epoch": 0.3980191300290556, "grad_norm": 0.37428852915763855, "learning_rate": 1.8124219437125037e-05, "loss": 0.519, "step": 18767 }, { "epoch": 0.3980403384869886, "grad_norm": 0.3206784427165985, "learning_rate": 1.812402498149982e-05, "loss": 0.5132, "step": 18768 }, { "epoch": 0.39806154694492163, "grad_norm": 0.35734522342681885, "learning_rate": 1.8123830516839128e-05, "loss": 0.4991, "step": 18769 }, { "epoch": 0.39808275540285465, "grad_norm": 0.42301809787750244, "learning_rate": 1.812363604314319e-05, "loss": 0.5314, "step": 18770 }, { "epoch": 0.39810396386078767, "grad_norm": 0.3424355089664459, "learning_rate": 1.8123441560412206e-05, "loss": 0.4829, "step": 18771 }, { "epoch": 0.3981251723187207, "grad_norm": 0.3672114312648773, "learning_rate": 1.8123247068646404e-05, "loss": 0.5523, "step": 18772 }, { "epoch": 0.3981463807766537, "grad_norm": 0.3399466574192047, "learning_rate": 1.8123052567845997e-05, "loss": 0.4993, "step": 18773 }, { "epoch": 0.3981675892345867, "grad_norm": 0.3444030284881592, "learning_rate": 1.8122858058011204e-05, "loss": 0.5479, "step": 18774 }, { "epoch": 0.3981887976925198, "grad_norm": 0.326241135597229, "learning_rate": 1.8122663539142235e-05, "loss": 0.5788, "step": 18775 }, { "epoch": 0.3982100061504528, "grad_norm": 0.34816357493400574, "learning_rate": 1.812246901123931e-05, "loss": 0.5078, "step": 18776 }, { "epoch": 0.39823121460838584, "grad_norm": 0.3378174602985382, "learning_rate": 1.812227447430265e-05, "loss": 0.5805, "step": 18777 }, { "epoch": 0.39825242306631886, "grad_norm": 0.302362859249115, "learning_rate": 1.8122079928332464e-05, "loss": 0.4611, "step": 18778 }, { "epoch": 0.3982736315242519, "grad_norm": 0.3377666175365448, "learning_rate": 1.812188537332897e-05, "loss": 0.4601, "step": 18779 }, { "epoch": 0.3982948399821849, "grad_norm": 0.36657917499542236, "learning_rate": 1.812169080929239e-05, "loss": 0.4848, "step": 18780 }, { "epoch": 0.3983160484401179, "grad_norm": 0.32645532488822937, "learning_rate": 1.8121496236222932e-05, "loss": 0.4887, "step": 18781 }, { "epoch": 0.39833725689805094, "grad_norm": 0.31736689805984497, "learning_rate": 1.812130165412082e-05, "loss": 0.4594, "step": 18782 }, { "epoch": 0.39835846535598396, "grad_norm": 0.32283690571784973, "learning_rate": 1.8121107062986262e-05, "loss": 0.4721, "step": 18783 }, { "epoch": 0.398379673813917, "grad_norm": 0.3229328393936157, "learning_rate": 1.812091246281948e-05, "loss": 0.5448, "step": 18784 }, { "epoch": 0.39840088227185, "grad_norm": 0.3576570451259613, "learning_rate": 1.8120717853620692e-05, "loss": 0.5454, "step": 18785 }, { "epoch": 0.398422090729783, "grad_norm": 0.3239593505859375, "learning_rate": 1.8120523235390114e-05, "loss": 0.4886, "step": 18786 }, { "epoch": 0.39844329918771604, "grad_norm": 0.4152737259864807, "learning_rate": 1.8120328608127958e-05, "loss": 0.5623, "step": 18787 }, { "epoch": 0.39846450764564906, "grad_norm": 0.33635789155960083, "learning_rate": 1.8120133971834443e-05, "loss": 0.541, "step": 18788 }, { "epoch": 0.39848571610358213, "grad_norm": 0.3239947557449341, "learning_rate": 1.8119939326509787e-05, "loss": 0.4308, "step": 18789 }, { "epoch": 0.39850692456151515, "grad_norm": 0.3249642848968506, "learning_rate": 1.8119744672154202e-05, "loss": 0.4505, "step": 18790 }, { "epoch": 0.39852813301944817, "grad_norm": 0.3084847927093506, "learning_rate": 1.811955000876791e-05, "loss": 0.4342, "step": 18791 }, { "epoch": 0.3985493414773812, "grad_norm": 0.37513118982315063, "learning_rate": 1.8119355336351123e-05, "loss": 0.4056, "step": 18792 }, { "epoch": 0.3985705499353142, "grad_norm": 0.3349936306476593, "learning_rate": 1.8119160654904064e-05, "loss": 0.5174, "step": 18793 }, { "epoch": 0.39859175839324723, "grad_norm": 0.498519629240036, "learning_rate": 1.811896596442694e-05, "loss": 0.4756, "step": 18794 }, { "epoch": 0.39861296685118025, "grad_norm": 0.3402591645717621, "learning_rate": 1.8118771264919974e-05, "loss": 0.5394, "step": 18795 }, { "epoch": 0.39863417530911327, "grad_norm": 0.722159743309021, "learning_rate": 1.8118576556383384e-05, "loss": 0.5373, "step": 18796 }, { "epoch": 0.3986553837670463, "grad_norm": 0.3275336027145386, "learning_rate": 1.811838183881738e-05, "loss": 0.4759, "step": 18797 }, { "epoch": 0.3986765922249793, "grad_norm": 0.4231240451335907, "learning_rate": 1.8118187112222182e-05, "loss": 0.4569, "step": 18798 }, { "epoch": 0.39869780068291233, "grad_norm": 0.34368252754211426, "learning_rate": 1.811799237659801e-05, "loss": 0.5547, "step": 18799 }, { "epoch": 0.39871900914084535, "grad_norm": 0.3516043722629547, "learning_rate": 1.8117797631945074e-05, "loss": 0.5295, "step": 18800 }, { "epoch": 0.39874021759877837, "grad_norm": 0.33577755093574524, "learning_rate": 1.8117602878263597e-05, "loss": 0.5408, "step": 18801 }, { "epoch": 0.39876142605671144, "grad_norm": 0.3357478082180023, "learning_rate": 1.8117408115553792e-05, "loss": 0.4655, "step": 18802 }, { "epoch": 0.39878263451464446, "grad_norm": 0.3712656795978546, "learning_rate": 1.8117213343815876e-05, "loss": 0.5621, "step": 18803 }, { "epoch": 0.3988038429725775, "grad_norm": 0.34778741002082825, "learning_rate": 1.8117018563050063e-05, "loss": 0.4995, "step": 18804 }, { "epoch": 0.3988250514305105, "grad_norm": 0.3410731554031372, "learning_rate": 1.8116823773256576e-05, "loss": 0.4329, "step": 18805 }, { "epoch": 0.3988462598884435, "grad_norm": 0.3574330806732178, "learning_rate": 1.811662897443563e-05, "loss": 0.5662, "step": 18806 }, { "epoch": 0.39886746834637654, "grad_norm": 0.3549689054489136, "learning_rate": 1.8116434166587435e-05, "loss": 0.5127, "step": 18807 }, { "epoch": 0.39888867680430956, "grad_norm": 0.33684656023979187, "learning_rate": 1.8116239349712214e-05, "loss": 0.5711, "step": 18808 }, { "epoch": 0.3989098852622426, "grad_norm": 0.3576999306678772, "learning_rate": 1.811604452381018e-05, "loss": 0.5138, "step": 18809 }, { "epoch": 0.3989310937201756, "grad_norm": 0.3378012776374817, "learning_rate": 1.8115849688881556e-05, "loss": 0.5469, "step": 18810 }, { "epoch": 0.3989523021781086, "grad_norm": 0.34481316804885864, "learning_rate": 1.8115654844926553e-05, "loss": 0.5763, "step": 18811 }, { "epoch": 0.39897351063604164, "grad_norm": 0.3571138381958008, "learning_rate": 1.811545999194539e-05, "loss": 0.5241, "step": 18812 }, { "epoch": 0.39899471909397466, "grad_norm": 0.7004662156105042, "learning_rate": 1.8115265129938283e-05, "loss": 0.4989, "step": 18813 }, { "epoch": 0.3990159275519077, "grad_norm": 0.37483370304107666, "learning_rate": 1.8115070258905446e-05, "loss": 0.4879, "step": 18814 }, { "epoch": 0.3990371360098407, "grad_norm": 0.34380802512168884, "learning_rate": 1.81148753788471e-05, "loss": 0.4378, "step": 18815 }, { "epoch": 0.3990583444677738, "grad_norm": 0.3328886032104492, "learning_rate": 1.811468048976346e-05, "loss": 0.5116, "step": 18816 }, { "epoch": 0.3990795529257068, "grad_norm": 0.3036417067050934, "learning_rate": 1.811448559165474e-05, "loss": 0.506, "step": 18817 }, { "epoch": 0.3991007613836398, "grad_norm": 0.9012380838394165, "learning_rate": 1.811429068452116e-05, "loss": 0.5032, "step": 18818 }, { "epoch": 0.39912196984157283, "grad_norm": 0.45248082280158997, "learning_rate": 1.8114095768362942e-05, "loss": 0.5183, "step": 18819 }, { "epoch": 0.39914317829950585, "grad_norm": 0.377765029668808, "learning_rate": 1.8113900843180295e-05, "loss": 0.4844, "step": 18820 }, { "epoch": 0.39916438675743887, "grad_norm": 0.31935635209083557, "learning_rate": 1.8113705908973434e-05, "loss": 0.5303, "step": 18821 }, { "epoch": 0.3991855952153719, "grad_norm": 0.3236740827560425, "learning_rate": 1.8113510965742585e-05, "loss": 0.4766, "step": 18822 }, { "epoch": 0.3992068036733049, "grad_norm": 0.7463613152503967, "learning_rate": 1.8113316013487955e-05, "loss": 0.5485, "step": 18823 }, { "epoch": 0.39922801213123793, "grad_norm": 0.37845203280448914, "learning_rate": 1.8113121052209768e-05, "loss": 0.5311, "step": 18824 }, { "epoch": 0.39924922058917095, "grad_norm": 0.3590981066226959, "learning_rate": 1.811292608190824e-05, "loss": 0.4998, "step": 18825 }, { "epoch": 0.39927042904710397, "grad_norm": 0.35466402769088745, "learning_rate": 1.8112731102583583e-05, "loss": 0.5072, "step": 18826 }, { "epoch": 0.399291637505037, "grad_norm": 0.36021700501441956, "learning_rate": 1.8112536114236018e-05, "loss": 0.5129, "step": 18827 }, { "epoch": 0.39931284596297, "grad_norm": 0.3523422181606293, "learning_rate": 1.811234111686576e-05, "loss": 0.4946, "step": 18828 }, { "epoch": 0.39933405442090303, "grad_norm": 0.3809404671192169, "learning_rate": 1.8112146110473027e-05, "loss": 0.5127, "step": 18829 }, { "epoch": 0.3993552628788361, "grad_norm": 0.35349881649017334, "learning_rate": 1.8111951095058037e-05, "loss": 0.5458, "step": 18830 }, { "epoch": 0.3993764713367691, "grad_norm": 0.3391541540622711, "learning_rate": 1.8111756070621004e-05, "loss": 0.4724, "step": 18831 }, { "epoch": 0.39939767979470214, "grad_norm": 0.34084323048591614, "learning_rate": 1.8111561037162145e-05, "loss": 0.4245, "step": 18832 }, { "epoch": 0.39941888825263516, "grad_norm": 0.33171355724334717, "learning_rate": 1.811136599468168e-05, "loss": 0.5198, "step": 18833 }, { "epoch": 0.3994400967105682, "grad_norm": 0.38701796531677246, "learning_rate": 1.8111170943179825e-05, "loss": 0.5131, "step": 18834 }, { "epoch": 0.3994613051685012, "grad_norm": 0.36217454075813293, "learning_rate": 1.8110975882656796e-05, "loss": 0.5211, "step": 18835 }, { "epoch": 0.3994825136264342, "grad_norm": 0.3415180742740631, "learning_rate": 1.811078081311281e-05, "loss": 0.4775, "step": 18836 }, { "epoch": 0.39950372208436724, "grad_norm": 0.3829018175601959, "learning_rate": 1.8110585734548085e-05, "loss": 0.613, "step": 18837 }, { "epoch": 0.39952493054230026, "grad_norm": 0.33341261744499207, "learning_rate": 1.811039064696283e-05, "loss": 0.445, "step": 18838 }, { "epoch": 0.3995461390002333, "grad_norm": 0.33534106612205505, "learning_rate": 1.8110195550357277e-05, "loss": 0.524, "step": 18839 }, { "epoch": 0.3995673474581663, "grad_norm": 0.4785049855709076, "learning_rate": 1.811000044473163e-05, "loss": 0.4943, "step": 18840 }, { "epoch": 0.3995885559160993, "grad_norm": 0.32409390807151794, "learning_rate": 1.8109805330086115e-05, "loss": 0.5467, "step": 18841 }, { "epoch": 0.39960976437403234, "grad_norm": 0.35994282364845276, "learning_rate": 1.810961020642094e-05, "loss": 0.5688, "step": 18842 }, { "epoch": 0.3996309728319654, "grad_norm": 0.39169424772262573, "learning_rate": 1.8109415073736333e-05, "loss": 0.5496, "step": 18843 }, { "epoch": 0.39965218128989843, "grad_norm": 0.3497316241264343, "learning_rate": 1.8109219932032505e-05, "loss": 0.5511, "step": 18844 }, { "epoch": 0.39967338974783145, "grad_norm": 0.3587910830974579, "learning_rate": 1.810902478130967e-05, "loss": 0.4479, "step": 18845 }, { "epoch": 0.3996945982057645, "grad_norm": 0.3854178786277771, "learning_rate": 1.8108829621568048e-05, "loss": 0.5771, "step": 18846 }, { "epoch": 0.3997158066636975, "grad_norm": 0.42705801129341125, "learning_rate": 1.8108634452807857e-05, "loss": 0.5278, "step": 18847 }, { "epoch": 0.3997370151216305, "grad_norm": 0.38212355971336365, "learning_rate": 1.8108439275029312e-05, "loss": 0.5343, "step": 18848 }, { "epoch": 0.39975822357956353, "grad_norm": 0.3450043797492981, "learning_rate": 1.810824408823263e-05, "loss": 0.5291, "step": 18849 }, { "epoch": 0.39977943203749655, "grad_norm": 0.32643648982048035, "learning_rate": 1.8108048892418034e-05, "loss": 0.4804, "step": 18850 }, { "epoch": 0.39980064049542957, "grad_norm": 0.3708229660987854, "learning_rate": 1.8107853687585732e-05, "loss": 0.4939, "step": 18851 }, { "epoch": 0.3998218489533626, "grad_norm": 0.3430240750312805, "learning_rate": 1.810765847373595e-05, "loss": 0.5286, "step": 18852 }, { "epoch": 0.3998430574112956, "grad_norm": 0.3536815345287323, "learning_rate": 1.8107463250868897e-05, "loss": 0.4259, "step": 18853 }, { "epoch": 0.39986426586922863, "grad_norm": 0.35858485102653503, "learning_rate": 1.8107268018984794e-05, "loss": 0.4819, "step": 18854 }, { "epoch": 0.39988547432716165, "grad_norm": 0.31124982237815857, "learning_rate": 1.8107072778083857e-05, "loss": 0.3872, "step": 18855 }, { "epoch": 0.39990668278509467, "grad_norm": 0.31708934903144836, "learning_rate": 1.8106877528166308e-05, "loss": 0.4518, "step": 18856 }, { "epoch": 0.39992789124302774, "grad_norm": 0.37718039751052856, "learning_rate": 1.8106682269232358e-05, "loss": 0.4744, "step": 18857 }, { "epoch": 0.39994909970096076, "grad_norm": 0.3269127309322357, "learning_rate": 1.8106487001282226e-05, "loss": 0.3905, "step": 18858 }, { "epoch": 0.3999703081588938, "grad_norm": 0.3267115354537964, "learning_rate": 1.810629172431613e-05, "loss": 0.5328, "step": 18859 }, { "epoch": 0.3999915166168268, "grad_norm": 0.4143509268760681, "learning_rate": 1.810609643833429e-05, "loss": 0.5188, "step": 18860 }, { "epoch": 0.4000127250747598, "grad_norm": 0.35757389664649963, "learning_rate": 1.8105901143336916e-05, "loss": 0.4901, "step": 18861 }, { "epoch": 0.40003393353269284, "grad_norm": 0.8762553334236145, "learning_rate": 1.8105705839324228e-05, "loss": 0.5897, "step": 18862 }, { "epoch": 0.40005514199062586, "grad_norm": 0.32726749777793884, "learning_rate": 1.8105510526296446e-05, "loss": 0.3846, "step": 18863 }, { "epoch": 0.4000763504485589, "grad_norm": 0.5357391238212585, "learning_rate": 1.8105315204253784e-05, "loss": 0.425, "step": 18864 }, { "epoch": 0.4000975589064919, "grad_norm": 0.3494180738925934, "learning_rate": 1.8105119873196466e-05, "loss": 0.5612, "step": 18865 }, { "epoch": 0.4001187673644249, "grad_norm": 0.3692544102668762, "learning_rate": 1.8104924533124698e-05, "loss": 0.5402, "step": 18866 }, { "epoch": 0.40013997582235794, "grad_norm": 0.3826180696487427, "learning_rate": 1.8104729184038706e-05, "loss": 0.5749, "step": 18867 }, { "epoch": 0.40016118428029096, "grad_norm": 0.3271644115447998, "learning_rate": 1.810453382593871e-05, "loss": 0.4738, "step": 18868 }, { "epoch": 0.400182392738224, "grad_norm": 0.3481942117214203, "learning_rate": 1.8104338458824916e-05, "loss": 0.4721, "step": 18869 }, { "epoch": 0.40020360119615705, "grad_norm": 0.5901103019714355, "learning_rate": 1.8104143082697544e-05, "loss": 0.5742, "step": 18870 }, { "epoch": 0.4002248096540901, "grad_norm": 0.36679768562316895, "learning_rate": 1.810394769755682e-05, "loss": 0.48, "step": 18871 }, { "epoch": 0.4002460181120231, "grad_norm": 0.3624102771282196, "learning_rate": 1.8103752303402956e-05, "loss": 0.5243, "step": 18872 }, { "epoch": 0.4002672265699561, "grad_norm": 0.37443768978118896, "learning_rate": 1.8103556900236164e-05, "loss": 0.5504, "step": 18873 }, { "epoch": 0.40028843502788913, "grad_norm": 0.3099381923675537, "learning_rate": 1.810336148805667e-05, "loss": 0.5167, "step": 18874 }, { "epoch": 0.40030964348582215, "grad_norm": 0.36801332235336304, "learning_rate": 1.810316606686469e-05, "loss": 0.5108, "step": 18875 }, { "epoch": 0.4003308519437552, "grad_norm": 0.3816932439804077, "learning_rate": 1.8102970636660437e-05, "loss": 0.5664, "step": 18876 }, { "epoch": 0.4003520604016882, "grad_norm": 0.3377968370914459, "learning_rate": 1.810277519744413e-05, "loss": 0.5869, "step": 18877 }, { "epoch": 0.4003732688596212, "grad_norm": 0.3987354338169098, "learning_rate": 1.8102579749215988e-05, "loss": 0.5227, "step": 18878 }, { "epoch": 0.40039447731755423, "grad_norm": 0.4390489161014557, "learning_rate": 1.8102384291976222e-05, "loss": 0.4229, "step": 18879 }, { "epoch": 0.40041568577548725, "grad_norm": 0.3369111120700836, "learning_rate": 1.810218882572506e-05, "loss": 0.4424, "step": 18880 }, { "epoch": 0.40043689423342027, "grad_norm": 0.34259188175201416, "learning_rate": 1.8101993350462712e-05, "loss": 0.5136, "step": 18881 }, { "epoch": 0.4004581026913533, "grad_norm": 0.41323208808898926, "learning_rate": 1.8101797866189395e-05, "loss": 0.5456, "step": 18882 }, { "epoch": 0.4004793111492863, "grad_norm": 0.34369468688964844, "learning_rate": 1.8101602372905335e-05, "loss": 0.4896, "step": 18883 }, { "epoch": 0.4005005196072194, "grad_norm": 0.38984790444374084, "learning_rate": 1.8101406870610742e-05, "loss": 0.4958, "step": 18884 }, { "epoch": 0.4005217280651524, "grad_norm": 0.36472535133361816, "learning_rate": 1.810121135930583e-05, "loss": 0.501, "step": 18885 }, { "epoch": 0.4005429365230854, "grad_norm": 0.34591054916381836, "learning_rate": 1.8101015838990826e-05, "loss": 0.5475, "step": 18886 }, { "epoch": 0.40056414498101844, "grad_norm": 0.35421356558799744, "learning_rate": 1.8100820309665938e-05, "loss": 0.4997, "step": 18887 }, { "epoch": 0.40058535343895146, "grad_norm": 0.36596354842185974, "learning_rate": 1.810062477133139e-05, "loss": 0.5433, "step": 18888 }, { "epoch": 0.4006065618968845, "grad_norm": 0.32396769523620605, "learning_rate": 1.81004292239874e-05, "loss": 0.5153, "step": 18889 }, { "epoch": 0.4006277703548175, "grad_norm": 0.30709415674209595, "learning_rate": 1.8100233667634178e-05, "loss": 0.5445, "step": 18890 }, { "epoch": 0.4006489788127505, "grad_norm": 0.4134877920150757, "learning_rate": 1.810003810227195e-05, "loss": 0.4636, "step": 18891 }, { "epoch": 0.40067018727068354, "grad_norm": 0.4621754288673401, "learning_rate": 1.8099842527900932e-05, "loss": 0.5419, "step": 18892 }, { "epoch": 0.40069139572861656, "grad_norm": 0.44580164551734924, "learning_rate": 1.8099646944521335e-05, "loss": 0.4828, "step": 18893 }, { "epoch": 0.4007126041865496, "grad_norm": 0.3290974199771881, "learning_rate": 1.8099451352133383e-05, "loss": 0.4745, "step": 18894 }, { "epoch": 0.4007338126444826, "grad_norm": 0.5212528109550476, "learning_rate": 1.809925575073729e-05, "loss": 0.5674, "step": 18895 }, { "epoch": 0.4007550211024156, "grad_norm": 0.37025701999664307, "learning_rate": 1.809906014033328e-05, "loss": 0.494, "step": 18896 }, { "epoch": 0.40077622956034864, "grad_norm": 0.3323313593864441, "learning_rate": 1.8098864520921558e-05, "loss": 0.4908, "step": 18897 }, { "epoch": 0.4007974380182817, "grad_norm": 0.4324296712875366, "learning_rate": 1.8098668892502353e-05, "loss": 0.5794, "step": 18898 }, { "epoch": 0.40081864647621473, "grad_norm": 0.32754161953926086, "learning_rate": 1.809847325507588e-05, "loss": 0.5316, "step": 18899 }, { "epoch": 0.40083985493414775, "grad_norm": 0.342149555683136, "learning_rate": 1.8098277608642354e-05, "loss": 0.4867, "step": 18900 }, { "epoch": 0.4008610633920808, "grad_norm": 0.3348373770713806, "learning_rate": 1.8098081953201995e-05, "loss": 0.5133, "step": 18901 }, { "epoch": 0.4008822718500138, "grad_norm": 0.3998241126537323, "learning_rate": 1.809788628875502e-05, "loss": 0.5623, "step": 18902 }, { "epoch": 0.4009034803079468, "grad_norm": 0.367675244808197, "learning_rate": 1.8097690615301644e-05, "loss": 0.541, "step": 18903 }, { "epoch": 0.40092468876587983, "grad_norm": 0.32782313227653503, "learning_rate": 1.809749493284209e-05, "loss": 0.5298, "step": 18904 }, { "epoch": 0.40094589722381285, "grad_norm": 0.3584284484386444, "learning_rate": 1.8097299241376568e-05, "loss": 0.4575, "step": 18905 }, { "epoch": 0.4009671056817459, "grad_norm": 0.4295356571674347, "learning_rate": 1.8097103540905304e-05, "loss": 0.5396, "step": 18906 }, { "epoch": 0.4009883141396789, "grad_norm": 0.32140621542930603, "learning_rate": 1.809690783142851e-05, "loss": 0.5881, "step": 18907 }, { "epoch": 0.4010095225976119, "grad_norm": 0.33152058720588684, "learning_rate": 1.8096712112946407e-05, "loss": 0.4816, "step": 18908 }, { "epoch": 0.40103073105554493, "grad_norm": 0.38522809743881226, "learning_rate": 1.8096516385459208e-05, "loss": 0.4598, "step": 18909 }, { "epoch": 0.40105193951347795, "grad_norm": 0.3180612027645111, "learning_rate": 1.8096320648967137e-05, "loss": 0.5253, "step": 18910 }, { "epoch": 0.401073147971411, "grad_norm": 0.31824445724487305, "learning_rate": 1.809612490347041e-05, "loss": 0.5149, "step": 18911 }, { "epoch": 0.40109435642934405, "grad_norm": 0.3595927357673645, "learning_rate": 1.809592914896924e-05, "loss": 0.5186, "step": 18912 }, { "epoch": 0.40111556488727707, "grad_norm": 0.3814825713634491, "learning_rate": 1.8095733385463848e-05, "loss": 0.4076, "step": 18913 }, { "epoch": 0.4011367733452101, "grad_norm": 0.3563101887702942, "learning_rate": 1.809553761295445e-05, "loss": 0.4665, "step": 18914 }, { "epoch": 0.4011579818031431, "grad_norm": 0.34037500619888306, "learning_rate": 1.8095341831441268e-05, "loss": 0.456, "step": 18915 }, { "epoch": 0.4011791902610761, "grad_norm": 0.8725358843803406, "learning_rate": 1.8095146040924517e-05, "loss": 0.5735, "step": 18916 }, { "epoch": 0.40120039871900914, "grad_norm": 0.3617461919784546, "learning_rate": 1.8094950241404416e-05, "loss": 0.5417, "step": 18917 }, { "epoch": 0.40122160717694216, "grad_norm": 0.3608349561691284, "learning_rate": 1.8094754432881178e-05, "loss": 0.5372, "step": 18918 }, { "epoch": 0.4012428156348752, "grad_norm": 0.3224469721317291, "learning_rate": 1.809455861535503e-05, "loss": 0.4539, "step": 18919 }, { "epoch": 0.4012640240928082, "grad_norm": 0.40160462260246277, "learning_rate": 1.8094362788826178e-05, "loss": 0.538, "step": 18920 }, { "epoch": 0.4012852325507412, "grad_norm": 0.3190821707248688, "learning_rate": 1.809416695329485e-05, "loss": 0.4918, "step": 18921 }, { "epoch": 0.40130644100867424, "grad_norm": 0.32616519927978516, "learning_rate": 1.809397110876126e-05, "loss": 0.5108, "step": 18922 }, { "epoch": 0.40132764946660726, "grad_norm": 0.3615504801273346, "learning_rate": 1.8093775255225624e-05, "loss": 0.5425, "step": 18923 }, { "epoch": 0.4013488579245403, "grad_norm": 0.32999706268310547, "learning_rate": 1.8093579392688162e-05, "loss": 0.5403, "step": 18924 }, { "epoch": 0.40137006638247336, "grad_norm": 0.3489651381969452, "learning_rate": 1.809338352114909e-05, "loss": 0.5469, "step": 18925 }, { "epoch": 0.4013912748404064, "grad_norm": 0.3500930368900299, "learning_rate": 1.809318764060863e-05, "loss": 0.4967, "step": 18926 }, { "epoch": 0.4014124832983394, "grad_norm": 0.35472825169563293, "learning_rate": 1.8092991751066994e-05, "loss": 0.493, "step": 18927 }, { "epoch": 0.4014336917562724, "grad_norm": 0.36161157488822937, "learning_rate": 1.8092795852524404e-05, "loss": 0.4755, "step": 18928 }, { "epoch": 0.40145490021420543, "grad_norm": 0.31111642718315125, "learning_rate": 1.809259994498108e-05, "loss": 0.4749, "step": 18929 }, { "epoch": 0.40147610867213845, "grad_norm": 0.35224297642707825, "learning_rate": 1.8092404028437235e-05, "loss": 0.4911, "step": 18930 }, { "epoch": 0.4014973171300715, "grad_norm": 0.35615262389183044, "learning_rate": 1.8092208102893085e-05, "loss": 0.5508, "step": 18931 }, { "epoch": 0.4015185255880045, "grad_norm": 0.37680792808532715, "learning_rate": 1.8092012168348854e-05, "loss": 0.5318, "step": 18932 }, { "epoch": 0.4015397340459375, "grad_norm": 0.3453710079193115, "learning_rate": 1.8091816224804756e-05, "loss": 0.4839, "step": 18933 }, { "epoch": 0.40156094250387053, "grad_norm": 0.3531716465950012, "learning_rate": 1.8091620272261014e-05, "loss": 0.5755, "step": 18934 }, { "epoch": 0.40158215096180355, "grad_norm": 0.3501509726047516, "learning_rate": 1.8091424310717838e-05, "loss": 0.5429, "step": 18935 }, { "epoch": 0.40160335941973657, "grad_norm": 0.3502683639526367, "learning_rate": 1.8091228340175452e-05, "loss": 0.5461, "step": 18936 }, { "epoch": 0.4016245678776696, "grad_norm": 0.4582197666168213, "learning_rate": 1.809103236063407e-05, "loss": 0.5731, "step": 18937 }, { "epoch": 0.4016457763356026, "grad_norm": 0.35697826743125916, "learning_rate": 1.8090836372093915e-05, "loss": 0.4973, "step": 18938 }, { "epoch": 0.4016669847935357, "grad_norm": 0.32462581992149353, "learning_rate": 1.8090640374555203e-05, "loss": 0.4335, "step": 18939 }, { "epoch": 0.4016881932514687, "grad_norm": 0.46110770106315613, "learning_rate": 1.809044436801815e-05, "loss": 0.461, "step": 18940 }, { "epoch": 0.4017094017094017, "grad_norm": 0.37327083945274353, "learning_rate": 1.8090248352482972e-05, "loss": 0.5688, "step": 18941 }, { "epoch": 0.40173061016733475, "grad_norm": 0.3053290843963623, "learning_rate": 1.8090052327949893e-05, "loss": 0.4832, "step": 18942 }, { "epoch": 0.40175181862526776, "grad_norm": 0.3661738932132721, "learning_rate": 1.8089856294419126e-05, "loss": 0.4291, "step": 18943 }, { "epoch": 0.4017730270832008, "grad_norm": 0.3143223822116852, "learning_rate": 1.8089660251890893e-05, "loss": 0.5042, "step": 18944 }, { "epoch": 0.4017942355411338, "grad_norm": 0.3554990291595459, "learning_rate": 1.8089464200365408e-05, "loss": 0.4523, "step": 18945 }, { "epoch": 0.4018154439990668, "grad_norm": 0.3227313160896301, "learning_rate": 1.8089268139842896e-05, "loss": 0.5326, "step": 18946 }, { "epoch": 0.40183665245699984, "grad_norm": 0.33128297328948975, "learning_rate": 1.8089072070323565e-05, "loss": 0.5008, "step": 18947 }, { "epoch": 0.40185786091493286, "grad_norm": 0.3621843755245209, "learning_rate": 1.8088875991807643e-05, "loss": 0.5182, "step": 18948 }, { "epoch": 0.4018790693728659, "grad_norm": 0.40637823939323425, "learning_rate": 1.808867990429534e-05, "loss": 0.5164, "step": 18949 }, { "epoch": 0.4019002778307989, "grad_norm": 0.32473182678222656, "learning_rate": 1.8088483807786877e-05, "loss": 0.4712, "step": 18950 }, { "epoch": 0.4019214862887319, "grad_norm": 0.6945667266845703, "learning_rate": 1.8088287702282473e-05, "loss": 0.4807, "step": 18951 }, { "epoch": 0.401942694746665, "grad_norm": 0.33594754338264465, "learning_rate": 1.8088091587782345e-05, "loss": 0.5938, "step": 18952 }, { "epoch": 0.401963903204598, "grad_norm": 0.3716680705547333, "learning_rate": 1.8087895464286714e-05, "loss": 0.4639, "step": 18953 }, { "epoch": 0.40198511166253104, "grad_norm": 0.3356395959854126, "learning_rate": 1.8087699331795796e-05, "loss": 0.4431, "step": 18954 }, { "epoch": 0.40200632012046406, "grad_norm": 0.34825170040130615, "learning_rate": 1.8087503190309808e-05, "loss": 0.4326, "step": 18955 }, { "epoch": 0.4020275285783971, "grad_norm": 0.3783065974712372, "learning_rate": 1.808730703982897e-05, "loss": 0.5474, "step": 18956 }, { "epoch": 0.4020487370363301, "grad_norm": 0.3128444254398346, "learning_rate": 1.8087110880353498e-05, "loss": 0.5128, "step": 18957 }, { "epoch": 0.4020699454942631, "grad_norm": 0.35937437415122986, "learning_rate": 1.8086914711883616e-05, "loss": 0.5031, "step": 18958 }, { "epoch": 0.40209115395219613, "grad_norm": 0.3180890381336212, "learning_rate": 1.808671853441953e-05, "loss": 0.5113, "step": 18959 }, { "epoch": 0.40211236241012915, "grad_norm": 0.4068564474582672, "learning_rate": 1.808652234796147e-05, "loss": 0.5154, "step": 18960 }, { "epoch": 0.4021335708680622, "grad_norm": 0.3698286712169647, "learning_rate": 1.8086326152509652e-05, "loss": 0.5287, "step": 18961 }, { "epoch": 0.4021547793259952, "grad_norm": 0.7504275441169739, "learning_rate": 1.8086129948064288e-05, "loss": 0.4721, "step": 18962 }, { "epoch": 0.4021759877839282, "grad_norm": 0.39290282130241394, "learning_rate": 1.8085933734625603e-05, "loss": 0.5067, "step": 18963 }, { "epoch": 0.40219719624186123, "grad_norm": 0.34346118569374084, "learning_rate": 1.8085737512193816e-05, "loss": 0.5338, "step": 18964 }, { "epoch": 0.40221840469979425, "grad_norm": 0.3361471891403198, "learning_rate": 1.8085541280769135e-05, "loss": 0.5658, "step": 18965 }, { "epoch": 0.4022396131577273, "grad_norm": 0.43121662735939026, "learning_rate": 1.808534504035179e-05, "loss": 0.4605, "step": 18966 }, { "epoch": 0.40226082161566035, "grad_norm": 0.34475263953208923, "learning_rate": 1.8085148790941994e-05, "loss": 0.4209, "step": 18967 }, { "epoch": 0.40228203007359337, "grad_norm": 0.3511595129966736, "learning_rate": 1.8084952532539964e-05, "loss": 0.42, "step": 18968 }, { "epoch": 0.4023032385315264, "grad_norm": 0.3964328169822693, "learning_rate": 1.808475626514592e-05, "loss": 0.4909, "step": 18969 }, { "epoch": 0.4023244469894594, "grad_norm": 0.3731835186481476, "learning_rate": 1.8084559988760083e-05, "loss": 0.5412, "step": 18970 }, { "epoch": 0.4023456554473924, "grad_norm": 0.35385453701019287, "learning_rate": 1.808436370338267e-05, "loss": 0.5376, "step": 18971 }, { "epoch": 0.40236686390532544, "grad_norm": 0.37015774846076965, "learning_rate": 1.8084167409013892e-05, "loss": 0.5541, "step": 18972 }, { "epoch": 0.40238807236325846, "grad_norm": 0.45745015144348145, "learning_rate": 1.8083971105653976e-05, "loss": 0.5719, "step": 18973 }, { "epoch": 0.4024092808211915, "grad_norm": 0.410251259803772, "learning_rate": 1.8083774793303137e-05, "loss": 0.5317, "step": 18974 }, { "epoch": 0.4024304892791245, "grad_norm": 0.6214250922203064, "learning_rate": 1.8083578471961595e-05, "loss": 0.4204, "step": 18975 }, { "epoch": 0.4024516977370575, "grad_norm": 0.32386696338653564, "learning_rate": 1.8083382141629565e-05, "loss": 0.4118, "step": 18976 }, { "epoch": 0.40247290619499054, "grad_norm": 0.3829340934753418, "learning_rate": 1.808318580230727e-05, "loss": 0.546, "step": 18977 }, { "epoch": 0.40249411465292356, "grad_norm": 0.3440537452697754, "learning_rate": 1.8082989453994924e-05, "loss": 0.4785, "step": 18978 }, { "epoch": 0.4025153231108566, "grad_norm": 2.276305913925171, "learning_rate": 1.808279309669275e-05, "loss": 0.4722, "step": 18979 }, { "epoch": 0.40253653156878966, "grad_norm": 0.3414353132247925, "learning_rate": 1.8082596730400963e-05, "loss": 0.495, "step": 18980 }, { "epoch": 0.4025577400267227, "grad_norm": 0.3761802911758423, "learning_rate": 1.808240035511978e-05, "loss": 0.5015, "step": 18981 }, { "epoch": 0.4025789484846557, "grad_norm": 0.3288360834121704, "learning_rate": 1.8082203970849424e-05, "loss": 0.4781, "step": 18982 }, { "epoch": 0.4026001569425887, "grad_norm": 0.3868897259235382, "learning_rate": 1.808200757759011e-05, "loss": 0.5806, "step": 18983 }, { "epoch": 0.40262136540052174, "grad_norm": 0.4102046489715576, "learning_rate": 1.8081811175342058e-05, "loss": 0.543, "step": 18984 }, { "epoch": 0.40264257385845476, "grad_norm": 0.3674536943435669, "learning_rate": 1.8081614764105486e-05, "loss": 0.5021, "step": 18985 }, { "epoch": 0.4026637823163878, "grad_norm": 0.3460872173309326, "learning_rate": 1.8081418343880613e-05, "loss": 0.5371, "step": 18986 }, { "epoch": 0.4026849907743208, "grad_norm": 0.34957247972488403, "learning_rate": 1.8081221914667654e-05, "loss": 0.5362, "step": 18987 }, { "epoch": 0.4027061992322538, "grad_norm": 0.3265036344528198, "learning_rate": 1.808102547646683e-05, "loss": 0.5142, "step": 18988 }, { "epoch": 0.40272740769018683, "grad_norm": 0.3899764120578766, "learning_rate": 1.8080829029278365e-05, "loss": 0.5309, "step": 18989 }, { "epoch": 0.40274861614811985, "grad_norm": 0.40937092900276184, "learning_rate": 1.8080632573102467e-05, "loss": 0.5016, "step": 18990 }, { "epoch": 0.4027698246060529, "grad_norm": 0.3217526376247406, "learning_rate": 1.8080436107939362e-05, "loss": 0.5512, "step": 18991 }, { "epoch": 0.4027910330639859, "grad_norm": 0.3525025546550751, "learning_rate": 1.8080239633789266e-05, "loss": 0.5836, "step": 18992 }, { "epoch": 0.40281224152191897, "grad_norm": 0.3853033185005188, "learning_rate": 1.8080043150652393e-05, "loss": 0.5784, "step": 18993 }, { "epoch": 0.402833449979852, "grad_norm": 0.32782042026519775, "learning_rate": 1.8079846658528972e-05, "loss": 0.4739, "step": 18994 }, { "epoch": 0.402854658437785, "grad_norm": 0.350651353597641, "learning_rate": 1.8079650157419212e-05, "loss": 0.5064, "step": 18995 }, { "epoch": 0.402875866895718, "grad_norm": 0.333454430103302, "learning_rate": 1.807945364732334e-05, "loss": 0.4774, "step": 18996 }, { "epoch": 0.40289707535365105, "grad_norm": 0.3457636535167694, "learning_rate": 1.8079257128241565e-05, "loss": 0.5559, "step": 18997 }, { "epoch": 0.40291828381158407, "grad_norm": 0.3533996343612671, "learning_rate": 1.807906060017411e-05, "loss": 0.5144, "step": 18998 }, { "epoch": 0.4029394922695171, "grad_norm": 0.3343540132045746, "learning_rate": 1.80788640631212e-05, "loss": 0.422, "step": 18999 }, { "epoch": 0.4029607007274501, "grad_norm": 0.4248001277446747, "learning_rate": 1.8078667517083043e-05, "loss": 0.4923, "step": 19000 }, { "epoch": 0.4029819091853831, "grad_norm": 1.010722041130066, "learning_rate": 1.8078470962059863e-05, "loss": 0.5545, "step": 19001 }, { "epoch": 0.40300311764331614, "grad_norm": 0.37705153226852417, "learning_rate": 1.8078274398051876e-05, "loss": 0.5636, "step": 19002 }, { "epoch": 0.40302432610124916, "grad_norm": 0.30159780383110046, "learning_rate": 1.8078077825059303e-05, "loss": 0.4219, "step": 19003 }, { "epoch": 0.4030455345591822, "grad_norm": 0.3162616491317749, "learning_rate": 1.8077881243082365e-05, "loss": 0.5149, "step": 19004 }, { "epoch": 0.4030667430171152, "grad_norm": 0.3753385841846466, "learning_rate": 1.8077684652121275e-05, "loss": 0.5318, "step": 19005 }, { "epoch": 0.4030879514750482, "grad_norm": 0.3333132863044739, "learning_rate": 1.807748805217626e-05, "loss": 0.524, "step": 19006 }, { "epoch": 0.4031091599329813, "grad_norm": 0.33480241894721985, "learning_rate": 1.8077291443247526e-05, "loss": 0.5026, "step": 19007 }, { "epoch": 0.4031303683909143, "grad_norm": 0.32268479466438293, "learning_rate": 1.80770948253353e-05, "loss": 0.5269, "step": 19008 }, { "epoch": 0.40315157684884734, "grad_norm": 0.3642267882823944, "learning_rate": 1.80768981984398e-05, "loss": 0.539, "step": 19009 }, { "epoch": 0.40317278530678036, "grad_norm": 0.33696267008781433, "learning_rate": 1.8076701562561244e-05, "loss": 0.5513, "step": 19010 }, { "epoch": 0.4031939937647134, "grad_norm": 0.3525960445404053, "learning_rate": 1.8076504917699852e-05, "loss": 0.5081, "step": 19011 }, { "epoch": 0.4032152022226464, "grad_norm": 0.3934485912322998, "learning_rate": 1.807630826385584e-05, "loss": 0.5602, "step": 19012 }, { "epoch": 0.4032364106805794, "grad_norm": 0.32138729095458984, "learning_rate": 1.8076111601029427e-05, "loss": 0.4996, "step": 19013 }, { "epoch": 0.40325761913851244, "grad_norm": 0.344242125749588, "learning_rate": 1.807591492922083e-05, "loss": 0.5008, "step": 19014 }, { "epoch": 0.40327882759644545, "grad_norm": 0.3789624273777008, "learning_rate": 1.8075718248430277e-05, "loss": 0.5502, "step": 19015 }, { "epoch": 0.4033000360543785, "grad_norm": 0.37591493129730225, "learning_rate": 1.807552155865798e-05, "loss": 0.5384, "step": 19016 }, { "epoch": 0.4033212445123115, "grad_norm": 0.32362014055252075, "learning_rate": 1.8075324859904154e-05, "loss": 0.482, "step": 19017 }, { "epoch": 0.4033424529702445, "grad_norm": 0.34105387330055237, "learning_rate": 1.8075128152169024e-05, "loss": 0.4751, "step": 19018 }, { "epoch": 0.40336366142817753, "grad_norm": 0.330948144197464, "learning_rate": 1.8074931435452808e-05, "loss": 0.5587, "step": 19019 }, { "epoch": 0.4033848698861106, "grad_norm": 0.44498997926712036, "learning_rate": 1.8074734709755718e-05, "loss": 0.5409, "step": 19020 }, { "epoch": 0.40340607834404363, "grad_norm": 0.3957400619983673, "learning_rate": 1.8074537975077985e-05, "loss": 0.5613, "step": 19021 }, { "epoch": 0.40342728680197665, "grad_norm": 0.3956484794616699, "learning_rate": 1.807434123141982e-05, "loss": 0.4591, "step": 19022 }, { "epoch": 0.40344849525990967, "grad_norm": 0.33555760979652405, "learning_rate": 1.8074144478781437e-05, "loss": 0.5051, "step": 19023 }, { "epoch": 0.4034697037178427, "grad_norm": 0.48581433296203613, "learning_rate": 1.8073947717163066e-05, "loss": 0.5225, "step": 19024 }, { "epoch": 0.4034909121757757, "grad_norm": 0.3194977939128876, "learning_rate": 1.8073750946564922e-05, "loss": 0.4924, "step": 19025 }, { "epoch": 0.4035121206337087, "grad_norm": 0.32103267312049866, "learning_rate": 1.8073554166987216e-05, "loss": 0.5144, "step": 19026 }, { "epoch": 0.40353332909164175, "grad_norm": 0.2973212003707886, "learning_rate": 1.8073357378430175e-05, "loss": 0.5006, "step": 19027 }, { "epoch": 0.40355453754957477, "grad_norm": 0.3354713022708893, "learning_rate": 1.8073160580894022e-05, "loss": 0.5758, "step": 19028 }, { "epoch": 0.4035757460075078, "grad_norm": 0.34510257840156555, "learning_rate": 1.807296377437896e-05, "loss": 0.547, "step": 19029 }, { "epoch": 0.4035969544654408, "grad_norm": 0.39643096923828125, "learning_rate": 1.8072766958885225e-05, "loss": 0.5393, "step": 19030 }, { "epoch": 0.4036181629233738, "grad_norm": 0.398727685213089, "learning_rate": 1.8072570134413028e-05, "loss": 0.4638, "step": 19031 }, { "epoch": 0.40363937138130684, "grad_norm": 0.36508363485336304, "learning_rate": 1.807237330096259e-05, "loss": 0.591, "step": 19032 }, { "epoch": 0.40366057983923986, "grad_norm": 0.37416112422943115, "learning_rate": 1.8072176458534124e-05, "loss": 0.4942, "step": 19033 }, { "epoch": 0.40368178829717294, "grad_norm": 0.40338051319122314, "learning_rate": 1.8071979607127855e-05, "loss": 0.5334, "step": 19034 }, { "epoch": 0.40370299675510596, "grad_norm": 0.45602846145629883, "learning_rate": 1.8071782746744002e-05, "loss": 0.498, "step": 19035 }, { "epoch": 0.403724205213039, "grad_norm": 0.3548247516155243, "learning_rate": 1.8071585877382782e-05, "loss": 0.5274, "step": 19036 }, { "epoch": 0.403745413670972, "grad_norm": 0.41460055112838745, "learning_rate": 1.8071388999044416e-05, "loss": 0.4945, "step": 19037 }, { "epoch": 0.403766622128905, "grad_norm": 0.33700624108314514, "learning_rate": 1.8071192111729116e-05, "loss": 0.4438, "step": 19038 }, { "epoch": 0.40378783058683804, "grad_norm": 1.0924718379974365, "learning_rate": 1.807099521543711e-05, "loss": 0.4624, "step": 19039 }, { "epoch": 0.40380903904477106, "grad_norm": 0.3975302577018738, "learning_rate": 1.8070798310168613e-05, "loss": 0.5002, "step": 19040 }, { "epoch": 0.4038302475027041, "grad_norm": 0.3638361394405365, "learning_rate": 1.8070601395923842e-05, "loss": 0.5545, "step": 19041 }, { "epoch": 0.4038514559606371, "grad_norm": 0.3601343035697937, "learning_rate": 1.807040447270302e-05, "loss": 0.6128, "step": 19042 }, { "epoch": 0.4038726644185701, "grad_norm": 0.4174317717552185, "learning_rate": 1.8070207540506365e-05, "loss": 0.5139, "step": 19043 }, { "epoch": 0.40389387287650313, "grad_norm": 0.361362487077713, "learning_rate": 1.8070010599334095e-05, "loss": 0.5339, "step": 19044 }, { "epoch": 0.40391508133443615, "grad_norm": 0.304599404335022, "learning_rate": 1.806981364918643e-05, "loss": 0.4495, "step": 19045 }, { "epoch": 0.4039362897923692, "grad_norm": 0.3496837615966797, "learning_rate": 1.8069616690063588e-05, "loss": 0.587, "step": 19046 }, { "epoch": 0.4039574982503022, "grad_norm": 0.3503948748111725, "learning_rate": 1.8069419721965787e-05, "loss": 0.469, "step": 19047 }, { "epoch": 0.40397870670823527, "grad_norm": 0.36164987087249756, "learning_rate": 1.8069222744893245e-05, "loss": 0.4837, "step": 19048 }, { "epoch": 0.4039999151661683, "grad_norm": 0.35825470089912415, "learning_rate": 1.8069025758846187e-05, "loss": 0.6198, "step": 19049 }, { "epoch": 0.4040211236241013, "grad_norm": 0.3348800241947174, "learning_rate": 1.806882876382483e-05, "loss": 0.5409, "step": 19050 }, { "epoch": 0.40404233208203433, "grad_norm": 0.33582520484924316, "learning_rate": 1.806863175982939e-05, "loss": 0.5269, "step": 19051 }, { "epoch": 0.40406354053996735, "grad_norm": 0.3639393746852875, "learning_rate": 1.8068434746860088e-05, "loss": 0.5287, "step": 19052 }, { "epoch": 0.40408474899790037, "grad_norm": 0.46488556265830994, "learning_rate": 1.8068237724917145e-05, "loss": 0.5181, "step": 19053 }, { "epoch": 0.4041059574558334, "grad_norm": 0.36675751209259033, "learning_rate": 1.8068040694000775e-05, "loss": 0.4911, "step": 19054 }, { "epoch": 0.4041271659137664, "grad_norm": 0.34379279613494873, "learning_rate": 1.80678436541112e-05, "loss": 0.5365, "step": 19055 }, { "epoch": 0.4041483743716994, "grad_norm": 0.31688255071640015, "learning_rate": 1.806764660524864e-05, "loss": 0.5039, "step": 19056 }, { "epoch": 0.40416958282963245, "grad_norm": 0.3283877670764923, "learning_rate": 1.8067449547413318e-05, "loss": 0.5257, "step": 19057 }, { "epoch": 0.40419079128756547, "grad_norm": 0.31558066606521606, "learning_rate": 1.8067252480605438e-05, "loss": 0.4747, "step": 19058 }, { "epoch": 0.4042119997454985, "grad_norm": 0.4011768698692322, "learning_rate": 1.806705540482524e-05, "loss": 0.5246, "step": 19059 }, { "epoch": 0.4042332082034315, "grad_norm": 0.3133593201637268, "learning_rate": 1.8066858320072927e-05, "loss": 0.3869, "step": 19060 }, { "epoch": 0.4042544166613646, "grad_norm": 0.33611932396888733, "learning_rate": 1.806666122634873e-05, "loss": 0.458, "step": 19061 }, { "epoch": 0.4042756251192976, "grad_norm": 0.3437698483467102, "learning_rate": 1.8066464123652857e-05, "loss": 0.4779, "step": 19062 }, { "epoch": 0.4042968335772306, "grad_norm": 0.47214293479919434, "learning_rate": 1.8066267011985536e-05, "loss": 0.4814, "step": 19063 }, { "epoch": 0.40431804203516364, "grad_norm": 0.3443447947502136, "learning_rate": 1.8066069891346982e-05, "loss": 0.5501, "step": 19064 }, { "epoch": 0.40433925049309666, "grad_norm": 0.3250845968723297, "learning_rate": 1.8065872761737417e-05, "loss": 0.5123, "step": 19065 }, { "epoch": 0.4043604589510297, "grad_norm": 0.34087809920310974, "learning_rate": 1.8065675623157054e-05, "loss": 0.4488, "step": 19066 }, { "epoch": 0.4043816674089627, "grad_norm": 0.3187808394432068, "learning_rate": 1.806547847560612e-05, "loss": 0.4832, "step": 19067 }, { "epoch": 0.4044028758668957, "grad_norm": 0.37596064805984497, "learning_rate": 1.8065281319084832e-05, "loss": 0.5166, "step": 19068 }, { "epoch": 0.40442408432482874, "grad_norm": 0.3379104733467102, "learning_rate": 1.8065084153593406e-05, "loss": 0.4942, "step": 19069 }, { "epoch": 0.40444529278276176, "grad_norm": 0.337736040353775, "learning_rate": 1.8064886979132064e-05, "loss": 0.4979, "step": 19070 }, { "epoch": 0.4044665012406948, "grad_norm": 0.37600451707839966, "learning_rate": 1.8064689795701023e-05, "loss": 0.6211, "step": 19071 }, { "epoch": 0.4044877096986278, "grad_norm": 0.3463309407234192, "learning_rate": 1.806449260330051e-05, "loss": 0.435, "step": 19072 }, { "epoch": 0.4045089181565608, "grad_norm": 0.35213541984558105, "learning_rate": 1.8064295401930732e-05, "loss": 0.5954, "step": 19073 }, { "epoch": 0.40453012661449383, "grad_norm": 0.37952396273612976, "learning_rate": 1.8064098191591914e-05, "loss": 0.5449, "step": 19074 }, { "epoch": 0.4045513350724269, "grad_norm": 0.3528153896331787, "learning_rate": 1.806390097228428e-05, "loss": 0.5103, "step": 19075 }, { "epoch": 0.40457254353035993, "grad_norm": 0.3548307716846466, "learning_rate": 1.8063703744008045e-05, "loss": 0.5155, "step": 19076 }, { "epoch": 0.40459375198829295, "grad_norm": 0.354824036359787, "learning_rate": 1.8063506506763428e-05, "loss": 0.5312, "step": 19077 }, { "epoch": 0.40461496044622597, "grad_norm": 0.35993584990501404, "learning_rate": 1.8063309260550646e-05, "loss": 0.4858, "step": 19078 }, { "epoch": 0.404636168904159, "grad_norm": 0.5898173451423645, "learning_rate": 1.8063112005369924e-05, "loss": 0.4708, "step": 19079 }, { "epoch": 0.404657377362092, "grad_norm": 0.37029439210891724, "learning_rate": 1.806291474122148e-05, "loss": 0.5407, "step": 19080 }, { "epoch": 0.404678585820025, "grad_norm": 0.35821300745010376, "learning_rate": 1.8062717468105534e-05, "loss": 0.473, "step": 19081 }, { "epoch": 0.40469979427795805, "grad_norm": 0.3260919451713562, "learning_rate": 1.80625201860223e-05, "loss": 0.4858, "step": 19082 }, { "epoch": 0.40472100273589107, "grad_norm": 0.37763580679893494, "learning_rate": 1.8062322894972002e-05, "loss": 0.5344, "step": 19083 }, { "epoch": 0.4047422111938241, "grad_norm": 0.3480973243713379, "learning_rate": 1.806212559495486e-05, "loss": 0.5493, "step": 19084 }, { "epoch": 0.4047634196517571, "grad_norm": 0.3968130648136139, "learning_rate": 1.806192828597109e-05, "loss": 0.4367, "step": 19085 }, { "epoch": 0.4047846281096901, "grad_norm": 0.3724938631057739, "learning_rate": 1.8061730968020913e-05, "loss": 0.504, "step": 19086 }, { "epoch": 0.40480583656762315, "grad_norm": 0.3307475745677948, "learning_rate": 1.806153364110455e-05, "loss": 0.4927, "step": 19087 }, { "epoch": 0.40482704502555616, "grad_norm": 0.40282881259918213, "learning_rate": 1.8061336305222222e-05, "loss": 0.5912, "step": 19088 }, { "epoch": 0.40484825348348924, "grad_norm": 0.3606085777282715, "learning_rate": 1.806113896037414e-05, "loss": 0.644, "step": 19089 }, { "epoch": 0.40486946194142226, "grad_norm": 0.32507413625717163, "learning_rate": 1.8060941606560535e-05, "loss": 0.4227, "step": 19090 }, { "epoch": 0.4048906703993553, "grad_norm": 0.366563081741333, "learning_rate": 1.8060744243781617e-05, "loss": 0.5192, "step": 19091 }, { "epoch": 0.4049118788572883, "grad_norm": 0.3323371708393097, "learning_rate": 1.806054687203761e-05, "loss": 0.564, "step": 19092 }, { "epoch": 0.4049330873152213, "grad_norm": 0.35801541805267334, "learning_rate": 1.8060349491328733e-05, "loss": 0.465, "step": 19093 }, { "epoch": 0.40495429577315434, "grad_norm": 0.3591713011264801, "learning_rate": 1.8060152101655205e-05, "loss": 0.5648, "step": 19094 }, { "epoch": 0.40497550423108736, "grad_norm": 0.36044034361839294, "learning_rate": 1.805995470301725e-05, "loss": 0.5369, "step": 19095 }, { "epoch": 0.4049967126890204, "grad_norm": 0.3647015690803528, "learning_rate": 1.805975729541508e-05, "loss": 0.546, "step": 19096 }, { "epoch": 0.4050179211469534, "grad_norm": 0.405604749917984, "learning_rate": 1.8059559878848914e-05, "loss": 0.5374, "step": 19097 }, { "epoch": 0.4050391296048864, "grad_norm": 0.33448559045791626, "learning_rate": 1.805936245331898e-05, "loss": 0.5245, "step": 19098 }, { "epoch": 0.40506033806281944, "grad_norm": 0.42226332426071167, "learning_rate": 1.8059165018825493e-05, "loss": 0.4258, "step": 19099 }, { "epoch": 0.40508154652075246, "grad_norm": 0.34177398681640625, "learning_rate": 1.8058967575368672e-05, "loss": 0.4817, "step": 19100 }, { "epoch": 0.4051027549786855, "grad_norm": 0.43565934896469116, "learning_rate": 1.805877012294874e-05, "loss": 0.5009, "step": 19101 }, { "epoch": 0.40512396343661855, "grad_norm": 0.366397500038147, "learning_rate": 1.805857266156591e-05, "loss": 0.5647, "step": 19102 }, { "epoch": 0.40514517189455157, "grad_norm": 0.47290223836898804, "learning_rate": 1.8058375191220407e-05, "loss": 0.4897, "step": 19103 }, { "epoch": 0.4051663803524846, "grad_norm": 0.39837464690208435, "learning_rate": 1.805817771191245e-05, "loss": 0.5108, "step": 19104 }, { "epoch": 0.4051875888104176, "grad_norm": 0.3335903286933899, "learning_rate": 1.8057980223642256e-05, "loss": 0.4782, "step": 19105 }, { "epoch": 0.40520879726835063, "grad_norm": 0.3588271737098694, "learning_rate": 1.8057782726410044e-05, "loss": 0.5463, "step": 19106 }, { "epoch": 0.40523000572628365, "grad_norm": 0.38083767890930176, "learning_rate": 1.8057585220216037e-05, "loss": 0.5398, "step": 19107 }, { "epoch": 0.40525121418421667, "grad_norm": 0.3375181555747986, "learning_rate": 1.805738770506046e-05, "loss": 0.4695, "step": 19108 }, { "epoch": 0.4052724226421497, "grad_norm": 0.801068902015686, "learning_rate": 1.8057190180943524e-05, "loss": 0.5093, "step": 19109 }, { "epoch": 0.4052936311000827, "grad_norm": 0.3940219283103943, "learning_rate": 1.8056992647865447e-05, "loss": 0.4736, "step": 19110 }, { "epoch": 0.4053148395580157, "grad_norm": 0.3233952522277832, "learning_rate": 1.8056795105826456e-05, "loss": 0.5145, "step": 19111 }, { "epoch": 0.40533604801594875, "grad_norm": 0.40248510241508484, "learning_rate": 1.8056597554826765e-05, "loss": 0.4903, "step": 19112 }, { "epoch": 0.40535725647388177, "grad_norm": 0.3211267292499542, "learning_rate": 1.8056399994866596e-05, "loss": 0.4441, "step": 19113 }, { "epoch": 0.4053784649318148, "grad_norm": 0.37214159965515137, "learning_rate": 1.805620242594617e-05, "loss": 0.5046, "step": 19114 }, { "epoch": 0.4053996733897478, "grad_norm": 0.4088497459888458, "learning_rate": 1.8056004848065706e-05, "loss": 0.5356, "step": 19115 }, { "epoch": 0.4054208818476809, "grad_norm": 0.3520980179309845, "learning_rate": 1.8055807261225425e-05, "loss": 0.5055, "step": 19116 }, { "epoch": 0.4054420903056139, "grad_norm": 0.4065674841403961, "learning_rate": 1.8055609665425544e-05, "loss": 0.5092, "step": 19117 }, { "epoch": 0.4054632987635469, "grad_norm": 0.3827674388885498, "learning_rate": 1.8055412060666285e-05, "loss": 0.4859, "step": 19118 }, { "epoch": 0.40548450722147994, "grad_norm": 0.36315539479255676, "learning_rate": 1.8055214446947863e-05, "loss": 0.5384, "step": 19119 }, { "epoch": 0.40550571567941296, "grad_norm": 0.3461006283760071, "learning_rate": 1.8055016824270505e-05, "loss": 0.4756, "step": 19120 }, { "epoch": 0.405526924137346, "grad_norm": 0.3693484663963318, "learning_rate": 1.805481919263443e-05, "loss": 0.5845, "step": 19121 }, { "epoch": 0.405548132595279, "grad_norm": 0.356410950422287, "learning_rate": 1.8054621552039845e-05, "loss": 0.4664, "step": 19122 }, { "epoch": 0.405569341053212, "grad_norm": 0.35951006412506104, "learning_rate": 1.8054423902486985e-05, "loss": 0.4704, "step": 19123 }, { "epoch": 0.40559054951114504, "grad_norm": 0.328328400850296, "learning_rate": 1.805422624397607e-05, "loss": 0.4889, "step": 19124 }, { "epoch": 0.40561175796907806, "grad_norm": 0.3650861382484436, "learning_rate": 1.805402857650731e-05, "loss": 0.4777, "step": 19125 }, { "epoch": 0.4056329664270111, "grad_norm": 0.3846108317375183, "learning_rate": 1.8053830900080925e-05, "loss": 0.6134, "step": 19126 }, { "epoch": 0.4056541748849441, "grad_norm": 0.46771731972694397, "learning_rate": 1.805363321469715e-05, "loss": 0.503, "step": 19127 }, { "epoch": 0.4056753833428771, "grad_norm": 0.3558416962623596, "learning_rate": 1.8053435520356183e-05, "loss": 0.5525, "step": 19128 }, { "epoch": 0.40569659180081014, "grad_norm": 0.3421824872493744, "learning_rate": 1.8053237817058264e-05, "loss": 0.5453, "step": 19129 }, { "epoch": 0.4057178002587432, "grad_norm": 0.4025878608226776, "learning_rate": 1.8053040104803598e-05, "loss": 0.5983, "step": 19130 }, { "epoch": 0.40573900871667623, "grad_norm": 0.431365966796875, "learning_rate": 1.8052842383592412e-05, "loss": 0.5799, "step": 19131 }, { "epoch": 0.40576021717460925, "grad_norm": 0.36565810441970825, "learning_rate": 1.8052644653424923e-05, "loss": 0.5655, "step": 19132 }, { "epoch": 0.40578142563254227, "grad_norm": 0.4699740409851074, "learning_rate": 1.8052446914301356e-05, "loss": 0.6036, "step": 19133 }, { "epoch": 0.4058026340904753, "grad_norm": 0.3868265748023987, "learning_rate": 1.805224916622193e-05, "loss": 0.5484, "step": 19134 }, { "epoch": 0.4058238425484083, "grad_norm": 0.32772108912467957, "learning_rate": 1.8052051409186857e-05, "loss": 0.5466, "step": 19135 }, { "epoch": 0.40584505100634133, "grad_norm": 0.42232972383499146, "learning_rate": 1.8051853643196363e-05, "loss": 0.5051, "step": 19136 }, { "epoch": 0.40586625946427435, "grad_norm": 0.3536895215511322, "learning_rate": 1.8051655868250667e-05, "loss": 0.5491, "step": 19137 }, { "epoch": 0.40588746792220737, "grad_norm": 0.3652356266975403, "learning_rate": 1.805145808434999e-05, "loss": 0.4298, "step": 19138 }, { "epoch": 0.4059086763801404, "grad_norm": 0.3335808515548706, "learning_rate": 1.8051260291494556e-05, "loss": 0.5025, "step": 19139 }, { "epoch": 0.4059298848380734, "grad_norm": 0.32678309082984924, "learning_rate": 1.8051062489684573e-05, "loss": 0.4738, "step": 19140 }, { "epoch": 0.4059510932960064, "grad_norm": 0.3355938196182251, "learning_rate": 1.8050864678920274e-05, "loss": 0.586, "step": 19141 }, { "epoch": 0.40597230175393945, "grad_norm": 0.4211290180683136, "learning_rate": 1.805066685920187e-05, "loss": 0.5109, "step": 19142 }, { "epoch": 0.4059935102118725, "grad_norm": 0.34085580706596375, "learning_rate": 1.8050469030529586e-05, "loss": 0.5161, "step": 19143 }, { "epoch": 0.40601471866980554, "grad_norm": 0.4390725791454315, "learning_rate": 1.805027119290364e-05, "loss": 0.4865, "step": 19144 }, { "epoch": 0.40603592712773856, "grad_norm": 0.3773946762084961, "learning_rate": 1.805007334632425e-05, "loss": 0.5907, "step": 19145 }, { "epoch": 0.4060571355856716, "grad_norm": 0.3463608920574188, "learning_rate": 1.8049875490791638e-05, "loss": 0.4311, "step": 19146 }, { "epoch": 0.4060783440436046, "grad_norm": 0.32735374569892883, "learning_rate": 1.804967762630603e-05, "loss": 0.5753, "step": 19147 }, { "epoch": 0.4060995525015376, "grad_norm": 0.3385885953903198, "learning_rate": 1.8049479752867634e-05, "loss": 0.5473, "step": 19148 }, { "epoch": 0.40612076095947064, "grad_norm": 0.3845479190349579, "learning_rate": 1.804928187047668e-05, "loss": 0.5556, "step": 19149 }, { "epoch": 0.40614196941740366, "grad_norm": 0.37515759468078613, "learning_rate": 1.8049083979133383e-05, "loss": 0.4548, "step": 19150 }, { "epoch": 0.4061631778753367, "grad_norm": 0.3301098048686981, "learning_rate": 1.8048886078837965e-05, "loss": 0.4969, "step": 19151 }, { "epoch": 0.4061843863332697, "grad_norm": 0.32970765233039856, "learning_rate": 1.804868816959065e-05, "loss": 0.4347, "step": 19152 }, { "epoch": 0.4062055947912027, "grad_norm": 0.3435421586036682, "learning_rate": 1.8048490251391648e-05, "loss": 0.4665, "step": 19153 }, { "epoch": 0.40622680324913574, "grad_norm": 0.36137059330940247, "learning_rate": 1.804829232424119e-05, "loss": 0.5579, "step": 19154 }, { "epoch": 0.40624801170706876, "grad_norm": 0.3502745032310486, "learning_rate": 1.8048094388139486e-05, "loss": 0.4086, "step": 19155 }, { "epoch": 0.4062692201650018, "grad_norm": 0.3405229449272156, "learning_rate": 1.8047896443086763e-05, "loss": 0.5161, "step": 19156 }, { "epoch": 0.40629042862293485, "grad_norm": 0.36669114232063293, "learning_rate": 1.8047698489083242e-05, "loss": 0.4958, "step": 19157 }, { "epoch": 0.40631163708086787, "grad_norm": 1.6737415790557861, "learning_rate": 1.804750052612914e-05, "loss": 0.5795, "step": 19158 }, { "epoch": 0.4063328455388009, "grad_norm": 0.36761850118637085, "learning_rate": 1.8047302554224674e-05, "loss": 0.5159, "step": 19159 }, { "epoch": 0.4063540539967339, "grad_norm": 0.3328138589859009, "learning_rate": 1.804710457337007e-05, "loss": 0.4506, "step": 19160 }, { "epoch": 0.40637526245466693, "grad_norm": 0.3237522840499878, "learning_rate": 1.804690658356555e-05, "loss": 0.4375, "step": 19161 }, { "epoch": 0.40639647091259995, "grad_norm": 0.3508014380931854, "learning_rate": 1.8046708584811326e-05, "loss": 0.6671, "step": 19162 }, { "epoch": 0.40641767937053297, "grad_norm": 0.30454179644584656, "learning_rate": 1.8046510577107625e-05, "loss": 0.4374, "step": 19163 }, { "epoch": 0.406438887828466, "grad_norm": 0.31456851959228516, "learning_rate": 1.8046312560454664e-05, "loss": 0.5283, "step": 19164 }, { "epoch": 0.406460096286399, "grad_norm": 0.3162156939506531, "learning_rate": 1.8046114534852664e-05, "loss": 0.4747, "step": 19165 }, { "epoch": 0.40648130474433203, "grad_norm": 0.4001234471797943, "learning_rate": 1.8045916500301847e-05, "loss": 0.5118, "step": 19166 }, { "epoch": 0.40650251320226505, "grad_norm": 0.42093926668167114, "learning_rate": 1.804571845680243e-05, "loss": 0.5264, "step": 19167 }, { "epoch": 0.40652372166019807, "grad_norm": 0.3505669832229614, "learning_rate": 1.8045520404354633e-05, "loss": 0.4809, "step": 19168 }, { "epoch": 0.4065449301181311, "grad_norm": 0.3586384356021881, "learning_rate": 1.8045322342958682e-05, "loss": 0.5719, "step": 19169 }, { "epoch": 0.4065661385760641, "grad_norm": 0.31515517830848694, "learning_rate": 1.8045124272614793e-05, "loss": 0.4903, "step": 19170 }, { "epoch": 0.4065873470339972, "grad_norm": 0.3570556938648224, "learning_rate": 1.8044926193323183e-05, "loss": 0.4949, "step": 19171 }, { "epoch": 0.4066085554919302, "grad_norm": 0.48782360553741455, "learning_rate": 1.8044728105084076e-05, "loss": 0.5376, "step": 19172 }, { "epoch": 0.4066297639498632, "grad_norm": 0.37630695104599, "learning_rate": 1.8044530007897693e-05, "loss": 0.4919, "step": 19173 }, { "epoch": 0.40665097240779624, "grad_norm": 6.165912628173828, "learning_rate": 1.8044331901764256e-05, "loss": 0.8824, "step": 19174 }, { "epoch": 0.40667218086572926, "grad_norm": 0.3862980902194977, "learning_rate": 1.8044133786683983e-05, "loss": 0.6212, "step": 19175 }, { "epoch": 0.4066933893236623, "grad_norm": 0.3320787847042084, "learning_rate": 1.8043935662657094e-05, "loss": 0.4696, "step": 19176 }, { "epoch": 0.4067145977815953, "grad_norm": 0.3521958589553833, "learning_rate": 1.8043737529683806e-05, "loss": 0.5013, "step": 19177 }, { "epoch": 0.4067358062395283, "grad_norm": 0.327383428812027, "learning_rate": 1.8043539387764346e-05, "loss": 0.4941, "step": 19178 }, { "epoch": 0.40675701469746134, "grad_norm": 0.33603596687316895, "learning_rate": 1.8043341236898933e-05, "loss": 0.4613, "step": 19179 }, { "epoch": 0.40677822315539436, "grad_norm": 0.3134746253490448, "learning_rate": 1.8043143077087785e-05, "loss": 0.5135, "step": 19180 }, { "epoch": 0.4067994316133274, "grad_norm": 0.35140541195869446, "learning_rate": 1.804294490833112e-05, "loss": 0.4801, "step": 19181 }, { "epoch": 0.4068206400712604, "grad_norm": 0.36438101530075073, "learning_rate": 1.804274673062916e-05, "loss": 0.5211, "step": 19182 }, { "epoch": 0.4068418485291934, "grad_norm": 0.3867208659648895, "learning_rate": 1.8042548543982133e-05, "loss": 0.5591, "step": 19183 }, { "epoch": 0.4068630569871265, "grad_norm": 0.37228164076805115, "learning_rate": 1.804235034839025e-05, "loss": 0.4997, "step": 19184 }, { "epoch": 0.4068842654450595, "grad_norm": 0.3439241647720337, "learning_rate": 1.8042152143853737e-05, "loss": 0.5719, "step": 19185 }, { "epoch": 0.40690547390299253, "grad_norm": 0.38489770889282227, "learning_rate": 1.804195393037281e-05, "loss": 0.4651, "step": 19186 }, { "epoch": 0.40692668236092555, "grad_norm": 0.332943320274353, "learning_rate": 1.8041755707947692e-05, "loss": 0.5114, "step": 19187 }, { "epoch": 0.40694789081885857, "grad_norm": 0.3312655985355377, "learning_rate": 1.8041557476578604e-05, "loss": 0.48, "step": 19188 }, { "epoch": 0.4069690992767916, "grad_norm": 0.32184037566185, "learning_rate": 1.8041359236265767e-05, "loss": 0.4713, "step": 19189 }, { "epoch": 0.4069903077347246, "grad_norm": 0.3652603328227997, "learning_rate": 1.8041160987009397e-05, "loss": 0.4943, "step": 19190 }, { "epoch": 0.40701151619265763, "grad_norm": 0.3144654631614685, "learning_rate": 1.804096272880972e-05, "loss": 0.4608, "step": 19191 }, { "epoch": 0.40703272465059065, "grad_norm": 0.3654283881187439, "learning_rate": 1.8040764461666953e-05, "loss": 0.4805, "step": 19192 }, { "epoch": 0.40705393310852367, "grad_norm": 0.31469792127609253, "learning_rate": 1.8040566185581315e-05, "loss": 0.4104, "step": 19193 }, { "epoch": 0.4070751415664567, "grad_norm": 0.32475751638412476, "learning_rate": 1.8040367900553036e-05, "loss": 0.5125, "step": 19194 }, { "epoch": 0.4070963500243897, "grad_norm": 0.43418940901756287, "learning_rate": 1.8040169606582326e-05, "loss": 0.5692, "step": 19195 }, { "epoch": 0.40711755848232273, "grad_norm": 0.34789761900901794, "learning_rate": 1.8039971303669407e-05, "loss": 0.4906, "step": 19196 }, { "epoch": 0.40713876694025575, "grad_norm": 0.334515243768692, "learning_rate": 1.8039772991814503e-05, "loss": 0.4786, "step": 19197 }, { "epoch": 0.4071599753981888, "grad_norm": 0.6755418181419373, "learning_rate": 1.8039574671017835e-05, "loss": 0.4965, "step": 19198 }, { "epoch": 0.40718118385612184, "grad_norm": 0.3423585593700409, "learning_rate": 1.8039376341279623e-05, "loss": 0.4705, "step": 19199 }, { "epoch": 0.40720239231405486, "grad_norm": 0.36417731642723083, "learning_rate": 1.803917800260008e-05, "loss": 0.5438, "step": 19200 }, { "epoch": 0.4072236007719879, "grad_norm": 0.33358901739120483, "learning_rate": 1.803897965497944e-05, "loss": 0.437, "step": 19201 }, { "epoch": 0.4072448092299209, "grad_norm": 0.32798513770103455, "learning_rate": 1.8038781298417912e-05, "loss": 0.5368, "step": 19202 }, { "epoch": 0.4072660176878539, "grad_norm": 0.36202165484428406, "learning_rate": 1.8038582932915726e-05, "loss": 0.5058, "step": 19203 }, { "epoch": 0.40728722614578694, "grad_norm": 0.5117464065551758, "learning_rate": 1.803838455847309e-05, "loss": 0.48, "step": 19204 }, { "epoch": 0.40730843460371996, "grad_norm": 0.32805323600769043, "learning_rate": 1.8038186175090243e-05, "loss": 0.6097, "step": 19205 }, { "epoch": 0.407329643061653, "grad_norm": 0.3282076418399811, "learning_rate": 1.803798778276739e-05, "loss": 0.5072, "step": 19206 }, { "epoch": 0.407350851519586, "grad_norm": 0.3455120623111725, "learning_rate": 1.8037789381504756e-05, "loss": 0.4296, "step": 19207 }, { "epoch": 0.407372059977519, "grad_norm": 0.40757831931114197, "learning_rate": 1.8037590971302565e-05, "loss": 0.5052, "step": 19208 }, { "epoch": 0.40739326843545204, "grad_norm": 0.3476068377494812, "learning_rate": 1.8037392552161028e-05, "loss": 0.5137, "step": 19209 }, { "epoch": 0.40741447689338506, "grad_norm": 0.321378231048584, "learning_rate": 1.803719412408038e-05, "loss": 0.4681, "step": 19210 }, { "epoch": 0.40743568535131813, "grad_norm": 0.32680997252464294, "learning_rate": 1.803699568706083e-05, "loss": 0.444, "step": 19211 }, { "epoch": 0.40745689380925115, "grad_norm": 0.2990112006664276, "learning_rate": 1.8036797241102607e-05, "loss": 0.4066, "step": 19212 }, { "epoch": 0.4074781022671842, "grad_norm": 0.36519721150398254, "learning_rate": 1.8036598786205926e-05, "loss": 0.5804, "step": 19213 }, { "epoch": 0.4074993107251172, "grad_norm": 0.36643317341804504, "learning_rate": 1.803640032237101e-05, "loss": 0.4095, "step": 19214 }, { "epoch": 0.4075205191830502, "grad_norm": 0.35051488876342773, "learning_rate": 1.803620184959808e-05, "loss": 0.5091, "step": 19215 }, { "epoch": 0.40754172764098323, "grad_norm": 0.3751845359802246, "learning_rate": 1.8036003367887357e-05, "loss": 0.4777, "step": 19216 }, { "epoch": 0.40756293609891625, "grad_norm": 0.33973994851112366, "learning_rate": 1.803580487723906e-05, "loss": 0.5097, "step": 19217 }, { "epoch": 0.40758414455684927, "grad_norm": 0.3852106034755707, "learning_rate": 1.803560637765341e-05, "loss": 0.4809, "step": 19218 }, { "epoch": 0.4076053530147823, "grad_norm": 0.3225482404232025, "learning_rate": 1.8035407869130627e-05, "loss": 0.5091, "step": 19219 }, { "epoch": 0.4076265614727153, "grad_norm": 0.34510523080825806, "learning_rate": 1.803520935167094e-05, "loss": 0.4294, "step": 19220 }, { "epoch": 0.40764776993064833, "grad_norm": 0.38606706261634827, "learning_rate": 1.8035010825274555e-05, "loss": 0.5848, "step": 19221 }, { "epoch": 0.40766897838858135, "grad_norm": 0.3242306709289551, "learning_rate": 1.8034812289941704e-05, "loss": 0.4702, "step": 19222 }, { "epoch": 0.40769018684651437, "grad_norm": 0.3994203805923462, "learning_rate": 1.8034613745672605e-05, "loss": 0.5003, "step": 19223 }, { "epoch": 0.4077113953044474, "grad_norm": 0.37427419424057007, "learning_rate": 1.8034415192467477e-05, "loss": 0.5068, "step": 19224 }, { "epoch": 0.40773260376238046, "grad_norm": 0.34220078587532043, "learning_rate": 1.8034216630326542e-05, "loss": 0.4825, "step": 19225 }, { "epoch": 0.4077538122203135, "grad_norm": 0.36960843205451965, "learning_rate": 1.8034018059250022e-05, "loss": 0.5639, "step": 19226 }, { "epoch": 0.4077750206782465, "grad_norm": 0.3550627529621124, "learning_rate": 1.8033819479238138e-05, "loss": 0.5122, "step": 19227 }, { "epoch": 0.4077962291361795, "grad_norm": 0.3677574396133423, "learning_rate": 1.8033620890291107e-05, "loss": 0.5419, "step": 19228 }, { "epoch": 0.40781743759411254, "grad_norm": 0.3357057571411133, "learning_rate": 1.8033422292409156e-05, "loss": 0.4961, "step": 19229 }, { "epoch": 0.40783864605204556, "grad_norm": 0.38416147232055664, "learning_rate": 1.80332236855925e-05, "loss": 0.5473, "step": 19230 }, { "epoch": 0.4078598545099786, "grad_norm": 0.3338661789894104, "learning_rate": 1.803302506984136e-05, "loss": 0.5524, "step": 19231 }, { "epoch": 0.4078810629679116, "grad_norm": 0.33306118845939636, "learning_rate": 1.8032826445155966e-05, "loss": 0.4969, "step": 19232 }, { "epoch": 0.4079022714258446, "grad_norm": 0.30604830384254456, "learning_rate": 1.803262781153653e-05, "loss": 0.5221, "step": 19233 }, { "epoch": 0.40792347988377764, "grad_norm": 0.3312963545322418, "learning_rate": 1.8032429168983273e-05, "loss": 0.5733, "step": 19234 }, { "epoch": 0.40794468834171066, "grad_norm": 0.3469668924808502, "learning_rate": 1.8032230517496418e-05, "loss": 0.5112, "step": 19235 }, { "epoch": 0.4079658967996437, "grad_norm": 0.30225250124931335, "learning_rate": 1.8032031857076187e-05, "loss": 0.4478, "step": 19236 }, { "epoch": 0.4079871052575767, "grad_norm": 0.3228413164615631, "learning_rate": 1.80318331877228e-05, "loss": 0.5428, "step": 19237 }, { "epoch": 0.4080083137155097, "grad_norm": 0.3147449195384979, "learning_rate": 1.8031634509436475e-05, "loss": 0.4639, "step": 19238 }, { "epoch": 0.4080295221734428, "grad_norm": 0.3434876799583435, "learning_rate": 1.803143582221744e-05, "loss": 0.5086, "step": 19239 }, { "epoch": 0.4080507306313758, "grad_norm": 0.331744909286499, "learning_rate": 1.803123712606591e-05, "loss": 0.517, "step": 19240 }, { "epoch": 0.40807193908930883, "grad_norm": 0.36219125986099243, "learning_rate": 1.8031038420982108e-05, "loss": 0.4793, "step": 19241 }, { "epoch": 0.40809314754724185, "grad_norm": 0.34862712025642395, "learning_rate": 1.8030839706966255e-05, "loss": 0.4984, "step": 19242 }, { "epoch": 0.40811435600517487, "grad_norm": 0.34925591945648193, "learning_rate": 1.8030640984018574e-05, "loss": 0.518, "step": 19243 }, { "epoch": 0.4081355644631079, "grad_norm": 0.3190400004386902, "learning_rate": 1.803044225213928e-05, "loss": 0.4888, "step": 19244 }, { "epoch": 0.4081567729210409, "grad_norm": 0.39898809790611267, "learning_rate": 1.80302435113286e-05, "loss": 0.445, "step": 19245 }, { "epoch": 0.40817798137897393, "grad_norm": 0.35504886507987976, "learning_rate": 1.8030044761586753e-05, "loss": 0.5433, "step": 19246 }, { "epoch": 0.40819918983690695, "grad_norm": 0.34474971890449524, "learning_rate": 1.8029846002913957e-05, "loss": 0.5917, "step": 19247 }, { "epoch": 0.40822039829483997, "grad_norm": 0.36234959959983826, "learning_rate": 1.8029647235310438e-05, "loss": 0.4776, "step": 19248 }, { "epoch": 0.408241606752773, "grad_norm": 0.37080061435699463, "learning_rate": 1.802944845877642e-05, "loss": 0.5135, "step": 19249 }, { "epoch": 0.408262815210706, "grad_norm": 0.3548276126384735, "learning_rate": 1.802924967331211e-05, "loss": 0.5942, "step": 19250 }, { "epoch": 0.40828402366863903, "grad_norm": 0.3233380913734436, "learning_rate": 1.8029050878917747e-05, "loss": 0.4843, "step": 19251 }, { "epoch": 0.4083052321265721, "grad_norm": 0.4202461838722229, "learning_rate": 1.8028852075593538e-05, "loss": 0.486, "step": 19252 }, { "epoch": 0.4083264405845051, "grad_norm": 0.36263588070869446, "learning_rate": 1.802865326333971e-05, "loss": 0.5044, "step": 19253 }, { "epoch": 0.40834764904243814, "grad_norm": 0.31739047169685364, "learning_rate": 1.8028454442156483e-05, "loss": 0.4524, "step": 19254 }, { "epoch": 0.40836885750037116, "grad_norm": 0.3039480447769165, "learning_rate": 1.8028255612044085e-05, "loss": 0.4567, "step": 19255 }, { "epoch": 0.4083900659583042, "grad_norm": 0.3215146064758301, "learning_rate": 1.8028056773002726e-05, "loss": 0.5545, "step": 19256 }, { "epoch": 0.4084112744162372, "grad_norm": 0.3083065152168274, "learning_rate": 1.802785792503263e-05, "loss": 0.4665, "step": 19257 }, { "epoch": 0.4084324828741702, "grad_norm": 0.32350897789001465, "learning_rate": 1.8027659068134024e-05, "loss": 0.5062, "step": 19258 }, { "epoch": 0.40845369133210324, "grad_norm": 0.38640424609184265, "learning_rate": 1.8027460202307125e-05, "loss": 0.6037, "step": 19259 }, { "epoch": 0.40847489979003626, "grad_norm": 0.34829673171043396, "learning_rate": 1.802726132755215e-05, "loss": 0.4328, "step": 19260 }, { "epoch": 0.4084961082479693, "grad_norm": 0.3256131112575531, "learning_rate": 1.802706244386933e-05, "loss": 0.5285, "step": 19261 }, { "epoch": 0.4085173167059023, "grad_norm": 0.30111441016197205, "learning_rate": 1.8026863551258875e-05, "loss": 0.4774, "step": 19262 }, { "epoch": 0.4085385251638353, "grad_norm": 0.3353675603866577, "learning_rate": 1.8026664649721016e-05, "loss": 0.5207, "step": 19263 }, { "epoch": 0.40855973362176834, "grad_norm": 0.33650118112564087, "learning_rate": 1.8026465739255972e-05, "loss": 0.5352, "step": 19264 }, { "epoch": 0.40858094207970136, "grad_norm": 0.34797897934913635, "learning_rate": 1.802626681986396e-05, "loss": 0.5083, "step": 19265 }, { "epoch": 0.40860215053763443, "grad_norm": 0.3409143388271332, "learning_rate": 1.8026067891545206e-05, "loss": 0.5146, "step": 19266 }, { "epoch": 0.40862335899556745, "grad_norm": 0.3731969892978668, "learning_rate": 1.8025868954299925e-05, "loss": 0.5569, "step": 19267 }, { "epoch": 0.4086445674535005, "grad_norm": 0.33967819809913635, "learning_rate": 1.8025670008128346e-05, "loss": 0.4371, "step": 19268 }, { "epoch": 0.4086657759114335, "grad_norm": 0.32258540391921997, "learning_rate": 1.8025471053030682e-05, "loss": 0.4352, "step": 19269 }, { "epoch": 0.4086869843693665, "grad_norm": 0.31669506430625916, "learning_rate": 1.8025272089007166e-05, "loss": 0.5078, "step": 19270 }, { "epoch": 0.40870819282729953, "grad_norm": 0.4214109480381012, "learning_rate": 1.8025073116058006e-05, "loss": 0.5746, "step": 19271 }, { "epoch": 0.40872940128523255, "grad_norm": 0.3377307057380676, "learning_rate": 1.8024874134183432e-05, "loss": 0.483, "step": 19272 }, { "epoch": 0.40875060974316557, "grad_norm": 0.4054742753505707, "learning_rate": 1.8024675143383662e-05, "loss": 0.4857, "step": 19273 }, { "epoch": 0.4087718182010986, "grad_norm": 0.34028249979019165, "learning_rate": 1.8024476143658917e-05, "loss": 0.4689, "step": 19274 }, { "epoch": 0.4087930266590316, "grad_norm": 0.32974129915237427, "learning_rate": 1.802427713500942e-05, "loss": 0.4337, "step": 19275 }, { "epoch": 0.40881423511696463, "grad_norm": 0.3455875515937805, "learning_rate": 1.8024078117435394e-05, "loss": 0.4718, "step": 19276 }, { "epoch": 0.40883544357489765, "grad_norm": 0.3349333703517914, "learning_rate": 1.8023879090937055e-05, "loss": 0.5329, "step": 19277 }, { "epoch": 0.40885665203283067, "grad_norm": 0.42803722620010376, "learning_rate": 1.802368005551463e-05, "loss": 0.5541, "step": 19278 }, { "epoch": 0.4088778604907637, "grad_norm": 0.37981823086738586, "learning_rate": 1.8023481011168335e-05, "loss": 0.531, "step": 19279 }, { "epoch": 0.40889906894869676, "grad_norm": 0.3645459711551666, "learning_rate": 1.8023281957898395e-05, "loss": 0.5387, "step": 19280 }, { "epoch": 0.4089202774066298, "grad_norm": 0.28564611077308655, "learning_rate": 1.8023082895705034e-05, "loss": 0.5285, "step": 19281 }, { "epoch": 0.4089414858645628, "grad_norm": 0.509675145149231, "learning_rate": 1.8022883824588465e-05, "loss": 0.5075, "step": 19282 }, { "epoch": 0.4089626943224958, "grad_norm": 0.35279127955436707, "learning_rate": 1.8022684744548915e-05, "loss": 0.5351, "step": 19283 }, { "epoch": 0.40898390278042884, "grad_norm": 0.32237064838409424, "learning_rate": 1.8022485655586606e-05, "loss": 0.4959, "step": 19284 }, { "epoch": 0.40900511123836186, "grad_norm": 0.341259241104126, "learning_rate": 1.802228655770176e-05, "loss": 0.4751, "step": 19285 }, { "epoch": 0.4090263196962949, "grad_norm": 0.37220504879951477, "learning_rate": 1.8022087450894594e-05, "loss": 0.5115, "step": 19286 }, { "epoch": 0.4090475281542279, "grad_norm": 0.35633575916290283, "learning_rate": 1.802188833516533e-05, "loss": 0.5859, "step": 19287 }, { "epoch": 0.4090687366121609, "grad_norm": 0.3646484613418579, "learning_rate": 1.8021689210514197e-05, "loss": 0.6041, "step": 19288 }, { "epoch": 0.40908994507009394, "grad_norm": 0.31097474694252014, "learning_rate": 1.8021490076941406e-05, "loss": 0.4579, "step": 19289 }, { "epoch": 0.40911115352802696, "grad_norm": 0.34572187066078186, "learning_rate": 1.8021290934447187e-05, "loss": 0.4643, "step": 19290 }, { "epoch": 0.40913236198596, "grad_norm": 0.36812031269073486, "learning_rate": 1.8021091783031756e-05, "loss": 0.5843, "step": 19291 }, { "epoch": 0.409153570443893, "grad_norm": 0.32228535413742065, "learning_rate": 1.8020892622695336e-05, "loss": 0.4819, "step": 19292 }, { "epoch": 0.4091747789018261, "grad_norm": 0.3710297644138336, "learning_rate": 1.802069345343815e-05, "loss": 0.5182, "step": 19293 }, { "epoch": 0.4091959873597591, "grad_norm": 0.36014458537101746, "learning_rate": 1.8020494275260417e-05, "loss": 0.4898, "step": 19294 }, { "epoch": 0.4092171958176921, "grad_norm": 0.3316453993320465, "learning_rate": 1.802029508816236e-05, "loss": 0.4905, "step": 19295 }, { "epoch": 0.40923840427562513, "grad_norm": 0.3693416118621826, "learning_rate": 1.8020095892144204e-05, "loss": 0.5156, "step": 19296 }, { "epoch": 0.40925961273355815, "grad_norm": 0.33446982502937317, "learning_rate": 1.8019896687206164e-05, "loss": 0.4675, "step": 19297 }, { "epoch": 0.4092808211914912, "grad_norm": 0.35329484939575195, "learning_rate": 1.8019697473348467e-05, "loss": 0.4614, "step": 19298 }, { "epoch": 0.4093020296494242, "grad_norm": 0.38669338822364807, "learning_rate": 1.801949825057133e-05, "loss": 0.5264, "step": 19299 }, { "epoch": 0.4093232381073572, "grad_norm": 0.3319191336631775, "learning_rate": 1.8019299018874977e-05, "loss": 0.4882, "step": 19300 }, { "epoch": 0.40934444656529023, "grad_norm": 0.34052756428718567, "learning_rate": 1.8019099778259627e-05, "loss": 0.5031, "step": 19301 }, { "epoch": 0.40936565502322325, "grad_norm": 0.3572981357574463, "learning_rate": 1.8018900528725506e-05, "loss": 0.5713, "step": 19302 }, { "epoch": 0.40938686348115627, "grad_norm": 0.34016111493110657, "learning_rate": 1.8018701270272835e-05, "loss": 0.4604, "step": 19303 }, { "epoch": 0.4094080719390893, "grad_norm": 0.3173791766166687, "learning_rate": 1.8018502002901832e-05, "loss": 0.4356, "step": 19304 }, { "epoch": 0.4094292803970223, "grad_norm": 0.3107544481754303, "learning_rate": 1.801830272661272e-05, "loss": 0.4868, "step": 19305 }, { "epoch": 0.40945048885495533, "grad_norm": 0.3010365664958954, "learning_rate": 1.8018103441405722e-05, "loss": 0.4127, "step": 19306 }, { "epoch": 0.4094716973128884, "grad_norm": 0.3268184959888458, "learning_rate": 1.8017904147281062e-05, "loss": 0.503, "step": 19307 }, { "epoch": 0.4094929057708214, "grad_norm": 0.32188957929611206, "learning_rate": 1.8017704844238958e-05, "loss": 0.5114, "step": 19308 }, { "epoch": 0.40951411422875444, "grad_norm": 0.32692521810531616, "learning_rate": 1.801750553227963e-05, "loss": 0.5425, "step": 19309 }, { "epoch": 0.40953532268668746, "grad_norm": 0.34311601519584656, "learning_rate": 1.8017306211403304e-05, "loss": 0.5074, "step": 19310 }, { "epoch": 0.4095565311446205, "grad_norm": 0.3492242097854614, "learning_rate": 1.8017106881610198e-05, "loss": 0.5722, "step": 19311 }, { "epoch": 0.4095777396025535, "grad_norm": 0.375924289226532, "learning_rate": 1.8016907542900536e-05, "loss": 0.4581, "step": 19312 }, { "epoch": 0.4095989480604865, "grad_norm": 0.3221535086631775, "learning_rate": 1.8016708195274537e-05, "loss": 0.4521, "step": 19313 }, { "epoch": 0.40962015651841954, "grad_norm": 0.31672731041908264, "learning_rate": 1.801650883873243e-05, "loss": 0.4563, "step": 19314 }, { "epoch": 0.40964136497635256, "grad_norm": 0.30939945578575134, "learning_rate": 1.8016309473274425e-05, "loss": 0.4785, "step": 19315 }, { "epoch": 0.4096625734342856, "grad_norm": 0.32659992575645447, "learning_rate": 1.8016110098900755e-05, "loss": 0.555, "step": 19316 }, { "epoch": 0.4096837818922186, "grad_norm": 0.35136908292770386, "learning_rate": 1.8015910715611634e-05, "loss": 0.473, "step": 19317 }, { "epoch": 0.4097049903501516, "grad_norm": 0.38917410373687744, "learning_rate": 1.8015711323407287e-05, "loss": 0.4615, "step": 19318 }, { "epoch": 0.40972619880808464, "grad_norm": 0.3594405949115753, "learning_rate": 1.801551192228794e-05, "loss": 0.5656, "step": 19319 }, { "epoch": 0.40974740726601766, "grad_norm": 0.36311015486717224, "learning_rate": 1.8015312512253803e-05, "loss": 0.4837, "step": 19320 }, { "epoch": 0.40976861572395074, "grad_norm": 0.3615941107273102, "learning_rate": 1.8015113093305113e-05, "loss": 0.5555, "step": 19321 }, { "epoch": 0.40978982418188376, "grad_norm": 0.3901660442352295, "learning_rate": 1.801491366544208e-05, "loss": 0.5456, "step": 19322 }, { "epoch": 0.4098110326398168, "grad_norm": 0.33173486590385437, "learning_rate": 1.801471422866493e-05, "loss": 0.4543, "step": 19323 }, { "epoch": 0.4098322410977498, "grad_norm": 0.36538878083229065, "learning_rate": 1.8014514782973882e-05, "loss": 0.5375, "step": 19324 }, { "epoch": 0.4098534495556828, "grad_norm": 0.33842331171035767, "learning_rate": 1.801431532836916e-05, "loss": 0.4121, "step": 19325 }, { "epoch": 0.40987465801361583, "grad_norm": 0.34930598735809326, "learning_rate": 1.801411586485099e-05, "loss": 0.5708, "step": 19326 }, { "epoch": 0.40989586647154885, "grad_norm": 0.3573935329914093, "learning_rate": 1.8013916392419588e-05, "loss": 0.5124, "step": 19327 }, { "epoch": 0.4099170749294819, "grad_norm": 0.32385146617889404, "learning_rate": 1.8013716911075178e-05, "loss": 0.5404, "step": 19328 }, { "epoch": 0.4099382833874149, "grad_norm": 0.2989155650138855, "learning_rate": 1.8013517420817982e-05, "loss": 0.4718, "step": 19329 }, { "epoch": 0.4099594918453479, "grad_norm": 0.3233919143676758, "learning_rate": 1.801331792164822e-05, "loss": 0.5276, "step": 19330 }, { "epoch": 0.40998070030328093, "grad_norm": 0.39688533544540405, "learning_rate": 1.8013118413566118e-05, "loss": 0.5206, "step": 19331 }, { "epoch": 0.41000190876121395, "grad_norm": 0.33522355556488037, "learning_rate": 1.8012918896571894e-05, "loss": 0.5123, "step": 19332 }, { "epoch": 0.41002311721914697, "grad_norm": 0.325296550989151, "learning_rate": 1.801271937066577e-05, "loss": 0.5159, "step": 19333 }, { "epoch": 0.41004432567708005, "grad_norm": 0.40618693828582764, "learning_rate": 1.801251983584797e-05, "loss": 0.465, "step": 19334 }, { "epoch": 0.41006553413501307, "grad_norm": 0.3091730773448944, "learning_rate": 1.8012320292118717e-05, "loss": 0.4601, "step": 19335 }, { "epoch": 0.4100867425929461, "grad_norm": 0.3448522984981537, "learning_rate": 1.801212073947823e-05, "loss": 0.5051, "step": 19336 }, { "epoch": 0.4101079510508791, "grad_norm": 0.30668145418167114, "learning_rate": 1.801192117792673e-05, "loss": 0.4479, "step": 19337 }, { "epoch": 0.4101291595088121, "grad_norm": 0.3798719346523285, "learning_rate": 1.8011721607464443e-05, "loss": 0.5126, "step": 19338 }, { "epoch": 0.41015036796674514, "grad_norm": 0.3246224820613861, "learning_rate": 1.801152202809159e-05, "loss": 0.4907, "step": 19339 }, { "epoch": 0.41017157642467816, "grad_norm": 0.35469886660575867, "learning_rate": 1.801132243980839e-05, "loss": 0.5441, "step": 19340 }, { "epoch": 0.4101927848826112, "grad_norm": 0.30813294649124146, "learning_rate": 1.8011122842615062e-05, "loss": 0.4011, "step": 19341 }, { "epoch": 0.4102139933405442, "grad_norm": 0.33494502305984497, "learning_rate": 1.801092323651184e-05, "loss": 0.4798, "step": 19342 }, { "epoch": 0.4102352017984772, "grad_norm": 0.36283349990844727, "learning_rate": 1.801072362149894e-05, "loss": 0.568, "step": 19343 }, { "epoch": 0.41025641025641024, "grad_norm": 0.40767210721969604, "learning_rate": 1.8010523997576576e-05, "loss": 0.4509, "step": 19344 }, { "epoch": 0.41027761871434326, "grad_norm": 0.3516058325767517, "learning_rate": 1.801032436474498e-05, "loss": 0.4609, "step": 19345 }, { "epoch": 0.4102988271722763, "grad_norm": 0.8890022039413452, "learning_rate": 1.801012472300437e-05, "loss": 0.502, "step": 19346 }, { "epoch": 0.4103200356302093, "grad_norm": 0.307644784450531, "learning_rate": 1.8009925072354967e-05, "loss": 0.41, "step": 19347 }, { "epoch": 0.4103412440881424, "grad_norm": 0.39748385548591614, "learning_rate": 1.8009725412797e-05, "loss": 0.4746, "step": 19348 }, { "epoch": 0.4103624525460754, "grad_norm": 0.3332102298736572, "learning_rate": 1.8009525744330683e-05, "loss": 0.4776, "step": 19349 }, { "epoch": 0.4103836610040084, "grad_norm": 0.35754433274269104, "learning_rate": 1.8009326066956242e-05, "loss": 0.4965, "step": 19350 }, { "epoch": 0.41040486946194144, "grad_norm": 0.40404874086380005, "learning_rate": 1.8009126380673897e-05, "loss": 0.4435, "step": 19351 }, { "epoch": 0.41042607791987445, "grad_norm": 0.335400253534317, "learning_rate": 1.8008926685483873e-05, "loss": 0.5398, "step": 19352 }, { "epoch": 0.4104472863778075, "grad_norm": 0.320307195186615, "learning_rate": 1.8008726981386386e-05, "loss": 0.433, "step": 19353 }, { "epoch": 0.4104684948357405, "grad_norm": 0.34580597281455994, "learning_rate": 1.8008527268381667e-05, "loss": 0.5168, "step": 19354 }, { "epoch": 0.4104897032936735, "grad_norm": 0.3586769700050354, "learning_rate": 1.8008327546469932e-05, "loss": 0.5752, "step": 19355 }, { "epoch": 0.41051091175160653, "grad_norm": 0.3457200527191162, "learning_rate": 1.8008127815651405e-05, "loss": 0.5276, "step": 19356 }, { "epoch": 0.41053212020953955, "grad_norm": 0.3332868218421936, "learning_rate": 1.8007928075926306e-05, "loss": 0.4654, "step": 19357 }, { "epoch": 0.4105533286674726, "grad_norm": 0.3504428565502167, "learning_rate": 1.8007728327294863e-05, "loss": 0.5065, "step": 19358 }, { "epoch": 0.4105745371254056, "grad_norm": 0.3367924094200134, "learning_rate": 1.8007528569757288e-05, "loss": 0.523, "step": 19359 }, { "epoch": 0.4105957455833386, "grad_norm": 0.3248618543148041, "learning_rate": 1.8007328803313812e-05, "loss": 0.5316, "step": 19360 }, { "epoch": 0.4106169540412717, "grad_norm": 0.3331088721752167, "learning_rate": 1.8007129027964656e-05, "loss": 0.4592, "step": 19361 }, { "epoch": 0.4106381624992047, "grad_norm": 0.33085912466049194, "learning_rate": 1.8006929243710042e-05, "loss": 0.5253, "step": 19362 }, { "epoch": 0.4106593709571377, "grad_norm": 0.3561708927154541, "learning_rate": 1.800672945055019e-05, "loss": 0.5396, "step": 19363 }, { "epoch": 0.41068057941507075, "grad_norm": 0.3303467631340027, "learning_rate": 1.800652964848532e-05, "loss": 0.4761, "step": 19364 }, { "epoch": 0.41070178787300377, "grad_norm": 0.37402522563934326, "learning_rate": 1.8006329837515658e-05, "loss": 0.5679, "step": 19365 }, { "epoch": 0.4107229963309368, "grad_norm": 0.33629491925239563, "learning_rate": 1.8006130017641425e-05, "loss": 0.5633, "step": 19366 }, { "epoch": 0.4107442047888698, "grad_norm": 0.31891024112701416, "learning_rate": 1.8005930188862847e-05, "loss": 0.484, "step": 19367 }, { "epoch": 0.4107654132468028, "grad_norm": 0.35222887992858887, "learning_rate": 1.8005730351180143e-05, "loss": 0.5332, "step": 19368 }, { "epoch": 0.41078662170473584, "grad_norm": 0.7126697897911072, "learning_rate": 1.800553050459353e-05, "loss": 0.5624, "step": 19369 }, { "epoch": 0.41080783016266886, "grad_norm": 0.3512726128101349, "learning_rate": 1.8005330649103242e-05, "loss": 0.5628, "step": 19370 }, { "epoch": 0.4108290386206019, "grad_norm": 0.3639034628868103, "learning_rate": 1.8005130784709493e-05, "loss": 0.5099, "step": 19371 }, { "epoch": 0.4108502470785349, "grad_norm": 0.3372528553009033, "learning_rate": 1.8004930911412506e-05, "loss": 0.544, "step": 19372 }, { "epoch": 0.4108714555364679, "grad_norm": 0.32409271597862244, "learning_rate": 1.8004731029212505e-05, "loss": 0.434, "step": 19373 }, { "epoch": 0.41089266399440094, "grad_norm": 0.33582603931427, "learning_rate": 1.800453113810971e-05, "loss": 0.5762, "step": 19374 }, { "epoch": 0.410913872452334, "grad_norm": 0.33999302983283997, "learning_rate": 1.8004331238104344e-05, "loss": 0.5532, "step": 19375 }, { "epoch": 0.41093508091026704, "grad_norm": 0.3965909779071808, "learning_rate": 1.800413132919663e-05, "loss": 0.5104, "step": 19376 }, { "epoch": 0.41095628936820006, "grad_norm": 0.328559935092926, "learning_rate": 1.8003931411386796e-05, "loss": 0.5582, "step": 19377 }, { "epoch": 0.4109774978261331, "grad_norm": 0.42412716150283813, "learning_rate": 1.8003731484675058e-05, "loss": 0.5173, "step": 19378 }, { "epoch": 0.4109987062840661, "grad_norm": 0.3512718975543976, "learning_rate": 1.8003531549061633e-05, "loss": 0.4761, "step": 19379 }, { "epoch": 0.4110199147419991, "grad_norm": 0.33871227502822876, "learning_rate": 1.8003331604546756e-05, "loss": 0.5459, "step": 19380 }, { "epoch": 0.41104112319993213, "grad_norm": 0.36138370633125305, "learning_rate": 1.8003131651130644e-05, "loss": 0.5195, "step": 19381 }, { "epoch": 0.41106233165786515, "grad_norm": 0.4741852283477783, "learning_rate": 1.8002931688813513e-05, "loss": 0.4901, "step": 19382 }, { "epoch": 0.4110835401157982, "grad_norm": 0.32629379630088806, "learning_rate": 1.8002731717595594e-05, "loss": 0.4974, "step": 19383 }, { "epoch": 0.4111047485737312, "grad_norm": 0.3361666202545166, "learning_rate": 1.8002531737477107e-05, "loss": 0.4463, "step": 19384 }, { "epoch": 0.4111259570316642, "grad_norm": 0.3430745601654053, "learning_rate": 1.8002331748458272e-05, "loss": 0.5153, "step": 19385 }, { "epoch": 0.41114716548959723, "grad_norm": 0.34571948647499084, "learning_rate": 1.8002131750539313e-05, "loss": 0.4481, "step": 19386 }, { "epoch": 0.41116837394753025, "grad_norm": 0.3066147267818451, "learning_rate": 1.8001931743720457e-05, "loss": 0.5083, "step": 19387 }, { "epoch": 0.41118958240546327, "grad_norm": 0.34977400302886963, "learning_rate": 1.8001731728001917e-05, "loss": 0.5584, "step": 19388 }, { "epoch": 0.41121079086339635, "grad_norm": 0.31049254536628723, "learning_rate": 1.8001531703383923e-05, "loss": 0.4331, "step": 19389 }, { "epoch": 0.41123199932132937, "grad_norm": 0.37828004360198975, "learning_rate": 1.8001331669866694e-05, "loss": 0.5781, "step": 19390 }, { "epoch": 0.4112532077792624, "grad_norm": 0.3899887502193451, "learning_rate": 1.800113162745045e-05, "loss": 0.4992, "step": 19391 }, { "epoch": 0.4112744162371954, "grad_norm": 0.34125274419784546, "learning_rate": 1.8000931576135423e-05, "loss": 0.507, "step": 19392 }, { "epoch": 0.4112956246951284, "grad_norm": 0.34055253863334656, "learning_rate": 1.800073151592183e-05, "loss": 0.481, "step": 19393 }, { "epoch": 0.41131683315306145, "grad_norm": 0.3414549231529236, "learning_rate": 1.8000531446809886e-05, "loss": 0.4594, "step": 19394 }, { "epoch": 0.41133804161099446, "grad_norm": 0.3363111615180969, "learning_rate": 1.8000331368799825e-05, "loss": 0.5046, "step": 19395 }, { "epoch": 0.4113592500689275, "grad_norm": 0.37588557600975037, "learning_rate": 1.8000131281891862e-05, "loss": 0.5334, "step": 19396 }, { "epoch": 0.4113804585268605, "grad_norm": 0.5210537910461426, "learning_rate": 1.7999931186086225e-05, "loss": 0.5533, "step": 19397 }, { "epoch": 0.4114016669847935, "grad_norm": 0.42796728014945984, "learning_rate": 1.7999731081383134e-05, "loss": 0.4998, "step": 19398 }, { "epoch": 0.41142287544272654, "grad_norm": 0.36199337244033813, "learning_rate": 1.799953096778281e-05, "loss": 0.528, "step": 19399 }, { "epoch": 0.41144408390065956, "grad_norm": 0.44665056467056274, "learning_rate": 1.799933084528548e-05, "loss": 0.5478, "step": 19400 }, { "epoch": 0.4114652923585926, "grad_norm": 0.36241886019706726, "learning_rate": 1.7999130713891363e-05, "loss": 0.5772, "step": 19401 }, { "epoch": 0.41148650081652566, "grad_norm": 0.36171603202819824, "learning_rate": 1.799893057360068e-05, "loss": 0.4928, "step": 19402 }, { "epoch": 0.4115077092744587, "grad_norm": 0.3888736069202423, "learning_rate": 1.7998730424413654e-05, "loss": 0.6099, "step": 19403 }, { "epoch": 0.4115289177323917, "grad_norm": 0.29341214895248413, "learning_rate": 1.7998530266330516e-05, "loss": 0.4223, "step": 19404 }, { "epoch": 0.4115501261903247, "grad_norm": 0.40604522824287415, "learning_rate": 1.7998330099351478e-05, "loss": 0.5274, "step": 19405 }, { "epoch": 0.41157133464825774, "grad_norm": 0.34856000542640686, "learning_rate": 1.799812992347677e-05, "loss": 0.4138, "step": 19406 }, { "epoch": 0.41159254310619076, "grad_norm": 0.347373902797699, "learning_rate": 1.7997929738706607e-05, "loss": 0.5458, "step": 19407 }, { "epoch": 0.4116137515641238, "grad_norm": 0.7373903393745422, "learning_rate": 1.7997729545041217e-05, "loss": 0.5654, "step": 19408 }, { "epoch": 0.4116349600220568, "grad_norm": 0.4294723868370056, "learning_rate": 1.7997529342480822e-05, "loss": 0.5438, "step": 19409 }, { "epoch": 0.4116561684799898, "grad_norm": 0.33870673179626465, "learning_rate": 1.7997329131025643e-05, "loss": 0.4909, "step": 19410 }, { "epoch": 0.41167737693792283, "grad_norm": 0.3555060923099518, "learning_rate": 1.799712891067591e-05, "loss": 0.5839, "step": 19411 }, { "epoch": 0.41169858539585585, "grad_norm": 0.3750144839286804, "learning_rate": 1.7996928681431837e-05, "loss": 0.5285, "step": 19412 }, { "epoch": 0.4117197938537889, "grad_norm": 0.39699849486351013, "learning_rate": 1.7996728443293646e-05, "loss": 0.5018, "step": 19413 }, { "epoch": 0.4117410023117219, "grad_norm": 0.35097119212150574, "learning_rate": 1.7996528196261565e-05, "loss": 0.4925, "step": 19414 }, { "epoch": 0.4117622107696549, "grad_norm": 0.3208838105201721, "learning_rate": 1.7996327940335815e-05, "loss": 0.485, "step": 19415 }, { "epoch": 0.411783419227588, "grad_norm": 0.35277754068374634, "learning_rate": 1.7996127675516618e-05, "loss": 0.5781, "step": 19416 }, { "epoch": 0.411804627685521, "grad_norm": 0.3580494225025177, "learning_rate": 1.79959274018042e-05, "loss": 0.495, "step": 19417 }, { "epoch": 0.411825836143454, "grad_norm": 0.3877030611038208, "learning_rate": 1.7995727119198774e-05, "loss": 0.5206, "step": 19418 }, { "epoch": 0.41184704460138705, "grad_norm": 0.3028857409954071, "learning_rate": 1.7995526827700577e-05, "loss": 0.4895, "step": 19419 }, { "epoch": 0.41186825305932007, "grad_norm": 0.39040088653564453, "learning_rate": 1.7995326527309822e-05, "loss": 0.5502, "step": 19420 }, { "epoch": 0.4118894615172531, "grad_norm": 0.3598986566066742, "learning_rate": 1.7995126218026736e-05, "loss": 0.5011, "step": 19421 }, { "epoch": 0.4119106699751861, "grad_norm": 0.4313357472419739, "learning_rate": 1.7994925899851538e-05, "loss": 0.4354, "step": 19422 }, { "epoch": 0.4119318784331191, "grad_norm": 0.3304305970668793, "learning_rate": 1.799472557278445e-05, "loss": 0.5011, "step": 19423 }, { "epoch": 0.41195308689105214, "grad_norm": 0.31244179606437683, "learning_rate": 1.7994525236825703e-05, "loss": 0.4731, "step": 19424 }, { "epoch": 0.41197429534898516, "grad_norm": 0.35181599855422974, "learning_rate": 1.7994324891975513e-05, "loss": 0.4769, "step": 19425 }, { "epoch": 0.4119955038069182, "grad_norm": 0.34202414751052856, "learning_rate": 1.7994124538234103e-05, "loss": 0.4803, "step": 19426 }, { "epoch": 0.4120167122648512, "grad_norm": 0.3324098587036133, "learning_rate": 1.7993924175601697e-05, "loss": 0.4922, "step": 19427 }, { "epoch": 0.4120379207227842, "grad_norm": 0.336455374956131, "learning_rate": 1.799372380407852e-05, "loss": 0.5381, "step": 19428 }, { "epoch": 0.41205912918071724, "grad_norm": 0.3339557647705078, "learning_rate": 1.7993523423664792e-05, "loss": 0.5301, "step": 19429 }, { "epoch": 0.4120803376386503, "grad_norm": 0.4009557068347931, "learning_rate": 1.7993323034360734e-05, "loss": 0.5832, "step": 19430 }, { "epoch": 0.41210154609658334, "grad_norm": 0.3550950884819031, "learning_rate": 1.7993122636166574e-05, "loss": 0.5168, "step": 19431 }, { "epoch": 0.41212275455451636, "grad_norm": 0.3753424286842346, "learning_rate": 1.7992922229082533e-05, "loss": 0.5482, "step": 19432 }, { "epoch": 0.4121439630124494, "grad_norm": 0.3335598409175873, "learning_rate": 1.7992721813108833e-05, "loss": 0.5171, "step": 19433 }, { "epoch": 0.4121651714703824, "grad_norm": 0.33232659101486206, "learning_rate": 1.7992521388245698e-05, "loss": 0.5428, "step": 19434 }, { "epoch": 0.4121863799283154, "grad_norm": 0.38015779852867126, "learning_rate": 1.7992320954493346e-05, "loss": 0.4945, "step": 19435 }, { "epoch": 0.41220758838624844, "grad_norm": 0.32661986351013184, "learning_rate": 1.799212051185201e-05, "loss": 0.4309, "step": 19436 }, { "epoch": 0.41222879684418146, "grad_norm": 0.3525357246398926, "learning_rate": 1.7991920060321903e-05, "loss": 0.5029, "step": 19437 }, { "epoch": 0.4122500053021145, "grad_norm": 0.46760013699531555, "learning_rate": 1.7991719599903256e-05, "loss": 0.5618, "step": 19438 }, { "epoch": 0.4122712137600475, "grad_norm": 0.36823832988739014, "learning_rate": 1.7991519130596283e-05, "loss": 0.4715, "step": 19439 }, { "epoch": 0.4122924222179805, "grad_norm": 0.3885924518108368, "learning_rate": 1.7991318652401213e-05, "loss": 0.4981, "step": 19440 }, { "epoch": 0.41231363067591353, "grad_norm": 0.3769398331642151, "learning_rate": 1.799111816531827e-05, "loss": 0.555, "step": 19441 }, { "epoch": 0.41233483913384655, "grad_norm": 0.31744861602783203, "learning_rate": 1.7990917669347672e-05, "loss": 0.557, "step": 19442 }, { "epoch": 0.41235604759177963, "grad_norm": 0.3796369135379791, "learning_rate": 1.7990717164489645e-05, "loss": 0.4618, "step": 19443 }, { "epoch": 0.41237725604971265, "grad_norm": 0.3340524733066559, "learning_rate": 1.7990516650744412e-05, "loss": 0.4994, "step": 19444 }, { "epoch": 0.41239846450764567, "grad_norm": 0.379361093044281, "learning_rate": 1.79903161281122e-05, "loss": 0.5553, "step": 19445 }, { "epoch": 0.4124196729655787, "grad_norm": 0.3661905825138092, "learning_rate": 1.7990115596593223e-05, "loss": 0.3835, "step": 19446 }, { "epoch": 0.4124408814235117, "grad_norm": 0.3086186349391937, "learning_rate": 1.798991505618771e-05, "loss": 0.4814, "step": 19447 }, { "epoch": 0.4124620898814447, "grad_norm": 0.33056408166885376, "learning_rate": 1.7989714506895883e-05, "loss": 0.5498, "step": 19448 }, { "epoch": 0.41248329833937775, "grad_norm": 0.33359915018081665, "learning_rate": 1.7989513948717965e-05, "loss": 0.5204, "step": 19449 }, { "epoch": 0.41250450679731077, "grad_norm": 0.3438020348548889, "learning_rate": 1.798931338165418e-05, "loss": 0.5441, "step": 19450 }, { "epoch": 0.4125257152552438, "grad_norm": 0.34387215971946716, "learning_rate": 1.798911280570475e-05, "loss": 0.5083, "step": 19451 }, { "epoch": 0.4125469237131768, "grad_norm": 0.35526496171951294, "learning_rate": 1.7988912220869895e-05, "loss": 0.5005, "step": 19452 }, { "epoch": 0.4125681321711098, "grad_norm": 0.3427410125732422, "learning_rate": 1.7988711627149845e-05, "loss": 0.561, "step": 19453 }, { "epoch": 0.41258934062904284, "grad_norm": 0.3449748456478119, "learning_rate": 1.798851102454482e-05, "loss": 0.5486, "step": 19454 }, { "epoch": 0.41261054908697586, "grad_norm": 0.39607056975364685, "learning_rate": 1.798831041305504e-05, "loss": 0.5416, "step": 19455 }, { "epoch": 0.4126317575449089, "grad_norm": 0.3878815770149231, "learning_rate": 1.798810979268073e-05, "loss": 0.5571, "step": 19456 }, { "epoch": 0.41265296600284196, "grad_norm": 0.3386213481426239, "learning_rate": 1.7987909163422114e-05, "loss": 0.5389, "step": 19457 }, { "epoch": 0.412674174460775, "grad_norm": 0.34396904706954956, "learning_rate": 1.7987708525279414e-05, "loss": 0.5396, "step": 19458 }, { "epoch": 0.412695382918708, "grad_norm": 0.3907860517501831, "learning_rate": 1.7987507878252856e-05, "loss": 0.5043, "step": 19459 }, { "epoch": 0.412716591376641, "grad_norm": 0.3733668327331543, "learning_rate": 1.798730722234266e-05, "loss": 0.5908, "step": 19460 }, { "epoch": 0.41273779983457404, "grad_norm": 0.3725789785385132, "learning_rate": 1.7987106557549055e-05, "loss": 0.549, "step": 19461 }, { "epoch": 0.41275900829250706, "grad_norm": 0.3309943377971649, "learning_rate": 1.7986905883872252e-05, "loss": 0.5374, "step": 19462 }, { "epoch": 0.4127802167504401, "grad_norm": 0.3440771698951721, "learning_rate": 1.7986705201312483e-05, "loss": 0.4657, "step": 19463 }, { "epoch": 0.4128014252083731, "grad_norm": 0.3811408281326294, "learning_rate": 1.7986504509869974e-05, "loss": 0.5091, "step": 19464 }, { "epoch": 0.4128226336663061, "grad_norm": 0.3385985791683197, "learning_rate": 1.7986303809544942e-05, "loss": 0.5575, "step": 19465 }, { "epoch": 0.41284384212423914, "grad_norm": 0.311491459608078, "learning_rate": 1.7986103100337614e-05, "loss": 0.4657, "step": 19466 }, { "epoch": 0.41286505058217216, "grad_norm": 0.35530027747154236, "learning_rate": 1.7985902382248207e-05, "loss": 0.4699, "step": 19467 }, { "epoch": 0.4128862590401052, "grad_norm": 0.5967230200767517, "learning_rate": 1.7985701655276955e-05, "loss": 0.4778, "step": 19468 }, { "epoch": 0.4129074674980382, "grad_norm": 0.44942939281463623, "learning_rate": 1.798550091942407e-05, "loss": 0.4814, "step": 19469 }, { "epoch": 0.4129286759559712, "grad_norm": 0.5254689455032349, "learning_rate": 1.798530017468978e-05, "loss": 0.4727, "step": 19470 }, { "epoch": 0.4129498844139043, "grad_norm": 0.42752885818481445, "learning_rate": 1.798509942107431e-05, "loss": 0.5507, "step": 19471 }, { "epoch": 0.4129710928718373, "grad_norm": 0.35079437494277954, "learning_rate": 1.7984898658577884e-05, "loss": 0.5073, "step": 19472 }, { "epoch": 0.41299230132977033, "grad_norm": 0.418544203042984, "learning_rate": 1.798469788720072e-05, "loss": 0.5885, "step": 19473 }, { "epoch": 0.41301350978770335, "grad_norm": 0.3795366585254669, "learning_rate": 1.7984497106943045e-05, "loss": 0.6062, "step": 19474 }, { "epoch": 0.41303471824563637, "grad_norm": 0.35474225878715515, "learning_rate": 1.798429631780508e-05, "loss": 0.5217, "step": 19475 }, { "epoch": 0.4130559267035694, "grad_norm": 0.389026939868927, "learning_rate": 1.7984095519787056e-05, "loss": 0.681, "step": 19476 }, { "epoch": 0.4130771351615024, "grad_norm": 0.3446982502937317, "learning_rate": 1.7983894712889186e-05, "loss": 0.5411, "step": 19477 }, { "epoch": 0.4130983436194354, "grad_norm": 0.31731632351875305, "learning_rate": 1.79836938971117e-05, "loss": 0.5265, "step": 19478 }, { "epoch": 0.41311955207736845, "grad_norm": 0.3104651868343353, "learning_rate": 1.7983493072454814e-05, "loss": 0.5264, "step": 19479 }, { "epoch": 0.41314076053530147, "grad_norm": 0.3659161925315857, "learning_rate": 1.798329223891876e-05, "loss": 0.5501, "step": 19480 }, { "epoch": 0.4131619689932345, "grad_norm": 0.3506544828414917, "learning_rate": 1.7983091396503755e-05, "loss": 0.5922, "step": 19481 }, { "epoch": 0.4131831774511675, "grad_norm": 0.327615886926651, "learning_rate": 1.798289054521003e-05, "loss": 0.5129, "step": 19482 }, { "epoch": 0.4132043859091005, "grad_norm": 0.3494397699832916, "learning_rate": 1.79826896850378e-05, "loss": 0.541, "step": 19483 }, { "epoch": 0.4132255943670336, "grad_norm": 0.335296094417572, "learning_rate": 1.7982488815987292e-05, "loss": 0.5271, "step": 19484 }, { "epoch": 0.4132468028249666, "grad_norm": 0.3344474732875824, "learning_rate": 1.798228793805873e-05, "loss": 0.4622, "step": 19485 }, { "epoch": 0.41326801128289964, "grad_norm": 0.3873726427555084, "learning_rate": 1.7982087051252337e-05, "loss": 0.5745, "step": 19486 }, { "epoch": 0.41328921974083266, "grad_norm": 0.38615044951438904, "learning_rate": 1.7981886155568334e-05, "loss": 0.5731, "step": 19487 }, { "epoch": 0.4133104281987657, "grad_norm": 0.3144858777523041, "learning_rate": 1.7981685251006952e-05, "loss": 0.4815, "step": 19488 }, { "epoch": 0.4133316366566987, "grad_norm": 0.34399136900901794, "learning_rate": 1.7981484337568403e-05, "loss": 0.5105, "step": 19489 }, { "epoch": 0.4133528451146317, "grad_norm": 0.36071112751960754, "learning_rate": 1.798128341525292e-05, "loss": 0.576, "step": 19490 }, { "epoch": 0.41337405357256474, "grad_norm": 0.2958458662033081, "learning_rate": 1.798108248406072e-05, "loss": 0.429, "step": 19491 }, { "epoch": 0.41339526203049776, "grad_norm": 0.36282557249069214, "learning_rate": 1.7980881543992034e-05, "loss": 0.4814, "step": 19492 }, { "epoch": 0.4134164704884308, "grad_norm": 0.32694342732429504, "learning_rate": 1.7980680595047077e-05, "loss": 0.4673, "step": 19493 }, { "epoch": 0.4134376789463638, "grad_norm": 0.3851377069950104, "learning_rate": 1.7980479637226075e-05, "loss": 0.5718, "step": 19494 }, { "epoch": 0.4134588874042968, "grad_norm": 0.33758753538131714, "learning_rate": 1.7980278670529256e-05, "loss": 0.5352, "step": 19495 }, { "epoch": 0.41348009586222984, "grad_norm": 0.43538710474967957, "learning_rate": 1.798007769495684e-05, "loss": 0.4899, "step": 19496 }, { "epoch": 0.41350130432016285, "grad_norm": 0.32796066999435425, "learning_rate": 1.797987671050905e-05, "loss": 0.5698, "step": 19497 }, { "epoch": 0.41352251277809593, "grad_norm": 0.34283187985420227, "learning_rate": 1.797967571718611e-05, "loss": 0.5128, "step": 19498 }, { "epoch": 0.41354372123602895, "grad_norm": 0.32059139013290405, "learning_rate": 1.7979474714988248e-05, "loss": 0.4567, "step": 19499 }, { "epoch": 0.41356492969396197, "grad_norm": 0.3385751247406006, "learning_rate": 1.7979273703915677e-05, "loss": 0.4802, "step": 19500 }, { "epoch": 0.413586138151895, "grad_norm": 0.709087073802948, "learning_rate": 1.797907268396863e-05, "loss": 0.5757, "step": 19501 }, { "epoch": 0.413607346609828, "grad_norm": 0.3268066942691803, "learning_rate": 1.797887165514733e-05, "loss": 0.517, "step": 19502 }, { "epoch": 0.41362855506776103, "grad_norm": 0.4055063724517822, "learning_rate": 1.7978670617451998e-05, "loss": 0.574, "step": 19503 }, { "epoch": 0.41364976352569405, "grad_norm": 0.3215485215187073, "learning_rate": 1.7978469570882855e-05, "loss": 0.4837, "step": 19504 }, { "epoch": 0.41367097198362707, "grad_norm": 0.3659774959087372, "learning_rate": 1.797826851544013e-05, "loss": 0.5106, "step": 19505 }, { "epoch": 0.4136921804415601, "grad_norm": 0.33782005310058594, "learning_rate": 1.797806745112404e-05, "loss": 0.4517, "step": 19506 }, { "epoch": 0.4137133888994931, "grad_norm": 0.36259403824806213, "learning_rate": 1.7977866377934815e-05, "loss": 0.5001, "step": 19507 }, { "epoch": 0.4137345973574261, "grad_norm": 0.33802661299705505, "learning_rate": 1.7977665295872678e-05, "loss": 0.4329, "step": 19508 }, { "epoch": 0.41375580581535915, "grad_norm": 0.36201387643814087, "learning_rate": 1.797746420493785e-05, "loss": 0.557, "step": 19509 }, { "epoch": 0.41377701427329217, "grad_norm": 0.3491964638233185, "learning_rate": 1.7977263105130555e-05, "loss": 0.5457, "step": 19510 }, { "epoch": 0.41379822273122524, "grad_norm": 0.3385242223739624, "learning_rate": 1.7977061996451017e-05, "loss": 0.4992, "step": 19511 }, { "epoch": 0.41381943118915826, "grad_norm": 0.3878401815891266, "learning_rate": 1.7976860878899463e-05, "loss": 0.5631, "step": 19512 }, { "epoch": 0.4138406396470913, "grad_norm": 0.3316763639450073, "learning_rate": 1.797665975247611e-05, "loss": 0.4768, "step": 19513 }, { "epoch": 0.4138618481050243, "grad_norm": 0.37765443325042725, "learning_rate": 1.7976458617181187e-05, "loss": 0.6013, "step": 19514 }, { "epoch": 0.4138830565629573, "grad_norm": 0.3495352864265442, "learning_rate": 1.7976257473014916e-05, "loss": 0.5093, "step": 19515 }, { "epoch": 0.41390426502089034, "grad_norm": 0.38967227935791016, "learning_rate": 1.797605631997752e-05, "loss": 0.5394, "step": 19516 }, { "epoch": 0.41392547347882336, "grad_norm": 0.41923093795776367, "learning_rate": 1.7975855158069225e-05, "loss": 0.555, "step": 19517 }, { "epoch": 0.4139466819367564, "grad_norm": 0.35276469588279724, "learning_rate": 1.7975653987290253e-05, "loss": 0.5409, "step": 19518 }, { "epoch": 0.4139678903946894, "grad_norm": 0.41686147451400757, "learning_rate": 1.7975452807640826e-05, "loss": 0.4953, "step": 19519 }, { "epoch": 0.4139890988526224, "grad_norm": 0.4383860230445862, "learning_rate": 1.797525161912117e-05, "loss": 0.5347, "step": 19520 }, { "epoch": 0.41401030731055544, "grad_norm": 0.30981749296188354, "learning_rate": 1.7975050421731508e-05, "loss": 0.5275, "step": 19521 }, { "epoch": 0.41403151576848846, "grad_norm": 0.34154972434043884, "learning_rate": 1.7974849215472066e-05, "loss": 0.5269, "step": 19522 }, { "epoch": 0.4140527242264215, "grad_norm": 0.4240773618221283, "learning_rate": 1.7974648000343066e-05, "loss": 0.5453, "step": 19523 }, { "epoch": 0.4140739326843545, "grad_norm": 0.31366920471191406, "learning_rate": 1.797444677634473e-05, "loss": 0.4374, "step": 19524 }, { "epoch": 0.41409514114228757, "grad_norm": 0.33785054087638855, "learning_rate": 1.7974245543477286e-05, "loss": 0.4804, "step": 19525 }, { "epoch": 0.4141163496002206, "grad_norm": 0.3214957118034363, "learning_rate": 1.797404430174095e-05, "loss": 0.5343, "step": 19526 }, { "epoch": 0.4141375580581536, "grad_norm": 0.3255537748336792, "learning_rate": 1.7973843051135957e-05, "loss": 0.4864, "step": 19527 }, { "epoch": 0.41415876651608663, "grad_norm": 0.3693329691886902, "learning_rate": 1.797364179166252e-05, "loss": 0.5182, "step": 19528 }, { "epoch": 0.41417997497401965, "grad_norm": 0.32811641693115234, "learning_rate": 1.7973440523320874e-05, "loss": 0.4154, "step": 19529 }, { "epoch": 0.41420118343195267, "grad_norm": 0.34911563992500305, "learning_rate": 1.7973239246111233e-05, "loss": 0.4599, "step": 19530 }, { "epoch": 0.4142223918898857, "grad_norm": 0.3709208071231842, "learning_rate": 1.7973037960033827e-05, "loss": 0.5016, "step": 19531 }, { "epoch": 0.4142436003478187, "grad_norm": 0.3815111815929413, "learning_rate": 1.7972836665088875e-05, "loss": 0.5916, "step": 19532 }, { "epoch": 0.4142648088057517, "grad_norm": 0.3169083893299103, "learning_rate": 1.79726353612766e-05, "loss": 0.5225, "step": 19533 }, { "epoch": 0.41428601726368475, "grad_norm": 0.37405651807785034, "learning_rate": 1.7972434048597235e-05, "loss": 0.4172, "step": 19534 }, { "epoch": 0.41430722572161777, "grad_norm": 0.40792185068130493, "learning_rate": 1.7972232727051e-05, "loss": 0.4739, "step": 19535 }, { "epoch": 0.4143284341795508, "grad_norm": 0.3563932776451111, "learning_rate": 1.797203139663811e-05, "loss": 0.563, "step": 19536 }, { "epoch": 0.4143496426374838, "grad_norm": 0.37578868865966797, "learning_rate": 1.79718300573588e-05, "loss": 0.5707, "step": 19537 }, { "epoch": 0.4143708510954168, "grad_norm": 0.3477688431739807, "learning_rate": 1.7971628709213293e-05, "loss": 0.4822, "step": 19538 }, { "epoch": 0.4143920595533499, "grad_norm": 0.3211938440799713, "learning_rate": 1.7971427352201804e-05, "loss": 0.4501, "step": 19539 }, { "epoch": 0.4144132680112829, "grad_norm": 0.3561916649341583, "learning_rate": 1.7971225986324564e-05, "loss": 0.5533, "step": 19540 }, { "epoch": 0.41443447646921594, "grad_norm": 0.3202834725379944, "learning_rate": 1.7971024611581796e-05, "loss": 0.4606, "step": 19541 }, { "epoch": 0.41445568492714896, "grad_norm": 0.288496196269989, "learning_rate": 1.7970823227973728e-05, "loss": 0.5525, "step": 19542 }, { "epoch": 0.414476893385082, "grad_norm": 0.4297492206096649, "learning_rate": 1.797062183550057e-05, "loss": 0.4692, "step": 19543 }, { "epoch": 0.414498101843015, "grad_norm": 0.34603551030158997, "learning_rate": 1.7970420434162564e-05, "loss": 0.5362, "step": 19544 }, { "epoch": 0.414519310300948, "grad_norm": 0.32547858357429504, "learning_rate": 1.7970219023959924e-05, "loss": 0.467, "step": 19545 }, { "epoch": 0.41454051875888104, "grad_norm": 0.38885295391082764, "learning_rate": 1.7970017604892874e-05, "loss": 0.6276, "step": 19546 }, { "epoch": 0.41456172721681406, "grad_norm": 0.3533923923969269, "learning_rate": 1.7969816176961638e-05, "loss": 0.5109, "step": 19547 }, { "epoch": 0.4145829356747471, "grad_norm": 0.33273202180862427, "learning_rate": 1.7969614740166446e-05, "loss": 0.5004, "step": 19548 }, { "epoch": 0.4146041441326801, "grad_norm": 0.3547632098197937, "learning_rate": 1.7969413294507515e-05, "loss": 0.5402, "step": 19549 }, { "epoch": 0.4146253525906131, "grad_norm": 0.3559402525424957, "learning_rate": 1.796921183998507e-05, "loss": 0.5766, "step": 19550 }, { "epoch": 0.41464656104854614, "grad_norm": 0.3922908902168274, "learning_rate": 1.796901037659934e-05, "loss": 0.4703, "step": 19551 }, { "epoch": 0.4146677695064792, "grad_norm": 0.32800212502479553, "learning_rate": 1.7968808904350547e-05, "loss": 0.431, "step": 19552 }, { "epoch": 0.41468897796441223, "grad_norm": 0.39628738164901733, "learning_rate": 1.7968607423238913e-05, "loss": 0.587, "step": 19553 }, { "epoch": 0.41471018642234525, "grad_norm": 0.30552244186401367, "learning_rate": 1.7968405933264665e-05, "loss": 0.5407, "step": 19554 }, { "epoch": 0.41473139488027827, "grad_norm": 0.34776175022125244, "learning_rate": 1.796820443442802e-05, "loss": 0.5398, "step": 19555 }, { "epoch": 0.4147526033382113, "grad_norm": 0.325324684381485, "learning_rate": 1.796800292672921e-05, "loss": 0.4655, "step": 19556 }, { "epoch": 0.4147738117961443, "grad_norm": 0.3423222005367279, "learning_rate": 1.7967801410168454e-05, "loss": 0.5074, "step": 19557 }, { "epoch": 0.41479502025407733, "grad_norm": 0.352258563041687, "learning_rate": 1.7967599884745982e-05, "loss": 0.5013, "step": 19558 }, { "epoch": 0.41481622871201035, "grad_norm": 0.3192698359489441, "learning_rate": 1.796739835046201e-05, "loss": 0.4451, "step": 19559 }, { "epoch": 0.41483743716994337, "grad_norm": 0.4083031415939331, "learning_rate": 1.796719680731677e-05, "loss": 0.5444, "step": 19560 }, { "epoch": 0.4148586456278764, "grad_norm": 0.3461759686470032, "learning_rate": 1.7966995255310485e-05, "loss": 0.4348, "step": 19561 }, { "epoch": 0.4148798540858094, "grad_norm": 0.3825940787792206, "learning_rate": 1.7966793694443373e-05, "loss": 0.4834, "step": 19562 }, { "epoch": 0.4149010625437424, "grad_norm": 0.37174659967422485, "learning_rate": 1.7966592124715664e-05, "loss": 0.5941, "step": 19563 }, { "epoch": 0.41492227100167545, "grad_norm": 0.38186079263687134, "learning_rate": 1.7966390546127583e-05, "loss": 0.4938, "step": 19564 }, { "epoch": 0.41494347945960847, "grad_norm": 0.35718172788619995, "learning_rate": 1.796618895867935e-05, "loss": 0.4691, "step": 19565 }, { "epoch": 0.41496468791754154, "grad_norm": 0.32416993379592896, "learning_rate": 1.796598736237119e-05, "loss": 0.4433, "step": 19566 }, { "epoch": 0.41498589637547456, "grad_norm": 0.36677199602127075, "learning_rate": 1.7965785757203327e-05, "loss": 0.569, "step": 19567 }, { "epoch": 0.4150071048334076, "grad_norm": 0.34511256217956543, "learning_rate": 1.7965584143175987e-05, "loss": 0.4575, "step": 19568 }, { "epoch": 0.4150283132913406, "grad_norm": 0.35063573718070984, "learning_rate": 1.796538252028939e-05, "loss": 0.5441, "step": 19569 }, { "epoch": 0.4150495217492736, "grad_norm": 0.31712961196899414, "learning_rate": 1.7965180888543773e-05, "loss": 0.4867, "step": 19570 }, { "epoch": 0.41507073020720664, "grad_norm": 0.3475269079208374, "learning_rate": 1.7964979247939345e-05, "loss": 0.566, "step": 19571 }, { "epoch": 0.41509193866513966, "grad_norm": 0.38330939412117004, "learning_rate": 1.7964777598476335e-05, "loss": 0.5898, "step": 19572 }, { "epoch": 0.4151131471230727, "grad_norm": 1.0336756706237793, "learning_rate": 1.7964575940154973e-05, "loss": 0.5155, "step": 19573 }, { "epoch": 0.4151343555810057, "grad_norm": 0.32659822702407837, "learning_rate": 1.7964374272975472e-05, "loss": 0.4771, "step": 19574 }, { "epoch": 0.4151555640389387, "grad_norm": 0.34688037633895874, "learning_rate": 1.796417259693807e-05, "loss": 0.5118, "step": 19575 }, { "epoch": 0.41517677249687174, "grad_norm": 0.403170108795166, "learning_rate": 1.796397091204298e-05, "loss": 0.5482, "step": 19576 }, { "epoch": 0.41519798095480476, "grad_norm": 0.3717224895954132, "learning_rate": 1.7963769218290434e-05, "loss": 0.5872, "step": 19577 }, { "epoch": 0.4152191894127378, "grad_norm": 0.33401134610176086, "learning_rate": 1.796356751568065e-05, "loss": 0.5548, "step": 19578 }, { "epoch": 0.4152403978706708, "grad_norm": 0.4004053771495819, "learning_rate": 1.796336580421386e-05, "loss": 0.4927, "step": 19579 }, { "epoch": 0.41526160632860387, "grad_norm": 0.32757997512817383, "learning_rate": 1.7963164083890278e-05, "loss": 0.5349, "step": 19580 }, { "epoch": 0.4152828147865369, "grad_norm": 0.3404613137245178, "learning_rate": 1.7962962354710137e-05, "loss": 0.5437, "step": 19581 }, { "epoch": 0.4153040232444699, "grad_norm": 0.4429648220539093, "learning_rate": 1.7962760616673658e-05, "loss": 0.5102, "step": 19582 }, { "epoch": 0.41532523170240293, "grad_norm": 0.4111568033695221, "learning_rate": 1.7962558869781066e-05, "loss": 0.4914, "step": 19583 }, { "epoch": 0.41534644016033595, "grad_norm": 0.3392505943775177, "learning_rate": 1.7962357114032588e-05, "loss": 0.5307, "step": 19584 }, { "epoch": 0.41536764861826897, "grad_norm": 0.31278038024902344, "learning_rate": 1.7962155349428442e-05, "loss": 0.5251, "step": 19585 }, { "epoch": 0.415388857076202, "grad_norm": 0.34643393754959106, "learning_rate": 1.7961953575968855e-05, "loss": 0.4561, "step": 19586 }, { "epoch": 0.415410065534135, "grad_norm": 0.3523736894130707, "learning_rate": 1.7961751793654056e-05, "loss": 0.539, "step": 19587 }, { "epoch": 0.41543127399206803, "grad_norm": 0.4818606376647949, "learning_rate": 1.7961550002484264e-05, "loss": 0.6252, "step": 19588 }, { "epoch": 0.41545248245000105, "grad_norm": 0.3497684895992279, "learning_rate": 1.7961348202459704e-05, "loss": 0.553, "step": 19589 }, { "epoch": 0.41547369090793407, "grad_norm": 0.3088866174221039, "learning_rate": 1.7961146393580604e-05, "loss": 0.4402, "step": 19590 }, { "epoch": 0.4154948993658671, "grad_norm": 0.3506222367286682, "learning_rate": 1.7960944575847187e-05, "loss": 0.5292, "step": 19591 }, { "epoch": 0.4155161078238001, "grad_norm": 0.3356415629386902, "learning_rate": 1.7960742749259674e-05, "loss": 0.5397, "step": 19592 }, { "epoch": 0.4155373162817332, "grad_norm": 0.3325943052768707, "learning_rate": 1.796054091381829e-05, "loss": 0.4434, "step": 19593 }, { "epoch": 0.4155585247396662, "grad_norm": 0.3555574119091034, "learning_rate": 1.7960339069523266e-05, "loss": 0.5194, "step": 19594 }, { "epoch": 0.4155797331975992, "grad_norm": 0.36482757329940796, "learning_rate": 1.796013721637482e-05, "loss": 0.497, "step": 19595 }, { "epoch": 0.41560094165553224, "grad_norm": 0.34633952379226685, "learning_rate": 1.795993535437318e-05, "loss": 0.5126, "step": 19596 }, { "epoch": 0.41562215011346526, "grad_norm": 0.3453832268714905, "learning_rate": 1.7959733483518567e-05, "loss": 0.4831, "step": 19597 }, { "epoch": 0.4156433585713983, "grad_norm": 0.37768182158470154, "learning_rate": 1.795953160381121e-05, "loss": 0.4883, "step": 19598 }, { "epoch": 0.4156645670293313, "grad_norm": 0.4253484606742859, "learning_rate": 1.795932971525133e-05, "loss": 0.4809, "step": 19599 }, { "epoch": 0.4156857754872643, "grad_norm": 0.3388165235519409, "learning_rate": 1.795912781783915e-05, "loss": 0.4157, "step": 19600 }, { "epoch": 0.41570698394519734, "grad_norm": 0.33621659874916077, "learning_rate": 1.79589259115749e-05, "loss": 0.5098, "step": 19601 }, { "epoch": 0.41572819240313036, "grad_norm": 0.3544735610485077, "learning_rate": 1.7958723996458803e-05, "loss": 0.5838, "step": 19602 }, { "epoch": 0.4157494008610634, "grad_norm": 0.3489391505718231, "learning_rate": 1.7958522072491076e-05, "loss": 0.4922, "step": 19603 }, { "epoch": 0.4157706093189964, "grad_norm": 0.42888617515563965, "learning_rate": 1.7958320139671957e-05, "loss": 0.5359, "step": 19604 }, { "epoch": 0.4157918177769294, "grad_norm": 0.3653029799461365, "learning_rate": 1.7958118198001662e-05, "loss": 0.4976, "step": 19605 }, { "epoch": 0.41581302623486244, "grad_norm": 0.33255475759506226, "learning_rate": 1.7957916247480415e-05, "loss": 0.5364, "step": 19606 }, { "epoch": 0.4158342346927955, "grad_norm": 0.46530866622924805, "learning_rate": 1.7957714288108442e-05, "loss": 0.4987, "step": 19607 }, { "epoch": 0.41585544315072853, "grad_norm": 0.40851885080337524, "learning_rate": 1.7957512319885972e-05, "loss": 0.5156, "step": 19608 }, { "epoch": 0.41587665160866155, "grad_norm": 0.3092123568058014, "learning_rate": 1.7957310342813223e-05, "loss": 0.5461, "step": 19609 }, { "epoch": 0.41589786006659457, "grad_norm": 0.34225475788116455, "learning_rate": 1.7957108356890424e-05, "loss": 0.4787, "step": 19610 }, { "epoch": 0.4159190685245276, "grad_norm": 0.34468552470207214, "learning_rate": 1.79569063621178e-05, "loss": 0.4839, "step": 19611 }, { "epoch": 0.4159402769824606, "grad_norm": 0.3695753812789917, "learning_rate": 1.795670435849557e-05, "loss": 0.534, "step": 19612 }, { "epoch": 0.41596148544039363, "grad_norm": 0.32156458497047424, "learning_rate": 1.7956502346023962e-05, "loss": 0.4918, "step": 19613 }, { "epoch": 0.41598269389832665, "grad_norm": 0.40894854068756104, "learning_rate": 1.7956300324703205e-05, "loss": 0.5557, "step": 19614 }, { "epoch": 0.41600390235625967, "grad_norm": 0.3458526134490967, "learning_rate": 1.795609829453352e-05, "loss": 0.5159, "step": 19615 }, { "epoch": 0.4160251108141927, "grad_norm": 0.31699880957603455, "learning_rate": 1.7955896255515126e-05, "loss": 0.447, "step": 19616 }, { "epoch": 0.4160463192721257, "grad_norm": 0.324470192193985, "learning_rate": 1.795569420764826e-05, "loss": 0.495, "step": 19617 }, { "epoch": 0.41606752773005873, "grad_norm": 0.3703017830848694, "learning_rate": 1.7955492150933132e-05, "loss": 0.5112, "step": 19618 }, { "epoch": 0.41608873618799175, "grad_norm": 0.3723715543746948, "learning_rate": 1.7955290085369984e-05, "loss": 0.5004, "step": 19619 }, { "epoch": 0.41610994464592477, "grad_norm": 0.3665456175804138, "learning_rate": 1.7955088010959025e-05, "loss": 0.5669, "step": 19620 }, { "epoch": 0.41613115310385784, "grad_norm": 0.348939448595047, "learning_rate": 1.7954885927700488e-05, "loss": 0.4688, "step": 19621 }, { "epoch": 0.41615236156179086, "grad_norm": 0.36435163021087646, "learning_rate": 1.79546838355946e-05, "loss": 0.4958, "step": 19622 }, { "epoch": 0.4161735700197239, "grad_norm": 0.30892443656921387, "learning_rate": 1.795448173464158e-05, "loss": 0.4637, "step": 19623 }, { "epoch": 0.4161947784776569, "grad_norm": 0.3465211093425751, "learning_rate": 1.795427962484165e-05, "loss": 0.5115, "step": 19624 }, { "epoch": 0.4162159869355899, "grad_norm": 0.3087579309940338, "learning_rate": 1.795407750619504e-05, "loss": 0.488, "step": 19625 }, { "epoch": 0.41623719539352294, "grad_norm": 0.32872894406318665, "learning_rate": 1.7953875378701976e-05, "loss": 0.4867, "step": 19626 }, { "epoch": 0.41625840385145596, "grad_norm": 0.31549152731895447, "learning_rate": 1.7953673242362682e-05, "loss": 0.4649, "step": 19627 }, { "epoch": 0.416279612309389, "grad_norm": 0.5480430722236633, "learning_rate": 1.795347109717738e-05, "loss": 0.5378, "step": 19628 }, { "epoch": 0.416300820767322, "grad_norm": 0.39132243394851685, "learning_rate": 1.79532689431463e-05, "loss": 0.5039, "step": 19629 }, { "epoch": 0.416322029225255, "grad_norm": 0.35387375950813293, "learning_rate": 1.7953066780269657e-05, "loss": 0.4672, "step": 19630 }, { "epoch": 0.41634323768318804, "grad_norm": 0.3681945502758026, "learning_rate": 1.7952864608547687e-05, "loss": 0.5387, "step": 19631 }, { "epoch": 0.41636444614112106, "grad_norm": 0.3776261806488037, "learning_rate": 1.795266242798061e-05, "loss": 0.5094, "step": 19632 }, { "epoch": 0.4163856545990541, "grad_norm": 0.33341100811958313, "learning_rate": 1.795246023856865e-05, "loss": 0.5431, "step": 19633 }, { "epoch": 0.41640686305698715, "grad_norm": 0.3169650435447693, "learning_rate": 1.7952258040312032e-05, "loss": 0.4884, "step": 19634 }, { "epoch": 0.4164280715149202, "grad_norm": 0.36060631275177, "learning_rate": 1.7952055833210983e-05, "loss": 0.5139, "step": 19635 }, { "epoch": 0.4164492799728532, "grad_norm": 0.37868013978004456, "learning_rate": 1.7951853617265724e-05, "loss": 0.5042, "step": 19636 }, { "epoch": 0.4164704884307862, "grad_norm": 0.3734830915927887, "learning_rate": 1.7951651392476486e-05, "loss": 0.4901, "step": 19637 }, { "epoch": 0.41649169688871923, "grad_norm": 0.5403239727020264, "learning_rate": 1.7951449158843492e-05, "loss": 0.5267, "step": 19638 }, { "epoch": 0.41651290534665225, "grad_norm": 0.44204574823379517, "learning_rate": 1.795124691636696e-05, "loss": 0.5453, "step": 19639 }, { "epoch": 0.41653411380458527, "grad_norm": 0.34604576230049133, "learning_rate": 1.795104466504712e-05, "loss": 0.4093, "step": 19640 }, { "epoch": 0.4165553222625183, "grad_norm": 0.3970840275287628, "learning_rate": 1.79508424048842e-05, "loss": 0.4886, "step": 19641 }, { "epoch": 0.4165765307204513, "grad_norm": 0.33523282408714294, "learning_rate": 1.795064013587842e-05, "loss": 0.578, "step": 19642 }, { "epoch": 0.41659773917838433, "grad_norm": 0.3432328999042511, "learning_rate": 1.795043785803001e-05, "loss": 0.5796, "step": 19643 }, { "epoch": 0.41661894763631735, "grad_norm": 0.36731284856796265, "learning_rate": 1.7950235571339192e-05, "loss": 0.5191, "step": 19644 }, { "epoch": 0.41664015609425037, "grad_norm": 0.345394492149353, "learning_rate": 1.7950033275806192e-05, "loss": 0.5367, "step": 19645 }, { "epoch": 0.4166613645521834, "grad_norm": 0.350813090801239, "learning_rate": 1.794983097143123e-05, "loss": 0.5069, "step": 19646 }, { "epoch": 0.4166825730101164, "grad_norm": 0.34543219208717346, "learning_rate": 1.7949628658214536e-05, "loss": 0.5271, "step": 19647 }, { "epoch": 0.4167037814680495, "grad_norm": 0.3714810311794281, "learning_rate": 1.7949426336156337e-05, "loss": 0.4728, "step": 19648 }, { "epoch": 0.4167249899259825, "grad_norm": 0.35583725571632385, "learning_rate": 1.7949224005256852e-05, "loss": 0.51, "step": 19649 }, { "epoch": 0.4167461983839155, "grad_norm": 0.3709428310394287, "learning_rate": 1.7949021665516312e-05, "loss": 0.5898, "step": 19650 }, { "epoch": 0.41676740684184854, "grad_norm": 0.4673735499382019, "learning_rate": 1.7948819316934937e-05, "loss": 0.5378, "step": 19651 }, { "epoch": 0.41678861529978156, "grad_norm": 0.4735090434551239, "learning_rate": 1.7948616959512956e-05, "loss": 0.5692, "step": 19652 }, { "epoch": 0.4168098237577146, "grad_norm": 0.3803693652153015, "learning_rate": 1.7948414593250588e-05, "loss": 0.4835, "step": 19653 }, { "epoch": 0.4168310322156476, "grad_norm": 0.3556317687034607, "learning_rate": 1.7948212218148065e-05, "loss": 0.4579, "step": 19654 }, { "epoch": 0.4168522406735806, "grad_norm": 0.4118269979953766, "learning_rate": 1.7948009834205612e-05, "loss": 0.5426, "step": 19655 }, { "epoch": 0.41687344913151364, "grad_norm": 0.38084349036216736, "learning_rate": 1.7947807441423452e-05, "loss": 0.5475, "step": 19656 }, { "epoch": 0.41689465758944666, "grad_norm": 0.5912991166114807, "learning_rate": 1.79476050398018e-05, "loss": 0.531, "step": 19657 }, { "epoch": 0.4169158660473797, "grad_norm": 0.3440474569797516, "learning_rate": 1.79474026293409e-05, "loss": 0.4113, "step": 19658 }, { "epoch": 0.4169370745053127, "grad_norm": 0.36345720291137695, "learning_rate": 1.7947200210040965e-05, "loss": 0.5396, "step": 19659 }, { "epoch": 0.4169582829632457, "grad_norm": 0.29417920112609863, "learning_rate": 1.7946997781902224e-05, "loss": 0.4441, "step": 19660 }, { "epoch": 0.41697949142117874, "grad_norm": 0.3625306785106659, "learning_rate": 1.79467953449249e-05, "loss": 0.4747, "step": 19661 }, { "epoch": 0.4170006998791118, "grad_norm": 0.5039672255516052, "learning_rate": 1.794659289910922e-05, "loss": 0.4457, "step": 19662 }, { "epoch": 0.41702190833704483, "grad_norm": 0.33946314454078674, "learning_rate": 1.7946390444455407e-05, "loss": 0.5242, "step": 19663 }, { "epoch": 0.41704311679497785, "grad_norm": 0.4696161448955536, "learning_rate": 1.7946187980963688e-05, "loss": 0.5145, "step": 19664 }, { "epoch": 0.4170643252529109, "grad_norm": 0.3337499797344208, "learning_rate": 1.794598550863429e-05, "loss": 0.4573, "step": 19665 }, { "epoch": 0.4170855337108439, "grad_norm": 0.3067191541194916, "learning_rate": 1.7945783027467432e-05, "loss": 0.4695, "step": 19666 }, { "epoch": 0.4171067421687769, "grad_norm": 0.3972409665584564, "learning_rate": 1.794558053746335e-05, "loss": 0.5423, "step": 19667 }, { "epoch": 0.41712795062670993, "grad_norm": 0.32719525694847107, "learning_rate": 1.7945378038622255e-05, "loss": 0.5073, "step": 19668 }, { "epoch": 0.41714915908464295, "grad_norm": 0.3495521545410156, "learning_rate": 1.7945175530944382e-05, "loss": 0.452, "step": 19669 }, { "epoch": 0.41717036754257597, "grad_norm": 0.3752425014972687, "learning_rate": 1.794497301442996e-05, "loss": 0.5209, "step": 19670 }, { "epoch": 0.417191576000509, "grad_norm": 0.39325645565986633, "learning_rate": 1.7944770489079198e-05, "loss": 0.5278, "step": 19671 }, { "epoch": 0.417212784458442, "grad_norm": 0.36608779430389404, "learning_rate": 1.7944567954892335e-05, "loss": 0.4665, "step": 19672 }, { "epoch": 0.41723399291637503, "grad_norm": 0.37169963121414185, "learning_rate": 1.7944365411869596e-05, "loss": 0.4408, "step": 19673 }, { "epoch": 0.41725520137430805, "grad_norm": 0.3638211488723755, "learning_rate": 1.79441628600112e-05, "loss": 0.5036, "step": 19674 }, { "epoch": 0.4172764098322411, "grad_norm": 0.37245258688926697, "learning_rate": 1.7943960299317374e-05, "loss": 0.5398, "step": 19675 }, { "epoch": 0.41729761829017414, "grad_norm": 0.35521379113197327, "learning_rate": 1.7943757729788346e-05, "loss": 0.4819, "step": 19676 }, { "epoch": 0.41731882674810716, "grad_norm": 0.39602652192115784, "learning_rate": 1.7943555151424336e-05, "loss": 0.5226, "step": 19677 }, { "epoch": 0.4173400352060402, "grad_norm": 0.3337404131889343, "learning_rate": 1.7943352564225577e-05, "loss": 0.4838, "step": 19678 }, { "epoch": 0.4173612436639732, "grad_norm": 0.3155215382575989, "learning_rate": 1.794314996819229e-05, "loss": 0.4461, "step": 19679 }, { "epoch": 0.4173824521219062, "grad_norm": 0.3544241487979889, "learning_rate": 1.7942947363324702e-05, "loss": 0.5366, "step": 19680 }, { "epoch": 0.41740366057983924, "grad_norm": 0.40165406465530396, "learning_rate": 1.7942744749623034e-05, "loss": 0.5596, "step": 19681 }, { "epoch": 0.41742486903777226, "grad_norm": 0.32727932929992676, "learning_rate": 1.7942542127087514e-05, "loss": 0.5078, "step": 19682 }, { "epoch": 0.4174460774957053, "grad_norm": 0.3387656807899475, "learning_rate": 1.7942339495718368e-05, "loss": 0.5328, "step": 19683 }, { "epoch": 0.4174672859536383, "grad_norm": 0.379647433757782, "learning_rate": 1.7942136855515824e-05, "loss": 0.4635, "step": 19684 }, { "epoch": 0.4174884944115713, "grad_norm": 0.31327953934669495, "learning_rate": 1.79419342064801e-05, "loss": 0.5062, "step": 19685 }, { "epoch": 0.41750970286950434, "grad_norm": 0.3394046127796173, "learning_rate": 1.794173154861143e-05, "loss": 0.5016, "step": 19686 }, { "epoch": 0.41753091132743736, "grad_norm": 0.49087968468666077, "learning_rate": 1.794152888191003e-05, "loss": 0.5743, "step": 19687 }, { "epoch": 0.4175521197853704, "grad_norm": 0.34915968775749207, "learning_rate": 1.7941326206376134e-05, "loss": 0.4531, "step": 19688 }, { "epoch": 0.41757332824330345, "grad_norm": 0.3809796869754791, "learning_rate": 1.7941123522009966e-05, "loss": 0.4636, "step": 19689 }, { "epoch": 0.4175945367012365, "grad_norm": 0.7674973607063293, "learning_rate": 1.7940920828811745e-05, "loss": 0.4367, "step": 19690 }, { "epoch": 0.4176157451591695, "grad_norm": 0.378863126039505, "learning_rate": 1.7940718126781705e-05, "loss": 0.5863, "step": 19691 }, { "epoch": 0.4176369536171025, "grad_norm": 0.396757572889328, "learning_rate": 1.7940515415920065e-05, "loss": 0.5004, "step": 19692 }, { "epoch": 0.41765816207503553, "grad_norm": 0.338558167219162, "learning_rate": 1.794031269622705e-05, "loss": 0.4432, "step": 19693 }, { "epoch": 0.41767937053296855, "grad_norm": 0.3556766211986542, "learning_rate": 1.7940109967702893e-05, "loss": 0.5206, "step": 19694 }, { "epoch": 0.41770057899090157, "grad_norm": 0.33136656880378723, "learning_rate": 1.793990723034781e-05, "loss": 0.4898, "step": 19695 }, { "epoch": 0.4177217874488346, "grad_norm": 0.36665409803390503, "learning_rate": 1.793970448416204e-05, "loss": 0.5412, "step": 19696 }, { "epoch": 0.4177429959067676, "grad_norm": 0.32660672068595886, "learning_rate": 1.793950172914579e-05, "loss": 0.4749, "step": 19697 }, { "epoch": 0.41776420436470063, "grad_norm": 0.4639304578304291, "learning_rate": 1.79392989652993e-05, "loss": 0.4769, "step": 19698 }, { "epoch": 0.41778541282263365, "grad_norm": 0.31493955850601196, "learning_rate": 1.7939096192622788e-05, "loss": 0.4643, "step": 19699 }, { "epoch": 0.41780662128056667, "grad_norm": 0.3571276366710663, "learning_rate": 1.7938893411116483e-05, "loss": 0.616, "step": 19700 }, { "epoch": 0.4178278297384997, "grad_norm": 0.40636691451072693, "learning_rate": 1.793869062078061e-05, "loss": 0.4839, "step": 19701 }, { "epoch": 0.41784903819643276, "grad_norm": 0.32946741580963135, "learning_rate": 1.7938487821615395e-05, "loss": 0.4459, "step": 19702 }, { "epoch": 0.4178702466543658, "grad_norm": 0.375415563583374, "learning_rate": 1.7938285013621062e-05, "loss": 0.5417, "step": 19703 }, { "epoch": 0.4178914551122988, "grad_norm": 0.3481428921222687, "learning_rate": 1.793808219679784e-05, "loss": 0.4647, "step": 19704 }, { "epoch": 0.4179126635702318, "grad_norm": 0.31526979804039, "learning_rate": 1.793787937114595e-05, "loss": 0.46, "step": 19705 }, { "epoch": 0.41793387202816484, "grad_norm": 0.32945993542671204, "learning_rate": 1.793767653666562e-05, "loss": 0.5055, "step": 19706 }, { "epoch": 0.41795508048609786, "grad_norm": 0.3183855712413788, "learning_rate": 1.7937473693357073e-05, "loss": 0.5108, "step": 19707 }, { "epoch": 0.4179762889440309, "grad_norm": 0.34837156534194946, "learning_rate": 1.7937270841220536e-05, "loss": 0.5593, "step": 19708 }, { "epoch": 0.4179974974019639, "grad_norm": 0.3647884428501129, "learning_rate": 1.793706798025624e-05, "loss": 0.4463, "step": 19709 }, { "epoch": 0.4180187058598969, "grad_norm": 0.35522323846817017, "learning_rate": 1.79368651104644e-05, "loss": 0.5777, "step": 19710 }, { "epoch": 0.41803991431782994, "grad_norm": 0.3688429296016693, "learning_rate": 1.7936662231845254e-05, "loss": 0.5417, "step": 19711 }, { "epoch": 0.41806112277576296, "grad_norm": 0.3584803640842438, "learning_rate": 1.7936459344399017e-05, "loss": 0.4834, "step": 19712 }, { "epoch": 0.418082331233696, "grad_norm": 0.33154168725013733, "learning_rate": 1.793625644812592e-05, "loss": 0.4072, "step": 19713 }, { "epoch": 0.418103539691629, "grad_norm": 0.3325745761394501, "learning_rate": 1.7936053543026186e-05, "loss": 0.5104, "step": 19714 }, { "epoch": 0.418124748149562, "grad_norm": 0.32608839869499207, "learning_rate": 1.7935850629100046e-05, "loss": 0.477, "step": 19715 }, { "epoch": 0.4181459566074951, "grad_norm": 0.39989373087882996, "learning_rate": 1.7935647706347717e-05, "loss": 0.4968, "step": 19716 }, { "epoch": 0.4181671650654281, "grad_norm": 0.3621653616428375, "learning_rate": 1.7935444774769434e-05, "loss": 0.424, "step": 19717 }, { "epoch": 0.41818837352336113, "grad_norm": 0.4685491621494293, "learning_rate": 1.7935241834365413e-05, "loss": 0.4721, "step": 19718 }, { "epoch": 0.41820958198129415, "grad_norm": 0.4264146089553833, "learning_rate": 1.793503888513589e-05, "loss": 0.6058, "step": 19719 }, { "epoch": 0.4182307904392272, "grad_norm": 0.3790552020072937, "learning_rate": 1.7934835927081083e-05, "loss": 0.565, "step": 19720 }, { "epoch": 0.4182519988971602, "grad_norm": 0.3245948553085327, "learning_rate": 1.793463296020122e-05, "loss": 0.4912, "step": 19721 }, { "epoch": 0.4182732073550932, "grad_norm": 0.40563997626304626, "learning_rate": 1.793442998449653e-05, "loss": 0.547, "step": 19722 }, { "epoch": 0.41829441581302623, "grad_norm": 0.35571131110191345, "learning_rate": 1.7934226999967236e-05, "loss": 0.3763, "step": 19723 }, { "epoch": 0.41831562427095925, "grad_norm": 0.4162479639053345, "learning_rate": 1.7934024006613562e-05, "loss": 0.5284, "step": 19724 }, { "epoch": 0.41833683272889227, "grad_norm": 0.3374418616294861, "learning_rate": 1.7933821004435734e-05, "loss": 0.4123, "step": 19725 }, { "epoch": 0.4183580411868253, "grad_norm": 0.35577115416526794, "learning_rate": 1.793361799343398e-05, "loss": 0.4866, "step": 19726 }, { "epoch": 0.4183792496447583, "grad_norm": 0.36395201086997986, "learning_rate": 1.7933414973608525e-05, "loss": 0.4896, "step": 19727 }, { "epoch": 0.41840045810269133, "grad_norm": 0.35853254795074463, "learning_rate": 1.7933211944959595e-05, "loss": 0.5086, "step": 19728 }, { "epoch": 0.41842166656062435, "grad_norm": 0.3813641369342804, "learning_rate": 1.7933008907487418e-05, "loss": 0.5551, "step": 19729 }, { "epoch": 0.4184428750185574, "grad_norm": 0.3952369689941406, "learning_rate": 1.7932805861192217e-05, "loss": 0.4838, "step": 19730 }, { "epoch": 0.41846408347649044, "grad_norm": 0.3655197024345398, "learning_rate": 1.7932602806074216e-05, "loss": 0.596, "step": 19731 }, { "epoch": 0.41848529193442346, "grad_norm": 0.3375152349472046, "learning_rate": 1.793239974213364e-05, "loss": 0.4376, "step": 19732 }, { "epoch": 0.4185065003923565, "grad_norm": 0.3153829872608185, "learning_rate": 1.7932196669370724e-05, "loss": 0.4505, "step": 19733 }, { "epoch": 0.4185277088502895, "grad_norm": 0.35350221395492554, "learning_rate": 1.7931993587785684e-05, "loss": 0.4818, "step": 19734 }, { "epoch": 0.4185489173082225, "grad_norm": 0.34817031025886536, "learning_rate": 1.7931790497378752e-05, "loss": 0.4734, "step": 19735 }, { "epoch": 0.41857012576615554, "grad_norm": 0.3049003481864929, "learning_rate": 1.793158739815015e-05, "loss": 0.436, "step": 19736 }, { "epoch": 0.41859133422408856, "grad_norm": 0.5373028516769409, "learning_rate": 1.7931384290100106e-05, "loss": 0.476, "step": 19737 }, { "epoch": 0.4186125426820216, "grad_norm": 0.34403902292251587, "learning_rate": 1.7931181173228846e-05, "loss": 0.4623, "step": 19738 }, { "epoch": 0.4186337511399546, "grad_norm": 0.3483743667602539, "learning_rate": 1.7930978047536595e-05, "loss": 0.4861, "step": 19739 }, { "epoch": 0.4186549595978876, "grad_norm": 0.3635016679763794, "learning_rate": 1.793077491302358e-05, "loss": 0.5878, "step": 19740 }, { "epoch": 0.41867616805582064, "grad_norm": 0.3285565972328186, "learning_rate": 1.7930571769690025e-05, "loss": 0.4877, "step": 19741 }, { "epoch": 0.41869737651375366, "grad_norm": 0.3924245834350586, "learning_rate": 1.7930368617536156e-05, "loss": 0.512, "step": 19742 }, { "epoch": 0.41871858497168674, "grad_norm": 0.33694615960121155, "learning_rate": 1.79301654565622e-05, "loss": 0.4981, "step": 19743 }, { "epoch": 0.41873979342961976, "grad_norm": 0.38187816739082336, "learning_rate": 1.7929962286768383e-05, "loss": 0.4827, "step": 19744 }, { "epoch": 0.4187610018875528, "grad_norm": 0.33104726672172546, "learning_rate": 1.792975910815493e-05, "loss": 0.5255, "step": 19745 }, { "epoch": 0.4187822103454858, "grad_norm": 0.3591631352901459, "learning_rate": 1.792955592072207e-05, "loss": 0.5555, "step": 19746 }, { "epoch": 0.4188034188034188, "grad_norm": 0.3786275088787079, "learning_rate": 1.7929352724470028e-05, "loss": 0.5173, "step": 19747 }, { "epoch": 0.41882462726135183, "grad_norm": 0.3282146453857422, "learning_rate": 1.7929149519399023e-05, "loss": 0.492, "step": 19748 }, { "epoch": 0.41884583571928485, "grad_norm": 0.36935746669769287, "learning_rate": 1.7928946305509293e-05, "loss": 0.5429, "step": 19749 }, { "epoch": 0.4188670441772179, "grad_norm": 0.36361831426620483, "learning_rate": 1.7928743082801053e-05, "loss": 0.4551, "step": 19750 }, { "epoch": 0.4188882526351509, "grad_norm": 0.37909576296806335, "learning_rate": 1.7928539851274535e-05, "loss": 0.5912, "step": 19751 }, { "epoch": 0.4189094610930839, "grad_norm": 0.33070889115333557, "learning_rate": 1.7928336610929968e-05, "loss": 0.5572, "step": 19752 }, { "epoch": 0.41893066955101693, "grad_norm": 0.30987945199012756, "learning_rate": 1.792813336176757e-05, "loss": 0.4582, "step": 19753 }, { "epoch": 0.41895187800894995, "grad_norm": 0.3192843794822693, "learning_rate": 1.792793010378757e-05, "loss": 0.4616, "step": 19754 }, { "epoch": 0.41897308646688297, "grad_norm": 0.3384086787700653, "learning_rate": 1.7927726836990195e-05, "loss": 0.4964, "step": 19755 }, { "epoch": 0.418994294924816, "grad_norm": 0.34877917170524597, "learning_rate": 1.7927523561375677e-05, "loss": 0.4938, "step": 19756 }, { "epoch": 0.41901550338274907, "grad_norm": 0.35475054383277893, "learning_rate": 1.792732027694423e-05, "loss": 0.518, "step": 19757 }, { "epoch": 0.4190367118406821, "grad_norm": 0.36612239480018616, "learning_rate": 1.7927116983696086e-05, "loss": 0.5691, "step": 19758 }, { "epoch": 0.4190579202986151, "grad_norm": 0.32564616203308105, "learning_rate": 1.7926913681631477e-05, "loss": 0.4679, "step": 19759 }, { "epoch": 0.4190791287565481, "grad_norm": 0.33432263135910034, "learning_rate": 1.792671037075062e-05, "loss": 0.4665, "step": 19760 }, { "epoch": 0.41910033721448114, "grad_norm": 0.31628987193107605, "learning_rate": 1.7926507051053743e-05, "loss": 0.4201, "step": 19761 }, { "epoch": 0.41912154567241416, "grad_norm": 0.35071277618408203, "learning_rate": 1.7926303722541076e-05, "loss": 0.4718, "step": 19762 }, { "epoch": 0.4191427541303472, "grad_norm": 0.3642025291919708, "learning_rate": 1.7926100385212842e-05, "loss": 0.5279, "step": 19763 }, { "epoch": 0.4191639625882802, "grad_norm": 0.5012632012367249, "learning_rate": 1.7925897039069265e-05, "loss": 0.5179, "step": 19764 }, { "epoch": 0.4191851710462132, "grad_norm": 0.3399650752544403, "learning_rate": 1.7925693684110582e-05, "loss": 0.5586, "step": 19765 }, { "epoch": 0.41920637950414624, "grad_norm": 0.3898453116416931, "learning_rate": 1.7925490320337004e-05, "loss": 0.5374, "step": 19766 }, { "epoch": 0.41922758796207926, "grad_norm": 0.3482770025730133, "learning_rate": 1.7925286947748767e-05, "loss": 0.4529, "step": 19767 }, { "epoch": 0.4192487964200123, "grad_norm": 0.34679919481277466, "learning_rate": 1.79250835663461e-05, "loss": 0.482, "step": 19768 }, { "epoch": 0.4192700048779453, "grad_norm": 0.3841509521007538, "learning_rate": 1.7924880176129216e-05, "loss": 0.5927, "step": 19769 }, { "epoch": 0.4192912133358783, "grad_norm": 0.3312162458896637, "learning_rate": 1.7924676777098352e-05, "loss": 0.5129, "step": 19770 }, { "epoch": 0.4193124217938114, "grad_norm": 0.3979128897190094, "learning_rate": 1.792447336925373e-05, "loss": 0.4843, "step": 19771 }, { "epoch": 0.4193336302517444, "grad_norm": 0.34753313660621643, "learning_rate": 1.792426995259558e-05, "loss": 0.5909, "step": 19772 }, { "epoch": 0.41935483870967744, "grad_norm": 0.34368082880973816, "learning_rate": 1.7924066527124124e-05, "loss": 0.5519, "step": 19773 }, { "epoch": 0.41937604716761046, "grad_norm": 0.34972473978996277, "learning_rate": 1.792386309283959e-05, "loss": 0.5023, "step": 19774 }, { "epoch": 0.4193972556255435, "grad_norm": 0.3876817524433136, "learning_rate": 1.7923659649742203e-05, "loss": 0.5974, "step": 19775 }, { "epoch": 0.4194184640834765, "grad_norm": 0.3393835425376892, "learning_rate": 1.7923456197832197e-05, "loss": 0.4935, "step": 19776 }, { "epoch": 0.4194396725414095, "grad_norm": 0.3104446530342102, "learning_rate": 1.7923252737109786e-05, "loss": 0.5111, "step": 19777 }, { "epoch": 0.41946088099934253, "grad_norm": 0.34168121218681335, "learning_rate": 1.7923049267575203e-05, "loss": 0.4896, "step": 19778 }, { "epoch": 0.41948208945727555, "grad_norm": 0.3855968713760376, "learning_rate": 1.7922845789228675e-05, "loss": 0.6024, "step": 19779 }, { "epoch": 0.4195032979152086, "grad_norm": 0.7689988017082214, "learning_rate": 1.7922642302070423e-05, "loss": 0.5778, "step": 19780 }, { "epoch": 0.4195245063731416, "grad_norm": 0.31917738914489746, "learning_rate": 1.792243880610068e-05, "loss": 0.4486, "step": 19781 }, { "epoch": 0.4195457148310746, "grad_norm": 0.3677181601524353, "learning_rate": 1.792223530131967e-05, "loss": 0.4796, "step": 19782 }, { "epoch": 0.41956692328900763, "grad_norm": 0.3901415765285492, "learning_rate": 1.7922031787727617e-05, "loss": 0.5153, "step": 19783 }, { "epoch": 0.4195881317469407, "grad_norm": 0.33173251152038574, "learning_rate": 1.792182826532475e-05, "loss": 0.4972, "step": 19784 }, { "epoch": 0.4196093402048737, "grad_norm": 0.32905226945877075, "learning_rate": 1.7921624734111292e-05, "loss": 0.5192, "step": 19785 }, { "epoch": 0.41963054866280675, "grad_norm": 0.34341761469841003, "learning_rate": 1.7921421194087474e-05, "loss": 0.468, "step": 19786 }, { "epoch": 0.41965175712073977, "grad_norm": 0.3205842077732086, "learning_rate": 1.7921217645253525e-05, "loss": 0.5025, "step": 19787 }, { "epoch": 0.4196729655786728, "grad_norm": 0.3390223979949951, "learning_rate": 1.792101408760966e-05, "loss": 0.4567, "step": 19788 }, { "epoch": 0.4196941740366058, "grad_norm": 0.39402496814727783, "learning_rate": 1.7920810521156113e-05, "loss": 0.5393, "step": 19789 }, { "epoch": 0.4197153824945388, "grad_norm": 0.34196409583091736, "learning_rate": 1.7920606945893112e-05, "loss": 0.4608, "step": 19790 }, { "epoch": 0.41973659095247184, "grad_norm": 0.3421382009983063, "learning_rate": 1.7920403361820878e-05, "loss": 0.5164, "step": 19791 }, { "epoch": 0.41975779941040486, "grad_norm": 0.3302696645259857, "learning_rate": 1.7920199768939642e-05, "loss": 0.3845, "step": 19792 }, { "epoch": 0.4197790078683379, "grad_norm": 0.33763912320137024, "learning_rate": 1.7919996167249627e-05, "loss": 0.5446, "step": 19793 }, { "epoch": 0.4198002163262709, "grad_norm": 0.3359851837158203, "learning_rate": 1.7919792556751062e-05, "loss": 0.475, "step": 19794 }, { "epoch": 0.4198214247842039, "grad_norm": 0.37657827138900757, "learning_rate": 1.791958893744417e-05, "loss": 0.5614, "step": 19795 }, { "epoch": 0.41984263324213694, "grad_norm": 0.3428723216056824, "learning_rate": 1.7919385309329184e-05, "loss": 0.5048, "step": 19796 }, { "epoch": 0.41986384170006996, "grad_norm": 0.3633304834365845, "learning_rate": 1.7919181672406325e-05, "loss": 0.5303, "step": 19797 }, { "epoch": 0.41988505015800304, "grad_norm": 0.32562363147735596, "learning_rate": 1.791897802667582e-05, "loss": 0.5888, "step": 19798 }, { "epoch": 0.41990625861593606, "grad_norm": 0.3623276352882385, "learning_rate": 1.79187743721379e-05, "loss": 0.579, "step": 19799 }, { "epoch": 0.4199274670738691, "grad_norm": 0.3531692624092102, "learning_rate": 1.7918570708792785e-05, "loss": 0.4576, "step": 19800 }, { "epoch": 0.4199486755318021, "grad_norm": 0.30832451581954956, "learning_rate": 1.79183670366407e-05, "loss": 0.4787, "step": 19801 }, { "epoch": 0.4199698839897351, "grad_norm": 0.35262635350227356, "learning_rate": 1.791816335568188e-05, "loss": 0.5226, "step": 19802 }, { "epoch": 0.41999109244766814, "grad_norm": 0.3311700224876404, "learning_rate": 1.791795966591655e-05, "loss": 0.4598, "step": 19803 }, { "epoch": 0.42001230090560115, "grad_norm": 0.3571566939353943, "learning_rate": 1.791775596734493e-05, "loss": 0.4868, "step": 19804 }, { "epoch": 0.4200335093635342, "grad_norm": 0.3405310809612274, "learning_rate": 1.7917552259967254e-05, "loss": 0.4257, "step": 19805 }, { "epoch": 0.4200547178214672, "grad_norm": 0.3826746642589569, "learning_rate": 1.7917348543783744e-05, "loss": 0.4269, "step": 19806 }, { "epoch": 0.4200759262794002, "grad_norm": 0.47019118070602417, "learning_rate": 1.791714481879463e-05, "loss": 0.5281, "step": 19807 }, { "epoch": 0.42009713473733323, "grad_norm": 0.5429978370666504, "learning_rate": 1.7916941085000132e-05, "loss": 0.4877, "step": 19808 }, { "epoch": 0.42011834319526625, "grad_norm": 0.34601420164108276, "learning_rate": 1.7916737342400484e-05, "loss": 0.4991, "step": 19809 }, { "epoch": 0.4201395516531993, "grad_norm": 0.4109237790107727, "learning_rate": 1.7916533590995904e-05, "loss": 0.5662, "step": 19810 }, { "epoch": 0.4201607601111323, "grad_norm": 0.3521471321582794, "learning_rate": 1.791632983078663e-05, "loss": 0.5197, "step": 19811 }, { "epoch": 0.42018196856906537, "grad_norm": 0.3511447310447693, "learning_rate": 1.7916126061772884e-05, "loss": 0.5366, "step": 19812 }, { "epoch": 0.4202031770269984, "grad_norm": 0.34514641761779785, "learning_rate": 1.7915922283954886e-05, "loss": 0.4999, "step": 19813 }, { "epoch": 0.4202243854849314, "grad_norm": 0.34974589943885803, "learning_rate": 1.791571849733287e-05, "loss": 0.4436, "step": 19814 }, { "epoch": 0.4202455939428644, "grad_norm": 0.34027209877967834, "learning_rate": 1.7915514701907062e-05, "loss": 0.4702, "step": 19815 }, { "epoch": 0.42026680240079745, "grad_norm": 0.3412225544452667, "learning_rate": 1.7915310897677686e-05, "loss": 0.4048, "step": 19816 }, { "epoch": 0.42028801085873047, "grad_norm": 0.34455153346061707, "learning_rate": 1.7915107084644968e-05, "loss": 0.4429, "step": 19817 }, { "epoch": 0.4203092193166635, "grad_norm": 0.31796297430992126, "learning_rate": 1.791490326280914e-05, "loss": 0.4817, "step": 19818 }, { "epoch": 0.4203304277745965, "grad_norm": 0.33428114652633667, "learning_rate": 1.7914699432170427e-05, "loss": 0.5334, "step": 19819 }, { "epoch": 0.4203516362325295, "grad_norm": 0.33179718255996704, "learning_rate": 1.7914495592729047e-05, "loss": 0.4682, "step": 19820 }, { "epoch": 0.42037284469046254, "grad_norm": 0.32816335558891296, "learning_rate": 1.791429174448524e-05, "loss": 0.4844, "step": 19821 }, { "epoch": 0.42039405314839556, "grad_norm": 1.4734026193618774, "learning_rate": 1.7914087887439224e-05, "loss": 0.5849, "step": 19822 }, { "epoch": 0.4204152616063286, "grad_norm": 0.3301876485347748, "learning_rate": 1.791388402159123e-05, "loss": 0.5448, "step": 19823 }, { "epoch": 0.4204364700642616, "grad_norm": 0.3179549276828766, "learning_rate": 1.791368014694148e-05, "loss": 0.5226, "step": 19824 }, { "epoch": 0.4204576785221947, "grad_norm": 0.36225131154060364, "learning_rate": 1.791347626349021e-05, "loss": 0.5584, "step": 19825 }, { "epoch": 0.4204788869801277, "grad_norm": 0.3072134256362915, "learning_rate": 1.7913272371237634e-05, "loss": 0.4688, "step": 19826 }, { "epoch": 0.4205000954380607, "grad_norm": 0.33363139629364014, "learning_rate": 1.7913068470183986e-05, "loss": 0.4619, "step": 19827 }, { "epoch": 0.42052130389599374, "grad_norm": 0.33241963386535645, "learning_rate": 1.7912864560329494e-05, "loss": 0.5088, "step": 19828 }, { "epoch": 0.42054251235392676, "grad_norm": 0.3510398864746094, "learning_rate": 1.791266064167438e-05, "loss": 0.4445, "step": 19829 }, { "epoch": 0.4205637208118598, "grad_norm": 0.3369416296482086, "learning_rate": 1.7912456714218877e-05, "loss": 0.5221, "step": 19830 }, { "epoch": 0.4205849292697928, "grad_norm": 0.39508453011512756, "learning_rate": 1.7912252777963207e-05, "loss": 0.5651, "step": 19831 }, { "epoch": 0.4206061377277258, "grad_norm": 0.3749464154243469, "learning_rate": 1.7912048832907597e-05, "loss": 0.4819, "step": 19832 }, { "epoch": 0.42062734618565883, "grad_norm": 0.3430537283420563, "learning_rate": 1.7911844879052277e-05, "loss": 0.5144, "step": 19833 }, { "epoch": 0.42064855464359185, "grad_norm": 0.34361743927001953, "learning_rate": 1.7911640916397467e-05, "loss": 0.5703, "step": 19834 }, { "epoch": 0.4206697631015249, "grad_norm": 0.3485177457332611, "learning_rate": 1.7911436944943404e-05, "loss": 0.558, "step": 19835 }, { "epoch": 0.4206909715594579, "grad_norm": 0.31337982416152954, "learning_rate": 1.791123296469031e-05, "loss": 0.4199, "step": 19836 }, { "epoch": 0.4207121800173909, "grad_norm": 0.34590011835098267, "learning_rate": 1.791102897563841e-05, "loss": 0.5216, "step": 19837 }, { "epoch": 0.42073338847532393, "grad_norm": 0.3451533615589142, "learning_rate": 1.791082497778793e-05, "loss": 0.4878, "step": 19838 }, { "epoch": 0.420754596933257, "grad_norm": 0.3275037109851837, "learning_rate": 1.7910620971139102e-05, "loss": 0.517, "step": 19839 }, { "epoch": 0.42077580539119, "grad_norm": 0.3384920358657837, "learning_rate": 1.791041695569215e-05, "loss": 0.511, "step": 19840 }, { "epoch": 0.42079701384912305, "grad_norm": 0.3336089551448822, "learning_rate": 1.7910212931447298e-05, "loss": 0.4959, "step": 19841 }, { "epoch": 0.42081822230705607, "grad_norm": 0.35302412509918213, "learning_rate": 1.791000889840478e-05, "loss": 0.399, "step": 19842 }, { "epoch": 0.4208394307649891, "grad_norm": 0.337022602558136, "learning_rate": 1.7909804856564816e-05, "loss": 0.5106, "step": 19843 }, { "epoch": 0.4208606392229221, "grad_norm": 0.3621288537979126, "learning_rate": 1.7909600805927633e-05, "loss": 0.45, "step": 19844 }, { "epoch": 0.4208818476808551, "grad_norm": 0.30425044894218445, "learning_rate": 1.7909396746493464e-05, "loss": 0.396, "step": 19845 }, { "epoch": 0.42090305613878815, "grad_norm": 0.3358897268772125, "learning_rate": 1.7909192678262533e-05, "loss": 0.5115, "step": 19846 }, { "epoch": 0.42092426459672116, "grad_norm": 0.335918664932251, "learning_rate": 1.790898860123507e-05, "loss": 0.4226, "step": 19847 }, { "epoch": 0.4209454730546542, "grad_norm": 0.4469450116157532, "learning_rate": 1.7908784515411295e-05, "loss": 0.5084, "step": 19848 }, { "epoch": 0.4209666815125872, "grad_norm": 0.4739089608192444, "learning_rate": 1.790858042079144e-05, "loss": 0.5532, "step": 19849 }, { "epoch": 0.4209878899705202, "grad_norm": 0.35084813833236694, "learning_rate": 1.7908376317375728e-05, "loss": 0.4862, "step": 19850 }, { "epoch": 0.42100909842845324, "grad_norm": 0.3393004536628723, "learning_rate": 1.790817220516439e-05, "loss": 0.4672, "step": 19851 }, { "epoch": 0.4210303068863863, "grad_norm": 0.393718421459198, "learning_rate": 1.790796808415765e-05, "loss": 0.446, "step": 19852 }, { "epoch": 0.42105151534431934, "grad_norm": 0.33250200748443604, "learning_rate": 1.790776395435574e-05, "loss": 0.485, "step": 19853 }, { "epoch": 0.42107272380225236, "grad_norm": 0.4314735531806946, "learning_rate": 1.790755981575888e-05, "loss": 0.4938, "step": 19854 }, { "epoch": 0.4210939322601854, "grad_norm": 0.36031126976013184, "learning_rate": 1.7907355668367303e-05, "loss": 0.5054, "step": 19855 }, { "epoch": 0.4211151407181184, "grad_norm": 0.34194543957710266, "learning_rate": 1.7907151512181233e-05, "loss": 0.4348, "step": 19856 }, { "epoch": 0.4211363491760514, "grad_norm": 0.31832462549209595, "learning_rate": 1.7906947347200897e-05, "loss": 0.5281, "step": 19857 }, { "epoch": 0.42115755763398444, "grad_norm": 0.35095474123954773, "learning_rate": 1.7906743173426523e-05, "loss": 0.5347, "step": 19858 }, { "epoch": 0.42117876609191746, "grad_norm": 0.40460094809532166, "learning_rate": 1.7906538990858338e-05, "loss": 0.5714, "step": 19859 }, { "epoch": 0.4211999745498505, "grad_norm": 0.3620831072330475, "learning_rate": 1.790633479949657e-05, "loss": 0.4526, "step": 19860 }, { "epoch": 0.4212211830077835, "grad_norm": 0.36802175641059875, "learning_rate": 1.7906130599341445e-05, "loss": 0.4816, "step": 19861 }, { "epoch": 0.4212423914657165, "grad_norm": 0.33570194244384766, "learning_rate": 1.790592639039319e-05, "loss": 0.5826, "step": 19862 }, { "epoch": 0.42126359992364953, "grad_norm": 0.5304481983184814, "learning_rate": 1.790572217265203e-05, "loss": 0.5172, "step": 19863 }, { "epoch": 0.42128480838158255, "grad_norm": 0.40630805492401123, "learning_rate": 1.7905517946118197e-05, "loss": 0.5431, "step": 19864 }, { "epoch": 0.4213060168395156, "grad_norm": 0.36737310886383057, "learning_rate": 1.7905313710791918e-05, "loss": 0.4288, "step": 19865 }, { "epoch": 0.42132722529744865, "grad_norm": 0.3380357325077057, "learning_rate": 1.7905109466673413e-05, "loss": 0.4846, "step": 19866 }, { "epoch": 0.42134843375538167, "grad_norm": 0.3361911475658417, "learning_rate": 1.790490521376292e-05, "loss": 0.4863, "step": 19867 }, { "epoch": 0.4213696422133147, "grad_norm": 0.6913533210754395, "learning_rate": 1.7904700952060653e-05, "loss": 0.5007, "step": 19868 }, { "epoch": 0.4213908506712477, "grad_norm": 0.3167664408683777, "learning_rate": 1.790449668156685e-05, "loss": 0.5017, "step": 19869 }, { "epoch": 0.4214120591291807, "grad_norm": 0.3684597611427307, "learning_rate": 1.7904292402281732e-05, "loss": 0.5283, "step": 19870 }, { "epoch": 0.42143326758711375, "grad_norm": 0.35270777344703674, "learning_rate": 1.790408811420553e-05, "loss": 0.4615, "step": 19871 }, { "epoch": 0.42145447604504677, "grad_norm": 0.35010242462158203, "learning_rate": 1.7903883817338473e-05, "loss": 0.4768, "step": 19872 }, { "epoch": 0.4214756845029798, "grad_norm": 0.3719097971916199, "learning_rate": 1.790367951168078e-05, "loss": 0.4761, "step": 19873 }, { "epoch": 0.4214968929609128, "grad_norm": 0.3787740170955658, "learning_rate": 1.7903475197232686e-05, "loss": 0.4662, "step": 19874 }, { "epoch": 0.4215181014188458, "grad_norm": 0.35871538519859314, "learning_rate": 1.7903270873994416e-05, "loss": 0.5807, "step": 19875 }, { "epoch": 0.42153930987677884, "grad_norm": 0.47957462072372437, "learning_rate": 1.7903066541966196e-05, "loss": 0.4229, "step": 19876 }, { "epoch": 0.42156051833471186, "grad_norm": 0.33551040291786194, "learning_rate": 1.7902862201148255e-05, "loss": 0.4445, "step": 19877 }, { "epoch": 0.4215817267926449, "grad_norm": 0.4257577955722809, "learning_rate": 1.7902657851540816e-05, "loss": 0.5521, "step": 19878 }, { "epoch": 0.4216029352505779, "grad_norm": 0.3181706964969635, "learning_rate": 1.7902453493144115e-05, "loss": 0.4505, "step": 19879 }, { "epoch": 0.421624143708511, "grad_norm": 0.35400304198265076, "learning_rate": 1.790224912595837e-05, "loss": 0.4902, "step": 19880 }, { "epoch": 0.421645352166444, "grad_norm": 0.346880704164505, "learning_rate": 1.7902044749983815e-05, "loss": 0.5361, "step": 19881 }, { "epoch": 0.421666560624377, "grad_norm": 0.3231479823589325, "learning_rate": 1.790184036522067e-05, "loss": 0.4686, "step": 19882 }, { "epoch": 0.42168776908231004, "grad_norm": 0.3406478464603424, "learning_rate": 1.790163597166917e-05, "loss": 0.548, "step": 19883 }, { "epoch": 0.42170897754024306, "grad_norm": 0.36502912640571594, "learning_rate": 1.790143156932954e-05, "loss": 0.5041, "step": 19884 }, { "epoch": 0.4217301859981761, "grad_norm": 0.29313144087791443, "learning_rate": 1.7901227158202005e-05, "loss": 0.4917, "step": 19885 }, { "epoch": 0.4217513944561091, "grad_norm": 0.3124557137489319, "learning_rate": 1.7901022738286792e-05, "loss": 0.4599, "step": 19886 }, { "epoch": 0.4217726029140421, "grad_norm": 0.5874635577201843, "learning_rate": 1.790081830958413e-05, "loss": 0.5444, "step": 19887 }, { "epoch": 0.42179381137197514, "grad_norm": 0.36384424567222595, "learning_rate": 1.7900613872094248e-05, "loss": 0.4943, "step": 19888 }, { "epoch": 0.42181501982990816, "grad_norm": 0.38414981961250305, "learning_rate": 1.7900409425817374e-05, "loss": 0.5556, "step": 19889 }, { "epoch": 0.4218362282878412, "grad_norm": 0.3126286566257477, "learning_rate": 1.7900204970753733e-05, "loss": 0.4879, "step": 19890 }, { "epoch": 0.4218574367457742, "grad_norm": 0.34220945835113525, "learning_rate": 1.790000050690355e-05, "loss": 0.5133, "step": 19891 }, { "epoch": 0.4218786452037072, "grad_norm": 0.3144901990890503, "learning_rate": 1.7899796034267053e-05, "loss": 0.517, "step": 19892 }, { "epoch": 0.4218998536616403, "grad_norm": 0.3732508718967438, "learning_rate": 1.789959155284448e-05, "loss": 0.4798, "step": 19893 }, { "epoch": 0.4219210621195733, "grad_norm": 0.35636353492736816, "learning_rate": 1.789938706263604e-05, "loss": 0.5283, "step": 19894 }, { "epoch": 0.42194227057750633, "grad_norm": 0.32329434156417847, "learning_rate": 1.7899182563641978e-05, "loss": 0.5348, "step": 19895 }, { "epoch": 0.42196347903543935, "grad_norm": 0.3368864059448242, "learning_rate": 1.789897805586251e-05, "loss": 0.5167, "step": 19896 }, { "epoch": 0.42198468749337237, "grad_norm": 0.35779768228530884, "learning_rate": 1.7898773539297866e-05, "loss": 0.4858, "step": 19897 }, { "epoch": 0.4220058959513054, "grad_norm": 0.395913690328598, "learning_rate": 1.7898569013948278e-05, "loss": 0.4675, "step": 19898 }, { "epoch": 0.4220271044092384, "grad_norm": 0.33421850204467773, "learning_rate": 1.789836447981397e-05, "loss": 0.5057, "step": 19899 }, { "epoch": 0.4220483128671714, "grad_norm": 0.32472649216651917, "learning_rate": 1.7898159936895166e-05, "loss": 0.5232, "step": 19900 }, { "epoch": 0.42206952132510445, "grad_norm": 0.3347349166870117, "learning_rate": 1.78979553851921e-05, "loss": 0.5409, "step": 19901 }, { "epoch": 0.42209072978303747, "grad_norm": 0.45411720871925354, "learning_rate": 1.7897750824704996e-05, "loss": 0.5379, "step": 19902 }, { "epoch": 0.4221119382409705, "grad_norm": 0.335252046585083, "learning_rate": 1.7897546255434083e-05, "loss": 0.4099, "step": 19903 }, { "epoch": 0.4221331466989035, "grad_norm": 0.3429208993911743, "learning_rate": 1.7897341677379588e-05, "loss": 0.5209, "step": 19904 }, { "epoch": 0.4221543551568365, "grad_norm": 0.31492435932159424, "learning_rate": 1.7897137090541737e-05, "loss": 0.4803, "step": 19905 }, { "epoch": 0.42217556361476954, "grad_norm": 0.36208072304725647, "learning_rate": 1.7896932494920757e-05, "loss": 0.5286, "step": 19906 }, { "epoch": 0.4221967720727026, "grad_norm": 0.3388260006904602, "learning_rate": 1.789672789051688e-05, "loss": 0.4491, "step": 19907 }, { "epoch": 0.42221798053063564, "grad_norm": 0.33137527108192444, "learning_rate": 1.789652327733033e-05, "loss": 0.4899, "step": 19908 }, { "epoch": 0.42223918898856866, "grad_norm": 0.36996060609817505, "learning_rate": 1.7896318655361337e-05, "loss": 0.5346, "step": 19909 }, { "epoch": 0.4222603974465017, "grad_norm": 0.4509704113006592, "learning_rate": 1.7896114024610123e-05, "loss": 0.5627, "step": 19910 }, { "epoch": 0.4222816059044347, "grad_norm": 0.3276870548725128, "learning_rate": 1.7895909385076923e-05, "loss": 0.4431, "step": 19911 }, { "epoch": 0.4223028143623677, "grad_norm": 0.351715624332428, "learning_rate": 1.789570473676196e-05, "loss": 0.503, "step": 19912 }, { "epoch": 0.42232402282030074, "grad_norm": 0.32727575302124023, "learning_rate": 1.7895500079665463e-05, "loss": 0.4376, "step": 19913 }, { "epoch": 0.42234523127823376, "grad_norm": 0.33039891719818115, "learning_rate": 1.7895295413787657e-05, "loss": 0.5414, "step": 19914 }, { "epoch": 0.4223664397361668, "grad_norm": 0.3341463506221771, "learning_rate": 1.7895090739128776e-05, "loss": 0.4652, "step": 19915 }, { "epoch": 0.4223876481940998, "grad_norm": 0.4438171982765198, "learning_rate": 1.789488605568904e-05, "loss": 0.5261, "step": 19916 }, { "epoch": 0.4224088566520328, "grad_norm": 0.4817394018173218, "learning_rate": 1.7894681363468683e-05, "loss": 0.5837, "step": 19917 }, { "epoch": 0.42243006510996584, "grad_norm": 0.31946298480033875, "learning_rate": 1.789447666246793e-05, "loss": 0.4368, "step": 19918 }, { "epoch": 0.42245127356789886, "grad_norm": 0.3687625229358673, "learning_rate": 1.789427195268701e-05, "loss": 0.5494, "step": 19919 }, { "epoch": 0.4224724820258319, "grad_norm": 0.3651703894138336, "learning_rate": 1.7894067234126146e-05, "loss": 0.5603, "step": 19920 }, { "epoch": 0.42249369048376495, "grad_norm": 0.35597625374794006, "learning_rate": 1.789386250678557e-05, "loss": 0.4764, "step": 19921 }, { "epoch": 0.42251489894169797, "grad_norm": 0.3909968435764313, "learning_rate": 1.789365777066551e-05, "loss": 0.5127, "step": 19922 }, { "epoch": 0.422536107399631, "grad_norm": 0.3915984332561493, "learning_rate": 1.789345302576619e-05, "loss": 0.4935, "step": 19923 }, { "epoch": 0.422557315857564, "grad_norm": 0.30660122632980347, "learning_rate": 1.789324827208784e-05, "loss": 0.4663, "step": 19924 }, { "epoch": 0.42257852431549703, "grad_norm": 0.4225510060787201, "learning_rate": 1.789304350963069e-05, "loss": 0.6186, "step": 19925 }, { "epoch": 0.42259973277343005, "grad_norm": 0.363812118768692, "learning_rate": 1.7892838738394968e-05, "loss": 0.4995, "step": 19926 }, { "epoch": 0.42262094123136307, "grad_norm": 0.3450007438659668, "learning_rate": 1.7892633958380892e-05, "loss": 0.5453, "step": 19927 }, { "epoch": 0.4226421496892961, "grad_norm": 0.3283664286136627, "learning_rate": 1.7892429169588703e-05, "loss": 0.5294, "step": 19928 }, { "epoch": 0.4226633581472291, "grad_norm": 0.34534522891044617, "learning_rate": 1.789222437201862e-05, "loss": 0.6012, "step": 19929 }, { "epoch": 0.4226845666051621, "grad_norm": 0.31874075531959534, "learning_rate": 1.789201956567088e-05, "loss": 0.5302, "step": 19930 }, { "epoch": 0.42270577506309515, "grad_norm": 0.2908395826816559, "learning_rate": 1.7891814750545695e-05, "loss": 0.4395, "step": 19931 }, { "epoch": 0.42272698352102817, "grad_norm": 0.29935362935066223, "learning_rate": 1.789160992664331e-05, "loss": 0.4496, "step": 19932 }, { "epoch": 0.4227481919789612, "grad_norm": 0.3586062490940094, "learning_rate": 1.789140509396394e-05, "loss": 0.524, "step": 19933 }, { "epoch": 0.42276940043689426, "grad_norm": 0.4264187812805176, "learning_rate": 1.7891200252507815e-05, "loss": 0.4939, "step": 19934 }, { "epoch": 0.4227906088948273, "grad_norm": 0.30506008863449097, "learning_rate": 1.7890995402275173e-05, "loss": 0.5392, "step": 19935 }, { "epoch": 0.4228118173527603, "grad_norm": 0.4047659635543823, "learning_rate": 1.789079054326623e-05, "loss": 0.3571, "step": 19936 }, { "epoch": 0.4228330258106933, "grad_norm": 0.35940077900886536, "learning_rate": 1.7890585675481216e-05, "loss": 0.534, "step": 19937 }, { "epoch": 0.42285423426862634, "grad_norm": 0.44694915413856506, "learning_rate": 1.7890380798920367e-05, "loss": 0.494, "step": 19938 }, { "epoch": 0.42287544272655936, "grad_norm": 0.3410041034221649, "learning_rate": 1.78901759135839e-05, "loss": 0.4832, "step": 19939 }, { "epoch": 0.4228966511844924, "grad_norm": 0.35163936018943787, "learning_rate": 1.788997101947205e-05, "loss": 0.5598, "step": 19940 }, { "epoch": 0.4229178596424254, "grad_norm": 0.3923921287059784, "learning_rate": 1.788976611658505e-05, "loss": 0.4661, "step": 19941 }, { "epoch": 0.4229390681003584, "grad_norm": 0.31512343883514404, "learning_rate": 1.788956120492311e-05, "loss": 0.5221, "step": 19942 }, { "epoch": 0.42296027655829144, "grad_norm": 0.3378241956233978, "learning_rate": 1.7889356284486473e-05, "loss": 0.4488, "step": 19943 }, { "epoch": 0.42298148501622446, "grad_norm": 0.34495916962623596, "learning_rate": 1.7889151355275363e-05, "loss": 0.5006, "step": 19944 }, { "epoch": 0.4230026934741575, "grad_norm": 0.3695337772369385, "learning_rate": 1.788894641729001e-05, "loss": 0.5121, "step": 19945 }, { "epoch": 0.4230239019320905, "grad_norm": 0.3415527045726776, "learning_rate": 1.7888741470530633e-05, "loss": 0.4722, "step": 19946 }, { "epoch": 0.4230451103900235, "grad_norm": 0.3536638021469116, "learning_rate": 1.7888536514997467e-05, "loss": 0.4275, "step": 19947 }, { "epoch": 0.4230663188479566, "grad_norm": 0.4138736426830292, "learning_rate": 1.7888331550690743e-05, "loss": 0.4742, "step": 19948 }, { "epoch": 0.4230875273058896, "grad_norm": 0.3571223318576813, "learning_rate": 1.7888126577610683e-05, "loss": 0.484, "step": 19949 }, { "epoch": 0.42310873576382263, "grad_norm": 0.3343019485473633, "learning_rate": 1.788792159575752e-05, "loss": 0.5727, "step": 19950 }, { "epoch": 0.42312994422175565, "grad_norm": 0.39953115582466125, "learning_rate": 1.7887716605131476e-05, "loss": 0.4907, "step": 19951 }, { "epoch": 0.42315115267968867, "grad_norm": 0.3383983373641968, "learning_rate": 1.788751160573278e-05, "loss": 0.4488, "step": 19952 }, { "epoch": 0.4231723611376217, "grad_norm": 0.3248230814933777, "learning_rate": 1.7887306597561663e-05, "loss": 0.4578, "step": 19953 }, { "epoch": 0.4231935695955547, "grad_norm": 0.3251664936542511, "learning_rate": 1.7887101580618357e-05, "loss": 0.5524, "step": 19954 }, { "epoch": 0.42321477805348773, "grad_norm": 0.38866421580314636, "learning_rate": 1.788689655490308e-05, "loss": 0.5255, "step": 19955 }, { "epoch": 0.42323598651142075, "grad_norm": 0.30259212851524353, "learning_rate": 1.7886691520416072e-05, "loss": 0.4948, "step": 19956 }, { "epoch": 0.42325719496935377, "grad_norm": 0.3407583236694336, "learning_rate": 1.7886486477157546e-05, "loss": 0.5784, "step": 19957 }, { "epoch": 0.4232784034272868, "grad_norm": 0.40972617268562317, "learning_rate": 1.7886281425127744e-05, "loss": 0.5122, "step": 19958 }, { "epoch": 0.4232996118852198, "grad_norm": 0.3360961675643921, "learning_rate": 1.7886076364326884e-05, "loss": 0.4812, "step": 19959 }, { "epoch": 0.4233208203431528, "grad_norm": 0.3429769277572632, "learning_rate": 1.7885871294755202e-05, "loss": 0.5084, "step": 19960 }, { "epoch": 0.42334202880108585, "grad_norm": 0.42333415150642395, "learning_rate": 1.788566621641292e-05, "loss": 0.4447, "step": 19961 }, { "epoch": 0.4233632372590189, "grad_norm": 0.3664810061454773, "learning_rate": 1.788546112930027e-05, "loss": 0.4617, "step": 19962 }, { "epoch": 0.42338444571695194, "grad_norm": 0.33712974190711975, "learning_rate": 1.788525603341748e-05, "loss": 0.4811, "step": 19963 }, { "epoch": 0.42340565417488496, "grad_norm": 0.3472396731376648, "learning_rate": 1.788505092876477e-05, "loss": 0.5282, "step": 19964 }, { "epoch": 0.423426862632818, "grad_norm": 0.3774075210094452, "learning_rate": 1.7884845815342384e-05, "loss": 0.3793, "step": 19965 }, { "epoch": 0.423448071090751, "grad_norm": 0.5437992811203003, "learning_rate": 1.7884640693150535e-05, "loss": 0.4631, "step": 19966 }, { "epoch": 0.423469279548684, "grad_norm": 0.3483235239982605, "learning_rate": 1.788443556218946e-05, "loss": 0.4862, "step": 19967 }, { "epoch": 0.42349048800661704, "grad_norm": 0.3480234146118164, "learning_rate": 1.7884230422459383e-05, "loss": 0.4657, "step": 19968 }, { "epoch": 0.42351169646455006, "grad_norm": 0.4011506736278534, "learning_rate": 1.7884025273960534e-05, "loss": 0.447, "step": 19969 }, { "epoch": 0.4235329049224831, "grad_norm": 0.33368802070617676, "learning_rate": 1.788382011669314e-05, "loss": 0.5362, "step": 19970 }, { "epoch": 0.4235541133804161, "grad_norm": 0.3118736743927002, "learning_rate": 1.7883614950657428e-05, "loss": 0.5198, "step": 19971 }, { "epoch": 0.4235753218383491, "grad_norm": 0.36364731192588806, "learning_rate": 1.7883409775853628e-05, "loss": 0.4697, "step": 19972 }, { "epoch": 0.42359653029628214, "grad_norm": 0.3050595223903656, "learning_rate": 1.788320459228197e-05, "loss": 0.5042, "step": 19973 }, { "epoch": 0.42361773875421516, "grad_norm": 0.3418445587158203, "learning_rate": 1.788299939994268e-05, "loss": 0.5481, "step": 19974 }, { "epoch": 0.42363894721214823, "grad_norm": 0.3243996202945709, "learning_rate": 1.7882794198835988e-05, "loss": 0.4549, "step": 19975 }, { "epoch": 0.42366015567008125, "grad_norm": 0.3200962543487549, "learning_rate": 1.788258898896212e-05, "loss": 0.5314, "step": 19976 }, { "epoch": 0.42368136412801427, "grad_norm": 0.3655330538749695, "learning_rate": 1.7882383770321304e-05, "loss": 0.5742, "step": 19977 }, { "epoch": 0.4237025725859473, "grad_norm": 0.344706267118454, "learning_rate": 1.788217854291377e-05, "loss": 0.5103, "step": 19978 }, { "epoch": 0.4237237810438803, "grad_norm": 0.3426136076450348, "learning_rate": 1.7881973306739746e-05, "loss": 0.4794, "step": 19979 }, { "epoch": 0.42374498950181333, "grad_norm": 0.38342320919036865, "learning_rate": 1.788176806179946e-05, "loss": 0.5453, "step": 19980 }, { "epoch": 0.42376619795974635, "grad_norm": 0.38534513115882874, "learning_rate": 1.788156280809314e-05, "loss": 0.4913, "step": 19981 }, { "epoch": 0.42378740641767937, "grad_norm": 0.32999780774116516, "learning_rate": 1.788135754562101e-05, "loss": 0.4334, "step": 19982 }, { "epoch": 0.4238086148756124, "grad_norm": 0.3205045163631439, "learning_rate": 1.788115227438331e-05, "loss": 0.4418, "step": 19983 }, { "epoch": 0.4238298233335454, "grad_norm": 0.3795563280582428, "learning_rate": 1.7880946994380254e-05, "loss": 0.4414, "step": 19984 }, { "epoch": 0.4238510317914784, "grad_norm": 0.3341516852378845, "learning_rate": 1.7880741705612078e-05, "loss": 0.4508, "step": 19985 }, { "epoch": 0.42387224024941145, "grad_norm": 1.6292152404785156, "learning_rate": 1.7880536408079015e-05, "loss": 0.519, "step": 19986 }, { "epoch": 0.42389344870734447, "grad_norm": 0.3863065838813782, "learning_rate": 1.7880331101781282e-05, "loss": 0.518, "step": 19987 }, { "epoch": 0.4239146571652775, "grad_norm": 0.3155098259449005, "learning_rate": 1.7880125786719116e-05, "loss": 0.4727, "step": 19988 }, { "epoch": 0.42393586562321056, "grad_norm": 0.3551478683948517, "learning_rate": 1.7879920462892744e-05, "loss": 0.5474, "step": 19989 }, { "epoch": 0.4239570740811436, "grad_norm": 0.33392220735549927, "learning_rate": 1.787971513030239e-05, "loss": 0.4635, "step": 19990 }, { "epoch": 0.4239782825390766, "grad_norm": 0.34982308745384216, "learning_rate": 1.7879509788948285e-05, "loss": 0.5352, "step": 19991 }, { "epoch": 0.4239994909970096, "grad_norm": 0.3550388216972351, "learning_rate": 1.787930443883066e-05, "loss": 0.4201, "step": 19992 }, { "epoch": 0.42402069945494264, "grad_norm": 0.3320018947124481, "learning_rate": 1.787909907994974e-05, "loss": 0.5463, "step": 19993 }, { "epoch": 0.42404190791287566, "grad_norm": 0.38322699069976807, "learning_rate": 1.7878893712305757e-05, "loss": 0.5179, "step": 19994 }, { "epoch": 0.4240631163708087, "grad_norm": 0.32063624262809753, "learning_rate": 1.7878688335898934e-05, "loss": 0.4424, "step": 19995 }, { "epoch": 0.4240843248287417, "grad_norm": 0.3553312122821808, "learning_rate": 1.78784829507295e-05, "loss": 0.4744, "step": 19996 }, { "epoch": 0.4241055332866747, "grad_norm": 0.33593806624412537, "learning_rate": 1.7878277556797692e-05, "loss": 0.5364, "step": 19997 }, { "epoch": 0.42412674174460774, "grad_norm": 0.3431273400783539, "learning_rate": 1.7878072154103727e-05, "loss": 0.5143, "step": 19998 }, { "epoch": 0.42414795020254076, "grad_norm": 0.38057538866996765, "learning_rate": 1.787786674264784e-05, "loss": 0.5249, "step": 19999 }, { "epoch": 0.4241691586604738, "grad_norm": 0.33373114466667175, "learning_rate": 1.787766132243026e-05, "loss": 0.5408, "step": 20000 }, { "epoch": 0.4241903671184068, "grad_norm": 0.35380998253822327, "learning_rate": 1.787745589345121e-05, "loss": 0.601, "step": 20001 }, { "epoch": 0.4242115755763398, "grad_norm": 0.3887275457382202, "learning_rate": 1.7877250455710924e-05, "loss": 0.5394, "step": 20002 }, { "epoch": 0.4242327840342729, "grad_norm": 0.3865729570388794, "learning_rate": 1.7877045009209628e-05, "loss": 0.5779, "step": 20003 }, { "epoch": 0.4242539924922059, "grad_norm": 0.3899860680103302, "learning_rate": 1.787683955394755e-05, "loss": 0.4997, "step": 20004 }, { "epoch": 0.42427520095013893, "grad_norm": 0.37141647934913635, "learning_rate": 1.787663408992492e-05, "loss": 0.5087, "step": 20005 }, { "epoch": 0.42429640940807195, "grad_norm": 0.37691569328308105, "learning_rate": 1.7876428617141968e-05, "loss": 0.496, "step": 20006 }, { "epoch": 0.42431761786600497, "grad_norm": 0.3444159924983978, "learning_rate": 1.787622313559892e-05, "loss": 0.4641, "step": 20007 }, { "epoch": 0.424338826323938, "grad_norm": 0.6180981397628784, "learning_rate": 1.7876017645296e-05, "loss": 0.5962, "step": 20008 }, { "epoch": 0.424360034781871, "grad_norm": 0.31869056820869446, "learning_rate": 1.7875812146233447e-05, "loss": 0.5018, "step": 20009 }, { "epoch": 0.42438124323980403, "grad_norm": 0.3243480920791626, "learning_rate": 1.7875606638411485e-05, "loss": 0.4039, "step": 20010 }, { "epoch": 0.42440245169773705, "grad_norm": 0.3492059111595154, "learning_rate": 1.787540112183034e-05, "loss": 0.5764, "step": 20011 }, { "epoch": 0.42442366015567007, "grad_norm": 0.37138980627059937, "learning_rate": 1.787519559649024e-05, "loss": 0.5164, "step": 20012 }, { "epoch": 0.4244448686136031, "grad_norm": 0.33848950266838074, "learning_rate": 1.787499006239142e-05, "loss": 0.4693, "step": 20013 }, { "epoch": 0.4244660770715361, "grad_norm": 0.4051586985588074, "learning_rate": 1.7874784519534102e-05, "loss": 0.5247, "step": 20014 }, { "epoch": 0.4244872855294691, "grad_norm": 0.40209731459617615, "learning_rate": 1.7874578967918518e-05, "loss": 0.5431, "step": 20015 }, { "epoch": 0.4245084939874022, "grad_norm": 0.35841307044029236, "learning_rate": 1.7874373407544894e-05, "loss": 0.5672, "step": 20016 }, { "epoch": 0.4245297024453352, "grad_norm": 0.35761377215385437, "learning_rate": 1.787416783841346e-05, "loss": 0.5386, "step": 20017 }, { "epoch": 0.42455091090326824, "grad_norm": 0.5102354288101196, "learning_rate": 1.7873962260524446e-05, "loss": 0.5013, "step": 20018 }, { "epoch": 0.42457211936120126, "grad_norm": 0.3673989474773407, "learning_rate": 1.7873756673878082e-05, "loss": 0.6071, "step": 20019 }, { "epoch": 0.4245933278191343, "grad_norm": 0.34550389647483826, "learning_rate": 1.7873551078474594e-05, "loss": 0.5802, "step": 20020 }, { "epoch": 0.4246145362770673, "grad_norm": 0.34598442912101746, "learning_rate": 1.787334547431421e-05, "loss": 0.6276, "step": 20021 }, { "epoch": 0.4246357447350003, "grad_norm": 0.39710432291030884, "learning_rate": 1.7873139861397156e-05, "loss": 0.4927, "step": 20022 }, { "epoch": 0.42465695319293334, "grad_norm": 0.394379585981369, "learning_rate": 1.7872934239723666e-05, "loss": 0.5898, "step": 20023 }, { "epoch": 0.42467816165086636, "grad_norm": 0.34847766160964966, "learning_rate": 1.787272860929397e-05, "loss": 0.5145, "step": 20024 }, { "epoch": 0.4246993701087994, "grad_norm": 0.3204520046710968, "learning_rate": 1.787252297010829e-05, "loss": 0.4294, "step": 20025 }, { "epoch": 0.4247205785667324, "grad_norm": 0.36467498540878296, "learning_rate": 1.787231732216686e-05, "loss": 0.5775, "step": 20026 }, { "epoch": 0.4247417870246654, "grad_norm": 0.35575252771377563, "learning_rate": 1.7872111665469908e-05, "loss": 0.5692, "step": 20027 }, { "epoch": 0.42476299548259844, "grad_norm": 0.3758523762226105, "learning_rate": 1.787190600001766e-05, "loss": 0.5418, "step": 20028 }, { "epoch": 0.42478420394053146, "grad_norm": 0.33916908502578735, "learning_rate": 1.7871700325810346e-05, "loss": 0.4764, "step": 20029 }, { "epoch": 0.42480541239846453, "grad_norm": 0.358403742313385, "learning_rate": 1.7871494642848195e-05, "loss": 0.5403, "step": 20030 }, { "epoch": 0.42482662085639755, "grad_norm": 0.367402046918869, "learning_rate": 1.7871288951131436e-05, "loss": 0.5377, "step": 20031 }, { "epoch": 0.42484782931433057, "grad_norm": 0.3663202226161957, "learning_rate": 1.78710832506603e-05, "loss": 0.5468, "step": 20032 }, { "epoch": 0.4248690377722636, "grad_norm": 0.35527515411376953, "learning_rate": 1.7870877541435012e-05, "loss": 0.5178, "step": 20033 }, { "epoch": 0.4248902462301966, "grad_norm": 0.39613139629364014, "learning_rate": 1.7870671823455804e-05, "loss": 0.5137, "step": 20034 }, { "epoch": 0.42491145468812963, "grad_norm": 0.34613725543022156, "learning_rate": 1.7870466096722904e-05, "loss": 0.5631, "step": 20035 }, { "epoch": 0.42493266314606265, "grad_norm": 0.37134480476379395, "learning_rate": 1.7870260361236534e-05, "loss": 0.5012, "step": 20036 }, { "epoch": 0.42495387160399567, "grad_norm": 0.3606179356575012, "learning_rate": 1.7870054616996937e-05, "loss": 0.4792, "step": 20037 }, { "epoch": 0.4249750800619287, "grad_norm": 0.4207608699798584, "learning_rate": 1.7869848864004327e-05, "loss": 0.4756, "step": 20038 }, { "epoch": 0.4249962885198617, "grad_norm": 0.3168928623199463, "learning_rate": 1.7869643102258944e-05, "loss": 0.5405, "step": 20039 }, { "epoch": 0.42501749697779473, "grad_norm": 0.3532581925392151, "learning_rate": 1.786943733176101e-05, "loss": 0.4379, "step": 20040 }, { "epoch": 0.42503870543572775, "grad_norm": 0.3113124966621399, "learning_rate": 1.786923155251076e-05, "loss": 0.5105, "step": 20041 }, { "epoch": 0.42505991389366077, "grad_norm": 0.34349262714385986, "learning_rate": 1.7869025764508414e-05, "loss": 0.5084, "step": 20042 }, { "epoch": 0.42508112235159384, "grad_norm": 0.34727242588996887, "learning_rate": 1.7868819967754207e-05, "loss": 0.5118, "step": 20043 }, { "epoch": 0.42510233080952686, "grad_norm": 0.3628547787666321, "learning_rate": 1.7868614162248367e-05, "loss": 0.5393, "step": 20044 }, { "epoch": 0.4251235392674599, "grad_norm": 0.4023370146751404, "learning_rate": 1.7868408347991125e-05, "loss": 0.5779, "step": 20045 }, { "epoch": 0.4251447477253929, "grad_norm": 0.3234686255455017, "learning_rate": 1.7868202524982704e-05, "loss": 0.4681, "step": 20046 }, { "epoch": 0.4251659561833259, "grad_norm": 0.3165246248245239, "learning_rate": 1.7867996693223336e-05, "loss": 0.5445, "step": 20047 }, { "epoch": 0.42518716464125894, "grad_norm": 0.3583311438560486, "learning_rate": 1.7867790852713253e-05, "loss": 0.4869, "step": 20048 }, { "epoch": 0.42520837309919196, "grad_norm": 0.3694133460521698, "learning_rate": 1.7867585003452683e-05, "loss": 0.5668, "step": 20049 }, { "epoch": 0.425229581557125, "grad_norm": 0.3221639096736908, "learning_rate": 1.786737914544185e-05, "loss": 0.4745, "step": 20050 }, { "epoch": 0.425250790015058, "grad_norm": 0.32452359795570374, "learning_rate": 1.786717327868099e-05, "loss": 0.5021, "step": 20051 }, { "epoch": 0.425271998472991, "grad_norm": 0.3455781638622284, "learning_rate": 1.7866967403170322e-05, "loss": 0.4684, "step": 20052 }, { "epoch": 0.42529320693092404, "grad_norm": 0.365429550409317, "learning_rate": 1.7866761518910086e-05, "loss": 0.5832, "step": 20053 }, { "epoch": 0.42531441538885706, "grad_norm": 0.3843395411968231, "learning_rate": 1.7866555625900505e-05, "loss": 0.5115, "step": 20054 }, { "epoch": 0.4253356238467901, "grad_norm": 0.3913930654525757, "learning_rate": 1.7866349724141812e-05, "loss": 0.6244, "step": 20055 }, { "epoch": 0.4253568323047231, "grad_norm": 0.3472282588481903, "learning_rate": 1.786614381363423e-05, "loss": 0.548, "step": 20056 }, { "epoch": 0.4253780407626562, "grad_norm": 0.358585000038147, "learning_rate": 1.786593789437799e-05, "loss": 0.4847, "step": 20057 }, { "epoch": 0.4253992492205892, "grad_norm": 0.4832654297351837, "learning_rate": 1.7865731966373326e-05, "loss": 0.5119, "step": 20058 }, { "epoch": 0.4254204576785222, "grad_norm": 0.34518298506736755, "learning_rate": 1.7865526029620456e-05, "loss": 0.5098, "step": 20059 }, { "epoch": 0.42544166613645523, "grad_norm": 0.3125710189342499, "learning_rate": 1.7865320084119623e-05, "loss": 0.5001, "step": 20060 }, { "epoch": 0.42546287459438825, "grad_norm": 0.32993873953819275, "learning_rate": 1.786511412987105e-05, "loss": 0.5272, "step": 20061 }, { "epoch": 0.42548408305232127, "grad_norm": 0.3389986753463745, "learning_rate": 1.786490816687496e-05, "loss": 0.5291, "step": 20062 }, { "epoch": 0.4255052915102543, "grad_norm": 0.38024458289146423, "learning_rate": 1.786470219513159e-05, "loss": 0.6107, "step": 20063 }, { "epoch": 0.4255264999681873, "grad_norm": 0.35184359550476074, "learning_rate": 1.7864496214641168e-05, "loss": 0.5679, "step": 20064 }, { "epoch": 0.42554770842612033, "grad_norm": 0.3214661478996277, "learning_rate": 1.786429022540392e-05, "loss": 0.4798, "step": 20065 }, { "epoch": 0.42556891688405335, "grad_norm": 0.3303535580635071, "learning_rate": 1.786408422742008e-05, "loss": 0.5299, "step": 20066 }, { "epoch": 0.42559012534198637, "grad_norm": 0.3350467383861542, "learning_rate": 1.786387822068987e-05, "loss": 0.5757, "step": 20067 }, { "epoch": 0.4256113337999194, "grad_norm": 0.3084503412246704, "learning_rate": 1.786367220521352e-05, "loss": 0.5161, "step": 20068 }, { "epoch": 0.4256325422578524, "grad_norm": 0.3673946261405945, "learning_rate": 1.7863466180991272e-05, "loss": 0.5229, "step": 20069 }, { "epoch": 0.42565375071578543, "grad_norm": 0.40153926610946655, "learning_rate": 1.7863260148023337e-05, "loss": 0.5503, "step": 20070 }, { "epoch": 0.4256749591737185, "grad_norm": 0.3362974226474762, "learning_rate": 1.7863054106309956e-05, "loss": 0.4871, "step": 20071 }, { "epoch": 0.4256961676316515, "grad_norm": 0.3055090308189392, "learning_rate": 1.7862848055851353e-05, "loss": 0.4722, "step": 20072 }, { "epoch": 0.42571737608958454, "grad_norm": 0.5741853713989258, "learning_rate": 1.786264199664776e-05, "loss": 0.6166, "step": 20073 }, { "epoch": 0.42573858454751756, "grad_norm": 0.34995245933532715, "learning_rate": 1.78624359286994e-05, "loss": 0.5063, "step": 20074 }, { "epoch": 0.4257597930054506, "grad_norm": 0.3369956910610199, "learning_rate": 1.7862229852006514e-05, "loss": 0.4703, "step": 20075 }, { "epoch": 0.4257810014633836, "grad_norm": 0.3666761815547943, "learning_rate": 1.7862023766569322e-05, "loss": 0.5309, "step": 20076 }, { "epoch": 0.4258022099213166, "grad_norm": 0.3822788596153259, "learning_rate": 1.7861817672388054e-05, "loss": 0.5869, "step": 20077 }, { "epoch": 0.42582341837924964, "grad_norm": 0.3471907377243042, "learning_rate": 1.7861611569462943e-05, "loss": 0.6016, "step": 20078 }, { "epoch": 0.42584462683718266, "grad_norm": 0.36301887035369873, "learning_rate": 1.7861405457794215e-05, "loss": 0.4744, "step": 20079 }, { "epoch": 0.4258658352951157, "grad_norm": 0.3297085762023926, "learning_rate": 1.7861199337382098e-05, "loss": 0.5423, "step": 20080 }, { "epoch": 0.4258870437530487, "grad_norm": 0.3642463982105255, "learning_rate": 1.7860993208226826e-05, "loss": 0.4719, "step": 20081 }, { "epoch": 0.4259082522109817, "grad_norm": 0.3467447757720947, "learning_rate": 1.7860787070328625e-05, "loss": 0.452, "step": 20082 }, { "epoch": 0.42592946066891474, "grad_norm": 0.41419848799705505, "learning_rate": 1.7860580923687723e-05, "loss": 0.4845, "step": 20083 }, { "epoch": 0.4259506691268478, "grad_norm": 0.5021370649337769, "learning_rate": 1.786037476830435e-05, "loss": 0.5306, "step": 20084 }, { "epoch": 0.42597187758478083, "grad_norm": 0.3325037360191345, "learning_rate": 1.786016860417874e-05, "loss": 0.4733, "step": 20085 }, { "epoch": 0.42599308604271385, "grad_norm": 0.40416526794433594, "learning_rate": 1.7859962431311116e-05, "loss": 0.6127, "step": 20086 }, { "epoch": 0.4260142945006469, "grad_norm": 0.41901877522468567, "learning_rate": 1.7859756249701716e-05, "loss": 0.5449, "step": 20087 }, { "epoch": 0.4260355029585799, "grad_norm": 0.33596786856651306, "learning_rate": 1.7859550059350756e-05, "loss": 0.3893, "step": 20088 }, { "epoch": 0.4260567114165129, "grad_norm": 0.31996241211891174, "learning_rate": 1.7859343860258477e-05, "loss": 0.4792, "step": 20089 }, { "epoch": 0.42607791987444593, "grad_norm": 0.3340511918067932, "learning_rate": 1.7859137652425103e-05, "loss": 0.4669, "step": 20090 }, { "epoch": 0.42609912833237895, "grad_norm": 0.36010488867759705, "learning_rate": 1.7858931435850863e-05, "loss": 0.519, "step": 20091 }, { "epoch": 0.42612033679031197, "grad_norm": 0.28988319635391235, "learning_rate": 1.7858725210535993e-05, "loss": 0.3826, "step": 20092 }, { "epoch": 0.426141545248245, "grad_norm": 0.3275676667690277, "learning_rate": 1.785851897648071e-05, "loss": 0.5226, "step": 20093 }, { "epoch": 0.426162753706178, "grad_norm": 0.3431672751903534, "learning_rate": 1.7858312733685256e-05, "loss": 0.479, "step": 20094 }, { "epoch": 0.42618396216411103, "grad_norm": 0.5497553944587708, "learning_rate": 1.785810648214985e-05, "loss": 0.5387, "step": 20095 }, { "epoch": 0.42620517062204405, "grad_norm": 0.3405678868293762, "learning_rate": 1.785790022187473e-05, "loss": 0.4732, "step": 20096 }, { "epoch": 0.42622637907997707, "grad_norm": 0.3744947016239166, "learning_rate": 1.785769395286012e-05, "loss": 0.4642, "step": 20097 }, { "epoch": 0.42624758753791014, "grad_norm": 0.35449713468551636, "learning_rate": 1.7857487675106253e-05, "loss": 0.5108, "step": 20098 }, { "epoch": 0.42626879599584316, "grad_norm": 0.34857597947120667, "learning_rate": 1.7857281388613353e-05, "loss": 0.5336, "step": 20099 }, { "epoch": 0.4262900044537762, "grad_norm": 0.3970996141433716, "learning_rate": 1.7857075093381656e-05, "loss": 0.5107, "step": 20100 }, { "epoch": 0.4263112129117092, "grad_norm": 0.32561489939689636, "learning_rate": 1.7856868789411387e-05, "loss": 0.4977, "step": 20101 }, { "epoch": 0.4263324213696422, "grad_norm": 0.3300866484642029, "learning_rate": 1.7856662476702778e-05, "loss": 0.4842, "step": 20102 }, { "epoch": 0.42635362982757524, "grad_norm": 0.3264361023902893, "learning_rate": 1.7856456155256054e-05, "loss": 0.4722, "step": 20103 }, { "epoch": 0.42637483828550826, "grad_norm": 0.3795238137245178, "learning_rate": 1.785624982507145e-05, "loss": 0.5805, "step": 20104 }, { "epoch": 0.4263960467434413, "grad_norm": 0.3268468976020813, "learning_rate": 1.7856043486149194e-05, "loss": 0.5541, "step": 20105 }, { "epoch": 0.4264172552013743, "grad_norm": 0.32895907759666443, "learning_rate": 1.7855837138489515e-05, "loss": 0.4162, "step": 20106 }, { "epoch": 0.4264384636593073, "grad_norm": 0.5350690484046936, "learning_rate": 1.785563078209264e-05, "loss": 0.4315, "step": 20107 }, { "epoch": 0.42645967211724034, "grad_norm": 0.36570504307746887, "learning_rate": 1.78554244169588e-05, "loss": 0.5095, "step": 20108 }, { "epoch": 0.42648088057517336, "grad_norm": 0.35054782032966614, "learning_rate": 1.7855218043088228e-05, "loss": 0.5104, "step": 20109 }, { "epoch": 0.4265020890331064, "grad_norm": 0.4749740958213806, "learning_rate": 1.7855011660481148e-05, "loss": 0.4853, "step": 20110 }, { "epoch": 0.4265232974910394, "grad_norm": 0.3297717571258545, "learning_rate": 1.7854805269137795e-05, "loss": 0.4279, "step": 20111 }, { "epoch": 0.4265445059489725, "grad_norm": 0.3998901844024658, "learning_rate": 1.7854598869058394e-05, "loss": 0.4865, "step": 20112 }, { "epoch": 0.4265657144069055, "grad_norm": 0.3464454114437103, "learning_rate": 1.7854392460243176e-05, "loss": 0.5103, "step": 20113 }, { "epoch": 0.4265869228648385, "grad_norm": 0.3672758638858795, "learning_rate": 1.7854186042692372e-05, "loss": 0.4825, "step": 20114 }, { "epoch": 0.42660813132277153, "grad_norm": 0.4101627767086029, "learning_rate": 1.7853979616406213e-05, "loss": 0.4805, "step": 20115 }, { "epoch": 0.42662933978070455, "grad_norm": 0.3359472155570984, "learning_rate": 1.785377318138492e-05, "loss": 0.5668, "step": 20116 }, { "epoch": 0.4266505482386376, "grad_norm": 0.38785606622695923, "learning_rate": 1.7853566737628733e-05, "loss": 0.4051, "step": 20117 }, { "epoch": 0.4266717566965706, "grad_norm": 0.3914010226726532, "learning_rate": 1.7853360285137878e-05, "loss": 0.5259, "step": 20118 }, { "epoch": 0.4266929651545036, "grad_norm": 0.3321145474910736, "learning_rate": 1.7853153823912582e-05, "loss": 0.5088, "step": 20119 }, { "epoch": 0.42671417361243663, "grad_norm": 0.4045780599117279, "learning_rate": 1.7852947353953078e-05, "loss": 0.5269, "step": 20120 }, { "epoch": 0.42673538207036965, "grad_norm": 0.35690391063690186, "learning_rate": 1.7852740875259594e-05, "loss": 0.419, "step": 20121 }, { "epoch": 0.42675659052830267, "grad_norm": 0.37981417775154114, "learning_rate": 1.7852534387832356e-05, "loss": 0.5832, "step": 20122 }, { "epoch": 0.4267777989862357, "grad_norm": 0.34771886467933655, "learning_rate": 1.7852327891671603e-05, "loss": 0.5611, "step": 20123 }, { "epoch": 0.4267990074441687, "grad_norm": 0.34051376581192017, "learning_rate": 1.7852121386777557e-05, "loss": 0.4669, "step": 20124 }, { "epoch": 0.4268202159021018, "grad_norm": 0.6683940887451172, "learning_rate": 1.785191487315045e-05, "loss": 0.5096, "step": 20125 }, { "epoch": 0.4268414243600348, "grad_norm": 0.3460131585597992, "learning_rate": 1.785170835079051e-05, "loss": 0.4982, "step": 20126 }, { "epoch": 0.4268626328179678, "grad_norm": 0.3532923460006714, "learning_rate": 1.785150181969797e-05, "loss": 0.4553, "step": 20127 }, { "epoch": 0.42688384127590084, "grad_norm": 0.3660517632961273, "learning_rate": 1.7851295279873055e-05, "loss": 0.5045, "step": 20128 }, { "epoch": 0.42690504973383386, "grad_norm": 0.3300464451313019, "learning_rate": 1.7851088731315998e-05, "loss": 0.5299, "step": 20129 }, { "epoch": 0.4269262581917669, "grad_norm": 0.3485063314437866, "learning_rate": 1.7850882174027032e-05, "loss": 0.4997, "step": 20130 }, { "epoch": 0.4269474666496999, "grad_norm": 0.34372684359550476, "learning_rate": 1.785067560800638e-05, "loss": 0.5275, "step": 20131 }, { "epoch": 0.4269686751076329, "grad_norm": 0.38179346919059753, "learning_rate": 1.7850469033254276e-05, "loss": 0.5185, "step": 20132 }, { "epoch": 0.42698988356556594, "grad_norm": 0.3393559157848358, "learning_rate": 1.785026244977095e-05, "loss": 0.5123, "step": 20133 }, { "epoch": 0.42701109202349896, "grad_norm": 0.6123172640800476, "learning_rate": 1.7850055857556628e-05, "loss": 0.5355, "step": 20134 }, { "epoch": 0.427032300481432, "grad_norm": 0.3319329619407654, "learning_rate": 1.7849849256611542e-05, "loss": 0.5168, "step": 20135 }, { "epoch": 0.427053508939365, "grad_norm": 0.3219892978668213, "learning_rate": 1.7849642646935922e-05, "loss": 0.4553, "step": 20136 }, { "epoch": 0.427074717397298, "grad_norm": 0.30219030380249023, "learning_rate": 1.7849436028529998e-05, "loss": 0.5014, "step": 20137 }, { "epoch": 0.42709592585523104, "grad_norm": 0.32728102803230286, "learning_rate": 1.7849229401394e-05, "loss": 0.5306, "step": 20138 }, { "epoch": 0.4271171343131641, "grad_norm": 0.43744930624961853, "learning_rate": 1.7849022765528156e-05, "loss": 0.6124, "step": 20139 }, { "epoch": 0.42713834277109713, "grad_norm": 0.37650415301322937, "learning_rate": 1.7848816120932698e-05, "loss": 0.5234, "step": 20140 }, { "epoch": 0.42715955122903015, "grad_norm": 0.3767339885234833, "learning_rate": 1.7848609467607857e-05, "loss": 0.5316, "step": 20141 }, { "epoch": 0.4271807596869632, "grad_norm": 0.37698718905448914, "learning_rate": 1.7848402805553858e-05, "loss": 0.5074, "step": 20142 }, { "epoch": 0.4272019681448962, "grad_norm": 0.33607515692710876, "learning_rate": 1.7848196134770933e-05, "loss": 0.5183, "step": 20143 }, { "epoch": 0.4272231766028292, "grad_norm": 0.343606561422348, "learning_rate": 1.7847989455259314e-05, "loss": 0.4391, "step": 20144 }, { "epoch": 0.42724438506076223, "grad_norm": 0.4399411678314209, "learning_rate": 1.7847782767019226e-05, "loss": 0.6224, "step": 20145 }, { "epoch": 0.42726559351869525, "grad_norm": 0.39626678824424744, "learning_rate": 1.7847576070050908e-05, "loss": 0.541, "step": 20146 }, { "epoch": 0.42728680197662827, "grad_norm": 0.33270612359046936, "learning_rate": 1.784736936435458e-05, "loss": 0.5263, "step": 20147 }, { "epoch": 0.4273080104345613, "grad_norm": 0.34309592843055725, "learning_rate": 1.7847162649930478e-05, "loss": 0.458, "step": 20148 }, { "epoch": 0.4273292188924943, "grad_norm": 0.40433764457702637, "learning_rate": 1.7846955926778826e-05, "loss": 0.4463, "step": 20149 }, { "epoch": 0.42735042735042733, "grad_norm": 0.5201246738433838, "learning_rate": 1.7846749194899862e-05, "loss": 0.554, "step": 20150 }, { "epoch": 0.42737163580836035, "grad_norm": 0.33525151014328003, "learning_rate": 1.7846542454293813e-05, "loss": 0.4715, "step": 20151 }, { "epoch": 0.42739284426629337, "grad_norm": 0.33552050590515137, "learning_rate": 1.7846335704960902e-05, "loss": 0.4804, "step": 20152 }, { "epoch": 0.42741405272422645, "grad_norm": 0.3796551525592804, "learning_rate": 1.784612894690137e-05, "loss": 0.5385, "step": 20153 }, { "epoch": 0.42743526118215946, "grad_norm": 0.3818046450614929, "learning_rate": 1.7845922180115435e-05, "loss": 0.4353, "step": 20154 }, { "epoch": 0.4274564696400925, "grad_norm": 0.4286128282546997, "learning_rate": 1.7845715404603338e-05, "loss": 0.5434, "step": 20155 }, { "epoch": 0.4274776780980255, "grad_norm": 0.32532402873039246, "learning_rate": 1.7845508620365307e-05, "loss": 0.4851, "step": 20156 }, { "epoch": 0.4274988865559585, "grad_norm": 0.3920195996761322, "learning_rate": 1.7845301827401566e-05, "loss": 0.4708, "step": 20157 }, { "epoch": 0.42752009501389154, "grad_norm": 0.31199464201927185, "learning_rate": 1.784509502571235e-05, "loss": 0.4923, "step": 20158 }, { "epoch": 0.42754130347182456, "grad_norm": 0.3575087785720825, "learning_rate": 1.7844888215297886e-05, "loss": 0.4924, "step": 20159 }, { "epoch": 0.4275625119297576, "grad_norm": 0.3603840172290802, "learning_rate": 1.7844681396158404e-05, "loss": 0.5425, "step": 20160 }, { "epoch": 0.4275837203876906, "grad_norm": 0.3523329794406891, "learning_rate": 1.7844474568294137e-05, "loss": 0.538, "step": 20161 }, { "epoch": 0.4276049288456236, "grad_norm": 0.33459553122520447, "learning_rate": 1.7844267731705316e-05, "loss": 0.555, "step": 20162 }, { "epoch": 0.42762613730355664, "grad_norm": 0.3398873209953308, "learning_rate": 1.7844060886392168e-05, "loss": 0.4051, "step": 20163 }, { "epoch": 0.42764734576148966, "grad_norm": 0.35378262400627136, "learning_rate": 1.7843854032354922e-05, "loss": 0.514, "step": 20164 }, { "epoch": 0.4276685542194227, "grad_norm": 0.41514578461647034, "learning_rate": 1.7843647169593806e-05, "loss": 0.4695, "step": 20165 }, { "epoch": 0.42768976267735576, "grad_norm": 0.3902558982372284, "learning_rate": 1.7843440298109057e-05, "loss": 0.5208, "step": 20166 }, { "epoch": 0.4277109711352888, "grad_norm": 0.3893358111381531, "learning_rate": 1.7843233417900905e-05, "loss": 0.5332, "step": 20167 }, { "epoch": 0.4277321795932218, "grad_norm": 0.35427823662757874, "learning_rate": 1.7843026528969575e-05, "loss": 0.5358, "step": 20168 }, { "epoch": 0.4277533880511548, "grad_norm": 0.33908483386039734, "learning_rate": 1.78428196313153e-05, "loss": 0.5408, "step": 20169 }, { "epoch": 0.42777459650908783, "grad_norm": 0.35901859402656555, "learning_rate": 1.7842612724938305e-05, "loss": 0.5746, "step": 20170 }, { "epoch": 0.42779580496702085, "grad_norm": 0.31753215193748474, "learning_rate": 1.7842405809838827e-05, "loss": 0.5198, "step": 20171 }, { "epoch": 0.4278170134249539, "grad_norm": 0.32414332032203674, "learning_rate": 1.7842198886017094e-05, "loss": 0.4915, "step": 20172 }, { "epoch": 0.4278382218828869, "grad_norm": 0.3571036458015442, "learning_rate": 1.7841991953473335e-05, "loss": 0.5161, "step": 20173 }, { "epoch": 0.4278594303408199, "grad_norm": 0.3722538650035858, "learning_rate": 1.784178501220778e-05, "loss": 0.4644, "step": 20174 }, { "epoch": 0.42788063879875293, "grad_norm": 0.41805651783943176, "learning_rate": 1.784157806222066e-05, "loss": 0.5485, "step": 20175 }, { "epoch": 0.42790184725668595, "grad_norm": 0.3397766947746277, "learning_rate": 1.7841371103512206e-05, "loss": 0.4637, "step": 20176 }, { "epoch": 0.42792305571461897, "grad_norm": 0.31682294607162476, "learning_rate": 1.7841164136082646e-05, "loss": 0.3938, "step": 20177 }, { "epoch": 0.427944264172552, "grad_norm": 0.3796408474445343, "learning_rate": 1.784095715993221e-05, "loss": 0.5329, "step": 20178 }, { "epoch": 0.427965472630485, "grad_norm": 0.3519222140312195, "learning_rate": 1.784075017506113e-05, "loss": 0.5004, "step": 20179 }, { "epoch": 0.4279866810884181, "grad_norm": 0.9310734272003174, "learning_rate": 1.7840543181469637e-05, "loss": 0.5055, "step": 20180 }, { "epoch": 0.4280078895463511, "grad_norm": 0.47464117407798767, "learning_rate": 1.7840336179157966e-05, "loss": 0.5329, "step": 20181 }, { "epoch": 0.4280290980042841, "grad_norm": 0.2915664613246918, "learning_rate": 1.784012916812633e-05, "loss": 0.4682, "step": 20182 }, { "epoch": 0.42805030646221714, "grad_norm": 0.3608829379081726, "learning_rate": 1.7839922148374977e-05, "loss": 0.5323, "step": 20183 }, { "epoch": 0.42807151492015016, "grad_norm": 0.35868626832962036, "learning_rate": 1.783971511990413e-05, "loss": 0.498, "step": 20184 }, { "epoch": 0.4280927233780832, "grad_norm": 0.3419077396392822, "learning_rate": 1.783950808271402e-05, "loss": 0.5202, "step": 20185 }, { "epoch": 0.4281139318360162, "grad_norm": 0.3548750579357147, "learning_rate": 1.7839301036804876e-05, "loss": 0.413, "step": 20186 }, { "epoch": 0.4281351402939492, "grad_norm": 0.359863817691803, "learning_rate": 1.7839093982176933e-05, "loss": 0.4606, "step": 20187 }, { "epoch": 0.42815634875188224, "grad_norm": 0.3556249439716339, "learning_rate": 1.7838886918830412e-05, "loss": 0.5782, "step": 20188 }, { "epoch": 0.42817755720981526, "grad_norm": 0.3366978168487549, "learning_rate": 1.7838679846765554e-05, "loss": 0.4386, "step": 20189 }, { "epoch": 0.4281987656677483, "grad_norm": 0.3080498278141022, "learning_rate": 1.783847276598258e-05, "loss": 0.4911, "step": 20190 }, { "epoch": 0.4282199741256813, "grad_norm": 0.38107118010520935, "learning_rate": 1.7838265676481728e-05, "loss": 0.4393, "step": 20191 }, { "epoch": 0.4282411825836143, "grad_norm": 0.3515058755874634, "learning_rate": 1.7838058578263223e-05, "loss": 0.5057, "step": 20192 }, { "epoch": 0.4282623910415474, "grad_norm": 0.3376673758029938, "learning_rate": 1.78378514713273e-05, "loss": 0.4869, "step": 20193 }, { "epoch": 0.4282835994994804, "grad_norm": 0.35193970799446106, "learning_rate": 1.7837644355674183e-05, "loss": 0.5183, "step": 20194 }, { "epoch": 0.42830480795741344, "grad_norm": 0.35948988795280457, "learning_rate": 1.783743723130411e-05, "loss": 0.546, "step": 20195 }, { "epoch": 0.42832601641534646, "grad_norm": 0.49333176016807556, "learning_rate": 1.7837230098217304e-05, "loss": 0.5285, "step": 20196 }, { "epoch": 0.4283472248732795, "grad_norm": 0.3482903838157654, "learning_rate": 1.7837022956414e-05, "loss": 0.5543, "step": 20197 }, { "epoch": 0.4283684333312125, "grad_norm": 0.36147382855415344, "learning_rate": 1.7836815805894427e-05, "loss": 0.5494, "step": 20198 }, { "epoch": 0.4283896417891455, "grad_norm": 0.37511155009269714, "learning_rate": 1.7836608646658816e-05, "loss": 0.4974, "step": 20199 }, { "epoch": 0.42841085024707853, "grad_norm": 0.904266893863678, "learning_rate": 1.78364014787074e-05, "loss": 0.6068, "step": 20200 }, { "epoch": 0.42843205870501155, "grad_norm": 0.40203580260276794, "learning_rate": 1.78361943020404e-05, "loss": 0.4569, "step": 20201 }, { "epoch": 0.4284532671629446, "grad_norm": 0.33794572949409485, "learning_rate": 1.7835987116658058e-05, "loss": 0.5045, "step": 20202 }, { "epoch": 0.4284744756208776, "grad_norm": 0.36404427886009216, "learning_rate": 1.7835779922560594e-05, "loss": 0.5024, "step": 20203 }, { "epoch": 0.4284956840788106, "grad_norm": 0.4047142565250397, "learning_rate": 1.783557271974825e-05, "loss": 0.5496, "step": 20204 }, { "epoch": 0.42851689253674363, "grad_norm": 0.3894149363040924, "learning_rate": 1.7835365508221245e-05, "loss": 0.4602, "step": 20205 }, { "epoch": 0.42853810099467665, "grad_norm": 0.453111469745636, "learning_rate": 1.7835158287979818e-05, "loss": 0.5502, "step": 20206 }, { "epoch": 0.4285593094526097, "grad_norm": 0.37262290716171265, "learning_rate": 1.7834951059024194e-05, "loss": 0.5531, "step": 20207 }, { "epoch": 0.42858051791054275, "grad_norm": 0.403492271900177, "learning_rate": 1.7834743821354606e-05, "loss": 0.4644, "step": 20208 }, { "epoch": 0.42860172636847577, "grad_norm": 0.3262181878089905, "learning_rate": 1.7834536574971283e-05, "loss": 0.5944, "step": 20209 }, { "epoch": 0.4286229348264088, "grad_norm": 0.3400998115539551, "learning_rate": 1.783432931987446e-05, "loss": 0.4987, "step": 20210 }, { "epoch": 0.4286441432843418, "grad_norm": 0.32992973923683167, "learning_rate": 1.7834122056064358e-05, "loss": 0.4992, "step": 20211 }, { "epoch": 0.4286653517422748, "grad_norm": 0.34485018253326416, "learning_rate": 1.7833914783541218e-05, "loss": 0.5469, "step": 20212 }, { "epoch": 0.42868656020020784, "grad_norm": 0.31823408603668213, "learning_rate": 1.7833707502305263e-05, "loss": 0.4917, "step": 20213 }, { "epoch": 0.42870776865814086, "grad_norm": 0.42616981267929077, "learning_rate": 1.783350021235673e-05, "loss": 0.4627, "step": 20214 }, { "epoch": 0.4287289771160739, "grad_norm": 0.37293314933776855, "learning_rate": 1.7833292913695845e-05, "loss": 0.5095, "step": 20215 }, { "epoch": 0.4287501855740069, "grad_norm": 0.3661377429962158, "learning_rate": 1.783308560632284e-05, "loss": 0.5161, "step": 20216 }, { "epoch": 0.4287713940319399, "grad_norm": 0.34132808446884155, "learning_rate": 1.7832878290237943e-05, "loss": 0.4583, "step": 20217 }, { "epoch": 0.42879260248987294, "grad_norm": 0.35193824768066406, "learning_rate": 1.783267096544139e-05, "loss": 0.5467, "step": 20218 }, { "epoch": 0.42881381094780596, "grad_norm": 0.39484137296676636, "learning_rate": 1.7832463631933404e-05, "loss": 0.5068, "step": 20219 }, { "epoch": 0.428835019405739, "grad_norm": 0.36508262157440186, "learning_rate": 1.7832256289714225e-05, "loss": 0.5222, "step": 20220 }, { "epoch": 0.42885622786367206, "grad_norm": 0.3273509740829468, "learning_rate": 1.7832048938784077e-05, "loss": 0.4967, "step": 20221 }, { "epoch": 0.4288774363216051, "grad_norm": 0.3720664381980896, "learning_rate": 1.783184157914319e-05, "loss": 0.5647, "step": 20222 }, { "epoch": 0.4288986447795381, "grad_norm": 0.3520810306072235, "learning_rate": 1.78316342107918e-05, "loss": 0.4716, "step": 20223 }, { "epoch": 0.4289198532374711, "grad_norm": 0.34130191802978516, "learning_rate": 1.7831426833730133e-05, "loss": 0.5761, "step": 20224 }, { "epoch": 0.42894106169540414, "grad_norm": 0.3738114535808563, "learning_rate": 1.783121944795842e-05, "loss": 0.5268, "step": 20225 }, { "epoch": 0.42896227015333716, "grad_norm": 0.35527554154396057, "learning_rate": 1.7831012053476895e-05, "loss": 0.4752, "step": 20226 }, { "epoch": 0.4289834786112702, "grad_norm": 0.3849584758281708, "learning_rate": 1.7830804650285784e-05, "loss": 0.4939, "step": 20227 }, { "epoch": 0.4290046870692032, "grad_norm": 0.31385040283203125, "learning_rate": 1.783059723838532e-05, "loss": 0.4902, "step": 20228 }, { "epoch": 0.4290258955271362, "grad_norm": 0.34678152203559875, "learning_rate": 1.7830389817775736e-05, "loss": 0.445, "step": 20229 }, { "epoch": 0.42904710398506923, "grad_norm": 0.3428395986557007, "learning_rate": 1.783018238845726e-05, "loss": 0.5232, "step": 20230 }, { "epoch": 0.42906831244300225, "grad_norm": 0.4630086123943329, "learning_rate": 1.7829974950430123e-05, "loss": 0.512, "step": 20231 }, { "epoch": 0.4290895209009353, "grad_norm": 0.3355872929096222, "learning_rate": 1.7829767503694552e-05, "loss": 0.4503, "step": 20232 }, { "epoch": 0.4291107293588683, "grad_norm": 0.3144253194332123, "learning_rate": 1.7829560048250788e-05, "loss": 0.4937, "step": 20233 }, { "epoch": 0.42913193781680137, "grad_norm": 0.3391219973564148, "learning_rate": 1.7829352584099054e-05, "loss": 0.4409, "step": 20234 }, { "epoch": 0.4291531462747344, "grad_norm": 0.4061241149902344, "learning_rate": 1.7829145111239578e-05, "loss": 0.4566, "step": 20235 }, { "epoch": 0.4291743547326674, "grad_norm": 0.3288092315196991, "learning_rate": 1.7828937629672596e-05, "loss": 0.4645, "step": 20236 }, { "epoch": 0.4291955631906004, "grad_norm": 0.3562493920326233, "learning_rate": 1.7828730139398343e-05, "loss": 0.4862, "step": 20237 }, { "epoch": 0.42921677164853345, "grad_norm": 0.32952481508255005, "learning_rate": 1.7828522640417038e-05, "loss": 0.4336, "step": 20238 }, { "epoch": 0.42923798010646647, "grad_norm": 0.37773552536964417, "learning_rate": 1.7828315132728922e-05, "loss": 0.4064, "step": 20239 }, { "epoch": 0.4292591885643995, "grad_norm": 0.38003435730934143, "learning_rate": 1.782810761633422e-05, "loss": 0.5553, "step": 20240 }, { "epoch": 0.4292803970223325, "grad_norm": 0.36679282784461975, "learning_rate": 1.7827900091233162e-05, "loss": 0.5049, "step": 20241 }, { "epoch": 0.4293016054802655, "grad_norm": 0.3478245735168457, "learning_rate": 1.7827692557425986e-05, "loss": 0.5108, "step": 20242 }, { "epoch": 0.42932281393819854, "grad_norm": 0.34966444969177246, "learning_rate": 1.7827485014912917e-05, "loss": 0.5199, "step": 20243 }, { "epoch": 0.42934402239613156, "grad_norm": 0.31670448184013367, "learning_rate": 1.782727746369419e-05, "loss": 0.5441, "step": 20244 }, { "epoch": 0.4293652308540646, "grad_norm": 0.3889015018939972, "learning_rate": 1.7827069903770026e-05, "loss": 0.6288, "step": 20245 }, { "epoch": 0.4293864393119976, "grad_norm": 0.3997800648212433, "learning_rate": 1.7826862335140667e-05, "loss": 0.4921, "step": 20246 }, { "epoch": 0.4294076477699306, "grad_norm": 0.33198466897010803, "learning_rate": 1.782665475780634e-05, "loss": 0.552, "step": 20247 }, { "epoch": 0.4294288562278637, "grad_norm": 0.3451066017150879, "learning_rate": 1.7826447171767275e-05, "loss": 0.451, "step": 20248 }, { "epoch": 0.4294500646857967, "grad_norm": 0.38430511951446533, "learning_rate": 1.7826239577023705e-05, "loss": 0.4798, "step": 20249 }, { "epoch": 0.42947127314372974, "grad_norm": 0.39860010147094727, "learning_rate": 1.7826031973575858e-05, "loss": 0.4764, "step": 20250 }, { "epoch": 0.42949248160166276, "grad_norm": 0.38416701555252075, "learning_rate": 1.7825824361423968e-05, "loss": 0.551, "step": 20251 }, { "epoch": 0.4295136900595958, "grad_norm": 0.40223780274391174, "learning_rate": 1.782561674056826e-05, "loss": 0.4579, "step": 20252 }, { "epoch": 0.4295348985175288, "grad_norm": 0.45376890897750854, "learning_rate": 1.7825409111008972e-05, "loss": 0.5943, "step": 20253 }, { "epoch": 0.4295561069754618, "grad_norm": 0.38455069065093994, "learning_rate": 1.782520147274633e-05, "loss": 0.5023, "step": 20254 }, { "epoch": 0.42957731543339484, "grad_norm": 0.3722836375236511, "learning_rate": 1.782499382578057e-05, "loss": 0.5389, "step": 20255 }, { "epoch": 0.42959852389132785, "grad_norm": 0.3227320909500122, "learning_rate": 1.7824786170111915e-05, "loss": 0.3983, "step": 20256 }, { "epoch": 0.4296197323492609, "grad_norm": 0.3535947799682617, "learning_rate": 1.7824578505740606e-05, "loss": 0.5116, "step": 20257 }, { "epoch": 0.4296409408071939, "grad_norm": 0.33834657073020935, "learning_rate": 1.7824370832666865e-05, "loss": 0.5131, "step": 20258 }, { "epoch": 0.4296621492651269, "grad_norm": 0.38679584860801697, "learning_rate": 1.782416315089093e-05, "loss": 0.4875, "step": 20259 }, { "epoch": 0.42968335772305993, "grad_norm": 0.35473597049713135, "learning_rate": 1.7823955460413027e-05, "loss": 0.5407, "step": 20260 }, { "epoch": 0.42970456618099295, "grad_norm": 0.31729355454444885, "learning_rate": 1.7823747761233387e-05, "loss": 0.4333, "step": 20261 }, { "epoch": 0.42972577463892603, "grad_norm": 0.31882429122924805, "learning_rate": 1.7823540053352246e-05, "loss": 0.5365, "step": 20262 }, { "epoch": 0.42974698309685905, "grad_norm": 0.3507173955440521, "learning_rate": 1.782333233676983e-05, "loss": 0.4634, "step": 20263 }, { "epoch": 0.42976819155479207, "grad_norm": 0.3253363370895386, "learning_rate": 1.782312461148637e-05, "loss": 0.4814, "step": 20264 }, { "epoch": 0.4297894000127251, "grad_norm": 0.40148216485977173, "learning_rate": 1.78229168775021e-05, "loss": 0.5219, "step": 20265 }, { "epoch": 0.4298106084706581, "grad_norm": 0.34007564187049866, "learning_rate": 1.7822709134817253e-05, "loss": 0.469, "step": 20266 }, { "epoch": 0.4298318169285911, "grad_norm": 0.3502475321292877, "learning_rate": 1.7822501383432053e-05, "loss": 0.4976, "step": 20267 }, { "epoch": 0.42985302538652415, "grad_norm": 0.3690849840641022, "learning_rate": 1.7822293623346736e-05, "loss": 0.526, "step": 20268 }, { "epoch": 0.42987423384445717, "grad_norm": 0.3471088409423828, "learning_rate": 1.7822085854561534e-05, "loss": 0.5309, "step": 20269 }, { "epoch": 0.4298954423023902, "grad_norm": 0.3708363175392151, "learning_rate": 1.7821878077076674e-05, "loss": 0.5855, "step": 20270 }, { "epoch": 0.4299166507603232, "grad_norm": 0.3448556065559387, "learning_rate": 1.782167029089239e-05, "loss": 0.4992, "step": 20271 }, { "epoch": 0.4299378592182562, "grad_norm": 0.34966713190078735, "learning_rate": 1.7821462496008912e-05, "loss": 0.4387, "step": 20272 }, { "epoch": 0.42995906767618924, "grad_norm": 0.35538986325263977, "learning_rate": 1.782125469242647e-05, "loss": 0.5169, "step": 20273 }, { "epoch": 0.42998027613412226, "grad_norm": 0.292681485414505, "learning_rate": 1.7821046880145298e-05, "loss": 0.4066, "step": 20274 }, { "epoch": 0.43000148459205534, "grad_norm": 0.37821853160858154, "learning_rate": 1.7820839059165625e-05, "loss": 0.5266, "step": 20275 }, { "epoch": 0.43002269304998836, "grad_norm": 0.3527653217315674, "learning_rate": 1.782063122948768e-05, "loss": 0.5382, "step": 20276 }, { "epoch": 0.4300439015079214, "grad_norm": 0.39135539531707764, "learning_rate": 1.78204233911117e-05, "loss": 0.5392, "step": 20277 }, { "epoch": 0.4300651099658544, "grad_norm": 0.42332735657691956, "learning_rate": 1.7820215544037915e-05, "loss": 0.4657, "step": 20278 }, { "epoch": 0.4300863184237874, "grad_norm": 0.45032623410224915, "learning_rate": 1.7820007688266553e-05, "loss": 0.5613, "step": 20279 }, { "epoch": 0.43010752688172044, "grad_norm": 0.34682855010032654, "learning_rate": 1.7819799823797847e-05, "loss": 0.5134, "step": 20280 }, { "epoch": 0.43012873533965346, "grad_norm": 0.3523239493370056, "learning_rate": 1.7819591950632022e-05, "loss": 0.5693, "step": 20281 }, { "epoch": 0.4301499437975865, "grad_norm": 0.35955047607421875, "learning_rate": 1.781938406876932e-05, "loss": 0.4963, "step": 20282 }, { "epoch": 0.4301711522555195, "grad_norm": 0.3782859146595001, "learning_rate": 1.7819176178209964e-05, "loss": 0.5249, "step": 20283 }, { "epoch": 0.4301923607134525, "grad_norm": 0.3439931869506836, "learning_rate": 1.7818968278954193e-05, "loss": 0.5126, "step": 20284 }, { "epoch": 0.43021356917138553, "grad_norm": 0.39955055713653564, "learning_rate": 1.781876037100223e-05, "loss": 0.5435, "step": 20285 }, { "epoch": 0.43023477762931855, "grad_norm": 0.4023245871067047, "learning_rate": 1.781855245435431e-05, "loss": 0.4955, "step": 20286 }, { "epoch": 0.4302559860872516, "grad_norm": 0.36072370409965515, "learning_rate": 1.7818344529010662e-05, "loss": 0.5586, "step": 20287 }, { "epoch": 0.4302771945451846, "grad_norm": 0.36917293071746826, "learning_rate": 1.7818136594971524e-05, "loss": 0.4811, "step": 20288 }, { "epoch": 0.43029840300311767, "grad_norm": 0.36598578095436096, "learning_rate": 1.781792865223712e-05, "loss": 0.5096, "step": 20289 }, { "epoch": 0.4303196114610507, "grad_norm": 0.33627086877822876, "learning_rate": 1.781772070080768e-05, "loss": 0.53, "step": 20290 }, { "epoch": 0.4303408199189837, "grad_norm": 0.3310908079147339, "learning_rate": 1.781751274068344e-05, "loss": 0.5628, "step": 20291 }, { "epoch": 0.4303620283769167, "grad_norm": 0.3526766300201416, "learning_rate": 1.7817304771864635e-05, "loss": 0.5017, "step": 20292 }, { "epoch": 0.43038323683484975, "grad_norm": 0.34172287583351135, "learning_rate": 1.7817096794351487e-05, "loss": 0.5129, "step": 20293 }, { "epoch": 0.43040444529278277, "grad_norm": 0.3257095515727997, "learning_rate": 1.7816888808144234e-05, "loss": 0.4843, "step": 20294 }, { "epoch": 0.4304256537507158, "grad_norm": 0.32337677478790283, "learning_rate": 1.7816680813243102e-05, "loss": 0.4161, "step": 20295 }, { "epoch": 0.4304468622086488, "grad_norm": 0.36598360538482666, "learning_rate": 1.781647280964833e-05, "loss": 0.5145, "step": 20296 }, { "epoch": 0.4304680706665818, "grad_norm": 0.4000491797924042, "learning_rate": 1.7816264797360142e-05, "loss": 0.4773, "step": 20297 }, { "epoch": 0.43048927912451485, "grad_norm": 0.3546014130115509, "learning_rate": 1.7816056776378775e-05, "loss": 0.5417, "step": 20298 }, { "epoch": 0.43051048758244786, "grad_norm": 0.4453003406524658, "learning_rate": 1.7815848746704457e-05, "loss": 0.5224, "step": 20299 }, { "epoch": 0.4305316960403809, "grad_norm": 0.3454114496707916, "learning_rate": 1.7815640708337416e-05, "loss": 0.4659, "step": 20300 }, { "epoch": 0.4305529044983139, "grad_norm": 0.3314896821975708, "learning_rate": 1.7815432661277893e-05, "loss": 0.4109, "step": 20301 }, { "epoch": 0.4305741129562469, "grad_norm": 0.3523807227611542, "learning_rate": 1.781522460552611e-05, "loss": 0.5306, "step": 20302 }, { "epoch": 0.43059532141418, "grad_norm": 0.3446660339832306, "learning_rate": 1.78150165410823e-05, "loss": 0.5529, "step": 20303 }, { "epoch": 0.430616529872113, "grad_norm": 0.35482823848724365, "learning_rate": 1.7814808467946702e-05, "loss": 0.407, "step": 20304 }, { "epoch": 0.43063773833004604, "grad_norm": 0.3521769344806671, "learning_rate": 1.781460038611954e-05, "loss": 0.4917, "step": 20305 }, { "epoch": 0.43065894678797906, "grad_norm": 0.35442718863487244, "learning_rate": 1.7814392295601046e-05, "loss": 0.5125, "step": 20306 }, { "epoch": 0.4306801552459121, "grad_norm": 0.32761532068252563, "learning_rate": 1.7814184196391452e-05, "loss": 0.4355, "step": 20307 }, { "epoch": 0.4307013637038451, "grad_norm": 0.3984043002128601, "learning_rate": 1.7813976088490993e-05, "loss": 0.495, "step": 20308 }, { "epoch": 0.4307225721617781, "grad_norm": 0.3462705612182617, "learning_rate": 1.7813767971899897e-05, "loss": 0.5177, "step": 20309 }, { "epoch": 0.43074378061971114, "grad_norm": 0.34432652592658997, "learning_rate": 1.7813559846618392e-05, "loss": 0.4912, "step": 20310 }, { "epoch": 0.43076498907764416, "grad_norm": 0.3438534438610077, "learning_rate": 1.781335171264672e-05, "loss": 0.5697, "step": 20311 }, { "epoch": 0.4307861975355772, "grad_norm": 0.4148576557636261, "learning_rate": 1.78131435699851e-05, "loss": 0.4783, "step": 20312 }, { "epoch": 0.4308074059935102, "grad_norm": 0.41150301694869995, "learning_rate": 1.7812935418633775e-05, "loss": 0.4643, "step": 20313 }, { "epoch": 0.4308286144514432, "grad_norm": 0.3246331214904785, "learning_rate": 1.781272725859297e-05, "loss": 0.4544, "step": 20314 }, { "epoch": 0.43084982290937623, "grad_norm": 0.4731524586677551, "learning_rate": 1.7812519089862913e-05, "loss": 0.4222, "step": 20315 }, { "epoch": 0.4308710313673093, "grad_norm": 0.3965209722518921, "learning_rate": 1.7812310912443846e-05, "loss": 0.4777, "step": 20316 }, { "epoch": 0.43089223982524233, "grad_norm": 0.304146409034729, "learning_rate": 1.781210272633599e-05, "loss": 0.4156, "step": 20317 }, { "epoch": 0.43091344828317535, "grad_norm": 0.3244530260562897, "learning_rate": 1.7811894531539583e-05, "loss": 0.5131, "step": 20318 }, { "epoch": 0.43093465674110837, "grad_norm": 0.36926761269569397, "learning_rate": 1.7811686328054857e-05, "loss": 0.4668, "step": 20319 }, { "epoch": 0.4309558651990414, "grad_norm": 0.33706560730934143, "learning_rate": 1.781147811588204e-05, "loss": 0.4529, "step": 20320 }, { "epoch": 0.4309770736569744, "grad_norm": 0.3598109185695648, "learning_rate": 1.7811269895021364e-05, "loss": 0.5302, "step": 20321 }, { "epoch": 0.4309982821149074, "grad_norm": 0.36130791902542114, "learning_rate": 1.781106166547306e-05, "loss": 0.5259, "step": 20322 }, { "epoch": 0.43101949057284045, "grad_norm": 0.33626559376716614, "learning_rate": 1.7810853427237365e-05, "loss": 0.4234, "step": 20323 }, { "epoch": 0.43104069903077347, "grad_norm": 0.37994667887687683, "learning_rate": 1.7810645180314505e-05, "loss": 0.6114, "step": 20324 }, { "epoch": 0.4310619074887065, "grad_norm": 0.4029989540576935, "learning_rate": 1.7810436924704713e-05, "loss": 0.428, "step": 20325 }, { "epoch": 0.4310831159466395, "grad_norm": 0.34424734115600586, "learning_rate": 1.781022866040822e-05, "loss": 0.5212, "step": 20326 }, { "epoch": 0.4311043244045725, "grad_norm": 0.3322567939758301, "learning_rate": 1.781002038742526e-05, "loss": 0.4761, "step": 20327 }, { "epoch": 0.43112553286250554, "grad_norm": 0.3335080146789551, "learning_rate": 1.780981210575606e-05, "loss": 0.5345, "step": 20328 }, { "epoch": 0.43114674132043856, "grad_norm": 0.3406962752342224, "learning_rate": 1.780960381540086e-05, "loss": 0.5105, "step": 20329 }, { "epoch": 0.43116794977837164, "grad_norm": 0.37608903646469116, "learning_rate": 1.7809395516359883e-05, "loss": 0.4304, "step": 20330 }, { "epoch": 0.43118915823630466, "grad_norm": 0.32012149691581726, "learning_rate": 1.7809187208633366e-05, "loss": 0.437, "step": 20331 }, { "epoch": 0.4312103666942377, "grad_norm": 0.30043092370033264, "learning_rate": 1.7808978892221537e-05, "loss": 0.4079, "step": 20332 }, { "epoch": 0.4312315751521707, "grad_norm": 0.33780062198638916, "learning_rate": 1.780877056712463e-05, "loss": 0.454, "step": 20333 }, { "epoch": 0.4312527836101037, "grad_norm": 0.4514021575450897, "learning_rate": 1.7808562233342876e-05, "loss": 0.496, "step": 20334 }, { "epoch": 0.43127399206803674, "grad_norm": 0.4248078763484955, "learning_rate": 1.780835389087651e-05, "loss": 0.6039, "step": 20335 }, { "epoch": 0.43129520052596976, "grad_norm": 0.3540711998939514, "learning_rate": 1.7808145539725755e-05, "loss": 0.603, "step": 20336 }, { "epoch": 0.4313164089839028, "grad_norm": 0.3909555971622467, "learning_rate": 1.7807937179890852e-05, "loss": 0.4957, "step": 20337 }, { "epoch": 0.4313376174418358, "grad_norm": 0.40287676453590393, "learning_rate": 1.7807728811372026e-05, "loss": 0.5771, "step": 20338 }, { "epoch": 0.4313588258997688, "grad_norm": 0.35660091042518616, "learning_rate": 1.7807520434169515e-05, "loss": 0.5194, "step": 20339 }, { "epoch": 0.43138003435770184, "grad_norm": 0.3648773729801178, "learning_rate": 1.780731204828355e-05, "loss": 0.4991, "step": 20340 }, { "epoch": 0.43140124281563486, "grad_norm": 0.3976471424102783, "learning_rate": 1.7807103653714356e-05, "loss": 0.5457, "step": 20341 }, { "epoch": 0.4314224512735679, "grad_norm": 0.3146652579307556, "learning_rate": 1.780689525046217e-05, "loss": 0.4794, "step": 20342 }, { "epoch": 0.43144365973150095, "grad_norm": 0.3620522916316986, "learning_rate": 1.7806686838527224e-05, "loss": 0.5661, "step": 20343 }, { "epoch": 0.43146486818943397, "grad_norm": 0.36829015612602234, "learning_rate": 1.780647841790975e-05, "loss": 0.4741, "step": 20344 }, { "epoch": 0.431486076647367, "grad_norm": 0.3543076813220978, "learning_rate": 1.7806269988609976e-05, "loss": 0.514, "step": 20345 }, { "epoch": 0.4315072851053, "grad_norm": 0.3643830418586731, "learning_rate": 1.7806061550628135e-05, "loss": 0.4562, "step": 20346 }, { "epoch": 0.43152849356323303, "grad_norm": 0.3146525025367737, "learning_rate": 1.7805853103964465e-05, "loss": 0.4899, "step": 20347 }, { "epoch": 0.43154970202116605, "grad_norm": 0.32013508677482605, "learning_rate": 1.780564464861919e-05, "loss": 0.4711, "step": 20348 }, { "epoch": 0.43157091047909907, "grad_norm": 0.35123196244239807, "learning_rate": 1.7805436184592544e-05, "loss": 0.5787, "step": 20349 }, { "epoch": 0.4315921189370321, "grad_norm": 0.3400678336620331, "learning_rate": 1.780522771188476e-05, "loss": 0.5169, "step": 20350 }, { "epoch": 0.4316133273949651, "grad_norm": 0.35121461749076843, "learning_rate": 1.7805019230496068e-05, "loss": 0.4099, "step": 20351 }, { "epoch": 0.4316345358528981, "grad_norm": 0.34704241156578064, "learning_rate": 1.7804810740426702e-05, "loss": 0.4411, "step": 20352 }, { "epoch": 0.43165574431083115, "grad_norm": 0.30911409854888916, "learning_rate": 1.78046022416769e-05, "loss": 0.4598, "step": 20353 }, { "epoch": 0.43167695276876417, "grad_norm": 0.4029744267463684, "learning_rate": 1.780439373424688e-05, "loss": 0.632, "step": 20354 }, { "epoch": 0.4316981612266972, "grad_norm": 0.3330520987510681, "learning_rate": 1.7804185218136883e-05, "loss": 0.4849, "step": 20355 }, { "epoch": 0.4317193696846302, "grad_norm": 0.3988500237464905, "learning_rate": 1.780397669334714e-05, "loss": 0.5467, "step": 20356 }, { "epoch": 0.4317405781425633, "grad_norm": 0.32240185141563416, "learning_rate": 1.7803768159877882e-05, "loss": 0.5108, "step": 20357 }, { "epoch": 0.4317617866004963, "grad_norm": 0.4074997007846832, "learning_rate": 1.780355961772934e-05, "loss": 0.5275, "step": 20358 }, { "epoch": 0.4317829950584293, "grad_norm": 0.3518291115760803, "learning_rate": 1.7803351066901744e-05, "loss": 0.5336, "step": 20359 }, { "epoch": 0.43180420351636234, "grad_norm": 0.4425305426120758, "learning_rate": 1.780314250739533e-05, "loss": 0.5256, "step": 20360 }, { "epoch": 0.43182541197429536, "grad_norm": 0.3376268446445465, "learning_rate": 1.780293393921033e-05, "loss": 0.5117, "step": 20361 }, { "epoch": 0.4318466204322284, "grad_norm": 0.35139864683151245, "learning_rate": 1.7802725362346976e-05, "loss": 0.4717, "step": 20362 }, { "epoch": 0.4318678288901614, "grad_norm": 0.4364766478538513, "learning_rate": 1.7802516776805498e-05, "loss": 0.5243, "step": 20363 }, { "epoch": 0.4318890373480944, "grad_norm": 0.39146682620048523, "learning_rate": 1.7802308182586127e-05, "loss": 0.5211, "step": 20364 }, { "epoch": 0.43191024580602744, "grad_norm": 0.45888710021972656, "learning_rate": 1.78020995796891e-05, "loss": 0.5663, "step": 20365 }, { "epoch": 0.43193145426396046, "grad_norm": 0.560111939907074, "learning_rate": 1.7801890968114643e-05, "loss": 0.528, "step": 20366 }, { "epoch": 0.4319526627218935, "grad_norm": 0.3233947157859802, "learning_rate": 1.780168234786299e-05, "loss": 0.457, "step": 20367 }, { "epoch": 0.4319738711798265, "grad_norm": 0.3719298541545868, "learning_rate": 1.7801473718934374e-05, "loss": 0.5201, "step": 20368 }, { "epoch": 0.4319950796377595, "grad_norm": 0.3334098160266876, "learning_rate": 1.7801265081329027e-05, "loss": 0.4886, "step": 20369 }, { "epoch": 0.43201628809569254, "grad_norm": 0.33331096172332764, "learning_rate": 1.780105643504718e-05, "loss": 0.5394, "step": 20370 }, { "epoch": 0.4320374965536256, "grad_norm": 0.38408204913139343, "learning_rate": 1.7800847780089066e-05, "loss": 0.5037, "step": 20371 }, { "epoch": 0.43205870501155863, "grad_norm": 0.36774811148643494, "learning_rate": 1.7800639116454918e-05, "loss": 0.5109, "step": 20372 }, { "epoch": 0.43207991346949165, "grad_norm": 0.31080564856529236, "learning_rate": 1.7800430444144967e-05, "loss": 0.5582, "step": 20373 }, { "epoch": 0.43210112192742467, "grad_norm": 0.3918769657611847, "learning_rate": 1.7800221763159447e-05, "loss": 0.5676, "step": 20374 }, { "epoch": 0.4321223303853577, "grad_norm": 0.34906259179115295, "learning_rate": 1.7800013073498585e-05, "loss": 0.5003, "step": 20375 }, { "epoch": 0.4321435388432907, "grad_norm": 0.3582065999507904, "learning_rate": 1.7799804375162618e-05, "loss": 0.5028, "step": 20376 }, { "epoch": 0.43216474730122373, "grad_norm": 0.4051508903503418, "learning_rate": 1.7799595668151774e-05, "loss": 0.5322, "step": 20377 }, { "epoch": 0.43218595575915675, "grad_norm": 0.3899625241756439, "learning_rate": 1.779938695246629e-05, "loss": 0.5191, "step": 20378 }, { "epoch": 0.43220716421708977, "grad_norm": 0.3466816842556, "learning_rate": 1.7799178228106393e-05, "loss": 0.5527, "step": 20379 }, { "epoch": 0.4322283726750228, "grad_norm": 0.4151563346385956, "learning_rate": 1.779896949507232e-05, "loss": 0.4926, "step": 20380 }, { "epoch": 0.4322495811329558, "grad_norm": 0.3468261957168579, "learning_rate": 1.77987607533643e-05, "loss": 0.4304, "step": 20381 }, { "epoch": 0.4322707895908888, "grad_norm": 0.3064303398132324, "learning_rate": 1.7798552002982567e-05, "loss": 0.4543, "step": 20382 }, { "epoch": 0.43229199804882185, "grad_norm": 0.33596956729888916, "learning_rate": 1.779834324392735e-05, "loss": 0.467, "step": 20383 }, { "epoch": 0.4323132065067549, "grad_norm": 0.3477616012096405, "learning_rate": 1.779813447619888e-05, "loss": 0.5632, "step": 20384 }, { "epoch": 0.43233441496468794, "grad_norm": 0.3092769384384155, "learning_rate": 1.77979256997974e-05, "loss": 0.4574, "step": 20385 }, { "epoch": 0.43235562342262096, "grad_norm": 0.37344688177108765, "learning_rate": 1.7797716914723132e-05, "loss": 0.4862, "step": 20386 }, { "epoch": 0.432376831880554, "grad_norm": 0.3162359297275543, "learning_rate": 1.7797508120976314e-05, "loss": 0.4469, "step": 20387 }, { "epoch": 0.432398040338487, "grad_norm": 0.33536145091056824, "learning_rate": 1.779729931855717e-05, "loss": 0.4869, "step": 20388 }, { "epoch": 0.43241924879642, "grad_norm": 0.35350140929222107, "learning_rate": 1.7797090507465938e-05, "loss": 0.4626, "step": 20389 }, { "epoch": 0.43244045725435304, "grad_norm": 0.3464275002479553, "learning_rate": 1.7796881687702852e-05, "loss": 0.4899, "step": 20390 }, { "epoch": 0.43246166571228606, "grad_norm": 0.35962894558906555, "learning_rate": 1.7796672859268144e-05, "loss": 0.5367, "step": 20391 }, { "epoch": 0.4324828741702191, "grad_norm": 0.3507644832134247, "learning_rate": 1.7796464022162037e-05, "loss": 0.4072, "step": 20392 }, { "epoch": 0.4325040826281521, "grad_norm": 0.36592981219291687, "learning_rate": 1.7796255176384777e-05, "loss": 0.4844, "step": 20393 }, { "epoch": 0.4325252910860851, "grad_norm": 0.3200909495353699, "learning_rate": 1.779604632193659e-05, "loss": 0.4725, "step": 20394 }, { "epoch": 0.43254649954401814, "grad_norm": 0.3136528432369232, "learning_rate": 1.7795837458817705e-05, "loss": 0.5266, "step": 20395 }, { "epoch": 0.43256770800195116, "grad_norm": 0.4421301484107971, "learning_rate": 1.7795628587028354e-05, "loss": 0.5044, "step": 20396 }, { "epoch": 0.4325889164598842, "grad_norm": 0.36862361431121826, "learning_rate": 1.7795419706568777e-05, "loss": 0.4849, "step": 20397 }, { "epoch": 0.43261012491781725, "grad_norm": 0.3187999725341797, "learning_rate": 1.77952108174392e-05, "loss": 0.5419, "step": 20398 }, { "epoch": 0.43263133337575027, "grad_norm": 0.32213878631591797, "learning_rate": 1.779500191963986e-05, "loss": 0.4541, "step": 20399 }, { "epoch": 0.4326525418336833, "grad_norm": 0.326052725315094, "learning_rate": 1.7794793013170985e-05, "loss": 0.4389, "step": 20400 }, { "epoch": 0.4326737502916163, "grad_norm": 0.34742528200149536, "learning_rate": 1.7794584098032812e-05, "loss": 0.5132, "step": 20401 }, { "epoch": 0.43269495874954933, "grad_norm": 0.3434737026691437, "learning_rate": 1.7794375174225565e-05, "loss": 0.5549, "step": 20402 }, { "epoch": 0.43271616720748235, "grad_norm": 0.35841724276542664, "learning_rate": 1.7794166241749485e-05, "loss": 0.4808, "step": 20403 }, { "epoch": 0.43273737566541537, "grad_norm": 0.39329543709754944, "learning_rate": 1.77939573006048e-05, "loss": 0.5687, "step": 20404 }, { "epoch": 0.4327585841233484, "grad_norm": 0.3712315261363983, "learning_rate": 1.7793748350791743e-05, "loss": 0.5, "step": 20405 }, { "epoch": 0.4327797925812814, "grad_norm": 0.34511932730674744, "learning_rate": 1.7793539392310548e-05, "loss": 0.55, "step": 20406 }, { "epoch": 0.43280100103921443, "grad_norm": 0.3509238660335541, "learning_rate": 1.7793330425161442e-05, "loss": 0.5407, "step": 20407 }, { "epoch": 0.43282220949714745, "grad_norm": 0.33823543787002563, "learning_rate": 1.7793121449344668e-05, "loss": 0.4947, "step": 20408 }, { "epoch": 0.43284341795508047, "grad_norm": 0.36228707432746887, "learning_rate": 1.7792912464860446e-05, "loss": 0.5602, "step": 20409 }, { "epoch": 0.4328646264130135, "grad_norm": 0.3583209812641144, "learning_rate": 1.7792703471709018e-05, "loss": 0.5987, "step": 20410 }, { "epoch": 0.4328858348709465, "grad_norm": 0.30298930406570435, "learning_rate": 1.7792494469890616e-05, "loss": 0.4791, "step": 20411 }, { "epoch": 0.4329070433288796, "grad_norm": 0.467105895280838, "learning_rate": 1.7792285459405464e-05, "loss": 0.4938, "step": 20412 }, { "epoch": 0.4329282517868126, "grad_norm": 0.3627249300479889, "learning_rate": 1.7792076440253804e-05, "loss": 0.5024, "step": 20413 }, { "epoch": 0.4329494602447456, "grad_norm": 0.3356797695159912, "learning_rate": 1.7791867412435858e-05, "loss": 0.5545, "step": 20414 }, { "epoch": 0.43297066870267864, "grad_norm": 0.39851656556129456, "learning_rate": 1.779165837595187e-05, "loss": 0.4801, "step": 20415 }, { "epoch": 0.43299187716061166, "grad_norm": 0.34820976853370667, "learning_rate": 1.7791449330802068e-05, "loss": 0.5682, "step": 20416 }, { "epoch": 0.4330130856185447, "grad_norm": 0.3876070976257324, "learning_rate": 1.7791240276986678e-05, "loss": 0.4205, "step": 20417 }, { "epoch": 0.4330342940764777, "grad_norm": 0.35483860969543457, "learning_rate": 1.779103121450594e-05, "loss": 0.4648, "step": 20418 }, { "epoch": 0.4330555025344107, "grad_norm": 0.36388352513313293, "learning_rate": 1.779082214336009e-05, "loss": 0.4856, "step": 20419 }, { "epoch": 0.43307671099234374, "grad_norm": 0.32526010274887085, "learning_rate": 1.7790613063549353e-05, "loss": 0.4264, "step": 20420 }, { "epoch": 0.43309791945027676, "grad_norm": 0.3161350190639496, "learning_rate": 1.779040397507396e-05, "loss": 0.4205, "step": 20421 }, { "epoch": 0.4331191279082098, "grad_norm": 0.30884477496147156, "learning_rate": 1.779019487793415e-05, "loss": 0.4219, "step": 20422 }, { "epoch": 0.4331403363661428, "grad_norm": 0.32851138710975647, "learning_rate": 1.7789985772130155e-05, "loss": 0.4055, "step": 20423 }, { "epoch": 0.4331615448240758, "grad_norm": 0.35472509264945984, "learning_rate": 1.7789776657662203e-05, "loss": 0.4916, "step": 20424 }, { "epoch": 0.4331827532820089, "grad_norm": 0.38649675250053406, "learning_rate": 1.778956753453053e-05, "loss": 0.496, "step": 20425 }, { "epoch": 0.4332039617399419, "grad_norm": 0.30911684036254883, "learning_rate": 1.7789358402735365e-05, "loss": 0.5321, "step": 20426 }, { "epoch": 0.43322517019787493, "grad_norm": 0.30357879400253296, "learning_rate": 1.7789149262276945e-05, "loss": 0.4776, "step": 20427 }, { "epoch": 0.43324637865580795, "grad_norm": 0.34567779302597046, "learning_rate": 1.7788940113155502e-05, "loss": 0.4831, "step": 20428 }, { "epoch": 0.43326758711374097, "grad_norm": 0.430589497089386, "learning_rate": 1.7788730955371267e-05, "loss": 0.5234, "step": 20429 }, { "epoch": 0.433288795571674, "grad_norm": 0.3429277241230011, "learning_rate": 1.7788521788924473e-05, "loss": 0.4247, "step": 20430 }, { "epoch": 0.433310004029607, "grad_norm": 0.36805984377861023, "learning_rate": 1.778831261381535e-05, "loss": 0.54, "step": 20431 }, { "epoch": 0.43333121248754003, "grad_norm": 0.3359387516975403, "learning_rate": 1.7788103430044138e-05, "loss": 0.4614, "step": 20432 }, { "epoch": 0.43335242094547305, "grad_norm": 0.7856115102767944, "learning_rate": 1.7787894237611065e-05, "loss": 0.503, "step": 20433 }, { "epoch": 0.43337362940340607, "grad_norm": 0.3372945487499237, "learning_rate": 1.778768503651636e-05, "loss": 0.4775, "step": 20434 }, { "epoch": 0.4333948378613391, "grad_norm": 0.40179526805877686, "learning_rate": 1.778747582676026e-05, "loss": 0.5897, "step": 20435 }, { "epoch": 0.4334160463192721, "grad_norm": 0.3478943705558777, "learning_rate": 1.7787266608342998e-05, "loss": 0.5042, "step": 20436 }, { "epoch": 0.4334372547772051, "grad_norm": 0.32977351546287537, "learning_rate": 1.7787057381264803e-05, "loss": 0.4978, "step": 20437 }, { "epoch": 0.43345846323513815, "grad_norm": 0.32660576701164246, "learning_rate": 1.7786848145525917e-05, "loss": 0.4843, "step": 20438 }, { "epoch": 0.4334796716930712, "grad_norm": 0.3477761447429657, "learning_rate": 1.7786638901126562e-05, "loss": 0.4742, "step": 20439 }, { "epoch": 0.43350088015100424, "grad_norm": 0.3234926164150238, "learning_rate": 1.7786429648066974e-05, "loss": 0.4635, "step": 20440 }, { "epoch": 0.43352208860893726, "grad_norm": 0.31044235825538635, "learning_rate": 1.7786220386347388e-05, "loss": 0.4803, "step": 20441 }, { "epoch": 0.4335432970668703, "grad_norm": 0.32424643635749817, "learning_rate": 1.7786011115968035e-05, "loss": 0.4763, "step": 20442 }, { "epoch": 0.4335645055248033, "grad_norm": 0.35689687728881836, "learning_rate": 1.778580183692915e-05, "loss": 0.4913, "step": 20443 }, { "epoch": 0.4335857139827363, "grad_norm": 0.3434414565563202, "learning_rate": 1.7785592549230962e-05, "loss": 0.4403, "step": 20444 }, { "epoch": 0.43360692244066934, "grad_norm": 0.35399749875068665, "learning_rate": 1.7785383252873705e-05, "loss": 0.4578, "step": 20445 }, { "epoch": 0.43362813089860236, "grad_norm": 0.4131067395210266, "learning_rate": 1.7785173947857613e-05, "loss": 0.5456, "step": 20446 }, { "epoch": 0.4336493393565354, "grad_norm": 0.33266597986221313, "learning_rate": 1.7784964634182917e-05, "loss": 0.549, "step": 20447 }, { "epoch": 0.4336705478144684, "grad_norm": 0.35729026794433594, "learning_rate": 1.7784755311849853e-05, "loss": 0.4978, "step": 20448 }, { "epoch": 0.4336917562724014, "grad_norm": 0.349983811378479, "learning_rate": 1.7784545980858653e-05, "loss": 0.5078, "step": 20449 }, { "epoch": 0.43371296473033444, "grad_norm": 0.38872212171554565, "learning_rate": 1.7784336641209545e-05, "loss": 0.4952, "step": 20450 }, { "epoch": 0.43373417318826746, "grad_norm": 0.3188733458518982, "learning_rate": 1.7784127292902765e-05, "loss": 0.434, "step": 20451 }, { "epoch": 0.4337553816462005, "grad_norm": 0.3478366732597351, "learning_rate": 1.778391793593855e-05, "loss": 0.4971, "step": 20452 }, { "epoch": 0.43377659010413355, "grad_norm": 0.3249936103820801, "learning_rate": 1.7783708570317125e-05, "loss": 0.4998, "step": 20453 }, { "epoch": 0.43379779856206657, "grad_norm": 0.32855284214019775, "learning_rate": 1.7783499196038732e-05, "loss": 0.5342, "step": 20454 }, { "epoch": 0.4338190070199996, "grad_norm": 0.33013275265693665, "learning_rate": 1.7783289813103595e-05, "loss": 0.5657, "step": 20455 }, { "epoch": 0.4338402154779326, "grad_norm": 0.33446601033210754, "learning_rate": 1.778308042151195e-05, "loss": 0.4273, "step": 20456 }, { "epoch": 0.43386142393586563, "grad_norm": 0.3487536609172821, "learning_rate": 1.7782871021264033e-05, "loss": 0.5191, "step": 20457 }, { "epoch": 0.43388263239379865, "grad_norm": 0.3319748342037201, "learning_rate": 1.7782661612360072e-05, "loss": 0.5266, "step": 20458 }, { "epoch": 0.43390384085173167, "grad_norm": 0.36829957365989685, "learning_rate": 1.7782452194800305e-05, "loss": 0.38, "step": 20459 }, { "epoch": 0.4339250493096647, "grad_norm": 0.3486138880252838, "learning_rate": 1.778224276858496e-05, "loss": 0.5016, "step": 20460 }, { "epoch": 0.4339462577675977, "grad_norm": 0.3367615044116974, "learning_rate": 1.7782033333714275e-05, "loss": 0.4955, "step": 20461 }, { "epoch": 0.43396746622553073, "grad_norm": 0.3265536427497864, "learning_rate": 1.7781823890188482e-05, "loss": 0.4328, "step": 20462 }, { "epoch": 0.43398867468346375, "grad_norm": 0.34979039430618286, "learning_rate": 1.7781614438007806e-05, "loss": 0.4654, "step": 20463 }, { "epoch": 0.43400988314139677, "grad_norm": 0.3511177897453308, "learning_rate": 1.778140497717249e-05, "loss": 0.4749, "step": 20464 }, { "epoch": 0.4340310915993298, "grad_norm": 0.5184397101402283, "learning_rate": 1.778119550768276e-05, "loss": 0.5198, "step": 20465 }, { "epoch": 0.43405230005726286, "grad_norm": 0.34085845947265625, "learning_rate": 1.7780986029538858e-05, "loss": 0.4889, "step": 20466 }, { "epoch": 0.4340735085151959, "grad_norm": 0.41989046335220337, "learning_rate": 1.7780776542741e-05, "loss": 0.5424, "step": 20467 }, { "epoch": 0.4340947169731289, "grad_norm": 0.36728253960609436, "learning_rate": 1.778056704728944e-05, "loss": 0.5119, "step": 20468 }, { "epoch": 0.4341159254310619, "grad_norm": 0.33426135778427124, "learning_rate": 1.7780357543184396e-05, "loss": 0.4222, "step": 20469 }, { "epoch": 0.43413713388899494, "grad_norm": 0.36231064796447754, "learning_rate": 1.778014803042611e-05, "loss": 0.5051, "step": 20470 }, { "epoch": 0.43415834234692796, "grad_norm": 0.36446163058280945, "learning_rate": 1.7779938509014807e-05, "loss": 0.5748, "step": 20471 }, { "epoch": 0.434179550804861, "grad_norm": 0.3529477119445801, "learning_rate": 1.7779728978950724e-05, "loss": 0.4811, "step": 20472 }, { "epoch": 0.434200759262794, "grad_norm": 0.33362480998039246, "learning_rate": 1.7779519440234095e-05, "loss": 0.5294, "step": 20473 }, { "epoch": 0.434221967720727, "grad_norm": 0.5753158926963806, "learning_rate": 1.7779309892865154e-05, "loss": 0.4756, "step": 20474 }, { "epoch": 0.43424317617866004, "grad_norm": 0.37413978576660156, "learning_rate": 1.777910033684413e-05, "loss": 0.5478, "step": 20475 }, { "epoch": 0.43426438463659306, "grad_norm": 0.43446820974349976, "learning_rate": 1.7778890772171257e-05, "loss": 0.6115, "step": 20476 }, { "epoch": 0.4342855930945261, "grad_norm": 0.31302621960639954, "learning_rate": 1.7778681198846772e-05, "loss": 0.4579, "step": 20477 }, { "epoch": 0.4343068015524591, "grad_norm": 0.41378432512283325, "learning_rate": 1.7778471616870904e-05, "loss": 0.588, "step": 20478 }, { "epoch": 0.4343280100103921, "grad_norm": 0.318832665681839, "learning_rate": 1.7778262026243888e-05, "loss": 0.5407, "step": 20479 }, { "epoch": 0.4343492184683252, "grad_norm": 0.30904629826545715, "learning_rate": 1.7778052426965958e-05, "loss": 0.4694, "step": 20480 }, { "epoch": 0.4343704269262582, "grad_norm": 0.33486029505729675, "learning_rate": 1.7777842819037342e-05, "loss": 0.4474, "step": 20481 }, { "epoch": 0.43439163538419123, "grad_norm": 0.33751240372657776, "learning_rate": 1.777763320245828e-05, "loss": 0.5159, "step": 20482 }, { "epoch": 0.43441284384212425, "grad_norm": 0.33702877163887024, "learning_rate": 1.7777423577229e-05, "loss": 0.4817, "step": 20483 }, { "epoch": 0.43443405230005727, "grad_norm": 0.39023879170417786, "learning_rate": 1.7777213943349737e-05, "loss": 0.4873, "step": 20484 }, { "epoch": 0.4344552607579903, "grad_norm": 0.31829583644866943, "learning_rate": 1.7777004300820726e-05, "loss": 0.4802, "step": 20485 }, { "epoch": 0.4344764692159233, "grad_norm": 0.38237276673316956, "learning_rate": 1.7776794649642198e-05, "loss": 0.5788, "step": 20486 }, { "epoch": 0.43449767767385633, "grad_norm": 0.32809028029441833, "learning_rate": 1.7776584989814386e-05, "loss": 0.5688, "step": 20487 }, { "epoch": 0.43451888613178935, "grad_norm": 0.3944791555404663, "learning_rate": 1.7776375321337523e-05, "loss": 0.5817, "step": 20488 }, { "epoch": 0.43454009458972237, "grad_norm": 0.3388252258300781, "learning_rate": 1.7776165644211842e-05, "loss": 0.5007, "step": 20489 }, { "epoch": 0.4345613030476554, "grad_norm": 0.3664891719818115, "learning_rate": 1.7775955958437577e-05, "loss": 0.4307, "step": 20490 }, { "epoch": 0.4345825115055884, "grad_norm": 0.3140162527561188, "learning_rate": 1.7775746264014965e-05, "loss": 0.4964, "step": 20491 }, { "epoch": 0.43460371996352143, "grad_norm": 0.4078984260559082, "learning_rate": 1.7775536560944233e-05, "loss": 0.491, "step": 20492 }, { "epoch": 0.43462492842145445, "grad_norm": 0.34206557273864746, "learning_rate": 1.7775326849225618e-05, "loss": 0.455, "step": 20493 }, { "epoch": 0.4346461368793875, "grad_norm": 0.39396119117736816, "learning_rate": 1.777511712885935e-05, "loss": 0.5662, "step": 20494 }, { "epoch": 0.43466734533732054, "grad_norm": 0.38470903038978577, "learning_rate": 1.7774907399845664e-05, "loss": 0.5099, "step": 20495 }, { "epoch": 0.43468855379525356, "grad_norm": 0.33914291858673096, "learning_rate": 1.7774697662184798e-05, "loss": 0.4862, "step": 20496 }, { "epoch": 0.4347097622531866, "grad_norm": 0.3664501905441284, "learning_rate": 1.7774487915876978e-05, "loss": 0.5707, "step": 20497 }, { "epoch": 0.4347309707111196, "grad_norm": 0.6106072664260864, "learning_rate": 1.777427816092244e-05, "loss": 0.5305, "step": 20498 }, { "epoch": 0.4347521791690526, "grad_norm": 0.33913761377334595, "learning_rate": 1.7774068397321416e-05, "loss": 0.5327, "step": 20499 }, { "epoch": 0.43477338762698564, "grad_norm": 0.3143930435180664, "learning_rate": 1.7773858625074145e-05, "loss": 0.4804, "step": 20500 }, { "epoch": 0.43479459608491866, "grad_norm": 0.35546091198921204, "learning_rate": 1.7773648844180852e-05, "loss": 0.4919, "step": 20501 }, { "epoch": 0.4348158045428517, "grad_norm": 0.339800626039505, "learning_rate": 1.7773439054641772e-05, "loss": 0.5635, "step": 20502 }, { "epoch": 0.4348370130007847, "grad_norm": 0.3303817808628082, "learning_rate": 1.7773229256457145e-05, "loss": 0.5, "step": 20503 }, { "epoch": 0.4348582214587177, "grad_norm": 0.3504657745361328, "learning_rate": 1.77730194496272e-05, "loss": 0.584, "step": 20504 }, { "epoch": 0.43487942991665074, "grad_norm": 0.3427352011203766, "learning_rate": 1.777280963415217e-05, "loss": 0.4589, "step": 20505 }, { "epoch": 0.43490063837458376, "grad_norm": 0.33753225207328796, "learning_rate": 1.7772599810032285e-05, "loss": 0.5165, "step": 20506 }, { "epoch": 0.43492184683251683, "grad_norm": 0.37362951040267944, "learning_rate": 1.7772389977267786e-05, "loss": 0.4972, "step": 20507 }, { "epoch": 0.43494305529044985, "grad_norm": 0.4753483533859253, "learning_rate": 1.7772180135858902e-05, "loss": 0.426, "step": 20508 }, { "epoch": 0.4349642637483829, "grad_norm": 0.5049505829811096, "learning_rate": 1.7771970285805867e-05, "loss": 0.529, "step": 20509 }, { "epoch": 0.4349854722063159, "grad_norm": 0.3863534927368164, "learning_rate": 1.7771760427108912e-05, "loss": 0.5272, "step": 20510 }, { "epoch": 0.4350066806642489, "grad_norm": 0.34512925148010254, "learning_rate": 1.7771550559768273e-05, "loss": 0.5612, "step": 20511 }, { "epoch": 0.43502788912218193, "grad_norm": 0.33773964643478394, "learning_rate": 1.7771340683784186e-05, "loss": 0.4935, "step": 20512 }, { "epoch": 0.43504909758011495, "grad_norm": 0.34987229108810425, "learning_rate": 1.7771130799156875e-05, "loss": 0.4802, "step": 20513 }, { "epoch": 0.43507030603804797, "grad_norm": 0.3738173544406891, "learning_rate": 1.7770920905886586e-05, "loss": 0.4739, "step": 20514 }, { "epoch": 0.435091514495981, "grad_norm": 0.34652119874954224, "learning_rate": 1.7770711003973543e-05, "loss": 0.478, "step": 20515 }, { "epoch": 0.435112722953914, "grad_norm": 0.3949231505393982, "learning_rate": 1.7770501093417984e-05, "loss": 0.5317, "step": 20516 }, { "epoch": 0.43513393141184703, "grad_norm": 0.3565441071987152, "learning_rate": 1.777029117422014e-05, "loss": 0.5215, "step": 20517 }, { "epoch": 0.43515513986978005, "grad_norm": 0.34828513860702515, "learning_rate": 1.7770081246380247e-05, "loss": 0.575, "step": 20518 }, { "epoch": 0.43517634832771307, "grad_norm": 0.3742749094963074, "learning_rate": 1.776987130989854e-05, "loss": 0.5527, "step": 20519 }, { "epoch": 0.4351975567856461, "grad_norm": 0.34261268377304077, "learning_rate": 1.7769661364775244e-05, "loss": 0.456, "step": 20520 }, { "epoch": 0.43521876524357916, "grad_norm": 0.3461278975009918, "learning_rate": 1.77694514110106e-05, "loss": 0.4927, "step": 20521 }, { "epoch": 0.4352399737015122, "grad_norm": 0.35179197788238525, "learning_rate": 1.776924144860484e-05, "loss": 0.6067, "step": 20522 }, { "epoch": 0.4352611821594452, "grad_norm": 0.3264404833316803, "learning_rate": 1.77690314775582e-05, "loss": 0.5139, "step": 20523 }, { "epoch": 0.4352823906173782, "grad_norm": 0.3159615695476532, "learning_rate": 1.7768821497870905e-05, "loss": 0.4877, "step": 20524 }, { "epoch": 0.43530359907531124, "grad_norm": 0.36225011944770813, "learning_rate": 1.7768611509543198e-05, "loss": 0.5271, "step": 20525 }, { "epoch": 0.43532480753324426, "grad_norm": 0.3740575313568115, "learning_rate": 1.7768401512575307e-05, "loss": 0.4948, "step": 20526 }, { "epoch": 0.4353460159911773, "grad_norm": 0.3425029218196869, "learning_rate": 1.776819150696747e-05, "loss": 0.4502, "step": 20527 }, { "epoch": 0.4353672244491103, "grad_norm": 0.3955276906490326, "learning_rate": 1.7767981492719917e-05, "loss": 0.5394, "step": 20528 }, { "epoch": 0.4353884329070433, "grad_norm": 0.3391995429992676, "learning_rate": 1.776777146983288e-05, "loss": 0.5295, "step": 20529 }, { "epoch": 0.43540964136497634, "grad_norm": 0.3920097351074219, "learning_rate": 1.7767561438306596e-05, "loss": 0.472, "step": 20530 }, { "epoch": 0.43543084982290936, "grad_norm": 0.3435875475406647, "learning_rate": 1.77673513981413e-05, "loss": 0.423, "step": 20531 }, { "epoch": 0.4354520582808424, "grad_norm": 0.3598473072052002, "learning_rate": 1.776714134933722e-05, "loss": 0.5679, "step": 20532 }, { "epoch": 0.4354732667387754, "grad_norm": 0.334041565656662, "learning_rate": 1.7766931291894597e-05, "loss": 0.5189, "step": 20533 }, { "epoch": 0.4354944751967085, "grad_norm": 0.34882059693336487, "learning_rate": 1.7766721225813656e-05, "loss": 0.4941, "step": 20534 }, { "epoch": 0.4355156836546415, "grad_norm": 0.3481937050819397, "learning_rate": 1.776651115109464e-05, "loss": 0.5122, "step": 20535 }, { "epoch": 0.4355368921125745, "grad_norm": 0.38726118206977844, "learning_rate": 1.7766301067737776e-05, "loss": 0.6111, "step": 20536 }, { "epoch": 0.43555810057050753, "grad_norm": 0.33952897787094116, "learning_rate": 1.7766090975743295e-05, "loss": 0.5159, "step": 20537 }, { "epoch": 0.43557930902844055, "grad_norm": 0.4519498348236084, "learning_rate": 1.776588087511144e-05, "loss": 0.5537, "step": 20538 }, { "epoch": 0.4356005174863736, "grad_norm": 0.38171905279159546, "learning_rate": 1.7765670765842437e-05, "loss": 0.575, "step": 20539 }, { "epoch": 0.4356217259443066, "grad_norm": 0.3328104019165039, "learning_rate": 1.7765460647936523e-05, "loss": 0.3997, "step": 20540 }, { "epoch": 0.4356429344022396, "grad_norm": 0.3263721764087677, "learning_rate": 1.7765250521393933e-05, "loss": 0.5099, "step": 20541 }, { "epoch": 0.43566414286017263, "grad_norm": 0.3932115137577057, "learning_rate": 1.7765040386214897e-05, "loss": 0.518, "step": 20542 }, { "epoch": 0.43568535131810565, "grad_norm": 0.41862836480140686, "learning_rate": 1.7764830242399652e-05, "loss": 0.5459, "step": 20543 }, { "epoch": 0.43570655977603867, "grad_norm": 0.38958248496055603, "learning_rate": 1.776462008994843e-05, "loss": 0.4456, "step": 20544 }, { "epoch": 0.4357277682339717, "grad_norm": 0.3465358316898346, "learning_rate": 1.7764409928861463e-05, "loss": 0.4967, "step": 20545 }, { "epoch": 0.4357489766919047, "grad_norm": 0.39116916060447693, "learning_rate": 1.7764199759138987e-05, "loss": 0.6183, "step": 20546 }, { "epoch": 0.43577018514983773, "grad_norm": 0.34645771980285645, "learning_rate": 1.7763989580781237e-05, "loss": 0.5533, "step": 20547 }, { "epoch": 0.4357913936077708, "grad_norm": 0.31821033358573914, "learning_rate": 1.7763779393788446e-05, "loss": 0.4982, "step": 20548 }, { "epoch": 0.4358126020657038, "grad_norm": 0.3309018909931183, "learning_rate": 1.7763569198160846e-05, "loss": 0.5126, "step": 20549 }, { "epoch": 0.43583381052363684, "grad_norm": 0.3773598372936249, "learning_rate": 1.776335899389867e-05, "loss": 0.5586, "step": 20550 }, { "epoch": 0.43585501898156986, "grad_norm": 0.34771043062210083, "learning_rate": 1.7763148781002156e-05, "loss": 0.6639, "step": 20551 }, { "epoch": 0.4358762274395029, "grad_norm": 0.32862067222595215, "learning_rate": 1.7762938559471533e-05, "loss": 0.4899, "step": 20552 }, { "epoch": 0.4358974358974359, "grad_norm": 0.4670267701148987, "learning_rate": 1.776272832930704e-05, "loss": 0.5132, "step": 20553 }, { "epoch": 0.4359186443553689, "grad_norm": 0.3206838071346283, "learning_rate": 1.7762518090508907e-05, "loss": 0.4936, "step": 20554 }, { "epoch": 0.43593985281330194, "grad_norm": 0.31593599915504456, "learning_rate": 1.7762307843077368e-05, "loss": 0.4502, "step": 20555 }, { "epoch": 0.43596106127123496, "grad_norm": 0.3746795654296875, "learning_rate": 1.7762097587012656e-05, "loss": 0.5346, "step": 20556 }, { "epoch": 0.435982269729168, "grad_norm": 0.4049126207828522, "learning_rate": 1.776188732231501e-05, "loss": 0.5492, "step": 20557 }, { "epoch": 0.436003478187101, "grad_norm": 0.3194791376590729, "learning_rate": 1.776167704898466e-05, "loss": 0.4654, "step": 20558 }, { "epoch": 0.436024686645034, "grad_norm": 0.3145185708999634, "learning_rate": 1.776146676702184e-05, "loss": 0.5746, "step": 20559 }, { "epoch": 0.43604589510296704, "grad_norm": 0.39949801564216614, "learning_rate": 1.776125647642678e-05, "loss": 0.5594, "step": 20560 }, { "epoch": 0.43606710356090006, "grad_norm": 0.3648107051849365, "learning_rate": 1.7761046177199724e-05, "loss": 0.577, "step": 20561 }, { "epoch": 0.43608831201883314, "grad_norm": 0.32417038083076477, "learning_rate": 1.7760835869340895e-05, "loss": 0.433, "step": 20562 }, { "epoch": 0.43610952047676615, "grad_norm": 0.3781316578388214, "learning_rate": 1.7760625552850533e-05, "loss": 0.5663, "step": 20563 }, { "epoch": 0.4361307289346992, "grad_norm": 0.3810901939868927, "learning_rate": 1.7760415227728875e-05, "loss": 0.4773, "step": 20564 }, { "epoch": 0.4361519373926322, "grad_norm": 0.35710084438323975, "learning_rate": 1.7760204893976144e-05, "loss": 0.516, "step": 20565 }, { "epoch": 0.4361731458505652, "grad_norm": 0.3668960928916931, "learning_rate": 1.7759994551592585e-05, "loss": 0.4822, "step": 20566 }, { "epoch": 0.43619435430849823, "grad_norm": 1.0918630361557007, "learning_rate": 1.7759784200578427e-05, "loss": 0.523, "step": 20567 }, { "epoch": 0.43621556276643125, "grad_norm": 0.3483571410179138, "learning_rate": 1.77595738409339e-05, "loss": 0.4878, "step": 20568 }, { "epoch": 0.4362367712243643, "grad_norm": 0.3314566910266876, "learning_rate": 1.775936347265925e-05, "loss": 0.4644, "step": 20569 }, { "epoch": 0.4362579796822973, "grad_norm": 0.3121621310710907, "learning_rate": 1.77591530957547e-05, "loss": 0.4309, "step": 20570 }, { "epoch": 0.4362791881402303, "grad_norm": 0.32779860496520996, "learning_rate": 1.7758942710220486e-05, "loss": 0.4876, "step": 20571 }, { "epoch": 0.43630039659816333, "grad_norm": 0.34870287775993347, "learning_rate": 1.775873231605684e-05, "loss": 0.4377, "step": 20572 }, { "epoch": 0.43632160505609635, "grad_norm": 0.3724536597728729, "learning_rate": 1.7758521913264005e-05, "loss": 0.5674, "step": 20573 }, { "epoch": 0.43634281351402937, "grad_norm": 0.30679208040237427, "learning_rate": 1.775831150184221e-05, "loss": 0.4143, "step": 20574 }, { "epoch": 0.43636402197196245, "grad_norm": 0.33785542845726013, "learning_rate": 1.7758101081791685e-05, "loss": 0.5114, "step": 20575 }, { "epoch": 0.43638523042989547, "grad_norm": 0.35773786902427673, "learning_rate": 1.775789065311267e-05, "loss": 0.4914, "step": 20576 }, { "epoch": 0.4364064388878285, "grad_norm": 0.34675878286361694, "learning_rate": 1.775768021580539e-05, "loss": 0.4215, "step": 20577 }, { "epoch": 0.4364276473457615, "grad_norm": 0.32336390018463135, "learning_rate": 1.775746976987009e-05, "loss": 0.4404, "step": 20578 }, { "epoch": 0.4364488558036945, "grad_norm": 0.3064548969268799, "learning_rate": 1.7757259315307003e-05, "loss": 0.4377, "step": 20579 }, { "epoch": 0.43647006426162754, "grad_norm": 0.39094600081443787, "learning_rate": 1.7757048852116355e-05, "loss": 0.4117, "step": 20580 }, { "epoch": 0.43649127271956056, "grad_norm": 0.3452380299568176, "learning_rate": 1.7756838380298386e-05, "loss": 0.5001, "step": 20581 }, { "epoch": 0.4365124811774936, "grad_norm": 0.5143591165542603, "learning_rate": 1.7756627899853328e-05, "loss": 0.492, "step": 20582 }, { "epoch": 0.4365336896354266, "grad_norm": 0.37378984689712524, "learning_rate": 1.7756417410781416e-05, "loss": 0.4792, "step": 20583 }, { "epoch": 0.4365548980933596, "grad_norm": 0.30879291892051697, "learning_rate": 1.7756206913082887e-05, "loss": 0.5159, "step": 20584 }, { "epoch": 0.43657610655129264, "grad_norm": 0.3290950059890747, "learning_rate": 1.7755996406757967e-05, "loss": 0.4599, "step": 20585 }, { "epoch": 0.43659731500922566, "grad_norm": 0.3273533880710602, "learning_rate": 1.7755785891806898e-05, "loss": 0.4604, "step": 20586 }, { "epoch": 0.4366185234671587, "grad_norm": 0.3879268169403076, "learning_rate": 1.7755575368229913e-05, "loss": 0.5275, "step": 20587 }, { "epoch": 0.4366397319250917, "grad_norm": 0.34632617235183716, "learning_rate": 1.775536483602724e-05, "loss": 0.4544, "step": 20588 }, { "epoch": 0.4366609403830248, "grad_norm": 0.374466210603714, "learning_rate": 1.775515429519912e-05, "loss": 0.4686, "step": 20589 }, { "epoch": 0.4366821488409578, "grad_norm": 0.33210283517837524, "learning_rate": 1.7754943745745787e-05, "loss": 0.5207, "step": 20590 }, { "epoch": 0.4367033572988908, "grad_norm": 0.4423411190509796, "learning_rate": 1.775473318766747e-05, "loss": 0.4639, "step": 20591 }, { "epoch": 0.43672456575682383, "grad_norm": 0.33852896094322205, "learning_rate": 1.7754522620964405e-05, "loss": 0.4794, "step": 20592 }, { "epoch": 0.43674577421475685, "grad_norm": 0.356343537569046, "learning_rate": 1.775431204563683e-05, "loss": 0.603, "step": 20593 }, { "epoch": 0.4367669826726899, "grad_norm": 0.33322930335998535, "learning_rate": 1.7754101461684974e-05, "loss": 0.4304, "step": 20594 }, { "epoch": 0.4367881911306229, "grad_norm": 0.3653322458267212, "learning_rate": 1.7753890869109078e-05, "loss": 0.5283, "step": 20595 }, { "epoch": 0.4368093995885559, "grad_norm": 0.32673102617263794, "learning_rate": 1.7753680267909367e-05, "loss": 0.5291, "step": 20596 }, { "epoch": 0.43683060804648893, "grad_norm": 0.35591885447502136, "learning_rate": 1.7753469658086084e-05, "loss": 0.5273, "step": 20597 }, { "epoch": 0.43685181650442195, "grad_norm": 0.31049656867980957, "learning_rate": 1.7753259039639457e-05, "loss": 0.4756, "step": 20598 }, { "epoch": 0.43687302496235497, "grad_norm": 0.328161358833313, "learning_rate": 1.7753048412569724e-05, "loss": 0.4493, "step": 20599 }, { "epoch": 0.436894233420288, "grad_norm": 0.37151363492012024, "learning_rate": 1.7752837776877117e-05, "loss": 0.5368, "step": 20600 }, { "epoch": 0.436915441878221, "grad_norm": 0.3380083441734314, "learning_rate": 1.7752627132561873e-05, "loss": 0.4735, "step": 20601 }, { "epoch": 0.43693665033615403, "grad_norm": 0.35461941361427307, "learning_rate": 1.775241647962422e-05, "loss": 0.5101, "step": 20602 }, { "epoch": 0.4369578587940871, "grad_norm": 0.37114205956459045, "learning_rate": 1.77522058180644e-05, "loss": 0.4644, "step": 20603 }, { "epoch": 0.4369790672520201, "grad_norm": 0.34461650252342224, "learning_rate": 1.7751995147882644e-05, "loss": 0.5056, "step": 20604 }, { "epoch": 0.43700027570995315, "grad_norm": 0.3378063142299652, "learning_rate": 1.7751784469079187e-05, "loss": 0.4811, "step": 20605 }, { "epoch": 0.43702148416788617, "grad_norm": 0.3488270342350006, "learning_rate": 1.7751573781654263e-05, "loss": 0.4706, "step": 20606 }, { "epoch": 0.4370426926258192, "grad_norm": 0.40053826570510864, "learning_rate": 1.7751363085608102e-05, "loss": 0.6003, "step": 20607 }, { "epoch": 0.4370639010837522, "grad_norm": 0.3467099368572235, "learning_rate": 1.7751152380940948e-05, "loss": 0.5092, "step": 20608 }, { "epoch": 0.4370851095416852, "grad_norm": 0.3039798438549042, "learning_rate": 1.7750941667653025e-05, "loss": 0.5057, "step": 20609 }, { "epoch": 0.43710631799961824, "grad_norm": 0.3317641317844391, "learning_rate": 1.7750730945744576e-05, "loss": 0.5425, "step": 20610 }, { "epoch": 0.43712752645755126, "grad_norm": 0.40546008944511414, "learning_rate": 1.775052021521583e-05, "loss": 0.5542, "step": 20611 }, { "epoch": 0.4371487349154843, "grad_norm": 0.4102662205696106, "learning_rate": 1.7750309476067017e-05, "loss": 0.4993, "step": 20612 }, { "epoch": 0.4371699433734173, "grad_norm": 0.3535217046737671, "learning_rate": 1.7750098728298385e-05, "loss": 0.5801, "step": 20613 }, { "epoch": 0.4371911518313503, "grad_norm": 0.3235434293746948, "learning_rate": 1.7749887971910156e-05, "loss": 0.5912, "step": 20614 }, { "epoch": 0.43721236028928334, "grad_norm": 0.4203327000141144, "learning_rate": 1.774967720690257e-05, "loss": 0.5564, "step": 20615 }, { "epoch": 0.4372335687472164, "grad_norm": 0.3665109872817993, "learning_rate": 1.7749466433275862e-05, "loss": 0.4904, "step": 20616 }, { "epoch": 0.43725477720514944, "grad_norm": 0.3172188997268677, "learning_rate": 1.7749255651030265e-05, "loss": 0.4336, "step": 20617 }, { "epoch": 0.43727598566308246, "grad_norm": 0.33466634154319763, "learning_rate": 1.7749044860166013e-05, "loss": 0.4999, "step": 20618 }, { "epoch": 0.4372971941210155, "grad_norm": 0.42947277426719666, "learning_rate": 1.774883406068334e-05, "loss": 0.5025, "step": 20619 }, { "epoch": 0.4373184025789485, "grad_norm": 0.32894378900527954, "learning_rate": 1.7748623252582478e-05, "loss": 0.5001, "step": 20620 }, { "epoch": 0.4373396110368815, "grad_norm": 0.3948712944984436, "learning_rate": 1.7748412435863667e-05, "loss": 0.5296, "step": 20621 }, { "epoch": 0.43736081949481453, "grad_norm": 0.3568304479122162, "learning_rate": 1.7748201610527135e-05, "loss": 0.4808, "step": 20622 }, { "epoch": 0.43738202795274755, "grad_norm": 0.3436194658279419, "learning_rate": 1.7747990776573124e-05, "loss": 0.5002, "step": 20623 }, { "epoch": 0.4374032364106806, "grad_norm": 0.39389175176620483, "learning_rate": 1.774777993400187e-05, "loss": 0.535, "step": 20624 }, { "epoch": 0.4374244448686136, "grad_norm": 0.3894224762916565, "learning_rate": 1.7747569082813596e-05, "loss": 0.4889, "step": 20625 }, { "epoch": 0.4374456533265466, "grad_norm": 0.32220664620399475, "learning_rate": 1.7747358223008543e-05, "loss": 0.5027, "step": 20626 }, { "epoch": 0.43746686178447963, "grad_norm": 0.3477209508419037, "learning_rate": 1.774714735458695e-05, "loss": 0.4975, "step": 20627 }, { "epoch": 0.43748807024241265, "grad_norm": 0.30255550146102905, "learning_rate": 1.7746936477549044e-05, "loss": 0.4641, "step": 20628 }, { "epoch": 0.43750927870034567, "grad_norm": 0.3176873028278351, "learning_rate": 1.7746725591895062e-05, "loss": 0.412, "step": 20629 }, { "epoch": 0.43753048715827875, "grad_norm": 0.3814985752105713, "learning_rate": 1.7746514697625238e-05, "loss": 0.526, "step": 20630 }, { "epoch": 0.43755169561621177, "grad_norm": 0.33205753564834595, "learning_rate": 1.774630379473981e-05, "loss": 0.524, "step": 20631 }, { "epoch": 0.4375729040741448, "grad_norm": 0.3450608253479004, "learning_rate": 1.7746092883239012e-05, "loss": 0.4847, "step": 20632 }, { "epoch": 0.4375941125320778, "grad_norm": 0.31736329197883606, "learning_rate": 1.7745881963123072e-05, "loss": 0.4896, "step": 20633 }, { "epoch": 0.4376153209900108, "grad_norm": 0.35205695033073425, "learning_rate": 1.7745671034392234e-05, "loss": 0.3899, "step": 20634 }, { "epoch": 0.43763652944794385, "grad_norm": 0.3271541893482208, "learning_rate": 1.774546009704673e-05, "loss": 0.504, "step": 20635 }, { "epoch": 0.43765773790587686, "grad_norm": 0.39771154522895813, "learning_rate": 1.7745249151086783e-05, "loss": 0.5033, "step": 20636 }, { "epoch": 0.4376789463638099, "grad_norm": 0.3382384181022644, "learning_rate": 1.7745038196512644e-05, "loss": 0.4846, "step": 20637 }, { "epoch": 0.4377001548217429, "grad_norm": 0.3493939936161041, "learning_rate": 1.774482723332454e-05, "loss": 0.5846, "step": 20638 }, { "epoch": 0.4377213632796759, "grad_norm": 0.3701447546482086, "learning_rate": 1.7744616261522703e-05, "loss": 0.4699, "step": 20639 }, { "epoch": 0.43774257173760894, "grad_norm": 0.37310224771499634, "learning_rate": 1.7744405281107375e-05, "loss": 0.4702, "step": 20640 }, { "epoch": 0.43776378019554196, "grad_norm": 0.40559589862823486, "learning_rate": 1.7744194292078785e-05, "loss": 0.459, "step": 20641 }, { "epoch": 0.437784988653475, "grad_norm": 0.34580889344215393, "learning_rate": 1.7743983294437172e-05, "loss": 0.4678, "step": 20642 }, { "epoch": 0.437806197111408, "grad_norm": 0.3564559817314148, "learning_rate": 1.7743772288182765e-05, "loss": 0.6402, "step": 20643 }, { "epoch": 0.4378274055693411, "grad_norm": 0.31894195079803467, "learning_rate": 1.7743561273315805e-05, "loss": 0.5095, "step": 20644 }, { "epoch": 0.4378486140272741, "grad_norm": 0.36915189027786255, "learning_rate": 1.774335024983652e-05, "loss": 0.423, "step": 20645 }, { "epoch": 0.4378698224852071, "grad_norm": 0.4052182734012604, "learning_rate": 1.774313921774515e-05, "loss": 0.4975, "step": 20646 }, { "epoch": 0.43789103094314014, "grad_norm": 0.31295716762542725, "learning_rate": 1.7742928177041926e-05, "loss": 0.4373, "step": 20647 }, { "epoch": 0.43791223940107316, "grad_norm": 0.42874839901924133, "learning_rate": 1.774271712772709e-05, "loss": 0.4753, "step": 20648 }, { "epoch": 0.4379334478590062, "grad_norm": 0.3015233874320984, "learning_rate": 1.7742506069800863e-05, "loss": 0.4535, "step": 20649 }, { "epoch": 0.4379546563169392, "grad_norm": 0.34680357575416565, "learning_rate": 1.7742295003263493e-05, "loss": 0.4653, "step": 20650 }, { "epoch": 0.4379758647748722, "grad_norm": 0.3981415033340454, "learning_rate": 1.7742083928115208e-05, "loss": 0.5208, "step": 20651 }, { "epoch": 0.43799707323280523, "grad_norm": 1.3349754810333252, "learning_rate": 1.7741872844356246e-05, "loss": 0.5414, "step": 20652 }, { "epoch": 0.43801828169073825, "grad_norm": 0.31290239095687866, "learning_rate": 1.7741661751986838e-05, "loss": 0.4384, "step": 20653 }, { "epoch": 0.4380394901486713, "grad_norm": 0.9806849360466003, "learning_rate": 1.7741450651007222e-05, "loss": 0.5575, "step": 20654 }, { "epoch": 0.4380606986066043, "grad_norm": 0.41758888959884644, "learning_rate": 1.7741239541417635e-05, "loss": 0.4851, "step": 20655 }, { "epoch": 0.4380819070645373, "grad_norm": 0.3827950954437256, "learning_rate": 1.7741028423218306e-05, "loss": 0.4699, "step": 20656 }, { "epoch": 0.4381031155224704, "grad_norm": 0.3348706066608429, "learning_rate": 1.774081729640947e-05, "loss": 0.4722, "step": 20657 }, { "epoch": 0.4381243239804034, "grad_norm": 1.368483304977417, "learning_rate": 1.774060616099137e-05, "loss": 0.5085, "step": 20658 }, { "epoch": 0.4381455324383364, "grad_norm": 0.3468315005302429, "learning_rate": 1.774039501696423e-05, "loss": 0.6033, "step": 20659 }, { "epoch": 0.43816674089626945, "grad_norm": 0.35980159044265747, "learning_rate": 1.7740183864328293e-05, "loss": 0.5124, "step": 20660 }, { "epoch": 0.43818794935420247, "grad_norm": 0.2962963283061981, "learning_rate": 1.7739972703083787e-05, "loss": 0.4254, "step": 20661 }, { "epoch": 0.4382091578121355, "grad_norm": 0.3492485582828522, "learning_rate": 1.7739761533230956e-05, "loss": 0.4899, "step": 20662 }, { "epoch": 0.4382303662700685, "grad_norm": 0.37931403517723083, "learning_rate": 1.7739550354770025e-05, "loss": 0.5142, "step": 20663 }, { "epoch": 0.4382515747280015, "grad_norm": 0.4328424334526062, "learning_rate": 1.7739339167701236e-05, "loss": 0.5024, "step": 20664 }, { "epoch": 0.43827278318593454, "grad_norm": 0.3667948544025421, "learning_rate": 1.773912797202482e-05, "loss": 0.5905, "step": 20665 }, { "epoch": 0.43829399164386756, "grad_norm": 0.35746070742607117, "learning_rate": 1.773891676774101e-05, "loss": 0.53, "step": 20666 }, { "epoch": 0.4383152001018006, "grad_norm": 0.3529394865036011, "learning_rate": 1.7738705554850047e-05, "loss": 0.5158, "step": 20667 }, { "epoch": 0.4383364085597336, "grad_norm": 0.34307587146759033, "learning_rate": 1.7738494333352164e-05, "loss": 0.5037, "step": 20668 }, { "epoch": 0.4383576170176666, "grad_norm": 0.34449708461761475, "learning_rate": 1.7738283103247596e-05, "loss": 0.5301, "step": 20669 }, { "epoch": 0.43837882547559964, "grad_norm": 0.35882917046546936, "learning_rate": 1.773807186453657e-05, "loss": 0.5021, "step": 20670 }, { "epoch": 0.4384000339335327, "grad_norm": 0.394002228975296, "learning_rate": 1.7737860617219335e-05, "loss": 0.5704, "step": 20671 }, { "epoch": 0.43842124239146574, "grad_norm": 0.36164307594299316, "learning_rate": 1.7737649361296115e-05, "loss": 0.5214, "step": 20672 }, { "epoch": 0.43844245084939876, "grad_norm": 0.3569253385066986, "learning_rate": 1.7737438096767153e-05, "loss": 0.4238, "step": 20673 }, { "epoch": 0.4384636593073318, "grad_norm": 0.42651236057281494, "learning_rate": 1.7737226823632677e-05, "loss": 0.4339, "step": 20674 }, { "epoch": 0.4384848677652648, "grad_norm": 0.38768133521080017, "learning_rate": 1.773701554189292e-05, "loss": 0.5481, "step": 20675 }, { "epoch": 0.4385060762231978, "grad_norm": 0.3633124828338623, "learning_rate": 1.7736804251548125e-05, "loss": 0.5197, "step": 20676 }, { "epoch": 0.43852728468113084, "grad_norm": 0.33706146478652954, "learning_rate": 1.7736592952598525e-05, "loss": 0.4014, "step": 20677 }, { "epoch": 0.43854849313906386, "grad_norm": 0.34699633717536926, "learning_rate": 1.773638164504435e-05, "loss": 0.5517, "step": 20678 }, { "epoch": 0.4385697015969969, "grad_norm": 0.3877730071544647, "learning_rate": 1.7736170328885844e-05, "loss": 0.4371, "step": 20679 }, { "epoch": 0.4385909100549299, "grad_norm": 0.3692001700401306, "learning_rate": 1.7735959004123233e-05, "loss": 0.4418, "step": 20680 }, { "epoch": 0.4386121185128629, "grad_norm": 0.34574025869369507, "learning_rate": 1.7735747670756757e-05, "loss": 0.4822, "step": 20681 }, { "epoch": 0.43863332697079593, "grad_norm": 0.3385261595249176, "learning_rate": 1.773553632878665e-05, "loss": 0.5119, "step": 20682 }, { "epoch": 0.43865453542872895, "grad_norm": 0.34033310413360596, "learning_rate": 1.7735324978213145e-05, "loss": 0.4828, "step": 20683 }, { "epoch": 0.43867574388666203, "grad_norm": 0.35390207171440125, "learning_rate": 1.7735113619036477e-05, "loss": 0.5184, "step": 20684 }, { "epoch": 0.43869695234459505, "grad_norm": 0.37234029173851013, "learning_rate": 1.7734902251256885e-05, "loss": 0.4952, "step": 20685 }, { "epoch": 0.43871816080252807, "grad_norm": 0.3382704257965088, "learning_rate": 1.77346908748746e-05, "loss": 0.5382, "step": 20686 }, { "epoch": 0.4387393692604611, "grad_norm": 0.3298565745353699, "learning_rate": 1.773447948988986e-05, "loss": 0.5222, "step": 20687 }, { "epoch": 0.4387605777183941, "grad_norm": 0.3735353350639343, "learning_rate": 1.7734268096302902e-05, "loss": 0.5202, "step": 20688 }, { "epoch": 0.4387817861763271, "grad_norm": 0.38129958510398865, "learning_rate": 1.7734056694113956e-05, "loss": 0.5156, "step": 20689 }, { "epoch": 0.43880299463426015, "grad_norm": 0.35820087790489197, "learning_rate": 1.773384528332326e-05, "loss": 0.5, "step": 20690 }, { "epoch": 0.43882420309219317, "grad_norm": 0.3678007125854492, "learning_rate": 1.7733633863931047e-05, "loss": 0.4895, "step": 20691 }, { "epoch": 0.4388454115501262, "grad_norm": 0.3993798792362213, "learning_rate": 1.7733422435937557e-05, "loss": 0.4901, "step": 20692 }, { "epoch": 0.4388666200080592, "grad_norm": 0.44155940413475037, "learning_rate": 1.773321099934302e-05, "loss": 0.5445, "step": 20693 }, { "epoch": 0.4388878284659922, "grad_norm": 0.3601469099521637, "learning_rate": 1.773299955414767e-05, "loss": 0.5433, "step": 20694 }, { "epoch": 0.43890903692392524, "grad_norm": 0.334943562746048, "learning_rate": 1.7732788100351745e-05, "loss": 0.446, "step": 20695 }, { "epoch": 0.43893024538185826, "grad_norm": 0.33602580428123474, "learning_rate": 1.7732576637955486e-05, "loss": 0.5347, "step": 20696 }, { "epoch": 0.4389514538397913, "grad_norm": 0.4085836410522461, "learning_rate": 1.773236516695912e-05, "loss": 0.4974, "step": 20697 }, { "epoch": 0.43897266229772436, "grad_norm": 0.37834835052490234, "learning_rate": 1.773215368736288e-05, "loss": 0.5544, "step": 20698 }, { "epoch": 0.4389938707556574, "grad_norm": 0.46986839175224304, "learning_rate": 1.773194219916701e-05, "loss": 0.5515, "step": 20699 }, { "epoch": 0.4390150792135904, "grad_norm": 0.3360457420349121, "learning_rate": 1.773173070237174e-05, "loss": 0.5232, "step": 20700 }, { "epoch": 0.4390362876715234, "grad_norm": 0.3208388388156891, "learning_rate": 1.773151919697731e-05, "loss": 0.4235, "step": 20701 }, { "epoch": 0.43905749612945644, "grad_norm": 0.3532097339630127, "learning_rate": 1.7731307682983946e-05, "loss": 0.4668, "step": 20702 }, { "epoch": 0.43907870458738946, "grad_norm": 0.3847145140171051, "learning_rate": 1.7731096160391888e-05, "loss": 0.4656, "step": 20703 }, { "epoch": 0.4390999130453225, "grad_norm": 0.33791080117225647, "learning_rate": 1.773088462920138e-05, "loss": 0.4398, "step": 20704 }, { "epoch": 0.4391211215032555, "grad_norm": 0.33812958002090454, "learning_rate": 1.773067308941264e-05, "loss": 0.5286, "step": 20705 }, { "epoch": 0.4391423299611885, "grad_norm": 0.3497195243835449, "learning_rate": 1.773046154102592e-05, "loss": 0.4685, "step": 20706 }, { "epoch": 0.43916353841912154, "grad_norm": 0.3682987093925476, "learning_rate": 1.7730249984041444e-05, "loss": 0.5453, "step": 20707 }, { "epoch": 0.43918474687705455, "grad_norm": 0.32955825328826904, "learning_rate": 1.7730038418459452e-05, "loss": 0.5016, "step": 20708 }, { "epoch": 0.4392059553349876, "grad_norm": 0.3678864538669586, "learning_rate": 1.7729826844280177e-05, "loss": 0.4931, "step": 20709 }, { "epoch": 0.4392271637929206, "grad_norm": 0.6438224911689758, "learning_rate": 1.7729615261503857e-05, "loss": 0.4639, "step": 20710 }, { "epoch": 0.4392483722508536, "grad_norm": 0.34363073110580444, "learning_rate": 1.7729403670130728e-05, "loss": 0.5391, "step": 20711 }, { "epoch": 0.4392695807087867, "grad_norm": 0.3849523961544037, "learning_rate": 1.772919207016102e-05, "loss": 0.5232, "step": 20712 }, { "epoch": 0.4392907891667197, "grad_norm": 0.4257175028324127, "learning_rate": 1.7728980461594973e-05, "loss": 0.563, "step": 20713 }, { "epoch": 0.43931199762465273, "grad_norm": 0.34044915437698364, "learning_rate": 1.7728768844432818e-05, "loss": 0.534, "step": 20714 }, { "epoch": 0.43933320608258575, "grad_norm": 0.36592164635658264, "learning_rate": 1.77285572186748e-05, "loss": 0.5216, "step": 20715 }, { "epoch": 0.43935441454051877, "grad_norm": 0.3517559766769409, "learning_rate": 1.7728345584321146e-05, "loss": 0.482, "step": 20716 }, { "epoch": 0.4393756229984518, "grad_norm": 0.32669797539711, "learning_rate": 1.772813394137209e-05, "loss": 0.507, "step": 20717 }, { "epoch": 0.4393968314563848, "grad_norm": 0.3346819579601288, "learning_rate": 1.772792228982787e-05, "loss": 0.4224, "step": 20718 }, { "epoch": 0.4394180399143178, "grad_norm": 0.33621636033058167, "learning_rate": 1.7727710629688725e-05, "loss": 0.5009, "step": 20719 }, { "epoch": 0.43943924837225085, "grad_norm": 0.3343108296394348, "learning_rate": 1.7727498960954888e-05, "loss": 0.4736, "step": 20720 }, { "epoch": 0.43946045683018387, "grad_norm": 0.3426712155342102, "learning_rate": 1.772728728362659e-05, "loss": 0.5588, "step": 20721 }, { "epoch": 0.4394816652881169, "grad_norm": 0.43239790201187134, "learning_rate": 1.7727075597704076e-05, "loss": 0.5016, "step": 20722 }, { "epoch": 0.4395028737460499, "grad_norm": 0.33925488591194153, "learning_rate": 1.772686390318757e-05, "loss": 0.5363, "step": 20723 }, { "epoch": 0.4395240822039829, "grad_norm": 0.544762134552002, "learning_rate": 1.7726652200077317e-05, "loss": 0.4426, "step": 20724 }, { "epoch": 0.439545290661916, "grad_norm": 0.36550241708755493, "learning_rate": 1.7726440488373545e-05, "loss": 0.4733, "step": 20725 }, { "epoch": 0.439566499119849, "grad_norm": 0.36881187558174133, "learning_rate": 1.7726228768076493e-05, "loss": 0.5694, "step": 20726 }, { "epoch": 0.43958770757778204, "grad_norm": 0.392318457365036, "learning_rate": 1.77260170391864e-05, "loss": 0.5655, "step": 20727 }, { "epoch": 0.43960891603571506, "grad_norm": 0.41158488392829895, "learning_rate": 1.7725805301703496e-05, "loss": 0.585, "step": 20728 }, { "epoch": 0.4396301244936481, "grad_norm": 0.33519113063812256, "learning_rate": 1.772559355562802e-05, "loss": 0.5377, "step": 20729 }, { "epoch": 0.4396513329515811, "grad_norm": 0.34247463941574097, "learning_rate": 1.7725381800960206e-05, "loss": 0.5147, "step": 20730 }, { "epoch": 0.4396725414095141, "grad_norm": 0.36818650364875793, "learning_rate": 1.7725170037700287e-05, "loss": 0.4973, "step": 20731 }, { "epoch": 0.43969374986744714, "grad_norm": 0.3536241948604584, "learning_rate": 1.7724958265848503e-05, "loss": 0.4641, "step": 20732 }, { "epoch": 0.43971495832538016, "grad_norm": 0.3656325042247772, "learning_rate": 1.7724746485405084e-05, "loss": 0.5411, "step": 20733 }, { "epoch": 0.4397361667833132, "grad_norm": 0.39691513776779175, "learning_rate": 1.772453469637027e-05, "loss": 0.4562, "step": 20734 }, { "epoch": 0.4397573752412462, "grad_norm": 0.31902599334716797, "learning_rate": 1.77243228987443e-05, "loss": 0.4937, "step": 20735 }, { "epoch": 0.4397785836991792, "grad_norm": 0.32992252707481384, "learning_rate": 1.7724111092527404e-05, "loss": 0.4637, "step": 20736 }, { "epoch": 0.43979979215711223, "grad_norm": 0.3916087746620178, "learning_rate": 1.7723899277719815e-05, "loss": 0.5751, "step": 20737 }, { "epoch": 0.43982100061504525, "grad_norm": 0.6589024662971497, "learning_rate": 1.7723687454321777e-05, "loss": 0.5273, "step": 20738 }, { "epoch": 0.43984220907297833, "grad_norm": 0.31883618235588074, "learning_rate": 1.7723475622333516e-05, "loss": 0.4736, "step": 20739 }, { "epoch": 0.43986341753091135, "grad_norm": 0.3122142553329468, "learning_rate": 1.7723263781755275e-05, "loss": 0.4968, "step": 20740 }, { "epoch": 0.43988462598884437, "grad_norm": 0.34734201431274414, "learning_rate": 1.772305193258729e-05, "loss": 0.4707, "step": 20741 }, { "epoch": 0.4399058344467774, "grad_norm": 0.3760458528995514, "learning_rate": 1.772284007482979e-05, "loss": 0.5518, "step": 20742 }, { "epoch": 0.4399270429047104, "grad_norm": 0.34790509939193726, "learning_rate": 1.7722628208483013e-05, "loss": 0.5355, "step": 20743 }, { "epoch": 0.4399482513626434, "grad_norm": 0.4464527368545532, "learning_rate": 1.7722416333547197e-05, "loss": 0.546, "step": 20744 }, { "epoch": 0.43996945982057645, "grad_norm": 0.5413572192192078, "learning_rate": 1.772220445002258e-05, "loss": 0.4252, "step": 20745 }, { "epoch": 0.43999066827850947, "grad_norm": 0.3875086307525635, "learning_rate": 1.7721992557909393e-05, "loss": 0.5617, "step": 20746 }, { "epoch": 0.4400118767364425, "grad_norm": 0.3484855890274048, "learning_rate": 1.772178065720787e-05, "loss": 0.5217, "step": 20747 }, { "epoch": 0.4400330851943755, "grad_norm": 0.3406047224998474, "learning_rate": 1.7721568747918253e-05, "loss": 0.5187, "step": 20748 }, { "epoch": 0.4400542936523085, "grad_norm": 0.39522305130958557, "learning_rate": 1.772135683004077e-05, "loss": 0.5087, "step": 20749 }, { "epoch": 0.44007550211024155, "grad_norm": 0.41578835248947144, "learning_rate": 1.7721144903575665e-05, "loss": 0.4099, "step": 20750 }, { "epoch": 0.44009671056817457, "grad_norm": 0.356503963470459, "learning_rate": 1.772093296852317e-05, "loss": 0.5694, "step": 20751 }, { "epoch": 0.4401179190261076, "grad_norm": 0.3484681248664856, "learning_rate": 1.772072102488352e-05, "loss": 0.5709, "step": 20752 }, { "epoch": 0.44013912748404066, "grad_norm": 0.39639031887054443, "learning_rate": 1.7720509072656947e-05, "loss": 0.4952, "step": 20753 }, { "epoch": 0.4401603359419737, "grad_norm": 0.3426002562046051, "learning_rate": 1.7720297111843696e-05, "loss": 0.5317, "step": 20754 }, { "epoch": 0.4401815443999067, "grad_norm": 0.43291333317756653, "learning_rate": 1.7720085142443998e-05, "loss": 0.5782, "step": 20755 }, { "epoch": 0.4402027528578397, "grad_norm": 0.38002023100852966, "learning_rate": 1.7719873164458086e-05, "loss": 0.551, "step": 20756 }, { "epoch": 0.44022396131577274, "grad_norm": 0.3454737067222595, "learning_rate": 1.7719661177886198e-05, "loss": 0.5231, "step": 20757 }, { "epoch": 0.44024516977370576, "grad_norm": 0.32668423652648926, "learning_rate": 1.771944918272857e-05, "loss": 0.5346, "step": 20758 }, { "epoch": 0.4402663782316388, "grad_norm": 0.3184676468372345, "learning_rate": 1.7719237178985437e-05, "loss": 0.5169, "step": 20759 }, { "epoch": 0.4402875866895718, "grad_norm": 0.3963550925254822, "learning_rate": 1.7719025166657035e-05, "loss": 0.4178, "step": 20760 }, { "epoch": 0.4403087951475048, "grad_norm": 0.31180280447006226, "learning_rate": 1.7718813145743602e-05, "loss": 0.4683, "step": 20761 }, { "epoch": 0.44033000360543784, "grad_norm": 0.3617546856403351, "learning_rate": 1.7718601116245375e-05, "loss": 0.4727, "step": 20762 }, { "epoch": 0.44035121206337086, "grad_norm": 0.3185986578464508, "learning_rate": 1.771838907816258e-05, "loss": 0.478, "step": 20763 }, { "epoch": 0.4403724205213039, "grad_norm": 0.38805878162384033, "learning_rate": 1.7718177031495463e-05, "loss": 0.5618, "step": 20764 }, { "epoch": 0.4403936289792369, "grad_norm": 0.33460533618927, "learning_rate": 1.771796497624426e-05, "loss": 0.5222, "step": 20765 }, { "epoch": 0.44041483743716997, "grad_norm": 0.3359575867652893, "learning_rate": 1.7717752912409197e-05, "loss": 0.4565, "step": 20766 }, { "epoch": 0.440436045895103, "grad_norm": 0.39684444665908813, "learning_rate": 1.7717540839990523e-05, "loss": 0.4959, "step": 20767 }, { "epoch": 0.440457254353036, "grad_norm": 0.3472892940044403, "learning_rate": 1.771732875898846e-05, "loss": 0.5194, "step": 20768 }, { "epoch": 0.44047846281096903, "grad_norm": 0.3518207371234894, "learning_rate": 1.7717116669403258e-05, "loss": 0.3879, "step": 20769 }, { "epoch": 0.44049967126890205, "grad_norm": 0.36801305413246155, "learning_rate": 1.771690457123514e-05, "loss": 0.4628, "step": 20770 }, { "epoch": 0.44052087972683507, "grad_norm": 0.3433559238910675, "learning_rate": 1.771669246448435e-05, "loss": 0.5388, "step": 20771 }, { "epoch": 0.4405420881847681, "grad_norm": 0.41190120577812195, "learning_rate": 1.7716480349151122e-05, "loss": 0.544, "step": 20772 }, { "epoch": 0.4405632966427011, "grad_norm": 0.33485016226768494, "learning_rate": 1.7716268225235694e-05, "loss": 0.4628, "step": 20773 }, { "epoch": 0.4405845051006341, "grad_norm": 0.34829211235046387, "learning_rate": 1.7716056092738294e-05, "loss": 0.538, "step": 20774 }, { "epoch": 0.44060571355856715, "grad_norm": 0.2907180190086365, "learning_rate": 1.771584395165917e-05, "loss": 0.3697, "step": 20775 }, { "epoch": 0.44062692201650017, "grad_norm": 0.3434426784515381, "learning_rate": 1.771563180199855e-05, "loss": 0.4927, "step": 20776 }, { "epoch": 0.4406481304744332, "grad_norm": 0.33075302839279175, "learning_rate": 1.7715419643756666e-05, "loss": 0.4783, "step": 20777 }, { "epoch": 0.4406693389323662, "grad_norm": 0.339590460062027, "learning_rate": 1.771520747693376e-05, "loss": 0.4499, "step": 20778 }, { "epoch": 0.4406905473902992, "grad_norm": 0.34496140480041504, "learning_rate": 1.771499530153007e-05, "loss": 0.4357, "step": 20779 }, { "epoch": 0.4407117558482323, "grad_norm": 0.40799325704574585, "learning_rate": 1.7714783117545827e-05, "loss": 0.5222, "step": 20780 }, { "epoch": 0.4407329643061653, "grad_norm": 0.5237735509872437, "learning_rate": 1.7714570924981272e-05, "loss": 0.5922, "step": 20781 }, { "epoch": 0.44075417276409834, "grad_norm": 0.3589938282966614, "learning_rate": 1.771435872383664e-05, "loss": 0.5172, "step": 20782 }, { "epoch": 0.44077538122203136, "grad_norm": 0.3792060911655426, "learning_rate": 1.7714146514112163e-05, "loss": 0.3922, "step": 20783 }, { "epoch": 0.4407965896799644, "grad_norm": 0.3612053692340851, "learning_rate": 1.7713934295808076e-05, "loss": 0.4691, "step": 20784 }, { "epoch": 0.4408177981378974, "grad_norm": 0.3307783305644989, "learning_rate": 1.7713722068924622e-05, "loss": 0.4943, "step": 20785 }, { "epoch": 0.4408390065958304, "grad_norm": 0.6200154423713684, "learning_rate": 1.7713509833462036e-05, "loss": 0.4886, "step": 20786 }, { "epoch": 0.44086021505376344, "grad_norm": 0.33980047702789307, "learning_rate": 1.7713297589420545e-05, "loss": 0.4905, "step": 20787 }, { "epoch": 0.44088142351169646, "grad_norm": 0.3774033486843109, "learning_rate": 1.7713085336800395e-05, "loss": 0.5559, "step": 20788 }, { "epoch": 0.4409026319696295, "grad_norm": 0.3680986762046814, "learning_rate": 1.771287307560182e-05, "loss": 0.5502, "step": 20789 }, { "epoch": 0.4409238404275625, "grad_norm": 0.9650757908821106, "learning_rate": 1.771266080582505e-05, "loss": 0.6086, "step": 20790 }, { "epoch": 0.4409450488854955, "grad_norm": 0.3338789939880371, "learning_rate": 1.7712448527470328e-05, "loss": 0.4453, "step": 20791 }, { "epoch": 0.44096625734342854, "grad_norm": 0.43189573287963867, "learning_rate": 1.7712236240537887e-05, "loss": 0.4778, "step": 20792 }, { "epoch": 0.44098746580136156, "grad_norm": 0.3872835338115692, "learning_rate": 1.7712023945027966e-05, "loss": 0.4718, "step": 20793 }, { "epoch": 0.44100867425929463, "grad_norm": 0.9189322590827942, "learning_rate": 1.7711811640940797e-05, "loss": 0.575, "step": 20794 }, { "epoch": 0.44102988271722765, "grad_norm": 0.4347876310348511, "learning_rate": 1.771159932827662e-05, "loss": 0.5154, "step": 20795 }, { "epoch": 0.44105109117516067, "grad_norm": 0.32751065492630005, "learning_rate": 1.7711387007035668e-05, "loss": 0.5183, "step": 20796 }, { "epoch": 0.4410722996330937, "grad_norm": 0.36718136072158813, "learning_rate": 1.7711174677218176e-05, "loss": 0.5822, "step": 20797 }, { "epoch": 0.4410935080910267, "grad_norm": 0.3326209485530853, "learning_rate": 1.771096233882439e-05, "loss": 0.4482, "step": 20798 }, { "epoch": 0.44111471654895973, "grad_norm": 0.3581041991710663, "learning_rate": 1.771074999185453e-05, "loss": 0.5199, "step": 20799 }, { "epoch": 0.44113592500689275, "grad_norm": 1.6783268451690674, "learning_rate": 1.7710537636308847e-05, "loss": 0.5514, "step": 20800 }, { "epoch": 0.44115713346482577, "grad_norm": 0.38319575786590576, "learning_rate": 1.7710325272187567e-05, "loss": 0.5251, "step": 20801 }, { "epoch": 0.4411783419227588, "grad_norm": 0.32034817337989807, "learning_rate": 1.771011289949093e-05, "loss": 0.4677, "step": 20802 }, { "epoch": 0.4411995503806918, "grad_norm": 0.3821822702884674, "learning_rate": 1.7709900518219173e-05, "loss": 0.5192, "step": 20803 }, { "epoch": 0.4412207588386248, "grad_norm": 0.35982564091682434, "learning_rate": 1.7709688128372534e-05, "loss": 0.5086, "step": 20804 }, { "epoch": 0.44124196729655785, "grad_norm": 0.3587491512298584, "learning_rate": 1.7709475729951248e-05, "loss": 0.5429, "step": 20805 }, { "epoch": 0.44126317575449087, "grad_norm": 0.3407607674598694, "learning_rate": 1.7709263322955545e-05, "loss": 0.4698, "step": 20806 }, { "epoch": 0.44128438421242394, "grad_norm": 0.33520540595054626, "learning_rate": 1.770905090738567e-05, "loss": 0.5288, "step": 20807 }, { "epoch": 0.44130559267035696, "grad_norm": 0.3065875172615051, "learning_rate": 1.7708838483241854e-05, "loss": 0.4545, "step": 20808 }, { "epoch": 0.44132680112829, "grad_norm": 0.34851962327957153, "learning_rate": 1.7708626050524336e-05, "loss": 0.4908, "step": 20809 }, { "epoch": 0.441348009586223, "grad_norm": 0.38185831904411316, "learning_rate": 1.7708413609233347e-05, "loss": 0.5745, "step": 20810 }, { "epoch": 0.441369218044156, "grad_norm": 0.32602766156196594, "learning_rate": 1.770820115936913e-05, "loss": 0.5413, "step": 20811 }, { "epoch": 0.44139042650208904, "grad_norm": 0.5609335899353027, "learning_rate": 1.770798870093192e-05, "loss": 0.5227, "step": 20812 }, { "epoch": 0.44141163496002206, "grad_norm": 0.32663509249687195, "learning_rate": 1.7707776233921953e-05, "loss": 0.4478, "step": 20813 }, { "epoch": 0.4414328434179551, "grad_norm": 0.32728660106658936, "learning_rate": 1.7707563758339462e-05, "loss": 0.4701, "step": 20814 }, { "epoch": 0.4414540518758881, "grad_norm": 0.33291611075401306, "learning_rate": 1.770735127418469e-05, "loss": 0.4409, "step": 20815 }, { "epoch": 0.4414752603338211, "grad_norm": 0.3719637393951416, "learning_rate": 1.770713878145786e-05, "loss": 0.4799, "step": 20816 }, { "epoch": 0.44149646879175414, "grad_norm": 0.3634985089302063, "learning_rate": 1.7706926280159223e-05, "loss": 0.5223, "step": 20817 }, { "epoch": 0.44151767724968716, "grad_norm": 0.38763427734375, "learning_rate": 1.7706713770289008e-05, "loss": 0.5445, "step": 20818 }, { "epoch": 0.4415388857076202, "grad_norm": 0.3557155430316925, "learning_rate": 1.7706501251847456e-05, "loss": 0.4163, "step": 20819 }, { "epoch": 0.4415600941655532, "grad_norm": 0.3645109236240387, "learning_rate": 1.7706288724834798e-05, "loss": 0.5702, "step": 20820 }, { "epoch": 0.44158130262348627, "grad_norm": 0.31232643127441406, "learning_rate": 1.770607618925127e-05, "loss": 0.4093, "step": 20821 }, { "epoch": 0.4416025110814193, "grad_norm": 0.3861684203147888, "learning_rate": 1.7705863645097113e-05, "loss": 0.4905, "step": 20822 }, { "epoch": 0.4416237195393523, "grad_norm": 0.310575395822525, "learning_rate": 1.7705651092372563e-05, "loss": 0.488, "step": 20823 }, { "epoch": 0.44164492799728533, "grad_norm": 0.3473885655403137, "learning_rate": 1.7705438531077856e-05, "loss": 0.5428, "step": 20824 }, { "epoch": 0.44166613645521835, "grad_norm": 0.39085853099823, "learning_rate": 1.7705225961213222e-05, "loss": 0.4488, "step": 20825 }, { "epoch": 0.44168734491315137, "grad_norm": 0.3366554379463196, "learning_rate": 1.7705013382778905e-05, "loss": 0.5685, "step": 20826 }, { "epoch": 0.4417085533710844, "grad_norm": 0.32147014141082764, "learning_rate": 1.7704800795775142e-05, "loss": 0.5704, "step": 20827 }, { "epoch": 0.4417297618290174, "grad_norm": 0.3571566641330719, "learning_rate": 1.770458820020216e-05, "loss": 0.4545, "step": 20828 }, { "epoch": 0.44175097028695043, "grad_norm": 0.3133932054042816, "learning_rate": 1.7704375596060207e-05, "loss": 0.5762, "step": 20829 }, { "epoch": 0.44177217874488345, "grad_norm": 0.35371431708335876, "learning_rate": 1.7704162983349515e-05, "loss": 0.4355, "step": 20830 }, { "epoch": 0.44179338720281647, "grad_norm": 0.36559128761291504, "learning_rate": 1.7703950362070317e-05, "loss": 0.5873, "step": 20831 }, { "epoch": 0.4418145956607495, "grad_norm": 0.3246367871761322, "learning_rate": 1.770373773222285e-05, "loss": 0.4642, "step": 20832 }, { "epoch": 0.4418358041186825, "grad_norm": 0.3422405421733856, "learning_rate": 1.770352509380736e-05, "loss": 0.4947, "step": 20833 }, { "epoch": 0.4418570125766155, "grad_norm": 0.31743690371513367, "learning_rate": 1.7703312446824072e-05, "loss": 0.4687, "step": 20834 }, { "epoch": 0.4418782210345486, "grad_norm": 0.36061447858810425, "learning_rate": 1.7703099791273226e-05, "loss": 0.4903, "step": 20835 }, { "epoch": 0.4418994294924816, "grad_norm": 0.3930310010910034, "learning_rate": 1.7702887127155062e-05, "loss": 0.5248, "step": 20836 }, { "epoch": 0.44192063795041464, "grad_norm": 0.3710331320762634, "learning_rate": 1.7702674454469813e-05, "loss": 0.5329, "step": 20837 }, { "epoch": 0.44194184640834766, "grad_norm": 0.44090113043785095, "learning_rate": 1.7702461773217714e-05, "loss": 0.5356, "step": 20838 }, { "epoch": 0.4419630548662807, "grad_norm": 0.3733272850513458, "learning_rate": 1.7702249083399005e-05, "loss": 0.5627, "step": 20839 }, { "epoch": 0.4419842633242137, "grad_norm": 0.3590894341468811, "learning_rate": 1.7702036385013923e-05, "loss": 0.5772, "step": 20840 }, { "epoch": 0.4420054717821467, "grad_norm": 0.321903258562088, "learning_rate": 1.7701823678062705e-05, "loss": 0.4893, "step": 20841 }, { "epoch": 0.44202668024007974, "grad_norm": 0.33546435832977295, "learning_rate": 1.7701610962545582e-05, "loss": 0.503, "step": 20842 }, { "epoch": 0.44204788869801276, "grad_norm": 0.4048653542995453, "learning_rate": 1.7701398238462794e-05, "loss": 0.4713, "step": 20843 }, { "epoch": 0.4420690971559458, "grad_norm": 0.31553009152412415, "learning_rate": 1.770118550581458e-05, "loss": 0.4528, "step": 20844 }, { "epoch": 0.4420903056138788, "grad_norm": 0.3616374433040619, "learning_rate": 1.7700972764601173e-05, "loss": 0.4599, "step": 20845 }, { "epoch": 0.4421115140718118, "grad_norm": 0.3789823055267334, "learning_rate": 1.770076001482281e-05, "loss": 0.5503, "step": 20846 }, { "epoch": 0.44213272252974484, "grad_norm": 0.5088914632797241, "learning_rate": 1.770054725647973e-05, "loss": 0.4756, "step": 20847 }, { "epoch": 0.4421539309876779, "grad_norm": 0.39008745551109314, "learning_rate": 1.7700334489572172e-05, "loss": 0.5166, "step": 20848 }, { "epoch": 0.44217513944561093, "grad_norm": 0.33161237835884094, "learning_rate": 1.7700121714100363e-05, "loss": 0.4736, "step": 20849 }, { "epoch": 0.44219634790354395, "grad_norm": 0.31798917055130005, "learning_rate": 1.7699908930064546e-05, "loss": 0.4398, "step": 20850 }, { "epoch": 0.44221755636147697, "grad_norm": 0.3382694125175476, "learning_rate": 1.7699696137464962e-05, "loss": 0.5328, "step": 20851 }, { "epoch": 0.44223876481941, "grad_norm": 0.37701526284217834, "learning_rate": 1.7699483336301837e-05, "loss": 0.4914, "step": 20852 }, { "epoch": 0.442259973277343, "grad_norm": 0.5403106808662415, "learning_rate": 1.7699270526575416e-05, "loss": 0.4235, "step": 20853 }, { "epoch": 0.44228118173527603, "grad_norm": 0.3350118398666382, "learning_rate": 1.7699057708285935e-05, "loss": 0.4586, "step": 20854 }, { "epoch": 0.44230239019320905, "grad_norm": 0.3127692639827728, "learning_rate": 1.7698844881433626e-05, "loss": 0.5589, "step": 20855 }, { "epoch": 0.44232359865114207, "grad_norm": 0.3181891143321991, "learning_rate": 1.769863204601873e-05, "loss": 0.4594, "step": 20856 }, { "epoch": 0.4423448071090751, "grad_norm": 0.3177059292793274, "learning_rate": 1.769841920204148e-05, "loss": 0.4763, "step": 20857 }, { "epoch": 0.4423660155670081, "grad_norm": 0.4538467824459076, "learning_rate": 1.7698206349502117e-05, "loss": 0.483, "step": 20858 }, { "epoch": 0.44238722402494113, "grad_norm": 0.38175952434539795, "learning_rate": 1.7697993488400878e-05, "loss": 0.5342, "step": 20859 }, { "epoch": 0.44240843248287415, "grad_norm": 0.36798444390296936, "learning_rate": 1.7697780618737995e-05, "loss": 0.487, "step": 20860 }, { "epoch": 0.44242964094080717, "grad_norm": 0.3555780053138733, "learning_rate": 1.7697567740513707e-05, "loss": 0.4872, "step": 20861 }, { "epoch": 0.44245084939874024, "grad_norm": 0.4241948127746582, "learning_rate": 1.7697354853728253e-05, "loss": 0.5196, "step": 20862 }, { "epoch": 0.44247205785667326, "grad_norm": 0.34429213404655457, "learning_rate": 1.7697141958381868e-05, "loss": 0.4574, "step": 20863 }, { "epoch": 0.4424932663146063, "grad_norm": 0.33416423201560974, "learning_rate": 1.7696929054474785e-05, "loss": 0.4799, "step": 20864 }, { "epoch": 0.4425144747725393, "grad_norm": 0.37537652254104614, "learning_rate": 1.769671614200725e-05, "loss": 0.5248, "step": 20865 }, { "epoch": 0.4425356832304723, "grad_norm": 0.3578014671802521, "learning_rate": 1.769650322097949e-05, "loss": 0.5432, "step": 20866 }, { "epoch": 0.44255689168840534, "grad_norm": 0.36727821826934814, "learning_rate": 1.7696290291391746e-05, "loss": 0.4797, "step": 20867 }, { "epoch": 0.44257810014633836, "grad_norm": 0.35629162192344666, "learning_rate": 1.7696077353244254e-05, "loss": 0.5354, "step": 20868 }, { "epoch": 0.4425993086042714, "grad_norm": 0.39476627111434937, "learning_rate": 1.7695864406537254e-05, "loss": 0.4819, "step": 20869 }, { "epoch": 0.4426205170622044, "grad_norm": 0.3809865415096283, "learning_rate": 1.7695651451270975e-05, "loss": 0.4756, "step": 20870 }, { "epoch": 0.4426417255201374, "grad_norm": 0.30756428837776184, "learning_rate": 1.769543848744567e-05, "loss": 0.4361, "step": 20871 }, { "epoch": 0.44266293397807044, "grad_norm": 0.37926822900772095, "learning_rate": 1.7695225515061556e-05, "loss": 0.5334, "step": 20872 }, { "epoch": 0.44268414243600346, "grad_norm": 0.37444862723350525, "learning_rate": 1.769501253411888e-05, "loss": 0.5468, "step": 20873 }, { "epoch": 0.4427053508939365, "grad_norm": 0.29748281836509705, "learning_rate": 1.769479954461788e-05, "loss": 0.4362, "step": 20874 }, { "epoch": 0.44272655935186955, "grad_norm": 0.34367626905441284, "learning_rate": 1.769458654655879e-05, "loss": 0.5096, "step": 20875 }, { "epoch": 0.4427477678098026, "grad_norm": 0.3196278512477875, "learning_rate": 1.769437353994185e-05, "loss": 0.4864, "step": 20876 }, { "epoch": 0.4427689762677356, "grad_norm": 0.3378325402736664, "learning_rate": 1.7694160524767294e-05, "loss": 0.4657, "step": 20877 }, { "epoch": 0.4427901847256686, "grad_norm": 0.41282013058662415, "learning_rate": 1.769394750103536e-05, "loss": 0.4435, "step": 20878 }, { "epoch": 0.44281139318360163, "grad_norm": 0.3081102967262268, "learning_rate": 1.769373446874628e-05, "loss": 0.4995, "step": 20879 }, { "epoch": 0.44283260164153465, "grad_norm": 0.3639783561229706, "learning_rate": 1.76935214279003e-05, "loss": 0.4911, "step": 20880 }, { "epoch": 0.44285381009946767, "grad_norm": 0.3925325274467468, "learning_rate": 1.769330837849765e-05, "loss": 0.4732, "step": 20881 }, { "epoch": 0.4428750185574007, "grad_norm": 0.3518420457839966, "learning_rate": 1.769309532053857e-05, "loss": 0.5385, "step": 20882 }, { "epoch": 0.4428962270153337, "grad_norm": 0.36097797751426697, "learning_rate": 1.7692882254023296e-05, "loss": 0.5094, "step": 20883 }, { "epoch": 0.44291743547326673, "grad_norm": 0.3620012700557709, "learning_rate": 1.7692669178952067e-05, "loss": 0.5128, "step": 20884 }, { "epoch": 0.44293864393119975, "grad_norm": 0.4119374752044678, "learning_rate": 1.7692456095325115e-05, "loss": 0.5545, "step": 20885 }, { "epoch": 0.44295985238913277, "grad_norm": 0.4192870557308197, "learning_rate": 1.7692243003142683e-05, "loss": 0.4629, "step": 20886 }, { "epoch": 0.4429810608470658, "grad_norm": 0.36528831720352173, "learning_rate": 1.7692029902405004e-05, "loss": 0.5495, "step": 20887 }, { "epoch": 0.4430022693049988, "grad_norm": 0.3045405149459839, "learning_rate": 1.7691816793112317e-05, "loss": 0.4656, "step": 20888 }, { "epoch": 0.4430234777629319, "grad_norm": 0.35129833221435547, "learning_rate": 1.7691603675264854e-05, "loss": 0.5069, "step": 20889 }, { "epoch": 0.4430446862208649, "grad_norm": 0.33172276616096497, "learning_rate": 1.769139054886286e-05, "loss": 0.4324, "step": 20890 }, { "epoch": 0.4430658946787979, "grad_norm": 0.3148386478424072, "learning_rate": 1.7691177413906568e-05, "loss": 0.4767, "step": 20891 }, { "epoch": 0.44308710313673094, "grad_norm": 0.33283400535583496, "learning_rate": 1.7690964270396212e-05, "loss": 0.4845, "step": 20892 }, { "epoch": 0.44310831159466396, "grad_norm": 0.3147474527359009, "learning_rate": 1.7690751118332037e-05, "loss": 0.5406, "step": 20893 }, { "epoch": 0.443129520052597, "grad_norm": 0.4012409746646881, "learning_rate": 1.7690537957714272e-05, "loss": 0.5079, "step": 20894 }, { "epoch": 0.44315072851053, "grad_norm": 0.46699270606040955, "learning_rate": 1.7690324788543162e-05, "loss": 0.5614, "step": 20895 }, { "epoch": 0.443171936968463, "grad_norm": 0.34849342703819275, "learning_rate": 1.7690111610818937e-05, "loss": 0.5116, "step": 20896 }, { "epoch": 0.44319314542639604, "grad_norm": 0.39155295491218567, "learning_rate": 1.7689898424541833e-05, "loss": 0.4987, "step": 20897 }, { "epoch": 0.44321435388432906, "grad_norm": 0.353948712348938, "learning_rate": 1.7689685229712096e-05, "loss": 0.4658, "step": 20898 }, { "epoch": 0.4432355623422621, "grad_norm": 0.32859882712364197, "learning_rate": 1.7689472026329955e-05, "loss": 0.5898, "step": 20899 }, { "epoch": 0.4432567708001951, "grad_norm": 0.3855670392513275, "learning_rate": 1.7689258814395652e-05, "loss": 0.5292, "step": 20900 }, { "epoch": 0.4432779792581281, "grad_norm": 0.3502066731452942, "learning_rate": 1.7689045593909422e-05, "loss": 0.4898, "step": 20901 }, { "epoch": 0.44329918771606114, "grad_norm": 0.3861963450908661, "learning_rate": 1.76888323648715e-05, "loss": 0.5726, "step": 20902 }, { "epoch": 0.4433203961739942, "grad_norm": 0.5186741352081299, "learning_rate": 1.7688619127282127e-05, "loss": 0.4798, "step": 20903 }, { "epoch": 0.44334160463192723, "grad_norm": 0.3714391887187958, "learning_rate": 1.768840588114154e-05, "loss": 0.4664, "step": 20904 }, { "epoch": 0.44336281308986025, "grad_norm": 0.36116474866867065, "learning_rate": 1.768819262644997e-05, "loss": 0.5463, "step": 20905 }, { "epoch": 0.44338402154779327, "grad_norm": 0.40292972326278687, "learning_rate": 1.7687979363207663e-05, "loss": 0.6128, "step": 20906 }, { "epoch": 0.4434052300057263, "grad_norm": 0.3629380464553833, "learning_rate": 1.7687766091414852e-05, "loss": 0.5322, "step": 20907 }, { "epoch": 0.4434264384636593, "grad_norm": 0.3195779621601105, "learning_rate": 1.7687552811071772e-05, "loss": 0.454, "step": 20908 }, { "epoch": 0.44344764692159233, "grad_norm": 0.33623889088630676, "learning_rate": 1.7687339522178667e-05, "loss": 0.4682, "step": 20909 }, { "epoch": 0.44346885537952535, "grad_norm": 0.32922402024269104, "learning_rate": 1.7687126224735767e-05, "loss": 0.4106, "step": 20910 }, { "epoch": 0.44349006383745837, "grad_norm": 1.3306925296783447, "learning_rate": 1.768691291874331e-05, "loss": 0.428, "step": 20911 }, { "epoch": 0.4435112722953914, "grad_norm": 0.3536689877510071, "learning_rate": 1.768669960420154e-05, "loss": 0.499, "step": 20912 }, { "epoch": 0.4435324807533244, "grad_norm": 0.3445947766304016, "learning_rate": 1.7686486281110685e-05, "loss": 0.502, "step": 20913 }, { "epoch": 0.44355368921125743, "grad_norm": 0.3259091377258301, "learning_rate": 1.7686272949470988e-05, "loss": 0.4292, "step": 20914 }, { "epoch": 0.44357489766919045, "grad_norm": 0.35390496253967285, "learning_rate": 1.7686059609282686e-05, "loss": 0.4477, "step": 20915 }, { "epoch": 0.4435961061271235, "grad_norm": 0.3795833885669708, "learning_rate": 1.7685846260546015e-05, "loss": 0.5195, "step": 20916 }, { "epoch": 0.44361731458505654, "grad_norm": 0.3554765284061432, "learning_rate": 1.7685632903261213e-05, "loss": 0.5521, "step": 20917 }, { "epoch": 0.44363852304298956, "grad_norm": 0.363616406917572, "learning_rate": 1.7685419537428514e-05, "loss": 0.5308, "step": 20918 }, { "epoch": 0.4436597315009226, "grad_norm": 0.34823545813560486, "learning_rate": 1.768520616304816e-05, "loss": 0.4754, "step": 20919 }, { "epoch": 0.4436809399588556, "grad_norm": 0.49418944120407104, "learning_rate": 1.7684992780120386e-05, "loss": 0.4839, "step": 20920 }, { "epoch": 0.4437021484167886, "grad_norm": 0.448830783367157, "learning_rate": 1.768477938864543e-05, "loss": 0.4614, "step": 20921 }, { "epoch": 0.44372335687472164, "grad_norm": 0.3701443374156952, "learning_rate": 1.768456598862353e-05, "loss": 0.5379, "step": 20922 }, { "epoch": 0.44374456533265466, "grad_norm": 0.3642461597919464, "learning_rate": 1.7684352580054923e-05, "loss": 0.5527, "step": 20923 }, { "epoch": 0.4437657737905877, "grad_norm": 1.5203347206115723, "learning_rate": 1.7684139162939848e-05, "loss": 0.5232, "step": 20924 }, { "epoch": 0.4437869822485207, "grad_norm": 0.34590962529182434, "learning_rate": 1.7683925737278535e-05, "loss": 0.4684, "step": 20925 }, { "epoch": 0.4438081907064537, "grad_norm": 0.3778035640716553, "learning_rate": 1.768371230307123e-05, "loss": 0.5745, "step": 20926 }, { "epoch": 0.44382939916438674, "grad_norm": 0.35066327452659607, "learning_rate": 1.7683498860318163e-05, "loss": 0.4755, "step": 20927 }, { "epoch": 0.44385060762231976, "grad_norm": 0.3552534878253937, "learning_rate": 1.768328540901958e-05, "loss": 0.5067, "step": 20928 }, { "epoch": 0.4438718160802528, "grad_norm": 0.4113790988922119, "learning_rate": 1.768307194917571e-05, "loss": 0.4714, "step": 20929 }, { "epoch": 0.44389302453818585, "grad_norm": 0.3455750644207001, "learning_rate": 1.7682858480786795e-05, "loss": 0.5092, "step": 20930 }, { "epoch": 0.4439142329961189, "grad_norm": 0.36532557010650635, "learning_rate": 1.7682645003853074e-05, "loss": 0.423, "step": 20931 }, { "epoch": 0.4439354414540519, "grad_norm": 0.3379284739494324, "learning_rate": 1.768243151837478e-05, "loss": 0.4633, "step": 20932 }, { "epoch": 0.4439566499119849, "grad_norm": 0.43864527344703674, "learning_rate": 1.7682218024352154e-05, "loss": 0.579, "step": 20933 }, { "epoch": 0.44397785836991793, "grad_norm": 0.3266773819923401, "learning_rate": 1.7682004521785427e-05, "loss": 0.5331, "step": 20934 }, { "epoch": 0.44399906682785095, "grad_norm": 0.338483989238739, "learning_rate": 1.7681791010674847e-05, "loss": 0.5226, "step": 20935 }, { "epoch": 0.44402027528578397, "grad_norm": 0.3502291440963745, "learning_rate": 1.7681577491020643e-05, "loss": 0.5294, "step": 20936 }, { "epoch": 0.444041483743717, "grad_norm": 0.3486381471157074, "learning_rate": 1.7681363962823054e-05, "loss": 0.4577, "step": 20937 }, { "epoch": 0.44406269220165, "grad_norm": 0.3813606798648834, "learning_rate": 1.7681150426082322e-05, "loss": 0.489, "step": 20938 }, { "epoch": 0.44408390065958303, "grad_norm": 0.3855357766151428, "learning_rate": 1.768093688079868e-05, "loss": 0.5156, "step": 20939 }, { "epoch": 0.44410510911751605, "grad_norm": 0.5312240123748779, "learning_rate": 1.7680723326972368e-05, "loss": 0.4279, "step": 20940 }, { "epoch": 0.44412631757544907, "grad_norm": 0.4515986442565918, "learning_rate": 1.7680509764603617e-05, "loss": 0.4723, "step": 20941 }, { "epoch": 0.4441475260333821, "grad_norm": 0.3828594982624054, "learning_rate": 1.7680296193692674e-05, "loss": 0.4837, "step": 20942 }, { "epoch": 0.4441687344913151, "grad_norm": 0.3816981911659241, "learning_rate": 1.7680082614239773e-05, "loss": 0.5129, "step": 20943 }, { "epoch": 0.4441899429492482, "grad_norm": 0.3554943799972534, "learning_rate": 1.7679869026245148e-05, "loss": 0.5758, "step": 20944 }, { "epoch": 0.4442111514071812, "grad_norm": 0.3534960448741913, "learning_rate": 1.7679655429709043e-05, "loss": 0.507, "step": 20945 }, { "epoch": 0.4442323598651142, "grad_norm": 0.3200451731681824, "learning_rate": 1.7679441824631688e-05, "loss": 0.4887, "step": 20946 }, { "epoch": 0.44425356832304724, "grad_norm": 0.3754587173461914, "learning_rate": 1.7679228211013327e-05, "loss": 0.5013, "step": 20947 }, { "epoch": 0.44427477678098026, "grad_norm": 0.3399798274040222, "learning_rate": 1.7679014588854194e-05, "loss": 0.4857, "step": 20948 }, { "epoch": 0.4442959852389133, "grad_norm": 0.4120156764984131, "learning_rate": 1.767880095815453e-05, "loss": 0.5609, "step": 20949 }, { "epoch": 0.4443171936968463, "grad_norm": 0.3717713952064514, "learning_rate": 1.7678587318914567e-05, "loss": 0.4769, "step": 20950 }, { "epoch": 0.4443384021547793, "grad_norm": 0.37996241450309753, "learning_rate": 1.7678373671134548e-05, "loss": 0.4781, "step": 20951 }, { "epoch": 0.44435961061271234, "grad_norm": 0.34384679794311523, "learning_rate": 1.7678160014814708e-05, "loss": 0.5247, "step": 20952 }, { "epoch": 0.44438081907064536, "grad_norm": 0.37962546944618225, "learning_rate": 1.7677946349955286e-05, "loss": 0.5189, "step": 20953 }, { "epoch": 0.4444020275285784, "grad_norm": 0.3835698664188385, "learning_rate": 1.7677732676556517e-05, "loss": 0.4017, "step": 20954 }, { "epoch": 0.4444232359865114, "grad_norm": 0.4156028926372528, "learning_rate": 1.767751899461864e-05, "loss": 0.5396, "step": 20955 }, { "epoch": 0.4444444444444444, "grad_norm": 0.4190431535243988, "learning_rate": 1.7677305304141896e-05, "loss": 0.5447, "step": 20956 }, { "epoch": 0.4444656529023775, "grad_norm": 0.33286377787590027, "learning_rate": 1.7677091605126517e-05, "loss": 0.462, "step": 20957 }, { "epoch": 0.4444868613603105, "grad_norm": 0.40798598527908325, "learning_rate": 1.7676877897572744e-05, "loss": 0.5634, "step": 20958 }, { "epoch": 0.44450806981824353, "grad_norm": 0.33715248107910156, "learning_rate": 1.7676664181480815e-05, "loss": 0.4813, "step": 20959 }, { "epoch": 0.44452927827617655, "grad_norm": 0.3850550949573517, "learning_rate": 1.7676450456850966e-05, "loss": 0.4598, "step": 20960 }, { "epoch": 0.4445504867341096, "grad_norm": 0.337819904088974, "learning_rate": 1.7676236723683436e-05, "loss": 0.4576, "step": 20961 }, { "epoch": 0.4445716951920426, "grad_norm": 0.32904407382011414, "learning_rate": 1.7676022981978464e-05, "loss": 0.5059, "step": 20962 }, { "epoch": 0.4445929036499756, "grad_norm": 0.34634482860565186, "learning_rate": 1.767580923173628e-05, "loss": 0.5267, "step": 20963 }, { "epoch": 0.44461411210790863, "grad_norm": 0.33851268887519836, "learning_rate": 1.7675595472957133e-05, "loss": 0.4849, "step": 20964 }, { "epoch": 0.44463532056584165, "grad_norm": 0.3778086304664612, "learning_rate": 1.7675381705641256e-05, "loss": 0.5128, "step": 20965 }, { "epoch": 0.44465652902377467, "grad_norm": 0.377914160490036, "learning_rate": 1.7675167929788882e-05, "loss": 0.5807, "step": 20966 }, { "epoch": 0.4446777374817077, "grad_norm": 0.36877596378326416, "learning_rate": 1.7674954145400253e-05, "loss": 0.5271, "step": 20967 }, { "epoch": 0.4446989459396407, "grad_norm": 0.3327113091945648, "learning_rate": 1.767474035247561e-05, "loss": 0.4645, "step": 20968 }, { "epoch": 0.44472015439757373, "grad_norm": 0.31842705607414246, "learning_rate": 1.7674526551015186e-05, "loss": 0.4772, "step": 20969 }, { "epoch": 0.44474136285550675, "grad_norm": 0.3804607391357422, "learning_rate": 1.767431274101922e-05, "loss": 0.4999, "step": 20970 }, { "epoch": 0.4447625713134398, "grad_norm": 0.6842105388641357, "learning_rate": 1.7674098922487952e-05, "loss": 0.5107, "step": 20971 }, { "epoch": 0.44478377977137284, "grad_norm": 0.3666234016418457, "learning_rate": 1.7673885095421615e-05, "loss": 0.5192, "step": 20972 }, { "epoch": 0.44480498822930586, "grad_norm": 0.6482394933700562, "learning_rate": 1.7673671259820452e-05, "loss": 0.478, "step": 20973 }, { "epoch": 0.4448261966872389, "grad_norm": 0.36095643043518066, "learning_rate": 1.7673457415684697e-05, "loss": 0.476, "step": 20974 }, { "epoch": 0.4448474051451719, "grad_norm": 0.3405407965183258, "learning_rate": 1.767324356301459e-05, "loss": 0.4444, "step": 20975 }, { "epoch": 0.4448686136031049, "grad_norm": 0.33735883235931396, "learning_rate": 1.7673029701810366e-05, "loss": 0.4986, "step": 20976 }, { "epoch": 0.44488982206103794, "grad_norm": 1.7730929851531982, "learning_rate": 1.7672815832072267e-05, "loss": 0.5058, "step": 20977 }, { "epoch": 0.44491103051897096, "grad_norm": 0.3517738878726959, "learning_rate": 1.767260195380053e-05, "loss": 0.4871, "step": 20978 }, { "epoch": 0.444932238976904, "grad_norm": 0.3321343958377838, "learning_rate": 1.767238806699539e-05, "loss": 0.5199, "step": 20979 }, { "epoch": 0.444953447434837, "grad_norm": 0.3359103202819824, "learning_rate": 1.7672174171657087e-05, "loss": 0.4743, "step": 20980 }, { "epoch": 0.44497465589277, "grad_norm": 0.5046311616897583, "learning_rate": 1.7671960267785858e-05, "loss": 0.5013, "step": 20981 }, { "epoch": 0.44499586435070304, "grad_norm": 0.37072932720184326, "learning_rate": 1.7671746355381944e-05, "loss": 0.4963, "step": 20982 }, { "epoch": 0.44501707280863606, "grad_norm": 0.41998395323753357, "learning_rate": 1.7671532434445576e-05, "loss": 0.5729, "step": 20983 }, { "epoch": 0.4450382812665691, "grad_norm": 0.31364893913269043, "learning_rate": 1.7671318504976998e-05, "loss": 0.4454, "step": 20984 }, { "epoch": 0.44505948972450216, "grad_norm": 0.352125346660614, "learning_rate": 1.767110456697645e-05, "loss": 0.5168, "step": 20985 }, { "epoch": 0.4450806981824352, "grad_norm": 0.4749290347099304, "learning_rate": 1.7670890620444163e-05, "loss": 0.4646, "step": 20986 }, { "epoch": 0.4451019066403682, "grad_norm": 0.3805641829967499, "learning_rate": 1.7670676665380377e-05, "loss": 0.5792, "step": 20987 }, { "epoch": 0.4451231150983012, "grad_norm": 0.3547080159187317, "learning_rate": 1.7670462701785332e-05, "loss": 0.5578, "step": 20988 }, { "epoch": 0.44514432355623423, "grad_norm": 0.32808417081832886, "learning_rate": 1.7670248729659263e-05, "loss": 0.5279, "step": 20989 }, { "epoch": 0.44516553201416725, "grad_norm": 0.3714466094970703, "learning_rate": 1.7670034749002414e-05, "loss": 0.5829, "step": 20990 }, { "epoch": 0.4451867404721003, "grad_norm": 0.3346221148967743, "learning_rate": 1.7669820759815018e-05, "loss": 0.4955, "step": 20991 }, { "epoch": 0.4452079489300333, "grad_norm": 0.5137296915054321, "learning_rate": 1.7669606762097312e-05, "loss": 0.5696, "step": 20992 }, { "epoch": 0.4452291573879663, "grad_norm": 0.3425573408603668, "learning_rate": 1.766939275584954e-05, "loss": 0.4899, "step": 20993 }, { "epoch": 0.44525036584589933, "grad_norm": 0.3327811360359192, "learning_rate": 1.7669178741071934e-05, "loss": 0.5637, "step": 20994 }, { "epoch": 0.44527157430383235, "grad_norm": 0.3608975112438202, "learning_rate": 1.7668964717764733e-05, "loss": 0.5288, "step": 20995 }, { "epoch": 0.44529278276176537, "grad_norm": 0.2984645962715149, "learning_rate": 1.7668750685928174e-05, "loss": 0.4954, "step": 20996 }, { "epoch": 0.4453139912196984, "grad_norm": 0.3305257558822632, "learning_rate": 1.76685366455625e-05, "loss": 0.51, "step": 20997 }, { "epoch": 0.44533519967763147, "grad_norm": 0.3655734658241272, "learning_rate": 1.7668322596667943e-05, "loss": 0.4874, "step": 20998 }, { "epoch": 0.4453564081355645, "grad_norm": 0.3348410725593567, "learning_rate": 1.766810853924475e-05, "loss": 0.4529, "step": 20999 }, { "epoch": 0.4453776165934975, "grad_norm": 0.36342817544937134, "learning_rate": 1.766789447329315e-05, "loss": 0.4647, "step": 21000 }, { "epoch": 0.4453988250514305, "grad_norm": 1.5839563608169556, "learning_rate": 1.7667680398813383e-05, "loss": 0.4537, "step": 21001 }, { "epoch": 0.44542003350936354, "grad_norm": 0.3639521598815918, "learning_rate": 1.766746631580569e-05, "loss": 0.4084, "step": 21002 }, { "epoch": 0.44544124196729656, "grad_norm": 0.37635159492492676, "learning_rate": 1.7667252224270307e-05, "loss": 0.3941, "step": 21003 }, { "epoch": 0.4454624504252296, "grad_norm": 0.33899545669555664, "learning_rate": 1.7667038124207474e-05, "loss": 0.53, "step": 21004 }, { "epoch": 0.4454836588831626, "grad_norm": 0.3267163038253784, "learning_rate": 1.7666824015617428e-05, "loss": 0.517, "step": 21005 }, { "epoch": 0.4455048673410956, "grad_norm": 0.5583882331848145, "learning_rate": 1.7666609898500406e-05, "loss": 0.4471, "step": 21006 }, { "epoch": 0.44552607579902864, "grad_norm": 0.35974252223968506, "learning_rate": 1.7666395772856646e-05, "loss": 0.4736, "step": 21007 }, { "epoch": 0.44554728425696166, "grad_norm": 0.334139883518219, "learning_rate": 1.7666181638686388e-05, "loss": 0.5394, "step": 21008 }, { "epoch": 0.4455684927148947, "grad_norm": 0.3869476020336151, "learning_rate": 1.766596749598987e-05, "loss": 0.5849, "step": 21009 }, { "epoch": 0.4455897011728277, "grad_norm": 0.3366892337799072, "learning_rate": 1.766575334476733e-05, "loss": 0.4506, "step": 21010 }, { "epoch": 0.4456109096307607, "grad_norm": 0.3699604868888855, "learning_rate": 1.7665539185019e-05, "loss": 0.5119, "step": 21011 }, { "epoch": 0.4456321180886938, "grad_norm": 0.30192458629608154, "learning_rate": 1.7665325016745133e-05, "loss": 0.3984, "step": 21012 }, { "epoch": 0.4456533265466268, "grad_norm": 0.36774736642837524, "learning_rate": 1.7665110839945953e-05, "loss": 0.5284, "step": 21013 }, { "epoch": 0.44567453500455984, "grad_norm": 0.32969939708709717, "learning_rate": 1.7664896654621705e-05, "loss": 0.5406, "step": 21014 }, { "epoch": 0.44569574346249285, "grad_norm": 0.3595116436481476, "learning_rate": 1.7664682460772623e-05, "loss": 0.5318, "step": 21015 }, { "epoch": 0.4457169519204259, "grad_norm": 0.4445778727531433, "learning_rate": 1.766446825839895e-05, "loss": 0.4698, "step": 21016 }, { "epoch": 0.4457381603783589, "grad_norm": 0.3270116150379181, "learning_rate": 1.766425404750092e-05, "loss": 0.5546, "step": 21017 }, { "epoch": 0.4457593688362919, "grad_norm": 0.48061221837997437, "learning_rate": 1.7664039828078775e-05, "loss": 0.457, "step": 21018 }, { "epoch": 0.44578057729422493, "grad_norm": 0.35994359850883484, "learning_rate": 1.7663825600132754e-05, "loss": 0.5402, "step": 21019 }, { "epoch": 0.44580178575215795, "grad_norm": 0.40142831206321716, "learning_rate": 1.7663611363663087e-05, "loss": 0.5494, "step": 21020 }, { "epoch": 0.445822994210091, "grad_norm": 0.33956289291381836, "learning_rate": 1.7663397118670023e-05, "loss": 0.4773, "step": 21021 }, { "epoch": 0.445844202668024, "grad_norm": 0.373291939496994, "learning_rate": 1.7663182865153792e-05, "loss": 0.5652, "step": 21022 }, { "epoch": 0.445865411125957, "grad_norm": 0.37154099345207214, "learning_rate": 1.7662968603114637e-05, "loss": 0.5811, "step": 21023 }, { "epoch": 0.44588661958389003, "grad_norm": 0.34045514464378357, "learning_rate": 1.7662754332552796e-05, "loss": 0.4633, "step": 21024 }, { "epoch": 0.4459078280418231, "grad_norm": 0.49925726652145386, "learning_rate": 1.7662540053468503e-05, "loss": 0.5678, "step": 21025 }, { "epoch": 0.4459290364997561, "grad_norm": 0.415589302778244, "learning_rate": 1.7662325765862004e-05, "loss": 0.4653, "step": 21026 }, { "epoch": 0.44595024495768915, "grad_norm": 0.3257112205028534, "learning_rate": 1.766211146973353e-05, "loss": 0.4904, "step": 21027 }, { "epoch": 0.44597145341562217, "grad_norm": 0.39202189445495605, "learning_rate": 1.7661897165083323e-05, "loss": 0.4788, "step": 21028 }, { "epoch": 0.4459926618735552, "grad_norm": 0.8358542323112488, "learning_rate": 1.7661682851911618e-05, "loss": 0.6031, "step": 21029 }, { "epoch": 0.4460138703314882, "grad_norm": 0.33516931533813477, "learning_rate": 1.7661468530218657e-05, "loss": 0.4779, "step": 21030 }, { "epoch": 0.4460350787894212, "grad_norm": 0.34930744767189026, "learning_rate": 1.766125420000468e-05, "loss": 0.5346, "step": 21031 }, { "epoch": 0.44605628724735424, "grad_norm": 0.3922897279262543, "learning_rate": 1.766103986126992e-05, "loss": 0.4713, "step": 21032 }, { "epoch": 0.44607749570528726, "grad_norm": 0.373941570520401, "learning_rate": 1.766082551401462e-05, "loss": 0.5241, "step": 21033 }, { "epoch": 0.4460987041632203, "grad_norm": 0.43316689133644104, "learning_rate": 1.7660611158239014e-05, "loss": 0.542, "step": 21034 }, { "epoch": 0.4461199126211533, "grad_norm": 0.30342376232147217, "learning_rate": 1.7660396793943345e-05, "loss": 0.4281, "step": 21035 }, { "epoch": 0.4461411210790863, "grad_norm": 0.3492578864097595, "learning_rate": 1.7660182421127846e-05, "loss": 0.5048, "step": 21036 }, { "epoch": 0.44616232953701934, "grad_norm": 0.37288638949394226, "learning_rate": 1.765996803979276e-05, "loss": 0.597, "step": 21037 }, { "epoch": 0.44618353799495236, "grad_norm": 0.32732096314430237, "learning_rate": 1.7659753649938326e-05, "loss": 0.5304, "step": 21038 }, { "epoch": 0.44620474645288544, "grad_norm": 0.34162789583206177, "learning_rate": 1.765953925156478e-05, "loss": 0.5113, "step": 21039 }, { "epoch": 0.44622595491081846, "grad_norm": 0.33455562591552734, "learning_rate": 1.7659324844672362e-05, "loss": 0.5251, "step": 21040 }, { "epoch": 0.4462471633687515, "grad_norm": 0.3613455593585968, "learning_rate": 1.7659110429261304e-05, "loss": 0.4622, "step": 21041 }, { "epoch": 0.4462683718266845, "grad_norm": 0.3657229542732239, "learning_rate": 1.7658896005331856e-05, "loss": 0.5376, "step": 21042 }, { "epoch": 0.4462895802846175, "grad_norm": 0.3251352310180664, "learning_rate": 1.7658681572884248e-05, "loss": 0.4947, "step": 21043 }, { "epoch": 0.44631078874255053, "grad_norm": 0.38081344962120056, "learning_rate": 1.765846713191872e-05, "loss": 0.5457, "step": 21044 }, { "epoch": 0.44633199720048355, "grad_norm": 0.35559865832328796, "learning_rate": 1.7658252682435515e-05, "loss": 0.5264, "step": 21045 }, { "epoch": 0.4463532056584166, "grad_norm": 0.36672037839889526, "learning_rate": 1.7658038224434864e-05, "loss": 0.4734, "step": 21046 }, { "epoch": 0.4463744141163496, "grad_norm": 0.3860420882701874, "learning_rate": 1.7657823757917013e-05, "loss": 0.52, "step": 21047 }, { "epoch": 0.4463956225742826, "grad_norm": 0.396197646856308, "learning_rate": 1.7657609282882194e-05, "loss": 0.4092, "step": 21048 }, { "epoch": 0.44641683103221563, "grad_norm": 0.3384244740009308, "learning_rate": 1.765739479933065e-05, "loss": 0.5254, "step": 21049 }, { "epoch": 0.44643803949014865, "grad_norm": 0.32622721791267395, "learning_rate": 1.7657180307262614e-05, "loss": 0.4513, "step": 21050 }, { "epoch": 0.44645924794808167, "grad_norm": 0.3587571382522583, "learning_rate": 1.765696580667833e-05, "loss": 0.5273, "step": 21051 }, { "epoch": 0.4464804564060147, "grad_norm": 0.4484441876411438, "learning_rate": 1.7656751297578042e-05, "loss": 0.4511, "step": 21052 }, { "epoch": 0.44650166486394777, "grad_norm": 0.5481975078582764, "learning_rate": 1.7656536779961976e-05, "loss": 0.5836, "step": 21053 }, { "epoch": 0.4465228733218808, "grad_norm": 0.3691628575325012, "learning_rate": 1.7656322253830376e-05, "loss": 0.4753, "step": 21054 }, { "epoch": 0.4465440817798138, "grad_norm": 0.32203561067581177, "learning_rate": 1.7656107719183483e-05, "loss": 0.4567, "step": 21055 }, { "epoch": 0.4465652902377468, "grad_norm": 0.33991602063179016, "learning_rate": 1.765589317602153e-05, "loss": 0.435, "step": 21056 }, { "epoch": 0.44658649869567985, "grad_norm": 0.35781824588775635, "learning_rate": 1.7655678624344764e-05, "loss": 0.4629, "step": 21057 }, { "epoch": 0.44660770715361287, "grad_norm": 0.377713680267334, "learning_rate": 1.7655464064153417e-05, "loss": 0.5156, "step": 21058 }, { "epoch": 0.4466289156115459, "grad_norm": 0.3562760055065155, "learning_rate": 1.7655249495447725e-05, "loss": 0.4877, "step": 21059 }, { "epoch": 0.4466501240694789, "grad_norm": 0.3547534942626953, "learning_rate": 1.7655034918227936e-05, "loss": 0.5308, "step": 21060 }, { "epoch": 0.4466713325274119, "grad_norm": 0.33839187026023865, "learning_rate": 1.7654820332494283e-05, "loss": 0.5573, "step": 21061 }, { "epoch": 0.44669254098534494, "grad_norm": 0.36917972564697266, "learning_rate": 1.7654605738247004e-05, "loss": 0.4822, "step": 21062 }, { "epoch": 0.44671374944327796, "grad_norm": 0.37950602173805237, "learning_rate": 1.765439113548634e-05, "loss": 0.5273, "step": 21063 }, { "epoch": 0.446734957901211, "grad_norm": 0.3572622537612915, "learning_rate": 1.765417652421253e-05, "loss": 0.5257, "step": 21064 }, { "epoch": 0.446756166359144, "grad_norm": 0.3476659059524536, "learning_rate": 1.765396190442581e-05, "loss": 0.4045, "step": 21065 }, { "epoch": 0.4467773748170771, "grad_norm": 0.40612250566482544, "learning_rate": 1.7653747276126417e-05, "loss": 0.5089, "step": 21066 }, { "epoch": 0.4467985832750101, "grad_norm": 0.348523885011673, "learning_rate": 1.7653532639314597e-05, "loss": 0.4722, "step": 21067 }, { "epoch": 0.4468197917329431, "grad_norm": 0.6224015355110168, "learning_rate": 1.765331799399058e-05, "loss": 0.4722, "step": 21068 }, { "epoch": 0.44684100019087614, "grad_norm": 0.7471866011619568, "learning_rate": 1.7653103340154615e-05, "loss": 0.5693, "step": 21069 }, { "epoch": 0.44686220864880916, "grad_norm": 0.351413369178772, "learning_rate": 1.765288867780693e-05, "loss": 0.5122, "step": 21070 }, { "epoch": 0.4468834171067422, "grad_norm": 0.3662421405315399, "learning_rate": 1.765267400694777e-05, "loss": 0.4689, "step": 21071 }, { "epoch": 0.4469046255646752, "grad_norm": 0.3309282958507538, "learning_rate": 1.7652459327577377e-05, "loss": 0.5301, "step": 21072 }, { "epoch": 0.4469258340226082, "grad_norm": 0.36408424377441406, "learning_rate": 1.765224463969598e-05, "loss": 0.5709, "step": 21073 }, { "epoch": 0.44694704248054123, "grad_norm": 0.3392461836338043, "learning_rate": 1.7652029943303824e-05, "loss": 0.5804, "step": 21074 }, { "epoch": 0.44696825093847425, "grad_norm": 0.31462037563323975, "learning_rate": 1.7651815238401147e-05, "loss": 0.5294, "step": 21075 }, { "epoch": 0.4469894593964073, "grad_norm": 0.33606967329978943, "learning_rate": 1.7651600524988187e-05, "loss": 0.4731, "step": 21076 }, { "epoch": 0.4470106678543403, "grad_norm": 0.3428143858909607, "learning_rate": 1.7651385803065185e-05, "loss": 0.5343, "step": 21077 }, { "epoch": 0.4470318763122733, "grad_norm": 0.37625598907470703, "learning_rate": 1.765117107263238e-05, "loss": 0.5058, "step": 21078 }, { "epoch": 0.44705308477020633, "grad_norm": 0.3628774583339691, "learning_rate": 1.7650956333690002e-05, "loss": 0.467, "step": 21079 }, { "epoch": 0.4470742932281394, "grad_norm": 0.35923436284065247, "learning_rate": 1.7650741586238302e-05, "loss": 0.5598, "step": 21080 }, { "epoch": 0.4470955016860724, "grad_norm": 0.3470287322998047, "learning_rate": 1.7650526830277515e-05, "loss": 0.4693, "step": 21081 }, { "epoch": 0.44711671014400545, "grad_norm": 0.35254397988319397, "learning_rate": 1.7650312065807876e-05, "loss": 0.493, "step": 21082 }, { "epoch": 0.44713791860193847, "grad_norm": 0.32813724875450134, "learning_rate": 1.7650097292829628e-05, "loss": 0.4961, "step": 21083 }, { "epoch": 0.4471591270598715, "grad_norm": 0.3935622572898865, "learning_rate": 1.7649882511343005e-05, "loss": 0.5669, "step": 21084 }, { "epoch": 0.4471803355178045, "grad_norm": 0.33627888560295105, "learning_rate": 1.7649667721348248e-05, "loss": 0.5752, "step": 21085 }, { "epoch": 0.4472015439757375, "grad_norm": 0.3548086881637573, "learning_rate": 1.76494529228456e-05, "loss": 0.4533, "step": 21086 }, { "epoch": 0.44722275243367055, "grad_norm": 0.3864234685897827, "learning_rate": 1.7649238115835297e-05, "loss": 0.5096, "step": 21087 }, { "epoch": 0.44724396089160356, "grad_norm": 0.35439735651016235, "learning_rate": 1.7649023300317584e-05, "loss": 0.5134, "step": 21088 }, { "epoch": 0.4472651693495366, "grad_norm": 0.37761440873146057, "learning_rate": 1.7648808476292686e-05, "loss": 0.5013, "step": 21089 }, { "epoch": 0.4472863778074696, "grad_norm": 0.36407721042633057, "learning_rate": 1.764859364376085e-05, "loss": 0.5542, "step": 21090 }, { "epoch": 0.4473075862654026, "grad_norm": 0.35011711716651917, "learning_rate": 1.7648378802722318e-05, "loss": 0.4417, "step": 21091 }, { "epoch": 0.44732879472333564, "grad_norm": 0.3317558169364929, "learning_rate": 1.7648163953177323e-05, "loss": 0.5254, "step": 21092 }, { "epoch": 0.44735000318126866, "grad_norm": 0.3586311936378479, "learning_rate": 1.7647949095126105e-05, "loss": 0.4902, "step": 21093 }, { "epoch": 0.44737121163920174, "grad_norm": 0.3385416567325592, "learning_rate": 1.7647734228568907e-05, "loss": 0.5116, "step": 21094 }, { "epoch": 0.44739242009713476, "grad_norm": 0.3461521565914154, "learning_rate": 1.7647519353505964e-05, "loss": 0.535, "step": 21095 }, { "epoch": 0.4474136285550678, "grad_norm": 0.3515351414680481, "learning_rate": 1.7647304469937515e-05, "loss": 0.4337, "step": 21096 }, { "epoch": 0.4474348370130008, "grad_norm": 0.3728795647621155, "learning_rate": 1.7647089577863806e-05, "loss": 0.5742, "step": 21097 }, { "epoch": 0.4474560454709338, "grad_norm": 0.3934565782546997, "learning_rate": 1.7646874677285065e-05, "loss": 0.4979, "step": 21098 }, { "epoch": 0.44747725392886684, "grad_norm": 0.3526686728000641, "learning_rate": 1.764665976820154e-05, "loss": 0.49, "step": 21099 }, { "epoch": 0.44749846238679986, "grad_norm": 0.3282851576805115, "learning_rate": 1.7646444850613465e-05, "loss": 0.4565, "step": 21100 }, { "epoch": 0.4475196708447329, "grad_norm": 0.33885958790779114, "learning_rate": 1.764622992452108e-05, "loss": 0.5126, "step": 21101 }, { "epoch": 0.4475408793026659, "grad_norm": 0.34689652919769287, "learning_rate": 1.7646014989924626e-05, "loss": 0.4847, "step": 21102 }, { "epoch": 0.4475620877605989, "grad_norm": 0.3831734359264374, "learning_rate": 1.7645800046824337e-05, "loss": 0.5277, "step": 21103 }, { "epoch": 0.44758329621853193, "grad_norm": 0.3484482169151306, "learning_rate": 1.7645585095220457e-05, "loss": 0.5058, "step": 21104 }, { "epoch": 0.44760450467646495, "grad_norm": 0.3933298587799072, "learning_rate": 1.7645370135113227e-05, "loss": 0.5085, "step": 21105 }, { "epoch": 0.447625713134398, "grad_norm": 0.3399420976638794, "learning_rate": 1.7645155166502878e-05, "loss": 0.5054, "step": 21106 }, { "epoch": 0.44764692159233105, "grad_norm": 0.3347548246383667, "learning_rate": 1.7644940189389656e-05, "loss": 0.5519, "step": 21107 }, { "epoch": 0.44766813005026407, "grad_norm": 0.3986131548881531, "learning_rate": 1.7644725203773802e-05, "loss": 0.4593, "step": 21108 }, { "epoch": 0.4476893385081971, "grad_norm": 0.4368581175804138, "learning_rate": 1.7644510209655546e-05, "loss": 0.4803, "step": 21109 }, { "epoch": 0.4477105469661301, "grad_norm": 0.3303614854812622, "learning_rate": 1.7644295207035133e-05, "loss": 0.5252, "step": 21110 }, { "epoch": 0.4477317554240631, "grad_norm": 0.36011895537376404, "learning_rate": 1.76440801959128e-05, "loss": 0.4638, "step": 21111 }, { "epoch": 0.44775296388199615, "grad_norm": 0.33701401948928833, "learning_rate": 1.764386517628879e-05, "loss": 0.5439, "step": 21112 }, { "epoch": 0.44777417233992917, "grad_norm": 0.3307477533817291, "learning_rate": 1.7643650148163337e-05, "loss": 0.5346, "step": 21113 }, { "epoch": 0.4477953807978622, "grad_norm": 0.3306255042552948, "learning_rate": 1.7643435111536685e-05, "loss": 0.478, "step": 21114 }, { "epoch": 0.4478165892557952, "grad_norm": 0.30438798666000366, "learning_rate": 1.764322006640907e-05, "loss": 0.4581, "step": 21115 }, { "epoch": 0.4478377977137282, "grad_norm": 0.33964136242866516, "learning_rate": 1.7643005012780735e-05, "loss": 0.5283, "step": 21116 }, { "epoch": 0.44785900617166124, "grad_norm": 0.36800870299339294, "learning_rate": 1.764278995065191e-05, "loss": 0.533, "step": 21117 }, { "epoch": 0.44788021462959426, "grad_norm": 0.33217641711235046, "learning_rate": 1.7642574880022846e-05, "loss": 0.3945, "step": 21118 }, { "epoch": 0.4479014230875273, "grad_norm": 0.3506220579147339, "learning_rate": 1.7642359800893772e-05, "loss": 0.5126, "step": 21119 }, { "epoch": 0.4479226315454603, "grad_norm": 0.32403379678726196, "learning_rate": 1.7642144713264936e-05, "loss": 0.5124, "step": 21120 }, { "epoch": 0.4479438400033934, "grad_norm": 0.3342459499835968, "learning_rate": 1.764192961713657e-05, "loss": 0.4775, "step": 21121 }, { "epoch": 0.4479650484613264, "grad_norm": 0.3608083426952362, "learning_rate": 1.7641714512508922e-05, "loss": 0.5278, "step": 21122 }, { "epoch": 0.4479862569192594, "grad_norm": 0.40553393959999084, "learning_rate": 1.764149939938222e-05, "loss": 0.5419, "step": 21123 }, { "epoch": 0.44800746537719244, "grad_norm": 0.3414844572544098, "learning_rate": 1.7641284277756706e-05, "loss": 0.5329, "step": 21124 }, { "epoch": 0.44802867383512546, "grad_norm": 0.47468897700309753, "learning_rate": 1.7641069147632626e-05, "loss": 0.5602, "step": 21125 }, { "epoch": 0.4480498822930585, "grad_norm": 0.35989370942115784, "learning_rate": 1.7640854009010215e-05, "loss": 0.4455, "step": 21126 }, { "epoch": 0.4480710907509915, "grad_norm": 7.31642484664917, "learning_rate": 1.7640638861889714e-05, "loss": 0.3912, "step": 21127 }, { "epoch": 0.4480922992089245, "grad_norm": 0.36251384019851685, "learning_rate": 1.764042370627136e-05, "loss": 0.5161, "step": 21128 }, { "epoch": 0.44811350766685754, "grad_norm": 0.3221059739589691, "learning_rate": 1.7640208542155394e-05, "loss": 0.4957, "step": 21129 }, { "epoch": 0.44813471612479056, "grad_norm": 0.37194952368736267, "learning_rate": 1.763999336954205e-05, "loss": 0.5436, "step": 21130 }, { "epoch": 0.4481559245827236, "grad_norm": 0.34868118166923523, "learning_rate": 1.7639778188431576e-05, "loss": 0.4393, "step": 21131 }, { "epoch": 0.4481771330406566, "grad_norm": 0.36676377058029175, "learning_rate": 1.7639562998824208e-05, "loss": 0.5408, "step": 21132 }, { "epoch": 0.4481983414985896, "grad_norm": 0.3421177268028259, "learning_rate": 1.7639347800720182e-05, "loss": 0.5322, "step": 21133 }, { "epoch": 0.44821954995652263, "grad_norm": 0.3193320631980896, "learning_rate": 1.763913259411974e-05, "loss": 0.4845, "step": 21134 }, { "epoch": 0.4482407584144557, "grad_norm": 0.3282242715358734, "learning_rate": 1.763891737902312e-05, "loss": 0.4985, "step": 21135 }, { "epoch": 0.44826196687238873, "grad_norm": 0.3826839029788971, "learning_rate": 1.7638702155430563e-05, "loss": 0.5179, "step": 21136 }, { "epoch": 0.44828317533032175, "grad_norm": 0.32805100083351135, "learning_rate": 1.7638486923342307e-05, "loss": 0.4936, "step": 21137 }, { "epoch": 0.44830438378825477, "grad_norm": 0.47089388966560364, "learning_rate": 1.7638271682758596e-05, "loss": 0.5473, "step": 21138 }, { "epoch": 0.4483255922461878, "grad_norm": 0.395535409450531, "learning_rate": 1.7638056433679662e-05, "loss": 0.5314, "step": 21139 }, { "epoch": 0.4483468007041208, "grad_norm": 0.5527527332305908, "learning_rate": 1.763784117610575e-05, "loss": 0.5297, "step": 21140 }, { "epoch": 0.4483680091620538, "grad_norm": 0.3501075804233551, "learning_rate": 1.7637625910037095e-05, "loss": 0.4662, "step": 21141 }, { "epoch": 0.44838921761998685, "grad_norm": 0.30465540289878845, "learning_rate": 1.763741063547394e-05, "loss": 0.4984, "step": 21142 }, { "epoch": 0.44841042607791987, "grad_norm": 0.3109755516052246, "learning_rate": 1.7637195352416525e-05, "loss": 0.4734, "step": 21143 }, { "epoch": 0.4484316345358529, "grad_norm": 0.38472142815589905, "learning_rate": 1.7636980060865087e-05, "loss": 0.51, "step": 21144 }, { "epoch": 0.4484528429937859, "grad_norm": 0.39425966143608093, "learning_rate": 1.7636764760819862e-05, "loss": 0.504, "step": 21145 }, { "epoch": 0.4484740514517189, "grad_norm": 0.3646872937679291, "learning_rate": 1.76365494522811e-05, "loss": 0.5415, "step": 21146 }, { "epoch": 0.44849525990965194, "grad_norm": 0.3563678562641144, "learning_rate": 1.763633413524903e-05, "loss": 0.4964, "step": 21147 }, { "epoch": 0.448516468367585, "grad_norm": 0.3357732892036438, "learning_rate": 1.7636118809723895e-05, "loss": 0.501, "step": 21148 }, { "epoch": 0.44853767682551804, "grad_norm": 0.30149468779563904, "learning_rate": 1.7635903475705938e-05, "loss": 0.4853, "step": 21149 }, { "epoch": 0.44855888528345106, "grad_norm": 0.39743322134017944, "learning_rate": 1.763568813319539e-05, "loss": 0.5351, "step": 21150 }, { "epoch": 0.4485800937413841, "grad_norm": 0.36054474115371704, "learning_rate": 1.7635472782192505e-05, "loss": 0.5304, "step": 21151 }, { "epoch": 0.4486013021993171, "grad_norm": 0.2971208095550537, "learning_rate": 1.7635257422697507e-05, "loss": 0.4699, "step": 21152 }, { "epoch": 0.4486225106572501, "grad_norm": 0.34146925806999207, "learning_rate": 1.7635042054710642e-05, "loss": 0.4882, "step": 21153 }, { "epoch": 0.44864371911518314, "grad_norm": 0.3889485001564026, "learning_rate": 1.7634826678232153e-05, "loss": 0.5645, "step": 21154 }, { "epoch": 0.44866492757311616, "grad_norm": 0.3277110457420349, "learning_rate": 1.7634611293262273e-05, "loss": 0.4573, "step": 21155 }, { "epoch": 0.4486861360310492, "grad_norm": 0.3128167390823364, "learning_rate": 1.7634395899801245e-05, "loss": 0.4637, "step": 21156 }, { "epoch": 0.4487073444889822, "grad_norm": 0.367008239030838, "learning_rate": 1.763418049784931e-05, "loss": 0.5665, "step": 21157 }, { "epoch": 0.4487285529469152, "grad_norm": 0.3353877365589142, "learning_rate": 1.7633965087406704e-05, "loss": 0.4531, "step": 21158 }, { "epoch": 0.44874976140484824, "grad_norm": 0.3729165196418762, "learning_rate": 1.763374966847367e-05, "loss": 0.4852, "step": 21159 }, { "epoch": 0.44877096986278125, "grad_norm": 0.5588129162788391, "learning_rate": 1.7633534241050445e-05, "loss": 0.4644, "step": 21160 }, { "epoch": 0.4487921783207143, "grad_norm": 0.34860897064208984, "learning_rate": 1.763331880513727e-05, "loss": 0.4538, "step": 21161 }, { "epoch": 0.44881338677864735, "grad_norm": 0.39482200145721436, "learning_rate": 1.7633103360734382e-05, "loss": 0.5534, "step": 21162 }, { "epoch": 0.44883459523658037, "grad_norm": 0.3958604633808136, "learning_rate": 1.7632887907842025e-05, "loss": 0.4739, "step": 21163 }, { "epoch": 0.4488558036945134, "grad_norm": 0.39035922288894653, "learning_rate": 1.7632672446460436e-05, "loss": 0.4337, "step": 21164 }, { "epoch": 0.4488770121524464, "grad_norm": 0.41573500633239746, "learning_rate": 1.7632456976589853e-05, "loss": 0.5363, "step": 21165 }, { "epoch": 0.44889822061037943, "grad_norm": 0.3683266043663025, "learning_rate": 1.763224149823052e-05, "loss": 0.535, "step": 21166 }, { "epoch": 0.44891942906831245, "grad_norm": 0.34218600392341614, "learning_rate": 1.7632026011382674e-05, "loss": 0.4549, "step": 21167 }, { "epoch": 0.44894063752624547, "grad_norm": 0.37926867604255676, "learning_rate": 1.7631810516046556e-05, "loss": 0.5494, "step": 21168 }, { "epoch": 0.4489618459841785, "grad_norm": 0.32926374673843384, "learning_rate": 1.7631595012222403e-05, "loss": 0.4856, "step": 21169 }, { "epoch": 0.4489830544421115, "grad_norm": 0.36187273263931274, "learning_rate": 1.7631379499910458e-05, "loss": 0.5925, "step": 21170 }, { "epoch": 0.4490042629000445, "grad_norm": 0.3215775787830353, "learning_rate": 1.7631163979110958e-05, "loss": 0.4467, "step": 21171 }, { "epoch": 0.44902547135797755, "grad_norm": 0.41907089948654175, "learning_rate": 1.763094844982414e-05, "loss": 0.5512, "step": 21172 }, { "epoch": 0.44904667981591057, "grad_norm": 0.4553751051425934, "learning_rate": 1.7630732912050252e-05, "loss": 0.5455, "step": 21173 }, { "epoch": 0.4490678882738436, "grad_norm": 0.3533591032028198, "learning_rate": 1.763051736578953e-05, "loss": 0.5714, "step": 21174 }, { "epoch": 0.44908909673177666, "grad_norm": 0.3619552254676819, "learning_rate": 1.763030181104221e-05, "loss": 0.4795, "step": 21175 }, { "epoch": 0.4491103051897097, "grad_norm": 0.4043985605239868, "learning_rate": 1.7630086247808536e-05, "loss": 0.5457, "step": 21176 }, { "epoch": 0.4491315136476427, "grad_norm": 0.3496377468109131, "learning_rate": 1.7629870676088746e-05, "loss": 0.5311, "step": 21177 }, { "epoch": 0.4491527221055757, "grad_norm": 0.9227866530418396, "learning_rate": 1.762965509588308e-05, "loss": 0.5128, "step": 21178 }, { "epoch": 0.44917393056350874, "grad_norm": 0.3864167332649231, "learning_rate": 1.7629439507191776e-05, "loss": 0.4252, "step": 21179 }, { "epoch": 0.44919513902144176, "grad_norm": 0.339910089969635, "learning_rate": 1.7629223910015078e-05, "loss": 0.5452, "step": 21180 }, { "epoch": 0.4492163474793748, "grad_norm": 0.3213629722595215, "learning_rate": 1.7629008304353224e-05, "loss": 0.4924, "step": 21181 }, { "epoch": 0.4492375559373078, "grad_norm": 0.33464983105659485, "learning_rate": 1.762879269020645e-05, "loss": 0.4626, "step": 21182 }, { "epoch": 0.4492587643952408, "grad_norm": 0.4031243622303009, "learning_rate": 1.7628577067575e-05, "loss": 0.4876, "step": 21183 }, { "epoch": 0.44927997285317384, "grad_norm": 0.3221115171909332, "learning_rate": 1.7628361436459116e-05, "loss": 0.4462, "step": 21184 }, { "epoch": 0.44930118131110686, "grad_norm": 0.797164797782898, "learning_rate": 1.7628145796859033e-05, "loss": 0.4562, "step": 21185 }, { "epoch": 0.4493223897690399, "grad_norm": 0.5290694832801819, "learning_rate": 1.762793014877499e-05, "loss": 0.5242, "step": 21186 }, { "epoch": 0.4493435982269729, "grad_norm": 0.3896886110305786, "learning_rate": 1.7627714492207233e-05, "loss": 0.4342, "step": 21187 }, { "epoch": 0.4493648066849059, "grad_norm": 0.3535008430480957, "learning_rate": 1.7627498827156e-05, "loss": 0.4596, "step": 21188 }, { "epoch": 0.449386015142839, "grad_norm": 0.3891770541667938, "learning_rate": 1.7627283153621524e-05, "loss": 0.4786, "step": 21189 }, { "epoch": 0.449407223600772, "grad_norm": 0.3949793875217438, "learning_rate": 1.762706747160405e-05, "loss": 0.4604, "step": 21190 }, { "epoch": 0.44942843205870503, "grad_norm": 0.29618388414382935, "learning_rate": 1.762685178110382e-05, "loss": 0.4805, "step": 21191 }, { "epoch": 0.44944964051663805, "grad_norm": 0.3439447283744812, "learning_rate": 1.7626636082121074e-05, "loss": 0.4582, "step": 21192 }, { "epoch": 0.44947084897457107, "grad_norm": 0.3670673668384552, "learning_rate": 1.7626420374656045e-05, "loss": 0.4621, "step": 21193 }, { "epoch": 0.4494920574325041, "grad_norm": 0.3036632537841797, "learning_rate": 1.762620465870898e-05, "loss": 0.4924, "step": 21194 }, { "epoch": 0.4495132658904371, "grad_norm": 0.39906907081604004, "learning_rate": 1.7625988934280117e-05, "loss": 0.4692, "step": 21195 }, { "epoch": 0.4495344743483701, "grad_norm": 0.33195725083351135, "learning_rate": 1.7625773201369692e-05, "loss": 0.4765, "step": 21196 }, { "epoch": 0.44955568280630315, "grad_norm": 0.33339905738830566, "learning_rate": 1.762555745997795e-05, "loss": 0.4774, "step": 21197 }, { "epoch": 0.44957689126423617, "grad_norm": 0.30382290482521057, "learning_rate": 1.762534171010513e-05, "loss": 0.4774, "step": 21198 }, { "epoch": 0.4495980997221692, "grad_norm": 0.3059466779232025, "learning_rate": 1.762512595175147e-05, "loss": 0.4561, "step": 21199 }, { "epoch": 0.4496193081801022, "grad_norm": 0.4320720434188843, "learning_rate": 1.7624910184917215e-05, "loss": 0.482, "step": 21200 }, { "epoch": 0.4496405166380352, "grad_norm": 0.38107964396476746, "learning_rate": 1.76246944096026e-05, "loss": 0.5518, "step": 21201 }, { "epoch": 0.44966172509596825, "grad_norm": 0.38716599345207214, "learning_rate": 1.7624478625807864e-05, "loss": 0.5403, "step": 21202 }, { "epoch": 0.4496829335539013, "grad_norm": 0.31966111063957214, "learning_rate": 1.7624262833533247e-05, "loss": 0.4707, "step": 21203 }, { "epoch": 0.44970414201183434, "grad_norm": 0.3288227319717407, "learning_rate": 1.7624047032778994e-05, "loss": 0.4964, "step": 21204 }, { "epoch": 0.44972535046976736, "grad_norm": 0.340094655752182, "learning_rate": 1.762383122354534e-05, "loss": 0.4461, "step": 21205 }, { "epoch": 0.4497465589277004, "grad_norm": 0.37123075127601624, "learning_rate": 1.7623615405832533e-05, "loss": 0.535, "step": 21206 }, { "epoch": 0.4497677673856334, "grad_norm": 0.3220541775226593, "learning_rate": 1.7623399579640802e-05, "loss": 0.4894, "step": 21207 }, { "epoch": 0.4497889758435664, "grad_norm": 0.35562247037887573, "learning_rate": 1.7623183744970394e-05, "loss": 0.4978, "step": 21208 }, { "epoch": 0.44981018430149944, "grad_norm": 0.4401507079601288, "learning_rate": 1.7622967901821546e-05, "loss": 0.5069, "step": 21209 }, { "epoch": 0.44983139275943246, "grad_norm": 0.38447806239128113, "learning_rate": 1.7622752050194497e-05, "loss": 0.5084, "step": 21210 }, { "epoch": 0.4498526012173655, "grad_norm": 0.3609977662563324, "learning_rate": 1.7622536190089495e-05, "loss": 0.5431, "step": 21211 }, { "epoch": 0.4498738096752985, "grad_norm": 0.36128222942352295, "learning_rate": 1.7622320321506768e-05, "loss": 0.4636, "step": 21212 }, { "epoch": 0.4498950181332315, "grad_norm": 0.3549429178237915, "learning_rate": 1.762210444444657e-05, "loss": 0.533, "step": 21213 }, { "epoch": 0.44991622659116454, "grad_norm": 0.3592491149902344, "learning_rate": 1.762188855890913e-05, "loss": 0.4654, "step": 21214 }, { "epoch": 0.44993743504909756, "grad_norm": 0.3467421531677246, "learning_rate": 1.762167266489469e-05, "loss": 0.4709, "step": 21215 }, { "epoch": 0.44995864350703063, "grad_norm": 0.3354331851005554, "learning_rate": 1.7621456762403494e-05, "loss": 0.5337, "step": 21216 }, { "epoch": 0.44997985196496365, "grad_norm": 0.4048452377319336, "learning_rate": 1.7621240851435776e-05, "loss": 0.5051, "step": 21217 }, { "epoch": 0.45000106042289667, "grad_norm": 0.3568970561027527, "learning_rate": 1.7621024931991787e-05, "loss": 0.5358, "step": 21218 }, { "epoch": 0.4500222688808297, "grad_norm": 0.3281072676181793, "learning_rate": 1.7620809004071756e-05, "loss": 0.4893, "step": 21219 }, { "epoch": 0.4500434773387627, "grad_norm": 0.35078930854797363, "learning_rate": 1.762059306767593e-05, "loss": 0.4352, "step": 21220 }, { "epoch": 0.45006468579669573, "grad_norm": 0.335761696100235, "learning_rate": 1.7620377122804543e-05, "loss": 0.5153, "step": 21221 }, { "epoch": 0.45008589425462875, "grad_norm": 0.32848963141441345, "learning_rate": 1.7620161169457843e-05, "loss": 0.5564, "step": 21222 }, { "epoch": 0.45010710271256177, "grad_norm": 0.35101595520973206, "learning_rate": 1.7619945207636063e-05, "loss": 0.5054, "step": 21223 }, { "epoch": 0.4501283111704948, "grad_norm": 0.3568372428417206, "learning_rate": 1.7619729237339448e-05, "loss": 0.4232, "step": 21224 }, { "epoch": 0.4501495196284278, "grad_norm": 0.34331604838371277, "learning_rate": 1.7619513258568234e-05, "loss": 0.5222, "step": 21225 }, { "epoch": 0.4501707280863608, "grad_norm": 0.4218757152557373, "learning_rate": 1.7619297271322665e-05, "loss": 0.5041, "step": 21226 }, { "epoch": 0.45019193654429385, "grad_norm": 0.560343861579895, "learning_rate": 1.7619081275602976e-05, "loss": 0.534, "step": 21227 }, { "epoch": 0.45021314500222687, "grad_norm": 0.3558214008808136, "learning_rate": 1.7618865271409417e-05, "loss": 0.4508, "step": 21228 }, { "epoch": 0.4502343534601599, "grad_norm": 0.3370365500450134, "learning_rate": 1.761864925874222e-05, "loss": 0.4896, "step": 21229 }, { "epoch": 0.45025556191809296, "grad_norm": 0.35750406980514526, "learning_rate": 1.7618433237601626e-05, "loss": 0.4872, "step": 21230 }, { "epoch": 0.450276770376026, "grad_norm": 0.3522767722606659, "learning_rate": 1.7618217207987876e-05, "loss": 0.4623, "step": 21231 }, { "epoch": 0.450297978833959, "grad_norm": 0.419989675283432, "learning_rate": 1.7618001169901212e-05, "loss": 0.4952, "step": 21232 }, { "epoch": 0.450319187291892, "grad_norm": 0.3379775583744049, "learning_rate": 1.7617785123341877e-05, "loss": 0.4486, "step": 21233 }, { "epoch": 0.45034039574982504, "grad_norm": 0.3632778227329254, "learning_rate": 1.7617569068310104e-05, "loss": 0.5997, "step": 21234 }, { "epoch": 0.45036160420775806, "grad_norm": 0.38816139101982117, "learning_rate": 1.7617353004806136e-05, "loss": 0.562, "step": 21235 }, { "epoch": 0.4503828126656911, "grad_norm": 0.34223467111587524, "learning_rate": 1.7617136932830215e-05, "loss": 0.4428, "step": 21236 }, { "epoch": 0.4504040211236241, "grad_norm": 0.4021533131599426, "learning_rate": 1.7616920852382582e-05, "loss": 0.4373, "step": 21237 }, { "epoch": 0.4504252295815571, "grad_norm": 0.3342341482639313, "learning_rate": 1.761670476346348e-05, "loss": 0.491, "step": 21238 }, { "epoch": 0.45044643803949014, "grad_norm": 0.36560022830963135, "learning_rate": 1.7616488666073137e-05, "loss": 0.5032, "step": 21239 }, { "epoch": 0.45046764649742316, "grad_norm": 0.3952556550502777, "learning_rate": 1.7616272560211805e-05, "loss": 0.4338, "step": 21240 }, { "epoch": 0.4504888549553562, "grad_norm": 0.495537668466568, "learning_rate": 1.7616056445879717e-05, "loss": 0.4842, "step": 21241 }, { "epoch": 0.4505100634132892, "grad_norm": 0.34877607226371765, "learning_rate": 1.7615840323077124e-05, "loss": 0.4817, "step": 21242 }, { "epoch": 0.4505312718712222, "grad_norm": 0.34735020995140076, "learning_rate": 1.761562419180426e-05, "loss": 0.407, "step": 21243 }, { "epoch": 0.4505524803291553, "grad_norm": 0.3888096213340759, "learning_rate": 1.7615408052061358e-05, "loss": 0.4996, "step": 21244 }, { "epoch": 0.4505736887870883, "grad_norm": 0.3655581474304199, "learning_rate": 1.7615191903848667e-05, "loss": 0.593, "step": 21245 }, { "epoch": 0.45059489724502133, "grad_norm": 0.3272882103919983, "learning_rate": 1.7614975747166426e-05, "loss": 0.547, "step": 21246 }, { "epoch": 0.45061610570295435, "grad_norm": 0.36560970544815063, "learning_rate": 1.761475958201488e-05, "loss": 0.4874, "step": 21247 }, { "epoch": 0.45063731416088737, "grad_norm": 0.4307929575443268, "learning_rate": 1.761454340839426e-05, "loss": 0.4802, "step": 21248 }, { "epoch": 0.4506585226188204, "grad_norm": 0.3156084716320038, "learning_rate": 1.761432722630481e-05, "loss": 0.4105, "step": 21249 }, { "epoch": 0.4506797310767534, "grad_norm": 0.3572102189064026, "learning_rate": 1.7614111035746774e-05, "loss": 0.6234, "step": 21250 }, { "epoch": 0.45070093953468643, "grad_norm": 0.3783583343029022, "learning_rate": 1.761389483672039e-05, "loss": 0.574, "step": 21251 }, { "epoch": 0.45072214799261945, "grad_norm": 0.3377716541290283, "learning_rate": 1.7613678629225896e-05, "loss": 0.5326, "step": 21252 }, { "epoch": 0.45074335645055247, "grad_norm": 0.3663095533847809, "learning_rate": 1.7613462413263536e-05, "loss": 0.5547, "step": 21253 }, { "epoch": 0.4507645649084855, "grad_norm": 0.4116544723510742, "learning_rate": 1.761324618883355e-05, "loss": 0.4573, "step": 21254 }, { "epoch": 0.4507857733664185, "grad_norm": 0.36551910638809204, "learning_rate": 1.7613029955936175e-05, "loss": 0.533, "step": 21255 }, { "epoch": 0.4508069818243515, "grad_norm": 0.34955134987831116, "learning_rate": 1.7612813714571655e-05, "loss": 0.5176, "step": 21256 }, { "epoch": 0.4508281902822846, "grad_norm": 0.33772870898246765, "learning_rate": 1.7612597464740232e-05, "loss": 0.594, "step": 21257 }, { "epoch": 0.4508493987402176, "grad_norm": 0.37510108947753906, "learning_rate": 1.7612381206442145e-05, "loss": 0.4934, "step": 21258 }, { "epoch": 0.45087060719815064, "grad_norm": 0.42163828015327454, "learning_rate": 1.761216493967763e-05, "loss": 0.5178, "step": 21259 }, { "epoch": 0.45089181565608366, "grad_norm": 0.360504686832428, "learning_rate": 1.7611948664446933e-05, "loss": 0.4773, "step": 21260 }, { "epoch": 0.4509130241140167, "grad_norm": 0.4115707278251648, "learning_rate": 1.7611732380750295e-05, "loss": 0.4429, "step": 21261 }, { "epoch": 0.4509342325719497, "grad_norm": 0.3114506006240845, "learning_rate": 1.761151608858795e-05, "loss": 0.4829, "step": 21262 }, { "epoch": 0.4509554410298827, "grad_norm": 0.4166708290576935, "learning_rate": 1.7611299787960147e-05, "loss": 0.5015, "step": 21263 }, { "epoch": 0.45097664948781574, "grad_norm": 0.3559611439704895, "learning_rate": 1.761108347886712e-05, "loss": 0.4948, "step": 21264 }, { "epoch": 0.45099785794574876, "grad_norm": 0.4343041181564331, "learning_rate": 1.7610867161309115e-05, "loss": 0.6023, "step": 21265 }, { "epoch": 0.4510190664036818, "grad_norm": 0.3930997848510742, "learning_rate": 1.7610650835286366e-05, "loss": 0.5334, "step": 21266 }, { "epoch": 0.4510402748616148, "grad_norm": 0.32335832715034485, "learning_rate": 1.761043450079912e-05, "loss": 0.4666, "step": 21267 }, { "epoch": 0.4510614833195478, "grad_norm": 0.3389738202095032, "learning_rate": 1.761021815784761e-05, "loss": 0.5197, "step": 21268 }, { "epoch": 0.45108269177748084, "grad_norm": 0.4060378670692444, "learning_rate": 1.7610001806432085e-05, "loss": 0.5477, "step": 21269 }, { "epoch": 0.45110390023541386, "grad_norm": 0.33948299288749695, "learning_rate": 1.7609785446552786e-05, "loss": 0.5215, "step": 21270 }, { "epoch": 0.45112510869334693, "grad_norm": 0.4262467324733734, "learning_rate": 1.7609569078209944e-05, "loss": 0.512, "step": 21271 }, { "epoch": 0.45114631715127995, "grad_norm": 0.363399475812912, "learning_rate": 1.760935270140381e-05, "loss": 0.5264, "step": 21272 }, { "epoch": 0.45116752560921297, "grad_norm": 0.34079989790916443, "learning_rate": 1.7609136316134616e-05, "loss": 0.4793, "step": 21273 }, { "epoch": 0.451188734067146, "grad_norm": 0.36421605944633484, "learning_rate": 1.760891992240261e-05, "loss": 0.4676, "step": 21274 }, { "epoch": 0.451209942525079, "grad_norm": 0.3448231518268585, "learning_rate": 1.7608703520208027e-05, "loss": 0.4872, "step": 21275 }, { "epoch": 0.45123115098301203, "grad_norm": 0.3561290204524994, "learning_rate": 1.7608487109551108e-05, "loss": 0.498, "step": 21276 }, { "epoch": 0.45125235944094505, "grad_norm": 0.3504598140716553, "learning_rate": 1.76082706904321e-05, "loss": 0.5594, "step": 21277 }, { "epoch": 0.45127356789887807, "grad_norm": 0.29176628589630127, "learning_rate": 1.7608054262851236e-05, "loss": 0.4269, "step": 21278 }, { "epoch": 0.4512947763568111, "grad_norm": 0.40696415305137634, "learning_rate": 1.7607837826808764e-05, "loss": 0.6136, "step": 21279 }, { "epoch": 0.4513159848147441, "grad_norm": 0.3693158030509949, "learning_rate": 1.7607621382304917e-05, "loss": 0.4995, "step": 21280 }, { "epoch": 0.45133719327267713, "grad_norm": 0.33480459451675415, "learning_rate": 1.7607404929339943e-05, "loss": 0.5019, "step": 21281 }, { "epoch": 0.45135840173061015, "grad_norm": 0.4315546154975891, "learning_rate": 1.7607188467914075e-05, "loss": 0.5441, "step": 21282 }, { "epoch": 0.45137961018854317, "grad_norm": 0.3514394760131836, "learning_rate": 1.760697199802756e-05, "loss": 0.5079, "step": 21283 }, { "epoch": 0.4514008186464762, "grad_norm": 0.3837520480155945, "learning_rate": 1.7606755519680638e-05, "loss": 0.4891, "step": 21284 }, { "epoch": 0.45142202710440926, "grad_norm": 0.4207635521888733, "learning_rate": 1.760653903287355e-05, "loss": 0.4298, "step": 21285 }, { "epoch": 0.4514432355623423, "grad_norm": 0.33507388830184937, "learning_rate": 1.7606322537606534e-05, "loss": 0.5314, "step": 21286 }, { "epoch": 0.4514644440202753, "grad_norm": 0.3612222969532013, "learning_rate": 1.760610603387983e-05, "loss": 0.4546, "step": 21287 }, { "epoch": 0.4514856524782083, "grad_norm": 0.3708747327327728, "learning_rate": 1.760588952169368e-05, "loss": 0.5031, "step": 21288 }, { "epoch": 0.45150686093614134, "grad_norm": 0.39483457803726196, "learning_rate": 1.760567300104833e-05, "loss": 0.5188, "step": 21289 }, { "epoch": 0.45152806939407436, "grad_norm": 0.4051976501941681, "learning_rate": 1.7605456471944016e-05, "loss": 0.5067, "step": 21290 }, { "epoch": 0.4515492778520074, "grad_norm": 0.5667503476142883, "learning_rate": 1.760523993438098e-05, "loss": 0.4829, "step": 21291 }, { "epoch": 0.4515704863099404, "grad_norm": 0.3474752902984619, "learning_rate": 1.7605023388359458e-05, "loss": 0.5405, "step": 21292 }, { "epoch": 0.4515916947678734, "grad_norm": 0.3237910866737366, "learning_rate": 1.76048068338797e-05, "loss": 0.4928, "step": 21293 }, { "epoch": 0.45161290322580644, "grad_norm": 0.36432409286499023, "learning_rate": 1.7604590270941937e-05, "loss": 0.5286, "step": 21294 }, { "epoch": 0.45163411168373946, "grad_norm": 0.35726404190063477, "learning_rate": 1.760437369954642e-05, "loss": 0.5572, "step": 21295 }, { "epoch": 0.4516553201416725, "grad_norm": 0.31032711267471313, "learning_rate": 1.7604157119693378e-05, "loss": 0.4808, "step": 21296 }, { "epoch": 0.4516765285996055, "grad_norm": 0.37682682275772095, "learning_rate": 1.7603940531383062e-05, "loss": 0.5249, "step": 21297 }, { "epoch": 0.4516977370575386, "grad_norm": 0.34597641229629517, "learning_rate": 1.7603723934615712e-05, "loss": 0.5203, "step": 21298 }, { "epoch": 0.4517189455154716, "grad_norm": 0.3627006709575653, "learning_rate": 1.7603507329391565e-05, "loss": 0.549, "step": 21299 }, { "epoch": 0.4517401539734046, "grad_norm": 0.3673348128795624, "learning_rate": 1.760329071571086e-05, "loss": 0.5225, "step": 21300 }, { "epoch": 0.45176136243133763, "grad_norm": 0.39701172709465027, "learning_rate": 1.7603074093573844e-05, "loss": 0.5702, "step": 21301 }, { "epoch": 0.45178257088927065, "grad_norm": 0.3679778277873993, "learning_rate": 1.7602857462980755e-05, "loss": 0.5164, "step": 21302 }, { "epoch": 0.45180377934720367, "grad_norm": 0.37825849652290344, "learning_rate": 1.760264082393183e-05, "loss": 0.4417, "step": 21303 }, { "epoch": 0.4518249878051367, "grad_norm": 0.4673702120780945, "learning_rate": 1.760242417642732e-05, "loss": 0.5782, "step": 21304 }, { "epoch": 0.4518461962630697, "grad_norm": 0.3842529356479645, "learning_rate": 1.760220752046746e-05, "loss": 0.5387, "step": 21305 }, { "epoch": 0.45186740472100273, "grad_norm": 0.37209537625312805, "learning_rate": 1.7601990856052484e-05, "loss": 0.5111, "step": 21306 }, { "epoch": 0.45188861317893575, "grad_norm": 0.333196759223938, "learning_rate": 1.7601774183182646e-05, "loss": 0.4972, "step": 21307 }, { "epoch": 0.45190982163686877, "grad_norm": 0.32981517910957336, "learning_rate": 1.7601557501858178e-05, "loss": 0.5382, "step": 21308 }, { "epoch": 0.4519310300948018, "grad_norm": 0.3576522171497345, "learning_rate": 1.7601340812079323e-05, "loss": 0.4746, "step": 21309 }, { "epoch": 0.4519522385527348, "grad_norm": 0.38420626521110535, "learning_rate": 1.7601124113846323e-05, "loss": 0.462, "step": 21310 }, { "epoch": 0.45197344701066783, "grad_norm": 0.3558740019798279, "learning_rate": 1.760090740715942e-05, "loss": 0.5557, "step": 21311 }, { "epoch": 0.4519946554686009, "grad_norm": 0.34802675247192383, "learning_rate": 1.7600690692018855e-05, "loss": 0.4709, "step": 21312 }, { "epoch": 0.4520158639265339, "grad_norm": 0.373626172542572, "learning_rate": 1.7600473968424866e-05, "loss": 0.4297, "step": 21313 }, { "epoch": 0.45203707238446694, "grad_norm": 0.30855897068977356, "learning_rate": 1.7600257236377697e-05, "loss": 0.4601, "step": 21314 }, { "epoch": 0.45205828084239996, "grad_norm": 0.38956910371780396, "learning_rate": 1.7600040495877587e-05, "loss": 0.4623, "step": 21315 }, { "epoch": 0.452079489300333, "grad_norm": 0.3431234359741211, "learning_rate": 1.759982374692478e-05, "loss": 0.5014, "step": 21316 }, { "epoch": 0.452100697758266, "grad_norm": 0.3530883491039276, "learning_rate": 1.759960698951951e-05, "loss": 0.4877, "step": 21317 }, { "epoch": 0.452121906216199, "grad_norm": 0.3874863088130951, "learning_rate": 1.759939022366203e-05, "loss": 0.503, "step": 21318 }, { "epoch": 0.45214311467413204, "grad_norm": 0.3575597107410431, "learning_rate": 1.759917344935257e-05, "loss": 0.4111, "step": 21319 }, { "epoch": 0.45216432313206506, "grad_norm": 0.3351476490497589, "learning_rate": 1.7598956666591377e-05, "loss": 0.525, "step": 21320 }, { "epoch": 0.4521855315899981, "grad_norm": 0.39269062876701355, "learning_rate": 1.7598739875378686e-05, "loss": 0.5045, "step": 21321 }, { "epoch": 0.4522067400479311, "grad_norm": 0.33994174003601074, "learning_rate": 1.7598523075714748e-05, "loss": 0.4589, "step": 21322 }, { "epoch": 0.4522279485058641, "grad_norm": 0.38931554555892944, "learning_rate": 1.7598306267599796e-05, "loss": 0.4405, "step": 21323 }, { "epoch": 0.45224915696379714, "grad_norm": 0.3472006916999817, "learning_rate": 1.7598089451034074e-05, "loss": 0.4848, "step": 21324 }, { "epoch": 0.45227036542173016, "grad_norm": 0.3590545952320099, "learning_rate": 1.7597872626017826e-05, "loss": 0.5282, "step": 21325 }, { "epoch": 0.45229157387966323, "grad_norm": 0.3687181770801544, "learning_rate": 1.7597655792551286e-05, "loss": 0.4753, "step": 21326 }, { "epoch": 0.45231278233759625, "grad_norm": 0.3185165524482727, "learning_rate": 1.7597438950634698e-05, "loss": 0.5298, "step": 21327 }, { "epoch": 0.4523339907955293, "grad_norm": 0.3712272346019745, "learning_rate": 1.759722210026831e-05, "loss": 0.4397, "step": 21328 }, { "epoch": 0.4523551992534623, "grad_norm": 0.33983609080314636, "learning_rate": 1.7597005241452354e-05, "loss": 0.4779, "step": 21329 }, { "epoch": 0.4523764077113953, "grad_norm": 0.3348609507083893, "learning_rate": 1.7596788374187075e-05, "loss": 0.5164, "step": 21330 }, { "epoch": 0.45239761616932833, "grad_norm": 0.3206324875354767, "learning_rate": 1.7596571498472714e-05, "loss": 0.4767, "step": 21331 }, { "epoch": 0.45241882462726135, "grad_norm": 0.31609269976615906, "learning_rate": 1.759635461430951e-05, "loss": 0.4268, "step": 21332 }, { "epoch": 0.45244003308519437, "grad_norm": 0.32835912704467773, "learning_rate": 1.7596137721697713e-05, "loss": 0.4735, "step": 21333 }, { "epoch": 0.4524612415431274, "grad_norm": 0.38796573877334595, "learning_rate": 1.759592082063755e-05, "loss": 0.5934, "step": 21334 }, { "epoch": 0.4524824500010604, "grad_norm": 0.3931715786457062, "learning_rate": 1.7595703911129275e-05, "loss": 0.4448, "step": 21335 }, { "epoch": 0.45250365845899343, "grad_norm": 0.3627331852912903, "learning_rate": 1.7595486993173123e-05, "loss": 0.5198, "step": 21336 }, { "epoch": 0.45252486691692645, "grad_norm": 0.38992971181869507, "learning_rate": 1.7595270066769336e-05, "loss": 0.4184, "step": 21337 }, { "epoch": 0.45254607537485947, "grad_norm": 0.3496688902378082, "learning_rate": 1.7595053131918156e-05, "loss": 0.5174, "step": 21338 }, { "epoch": 0.45256728383279254, "grad_norm": 0.31902509927749634, "learning_rate": 1.7594836188619822e-05, "loss": 0.4523, "step": 21339 }, { "epoch": 0.45258849229072556, "grad_norm": 0.34089547395706177, "learning_rate": 1.7594619236874578e-05, "loss": 0.5737, "step": 21340 }, { "epoch": 0.4526097007486586, "grad_norm": 0.37484806776046753, "learning_rate": 1.7594402276682666e-05, "loss": 0.5366, "step": 21341 }, { "epoch": 0.4526309092065916, "grad_norm": 0.3527250289916992, "learning_rate": 1.759418530804432e-05, "loss": 0.5252, "step": 21342 }, { "epoch": 0.4526521176645246, "grad_norm": 0.34318631887435913, "learning_rate": 1.7593968330959795e-05, "loss": 0.5037, "step": 21343 }, { "epoch": 0.45267332612245764, "grad_norm": 0.34355998039245605, "learning_rate": 1.7593751345429323e-05, "loss": 0.4579, "step": 21344 }, { "epoch": 0.45269453458039066, "grad_norm": 0.4165627658367157, "learning_rate": 1.7593534351453143e-05, "loss": 0.4693, "step": 21345 }, { "epoch": 0.4527157430383237, "grad_norm": 0.3712227940559387, "learning_rate": 1.7593317349031503e-05, "loss": 0.4288, "step": 21346 }, { "epoch": 0.4527369514962567, "grad_norm": 0.3234047293663025, "learning_rate": 1.759310033816464e-05, "loss": 0.4507, "step": 21347 }, { "epoch": 0.4527581599541897, "grad_norm": 0.35479897260665894, "learning_rate": 1.7592883318852796e-05, "loss": 0.4509, "step": 21348 }, { "epoch": 0.45277936841212274, "grad_norm": 0.3783022463321686, "learning_rate": 1.7592666291096217e-05, "loss": 0.6363, "step": 21349 }, { "epoch": 0.45280057687005576, "grad_norm": 0.37812331318855286, "learning_rate": 1.7592449254895133e-05, "loss": 0.5351, "step": 21350 }, { "epoch": 0.4528217853279888, "grad_norm": 2.313955307006836, "learning_rate": 1.75922322102498e-05, "loss": 0.5085, "step": 21351 }, { "epoch": 0.4528429937859218, "grad_norm": 0.3632432520389557, "learning_rate": 1.7592015157160453e-05, "loss": 0.5241, "step": 21352 }, { "epoch": 0.4528642022438549, "grad_norm": 0.38057368993759155, "learning_rate": 1.7591798095627327e-05, "loss": 0.5019, "step": 21353 }, { "epoch": 0.4528854107017879, "grad_norm": 0.34144285321235657, "learning_rate": 1.759158102565067e-05, "loss": 0.4614, "step": 21354 }, { "epoch": 0.4529066191597209, "grad_norm": 0.33679279685020447, "learning_rate": 1.7591363947230725e-05, "loss": 0.4856, "step": 21355 }, { "epoch": 0.45292782761765393, "grad_norm": 0.30149999260902405, "learning_rate": 1.759114686036773e-05, "loss": 0.4074, "step": 21356 }, { "epoch": 0.45294903607558695, "grad_norm": 0.34624335169792175, "learning_rate": 1.7590929765061927e-05, "loss": 0.5709, "step": 21357 }, { "epoch": 0.45297024453352, "grad_norm": 0.391497403383255, "learning_rate": 1.7590712661313557e-05, "loss": 0.5202, "step": 21358 }, { "epoch": 0.452991452991453, "grad_norm": 0.35299381613731384, "learning_rate": 1.7590495549122864e-05, "loss": 0.5474, "step": 21359 }, { "epoch": 0.453012661449386, "grad_norm": 0.33111706376075745, "learning_rate": 1.759027842849009e-05, "loss": 0.511, "step": 21360 }, { "epoch": 0.45303386990731903, "grad_norm": 0.3609001338481903, "learning_rate": 1.7590061299415474e-05, "loss": 0.4481, "step": 21361 }, { "epoch": 0.45305507836525205, "grad_norm": 0.3080957233905792, "learning_rate": 1.758984416189925e-05, "loss": 0.4783, "step": 21362 }, { "epoch": 0.45307628682318507, "grad_norm": 0.4047238528728485, "learning_rate": 1.7589627015941677e-05, "loss": 0.5572, "step": 21363 }, { "epoch": 0.4530974952811181, "grad_norm": 0.315548837184906, "learning_rate": 1.7589409861542983e-05, "loss": 0.4508, "step": 21364 }, { "epoch": 0.4531187037390511, "grad_norm": 0.4085047245025635, "learning_rate": 1.758919269870341e-05, "loss": 0.5504, "step": 21365 }, { "epoch": 0.4531399121969842, "grad_norm": 0.35142526030540466, "learning_rate": 1.7588975527423207e-05, "loss": 0.5621, "step": 21366 }, { "epoch": 0.4531611206549172, "grad_norm": 0.3814346194267273, "learning_rate": 1.758875834770261e-05, "loss": 0.393, "step": 21367 }, { "epoch": 0.4531823291128502, "grad_norm": 0.34257274866104126, "learning_rate": 1.7588541159541862e-05, "loss": 0.5082, "step": 21368 }, { "epoch": 0.45320353757078324, "grad_norm": 0.3869009017944336, "learning_rate": 1.7588323962941205e-05, "loss": 0.5559, "step": 21369 }, { "epoch": 0.45322474602871626, "grad_norm": 0.34332597255706787, "learning_rate": 1.758810675790088e-05, "loss": 0.4902, "step": 21370 }, { "epoch": 0.4532459544866493, "grad_norm": 0.3625836670398712, "learning_rate": 1.7587889544421128e-05, "loss": 0.4644, "step": 21371 }, { "epoch": 0.4532671629445823, "grad_norm": 0.4107531011104584, "learning_rate": 1.758767232250219e-05, "loss": 0.4992, "step": 21372 }, { "epoch": 0.4532883714025153, "grad_norm": 0.34681326150894165, "learning_rate": 1.758745509214431e-05, "loss": 0.4786, "step": 21373 }, { "epoch": 0.45330957986044834, "grad_norm": 0.3629147708415985, "learning_rate": 1.7587237853347728e-05, "loss": 0.5039, "step": 21374 }, { "epoch": 0.45333078831838136, "grad_norm": 0.3651472330093384, "learning_rate": 1.7587020606112685e-05, "loss": 0.4809, "step": 21375 }, { "epoch": 0.4533519967763144, "grad_norm": 0.4133888781070709, "learning_rate": 1.7586803350439425e-05, "loss": 0.5458, "step": 21376 }, { "epoch": 0.4533732052342474, "grad_norm": 0.3430521786212921, "learning_rate": 1.758658608632819e-05, "loss": 0.4882, "step": 21377 }, { "epoch": 0.4533944136921804, "grad_norm": 0.3317886292934418, "learning_rate": 1.7586368813779217e-05, "loss": 0.5005, "step": 21378 }, { "epoch": 0.45341562215011344, "grad_norm": 0.381474107503891, "learning_rate": 1.758615153279275e-05, "loss": 0.5455, "step": 21379 }, { "epoch": 0.4534368306080465, "grad_norm": 0.3320927321910858, "learning_rate": 1.7585934243369032e-05, "loss": 0.5804, "step": 21380 }, { "epoch": 0.45345803906597953, "grad_norm": 0.34287190437316895, "learning_rate": 1.7585716945508307e-05, "loss": 0.4115, "step": 21381 }, { "epoch": 0.45347924752391255, "grad_norm": 0.3497896194458008, "learning_rate": 1.7585499639210812e-05, "loss": 0.5306, "step": 21382 }, { "epoch": 0.4535004559818456, "grad_norm": 0.32280683517456055, "learning_rate": 1.7585282324476787e-05, "loss": 0.4711, "step": 21383 }, { "epoch": 0.4535216644397786, "grad_norm": 0.3435315787792206, "learning_rate": 1.7585065001306477e-05, "loss": 0.5242, "step": 21384 }, { "epoch": 0.4535428728977116, "grad_norm": 0.3522096872329712, "learning_rate": 1.758484766970013e-05, "loss": 0.4453, "step": 21385 }, { "epoch": 0.45356408135564463, "grad_norm": 0.3697890043258667, "learning_rate": 1.7584630329657976e-05, "loss": 0.6004, "step": 21386 }, { "epoch": 0.45358528981357765, "grad_norm": 0.32785356044769287, "learning_rate": 1.7584412981180262e-05, "loss": 0.5056, "step": 21387 }, { "epoch": 0.45360649827151067, "grad_norm": 0.35993456840515137, "learning_rate": 1.758419562426723e-05, "loss": 0.5158, "step": 21388 }, { "epoch": 0.4536277067294437, "grad_norm": 0.368442565202713, "learning_rate": 1.7583978258919123e-05, "loss": 0.5548, "step": 21389 }, { "epoch": 0.4536489151873767, "grad_norm": 0.33085307478904724, "learning_rate": 1.7583760885136182e-05, "loss": 0.4562, "step": 21390 }, { "epoch": 0.45367012364530973, "grad_norm": 0.36455675959587097, "learning_rate": 1.7583543502918646e-05, "loss": 0.4562, "step": 21391 }, { "epoch": 0.45369133210324275, "grad_norm": 0.35761794447898865, "learning_rate": 1.758332611226676e-05, "loss": 0.5402, "step": 21392 }, { "epoch": 0.45371254056117577, "grad_norm": 0.3290669023990631, "learning_rate": 1.7583108713180762e-05, "loss": 0.5688, "step": 21393 }, { "epoch": 0.45373374901910885, "grad_norm": 0.3600757122039795, "learning_rate": 1.7582891305660898e-05, "loss": 0.4803, "step": 21394 }, { "epoch": 0.45375495747704186, "grad_norm": 0.4393261969089508, "learning_rate": 1.758267388970741e-05, "loss": 0.6321, "step": 21395 }, { "epoch": 0.4537761659349749, "grad_norm": 0.33442315459251404, "learning_rate": 1.7582456465320534e-05, "loss": 0.4634, "step": 21396 }, { "epoch": 0.4537973743929079, "grad_norm": 0.3670841157436371, "learning_rate": 1.7582239032500518e-05, "loss": 0.5362, "step": 21397 }, { "epoch": 0.4538185828508409, "grad_norm": 0.38282206654548645, "learning_rate": 1.7582021591247604e-05, "loss": 0.4214, "step": 21398 }, { "epoch": 0.45383979130877394, "grad_norm": 0.3727240264415741, "learning_rate": 1.7581804141562027e-05, "loss": 0.5883, "step": 21399 }, { "epoch": 0.45386099976670696, "grad_norm": 0.35394972562789917, "learning_rate": 1.7581586683444037e-05, "loss": 0.5493, "step": 21400 }, { "epoch": 0.45388220822464, "grad_norm": 0.36036229133605957, "learning_rate": 1.758136921689387e-05, "loss": 0.4787, "step": 21401 }, { "epoch": 0.453903416682573, "grad_norm": 0.3661237359046936, "learning_rate": 1.758115174191177e-05, "loss": 0.4449, "step": 21402 }, { "epoch": 0.453924625140506, "grad_norm": 0.3462478518486023, "learning_rate": 1.7580934258497982e-05, "loss": 0.5173, "step": 21403 }, { "epoch": 0.45394583359843904, "grad_norm": 0.35340332984924316, "learning_rate": 1.7580716766652742e-05, "loss": 0.4956, "step": 21404 }, { "epoch": 0.45396704205637206, "grad_norm": 0.36153528094291687, "learning_rate": 1.7580499266376296e-05, "loss": 0.5202, "step": 21405 }, { "epoch": 0.4539882505143051, "grad_norm": 0.3470573127269745, "learning_rate": 1.758028175766888e-05, "loss": 0.5042, "step": 21406 }, { "epoch": 0.45400945897223816, "grad_norm": 0.3508372902870178, "learning_rate": 1.7580064240530746e-05, "loss": 0.5334, "step": 21407 }, { "epoch": 0.4540306674301712, "grad_norm": 0.3499193489551544, "learning_rate": 1.757984671496213e-05, "loss": 0.491, "step": 21408 }, { "epoch": 0.4540518758881042, "grad_norm": 0.31931072473526, "learning_rate": 1.7579629180963273e-05, "loss": 0.5606, "step": 21409 }, { "epoch": 0.4540730843460372, "grad_norm": 0.31064602732658386, "learning_rate": 1.757941163853442e-05, "loss": 0.4271, "step": 21410 }, { "epoch": 0.45409429280397023, "grad_norm": 0.45950034260749817, "learning_rate": 1.7579194087675807e-05, "loss": 0.6026, "step": 21411 }, { "epoch": 0.45411550126190325, "grad_norm": 0.36130350828170776, "learning_rate": 1.757897652838768e-05, "loss": 0.4787, "step": 21412 }, { "epoch": 0.4541367097198363, "grad_norm": 0.3332444727420807, "learning_rate": 1.7578758960670285e-05, "loss": 0.5174, "step": 21413 }, { "epoch": 0.4541579181777693, "grad_norm": 0.3861418068408966, "learning_rate": 1.757854138452386e-05, "loss": 0.5165, "step": 21414 }, { "epoch": 0.4541791266357023, "grad_norm": 0.35898467898368835, "learning_rate": 1.7578323799948646e-05, "loss": 0.5371, "step": 21415 }, { "epoch": 0.45420033509363533, "grad_norm": 0.38071104884147644, "learning_rate": 1.7578106206944884e-05, "loss": 0.5964, "step": 21416 }, { "epoch": 0.45422154355156835, "grad_norm": 0.3236227035522461, "learning_rate": 1.757788860551282e-05, "loss": 0.3974, "step": 21417 }, { "epoch": 0.45424275200950137, "grad_norm": 0.347888708114624, "learning_rate": 1.7577670995652695e-05, "loss": 0.4838, "step": 21418 }, { "epoch": 0.4542639604674344, "grad_norm": 0.36504414677619934, "learning_rate": 1.757745337736475e-05, "loss": 0.5505, "step": 21419 }, { "epoch": 0.4542851689253674, "grad_norm": 0.3376801311969757, "learning_rate": 1.7577235750649225e-05, "loss": 0.4436, "step": 21420 }, { "epoch": 0.4543063773833005, "grad_norm": 0.3624158501625061, "learning_rate": 1.7577018115506367e-05, "loss": 0.5253, "step": 21421 }, { "epoch": 0.4543275858412335, "grad_norm": 0.4076946973800659, "learning_rate": 1.757680047193641e-05, "loss": 0.4835, "step": 21422 }, { "epoch": 0.4543487942991665, "grad_norm": 0.3545236587524414, "learning_rate": 1.757658281993961e-05, "loss": 0.5049, "step": 21423 }, { "epoch": 0.45437000275709954, "grad_norm": 0.34857097268104553, "learning_rate": 1.7576365159516197e-05, "loss": 0.4952, "step": 21424 }, { "epoch": 0.45439121121503256, "grad_norm": 0.3611656725406647, "learning_rate": 1.757614749066641e-05, "loss": 0.5157, "step": 21425 }, { "epoch": 0.4544124196729656, "grad_norm": 0.3253553807735443, "learning_rate": 1.7575929813390504e-05, "loss": 0.4772, "step": 21426 }, { "epoch": 0.4544336281308986, "grad_norm": 0.3450571596622467, "learning_rate": 1.7575712127688717e-05, "loss": 0.5394, "step": 21427 }, { "epoch": 0.4544548365888316, "grad_norm": 0.34691622853279114, "learning_rate": 1.7575494433561283e-05, "loss": 0.5403, "step": 21428 }, { "epoch": 0.45447604504676464, "grad_norm": 0.3239245116710663, "learning_rate": 1.7575276731008453e-05, "loss": 0.4943, "step": 21429 }, { "epoch": 0.45449725350469766, "grad_norm": 0.34218066930770874, "learning_rate": 1.7575059020030464e-05, "loss": 0.4868, "step": 21430 }, { "epoch": 0.4545184619626307, "grad_norm": 0.4639231860637665, "learning_rate": 1.757484130062756e-05, "loss": 0.5354, "step": 21431 }, { "epoch": 0.4545396704205637, "grad_norm": 0.3444860577583313, "learning_rate": 1.757462357279999e-05, "loss": 0.557, "step": 21432 }, { "epoch": 0.4545608788784967, "grad_norm": 0.3432480990886688, "learning_rate": 1.757440583654798e-05, "loss": 0.4384, "step": 21433 }, { "epoch": 0.45458208733642974, "grad_norm": 0.3287728428840637, "learning_rate": 1.7574188091871788e-05, "loss": 0.4726, "step": 21434 }, { "epoch": 0.4546032957943628, "grad_norm": 0.34475892782211304, "learning_rate": 1.7573970338771647e-05, "loss": 0.4378, "step": 21435 }, { "epoch": 0.45462450425229584, "grad_norm": 0.3460892140865326, "learning_rate": 1.75737525772478e-05, "loss": 0.5421, "step": 21436 }, { "epoch": 0.45464571271022886, "grad_norm": 0.3206620514392853, "learning_rate": 1.7573534807300495e-05, "loss": 0.517, "step": 21437 }, { "epoch": 0.4546669211681619, "grad_norm": 0.3540535867214203, "learning_rate": 1.757331702892997e-05, "loss": 0.5086, "step": 21438 }, { "epoch": 0.4546881296260949, "grad_norm": 0.3086695671081543, "learning_rate": 1.7573099242136467e-05, "loss": 0.5475, "step": 21439 }, { "epoch": 0.4547093380840279, "grad_norm": 0.3493863046169281, "learning_rate": 1.7572881446920228e-05, "loss": 0.5655, "step": 21440 }, { "epoch": 0.45473054654196093, "grad_norm": 0.3689326345920563, "learning_rate": 1.7572663643281497e-05, "loss": 0.5147, "step": 21441 }, { "epoch": 0.45475175499989395, "grad_norm": 0.3534545600414276, "learning_rate": 1.7572445831220514e-05, "loss": 0.5116, "step": 21442 }, { "epoch": 0.454772963457827, "grad_norm": 0.3662739098072052, "learning_rate": 1.7572228010737526e-05, "loss": 0.5927, "step": 21443 }, { "epoch": 0.45479417191576, "grad_norm": 0.36318239569664, "learning_rate": 1.7572010181832767e-05, "loss": 0.5292, "step": 21444 }, { "epoch": 0.454815380373693, "grad_norm": 0.3277365565299988, "learning_rate": 1.7571792344506485e-05, "loss": 0.3936, "step": 21445 }, { "epoch": 0.45483658883162603, "grad_norm": 0.35631635785102844, "learning_rate": 1.7571574498758924e-05, "loss": 0.5618, "step": 21446 }, { "epoch": 0.45485779728955905, "grad_norm": 0.35249993205070496, "learning_rate": 1.757135664459032e-05, "loss": 0.5941, "step": 21447 }, { "epoch": 0.4548790057474921, "grad_norm": 0.39628177881240845, "learning_rate": 1.757113878200092e-05, "loss": 0.5504, "step": 21448 }, { "epoch": 0.45490021420542515, "grad_norm": 0.3651251494884491, "learning_rate": 1.7570920910990968e-05, "loss": 0.5188, "step": 21449 }, { "epoch": 0.45492142266335817, "grad_norm": 0.35788214206695557, "learning_rate": 1.7570703031560704e-05, "loss": 0.5138, "step": 21450 }, { "epoch": 0.4549426311212912, "grad_norm": 0.33901554346084595, "learning_rate": 1.7570485143710367e-05, "loss": 0.4837, "step": 21451 }, { "epoch": 0.4549638395792242, "grad_norm": 0.34834450483322144, "learning_rate": 1.75702672474402e-05, "loss": 0.4975, "step": 21452 }, { "epoch": 0.4549850480371572, "grad_norm": 0.3436940908432007, "learning_rate": 1.7570049342750453e-05, "loss": 0.5208, "step": 21453 }, { "epoch": 0.45500625649509024, "grad_norm": 0.43120133876800537, "learning_rate": 1.7569831429641356e-05, "loss": 0.5846, "step": 21454 }, { "epoch": 0.45502746495302326, "grad_norm": 0.32686859369277954, "learning_rate": 1.7569613508113165e-05, "loss": 0.458, "step": 21455 }, { "epoch": 0.4550486734109563, "grad_norm": 0.36008402705192566, "learning_rate": 1.7569395578166114e-05, "loss": 0.4917, "step": 21456 }, { "epoch": 0.4550698818688893, "grad_norm": 0.37138301134109497, "learning_rate": 1.7569177639800446e-05, "loss": 0.5367, "step": 21457 }, { "epoch": 0.4550910903268223, "grad_norm": 0.3678358197212219, "learning_rate": 1.7568959693016406e-05, "loss": 0.4971, "step": 21458 }, { "epoch": 0.45511229878475534, "grad_norm": 0.344764769077301, "learning_rate": 1.756874173781423e-05, "loss": 0.5126, "step": 21459 }, { "epoch": 0.45513350724268836, "grad_norm": 0.351618230342865, "learning_rate": 1.756852377419417e-05, "loss": 0.4359, "step": 21460 }, { "epoch": 0.4551547157006214, "grad_norm": 0.4166509807109833, "learning_rate": 1.7568305802156464e-05, "loss": 0.4772, "step": 21461 }, { "epoch": 0.45517592415855446, "grad_norm": 0.3326479494571686, "learning_rate": 1.756808782170135e-05, "loss": 0.4663, "step": 21462 }, { "epoch": 0.4551971326164875, "grad_norm": 0.3238529562950134, "learning_rate": 1.7567869832829076e-05, "loss": 0.4417, "step": 21463 }, { "epoch": 0.4552183410744205, "grad_norm": 0.3564773499965668, "learning_rate": 1.7567651835539883e-05, "loss": 0.5087, "step": 21464 }, { "epoch": 0.4552395495323535, "grad_norm": 0.4029552638530731, "learning_rate": 1.756743382983402e-05, "loss": 0.5779, "step": 21465 }, { "epoch": 0.45526075799028654, "grad_norm": 0.4040586054325104, "learning_rate": 1.756721581571171e-05, "loss": 0.5216, "step": 21466 }, { "epoch": 0.45528196644821955, "grad_norm": 0.39159202575683594, "learning_rate": 1.7566997793173217e-05, "loss": 0.5454, "step": 21467 }, { "epoch": 0.4553031749061526, "grad_norm": 0.3744705617427826, "learning_rate": 1.7566779762218774e-05, "loss": 0.5889, "step": 21468 }, { "epoch": 0.4553243833640856, "grad_norm": 0.3371872305870056, "learning_rate": 1.7566561722848622e-05, "loss": 0.4827, "step": 21469 }, { "epoch": 0.4553455918220186, "grad_norm": 0.3634803891181946, "learning_rate": 1.7566343675063008e-05, "loss": 0.4684, "step": 21470 }, { "epoch": 0.45536680027995163, "grad_norm": 0.40817365050315857, "learning_rate": 1.756612561886217e-05, "loss": 0.4887, "step": 21471 }, { "epoch": 0.45538800873788465, "grad_norm": 0.3061600625514984, "learning_rate": 1.7565907554246356e-05, "loss": 0.4202, "step": 21472 }, { "epoch": 0.4554092171958177, "grad_norm": 0.3911423981189728, "learning_rate": 1.7565689481215802e-05, "loss": 0.5252, "step": 21473 }, { "epoch": 0.4554304256537507, "grad_norm": 0.3452693819999695, "learning_rate": 1.7565471399770756e-05, "loss": 0.4596, "step": 21474 }, { "epoch": 0.4554516341116837, "grad_norm": 0.34410664439201355, "learning_rate": 1.7565253309911457e-05, "loss": 0.6163, "step": 21475 }, { "epoch": 0.4554728425696168, "grad_norm": 0.38769859075546265, "learning_rate": 1.756503521163815e-05, "loss": 0.4096, "step": 21476 }, { "epoch": 0.4554940510275498, "grad_norm": 0.32212021946907043, "learning_rate": 1.7564817104951077e-05, "loss": 0.5587, "step": 21477 }, { "epoch": 0.4555152594854828, "grad_norm": 0.3503962755203247, "learning_rate": 1.756459898985048e-05, "loss": 0.5411, "step": 21478 }, { "epoch": 0.45553646794341585, "grad_norm": 0.3393911123275757, "learning_rate": 1.7564380866336597e-05, "loss": 0.4631, "step": 21479 }, { "epoch": 0.45555767640134887, "grad_norm": 0.34946271777153015, "learning_rate": 1.756416273440968e-05, "loss": 0.496, "step": 21480 }, { "epoch": 0.4555788848592819, "grad_norm": 0.3753494620323181, "learning_rate": 1.756394459406997e-05, "loss": 0.5469, "step": 21481 }, { "epoch": 0.4556000933172149, "grad_norm": 0.3247535824775696, "learning_rate": 1.75637264453177e-05, "loss": 0.4934, "step": 21482 }, { "epoch": 0.4556213017751479, "grad_norm": 0.3490808308124542, "learning_rate": 1.756350828815312e-05, "loss": 0.5074, "step": 21483 }, { "epoch": 0.45564251023308094, "grad_norm": 0.32987987995147705, "learning_rate": 1.7563290122576477e-05, "loss": 0.5364, "step": 21484 }, { "epoch": 0.45566371869101396, "grad_norm": 0.3535044491291046, "learning_rate": 1.7563071948588005e-05, "loss": 0.5972, "step": 21485 }, { "epoch": 0.455684927148947, "grad_norm": 0.4153140187263489, "learning_rate": 1.7562853766187948e-05, "loss": 0.5643, "step": 21486 }, { "epoch": 0.45570613560688, "grad_norm": 0.38350555300712585, "learning_rate": 1.7562635575376555e-05, "loss": 0.5023, "step": 21487 }, { "epoch": 0.455727344064813, "grad_norm": 0.5378945469856262, "learning_rate": 1.756241737615406e-05, "loss": 0.5959, "step": 21488 }, { "epoch": 0.4557485525227461, "grad_norm": 0.4205207824707031, "learning_rate": 1.7562199168520715e-05, "loss": 0.5195, "step": 21489 }, { "epoch": 0.4557697609806791, "grad_norm": 0.45182326436042786, "learning_rate": 1.7561980952476753e-05, "loss": 0.432, "step": 21490 }, { "epoch": 0.45579096943861214, "grad_norm": 0.35137054324150085, "learning_rate": 1.7561762728022424e-05, "loss": 0.5616, "step": 21491 }, { "epoch": 0.45581217789654516, "grad_norm": 0.41903430223464966, "learning_rate": 1.7561544495157966e-05, "loss": 0.5124, "step": 21492 }, { "epoch": 0.4558333863544782, "grad_norm": 0.3573378920555115, "learning_rate": 1.7561326253883627e-05, "loss": 0.6134, "step": 21493 }, { "epoch": 0.4558545948124112, "grad_norm": 0.32850512862205505, "learning_rate": 1.7561108004199646e-05, "loss": 0.545, "step": 21494 }, { "epoch": 0.4558758032703442, "grad_norm": 0.34596478939056396, "learning_rate": 1.7560889746106268e-05, "loss": 0.4939, "step": 21495 }, { "epoch": 0.45589701172827723, "grad_norm": 0.36577051877975464, "learning_rate": 1.756067147960373e-05, "loss": 0.4999, "step": 21496 }, { "epoch": 0.45591822018621025, "grad_norm": 0.3236912488937378, "learning_rate": 1.756045320469228e-05, "loss": 0.4551, "step": 21497 }, { "epoch": 0.4559394286441433, "grad_norm": 0.34973135590553284, "learning_rate": 1.756023492137216e-05, "loss": 0.4728, "step": 21498 }, { "epoch": 0.4559606371020763, "grad_norm": 0.42756253480911255, "learning_rate": 1.7560016629643613e-05, "loss": 0.4534, "step": 21499 }, { "epoch": 0.4559818455600093, "grad_norm": 0.5978688597679138, "learning_rate": 1.755979832950688e-05, "loss": 0.4632, "step": 21500 }, { "epoch": 0.45600305401794233, "grad_norm": 0.4277459383010864, "learning_rate": 1.7559580020962208e-05, "loss": 0.3906, "step": 21501 }, { "epoch": 0.45602426247587535, "grad_norm": 0.37793925404548645, "learning_rate": 1.7559361704009835e-05, "loss": 0.5632, "step": 21502 }, { "epoch": 0.45604547093380843, "grad_norm": 0.3526048958301544, "learning_rate": 1.7559143378650006e-05, "loss": 0.4616, "step": 21503 }, { "epoch": 0.45606667939174145, "grad_norm": 0.3620706796646118, "learning_rate": 1.7558925044882962e-05, "loss": 0.5113, "step": 21504 }, { "epoch": 0.45608788784967447, "grad_norm": 0.357210248708725, "learning_rate": 1.7558706702708946e-05, "loss": 0.5208, "step": 21505 }, { "epoch": 0.4561090963076075, "grad_norm": 0.355824738740921, "learning_rate": 1.7558488352128207e-05, "loss": 0.4645, "step": 21506 }, { "epoch": 0.4561303047655405, "grad_norm": 0.34816449880599976, "learning_rate": 1.7558269993140977e-05, "loss": 0.5395, "step": 21507 }, { "epoch": 0.4561515132234735, "grad_norm": 0.33516088128089905, "learning_rate": 1.7558051625747507e-05, "loss": 0.5265, "step": 21508 }, { "epoch": 0.45617272168140655, "grad_norm": 0.3516867756843567, "learning_rate": 1.755783324994804e-05, "loss": 0.6054, "step": 21509 }, { "epoch": 0.45619393013933957, "grad_norm": 0.3510444462299347, "learning_rate": 1.7557614865742815e-05, "loss": 0.4718, "step": 21510 }, { "epoch": 0.4562151385972726, "grad_norm": 0.348307341337204, "learning_rate": 1.7557396473132077e-05, "loss": 0.4719, "step": 21511 }, { "epoch": 0.4562363470552056, "grad_norm": 0.3619551360607147, "learning_rate": 1.755717807211607e-05, "loss": 0.4697, "step": 21512 }, { "epoch": 0.4562575555131386, "grad_norm": 0.37215980887413025, "learning_rate": 1.755695966269503e-05, "loss": 0.4529, "step": 21513 }, { "epoch": 0.45627876397107164, "grad_norm": 0.37317049503326416, "learning_rate": 1.755674124486921e-05, "loss": 0.4451, "step": 21514 }, { "epoch": 0.45629997242900466, "grad_norm": 0.34423893690109253, "learning_rate": 1.7556522818638845e-05, "loss": 0.4356, "step": 21515 }, { "epoch": 0.45632118088693774, "grad_norm": 0.35567963123321533, "learning_rate": 1.755630438400418e-05, "loss": 0.5169, "step": 21516 }, { "epoch": 0.45634238934487076, "grad_norm": 0.34678930044174194, "learning_rate": 1.755608594096546e-05, "loss": 0.4786, "step": 21517 }, { "epoch": 0.4563635978028038, "grad_norm": 0.4263976216316223, "learning_rate": 1.7555867489522928e-05, "loss": 0.552, "step": 21518 }, { "epoch": 0.4563848062607368, "grad_norm": 0.41468316316604614, "learning_rate": 1.7555649029676825e-05, "loss": 0.4927, "step": 21519 }, { "epoch": 0.4564060147186698, "grad_norm": 0.30182942748069763, "learning_rate": 1.7555430561427397e-05, "loss": 0.4232, "step": 21520 }, { "epoch": 0.45642722317660284, "grad_norm": 0.37478795647621155, "learning_rate": 1.7555212084774883e-05, "loss": 0.5403, "step": 21521 }, { "epoch": 0.45644843163453586, "grad_norm": 0.3447759747505188, "learning_rate": 1.7554993599719528e-05, "loss": 0.4932, "step": 21522 }, { "epoch": 0.4564696400924689, "grad_norm": 0.32374638319015503, "learning_rate": 1.7554775106261576e-05, "loss": 0.528, "step": 21523 }, { "epoch": 0.4564908485504019, "grad_norm": 0.3310778737068176, "learning_rate": 1.7554556604401263e-05, "loss": 0.4042, "step": 21524 }, { "epoch": 0.4565120570083349, "grad_norm": 0.302377849817276, "learning_rate": 1.755433809413884e-05, "loss": 0.449, "step": 21525 }, { "epoch": 0.45653326546626793, "grad_norm": 0.3772990107536316, "learning_rate": 1.7554119575474554e-05, "loss": 0.6227, "step": 21526 }, { "epoch": 0.45655447392420095, "grad_norm": 0.8535237312316895, "learning_rate": 1.755390104840864e-05, "loss": 0.5217, "step": 21527 }, { "epoch": 0.456575682382134, "grad_norm": 0.3143731355667114, "learning_rate": 1.755368251294134e-05, "loss": 0.5069, "step": 21528 }, { "epoch": 0.456596890840067, "grad_norm": 0.3529786765575409, "learning_rate": 1.7553463969072898e-05, "loss": 0.4245, "step": 21529 }, { "epoch": 0.45661809929800007, "grad_norm": 0.5098903179168701, "learning_rate": 1.755324541680356e-05, "loss": 0.4698, "step": 21530 }, { "epoch": 0.4566393077559331, "grad_norm": 0.3430855870246887, "learning_rate": 1.755302685613357e-05, "loss": 0.4795, "step": 21531 }, { "epoch": 0.4566605162138661, "grad_norm": 0.38869673013687134, "learning_rate": 1.755280828706317e-05, "loss": 0.5401, "step": 21532 }, { "epoch": 0.4566817246717991, "grad_norm": 0.3303247094154358, "learning_rate": 1.75525897095926e-05, "loss": 0.4407, "step": 21533 }, { "epoch": 0.45670293312973215, "grad_norm": 0.33127325773239136, "learning_rate": 1.7552371123722103e-05, "loss": 0.4343, "step": 21534 }, { "epoch": 0.45672414158766517, "grad_norm": 0.37040919065475464, "learning_rate": 1.755215252945193e-05, "loss": 0.5322, "step": 21535 }, { "epoch": 0.4567453500455982, "grad_norm": 0.5608642101287842, "learning_rate": 1.7551933926782316e-05, "loss": 0.5221, "step": 21536 }, { "epoch": 0.4567665585035312, "grad_norm": 0.3071282207965851, "learning_rate": 1.7551715315713507e-05, "loss": 0.4729, "step": 21537 }, { "epoch": 0.4567877669614642, "grad_norm": 0.3397931158542633, "learning_rate": 1.7551496696245747e-05, "loss": 0.4275, "step": 21538 }, { "epoch": 0.45680897541939725, "grad_norm": 0.6046732664108276, "learning_rate": 1.7551278068379275e-05, "loss": 0.5254, "step": 21539 }, { "epoch": 0.45683018387733026, "grad_norm": 0.5150575041770935, "learning_rate": 1.755105943211434e-05, "loss": 0.5647, "step": 21540 }, { "epoch": 0.4568513923352633, "grad_norm": 0.3537478744983673, "learning_rate": 1.7550840787451184e-05, "loss": 0.5565, "step": 21541 }, { "epoch": 0.4568726007931963, "grad_norm": 0.44476720690727234, "learning_rate": 1.7550622134390042e-05, "loss": 0.4793, "step": 21542 }, { "epoch": 0.4568938092511293, "grad_norm": 0.3416338860988617, "learning_rate": 1.755040347293117e-05, "loss": 0.4869, "step": 21543 }, { "epoch": 0.4569150177090624, "grad_norm": 0.3225724697113037, "learning_rate": 1.75501848030748e-05, "loss": 0.5398, "step": 21544 }, { "epoch": 0.4569362261669954, "grad_norm": 0.3111794590950012, "learning_rate": 1.7549966124821184e-05, "loss": 0.4713, "step": 21545 }, { "epoch": 0.45695743462492844, "grad_norm": 0.3108274042606354, "learning_rate": 1.7549747438170558e-05, "loss": 0.4356, "step": 21546 }, { "epoch": 0.45697864308286146, "grad_norm": 0.3404976725578308, "learning_rate": 1.754952874312317e-05, "loss": 0.4575, "step": 21547 }, { "epoch": 0.4569998515407945, "grad_norm": 0.3470596373081207, "learning_rate": 1.754931003967926e-05, "loss": 0.4578, "step": 21548 }, { "epoch": 0.4570210599987275, "grad_norm": 0.3406220078468323, "learning_rate": 1.7549091327839077e-05, "loss": 0.488, "step": 21549 }, { "epoch": 0.4570422684566605, "grad_norm": 0.40206071734428406, "learning_rate": 1.7548872607602856e-05, "loss": 0.5653, "step": 21550 }, { "epoch": 0.45706347691459354, "grad_norm": 0.3998592793941498, "learning_rate": 1.7548653878970848e-05, "loss": 0.6385, "step": 21551 }, { "epoch": 0.45708468537252656, "grad_norm": 0.3591254949569702, "learning_rate": 1.754843514194329e-05, "loss": 0.5645, "step": 21552 }, { "epoch": 0.4571058938304596, "grad_norm": 0.3479421138763428, "learning_rate": 1.754821639652043e-05, "loss": 0.4563, "step": 21553 }, { "epoch": 0.4571271022883926, "grad_norm": 0.35736316442489624, "learning_rate": 1.7547997642702505e-05, "loss": 0.494, "step": 21554 }, { "epoch": 0.4571483107463256, "grad_norm": 0.340938925743103, "learning_rate": 1.7547778880489768e-05, "loss": 0.5227, "step": 21555 }, { "epoch": 0.45716951920425863, "grad_norm": 0.3647269010543823, "learning_rate": 1.7547560109882455e-05, "loss": 0.512, "step": 21556 }, { "epoch": 0.4571907276621917, "grad_norm": 0.35589855909347534, "learning_rate": 1.7547341330880813e-05, "loss": 0.5568, "step": 21557 }, { "epoch": 0.45721193612012473, "grad_norm": 0.345634788274765, "learning_rate": 1.7547122543485077e-05, "loss": 0.5576, "step": 21558 }, { "epoch": 0.45723314457805775, "grad_norm": 0.31203314661979675, "learning_rate": 1.7546903747695503e-05, "loss": 0.5436, "step": 21559 }, { "epoch": 0.45725435303599077, "grad_norm": 0.459060400724411, "learning_rate": 1.7546684943512326e-05, "loss": 0.5631, "step": 21560 }, { "epoch": 0.4572755614939238, "grad_norm": 0.41611820459365845, "learning_rate": 1.7546466130935792e-05, "loss": 0.5069, "step": 21561 }, { "epoch": 0.4572967699518568, "grad_norm": 0.3282927870750427, "learning_rate": 1.7546247309966147e-05, "loss": 0.4909, "step": 21562 }, { "epoch": 0.4573179784097898, "grad_norm": 0.3430337905883789, "learning_rate": 1.754602848060363e-05, "loss": 0.5388, "step": 21563 }, { "epoch": 0.45733918686772285, "grad_norm": 0.3284456431865692, "learning_rate": 1.7545809642848478e-05, "loss": 0.4717, "step": 21564 }, { "epoch": 0.45736039532565587, "grad_norm": 0.3640495836734772, "learning_rate": 1.754559079670095e-05, "loss": 0.4598, "step": 21565 }, { "epoch": 0.4573816037835889, "grad_norm": 0.36367499828338623, "learning_rate": 1.754537194216128e-05, "loss": 0.5209, "step": 21566 }, { "epoch": 0.4574028122415219, "grad_norm": 0.33630362153053284, "learning_rate": 1.7545153079229714e-05, "loss": 0.4404, "step": 21567 }, { "epoch": 0.4574240206994549, "grad_norm": 0.33511799573898315, "learning_rate": 1.7544934207906492e-05, "loss": 0.4821, "step": 21568 }, { "epoch": 0.45744522915738794, "grad_norm": 0.31999674439430237, "learning_rate": 1.754471532819186e-05, "loss": 0.4477, "step": 21569 }, { "epoch": 0.45746643761532096, "grad_norm": 0.45489931106567383, "learning_rate": 1.754449644008606e-05, "loss": 0.4623, "step": 21570 }, { "epoch": 0.45748764607325404, "grad_norm": 0.37353789806365967, "learning_rate": 1.7544277543589338e-05, "loss": 0.4879, "step": 21571 }, { "epoch": 0.45750885453118706, "grad_norm": 0.34999945759773254, "learning_rate": 1.7544058638701937e-05, "loss": 0.4942, "step": 21572 }, { "epoch": 0.4575300629891201, "grad_norm": 0.3710089325904846, "learning_rate": 1.75438397254241e-05, "loss": 0.6237, "step": 21573 }, { "epoch": 0.4575512714470531, "grad_norm": 0.34889474511146545, "learning_rate": 1.7543620803756066e-05, "loss": 0.4867, "step": 21574 }, { "epoch": 0.4575724799049861, "grad_norm": 0.3212757408618927, "learning_rate": 1.7543401873698088e-05, "loss": 0.4563, "step": 21575 }, { "epoch": 0.45759368836291914, "grad_norm": 0.3267865777015686, "learning_rate": 1.75431829352504e-05, "loss": 0.4609, "step": 21576 }, { "epoch": 0.45761489682085216, "grad_norm": 0.3882855474948883, "learning_rate": 1.754296398841325e-05, "loss": 0.5606, "step": 21577 }, { "epoch": 0.4576361052787852, "grad_norm": 0.32374757528305054, "learning_rate": 1.7542745033186883e-05, "loss": 0.5426, "step": 21578 }, { "epoch": 0.4576573137367182, "grad_norm": 0.3103373050689697, "learning_rate": 1.754252606957154e-05, "loss": 0.3722, "step": 21579 }, { "epoch": 0.4576785221946512, "grad_norm": 0.3463500738143921, "learning_rate": 1.7542307097567464e-05, "loss": 0.5057, "step": 21580 }, { "epoch": 0.45769973065258424, "grad_norm": 0.4078833758831024, "learning_rate": 1.75420881171749e-05, "loss": 0.5123, "step": 21581 }, { "epoch": 0.45772093911051726, "grad_norm": 0.33585256338119507, "learning_rate": 1.754186912839409e-05, "loss": 0.4952, "step": 21582 }, { "epoch": 0.4577421475684503, "grad_norm": 0.4258017838001251, "learning_rate": 1.754165013122528e-05, "loss": 0.4505, "step": 21583 }, { "epoch": 0.4577633560263833, "grad_norm": 0.36680540442466736, "learning_rate": 1.754143112566871e-05, "loss": 0.5052, "step": 21584 }, { "epoch": 0.45778456448431637, "grad_norm": 0.3530402183532715, "learning_rate": 1.754121211172463e-05, "loss": 0.4975, "step": 21585 }, { "epoch": 0.4578057729422494, "grad_norm": 0.5329728722572327, "learning_rate": 1.7540993089393277e-05, "loss": 0.495, "step": 21586 }, { "epoch": 0.4578269814001824, "grad_norm": 0.33603113889694214, "learning_rate": 1.7540774058674896e-05, "loss": 0.4534, "step": 21587 }, { "epoch": 0.45784818985811543, "grad_norm": 0.3245990574359894, "learning_rate": 1.754055501956973e-05, "loss": 0.5157, "step": 21588 }, { "epoch": 0.45786939831604845, "grad_norm": 0.3698080778121948, "learning_rate": 1.754033597207803e-05, "loss": 0.5541, "step": 21589 }, { "epoch": 0.45789060677398147, "grad_norm": 0.46987470984458923, "learning_rate": 1.754011691620003e-05, "loss": 0.5583, "step": 21590 }, { "epoch": 0.4579118152319145, "grad_norm": 0.34837865829467773, "learning_rate": 1.753989785193598e-05, "loss": 0.5694, "step": 21591 }, { "epoch": 0.4579330236898475, "grad_norm": 0.38149628043174744, "learning_rate": 1.7539678779286118e-05, "loss": 0.5181, "step": 21592 }, { "epoch": 0.4579542321477805, "grad_norm": 0.34972602128982544, "learning_rate": 1.753945969825069e-05, "loss": 0.5438, "step": 21593 }, { "epoch": 0.45797544060571355, "grad_norm": 0.49265938997268677, "learning_rate": 1.7539240608829945e-05, "loss": 0.4916, "step": 21594 }, { "epoch": 0.45799664906364657, "grad_norm": 0.3809538781642914, "learning_rate": 1.753902151102412e-05, "loss": 0.5119, "step": 21595 }, { "epoch": 0.4580178575215796, "grad_norm": 0.3947135806083679, "learning_rate": 1.753880240483346e-05, "loss": 0.5269, "step": 21596 }, { "epoch": 0.4580390659795126, "grad_norm": 0.3462706208229065, "learning_rate": 1.753858329025821e-05, "loss": 0.4689, "step": 21597 }, { "epoch": 0.4580602744374457, "grad_norm": 0.3315574824810028, "learning_rate": 1.7538364167298612e-05, "loss": 0.5276, "step": 21598 }, { "epoch": 0.4580814828953787, "grad_norm": 0.5427600741386414, "learning_rate": 1.753814503595491e-05, "loss": 0.5063, "step": 21599 }, { "epoch": 0.4581026913533117, "grad_norm": 0.350933313369751, "learning_rate": 1.753792589622735e-05, "loss": 0.4776, "step": 21600 }, { "epoch": 0.45812389981124474, "grad_norm": 0.37531936168670654, "learning_rate": 1.7537706748116175e-05, "loss": 0.5484, "step": 21601 }, { "epoch": 0.45814510826917776, "grad_norm": 0.3159514367580414, "learning_rate": 1.7537487591621628e-05, "loss": 0.4811, "step": 21602 }, { "epoch": 0.4581663167271108, "grad_norm": 0.6220207214355469, "learning_rate": 1.7537268426743952e-05, "loss": 0.4886, "step": 21603 }, { "epoch": 0.4581875251850438, "grad_norm": 0.3416651785373688, "learning_rate": 1.7537049253483394e-05, "loss": 0.5712, "step": 21604 }, { "epoch": 0.4582087336429768, "grad_norm": 0.3850628733634949, "learning_rate": 1.7536830071840188e-05, "loss": 0.49, "step": 21605 }, { "epoch": 0.45822994210090984, "grad_norm": 0.3542110025882721, "learning_rate": 1.753661088181459e-05, "loss": 0.5446, "step": 21606 }, { "epoch": 0.45825115055884286, "grad_norm": 0.30545035004615784, "learning_rate": 1.7536391683406845e-05, "loss": 0.4505, "step": 21607 }, { "epoch": 0.4582723590167759, "grad_norm": 0.3178693354129791, "learning_rate": 1.7536172476617183e-05, "loss": 0.4211, "step": 21608 }, { "epoch": 0.4582935674747089, "grad_norm": 0.44214752316474915, "learning_rate": 1.7535953261445858e-05, "loss": 0.5951, "step": 21609 }, { "epoch": 0.4583147759326419, "grad_norm": 0.4495251178741455, "learning_rate": 1.753573403789311e-05, "loss": 0.4909, "step": 21610 }, { "epoch": 0.45833598439057494, "grad_norm": 0.3600617051124573, "learning_rate": 1.7535514805959185e-05, "loss": 0.5459, "step": 21611 }, { "epoch": 0.458357192848508, "grad_norm": 0.42552635073661804, "learning_rate": 1.7535295565644324e-05, "loss": 0.6058, "step": 21612 }, { "epoch": 0.45837840130644103, "grad_norm": 0.41927042603492737, "learning_rate": 1.7535076316948774e-05, "loss": 0.4391, "step": 21613 }, { "epoch": 0.45839960976437405, "grad_norm": 0.32832276821136475, "learning_rate": 1.7534857059872777e-05, "loss": 0.5525, "step": 21614 }, { "epoch": 0.45842081822230707, "grad_norm": 0.38724076747894287, "learning_rate": 1.753463779441658e-05, "loss": 0.518, "step": 21615 }, { "epoch": 0.4584420266802401, "grad_norm": 0.38019871711730957, "learning_rate": 1.753441852058042e-05, "loss": 0.5378, "step": 21616 }, { "epoch": 0.4584632351381731, "grad_norm": 0.3349902331829071, "learning_rate": 1.753419923836455e-05, "loss": 0.4876, "step": 21617 }, { "epoch": 0.45848444359610613, "grad_norm": 0.34310632944107056, "learning_rate": 1.7533979947769208e-05, "loss": 0.4333, "step": 21618 }, { "epoch": 0.45850565205403915, "grad_norm": 0.4782639443874359, "learning_rate": 1.7533760648794636e-05, "loss": 0.5103, "step": 21619 }, { "epoch": 0.45852686051197217, "grad_norm": 0.4225730001926422, "learning_rate": 1.753354134144108e-05, "loss": 0.5312, "step": 21620 }, { "epoch": 0.4585480689699052, "grad_norm": 0.3416266143321991, "learning_rate": 1.753332202570879e-05, "loss": 0.5321, "step": 21621 }, { "epoch": 0.4585692774278382, "grad_norm": 0.32162222266197205, "learning_rate": 1.7533102701598002e-05, "loss": 0.5253, "step": 21622 }, { "epoch": 0.4585904858857712, "grad_norm": 0.3561372756958008, "learning_rate": 1.7532883369108962e-05, "loss": 0.5031, "step": 21623 }, { "epoch": 0.45861169434370425, "grad_norm": 0.3758942484855652, "learning_rate": 1.7532664028241915e-05, "loss": 0.4967, "step": 21624 }, { "epoch": 0.45863290280163727, "grad_norm": 0.3589807152748108, "learning_rate": 1.7532444678997103e-05, "loss": 0.4442, "step": 21625 }, { "epoch": 0.45865411125957034, "grad_norm": 0.3539152145385742, "learning_rate": 1.7532225321374773e-05, "loss": 0.4774, "step": 21626 }, { "epoch": 0.45867531971750336, "grad_norm": 0.38951021432876587, "learning_rate": 1.7532005955375168e-05, "loss": 0.4392, "step": 21627 }, { "epoch": 0.4586965281754364, "grad_norm": 0.3654845356941223, "learning_rate": 1.7531786580998532e-05, "loss": 0.5166, "step": 21628 }, { "epoch": 0.4587177366333694, "grad_norm": 0.35521018505096436, "learning_rate": 1.7531567198245107e-05, "loss": 0.6004, "step": 21629 }, { "epoch": 0.4587389450913024, "grad_norm": 0.4133651554584503, "learning_rate": 1.753134780711514e-05, "loss": 0.4993, "step": 21630 }, { "epoch": 0.45876015354923544, "grad_norm": 0.32355448603630066, "learning_rate": 1.7531128407608872e-05, "loss": 0.4256, "step": 21631 }, { "epoch": 0.45878136200716846, "grad_norm": 0.34467780590057373, "learning_rate": 1.7530908999726547e-05, "loss": 0.4853, "step": 21632 }, { "epoch": 0.4588025704651015, "grad_norm": 0.3968754708766937, "learning_rate": 1.7530689583468415e-05, "loss": 0.5869, "step": 21633 }, { "epoch": 0.4588237789230345, "grad_norm": 0.37476786971092224, "learning_rate": 1.7530470158834713e-05, "loss": 0.5353, "step": 21634 }, { "epoch": 0.4588449873809675, "grad_norm": 0.39060792326927185, "learning_rate": 1.7530250725825687e-05, "loss": 0.5634, "step": 21635 }, { "epoch": 0.45886619583890054, "grad_norm": 0.3644561767578125, "learning_rate": 1.7530031284441576e-05, "loss": 0.4871, "step": 21636 }, { "epoch": 0.45888740429683356, "grad_norm": 0.3810592591762543, "learning_rate": 1.752981183468264e-05, "loss": 0.5665, "step": 21637 }, { "epoch": 0.4589086127547666, "grad_norm": 0.3403589129447937, "learning_rate": 1.7529592376549107e-05, "loss": 0.4478, "step": 21638 }, { "epoch": 0.45892982121269965, "grad_norm": 0.3886498212814331, "learning_rate": 1.7529372910041226e-05, "loss": 0.5123, "step": 21639 }, { "epoch": 0.45895102967063267, "grad_norm": 0.33941835165023804, "learning_rate": 1.7529153435159246e-05, "loss": 0.513, "step": 21640 }, { "epoch": 0.4589722381285657, "grad_norm": 0.31313788890838623, "learning_rate": 1.7528933951903403e-05, "loss": 0.481, "step": 21641 }, { "epoch": 0.4589934465864987, "grad_norm": 0.3365803062915802, "learning_rate": 1.7528714460273945e-05, "loss": 0.4321, "step": 21642 }, { "epoch": 0.45901465504443173, "grad_norm": 0.5326869487762451, "learning_rate": 1.752849496027112e-05, "loss": 0.4999, "step": 21643 }, { "epoch": 0.45903586350236475, "grad_norm": 0.42891108989715576, "learning_rate": 1.7528275451895165e-05, "loss": 0.5973, "step": 21644 }, { "epoch": 0.45905707196029777, "grad_norm": 0.3552699089050293, "learning_rate": 1.7528055935146325e-05, "loss": 0.5001, "step": 21645 }, { "epoch": 0.4590782804182308, "grad_norm": 0.43125084042549133, "learning_rate": 1.7527836410024852e-05, "loss": 0.5081, "step": 21646 }, { "epoch": 0.4590994888761638, "grad_norm": 0.4226226210594177, "learning_rate": 1.7527616876530983e-05, "loss": 0.5183, "step": 21647 }, { "epoch": 0.45912069733409683, "grad_norm": 0.4720360338687897, "learning_rate": 1.7527397334664963e-05, "loss": 0.6237, "step": 21648 }, { "epoch": 0.45914190579202985, "grad_norm": 0.32716163992881775, "learning_rate": 1.752717778442704e-05, "loss": 0.5329, "step": 21649 }, { "epoch": 0.45916311424996287, "grad_norm": 0.33203935623168945, "learning_rate": 1.752695822581745e-05, "loss": 0.5475, "step": 21650 }, { "epoch": 0.4591843227078959, "grad_norm": 0.46493998169898987, "learning_rate": 1.7526738658836446e-05, "loss": 0.5359, "step": 21651 }, { "epoch": 0.4592055311658289, "grad_norm": 0.3428768217563629, "learning_rate": 1.752651908348427e-05, "loss": 0.5096, "step": 21652 }, { "epoch": 0.459226739623762, "grad_norm": 0.33743420243263245, "learning_rate": 1.752629949976116e-05, "loss": 0.5268, "step": 21653 }, { "epoch": 0.459247948081695, "grad_norm": 0.42416197061538696, "learning_rate": 1.752607990766737e-05, "loss": 0.4495, "step": 21654 }, { "epoch": 0.459269156539628, "grad_norm": 0.3159845471382141, "learning_rate": 1.7525860307203137e-05, "loss": 0.4311, "step": 21655 }, { "epoch": 0.45929036499756104, "grad_norm": 0.3551749885082245, "learning_rate": 1.7525640698368707e-05, "loss": 0.5191, "step": 21656 }, { "epoch": 0.45931157345549406, "grad_norm": 0.44289228320121765, "learning_rate": 1.7525421081164324e-05, "loss": 0.4721, "step": 21657 }, { "epoch": 0.4593327819134271, "grad_norm": 0.35137471556663513, "learning_rate": 1.7525201455590236e-05, "loss": 0.5132, "step": 21658 }, { "epoch": 0.4593539903713601, "grad_norm": 0.3563452661037445, "learning_rate": 1.7524981821646686e-05, "loss": 0.4312, "step": 21659 }, { "epoch": 0.4593751988292931, "grad_norm": 0.3723102807998657, "learning_rate": 1.7524762179333912e-05, "loss": 0.4773, "step": 21660 }, { "epoch": 0.45939640728722614, "grad_norm": 0.38658902049064636, "learning_rate": 1.7524542528652164e-05, "loss": 0.4867, "step": 21661 }, { "epoch": 0.45941761574515916, "grad_norm": 0.3348415791988373, "learning_rate": 1.7524322869601684e-05, "loss": 0.4671, "step": 21662 }, { "epoch": 0.4594388242030922, "grad_norm": 0.42254167795181274, "learning_rate": 1.752410320218272e-05, "loss": 0.5271, "step": 21663 }, { "epoch": 0.4594600326610252, "grad_norm": 0.383522629737854, "learning_rate": 1.7523883526395513e-05, "loss": 0.5025, "step": 21664 }, { "epoch": 0.4594812411189582, "grad_norm": 0.35094600915908813, "learning_rate": 1.7523663842240308e-05, "loss": 0.469, "step": 21665 }, { "epoch": 0.45950244957689124, "grad_norm": 0.4048953354358673, "learning_rate": 1.752344414971735e-05, "loss": 0.5102, "step": 21666 }, { "epoch": 0.4595236580348243, "grad_norm": 0.37703341245651245, "learning_rate": 1.752322444882688e-05, "loss": 0.5213, "step": 21667 }, { "epoch": 0.45954486649275733, "grad_norm": 0.3578808605670929, "learning_rate": 1.7523004739569148e-05, "loss": 0.5324, "step": 21668 }, { "epoch": 0.45956607495069035, "grad_norm": 0.3460399806499481, "learning_rate": 1.7522785021944394e-05, "loss": 0.5558, "step": 21669 }, { "epoch": 0.45958728340862337, "grad_norm": 0.35488787293434143, "learning_rate": 1.7522565295952866e-05, "loss": 0.5437, "step": 21670 }, { "epoch": 0.4596084918665564, "grad_norm": 0.37373897433280945, "learning_rate": 1.7522345561594804e-05, "loss": 0.5392, "step": 21671 }, { "epoch": 0.4596297003244894, "grad_norm": 0.4960678815841675, "learning_rate": 1.7522125818870458e-05, "loss": 0.5387, "step": 21672 }, { "epoch": 0.45965090878242243, "grad_norm": 0.32998213171958923, "learning_rate": 1.7521906067780063e-05, "loss": 0.4821, "step": 21673 }, { "epoch": 0.45967211724035545, "grad_norm": 0.3341652750968933, "learning_rate": 1.7521686308323876e-05, "loss": 0.4555, "step": 21674 }, { "epoch": 0.45969332569828847, "grad_norm": 0.3454829454421997, "learning_rate": 1.752146654050213e-05, "loss": 0.438, "step": 21675 }, { "epoch": 0.4597145341562215, "grad_norm": 0.45514389872550964, "learning_rate": 1.7521246764315077e-05, "loss": 0.508, "step": 21676 }, { "epoch": 0.4597357426141545, "grad_norm": 0.35377633571624756, "learning_rate": 1.7521026979762957e-05, "loss": 0.4914, "step": 21677 }, { "epoch": 0.4597569510720875, "grad_norm": 0.3444683253765106, "learning_rate": 1.7520807186846014e-05, "loss": 0.5096, "step": 21678 }, { "epoch": 0.45977815953002055, "grad_norm": 0.36513426899909973, "learning_rate": 1.75205873855645e-05, "loss": 0.5373, "step": 21679 }, { "epoch": 0.4597993679879536, "grad_norm": 0.356365829706192, "learning_rate": 1.7520367575918652e-05, "loss": 0.4591, "step": 21680 }, { "epoch": 0.45982057644588664, "grad_norm": 0.3591095209121704, "learning_rate": 1.7520147757908717e-05, "loss": 0.5701, "step": 21681 }, { "epoch": 0.45984178490381966, "grad_norm": 0.3087790906429291, "learning_rate": 1.7519927931534938e-05, "loss": 0.4527, "step": 21682 }, { "epoch": 0.4598629933617527, "grad_norm": 0.34209954738616943, "learning_rate": 1.751970809679756e-05, "loss": 0.5224, "step": 21683 }, { "epoch": 0.4598842018196857, "grad_norm": 0.3380195200443268, "learning_rate": 1.7519488253696825e-05, "loss": 0.4535, "step": 21684 }, { "epoch": 0.4599054102776187, "grad_norm": 0.36725205183029175, "learning_rate": 1.7519268402232988e-05, "loss": 0.4974, "step": 21685 }, { "epoch": 0.45992661873555174, "grad_norm": 0.3870013356208801, "learning_rate": 1.751904854240628e-05, "loss": 0.5349, "step": 21686 }, { "epoch": 0.45994782719348476, "grad_norm": 0.3673126995563507, "learning_rate": 1.751882867421695e-05, "loss": 0.4922, "step": 21687 }, { "epoch": 0.4599690356514178, "grad_norm": 0.3636804223060608, "learning_rate": 1.7518608797665252e-05, "loss": 0.458, "step": 21688 }, { "epoch": 0.4599902441093508, "grad_norm": 0.4482518434524536, "learning_rate": 1.7518388912751417e-05, "loss": 0.5805, "step": 21689 }, { "epoch": 0.4600114525672838, "grad_norm": 0.3394705057144165, "learning_rate": 1.7518169019475696e-05, "loss": 0.4667, "step": 21690 }, { "epoch": 0.46003266102521684, "grad_norm": 0.35939446091651917, "learning_rate": 1.7517949117838334e-05, "loss": 0.503, "step": 21691 }, { "epoch": 0.46005386948314986, "grad_norm": 0.48768430948257446, "learning_rate": 1.751772920783957e-05, "loss": 0.509, "step": 21692 }, { "epoch": 0.4600750779410829, "grad_norm": 0.3188551366329193, "learning_rate": 1.7517509289479657e-05, "loss": 0.4946, "step": 21693 }, { "epoch": 0.46009628639901595, "grad_norm": 0.45493200421333313, "learning_rate": 1.7517289362758837e-05, "loss": 0.5741, "step": 21694 }, { "epoch": 0.46011749485694897, "grad_norm": 0.3084021210670471, "learning_rate": 1.751706942767735e-05, "loss": 0.417, "step": 21695 }, { "epoch": 0.460138703314882, "grad_norm": 0.3801802694797516, "learning_rate": 1.7516849484235443e-05, "loss": 0.4677, "step": 21696 }, { "epoch": 0.460159911772815, "grad_norm": 0.48817914724349976, "learning_rate": 1.7516629532433364e-05, "loss": 0.3834, "step": 21697 }, { "epoch": 0.46018112023074803, "grad_norm": 0.3929706811904907, "learning_rate": 1.751640957227135e-05, "loss": 0.4646, "step": 21698 }, { "epoch": 0.46020232868868105, "grad_norm": 0.4283580780029297, "learning_rate": 1.7516189603749655e-05, "loss": 0.4584, "step": 21699 }, { "epoch": 0.46022353714661407, "grad_norm": 0.3447483777999878, "learning_rate": 1.751596962686852e-05, "loss": 0.5014, "step": 21700 }, { "epoch": 0.4602447456045471, "grad_norm": 0.34407007694244385, "learning_rate": 1.751574964162819e-05, "loss": 0.5578, "step": 21701 }, { "epoch": 0.4602659540624801, "grad_norm": 0.34628596901893616, "learning_rate": 1.7515529648028904e-05, "loss": 0.4353, "step": 21702 }, { "epoch": 0.46028716252041313, "grad_norm": 0.3293222188949585, "learning_rate": 1.7515309646070914e-05, "loss": 0.5103, "step": 21703 }, { "epoch": 0.46030837097834615, "grad_norm": 0.3519948124885559, "learning_rate": 1.7515089635754457e-05, "loss": 0.5084, "step": 21704 }, { "epoch": 0.46032957943627917, "grad_norm": 0.3322818875312805, "learning_rate": 1.7514869617079788e-05, "loss": 0.5314, "step": 21705 }, { "epoch": 0.4603507878942122, "grad_norm": 0.3619442880153656, "learning_rate": 1.7514649590047146e-05, "loss": 0.574, "step": 21706 }, { "epoch": 0.46037199635214526, "grad_norm": 0.3184897005558014, "learning_rate": 1.7514429554656773e-05, "loss": 0.5441, "step": 21707 }, { "epoch": 0.4603932048100783, "grad_norm": 0.392152339220047, "learning_rate": 1.751420951090892e-05, "loss": 0.4988, "step": 21708 }, { "epoch": 0.4604144132680113, "grad_norm": 0.34693995118141174, "learning_rate": 1.7513989458803828e-05, "loss": 0.5516, "step": 21709 }, { "epoch": 0.4604356217259443, "grad_norm": 0.3787817060947418, "learning_rate": 1.751376939834174e-05, "loss": 0.5818, "step": 21710 }, { "epoch": 0.46045683018387734, "grad_norm": 0.3242194354534149, "learning_rate": 1.7513549329522903e-05, "loss": 0.5013, "step": 21711 }, { "epoch": 0.46047803864181036, "grad_norm": 0.35647839307785034, "learning_rate": 1.7513329252347564e-05, "loss": 0.4914, "step": 21712 }, { "epoch": 0.4604992470997434, "grad_norm": 0.3210998773574829, "learning_rate": 1.7513109166815964e-05, "loss": 0.4736, "step": 21713 }, { "epoch": 0.4605204555576764, "grad_norm": 0.35764750838279724, "learning_rate": 1.751288907292835e-05, "loss": 0.5128, "step": 21714 }, { "epoch": 0.4605416640156094, "grad_norm": 0.32678601145744324, "learning_rate": 1.7512668970684967e-05, "loss": 0.5234, "step": 21715 }, { "epoch": 0.46056287247354244, "grad_norm": 0.3682573437690735, "learning_rate": 1.7512448860086057e-05, "loss": 0.456, "step": 21716 }, { "epoch": 0.46058408093147546, "grad_norm": 0.4472397267818451, "learning_rate": 1.751222874113187e-05, "loss": 0.4293, "step": 21717 }, { "epoch": 0.4606052893894085, "grad_norm": 0.3226622939109802, "learning_rate": 1.7512008613822642e-05, "loss": 0.4149, "step": 21718 }, { "epoch": 0.4606264978473415, "grad_norm": 0.3599700629711151, "learning_rate": 1.7511788478158624e-05, "loss": 0.5293, "step": 21719 }, { "epoch": 0.4606477063052745, "grad_norm": 0.35568782687187195, "learning_rate": 1.7511568334140064e-05, "loss": 0.4765, "step": 21720 }, { "epoch": 0.4606689147632076, "grad_norm": 0.3421294093132019, "learning_rate": 1.75113481817672e-05, "loss": 0.5503, "step": 21721 }, { "epoch": 0.4606901232211406, "grad_norm": 0.391841858625412, "learning_rate": 1.751112802104028e-05, "loss": 0.5469, "step": 21722 }, { "epoch": 0.46071133167907363, "grad_norm": 0.33664801716804504, "learning_rate": 1.751090785195955e-05, "loss": 0.5119, "step": 21723 }, { "epoch": 0.46073254013700665, "grad_norm": 0.33020249009132385, "learning_rate": 1.7510687674525254e-05, "loss": 0.4447, "step": 21724 }, { "epoch": 0.46075374859493967, "grad_norm": 0.38085824251174927, "learning_rate": 1.7510467488737632e-05, "loss": 0.4757, "step": 21725 }, { "epoch": 0.4607749570528727, "grad_norm": 0.3170619308948517, "learning_rate": 1.7510247294596937e-05, "loss": 0.4716, "step": 21726 }, { "epoch": 0.4607961655108057, "grad_norm": 0.33339905738830566, "learning_rate": 1.751002709210341e-05, "loss": 0.4982, "step": 21727 }, { "epoch": 0.46081737396873873, "grad_norm": 0.39193642139434814, "learning_rate": 1.7509806881257296e-05, "loss": 0.513, "step": 21728 }, { "epoch": 0.46083858242667175, "grad_norm": 0.30665624141693115, "learning_rate": 1.7509586662058842e-05, "loss": 0.476, "step": 21729 }, { "epoch": 0.46085979088460477, "grad_norm": 0.318848580121994, "learning_rate": 1.7509366434508287e-05, "loss": 0.4863, "step": 21730 }, { "epoch": 0.4608809993425378, "grad_norm": 0.3700562119483948, "learning_rate": 1.7509146198605883e-05, "loss": 0.4862, "step": 21731 }, { "epoch": 0.4609022078004708, "grad_norm": 0.3194465637207031, "learning_rate": 1.750892595435187e-05, "loss": 0.4657, "step": 21732 }, { "epoch": 0.46092341625840383, "grad_norm": 0.41170695424079895, "learning_rate": 1.7508705701746493e-05, "loss": 0.4922, "step": 21733 }, { "epoch": 0.46094462471633685, "grad_norm": 0.35967326164245605, "learning_rate": 1.7508485440790002e-05, "loss": 0.5005, "step": 21734 }, { "epoch": 0.4609658331742699, "grad_norm": 0.40415114164352417, "learning_rate": 1.7508265171482637e-05, "loss": 0.5138, "step": 21735 }, { "epoch": 0.46098704163220294, "grad_norm": 0.3748798370361328, "learning_rate": 1.7508044893824646e-05, "loss": 0.4683, "step": 21736 }, { "epoch": 0.46100825009013596, "grad_norm": 0.549963653087616, "learning_rate": 1.750782460781627e-05, "loss": 0.5415, "step": 21737 }, { "epoch": 0.461029458548069, "grad_norm": 0.30564987659454346, "learning_rate": 1.7507604313457762e-05, "loss": 0.4531, "step": 21738 }, { "epoch": 0.461050667006002, "grad_norm": 0.3964574337005615, "learning_rate": 1.7507384010749357e-05, "loss": 0.4631, "step": 21739 }, { "epoch": 0.461071875463935, "grad_norm": 0.3416667580604553, "learning_rate": 1.7507163699691308e-05, "loss": 0.4764, "step": 21740 }, { "epoch": 0.46109308392186804, "grad_norm": 0.3806948661804199, "learning_rate": 1.7506943380283855e-05, "loss": 0.4818, "step": 21741 }, { "epoch": 0.46111429237980106, "grad_norm": 0.3911910057067871, "learning_rate": 1.7506723052527243e-05, "loss": 0.568, "step": 21742 }, { "epoch": 0.4611355008377341, "grad_norm": 0.3854728043079376, "learning_rate": 1.7506502716421722e-05, "loss": 0.5508, "step": 21743 }, { "epoch": 0.4611567092956671, "grad_norm": 0.35584014654159546, "learning_rate": 1.7506282371967532e-05, "loss": 0.5517, "step": 21744 }, { "epoch": 0.4611779177536001, "grad_norm": 0.5602601766586304, "learning_rate": 1.750606201916492e-05, "loss": 0.4419, "step": 21745 }, { "epoch": 0.46119912621153314, "grad_norm": 0.37451833486557007, "learning_rate": 1.7505841658014133e-05, "loss": 0.5175, "step": 21746 }, { "epoch": 0.46122033466946616, "grad_norm": 0.3369627594947815, "learning_rate": 1.7505621288515412e-05, "loss": 0.4514, "step": 21747 }, { "epoch": 0.46124154312739923, "grad_norm": 0.3304598331451416, "learning_rate": 1.7505400910669005e-05, "loss": 0.5166, "step": 21748 }, { "epoch": 0.46126275158533225, "grad_norm": 0.3443736732006073, "learning_rate": 1.750518052447516e-05, "loss": 0.4961, "step": 21749 }, { "epoch": 0.4612839600432653, "grad_norm": 0.38915467262268066, "learning_rate": 1.7504960129934112e-05, "loss": 0.5229, "step": 21750 }, { "epoch": 0.4613051685011983, "grad_norm": 0.35191288590431213, "learning_rate": 1.7504739727046118e-05, "loss": 0.4927, "step": 21751 }, { "epoch": 0.4613263769591313, "grad_norm": 0.34985798597335815, "learning_rate": 1.7504519315811413e-05, "loss": 0.5534, "step": 21752 }, { "epoch": 0.46134758541706433, "grad_norm": 0.3719913363456726, "learning_rate": 1.7504298896230246e-05, "loss": 0.5416, "step": 21753 }, { "epoch": 0.46136879387499735, "grad_norm": 0.3223336935043335, "learning_rate": 1.7504078468302867e-05, "loss": 0.5231, "step": 21754 }, { "epoch": 0.46139000233293037, "grad_norm": 0.38377615809440613, "learning_rate": 1.7503858032029516e-05, "loss": 0.5007, "step": 21755 }, { "epoch": 0.4614112107908634, "grad_norm": 0.40687304735183716, "learning_rate": 1.7503637587410438e-05, "loss": 0.5426, "step": 21756 }, { "epoch": 0.4614324192487964, "grad_norm": 0.3488742411136627, "learning_rate": 1.7503417134445883e-05, "loss": 0.5434, "step": 21757 }, { "epoch": 0.46145362770672943, "grad_norm": 0.43552377820014954, "learning_rate": 1.7503196673136088e-05, "loss": 0.3995, "step": 21758 }, { "epoch": 0.46147483616466245, "grad_norm": 0.3817070424556732, "learning_rate": 1.7502976203481305e-05, "loss": 0.6134, "step": 21759 }, { "epoch": 0.46149604462259547, "grad_norm": 0.3528274893760681, "learning_rate": 1.7502755725481776e-05, "loss": 0.5771, "step": 21760 }, { "epoch": 0.4615172530805285, "grad_norm": 0.37951815128326416, "learning_rate": 1.750253523913775e-05, "loss": 0.4552, "step": 21761 }, { "epoch": 0.46153846153846156, "grad_norm": 0.38746917247772217, "learning_rate": 1.7502314744449466e-05, "loss": 0.5307, "step": 21762 }, { "epoch": 0.4615596699963946, "grad_norm": 0.36349380016326904, "learning_rate": 1.7502094241417177e-05, "loss": 0.5621, "step": 21763 }, { "epoch": 0.4615808784543276, "grad_norm": 0.3395627737045288, "learning_rate": 1.750187373004112e-05, "loss": 0.511, "step": 21764 }, { "epoch": 0.4616020869122606, "grad_norm": 0.9833025932312012, "learning_rate": 1.7501653210321544e-05, "loss": 0.4972, "step": 21765 }, { "epoch": 0.46162329537019364, "grad_norm": 0.34958893060684204, "learning_rate": 1.7501432682258695e-05, "loss": 0.5318, "step": 21766 }, { "epoch": 0.46164450382812666, "grad_norm": 0.36066755652427673, "learning_rate": 1.7501212145852823e-05, "loss": 0.4901, "step": 21767 }, { "epoch": 0.4616657122860597, "grad_norm": 0.3581380546092987, "learning_rate": 1.7500991601104164e-05, "loss": 0.6094, "step": 21768 }, { "epoch": 0.4616869207439927, "grad_norm": 0.3562316298484802, "learning_rate": 1.7500771048012966e-05, "loss": 0.5506, "step": 21769 }, { "epoch": 0.4617081292019257, "grad_norm": 0.5453090667724609, "learning_rate": 1.750055048657948e-05, "loss": 0.5008, "step": 21770 }, { "epoch": 0.46172933765985874, "grad_norm": 0.33203214406967163, "learning_rate": 1.750032991680394e-05, "loss": 0.4551, "step": 21771 }, { "epoch": 0.46175054611779176, "grad_norm": 0.3411742150783539, "learning_rate": 1.75001093386866e-05, "loss": 0.5186, "step": 21772 }, { "epoch": 0.4617717545757248, "grad_norm": 0.3326316475868225, "learning_rate": 1.7499888752227706e-05, "loss": 0.4956, "step": 21773 }, { "epoch": 0.4617929630336578, "grad_norm": 0.47081881761550903, "learning_rate": 1.74996681574275e-05, "loss": 0.549, "step": 21774 }, { "epoch": 0.4618141714915908, "grad_norm": 0.4061647951602936, "learning_rate": 1.749944755428623e-05, "loss": 0.5336, "step": 21775 }, { "epoch": 0.4618353799495239, "grad_norm": 0.42078298330307007, "learning_rate": 1.7499226942804138e-05, "loss": 0.5335, "step": 21776 }, { "epoch": 0.4618565884074569, "grad_norm": 0.3288447856903076, "learning_rate": 1.7499006322981472e-05, "loss": 0.4582, "step": 21777 }, { "epoch": 0.46187779686538993, "grad_norm": 0.30808940529823303, "learning_rate": 1.7498785694818475e-05, "loss": 0.4942, "step": 21778 }, { "epoch": 0.46189900532332295, "grad_norm": 0.3418642282485962, "learning_rate": 1.7498565058315392e-05, "loss": 0.5487, "step": 21779 }, { "epoch": 0.461920213781256, "grad_norm": 0.3442627489566803, "learning_rate": 1.749834441347247e-05, "loss": 0.4049, "step": 21780 }, { "epoch": 0.461941422239189, "grad_norm": 0.35246673226356506, "learning_rate": 1.749812376028996e-05, "loss": 0.5162, "step": 21781 }, { "epoch": 0.461962630697122, "grad_norm": 0.3630512058734894, "learning_rate": 1.74979030987681e-05, "loss": 0.4817, "step": 21782 }, { "epoch": 0.46198383915505503, "grad_norm": 0.37801802158355713, "learning_rate": 1.7497682428907135e-05, "loss": 0.4957, "step": 21783 }, { "epoch": 0.46200504761298805, "grad_norm": 0.40876051783561707, "learning_rate": 1.7497461750707313e-05, "loss": 0.5593, "step": 21784 }, { "epoch": 0.46202625607092107, "grad_norm": 0.35685989260673523, "learning_rate": 1.749724106416888e-05, "loss": 0.4455, "step": 21785 }, { "epoch": 0.4620474645288541, "grad_norm": 0.3285757899284363, "learning_rate": 1.749702036929208e-05, "loss": 0.5038, "step": 21786 }, { "epoch": 0.4620686729867871, "grad_norm": 0.36867043375968933, "learning_rate": 1.7496799666077157e-05, "loss": 0.4607, "step": 21787 }, { "epoch": 0.46208988144472013, "grad_norm": 0.3295014202594757, "learning_rate": 1.7496578954524363e-05, "loss": 0.4542, "step": 21788 }, { "epoch": 0.4621110899026532, "grad_norm": 0.3426167964935303, "learning_rate": 1.7496358234633936e-05, "loss": 0.4938, "step": 21789 }, { "epoch": 0.4621322983605862, "grad_norm": 0.34660762548446655, "learning_rate": 1.7496137506406126e-05, "loss": 0.5064, "step": 21790 }, { "epoch": 0.46215350681851924, "grad_norm": 0.3213768005371094, "learning_rate": 1.7495916769841178e-05, "loss": 0.4925, "step": 21791 }, { "epoch": 0.46217471527645226, "grad_norm": 0.42305704951286316, "learning_rate": 1.749569602493933e-05, "loss": 0.6187, "step": 21792 }, { "epoch": 0.4621959237343853, "grad_norm": 0.35196951031684875, "learning_rate": 1.7495475271700845e-05, "loss": 0.5369, "step": 21793 }, { "epoch": 0.4622171321923183, "grad_norm": 0.3580029606819153, "learning_rate": 1.7495254510125947e-05, "loss": 0.5396, "step": 21794 }, { "epoch": 0.4622383406502513, "grad_norm": 0.3821612298488617, "learning_rate": 1.7495033740214898e-05, "loss": 0.6444, "step": 21795 }, { "epoch": 0.46225954910818434, "grad_norm": 0.3617425560951233, "learning_rate": 1.749481296196794e-05, "loss": 0.4928, "step": 21796 }, { "epoch": 0.46228075756611736, "grad_norm": 0.4162399172782898, "learning_rate": 1.749459217538531e-05, "loss": 0.4893, "step": 21797 }, { "epoch": 0.4623019660240504, "grad_norm": 0.3378123342990875, "learning_rate": 1.749437138046726e-05, "loss": 0.5754, "step": 21798 }, { "epoch": 0.4623231744819834, "grad_norm": 0.37750574946403503, "learning_rate": 1.7494150577214037e-05, "loss": 0.4694, "step": 21799 }, { "epoch": 0.4623443829399164, "grad_norm": 0.34492799639701843, "learning_rate": 1.7493929765625886e-05, "loss": 0.5, "step": 21800 }, { "epoch": 0.46236559139784944, "grad_norm": 0.37024205923080444, "learning_rate": 1.749370894570305e-05, "loss": 0.5882, "step": 21801 }, { "epoch": 0.46238679985578246, "grad_norm": 0.3952363431453705, "learning_rate": 1.749348811744578e-05, "loss": 0.4557, "step": 21802 }, { "epoch": 0.46240800831371554, "grad_norm": 0.42357537150382996, "learning_rate": 1.7493267280854313e-05, "loss": 0.5464, "step": 21803 }, { "epoch": 0.46242921677164855, "grad_norm": 0.3676343858242035, "learning_rate": 1.7493046435928897e-05, "loss": 0.5341, "step": 21804 }, { "epoch": 0.4624504252295816, "grad_norm": 0.3257605731487274, "learning_rate": 1.7492825582669785e-05, "loss": 0.4719, "step": 21805 }, { "epoch": 0.4624716336875146, "grad_norm": 0.3926791846752167, "learning_rate": 1.7492604721077217e-05, "loss": 0.6044, "step": 21806 }, { "epoch": 0.4624928421454476, "grad_norm": 0.32612887024879456, "learning_rate": 1.7492383851151434e-05, "loss": 0.5161, "step": 21807 }, { "epoch": 0.46251405060338063, "grad_norm": 0.38505855202674866, "learning_rate": 1.749216297289269e-05, "loss": 0.5869, "step": 21808 }, { "epoch": 0.46253525906131365, "grad_norm": 0.3368777334690094, "learning_rate": 1.749194208630123e-05, "loss": 0.4515, "step": 21809 }, { "epoch": 0.4625564675192467, "grad_norm": 0.3588659167289734, "learning_rate": 1.7491721191377294e-05, "loss": 0.4851, "step": 21810 }, { "epoch": 0.4625776759771797, "grad_norm": 0.37638017535209656, "learning_rate": 1.749150028812113e-05, "loss": 0.423, "step": 21811 }, { "epoch": 0.4625988844351127, "grad_norm": 0.3650727868080139, "learning_rate": 1.7491279376532988e-05, "loss": 0.5386, "step": 21812 }, { "epoch": 0.46262009289304573, "grad_norm": 0.3855215907096863, "learning_rate": 1.7491058456613108e-05, "loss": 0.5174, "step": 21813 }, { "epoch": 0.46264130135097875, "grad_norm": 0.32894426584243774, "learning_rate": 1.7490837528361737e-05, "loss": 0.5419, "step": 21814 }, { "epoch": 0.46266250980891177, "grad_norm": 0.37756770849227905, "learning_rate": 1.7490616591779123e-05, "loss": 0.5157, "step": 21815 }, { "epoch": 0.4626837182668448, "grad_norm": 0.36421775817871094, "learning_rate": 1.7490395646865513e-05, "loss": 0.3899, "step": 21816 }, { "epoch": 0.46270492672477787, "grad_norm": 0.31481292843818665, "learning_rate": 1.7490174693621145e-05, "loss": 0.4564, "step": 21817 }, { "epoch": 0.4627261351827109, "grad_norm": 0.36747071146965027, "learning_rate": 1.748995373204627e-05, "loss": 0.4567, "step": 21818 }, { "epoch": 0.4627473436406439, "grad_norm": 0.33876463770866394, "learning_rate": 1.7489732762141137e-05, "loss": 0.5669, "step": 21819 }, { "epoch": 0.4627685520985769, "grad_norm": 0.4139336943626404, "learning_rate": 1.7489511783905987e-05, "loss": 0.5255, "step": 21820 }, { "epoch": 0.46278976055650994, "grad_norm": 0.3576415479183197, "learning_rate": 1.7489290797341066e-05, "loss": 0.5096, "step": 21821 }, { "epoch": 0.46281096901444296, "grad_norm": 0.37056225538253784, "learning_rate": 1.7489069802446624e-05, "loss": 0.4465, "step": 21822 }, { "epoch": 0.462832177472376, "grad_norm": 0.37703219056129456, "learning_rate": 1.74888487992229e-05, "loss": 0.5637, "step": 21823 }, { "epoch": 0.462853385930309, "grad_norm": 0.3659146726131439, "learning_rate": 1.7488627787670145e-05, "loss": 0.5162, "step": 21824 }, { "epoch": 0.462874594388242, "grad_norm": 0.32999876141548157, "learning_rate": 1.74884067677886e-05, "loss": 0.473, "step": 21825 }, { "epoch": 0.46289580284617504, "grad_norm": 0.3802639842033386, "learning_rate": 1.748818573957852e-05, "loss": 0.5262, "step": 21826 }, { "epoch": 0.46291701130410806, "grad_norm": 0.3383849561214447, "learning_rate": 1.7487964703040142e-05, "loss": 0.4336, "step": 21827 }, { "epoch": 0.4629382197620411, "grad_norm": 0.3648924231529236, "learning_rate": 1.7487743658173714e-05, "loss": 0.464, "step": 21828 }, { "epoch": 0.4629594282199741, "grad_norm": 0.4321213662624359, "learning_rate": 1.7487522604979484e-05, "loss": 0.4942, "step": 21829 }, { "epoch": 0.4629806366779072, "grad_norm": 0.34017473459243774, "learning_rate": 1.7487301543457695e-05, "loss": 0.4722, "step": 21830 }, { "epoch": 0.4630018451358402, "grad_norm": 0.37055888772010803, "learning_rate": 1.7487080473608592e-05, "loss": 0.437, "step": 21831 }, { "epoch": 0.4630230535937732, "grad_norm": 0.3798525929450989, "learning_rate": 1.7486859395432427e-05, "loss": 0.5521, "step": 21832 }, { "epoch": 0.46304426205170623, "grad_norm": 0.33308669924736023, "learning_rate": 1.748663830892944e-05, "loss": 0.5002, "step": 21833 }, { "epoch": 0.46306547050963925, "grad_norm": 0.3823574483394623, "learning_rate": 1.748641721409988e-05, "loss": 0.4705, "step": 21834 }, { "epoch": 0.4630866789675723, "grad_norm": 0.36680665612220764, "learning_rate": 1.748619611094399e-05, "loss": 0.5389, "step": 21835 }, { "epoch": 0.4631078874255053, "grad_norm": 0.3216269314289093, "learning_rate": 1.7485974999462022e-05, "loss": 0.519, "step": 21836 }, { "epoch": 0.4631290958834383, "grad_norm": 0.36092013120651245, "learning_rate": 1.7485753879654215e-05, "loss": 0.4756, "step": 21837 }, { "epoch": 0.46315030434137133, "grad_norm": 0.3531181216239929, "learning_rate": 1.748553275152082e-05, "loss": 0.5548, "step": 21838 }, { "epoch": 0.46317151279930435, "grad_norm": 0.3607366681098938, "learning_rate": 1.7485311615062075e-05, "loss": 0.4881, "step": 21839 }, { "epoch": 0.46319272125723737, "grad_norm": 0.32514142990112305, "learning_rate": 1.7485090470278234e-05, "loss": 0.4167, "step": 21840 }, { "epoch": 0.4632139297151704, "grad_norm": 0.3299933075904846, "learning_rate": 1.748486931716954e-05, "loss": 0.5002, "step": 21841 }, { "epoch": 0.4632351381731034, "grad_norm": 0.3272053897380829, "learning_rate": 1.748464815573624e-05, "loss": 0.5016, "step": 21842 }, { "epoch": 0.46325634663103643, "grad_norm": 0.34473663568496704, "learning_rate": 1.748442698597858e-05, "loss": 0.5145, "step": 21843 }, { "epoch": 0.4632775550889695, "grad_norm": 0.32985377311706543, "learning_rate": 1.7484205807896804e-05, "loss": 0.5187, "step": 21844 }, { "epoch": 0.4632987635469025, "grad_norm": 0.37066102027893066, "learning_rate": 1.748398462149116e-05, "loss": 0.5106, "step": 21845 }, { "epoch": 0.46331997200483555, "grad_norm": 0.5036678910255432, "learning_rate": 1.7483763426761896e-05, "loss": 0.5009, "step": 21846 }, { "epoch": 0.46334118046276856, "grad_norm": 0.3570568561553955, "learning_rate": 1.7483542223709253e-05, "loss": 0.5291, "step": 21847 }, { "epoch": 0.4633623889207016, "grad_norm": 0.3321697413921356, "learning_rate": 1.7483321012333477e-05, "loss": 0.4964, "step": 21848 }, { "epoch": 0.4633835973786346, "grad_norm": 0.33327600359916687, "learning_rate": 1.748309979263482e-05, "loss": 0.4195, "step": 21849 }, { "epoch": 0.4634048058365676, "grad_norm": 0.34953224658966064, "learning_rate": 1.7482878564613523e-05, "loss": 0.4912, "step": 21850 }, { "epoch": 0.46342601429450064, "grad_norm": 0.3500326871871948, "learning_rate": 1.748265732826983e-05, "loss": 0.5497, "step": 21851 }, { "epoch": 0.46344722275243366, "grad_norm": 0.3403095602989197, "learning_rate": 1.7482436083603994e-05, "loss": 0.4887, "step": 21852 }, { "epoch": 0.4634684312103667, "grad_norm": 0.34234318137168884, "learning_rate": 1.748221483061626e-05, "loss": 0.5385, "step": 21853 }, { "epoch": 0.4634896396682997, "grad_norm": 0.34719038009643555, "learning_rate": 1.748199356930687e-05, "loss": 0.5039, "step": 21854 }, { "epoch": 0.4635108481262327, "grad_norm": 0.39296093583106995, "learning_rate": 1.7481772299676068e-05, "loss": 0.4545, "step": 21855 }, { "epoch": 0.46353205658416574, "grad_norm": 0.3480941355228424, "learning_rate": 1.748155102172411e-05, "loss": 0.4926, "step": 21856 }, { "epoch": 0.4635532650420988, "grad_norm": 0.33987957239151, "learning_rate": 1.7481329735451235e-05, "loss": 0.5198, "step": 21857 }, { "epoch": 0.46357447350003184, "grad_norm": 0.42304620146751404, "learning_rate": 1.7481108440857687e-05, "loss": 0.4945, "step": 21858 }, { "epoch": 0.46359568195796486, "grad_norm": 0.34995603561401367, "learning_rate": 1.7480887137943715e-05, "loss": 0.5047, "step": 21859 }, { "epoch": 0.4636168904158979, "grad_norm": 0.3257286250591278, "learning_rate": 1.7480665826709565e-05, "loss": 0.3921, "step": 21860 }, { "epoch": 0.4636380988738309, "grad_norm": 0.4064352512359619, "learning_rate": 1.7480444507155485e-05, "loss": 0.452, "step": 21861 }, { "epoch": 0.4636593073317639, "grad_norm": 0.4172170162200928, "learning_rate": 1.748022317928172e-05, "loss": 0.4973, "step": 21862 }, { "epoch": 0.46368051578969693, "grad_norm": 0.3893968462944031, "learning_rate": 1.7480001843088513e-05, "loss": 0.532, "step": 21863 }, { "epoch": 0.46370172424762995, "grad_norm": 0.3190310597419739, "learning_rate": 1.747978049857612e-05, "loss": 0.4244, "step": 21864 }, { "epoch": 0.463722932705563, "grad_norm": 0.5196954607963562, "learning_rate": 1.747955914574477e-05, "loss": 0.5314, "step": 21865 }, { "epoch": 0.463744141163496, "grad_norm": 0.35648611187934875, "learning_rate": 1.747933778459473e-05, "loss": 0.4422, "step": 21866 }, { "epoch": 0.463765349621429, "grad_norm": 0.3392091393470764, "learning_rate": 1.7479116415126227e-05, "loss": 0.5095, "step": 21867 }, { "epoch": 0.46378655807936203, "grad_norm": 0.3803965747356415, "learning_rate": 1.747889503733952e-05, "loss": 0.5349, "step": 21868 }, { "epoch": 0.46380776653729505, "grad_norm": 0.38603565096855164, "learning_rate": 1.7478673651234846e-05, "loss": 0.4822, "step": 21869 }, { "epoch": 0.46382897499522807, "grad_norm": 0.3945642411708832, "learning_rate": 1.7478452256812462e-05, "loss": 0.5793, "step": 21870 }, { "epoch": 0.46385018345316115, "grad_norm": 0.35177552700042725, "learning_rate": 1.7478230854072604e-05, "loss": 0.5378, "step": 21871 }, { "epoch": 0.46387139191109417, "grad_norm": 0.41876164078712463, "learning_rate": 1.7478009443015523e-05, "loss": 0.4875, "step": 21872 }, { "epoch": 0.4638926003690272, "grad_norm": 0.3615201413631439, "learning_rate": 1.747778802364147e-05, "loss": 0.587, "step": 21873 }, { "epoch": 0.4639138088269602, "grad_norm": 0.3680247664451599, "learning_rate": 1.747756659595068e-05, "loss": 0.5082, "step": 21874 }, { "epoch": 0.4639350172848932, "grad_norm": 0.3444198668003082, "learning_rate": 1.747734515994341e-05, "loss": 0.4636, "step": 21875 }, { "epoch": 0.46395622574282624, "grad_norm": 0.4110560715198517, "learning_rate": 1.74771237156199e-05, "loss": 0.5537, "step": 21876 }, { "epoch": 0.46397743420075926, "grad_norm": 0.32940560579299927, "learning_rate": 1.7476902262980397e-05, "loss": 0.4986, "step": 21877 }, { "epoch": 0.4639986426586923, "grad_norm": 0.34549301862716675, "learning_rate": 1.747668080202515e-05, "loss": 0.5242, "step": 21878 }, { "epoch": 0.4640198511166253, "grad_norm": 0.3103763163089752, "learning_rate": 1.7476459332754402e-05, "loss": 0.4622, "step": 21879 }, { "epoch": 0.4640410595745583, "grad_norm": 0.35618048906326294, "learning_rate": 1.7476237855168402e-05, "loss": 0.479, "step": 21880 }, { "epoch": 0.46406226803249134, "grad_norm": 0.32971224188804626, "learning_rate": 1.7476016369267395e-05, "loss": 0.5458, "step": 21881 }, { "epoch": 0.46408347649042436, "grad_norm": 0.31152814626693726, "learning_rate": 1.7475794875051625e-05, "loss": 0.473, "step": 21882 }, { "epoch": 0.4641046849483574, "grad_norm": 0.38874801993370056, "learning_rate": 1.7475573372521345e-05, "loss": 0.5026, "step": 21883 }, { "epoch": 0.4641258934062904, "grad_norm": 0.36043158173561096, "learning_rate": 1.7475351861676793e-05, "loss": 0.5259, "step": 21884 }, { "epoch": 0.4641471018642235, "grad_norm": 0.4060858190059662, "learning_rate": 1.7475130342518223e-05, "loss": 0.4785, "step": 21885 }, { "epoch": 0.4641683103221565, "grad_norm": 0.44345617294311523, "learning_rate": 1.7474908815045876e-05, "loss": 0.4562, "step": 21886 }, { "epoch": 0.4641895187800895, "grad_norm": 0.33957338333129883, "learning_rate": 1.7474687279260002e-05, "loss": 0.4445, "step": 21887 }, { "epoch": 0.46421072723802254, "grad_norm": 0.32419899106025696, "learning_rate": 1.7474465735160845e-05, "loss": 0.457, "step": 21888 }, { "epoch": 0.46423193569595556, "grad_norm": 0.3376460671424866, "learning_rate": 1.7474244182748652e-05, "loss": 0.585, "step": 21889 }, { "epoch": 0.4642531441538886, "grad_norm": 0.3553314507007599, "learning_rate": 1.747402262202367e-05, "loss": 0.4824, "step": 21890 }, { "epoch": 0.4642743526118216, "grad_norm": 0.3725893199443817, "learning_rate": 1.7473801052986144e-05, "loss": 0.5189, "step": 21891 }, { "epoch": 0.4642955610697546, "grad_norm": 0.3769773840904236, "learning_rate": 1.7473579475636323e-05, "loss": 0.5055, "step": 21892 }, { "epoch": 0.46431676952768763, "grad_norm": 0.32559987902641296, "learning_rate": 1.7473357889974448e-05, "loss": 0.3889, "step": 21893 }, { "epoch": 0.46433797798562065, "grad_norm": 0.38068950176239014, "learning_rate": 1.747313629600077e-05, "loss": 0.5242, "step": 21894 }, { "epoch": 0.4643591864435537, "grad_norm": 0.3454132676124573, "learning_rate": 1.747291469371554e-05, "loss": 0.5126, "step": 21895 }, { "epoch": 0.4643803949014867, "grad_norm": 0.3776697516441345, "learning_rate": 1.7472693083118995e-05, "loss": 0.6087, "step": 21896 }, { "epoch": 0.4644016033594197, "grad_norm": 0.36817044019699097, "learning_rate": 1.7472471464211385e-05, "loss": 0.4855, "step": 21897 }, { "epoch": 0.4644228118173528, "grad_norm": 0.35288649797439575, "learning_rate": 1.747224983699296e-05, "loss": 0.4145, "step": 21898 }, { "epoch": 0.4644440202752858, "grad_norm": 0.329751193523407, "learning_rate": 1.7472028201463966e-05, "loss": 0.436, "step": 21899 }, { "epoch": 0.4644652287332188, "grad_norm": 0.3445194363594055, "learning_rate": 1.747180655762464e-05, "loss": 0.4747, "step": 21900 }, { "epoch": 0.46448643719115185, "grad_norm": 0.4051571190357208, "learning_rate": 1.747158490547524e-05, "loss": 0.5124, "step": 21901 }, { "epoch": 0.46450764564908487, "grad_norm": 0.4361283779144287, "learning_rate": 1.7471363245016005e-05, "loss": 0.6847, "step": 21902 }, { "epoch": 0.4645288541070179, "grad_norm": 0.5246530771255493, "learning_rate": 1.747114157624719e-05, "loss": 0.471, "step": 21903 }, { "epoch": 0.4645500625649509, "grad_norm": 0.36154672503471375, "learning_rate": 1.7470919899169032e-05, "loss": 0.4837, "step": 21904 }, { "epoch": 0.4645712710228839, "grad_norm": 0.3832781910896301, "learning_rate": 1.747069821378178e-05, "loss": 0.6073, "step": 21905 }, { "epoch": 0.46459247948081694, "grad_norm": 0.33983156085014343, "learning_rate": 1.747047652008569e-05, "loss": 0.4691, "step": 21906 }, { "epoch": 0.46461368793874996, "grad_norm": 0.3386516273021698, "learning_rate": 1.7470254818080997e-05, "loss": 0.5411, "step": 21907 }, { "epoch": 0.464634896396683, "grad_norm": 0.35804620385169983, "learning_rate": 1.747003310776795e-05, "loss": 0.5034, "step": 21908 }, { "epoch": 0.464656104854616, "grad_norm": 0.31306740641593933, "learning_rate": 1.7469811389146797e-05, "loss": 0.5042, "step": 21909 }, { "epoch": 0.464677313312549, "grad_norm": 0.35135143995285034, "learning_rate": 1.7469589662217784e-05, "loss": 0.5101, "step": 21910 }, { "epoch": 0.46469852177048204, "grad_norm": 0.4171772301197052, "learning_rate": 1.746936792698116e-05, "loss": 0.5444, "step": 21911 }, { "epoch": 0.4647197302284151, "grad_norm": 0.40577617287635803, "learning_rate": 1.7469146183437173e-05, "loss": 0.5039, "step": 21912 }, { "epoch": 0.46474093868634814, "grad_norm": 0.333771675825119, "learning_rate": 1.7468924431586062e-05, "loss": 0.5827, "step": 21913 }, { "epoch": 0.46476214714428116, "grad_norm": 0.8370434641838074, "learning_rate": 1.746870267142808e-05, "loss": 0.547, "step": 21914 }, { "epoch": 0.4647833556022142, "grad_norm": 0.3501393795013428, "learning_rate": 1.746848090296347e-05, "loss": 0.4923, "step": 21915 }, { "epoch": 0.4648045640601472, "grad_norm": 0.3897615969181061, "learning_rate": 1.746825912619248e-05, "loss": 0.5773, "step": 21916 }, { "epoch": 0.4648257725180802, "grad_norm": 0.3448287844657898, "learning_rate": 1.746803734111536e-05, "loss": 0.4782, "step": 21917 }, { "epoch": 0.46484698097601324, "grad_norm": 0.30514633655548096, "learning_rate": 1.7467815547732354e-05, "loss": 0.5154, "step": 21918 }, { "epoch": 0.46486818943394626, "grad_norm": 0.32445213198661804, "learning_rate": 1.7467593746043705e-05, "loss": 0.4351, "step": 21919 }, { "epoch": 0.4648893978918793, "grad_norm": 0.3129272162914276, "learning_rate": 1.7467371936049665e-05, "loss": 0.4645, "step": 21920 }, { "epoch": 0.4649106063498123, "grad_norm": 0.34737518429756165, "learning_rate": 1.746715011775048e-05, "loss": 0.4855, "step": 21921 }, { "epoch": 0.4649318148077453, "grad_norm": 0.33192184567451477, "learning_rate": 1.7466928291146393e-05, "loss": 0.515, "step": 21922 }, { "epoch": 0.46495302326567833, "grad_norm": 0.3848942220211029, "learning_rate": 1.7466706456237654e-05, "loss": 0.4473, "step": 21923 }, { "epoch": 0.46497423172361135, "grad_norm": 0.44327765703201294, "learning_rate": 1.746648461302451e-05, "loss": 0.571, "step": 21924 }, { "epoch": 0.4649954401815444, "grad_norm": 0.3258678615093231, "learning_rate": 1.7466262761507206e-05, "loss": 0.5434, "step": 21925 }, { "epoch": 0.46501664863947745, "grad_norm": 0.42575690150260925, "learning_rate": 1.746604090168599e-05, "loss": 0.553, "step": 21926 }, { "epoch": 0.46503785709741047, "grad_norm": 0.5406203269958496, "learning_rate": 1.746581903356111e-05, "loss": 0.5429, "step": 21927 }, { "epoch": 0.4650590655553435, "grad_norm": 0.4074499011039734, "learning_rate": 1.7465597157132807e-05, "loss": 0.5625, "step": 21928 }, { "epoch": 0.4650802740132765, "grad_norm": 0.41603270173072815, "learning_rate": 1.7465375272401332e-05, "loss": 0.5383, "step": 21929 }, { "epoch": 0.4651014824712095, "grad_norm": 0.5035991668701172, "learning_rate": 1.7465153379366933e-05, "loss": 0.4401, "step": 21930 }, { "epoch": 0.46512269092914255, "grad_norm": 0.5650279521942139, "learning_rate": 1.7464931478029855e-05, "loss": 0.5181, "step": 21931 }, { "epoch": 0.46514389938707557, "grad_norm": 0.37898334860801697, "learning_rate": 1.7464709568390343e-05, "loss": 0.5746, "step": 21932 }, { "epoch": 0.4651651078450086, "grad_norm": 0.35600367188453674, "learning_rate": 1.7464487650448648e-05, "loss": 0.5421, "step": 21933 }, { "epoch": 0.4651863163029416, "grad_norm": 0.3358372151851654, "learning_rate": 1.7464265724205017e-05, "loss": 0.5382, "step": 21934 }, { "epoch": 0.4652075247608746, "grad_norm": 0.40361177921295166, "learning_rate": 1.746404378965969e-05, "loss": 0.4534, "step": 21935 }, { "epoch": 0.46522873321880764, "grad_norm": 0.4072290062904358, "learning_rate": 1.7463821846812923e-05, "loss": 0.5887, "step": 21936 }, { "epoch": 0.46524994167674066, "grad_norm": 0.3722112476825714, "learning_rate": 1.7463599895664955e-05, "loss": 0.4871, "step": 21937 }, { "epoch": 0.4652711501346737, "grad_norm": 0.3699018359184265, "learning_rate": 1.7463377936216037e-05, "loss": 0.5394, "step": 21938 }, { "epoch": 0.46529235859260676, "grad_norm": 0.40206798911094666, "learning_rate": 1.7463155968466416e-05, "loss": 0.5002, "step": 21939 }, { "epoch": 0.4653135670505398, "grad_norm": 0.5183854103088379, "learning_rate": 1.7462933992416333e-05, "loss": 0.4809, "step": 21940 }, { "epoch": 0.4653347755084728, "grad_norm": 0.31070607900619507, "learning_rate": 1.7462712008066045e-05, "loss": 0.4278, "step": 21941 }, { "epoch": 0.4653559839664058, "grad_norm": 0.36206528544425964, "learning_rate": 1.7462490015415793e-05, "loss": 0.5121, "step": 21942 }, { "epoch": 0.46537719242433884, "grad_norm": 0.33724626898765564, "learning_rate": 1.746226801446582e-05, "loss": 0.5376, "step": 21943 }, { "epoch": 0.46539840088227186, "grad_norm": 0.3891237676143646, "learning_rate": 1.746204600521638e-05, "loss": 0.5821, "step": 21944 }, { "epoch": 0.4654196093402049, "grad_norm": 0.398873507976532, "learning_rate": 1.7461823987667717e-05, "loss": 0.497, "step": 21945 }, { "epoch": 0.4654408177981379, "grad_norm": 0.317954421043396, "learning_rate": 1.746160196182008e-05, "loss": 0.5319, "step": 21946 }, { "epoch": 0.4654620262560709, "grad_norm": 0.32613953948020935, "learning_rate": 1.746137992767371e-05, "loss": 0.5287, "step": 21947 }, { "epoch": 0.46548323471400394, "grad_norm": 0.36108681559562683, "learning_rate": 1.7461157885228862e-05, "loss": 0.5429, "step": 21948 }, { "epoch": 0.46550444317193695, "grad_norm": 0.3176896870136261, "learning_rate": 1.746093583448578e-05, "loss": 0.5541, "step": 21949 }, { "epoch": 0.46552565162987, "grad_norm": 0.35272088646888733, "learning_rate": 1.7460713775444707e-05, "loss": 0.5114, "step": 21950 }, { "epoch": 0.465546860087803, "grad_norm": 0.4081743657588959, "learning_rate": 1.7460491708105892e-05, "loss": 0.4793, "step": 21951 }, { "epoch": 0.465568068545736, "grad_norm": 0.3355441689491272, "learning_rate": 1.7460269632469586e-05, "loss": 0.5314, "step": 21952 }, { "epoch": 0.4655892770036691, "grad_norm": 0.3697299063205719, "learning_rate": 1.7460047548536034e-05, "loss": 0.5016, "step": 21953 }, { "epoch": 0.4656104854616021, "grad_norm": 0.36795154213905334, "learning_rate": 1.745982545630548e-05, "loss": 0.5273, "step": 21954 }, { "epoch": 0.46563169391953513, "grad_norm": 0.3269747197628021, "learning_rate": 1.745960335577817e-05, "loss": 0.4872, "step": 21955 }, { "epoch": 0.46565290237746815, "grad_norm": 0.3441441059112549, "learning_rate": 1.7459381246954355e-05, "loss": 0.5531, "step": 21956 }, { "epoch": 0.46567411083540117, "grad_norm": 0.37137219309806824, "learning_rate": 1.7459159129834286e-05, "loss": 0.5203, "step": 21957 }, { "epoch": 0.4656953192933342, "grad_norm": 0.3562774956226349, "learning_rate": 1.74589370044182e-05, "loss": 0.5239, "step": 21958 }, { "epoch": 0.4657165277512672, "grad_norm": 0.3591456413269043, "learning_rate": 1.745871487070635e-05, "loss": 0.5358, "step": 21959 }, { "epoch": 0.4657377362092002, "grad_norm": 0.35151511430740356, "learning_rate": 1.7458492728698982e-05, "loss": 0.4648, "step": 21960 }, { "epoch": 0.46575894466713325, "grad_norm": 0.33629339933395386, "learning_rate": 1.7458270578396343e-05, "loss": 0.4585, "step": 21961 }, { "epoch": 0.46578015312506627, "grad_norm": 0.31586122512817383, "learning_rate": 1.7458048419798683e-05, "loss": 0.4566, "step": 21962 }, { "epoch": 0.4658013615829993, "grad_norm": 0.3335200548171997, "learning_rate": 1.7457826252906246e-05, "loss": 0.4477, "step": 21963 }, { "epoch": 0.4658225700409323, "grad_norm": 0.3728279769420624, "learning_rate": 1.7457604077719276e-05, "loss": 0.561, "step": 21964 }, { "epoch": 0.4658437784988653, "grad_norm": 0.38772499561309814, "learning_rate": 1.7457381894238027e-05, "loss": 0.471, "step": 21965 }, { "epoch": 0.46586498695679834, "grad_norm": 0.37468957901000977, "learning_rate": 1.7457159702462737e-05, "loss": 0.5114, "step": 21966 }, { "epoch": 0.4658861954147314, "grad_norm": 0.3234899640083313, "learning_rate": 1.7456937502393666e-05, "loss": 0.5255, "step": 21967 }, { "epoch": 0.46590740387266444, "grad_norm": 0.4184340834617615, "learning_rate": 1.745671529403105e-05, "loss": 0.4529, "step": 21968 }, { "epoch": 0.46592861233059746, "grad_norm": 0.5684127807617188, "learning_rate": 1.745649307737514e-05, "loss": 0.564, "step": 21969 }, { "epoch": 0.4659498207885305, "grad_norm": 0.3510834574699402, "learning_rate": 1.7456270852426184e-05, "loss": 0.462, "step": 21970 }, { "epoch": 0.4659710292464635, "grad_norm": 0.38423240184783936, "learning_rate": 1.745604861918443e-05, "loss": 0.4776, "step": 21971 }, { "epoch": 0.4659922377043965, "grad_norm": 0.36253049969673157, "learning_rate": 1.745582637765012e-05, "loss": 0.4987, "step": 21972 }, { "epoch": 0.46601344616232954, "grad_norm": 0.3708939850330353, "learning_rate": 1.7455604127823506e-05, "loss": 0.4801, "step": 21973 }, { "epoch": 0.46603465462026256, "grad_norm": 0.3238508403301239, "learning_rate": 1.7455381869704835e-05, "loss": 0.4858, "step": 21974 }, { "epoch": 0.4660558630781956, "grad_norm": 0.33588147163391113, "learning_rate": 1.7455159603294354e-05, "loss": 0.4805, "step": 21975 }, { "epoch": 0.4660770715361286, "grad_norm": 0.683214545249939, "learning_rate": 1.7454937328592306e-05, "loss": 0.5279, "step": 21976 }, { "epoch": 0.4660982799940616, "grad_norm": 0.3902481496334076, "learning_rate": 1.7454715045598943e-05, "loss": 0.5359, "step": 21977 }, { "epoch": 0.46611948845199463, "grad_norm": 0.3845268189907074, "learning_rate": 1.745449275431451e-05, "loss": 0.5501, "step": 21978 }, { "epoch": 0.46614069690992765, "grad_norm": 0.3950130045413971, "learning_rate": 1.7454270454739254e-05, "loss": 0.5771, "step": 21979 }, { "epoch": 0.46616190536786073, "grad_norm": 0.40075528621673584, "learning_rate": 1.7454048146873427e-05, "loss": 0.4929, "step": 21980 }, { "epoch": 0.46618311382579375, "grad_norm": 0.32879728078842163, "learning_rate": 1.745382583071727e-05, "loss": 0.4946, "step": 21981 }, { "epoch": 0.46620432228372677, "grad_norm": 0.3896901309490204, "learning_rate": 1.7453603506271033e-05, "loss": 0.5066, "step": 21982 }, { "epoch": 0.4662255307416598, "grad_norm": 0.3391227424144745, "learning_rate": 1.7453381173534963e-05, "loss": 0.4926, "step": 21983 }, { "epoch": 0.4662467391995928, "grad_norm": 0.35432374477386475, "learning_rate": 1.7453158832509306e-05, "loss": 0.5762, "step": 21984 }, { "epoch": 0.4662679476575258, "grad_norm": 0.3300824761390686, "learning_rate": 1.7452936483194314e-05, "loss": 0.5318, "step": 21985 }, { "epoch": 0.46628915611545885, "grad_norm": 0.322638601064682, "learning_rate": 1.7452714125590224e-05, "loss": 0.4911, "step": 21986 }, { "epoch": 0.46631036457339187, "grad_norm": 0.34075772762298584, "learning_rate": 1.7452491759697296e-05, "loss": 0.5266, "step": 21987 }, { "epoch": 0.4663315730313249, "grad_norm": 0.39207708835601807, "learning_rate": 1.745226938551577e-05, "loss": 0.577, "step": 21988 }, { "epoch": 0.4663527814892579, "grad_norm": 0.38674813508987427, "learning_rate": 1.7452047003045893e-05, "loss": 0.5829, "step": 21989 }, { "epoch": 0.4663739899471909, "grad_norm": 0.6107866168022156, "learning_rate": 1.7451824612287917e-05, "loss": 0.5411, "step": 21990 }, { "epoch": 0.46639519840512395, "grad_norm": 0.36265507340431213, "learning_rate": 1.7451602213242086e-05, "loss": 0.4277, "step": 21991 }, { "epoch": 0.46641640686305696, "grad_norm": 0.3323269784450531, "learning_rate": 1.7451379805908647e-05, "loss": 0.4916, "step": 21992 }, { "epoch": 0.46643761532099, "grad_norm": 0.37043264508247375, "learning_rate": 1.7451157390287848e-05, "loss": 0.5078, "step": 21993 }, { "epoch": 0.46645882377892306, "grad_norm": 0.3634801208972931, "learning_rate": 1.7450934966379936e-05, "loss": 0.4816, "step": 21994 }, { "epoch": 0.4664800322368561, "grad_norm": 0.44113004207611084, "learning_rate": 1.745071253418516e-05, "loss": 0.4723, "step": 21995 }, { "epoch": 0.4665012406947891, "grad_norm": 0.3478739261627197, "learning_rate": 1.7450490093703763e-05, "loss": 0.4265, "step": 21996 }, { "epoch": 0.4665224491527221, "grad_norm": 0.40462052822113037, "learning_rate": 1.7450267644936e-05, "loss": 0.4576, "step": 21997 }, { "epoch": 0.46654365761065514, "grad_norm": 0.38042473793029785, "learning_rate": 1.745004518788211e-05, "loss": 0.5315, "step": 21998 }, { "epoch": 0.46656486606858816, "grad_norm": 0.3398721218109131, "learning_rate": 1.7449822722542347e-05, "loss": 0.5381, "step": 21999 }, { "epoch": 0.4665860745265212, "grad_norm": 0.36615845561027527, "learning_rate": 1.7449600248916952e-05, "loss": 0.479, "step": 22000 }, { "epoch": 0.4666072829844542, "grad_norm": 0.3584037721157074, "learning_rate": 1.7449377767006182e-05, "loss": 0.4983, "step": 22001 }, { "epoch": 0.4666284914423872, "grad_norm": 0.4482085406780243, "learning_rate": 1.7449155276810278e-05, "loss": 0.4576, "step": 22002 }, { "epoch": 0.46664969990032024, "grad_norm": 0.5169986486434937, "learning_rate": 1.7448932778329487e-05, "loss": 0.4788, "step": 22003 }, { "epoch": 0.46667090835825326, "grad_norm": 0.364715576171875, "learning_rate": 1.7448710271564057e-05, "loss": 0.4662, "step": 22004 }, { "epoch": 0.4666921168161863, "grad_norm": 0.322162389755249, "learning_rate": 1.7448487756514238e-05, "loss": 0.4964, "step": 22005 }, { "epoch": 0.4667133252741193, "grad_norm": 0.5033310055732727, "learning_rate": 1.744826523318027e-05, "loss": 0.4618, "step": 22006 }, { "epoch": 0.46673453373205237, "grad_norm": 0.39331987500190735, "learning_rate": 1.7448042701562412e-05, "loss": 0.5149, "step": 22007 }, { "epoch": 0.4667557421899854, "grad_norm": 0.31918638944625854, "learning_rate": 1.7447820161660904e-05, "loss": 0.4975, "step": 22008 }, { "epoch": 0.4667769506479184, "grad_norm": 0.3510342836380005, "learning_rate": 1.7447597613475996e-05, "loss": 0.5562, "step": 22009 }, { "epoch": 0.46679815910585143, "grad_norm": 0.40642666816711426, "learning_rate": 1.7447375057007933e-05, "loss": 0.5718, "step": 22010 }, { "epoch": 0.46681936756378445, "grad_norm": 0.40341827273368835, "learning_rate": 1.7447152492256965e-05, "loss": 0.5127, "step": 22011 }, { "epoch": 0.46684057602171747, "grad_norm": 0.32071739435195923, "learning_rate": 1.744692991922334e-05, "loss": 0.4698, "step": 22012 }, { "epoch": 0.4668617844796505, "grad_norm": 0.31306228041648865, "learning_rate": 1.74467073379073e-05, "loss": 0.5052, "step": 22013 }, { "epoch": 0.4668829929375835, "grad_norm": 0.34347468614578247, "learning_rate": 1.7446484748309102e-05, "loss": 0.4949, "step": 22014 }, { "epoch": 0.4669042013955165, "grad_norm": 0.41815316677093506, "learning_rate": 1.7446262150428984e-05, "loss": 0.5435, "step": 22015 }, { "epoch": 0.46692540985344955, "grad_norm": 0.34303346276283264, "learning_rate": 1.74460395442672e-05, "loss": 0.4442, "step": 22016 }, { "epoch": 0.46694661831138257, "grad_norm": 0.36401310563087463, "learning_rate": 1.7445816929823993e-05, "loss": 0.5043, "step": 22017 }, { "epoch": 0.4669678267693156, "grad_norm": 0.354979544878006, "learning_rate": 1.744559430709962e-05, "loss": 0.5399, "step": 22018 }, { "epoch": 0.4669890352272486, "grad_norm": 0.35584935545921326, "learning_rate": 1.7445371676094314e-05, "loss": 0.4616, "step": 22019 }, { "epoch": 0.4670102436851816, "grad_norm": 0.401322603225708, "learning_rate": 1.7445149036808332e-05, "loss": 0.4734, "step": 22020 }, { "epoch": 0.4670314521431147, "grad_norm": 0.3026503026485443, "learning_rate": 1.7444926389241923e-05, "loss": 0.4564, "step": 22021 }, { "epoch": 0.4670526606010477, "grad_norm": 0.3208375573158264, "learning_rate": 1.744470373339533e-05, "loss": 0.4581, "step": 22022 }, { "epoch": 0.46707386905898074, "grad_norm": 0.30733707547187805, "learning_rate": 1.7444481069268803e-05, "loss": 0.4976, "step": 22023 }, { "epoch": 0.46709507751691376, "grad_norm": 0.3499050736427307, "learning_rate": 1.7444258396862586e-05, "loss": 0.45, "step": 22024 }, { "epoch": 0.4671162859748468, "grad_norm": 0.43838417530059814, "learning_rate": 1.744403571617693e-05, "loss": 0.5483, "step": 22025 }, { "epoch": 0.4671374944327798, "grad_norm": 0.3890542685985565, "learning_rate": 1.7443813027212085e-05, "loss": 0.5217, "step": 22026 }, { "epoch": 0.4671587028907128, "grad_norm": 0.33638885617256165, "learning_rate": 1.7443590329968294e-05, "loss": 0.5392, "step": 22027 }, { "epoch": 0.46717991134864584, "grad_norm": 0.3951953053474426, "learning_rate": 1.7443367624445807e-05, "loss": 0.5706, "step": 22028 }, { "epoch": 0.46720111980657886, "grad_norm": 0.30639344453811646, "learning_rate": 1.744314491064487e-05, "loss": 0.4007, "step": 22029 }, { "epoch": 0.4672223282645119, "grad_norm": 0.4450831413269043, "learning_rate": 1.7442922188565735e-05, "loss": 0.4846, "step": 22030 }, { "epoch": 0.4672435367224449, "grad_norm": 0.390273779630661, "learning_rate": 1.744269945820864e-05, "loss": 0.5943, "step": 22031 }, { "epoch": 0.4672647451803779, "grad_norm": 0.34697744250297546, "learning_rate": 1.7442476719573847e-05, "loss": 0.5515, "step": 22032 }, { "epoch": 0.46728595363831094, "grad_norm": 0.3498111367225647, "learning_rate": 1.7442253972661593e-05, "loss": 0.5163, "step": 22033 }, { "epoch": 0.46730716209624396, "grad_norm": 0.38281258940696716, "learning_rate": 1.7442031217472126e-05, "loss": 0.5828, "step": 22034 }, { "epoch": 0.46732837055417703, "grad_norm": 0.34327825903892517, "learning_rate": 1.74418084540057e-05, "loss": 0.5079, "step": 22035 }, { "epoch": 0.46734957901211005, "grad_norm": 0.29295626282691956, "learning_rate": 1.744158568226256e-05, "loss": 0.4612, "step": 22036 }, { "epoch": 0.46737078747004307, "grad_norm": 0.4529283046722412, "learning_rate": 1.744136290224295e-05, "loss": 0.5973, "step": 22037 }, { "epoch": 0.4673919959279761, "grad_norm": 0.35078755021095276, "learning_rate": 1.7441140113947125e-05, "loss": 0.5026, "step": 22038 }, { "epoch": 0.4674132043859091, "grad_norm": 0.33869171142578125, "learning_rate": 1.7440917317375325e-05, "loss": 0.5047, "step": 22039 }, { "epoch": 0.46743441284384213, "grad_norm": 0.313966304063797, "learning_rate": 1.7440694512527802e-05, "loss": 0.4677, "step": 22040 }, { "epoch": 0.46745562130177515, "grad_norm": 0.37884950637817383, "learning_rate": 1.7440471699404802e-05, "loss": 0.4568, "step": 22041 }, { "epoch": 0.46747682975970817, "grad_norm": 0.3523029685020447, "learning_rate": 1.7440248878006575e-05, "loss": 0.5098, "step": 22042 }, { "epoch": 0.4674980382176412, "grad_norm": 0.3886581361293793, "learning_rate": 1.744002604833337e-05, "loss": 0.4998, "step": 22043 }, { "epoch": 0.4675192466755742, "grad_norm": 0.340439110994339, "learning_rate": 1.743980321038543e-05, "loss": 0.4923, "step": 22044 }, { "epoch": 0.4675404551335072, "grad_norm": 0.3470284044742584, "learning_rate": 1.7439580364163008e-05, "loss": 0.4608, "step": 22045 }, { "epoch": 0.46756166359144025, "grad_norm": 0.34556636214256287, "learning_rate": 1.7439357509666348e-05, "loss": 0.473, "step": 22046 }, { "epoch": 0.46758287204937327, "grad_norm": 0.3622078597545624, "learning_rate": 1.7439134646895698e-05, "loss": 0.5015, "step": 22047 }, { "epoch": 0.46760408050730634, "grad_norm": 0.3581187427043915, "learning_rate": 1.7438911775851307e-05, "loss": 0.5226, "step": 22048 }, { "epoch": 0.46762528896523936, "grad_norm": 0.35930055379867554, "learning_rate": 1.7438688896533427e-05, "loss": 0.4403, "step": 22049 }, { "epoch": 0.4676464974231724, "grad_norm": 0.37347152829170227, "learning_rate": 1.7438466008942296e-05, "loss": 0.5594, "step": 22050 }, { "epoch": 0.4676677058811054, "grad_norm": 0.3099125921726227, "learning_rate": 1.743824311307817e-05, "loss": 0.4754, "step": 22051 }, { "epoch": 0.4676889143390384, "grad_norm": 0.34063366055488586, "learning_rate": 1.7438020208941296e-05, "loss": 0.4594, "step": 22052 }, { "epoch": 0.46771012279697144, "grad_norm": 1.0160894393920898, "learning_rate": 1.743779729653192e-05, "loss": 0.559, "step": 22053 }, { "epoch": 0.46773133125490446, "grad_norm": 0.3657335937023163, "learning_rate": 1.743757437585029e-05, "loss": 0.474, "step": 22054 }, { "epoch": 0.4677525397128375, "grad_norm": 0.3498251438140869, "learning_rate": 1.7437351446896656e-05, "loss": 0.613, "step": 22055 }, { "epoch": 0.4677737481707705, "grad_norm": 0.37495627999305725, "learning_rate": 1.743712850967126e-05, "loss": 0.5163, "step": 22056 }, { "epoch": 0.4677949566287035, "grad_norm": 0.4336695969104767, "learning_rate": 1.7436905564174358e-05, "loss": 0.5124, "step": 22057 }, { "epoch": 0.46781616508663654, "grad_norm": 0.3698965013027191, "learning_rate": 1.7436682610406195e-05, "loss": 0.5776, "step": 22058 }, { "epoch": 0.46783737354456956, "grad_norm": 0.330384224653244, "learning_rate": 1.7436459648367017e-05, "loss": 0.4814, "step": 22059 }, { "epoch": 0.4678585820025026, "grad_norm": 0.35509192943573, "learning_rate": 1.7436236678057073e-05, "loss": 0.5552, "step": 22060 }, { "epoch": 0.4678797904604356, "grad_norm": 0.35424137115478516, "learning_rate": 1.7436013699476612e-05, "loss": 0.5345, "step": 22061 }, { "epoch": 0.46790099891836867, "grad_norm": 0.3576184809207916, "learning_rate": 1.743579071262588e-05, "loss": 0.6135, "step": 22062 }, { "epoch": 0.4679222073763017, "grad_norm": 0.38217034935951233, "learning_rate": 1.7435567717505126e-05, "loss": 0.5784, "step": 22063 }, { "epoch": 0.4679434158342347, "grad_norm": 0.31962308287620544, "learning_rate": 1.74353447141146e-05, "loss": 0.4305, "step": 22064 }, { "epoch": 0.46796462429216773, "grad_norm": 0.3412958085536957, "learning_rate": 1.7435121702454543e-05, "loss": 0.5058, "step": 22065 }, { "epoch": 0.46798583275010075, "grad_norm": 0.3625395894050598, "learning_rate": 1.7434898682525213e-05, "loss": 0.5191, "step": 22066 }, { "epoch": 0.46800704120803377, "grad_norm": 0.36743491888046265, "learning_rate": 1.7434675654326852e-05, "loss": 0.5567, "step": 22067 }, { "epoch": 0.4680282496659668, "grad_norm": 0.34501373767852783, "learning_rate": 1.743445261785971e-05, "loss": 0.4827, "step": 22068 }, { "epoch": 0.4680494581238998, "grad_norm": 0.36501795053482056, "learning_rate": 1.7434229573124033e-05, "loss": 0.498, "step": 22069 }, { "epoch": 0.46807066658183283, "grad_norm": 0.3480727970600128, "learning_rate": 1.743400652012007e-05, "loss": 0.5501, "step": 22070 }, { "epoch": 0.46809187503976585, "grad_norm": 0.3070108890533447, "learning_rate": 1.7433783458848074e-05, "loss": 0.4712, "step": 22071 }, { "epoch": 0.46811308349769887, "grad_norm": 0.3588148355484009, "learning_rate": 1.7433560389308285e-05, "loss": 0.5321, "step": 22072 }, { "epoch": 0.4681342919556319, "grad_norm": 0.3876994550228119, "learning_rate": 1.7433337311500956e-05, "loss": 0.5182, "step": 22073 }, { "epoch": 0.4681555004135649, "grad_norm": 0.39836058020591736, "learning_rate": 1.7433114225426332e-05, "loss": 0.5071, "step": 22074 }, { "epoch": 0.4681767088714979, "grad_norm": 0.4489307105541229, "learning_rate": 1.7432891131084663e-05, "loss": 0.557, "step": 22075 }, { "epoch": 0.468197917329431, "grad_norm": 0.37912335991859436, "learning_rate": 1.74326680284762e-05, "loss": 0.5239, "step": 22076 }, { "epoch": 0.468219125787364, "grad_norm": 0.3854292631149292, "learning_rate": 1.7432444917601183e-05, "loss": 0.513, "step": 22077 }, { "epoch": 0.46824033424529704, "grad_norm": 0.34510621428489685, "learning_rate": 1.743222179845987e-05, "loss": 0.522, "step": 22078 }, { "epoch": 0.46826154270323006, "grad_norm": 0.3414241671562195, "learning_rate": 1.74319986710525e-05, "loss": 0.5439, "step": 22079 }, { "epoch": 0.4682827511611631, "grad_norm": 0.33149251341819763, "learning_rate": 1.743177553537933e-05, "loss": 0.5794, "step": 22080 }, { "epoch": 0.4683039596190961, "grad_norm": 0.30734381079673767, "learning_rate": 1.7431552391440606e-05, "loss": 0.4745, "step": 22081 }, { "epoch": 0.4683251680770291, "grad_norm": 0.3336459696292877, "learning_rate": 1.7431329239236568e-05, "loss": 0.4843, "step": 22082 }, { "epoch": 0.46834637653496214, "grad_norm": 0.3569183051586151, "learning_rate": 1.7431106078767474e-05, "loss": 0.5076, "step": 22083 }, { "epoch": 0.46836758499289516, "grad_norm": 0.3197059631347656, "learning_rate": 1.7430882910033567e-05, "loss": 0.5075, "step": 22084 }, { "epoch": 0.4683887934508282, "grad_norm": 0.3742717206478119, "learning_rate": 1.7430659733035096e-05, "loss": 0.5404, "step": 22085 }, { "epoch": 0.4684100019087612, "grad_norm": 0.33711326122283936, "learning_rate": 1.743043654777231e-05, "loss": 0.532, "step": 22086 }, { "epoch": 0.4684312103666942, "grad_norm": 0.33422866463661194, "learning_rate": 1.743021335424546e-05, "loss": 0.4461, "step": 22087 }, { "epoch": 0.46845241882462724, "grad_norm": 0.3201408386230469, "learning_rate": 1.7429990152454787e-05, "loss": 0.4491, "step": 22088 }, { "epoch": 0.4684736272825603, "grad_norm": 0.3456682860851288, "learning_rate": 1.7429766942400543e-05, "loss": 0.39, "step": 22089 }, { "epoch": 0.46849483574049333, "grad_norm": 0.3361126482486725, "learning_rate": 1.742954372408298e-05, "loss": 0.4669, "step": 22090 }, { "epoch": 0.46851604419842635, "grad_norm": 0.37774017453193665, "learning_rate": 1.7429320497502342e-05, "loss": 0.5813, "step": 22091 }, { "epoch": 0.46853725265635937, "grad_norm": 0.49501004815101624, "learning_rate": 1.7429097262658877e-05, "loss": 0.4912, "step": 22092 }, { "epoch": 0.4685584611142924, "grad_norm": 0.35190171003341675, "learning_rate": 1.7428874019552835e-05, "loss": 0.5004, "step": 22093 }, { "epoch": 0.4685796695722254, "grad_norm": 0.32308509945869446, "learning_rate": 1.7428650768184464e-05, "loss": 0.5101, "step": 22094 }, { "epoch": 0.46860087803015843, "grad_norm": 0.35652467608451843, "learning_rate": 1.7428427508554016e-05, "loss": 0.5334, "step": 22095 }, { "epoch": 0.46862208648809145, "grad_norm": 0.34933799505233765, "learning_rate": 1.742820424066173e-05, "loss": 0.4824, "step": 22096 }, { "epoch": 0.46864329494602447, "grad_norm": 0.3832327127456665, "learning_rate": 1.7427980964507864e-05, "loss": 0.5096, "step": 22097 }, { "epoch": 0.4686645034039575, "grad_norm": 0.34258177876472473, "learning_rate": 1.7427757680092662e-05, "loss": 0.5186, "step": 22098 }, { "epoch": 0.4686857118618905, "grad_norm": 0.392522394657135, "learning_rate": 1.7427534387416368e-05, "loss": 0.5352, "step": 22099 }, { "epoch": 0.46870692031982353, "grad_norm": 0.38863280415534973, "learning_rate": 1.7427311086479237e-05, "loss": 0.5487, "step": 22100 }, { "epoch": 0.46872812877775655, "grad_norm": 0.37945306301116943, "learning_rate": 1.7427087777281518e-05, "loss": 0.5752, "step": 22101 }, { "epoch": 0.46874933723568957, "grad_norm": 0.34925925731658936, "learning_rate": 1.7426864459823452e-05, "loss": 0.5309, "step": 22102 }, { "epoch": 0.46877054569362264, "grad_norm": 0.39629414677619934, "learning_rate": 1.7426641134105294e-05, "loss": 0.5481, "step": 22103 }, { "epoch": 0.46879175415155566, "grad_norm": 0.5248058438301086, "learning_rate": 1.742641780012729e-05, "loss": 0.4565, "step": 22104 }, { "epoch": 0.4688129626094887, "grad_norm": 0.3226916790008545, "learning_rate": 1.742619445788969e-05, "loss": 0.4638, "step": 22105 }, { "epoch": 0.4688341710674217, "grad_norm": 0.37393710017204285, "learning_rate": 1.742597110739274e-05, "loss": 0.4313, "step": 22106 }, { "epoch": 0.4688553795253547, "grad_norm": 0.37948131561279297, "learning_rate": 1.7425747748636687e-05, "loss": 0.5603, "step": 22107 }, { "epoch": 0.46887658798328774, "grad_norm": 0.36460280418395996, "learning_rate": 1.7425524381621784e-05, "loss": 0.6229, "step": 22108 }, { "epoch": 0.46889779644122076, "grad_norm": 0.31790128350257874, "learning_rate": 1.7425301006348275e-05, "loss": 0.5506, "step": 22109 }, { "epoch": 0.4689190048991538, "grad_norm": 0.3842436671257019, "learning_rate": 1.7425077622816415e-05, "loss": 0.476, "step": 22110 }, { "epoch": 0.4689402133570868, "grad_norm": 0.33341357111930847, "learning_rate": 1.7424854231026445e-05, "loss": 0.513, "step": 22111 }, { "epoch": 0.4689614218150198, "grad_norm": 0.32978659868240356, "learning_rate": 1.7424630830978618e-05, "loss": 0.5089, "step": 22112 }, { "epoch": 0.46898263027295284, "grad_norm": 0.3902350664138794, "learning_rate": 1.7424407422673182e-05, "loss": 0.5343, "step": 22113 }, { "epoch": 0.46900383873088586, "grad_norm": 0.31906309723854065, "learning_rate": 1.7424184006110382e-05, "loss": 0.479, "step": 22114 }, { "epoch": 0.4690250471888189, "grad_norm": 0.35948261618614197, "learning_rate": 1.7423960581290473e-05, "loss": 0.5671, "step": 22115 }, { "epoch": 0.4690462556467519, "grad_norm": 0.3641645312309265, "learning_rate": 1.7423737148213695e-05, "loss": 0.4881, "step": 22116 }, { "epoch": 0.469067464104685, "grad_norm": 0.4464772939682007, "learning_rate": 1.7423513706880303e-05, "loss": 0.5119, "step": 22117 }, { "epoch": 0.469088672562618, "grad_norm": 0.31862059235572815, "learning_rate": 1.7423290257290543e-05, "loss": 0.4053, "step": 22118 }, { "epoch": 0.469109881020551, "grad_norm": 0.3974415957927704, "learning_rate": 1.7423066799444667e-05, "loss": 0.4332, "step": 22119 }, { "epoch": 0.46913108947848403, "grad_norm": 0.33774054050445557, "learning_rate": 1.7422843333342914e-05, "loss": 0.5029, "step": 22120 }, { "epoch": 0.46915229793641705, "grad_norm": 0.3558516204357147, "learning_rate": 1.7422619858985545e-05, "loss": 0.5754, "step": 22121 }, { "epoch": 0.46917350639435007, "grad_norm": 0.3569756746292114, "learning_rate": 1.7422396376372802e-05, "loss": 0.4681, "step": 22122 }, { "epoch": 0.4691947148522831, "grad_norm": 0.3446959853172302, "learning_rate": 1.7422172885504937e-05, "loss": 0.5035, "step": 22123 }, { "epoch": 0.4692159233102161, "grad_norm": 0.3538726568222046, "learning_rate": 1.7421949386382187e-05, "loss": 0.4932, "step": 22124 }, { "epoch": 0.46923713176814913, "grad_norm": 0.35742127895355225, "learning_rate": 1.7421725879004817e-05, "loss": 0.4899, "step": 22125 }, { "epoch": 0.46925834022608215, "grad_norm": 0.3788824677467346, "learning_rate": 1.7421502363373063e-05, "loss": 0.5017, "step": 22126 }, { "epoch": 0.46927954868401517, "grad_norm": 0.33989453315734863, "learning_rate": 1.7421278839487183e-05, "loss": 0.4697, "step": 22127 }, { "epoch": 0.4693007571419482, "grad_norm": 0.3519865572452545, "learning_rate": 1.742105530734742e-05, "loss": 0.5566, "step": 22128 }, { "epoch": 0.4693219655998812, "grad_norm": 0.35748690366744995, "learning_rate": 1.742083176695402e-05, "loss": 0.5817, "step": 22129 }, { "epoch": 0.4693431740578143, "grad_norm": 0.485509991645813, "learning_rate": 1.742060821830724e-05, "loss": 0.449, "step": 22130 }, { "epoch": 0.4693643825157473, "grad_norm": 0.3688405454158783, "learning_rate": 1.7420384661407323e-05, "loss": 0.4691, "step": 22131 }, { "epoch": 0.4693855909736803, "grad_norm": 0.3367242217063904, "learning_rate": 1.7420161096254516e-05, "loss": 0.5672, "step": 22132 }, { "epoch": 0.46940679943161334, "grad_norm": 0.3095252513885498, "learning_rate": 1.7419937522849075e-05, "loss": 0.4949, "step": 22133 }, { "epoch": 0.46942800788954636, "grad_norm": 0.37688785791397095, "learning_rate": 1.7419713941191238e-05, "loss": 0.4855, "step": 22134 }, { "epoch": 0.4694492163474794, "grad_norm": 0.4301653802394867, "learning_rate": 1.7419490351281264e-05, "loss": 0.5152, "step": 22135 }, { "epoch": 0.4694704248054124, "grad_norm": 0.3732747435569763, "learning_rate": 1.7419266753119397e-05, "loss": 0.4755, "step": 22136 }, { "epoch": 0.4694916332633454, "grad_norm": 0.37986642122268677, "learning_rate": 1.7419043146705884e-05, "loss": 0.5844, "step": 22137 }, { "epoch": 0.46951284172127844, "grad_norm": 0.3352760374546051, "learning_rate": 1.741881953204098e-05, "loss": 0.4611, "step": 22138 }, { "epoch": 0.46953405017921146, "grad_norm": 0.33213913440704346, "learning_rate": 1.7418595909124924e-05, "loss": 0.434, "step": 22139 }, { "epoch": 0.4695552586371445, "grad_norm": 0.3406360149383545, "learning_rate": 1.7418372277957967e-05, "loss": 0.4616, "step": 22140 }, { "epoch": 0.4695764670950775, "grad_norm": 0.32076629996299744, "learning_rate": 1.741814863854037e-05, "loss": 0.4379, "step": 22141 }, { "epoch": 0.4695976755530105, "grad_norm": 0.3642350137233734, "learning_rate": 1.741792499087237e-05, "loss": 0.5151, "step": 22142 }, { "epoch": 0.46961888401094354, "grad_norm": 0.393707811832428, "learning_rate": 1.7417701334954212e-05, "loss": 0.5577, "step": 22143 }, { "epoch": 0.4696400924688766, "grad_norm": 0.3668231964111328, "learning_rate": 1.7417477670786158e-05, "loss": 0.5092, "step": 22144 }, { "epoch": 0.46966130092680963, "grad_norm": 0.32010263204574585, "learning_rate": 1.741725399836845e-05, "loss": 0.5196, "step": 22145 }, { "epoch": 0.46968250938474265, "grad_norm": 0.444163978099823, "learning_rate": 1.741703031770133e-05, "loss": 0.4832, "step": 22146 }, { "epoch": 0.46970371784267567, "grad_norm": 0.3253088593482971, "learning_rate": 1.7416806628785058e-05, "loss": 0.4496, "step": 22147 }, { "epoch": 0.4697249263006087, "grad_norm": 0.5600354671478271, "learning_rate": 1.741658293161988e-05, "loss": 0.4518, "step": 22148 }, { "epoch": 0.4697461347585417, "grad_norm": 0.32576945424079895, "learning_rate": 1.741635922620604e-05, "loss": 0.5208, "step": 22149 }, { "epoch": 0.46976734321647473, "grad_norm": 0.3204760253429413, "learning_rate": 1.7416135512543787e-05, "loss": 0.5621, "step": 22150 }, { "epoch": 0.46978855167440775, "grad_norm": 0.34109824895858765, "learning_rate": 1.7415911790633378e-05, "loss": 0.4304, "step": 22151 }, { "epoch": 0.46980976013234077, "grad_norm": 0.3112774193286896, "learning_rate": 1.7415688060475053e-05, "loss": 0.4373, "step": 22152 }, { "epoch": 0.4698309685902738, "grad_norm": 0.3994225561618805, "learning_rate": 1.7415464322069063e-05, "loss": 0.4816, "step": 22153 }, { "epoch": 0.4698521770482068, "grad_norm": 0.37589186429977417, "learning_rate": 1.7415240575415663e-05, "loss": 0.5303, "step": 22154 }, { "epoch": 0.46987338550613983, "grad_norm": 0.3632127344608307, "learning_rate": 1.7415016820515092e-05, "loss": 0.4705, "step": 22155 }, { "epoch": 0.46989459396407285, "grad_norm": 0.3357371389865875, "learning_rate": 1.7414793057367606e-05, "loss": 0.4859, "step": 22156 }, { "epoch": 0.46991580242200587, "grad_norm": 0.3349408507347107, "learning_rate": 1.7414569285973454e-05, "loss": 0.4857, "step": 22157 }, { "epoch": 0.46993701087993894, "grad_norm": 0.34664487838745117, "learning_rate": 1.7414345506332877e-05, "loss": 0.5661, "step": 22158 }, { "epoch": 0.46995821933787196, "grad_norm": 0.3438064754009247, "learning_rate": 1.7414121718446133e-05, "loss": 0.4983, "step": 22159 }, { "epoch": 0.469979427795805, "grad_norm": 0.3723771274089813, "learning_rate": 1.7413897922313466e-05, "loss": 0.532, "step": 22160 }, { "epoch": 0.470000636253738, "grad_norm": 0.38749855756759644, "learning_rate": 1.7413674117935126e-05, "loss": 0.55, "step": 22161 }, { "epoch": 0.470021844711671, "grad_norm": 0.34221768379211426, "learning_rate": 1.741345030531136e-05, "loss": 0.5624, "step": 22162 }, { "epoch": 0.47004305316960404, "grad_norm": 0.3317825198173523, "learning_rate": 1.741322648444242e-05, "loss": 0.4822, "step": 22163 }, { "epoch": 0.47006426162753706, "grad_norm": 0.3886147141456604, "learning_rate": 1.7413002655328557e-05, "loss": 0.5252, "step": 22164 }, { "epoch": 0.4700854700854701, "grad_norm": 0.339465469121933, "learning_rate": 1.7412778817970014e-05, "loss": 0.522, "step": 22165 }, { "epoch": 0.4701066785434031, "grad_norm": 0.40808218717575073, "learning_rate": 1.7412554972367048e-05, "loss": 0.534, "step": 22166 }, { "epoch": 0.4701278870013361, "grad_norm": 0.39478376507759094, "learning_rate": 1.7412331118519897e-05, "loss": 0.5079, "step": 22167 }, { "epoch": 0.47014909545926914, "grad_norm": 0.39803874492645264, "learning_rate": 1.7412107256428818e-05, "loss": 0.5859, "step": 22168 }, { "epoch": 0.47017030391720216, "grad_norm": 0.34644079208374023, "learning_rate": 1.7411883386094055e-05, "loss": 0.5183, "step": 22169 }, { "epoch": 0.4701915123751352, "grad_norm": 0.33187878131866455, "learning_rate": 1.7411659507515863e-05, "loss": 0.4708, "step": 22170 }, { "epoch": 0.47021272083306825, "grad_norm": 0.3227195143699646, "learning_rate": 1.7411435620694485e-05, "loss": 0.5324, "step": 22171 }, { "epoch": 0.4702339292910013, "grad_norm": 0.34478896856307983, "learning_rate": 1.7411211725630175e-05, "loss": 0.5993, "step": 22172 }, { "epoch": 0.4702551377489343, "grad_norm": 0.3697793781757355, "learning_rate": 1.7410987822323178e-05, "loss": 0.5294, "step": 22173 }, { "epoch": 0.4702763462068673, "grad_norm": 0.3477330803871155, "learning_rate": 1.7410763910773747e-05, "loss": 0.5607, "step": 22174 }, { "epoch": 0.47029755466480033, "grad_norm": 0.37758004665374756, "learning_rate": 1.7410539990982126e-05, "loss": 0.4774, "step": 22175 }, { "epoch": 0.47031876312273335, "grad_norm": 0.6524693965911865, "learning_rate": 1.7410316062948567e-05, "loss": 0.5755, "step": 22176 }, { "epoch": 0.47033997158066637, "grad_norm": 0.3344268500804901, "learning_rate": 1.7410092126673323e-05, "loss": 0.4973, "step": 22177 }, { "epoch": 0.4703611800385994, "grad_norm": 0.3222109377384186, "learning_rate": 1.7409868182156634e-05, "loss": 0.5246, "step": 22178 }, { "epoch": 0.4703823884965324, "grad_norm": 0.5772206783294678, "learning_rate": 1.7409644229398756e-05, "loss": 0.4431, "step": 22179 }, { "epoch": 0.47040359695446543, "grad_norm": 0.3554914891719818, "learning_rate": 1.7409420268399934e-05, "loss": 0.5078, "step": 22180 }, { "epoch": 0.47042480541239845, "grad_norm": 0.33258894085884094, "learning_rate": 1.740919629916042e-05, "loss": 0.5027, "step": 22181 }, { "epoch": 0.47044601387033147, "grad_norm": 0.48297935724258423, "learning_rate": 1.7408972321680466e-05, "loss": 0.4714, "step": 22182 }, { "epoch": 0.4704672223282645, "grad_norm": 0.3863525986671448, "learning_rate": 1.7408748335960314e-05, "loss": 0.5083, "step": 22183 }, { "epoch": 0.4704884307861975, "grad_norm": 0.3744523525238037, "learning_rate": 1.740852434200022e-05, "loss": 0.471, "step": 22184 }, { "epoch": 0.4705096392441306, "grad_norm": 0.36750277876853943, "learning_rate": 1.7408300339800424e-05, "loss": 0.4538, "step": 22185 }, { "epoch": 0.4705308477020636, "grad_norm": 0.32228726148605347, "learning_rate": 1.7408076329361184e-05, "loss": 0.4025, "step": 22186 }, { "epoch": 0.4705520561599966, "grad_norm": 0.38554421067237854, "learning_rate": 1.7407852310682746e-05, "loss": 0.4842, "step": 22187 }, { "epoch": 0.47057326461792964, "grad_norm": 0.5534997582435608, "learning_rate": 1.7407628283765357e-05, "loss": 0.5389, "step": 22188 }, { "epoch": 0.47059447307586266, "grad_norm": 0.32949045300483704, "learning_rate": 1.740740424860927e-05, "loss": 0.4677, "step": 22189 }, { "epoch": 0.4706156815337957, "grad_norm": 0.3779323399066925, "learning_rate": 1.740718020521473e-05, "loss": 0.5046, "step": 22190 }, { "epoch": 0.4706368899917287, "grad_norm": 0.33578068017959595, "learning_rate": 1.740695615358199e-05, "loss": 0.5412, "step": 22191 }, { "epoch": 0.4706580984496617, "grad_norm": 0.4187053143978119, "learning_rate": 1.74067320937113e-05, "loss": 0.6209, "step": 22192 }, { "epoch": 0.47067930690759474, "grad_norm": 0.36476871371269226, "learning_rate": 1.7406508025602904e-05, "loss": 0.4811, "step": 22193 }, { "epoch": 0.47070051536552776, "grad_norm": 0.36172375082969666, "learning_rate": 1.7406283949257054e-05, "loss": 0.5749, "step": 22194 }, { "epoch": 0.4707217238234608, "grad_norm": 0.3190310597419739, "learning_rate": 1.7406059864673998e-05, "loss": 0.4424, "step": 22195 }, { "epoch": 0.4707429322813938, "grad_norm": 0.38624346256256104, "learning_rate": 1.740583577185399e-05, "loss": 0.5397, "step": 22196 }, { "epoch": 0.4707641407393268, "grad_norm": 0.3283151686191559, "learning_rate": 1.7405611670797275e-05, "loss": 0.4989, "step": 22197 }, { "epoch": 0.4707853491972599, "grad_norm": 0.3303869366645813, "learning_rate": 1.7405387561504103e-05, "loss": 0.5439, "step": 22198 }, { "epoch": 0.4708065576551929, "grad_norm": 0.3648796081542969, "learning_rate": 1.7405163443974724e-05, "loss": 0.5782, "step": 22199 }, { "epoch": 0.47082776611312593, "grad_norm": 0.3845517337322235, "learning_rate": 1.7404939318209387e-05, "loss": 0.5246, "step": 22200 }, { "epoch": 0.47084897457105895, "grad_norm": 0.4111456573009491, "learning_rate": 1.740471518420834e-05, "loss": 0.5391, "step": 22201 }, { "epoch": 0.470870183028992, "grad_norm": 0.36216026544570923, "learning_rate": 1.740449104197183e-05, "loss": 0.4615, "step": 22202 }, { "epoch": 0.470891391486925, "grad_norm": 0.3489731252193451, "learning_rate": 1.7404266891500113e-05, "loss": 0.495, "step": 22203 }, { "epoch": 0.470912599944858, "grad_norm": 0.31389743089675903, "learning_rate": 1.740404273279343e-05, "loss": 0.3835, "step": 22204 }, { "epoch": 0.47093380840279103, "grad_norm": 0.46030470728874207, "learning_rate": 1.740381856585204e-05, "loss": 0.4523, "step": 22205 }, { "epoch": 0.47095501686072405, "grad_norm": 0.36755526065826416, "learning_rate": 1.7403594390676187e-05, "loss": 0.444, "step": 22206 }, { "epoch": 0.47097622531865707, "grad_norm": 0.39467915892601013, "learning_rate": 1.740337020726612e-05, "loss": 0.62, "step": 22207 }, { "epoch": 0.4709974337765901, "grad_norm": 0.3936099112033844, "learning_rate": 1.7403146015622088e-05, "loss": 0.5241, "step": 22208 }, { "epoch": 0.4710186422345231, "grad_norm": 0.3424415588378906, "learning_rate": 1.740292181574434e-05, "loss": 0.5128, "step": 22209 }, { "epoch": 0.47103985069245613, "grad_norm": 0.3308562934398651, "learning_rate": 1.740269760763313e-05, "loss": 0.4444, "step": 22210 }, { "epoch": 0.47106105915038915, "grad_norm": 0.362871378660202, "learning_rate": 1.74024733912887e-05, "loss": 0.4439, "step": 22211 }, { "epoch": 0.4710822676083222, "grad_norm": 0.3279412090778351, "learning_rate": 1.740224916671131e-05, "loss": 0.4596, "step": 22212 }, { "epoch": 0.47110347606625524, "grad_norm": 0.3540961742401123, "learning_rate": 1.7402024933901197e-05, "loss": 0.5729, "step": 22213 }, { "epoch": 0.47112468452418826, "grad_norm": 0.38380229473114014, "learning_rate": 1.7401800692858616e-05, "loss": 0.508, "step": 22214 }, { "epoch": 0.4711458929821213, "grad_norm": 0.3291354477405548, "learning_rate": 1.740157644358382e-05, "loss": 0.5364, "step": 22215 }, { "epoch": 0.4711671014400543, "grad_norm": 0.3440570533275604, "learning_rate": 1.7401352186077053e-05, "loss": 0.4868, "step": 22216 }, { "epoch": 0.4711883098979873, "grad_norm": 0.3258368968963623, "learning_rate": 1.7401127920338566e-05, "loss": 0.4588, "step": 22217 }, { "epoch": 0.47120951835592034, "grad_norm": 0.38021358847618103, "learning_rate": 1.740090364636861e-05, "loss": 0.5173, "step": 22218 }, { "epoch": 0.47123072681385336, "grad_norm": 0.34106960892677307, "learning_rate": 1.7400679364167433e-05, "loss": 0.4399, "step": 22219 }, { "epoch": 0.4712519352717864, "grad_norm": 0.3281814455986023, "learning_rate": 1.7400455073735283e-05, "loss": 0.462, "step": 22220 }, { "epoch": 0.4712731437297194, "grad_norm": 0.35185709595680237, "learning_rate": 1.7400230775072415e-05, "loss": 0.578, "step": 22221 }, { "epoch": 0.4712943521876524, "grad_norm": 0.4133802056312561, "learning_rate": 1.740000646817907e-05, "loss": 0.4955, "step": 22222 }, { "epoch": 0.47131556064558544, "grad_norm": 0.4196920394897461, "learning_rate": 1.7399782153055502e-05, "loss": 0.4662, "step": 22223 }, { "epoch": 0.47133676910351846, "grad_norm": 0.3174147605895996, "learning_rate": 1.7399557829701966e-05, "loss": 0.536, "step": 22224 }, { "epoch": 0.4713579775614515, "grad_norm": 0.4998728036880493, "learning_rate": 1.7399333498118702e-05, "loss": 0.502, "step": 22225 }, { "epoch": 0.47137918601938456, "grad_norm": 0.33080849051475525, "learning_rate": 1.7399109158305964e-05, "loss": 0.4864, "step": 22226 }, { "epoch": 0.4714003944773176, "grad_norm": 0.40105411410331726, "learning_rate": 1.7398884810264003e-05, "loss": 0.4594, "step": 22227 }, { "epoch": 0.4714216029352506, "grad_norm": 0.3220251202583313, "learning_rate": 1.7398660453993065e-05, "loss": 0.5102, "step": 22228 }, { "epoch": 0.4714428113931836, "grad_norm": 0.4038117527961731, "learning_rate": 1.73984360894934e-05, "loss": 0.4705, "step": 22229 }, { "epoch": 0.47146401985111663, "grad_norm": 0.3263993561267853, "learning_rate": 1.7398211716765262e-05, "loss": 0.4841, "step": 22230 }, { "epoch": 0.47148522830904965, "grad_norm": 0.3495146930217743, "learning_rate": 1.7397987335808895e-05, "loss": 0.4684, "step": 22231 }, { "epoch": 0.4715064367669827, "grad_norm": 0.39239501953125, "learning_rate": 1.739776294662455e-05, "loss": 0.4708, "step": 22232 }, { "epoch": 0.4715276452249157, "grad_norm": 0.35069331526756287, "learning_rate": 1.7397538549212475e-05, "loss": 0.5096, "step": 22233 }, { "epoch": 0.4715488536828487, "grad_norm": 0.32424548268318176, "learning_rate": 1.739731414357293e-05, "loss": 0.4773, "step": 22234 }, { "epoch": 0.47157006214078173, "grad_norm": 0.3372419476509094, "learning_rate": 1.739708972970615e-05, "loss": 0.4843, "step": 22235 }, { "epoch": 0.47159127059871475, "grad_norm": 0.49184465408325195, "learning_rate": 1.7396865307612392e-05, "loss": 0.4962, "step": 22236 }, { "epoch": 0.47161247905664777, "grad_norm": 0.3583036959171295, "learning_rate": 1.7396640877291907e-05, "loss": 0.4811, "step": 22237 }, { "epoch": 0.4716336875145808, "grad_norm": 0.2938888967037201, "learning_rate": 1.7396416438744942e-05, "loss": 0.4161, "step": 22238 }, { "epoch": 0.47165489597251387, "grad_norm": 0.3236910104751587, "learning_rate": 1.7396191991971743e-05, "loss": 0.4873, "step": 22239 }, { "epoch": 0.4716761044304469, "grad_norm": 0.3493803143501282, "learning_rate": 1.7395967536972563e-05, "loss": 0.5542, "step": 22240 }, { "epoch": 0.4716973128883799, "grad_norm": 0.3806806206703186, "learning_rate": 1.739574307374766e-05, "loss": 0.4938, "step": 22241 }, { "epoch": 0.4717185213463129, "grad_norm": 0.3404557406902313, "learning_rate": 1.7395518602297267e-05, "loss": 0.5296, "step": 22242 }, { "epoch": 0.47173972980424594, "grad_norm": 0.3334519565105438, "learning_rate": 1.7395294122621646e-05, "loss": 0.4795, "step": 22243 }, { "epoch": 0.47176093826217896, "grad_norm": 0.3404923379421234, "learning_rate": 1.7395069634721043e-05, "loss": 0.4436, "step": 22244 }, { "epoch": 0.471782146720112, "grad_norm": 0.35578566789627075, "learning_rate": 1.739484513859571e-05, "loss": 0.554, "step": 22245 }, { "epoch": 0.471803355178045, "grad_norm": 0.3415120542049408, "learning_rate": 1.7394620634245893e-05, "loss": 0.4363, "step": 22246 }, { "epoch": 0.471824563635978, "grad_norm": 0.34084439277648926, "learning_rate": 1.7394396121671844e-05, "loss": 0.5242, "step": 22247 }, { "epoch": 0.47184577209391104, "grad_norm": 0.31267908215522766, "learning_rate": 1.739417160087381e-05, "loss": 0.4467, "step": 22248 }, { "epoch": 0.47186698055184406, "grad_norm": 0.35475292801856995, "learning_rate": 1.739394707185204e-05, "loss": 0.5544, "step": 22249 }, { "epoch": 0.4718881890097771, "grad_norm": 0.38475027680397034, "learning_rate": 1.7393722534606792e-05, "loss": 0.5734, "step": 22250 }, { "epoch": 0.4719093974677101, "grad_norm": 0.3483087718486786, "learning_rate": 1.739349798913831e-05, "loss": 0.4826, "step": 22251 }, { "epoch": 0.4719306059256431, "grad_norm": 0.39996472001075745, "learning_rate": 1.739327343544684e-05, "loss": 0.4794, "step": 22252 }, { "epoch": 0.4719518143835762, "grad_norm": 0.3753780722618103, "learning_rate": 1.7393048873532636e-05, "loss": 0.4903, "step": 22253 }, { "epoch": 0.4719730228415092, "grad_norm": 0.356161504983902, "learning_rate": 1.739282430339595e-05, "loss": 0.5029, "step": 22254 }, { "epoch": 0.47199423129944224, "grad_norm": 0.3520353138446808, "learning_rate": 1.739259972503703e-05, "loss": 0.5207, "step": 22255 }, { "epoch": 0.47201543975737525, "grad_norm": 0.3270376920700073, "learning_rate": 1.739237513845612e-05, "loss": 0.4751, "step": 22256 }, { "epoch": 0.4720366482153083, "grad_norm": 0.40913403034210205, "learning_rate": 1.7392150543653477e-05, "loss": 0.6343, "step": 22257 }, { "epoch": 0.4720578566732413, "grad_norm": 0.36104533076286316, "learning_rate": 1.7391925940629346e-05, "loss": 0.505, "step": 22258 }, { "epoch": 0.4720790651311743, "grad_norm": 0.34615686535835266, "learning_rate": 1.7391701329383982e-05, "loss": 0.5443, "step": 22259 }, { "epoch": 0.47210027358910733, "grad_norm": 0.3789295256137848, "learning_rate": 1.739147670991763e-05, "loss": 0.5341, "step": 22260 }, { "epoch": 0.47212148204704035, "grad_norm": 0.34219878911972046, "learning_rate": 1.7391252082230544e-05, "loss": 0.4709, "step": 22261 }, { "epoch": 0.4721426905049734, "grad_norm": 0.34040695428848267, "learning_rate": 1.7391027446322972e-05, "loss": 0.4688, "step": 22262 }, { "epoch": 0.4721638989629064, "grad_norm": 0.36041733622550964, "learning_rate": 1.739080280219516e-05, "loss": 0.5528, "step": 22263 }, { "epoch": 0.4721851074208394, "grad_norm": 0.3397558331489563, "learning_rate": 1.7390578149847364e-05, "loss": 0.4667, "step": 22264 }, { "epoch": 0.47220631587877243, "grad_norm": 0.34947124123573303, "learning_rate": 1.739035348927983e-05, "loss": 0.4617, "step": 22265 }, { "epoch": 0.47222752433670545, "grad_norm": 0.34484389424324036, "learning_rate": 1.739012882049281e-05, "loss": 0.5235, "step": 22266 }, { "epoch": 0.4722487327946385, "grad_norm": 0.36739176511764526, "learning_rate": 1.7389904143486553e-05, "loss": 0.4523, "step": 22267 }, { "epoch": 0.47226994125257155, "grad_norm": 0.37877076864242554, "learning_rate": 1.738967945826131e-05, "loss": 0.4784, "step": 22268 }, { "epoch": 0.47229114971050457, "grad_norm": 0.3997677266597748, "learning_rate": 1.7389454764817328e-05, "loss": 0.4929, "step": 22269 }, { "epoch": 0.4723123581684376, "grad_norm": 0.5027230978012085, "learning_rate": 1.738923006315486e-05, "loss": 0.4823, "step": 22270 }, { "epoch": 0.4723335666263706, "grad_norm": 0.33281242847442627, "learning_rate": 1.738900535327415e-05, "loss": 0.4824, "step": 22271 }, { "epoch": 0.4723547750843036, "grad_norm": 0.6037423610687256, "learning_rate": 1.7388780635175456e-05, "loss": 0.5266, "step": 22272 }, { "epoch": 0.47237598354223664, "grad_norm": 0.3531511127948761, "learning_rate": 1.7388555908859026e-05, "loss": 0.5628, "step": 22273 }, { "epoch": 0.47239719200016966, "grad_norm": 0.6384517550468445, "learning_rate": 1.7388331174325105e-05, "loss": 0.57, "step": 22274 }, { "epoch": 0.4724184004581027, "grad_norm": 0.3500926196575165, "learning_rate": 1.738810643157395e-05, "loss": 0.4277, "step": 22275 }, { "epoch": 0.4724396089160357, "grad_norm": 0.5969010591506958, "learning_rate": 1.73878816806058e-05, "loss": 0.4461, "step": 22276 }, { "epoch": 0.4724608173739687, "grad_norm": 0.33997100591659546, "learning_rate": 1.738765692142092e-05, "loss": 0.514, "step": 22277 }, { "epoch": 0.47248202583190174, "grad_norm": 0.3426109254360199, "learning_rate": 1.738743215401955e-05, "loss": 0.3404, "step": 22278 }, { "epoch": 0.47250323428983476, "grad_norm": 0.32421088218688965, "learning_rate": 1.738720737840194e-05, "loss": 0.4279, "step": 22279 }, { "epoch": 0.47252444274776784, "grad_norm": 0.5606143474578857, "learning_rate": 1.7386982594568345e-05, "loss": 0.5762, "step": 22280 }, { "epoch": 0.47254565120570086, "grad_norm": 0.33913183212280273, "learning_rate": 1.738675780251901e-05, "loss": 0.5064, "step": 22281 }, { "epoch": 0.4725668596636339, "grad_norm": 0.4735927879810333, "learning_rate": 1.7386533002254186e-05, "loss": 0.4707, "step": 22282 }, { "epoch": 0.4725880681215669, "grad_norm": 0.3781161308288574, "learning_rate": 1.738630819377413e-05, "loss": 0.5059, "step": 22283 }, { "epoch": 0.4726092765794999, "grad_norm": 0.3662513196468353, "learning_rate": 1.7386083377079078e-05, "loss": 0.5716, "step": 22284 }, { "epoch": 0.47263048503743293, "grad_norm": 0.3532477021217346, "learning_rate": 1.7385858552169293e-05, "loss": 0.4977, "step": 22285 }, { "epoch": 0.47265169349536595, "grad_norm": 0.32643839716911316, "learning_rate": 1.738563371904502e-05, "loss": 0.4546, "step": 22286 }, { "epoch": 0.472672901953299, "grad_norm": 0.3531193733215332, "learning_rate": 1.738540887770651e-05, "loss": 0.4758, "step": 22287 }, { "epoch": 0.472694110411232, "grad_norm": 0.4437708556652069, "learning_rate": 1.738518402815401e-05, "loss": 0.4682, "step": 22288 }, { "epoch": 0.472715318869165, "grad_norm": 0.34218406677246094, "learning_rate": 1.7384959170387774e-05, "loss": 0.545, "step": 22289 }, { "epoch": 0.47273652732709803, "grad_norm": 0.3806132674217224, "learning_rate": 1.7384734304408052e-05, "loss": 0.5036, "step": 22290 }, { "epoch": 0.47275773578503105, "grad_norm": 0.3100271224975586, "learning_rate": 1.738450943021509e-05, "loss": 0.4649, "step": 22291 }, { "epoch": 0.47277894424296407, "grad_norm": 0.4211312234401703, "learning_rate": 1.738428454780914e-05, "loss": 0.4685, "step": 22292 }, { "epoch": 0.4728001527008971, "grad_norm": 0.3378954231739044, "learning_rate": 1.7384059657190458e-05, "loss": 0.5339, "step": 22293 }, { "epoch": 0.47282136115883017, "grad_norm": 0.3424840271472931, "learning_rate": 1.7383834758359285e-05, "loss": 0.5138, "step": 22294 }, { "epoch": 0.4728425696167632, "grad_norm": 0.33145958185195923, "learning_rate": 1.7383609851315873e-05, "loss": 0.5766, "step": 22295 }, { "epoch": 0.4728637780746962, "grad_norm": 0.3411048352718353, "learning_rate": 1.738338493606048e-05, "loss": 0.4904, "step": 22296 }, { "epoch": 0.4728849865326292, "grad_norm": 0.38815274834632874, "learning_rate": 1.7383160012593347e-05, "loss": 0.4868, "step": 22297 }, { "epoch": 0.47290619499056225, "grad_norm": 0.30902576446533203, "learning_rate": 1.7382935080914728e-05, "loss": 0.4093, "step": 22298 }, { "epoch": 0.47292740344849526, "grad_norm": 0.3704397976398468, "learning_rate": 1.7382710141024873e-05, "loss": 0.4937, "step": 22299 }, { "epoch": 0.4729486119064283, "grad_norm": 0.3290858268737793, "learning_rate": 1.738248519292403e-05, "loss": 0.4379, "step": 22300 }, { "epoch": 0.4729698203643613, "grad_norm": 0.4678420424461365, "learning_rate": 1.7382260236612455e-05, "loss": 0.5268, "step": 22301 }, { "epoch": 0.4729910288222943, "grad_norm": 0.33842286467552185, "learning_rate": 1.738203527209039e-05, "loss": 0.503, "step": 22302 }, { "epoch": 0.47301223728022734, "grad_norm": 0.3551548719406128, "learning_rate": 1.7381810299358094e-05, "loss": 0.5531, "step": 22303 }, { "epoch": 0.47303344573816036, "grad_norm": 0.3494158983230591, "learning_rate": 1.7381585318415808e-05, "loss": 0.5127, "step": 22304 }, { "epoch": 0.4730546541960934, "grad_norm": 0.33594006299972534, "learning_rate": 1.738136032926379e-05, "loss": 0.4808, "step": 22305 }, { "epoch": 0.4730758626540264, "grad_norm": 0.34123387932777405, "learning_rate": 1.7381135331902286e-05, "loss": 0.4634, "step": 22306 }, { "epoch": 0.4730970711119594, "grad_norm": 0.34591948986053467, "learning_rate": 1.7380910326331548e-05, "loss": 0.4629, "step": 22307 }, { "epoch": 0.4731182795698925, "grad_norm": 0.3416807949542999, "learning_rate": 1.7380685312551824e-05, "loss": 0.6487, "step": 22308 }, { "epoch": 0.4731394880278255, "grad_norm": 0.4805546998977661, "learning_rate": 1.7380460290563368e-05, "loss": 0.4992, "step": 22309 }, { "epoch": 0.47316069648575854, "grad_norm": 0.3392867147922516, "learning_rate": 1.7380235260366428e-05, "loss": 0.492, "step": 22310 }, { "epoch": 0.47318190494369156, "grad_norm": 0.39914849400520325, "learning_rate": 1.7380010221961254e-05, "loss": 0.4752, "step": 22311 }, { "epoch": 0.4732031134016246, "grad_norm": 0.39424169063568115, "learning_rate": 1.7379785175348097e-05, "loss": 0.5119, "step": 22312 }, { "epoch": 0.4732243218595576, "grad_norm": 0.3495161235332489, "learning_rate": 1.7379560120527207e-05, "loss": 0.5575, "step": 22313 }, { "epoch": 0.4732455303174906, "grad_norm": 0.3990658223628998, "learning_rate": 1.7379335057498833e-05, "loss": 0.4893, "step": 22314 }, { "epoch": 0.47326673877542363, "grad_norm": 0.4096420109272003, "learning_rate": 1.7379109986263228e-05, "loss": 0.3636, "step": 22315 }, { "epoch": 0.47328794723335665, "grad_norm": 0.30823880434036255, "learning_rate": 1.7378884906820643e-05, "loss": 0.5183, "step": 22316 }, { "epoch": 0.4733091556912897, "grad_norm": 0.32492348551750183, "learning_rate": 1.7378659819171324e-05, "loss": 0.4684, "step": 22317 }, { "epoch": 0.4733303641492227, "grad_norm": 0.4087267518043518, "learning_rate": 1.7378434723315524e-05, "loss": 0.6019, "step": 22318 }, { "epoch": 0.4733515726071557, "grad_norm": 0.374656617641449, "learning_rate": 1.737820961925349e-05, "loss": 0.5201, "step": 22319 }, { "epoch": 0.47337278106508873, "grad_norm": 0.32841163873672485, "learning_rate": 1.737798450698548e-05, "loss": 0.4704, "step": 22320 }, { "epoch": 0.4733939895230218, "grad_norm": 0.3256062865257263, "learning_rate": 1.7377759386511738e-05, "loss": 0.478, "step": 22321 }, { "epoch": 0.4734151979809548, "grad_norm": 0.36127808690071106, "learning_rate": 1.7377534257832517e-05, "loss": 0.5533, "step": 22322 }, { "epoch": 0.47343640643888785, "grad_norm": 0.34728899598121643, "learning_rate": 1.7377309120948063e-05, "loss": 0.5249, "step": 22323 }, { "epoch": 0.47345761489682087, "grad_norm": 0.5693883299827576, "learning_rate": 1.7377083975858632e-05, "loss": 0.5414, "step": 22324 }, { "epoch": 0.4734788233547539, "grad_norm": 0.3481392562389374, "learning_rate": 1.7376858822564474e-05, "loss": 0.4957, "step": 22325 }, { "epoch": 0.4735000318126869, "grad_norm": 0.35781607031822205, "learning_rate": 1.7376633661065833e-05, "loss": 0.5745, "step": 22326 }, { "epoch": 0.4735212402706199, "grad_norm": 0.33759090304374695, "learning_rate": 1.737640849136297e-05, "loss": 0.5333, "step": 22327 }, { "epoch": 0.47354244872855294, "grad_norm": 0.34753429889678955, "learning_rate": 1.7376183313456128e-05, "loss": 0.4937, "step": 22328 }, { "epoch": 0.47356365718648596, "grad_norm": 0.38911786675453186, "learning_rate": 1.7375958127345555e-05, "loss": 0.4415, "step": 22329 }, { "epoch": 0.473584865644419, "grad_norm": 1.4737169742584229, "learning_rate": 1.737573293303151e-05, "loss": 0.5285, "step": 22330 }, { "epoch": 0.473606074102352, "grad_norm": 0.4016357362270355, "learning_rate": 1.7375507730514237e-05, "loss": 0.5991, "step": 22331 }, { "epoch": 0.473627282560285, "grad_norm": 0.41394755244255066, "learning_rate": 1.737528251979399e-05, "loss": 0.5085, "step": 22332 }, { "epoch": 0.47364849101821804, "grad_norm": 0.31804296374320984, "learning_rate": 1.7375057300871014e-05, "loss": 0.4915, "step": 22333 }, { "epoch": 0.47366969947615106, "grad_norm": 0.3662821650505066, "learning_rate": 1.7374832073745568e-05, "loss": 0.497, "step": 22334 }, { "epoch": 0.47369090793408414, "grad_norm": 0.36183401942253113, "learning_rate": 1.7374606838417895e-05, "loss": 0.4629, "step": 22335 }, { "epoch": 0.47371211639201716, "grad_norm": 0.38001328706741333, "learning_rate": 1.7374381594888246e-05, "loss": 0.4241, "step": 22336 }, { "epoch": 0.4737333248499502, "grad_norm": 0.41321316361427307, "learning_rate": 1.737415634315688e-05, "loss": 0.5555, "step": 22337 }, { "epoch": 0.4737545333078832, "grad_norm": 0.3699810802936554, "learning_rate": 1.7373931083224038e-05, "loss": 0.5843, "step": 22338 }, { "epoch": 0.4737757417658162, "grad_norm": 0.36330392956733704, "learning_rate": 1.7373705815089972e-05, "loss": 0.5122, "step": 22339 }, { "epoch": 0.47379695022374924, "grad_norm": 0.3342965841293335, "learning_rate": 1.7373480538754938e-05, "loss": 0.4979, "step": 22340 }, { "epoch": 0.47381815868168226, "grad_norm": 0.44332873821258545, "learning_rate": 1.7373255254219184e-05, "loss": 0.4628, "step": 22341 }, { "epoch": 0.4738393671396153, "grad_norm": 0.3901708126068115, "learning_rate": 1.7373029961482955e-05, "loss": 0.6039, "step": 22342 }, { "epoch": 0.4738605755975483, "grad_norm": 0.3468768000602722, "learning_rate": 1.7372804660546506e-05, "loss": 0.5144, "step": 22343 }, { "epoch": 0.4738817840554813, "grad_norm": 0.3061918616294861, "learning_rate": 1.7372579351410093e-05, "loss": 0.4193, "step": 22344 }, { "epoch": 0.47390299251341433, "grad_norm": 0.35696902871131897, "learning_rate": 1.7372354034073958e-05, "loss": 0.5099, "step": 22345 }, { "epoch": 0.47392420097134735, "grad_norm": 0.3345535099506378, "learning_rate": 1.7372128708538353e-05, "loss": 0.5254, "step": 22346 }, { "epoch": 0.4739454094292804, "grad_norm": 0.4040021002292633, "learning_rate": 1.7371903374803535e-05, "loss": 0.5064, "step": 22347 }, { "epoch": 0.47396661788721345, "grad_norm": 0.3841414749622345, "learning_rate": 1.737167803286975e-05, "loss": 0.5573, "step": 22348 }, { "epoch": 0.47398782634514647, "grad_norm": 0.3721901476383209, "learning_rate": 1.7371452682737243e-05, "loss": 0.5632, "step": 22349 }, { "epoch": 0.4740090348030795, "grad_norm": 0.410734087228775, "learning_rate": 1.737122732440627e-05, "loss": 0.5118, "step": 22350 }, { "epoch": 0.4740302432610125, "grad_norm": 0.38882455229759216, "learning_rate": 1.737100195787709e-05, "loss": 0.657, "step": 22351 }, { "epoch": 0.4740514517189455, "grad_norm": 0.34835731983184814, "learning_rate": 1.737077658314994e-05, "loss": 0.4821, "step": 22352 }, { "epoch": 0.47407266017687855, "grad_norm": 0.32962852716445923, "learning_rate": 1.737055120022508e-05, "loss": 0.4752, "step": 22353 }, { "epoch": 0.47409386863481157, "grad_norm": 0.39333033561706543, "learning_rate": 1.737032580910275e-05, "loss": 0.5626, "step": 22354 }, { "epoch": 0.4741150770927446, "grad_norm": 0.37956541776657104, "learning_rate": 1.7370100409783215e-05, "loss": 0.4868, "step": 22355 }, { "epoch": 0.4741362855506776, "grad_norm": 0.3629648983478546, "learning_rate": 1.7369875002266717e-05, "loss": 0.4767, "step": 22356 }, { "epoch": 0.4741574940086106, "grad_norm": 0.39168334007263184, "learning_rate": 1.736964958655351e-05, "loss": 0.5095, "step": 22357 }, { "epoch": 0.47417870246654364, "grad_norm": 0.3446965515613556, "learning_rate": 1.7369424162643837e-05, "loss": 0.5451, "step": 22358 }, { "epoch": 0.47419991092447666, "grad_norm": 0.3360268771648407, "learning_rate": 1.7369198730537956e-05, "loss": 0.4776, "step": 22359 }, { "epoch": 0.4742211193824097, "grad_norm": 0.562110960483551, "learning_rate": 1.7368973290236116e-05, "loss": 0.5403, "step": 22360 }, { "epoch": 0.4742423278403427, "grad_norm": 0.4876566231250763, "learning_rate": 1.736874784173857e-05, "loss": 0.4748, "step": 22361 }, { "epoch": 0.4742635362982758, "grad_norm": 0.371065229177475, "learning_rate": 1.7368522385045567e-05, "loss": 0.467, "step": 22362 }, { "epoch": 0.4742847447562088, "grad_norm": 0.3752537965774536, "learning_rate": 1.7368296920157356e-05, "loss": 0.4708, "step": 22363 }, { "epoch": 0.4743059532141418, "grad_norm": 0.3812013566493988, "learning_rate": 1.736807144707419e-05, "loss": 0.539, "step": 22364 }, { "epoch": 0.47432716167207484, "grad_norm": 0.3657604157924652, "learning_rate": 1.7367845965796317e-05, "loss": 0.515, "step": 22365 }, { "epoch": 0.47434837013000786, "grad_norm": 0.3541681468486786, "learning_rate": 1.736762047632399e-05, "loss": 0.4895, "step": 22366 }, { "epoch": 0.4743695785879409, "grad_norm": 0.35595250129699707, "learning_rate": 1.7367394978657462e-05, "loss": 0.5593, "step": 22367 }, { "epoch": 0.4743907870458739, "grad_norm": 0.3799729645252228, "learning_rate": 1.736716947279698e-05, "loss": 0.5063, "step": 22368 }, { "epoch": 0.4744119955038069, "grad_norm": 0.36449766159057617, "learning_rate": 1.7366943958742795e-05, "loss": 0.5269, "step": 22369 }, { "epoch": 0.47443320396173994, "grad_norm": 0.32382696866989136, "learning_rate": 1.736671843649516e-05, "loss": 0.5757, "step": 22370 }, { "epoch": 0.47445441241967296, "grad_norm": 0.38500532507896423, "learning_rate": 1.7366492906054327e-05, "loss": 0.5656, "step": 22371 }, { "epoch": 0.474475620877606, "grad_norm": 0.353240966796875, "learning_rate": 1.736626736742054e-05, "loss": 0.5631, "step": 22372 }, { "epoch": 0.474496829335539, "grad_norm": 0.34144389629364014, "learning_rate": 1.736604182059406e-05, "loss": 0.4946, "step": 22373 }, { "epoch": 0.474518037793472, "grad_norm": 0.3514881134033203, "learning_rate": 1.7365816265575125e-05, "loss": 0.5397, "step": 22374 }, { "epoch": 0.47453924625140503, "grad_norm": 0.34326452016830444, "learning_rate": 1.7365590702364e-05, "loss": 0.5369, "step": 22375 }, { "epoch": 0.4745604547093381, "grad_norm": 0.3775101602077484, "learning_rate": 1.7365365130960925e-05, "loss": 0.5435, "step": 22376 }, { "epoch": 0.47458166316727113, "grad_norm": 0.32761111855506897, "learning_rate": 1.7365139551366157e-05, "loss": 0.4471, "step": 22377 }, { "epoch": 0.47460287162520415, "grad_norm": 0.35638514161109924, "learning_rate": 1.7364913963579943e-05, "loss": 0.4946, "step": 22378 }, { "epoch": 0.47462408008313717, "grad_norm": 0.34253108501434326, "learning_rate": 1.7364688367602535e-05, "loss": 0.4753, "step": 22379 }, { "epoch": 0.4746452885410702, "grad_norm": 0.3948400914669037, "learning_rate": 1.736446276343419e-05, "loss": 0.5632, "step": 22380 }, { "epoch": 0.4746664969990032, "grad_norm": 0.3565034866333008, "learning_rate": 1.7364237151075147e-05, "loss": 0.5497, "step": 22381 }, { "epoch": 0.4746877054569362, "grad_norm": 0.32451722025871277, "learning_rate": 1.7364011530525665e-05, "loss": 0.5193, "step": 22382 }, { "epoch": 0.47470891391486925, "grad_norm": 0.35467201471328735, "learning_rate": 1.7363785901785998e-05, "loss": 0.4588, "step": 22383 }, { "epoch": 0.47473012237280227, "grad_norm": 0.34595856070518494, "learning_rate": 1.7363560264856386e-05, "loss": 0.4882, "step": 22384 }, { "epoch": 0.4747513308307353, "grad_norm": 0.41084033250808716, "learning_rate": 1.736333461973709e-05, "loss": 0.4179, "step": 22385 }, { "epoch": 0.4747725392886683, "grad_norm": 0.3361953794956207, "learning_rate": 1.7363108966428355e-05, "loss": 0.5546, "step": 22386 }, { "epoch": 0.4747937477466013, "grad_norm": 0.36274081468582153, "learning_rate": 1.7362883304930434e-05, "loss": 0.5163, "step": 22387 }, { "epoch": 0.47481495620453434, "grad_norm": 0.39058130979537964, "learning_rate": 1.736265763524358e-05, "loss": 0.5512, "step": 22388 }, { "epoch": 0.4748361646624674, "grad_norm": 0.3623443841934204, "learning_rate": 1.736243195736804e-05, "loss": 0.495, "step": 22389 }, { "epoch": 0.47485737312040044, "grad_norm": 0.3666553795337677, "learning_rate": 1.736220627130407e-05, "loss": 0.4917, "step": 22390 }, { "epoch": 0.47487858157833346, "grad_norm": 0.3300166726112366, "learning_rate": 1.7361980577051918e-05, "loss": 0.4798, "step": 22391 }, { "epoch": 0.4748997900362665, "grad_norm": 0.32851719856262207, "learning_rate": 1.7361754874611833e-05, "loss": 0.5065, "step": 22392 }, { "epoch": 0.4749209984941995, "grad_norm": 0.3276742696762085, "learning_rate": 1.7361529163984067e-05, "loss": 0.5401, "step": 22393 }, { "epoch": 0.4749422069521325, "grad_norm": 0.5766432881355286, "learning_rate": 1.7361303445168876e-05, "loss": 0.5402, "step": 22394 }, { "epoch": 0.47496341541006554, "grad_norm": 0.5455672144889832, "learning_rate": 1.7361077718166506e-05, "loss": 0.5487, "step": 22395 }, { "epoch": 0.47498462386799856, "grad_norm": 0.34749579429626465, "learning_rate": 1.736085198297721e-05, "loss": 0.452, "step": 22396 }, { "epoch": 0.4750058323259316, "grad_norm": 0.35050639510154724, "learning_rate": 1.736062623960124e-05, "loss": 0.5673, "step": 22397 }, { "epoch": 0.4750270407838646, "grad_norm": 0.5436002016067505, "learning_rate": 1.7360400488038836e-05, "loss": 0.5296, "step": 22398 }, { "epoch": 0.4750482492417976, "grad_norm": 0.38068318367004395, "learning_rate": 1.7360174728290267e-05, "loss": 0.5401, "step": 22399 }, { "epoch": 0.47506945769973064, "grad_norm": 0.3891787528991699, "learning_rate": 1.7359948960355775e-05, "loss": 0.4823, "step": 22400 }, { "epoch": 0.47509066615766365, "grad_norm": 0.4027126729488373, "learning_rate": 1.7359723184235608e-05, "loss": 0.5198, "step": 22401 }, { "epoch": 0.4751118746155967, "grad_norm": 0.3236767053604126, "learning_rate": 1.7359497399930025e-05, "loss": 0.4502, "step": 22402 }, { "epoch": 0.47513308307352975, "grad_norm": 0.38397499918937683, "learning_rate": 1.7359271607439272e-05, "loss": 0.4884, "step": 22403 }, { "epoch": 0.47515429153146277, "grad_norm": 0.39994993805885315, "learning_rate": 1.7359045806763602e-05, "loss": 0.6193, "step": 22404 }, { "epoch": 0.4751754999893958, "grad_norm": 0.3595651388168335, "learning_rate": 1.7358819997903263e-05, "loss": 0.5362, "step": 22405 }, { "epoch": 0.4751967084473288, "grad_norm": 0.4508213996887207, "learning_rate": 1.7358594180858506e-05, "loss": 0.533, "step": 22406 }, { "epoch": 0.47521791690526183, "grad_norm": 0.3412393033504486, "learning_rate": 1.735836835562959e-05, "loss": 0.5599, "step": 22407 }, { "epoch": 0.47523912536319485, "grad_norm": 0.41576701402664185, "learning_rate": 1.7358142522216758e-05, "loss": 0.5198, "step": 22408 }, { "epoch": 0.47526033382112787, "grad_norm": 0.40072348713874817, "learning_rate": 1.7357916680620263e-05, "loss": 0.5145, "step": 22409 }, { "epoch": 0.4752815422790609, "grad_norm": 0.4606415033340454, "learning_rate": 1.7357690830840358e-05, "loss": 0.4483, "step": 22410 }, { "epoch": 0.4753027507369939, "grad_norm": 0.3329381048679352, "learning_rate": 1.7357464972877292e-05, "loss": 0.443, "step": 22411 }, { "epoch": 0.4753239591949269, "grad_norm": 0.34821414947509766, "learning_rate": 1.735723910673132e-05, "loss": 0.4863, "step": 22412 }, { "epoch": 0.47534516765285995, "grad_norm": 0.3793768882751465, "learning_rate": 1.7357013232402685e-05, "loss": 0.4991, "step": 22413 }, { "epoch": 0.47536637611079297, "grad_norm": 0.9977331161499023, "learning_rate": 1.735678734989165e-05, "loss": 0.5202, "step": 22414 }, { "epoch": 0.475387584568726, "grad_norm": 0.3392932415008545, "learning_rate": 1.7356561459198457e-05, "loss": 0.5158, "step": 22415 }, { "epoch": 0.475408793026659, "grad_norm": 0.36003708839416504, "learning_rate": 1.735633556032336e-05, "loss": 0.4799, "step": 22416 }, { "epoch": 0.4754300014845921, "grad_norm": 0.41002148389816284, "learning_rate": 1.7356109653266615e-05, "loss": 0.5029, "step": 22417 }, { "epoch": 0.4754512099425251, "grad_norm": 0.3930598795413971, "learning_rate": 1.735588373802846e-05, "loss": 0.4251, "step": 22418 }, { "epoch": 0.4754724184004581, "grad_norm": 0.37628406286239624, "learning_rate": 1.7355657814609163e-05, "loss": 0.4475, "step": 22419 }, { "epoch": 0.47549362685839114, "grad_norm": 0.37346360087394714, "learning_rate": 1.7355431883008965e-05, "loss": 0.5353, "step": 22420 }, { "epoch": 0.47551483531632416, "grad_norm": 0.33902353048324585, "learning_rate": 1.7355205943228117e-05, "loss": 0.5278, "step": 22421 }, { "epoch": 0.4755360437742572, "grad_norm": 0.309482604265213, "learning_rate": 1.7354979995266872e-05, "loss": 0.3616, "step": 22422 }, { "epoch": 0.4755572522321902, "grad_norm": 0.33231425285339355, "learning_rate": 1.735475403912549e-05, "loss": 0.4935, "step": 22423 }, { "epoch": 0.4755784606901232, "grad_norm": 0.32574716210365295, "learning_rate": 1.7354528074804208e-05, "loss": 0.4809, "step": 22424 }, { "epoch": 0.47559966914805624, "grad_norm": 0.36449384689331055, "learning_rate": 1.7354302102303284e-05, "loss": 0.4081, "step": 22425 }, { "epoch": 0.47562087760598926, "grad_norm": 0.34302520751953125, "learning_rate": 1.7354076121622972e-05, "loss": 0.483, "step": 22426 }, { "epoch": 0.4756420860639223, "grad_norm": 0.3326762914657593, "learning_rate": 1.7353850132763517e-05, "loss": 0.5196, "step": 22427 }, { "epoch": 0.4756632945218553, "grad_norm": 0.33958113193511963, "learning_rate": 1.7353624135725176e-05, "loss": 0.541, "step": 22428 }, { "epoch": 0.4756845029797883, "grad_norm": 0.3667152225971222, "learning_rate": 1.7353398130508197e-05, "loss": 0.468, "step": 22429 }, { "epoch": 0.4757057114377214, "grad_norm": 0.37326446175575256, "learning_rate": 1.7353172117112834e-05, "loss": 0.4917, "step": 22430 }, { "epoch": 0.4757269198956544, "grad_norm": 0.42289653420448303, "learning_rate": 1.7352946095539337e-05, "loss": 0.4968, "step": 22431 }, { "epoch": 0.47574812835358743, "grad_norm": 0.4432523250579834, "learning_rate": 1.7352720065787957e-05, "loss": 0.4692, "step": 22432 }, { "epoch": 0.47576933681152045, "grad_norm": 0.34693828225135803, "learning_rate": 1.735249402785894e-05, "loss": 0.5436, "step": 22433 }, { "epoch": 0.47579054526945347, "grad_norm": 0.39761361479759216, "learning_rate": 1.735226798175255e-05, "loss": 0.512, "step": 22434 }, { "epoch": 0.4758117537273865, "grad_norm": 0.31133779883384705, "learning_rate": 1.735204192746903e-05, "loss": 0.4717, "step": 22435 }, { "epoch": 0.4758329621853195, "grad_norm": 0.3547459542751312, "learning_rate": 1.735181586500863e-05, "loss": 0.5668, "step": 22436 }, { "epoch": 0.4758541706432525, "grad_norm": 0.4004812240600586, "learning_rate": 1.735158979437161e-05, "loss": 0.4437, "step": 22437 }, { "epoch": 0.47587537910118555, "grad_norm": 0.44402769207954407, "learning_rate": 1.7351363715558208e-05, "loss": 0.5261, "step": 22438 }, { "epoch": 0.47589658755911857, "grad_norm": 0.34681588411331177, "learning_rate": 1.7351137628568688e-05, "loss": 0.4819, "step": 22439 }, { "epoch": 0.4759177960170516, "grad_norm": 0.4291267693042755, "learning_rate": 1.7350911533403295e-05, "loss": 0.5714, "step": 22440 }, { "epoch": 0.4759390044749846, "grad_norm": 0.3804902732372284, "learning_rate": 1.7350685430062285e-05, "loss": 0.5345, "step": 22441 }, { "epoch": 0.4759602129329176, "grad_norm": 0.3668915331363678, "learning_rate": 1.7350459318545904e-05, "loss": 0.5018, "step": 22442 }, { "epoch": 0.47598142139085065, "grad_norm": 0.41399675607681274, "learning_rate": 1.7350233198854404e-05, "loss": 0.5072, "step": 22443 }, { "epoch": 0.4760026298487837, "grad_norm": 0.360017329454422, "learning_rate": 1.7350007070988043e-05, "loss": 0.5739, "step": 22444 }, { "epoch": 0.47602383830671674, "grad_norm": 0.3317897319793701, "learning_rate": 1.7349780934947066e-05, "loss": 0.4475, "step": 22445 }, { "epoch": 0.47604504676464976, "grad_norm": 0.3410682678222656, "learning_rate": 1.7349554790731726e-05, "loss": 0.5109, "step": 22446 }, { "epoch": 0.4760662552225828, "grad_norm": 0.35730817914009094, "learning_rate": 1.7349328638342274e-05, "loss": 0.5652, "step": 22447 }, { "epoch": 0.4760874636805158, "grad_norm": 0.3558325171470642, "learning_rate": 1.7349102477778965e-05, "loss": 0.4232, "step": 22448 }, { "epoch": 0.4761086721384488, "grad_norm": 0.34331727027893066, "learning_rate": 1.7348876309042048e-05, "loss": 0.5174, "step": 22449 }, { "epoch": 0.47612988059638184, "grad_norm": 0.4288939833641052, "learning_rate": 1.734865013213177e-05, "loss": 0.4797, "step": 22450 }, { "epoch": 0.47615108905431486, "grad_norm": 0.35828620195388794, "learning_rate": 1.7348423947048393e-05, "loss": 0.467, "step": 22451 }, { "epoch": 0.4761722975122479, "grad_norm": 0.4305664896965027, "learning_rate": 1.734819775379216e-05, "loss": 0.4518, "step": 22452 }, { "epoch": 0.4761935059701809, "grad_norm": 0.37850597500801086, "learning_rate": 1.7347971552363325e-05, "loss": 0.4979, "step": 22453 }, { "epoch": 0.4762147144281139, "grad_norm": 0.3637659251689911, "learning_rate": 1.734774534276214e-05, "loss": 0.4931, "step": 22454 }, { "epoch": 0.47623592288604694, "grad_norm": 0.31556403636932373, "learning_rate": 1.7347519124988857e-05, "loss": 0.4584, "step": 22455 }, { "epoch": 0.47625713134397996, "grad_norm": 0.3558434247970581, "learning_rate": 1.734729289904373e-05, "loss": 0.4528, "step": 22456 }, { "epoch": 0.476278339801913, "grad_norm": 0.32806697487831116, "learning_rate": 1.7347066664927005e-05, "loss": 0.5098, "step": 22457 }, { "epoch": 0.47629954825984605, "grad_norm": 0.31579795479774475, "learning_rate": 1.7346840422638935e-05, "loss": 0.4579, "step": 22458 }, { "epoch": 0.47632075671777907, "grad_norm": 0.3255826532840729, "learning_rate": 1.734661417217977e-05, "loss": 0.4723, "step": 22459 }, { "epoch": 0.4763419651757121, "grad_norm": 0.3677617907524109, "learning_rate": 1.7346387913549772e-05, "loss": 0.531, "step": 22460 }, { "epoch": 0.4763631736336451, "grad_norm": 0.3654336631298065, "learning_rate": 1.734616164674918e-05, "loss": 0.5453, "step": 22461 }, { "epoch": 0.47638438209157813, "grad_norm": 0.3871535658836365, "learning_rate": 1.7345935371778253e-05, "loss": 0.5304, "step": 22462 }, { "epoch": 0.47640559054951115, "grad_norm": 0.4102748930454254, "learning_rate": 1.7345709088637238e-05, "loss": 0.6125, "step": 22463 }, { "epoch": 0.47642679900744417, "grad_norm": 0.39051011204719543, "learning_rate": 1.734548279732639e-05, "loss": 0.4994, "step": 22464 }, { "epoch": 0.4764480074653772, "grad_norm": 0.5161356329917908, "learning_rate": 1.7345256497845964e-05, "loss": 0.621, "step": 22465 }, { "epoch": 0.4764692159233102, "grad_norm": 0.3436158001422882, "learning_rate": 1.7345030190196203e-05, "loss": 0.4673, "step": 22466 }, { "epoch": 0.4764904243812432, "grad_norm": 0.336833655834198, "learning_rate": 1.7344803874377368e-05, "loss": 0.4659, "step": 22467 }, { "epoch": 0.47651163283917625, "grad_norm": 0.316231906414032, "learning_rate": 1.73445775503897e-05, "loss": 0.4899, "step": 22468 }, { "epoch": 0.47653284129710927, "grad_norm": 0.34299206733703613, "learning_rate": 1.734435121823346e-05, "loss": 0.5163, "step": 22469 }, { "epoch": 0.4765540497550423, "grad_norm": 0.3803994953632355, "learning_rate": 1.7344124877908895e-05, "loss": 0.4574, "step": 22470 }, { "epoch": 0.47657525821297536, "grad_norm": 0.3529617488384247, "learning_rate": 1.734389852941626e-05, "loss": 0.4713, "step": 22471 }, { "epoch": 0.4765964666709084, "grad_norm": 0.36900949478149414, "learning_rate": 1.7343672172755803e-05, "loss": 0.5913, "step": 22472 }, { "epoch": 0.4766176751288414, "grad_norm": 0.36993107199668884, "learning_rate": 1.7343445807927778e-05, "loss": 0.5105, "step": 22473 }, { "epoch": 0.4766388835867744, "grad_norm": 0.3396632969379425, "learning_rate": 1.7343219434932437e-05, "loss": 0.5043, "step": 22474 }, { "epoch": 0.47666009204470744, "grad_norm": 0.32913774251937866, "learning_rate": 1.7342993053770028e-05, "loss": 0.4466, "step": 22475 }, { "epoch": 0.47668130050264046, "grad_norm": 0.3339630663394928, "learning_rate": 1.7342766664440812e-05, "loss": 0.5774, "step": 22476 }, { "epoch": 0.4767025089605735, "grad_norm": 0.33174413442611694, "learning_rate": 1.734254026694503e-05, "loss": 0.48, "step": 22477 }, { "epoch": 0.4767237174185065, "grad_norm": 0.3862100839614868, "learning_rate": 1.734231386128294e-05, "loss": 0.5154, "step": 22478 }, { "epoch": 0.4767449258764395, "grad_norm": 0.43578964471817017, "learning_rate": 1.7342087447454793e-05, "loss": 0.462, "step": 22479 }, { "epoch": 0.47676613433437254, "grad_norm": 0.358185738325119, "learning_rate": 1.734186102546084e-05, "loss": 0.4992, "step": 22480 }, { "epoch": 0.47678734279230556, "grad_norm": 0.3193279504776001, "learning_rate": 1.734163459530133e-05, "loss": 0.5131, "step": 22481 }, { "epoch": 0.4768085512502386, "grad_norm": 0.31528234481811523, "learning_rate": 1.734140815697652e-05, "loss": 0.4433, "step": 22482 }, { "epoch": 0.4768297597081716, "grad_norm": 0.3988988399505615, "learning_rate": 1.734118171048666e-05, "loss": 0.3825, "step": 22483 }, { "epoch": 0.4768509681661046, "grad_norm": 0.4806719422340393, "learning_rate": 1.7340955255832003e-05, "loss": 0.5083, "step": 22484 }, { "epoch": 0.4768721766240377, "grad_norm": 0.3974347710609436, "learning_rate": 1.7340728793012796e-05, "loss": 0.5285, "step": 22485 }, { "epoch": 0.4768933850819707, "grad_norm": 0.3405470550060272, "learning_rate": 1.7340502322029292e-05, "loss": 0.4534, "step": 22486 }, { "epoch": 0.47691459353990373, "grad_norm": 0.3391713500022888, "learning_rate": 1.734027584288175e-05, "loss": 0.5589, "step": 22487 }, { "epoch": 0.47693580199783675, "grad_norm": 0.3106120824813843, "learning_rate": 1.734004935557042e-05, "loss": 0.5416, "step": 22488 }, { "epoch": 0.47695701045576977, "grad_norm": 0.3142949640750885, "learning_rate": 1.7339822860095547e-05, "loss": 0.4285, "step": 22489 }, { "epoch": 0.4769782189137028, "grad_norm": 0.36067214608192444, "learning_rate": 1.7339596356457385e-05, "loss": 0.5108, "step": 22490 }, { "epoch": 0.4769994273716358, "grad_norm": 0.3511582314968109, "learning_rate": 1.7339369844656187e-05, "loss": 0.5238, "step": 22491 }, { "epoch": 0.47702063582956883, "grad_norm": 0.4590664207935333, "learning_rate": 1.7339143324692207e-05, "loss": 0.6544, "step": 22492 }, { "epoch": 0.47704184428750185, "grad_norm": 0.3700965642929077, "learning_rate": 1.7338916796565696e-05, "loss": 0.4094, "step": 22493 }, { "epoch": 0.47706305274543487, "grad_norm": 0.35124343633651733, "learning_rate": 1.7338690260276907e-05, "loss": 0.5297, "step": 22494 }, { "epoch": 0.4770842612033679, "grad_norm": 0.47914940118789673, "learning_rate": 1.733846371582609e-05, "loss": 0.4795, "step": 22495 }, { "epoch": 0.4771054696613009, "grad_norm": 0.339638888835907, "learning_rate": 1.7338237163213495e-05, "loss": 0.4974, "step": 22496 }, { "epoch": 0.4771266781192339, "grad_norm": 0.35000520944595337, "learning_rate": 1.7338010602439376e-05, "loss": 0.531, "step": 22497 }, { "epoch": 0.477147886577167, "grad_norm": 0.3345772922039032, "learning_rate": 1.733778403350399e-05, "loss": 0.4754, "step": 22498 }, { "epoch": 0.4771690950351, "grad_norm": 0.35493841767311096, "learning_rate": 1.733755745640758e-05, "loss": 0.5107, "step": 22499 }, { "epoch": 0.47719030349303304, "grad_norm": 0.39584973454475403, "learning_rate": 1.7337330871150408e-05, "loss": 0.555, "step": 22500 }, { "epoch": 0.47721151195096606, "grad_norm": 0.3477468192577362, "learning_rate": 1.7337104277732716e-05, "loss": 0.4684, "step": 22501 }, { "epoch": 0.4772327204088991, "grad_norm": 0.2975315451622009, "learning_rate": 1.7336877676154757e-05, "loss": 0.429, "step": 22502 }, { "epoch": 0.4772539288668321, "grad_norm": 0.3564263880252838, "learning_rate": 1.733665106641679e-05, "loss": 0.4206, "step": 22503 }, { "epoch": 0.4772751373247651, "grad_norm": 0.33923688530921936, "learning_rate": 1.7336424448519065e-05, "loss": 0.491, "step": 22504 }, { "epoch": 0.47729634578269814, "grad_norm": 0.3653286397457123, "learning_rate": 1.733619782246183e-05, "loss": 0.4771, "step": 22505 }, { "epoch": 0.47731755424063116, "grad_norm": 0.35625159740448, "learning_rate": 1.733597118824534e-05, "loss": 0.552, "step": 22506 }, { "epoch": 0.4773387626985642, "grad_norm": 0.33553382754325867, "learning_rate": 1.733574454586985e-05, "loss": 0.5427, "step": 22507 }, { "epoch": 0.4773599711564972, "grad_norm": 0.3156394362449646, "learning_rate": 1.7335517895335604e-05, "loss": 0.46, "step": 22508 }, { "epoch": 0.4773811796144302, "grad_norm": 0.39035454392433167, "learning_rate": 1.733529123664286e-05, "loss": 0.5078, "step": 22509 }, { "epoch": 0.47740238807236324, "grad_norm": 0.4469667971134186, "learning_rate": 1.7335064569791868e-05, "loss": 0.4905, "step": 22510 }, { "epoch": 0.47742359653029626, "grad_norm": 0.3221968114376068, "learning_rate": 1.7334837894782885e-05, "loss": 0.4955, "step": 22511 }, { "epoch": 0.47744480498822933, "grad_norm": 0.4532620906829834, "learning_rate": 1.7334611211616154e-05, "loss": 0.4891, "step": 22512 }, { "epoch": 0.47746601344616235, "grad_norm": 0.4067772924900055, "learning_rate": 1.7334384520291933e-05, "loss": 0.5901, "step": 22513 }, { "epoch": 0.47748722190409537, "grad_norm": 0.35666850209236145, "learning_rate": 1.7334157820810474e-05, "loss": 0.5428, "step": 22514 }, { "epoch": 0.4775084303620284, "grad_norm": 0.3343610167503357, "learning_rate": 1.733393111317203e-05, "loss": 0.4361, "step": 22515 }, { "epoch": 0.4775296388199614, "grad_norm": 0.34900033473968506, "learning_rate": 1.7333704397376848e-05, "loss": 0.4597, "step": 22516 }, { "epoch": 0.47755084727789443, "grad_norm": 0.33296525478363037, "learning_rate": 1.7333477673425186e-05, "loss": 0.4803, "step": 22517 }, { "epoch": 0.47757205573582745, "grad_norm": 0.31509241461753845, "learning_rate": 1.7333250941317295e-05, "loss": 0.4873, "step": 22518 }, { "epoch": 0.47759326419376047, "grad_norm": 0.32077184319496155, "learning_rate": 1.7333024201053428e-05, "loss": 0.4904, "step": 22519 }, { "epoch": 0.4776144726516935, "grad_norm": 0.3735222816467285, "learning_rate": 1.733279745263383e-05, "loss": 0.5644, "step": 22520 }, { "epoch": 0.4776356811096265, "grad_norm": 0.32511207461357117, "learning_rate": 1.7332570696058762e-05, "loss": 0.5424, "step": 22521 }, { "epoch": 0.47765688956755953, "grad_norm": 0.3641229271888733, "learning_rate": 1.733234393132847e-05, "loss": 0.5001, "step": 22522 }, { "epoch": 0.47767809802549255, "grad_norm": 0.3559664785861969, "learning_rate": 1.733211715844321e-05, "loss": 0.4881, "step": 22523 }, { "epoch": 0.47769930648342557, "grad_norm": 0.37649229168891907, "learning_rate": 1.7331890377403233e-05, "loss": 0.4252, "step": 22524 }, { "epoch": 0.4777205149413586, "grad_norm": 0.3344581425189972, "learning_rate": 1.733166358820879e-05, "loss": 0.5144, "step": 22525 }, { "epoch": 0.47774172339929166, "grad_norm": 0.4002992808818817, "learning_rate": 1.7331436790860138e-05, "loss": 0.5362, "step": 22526 }, { "epoch": 0.4777629318572247, "grad_norm": 0.34779080748558044, "learning_rate": 1.733120998535752e-05, "loss": 0.5156, "step": 22527 }, { "epoch": 0.4777841403151577, "grad_norm": 0.3376067280769348, "learning_rate": 1.7330983171701196e-05, "loss": 0.536, "step": 22528 }, { "epoch": 0.4778053487730907, "grad_norm": 0.33212587237358093, "learning_rate": 1.733075634989142e-05, "loss": 0.455, "step": 22529 }, { "epoch": 0.47782655723102374, "grad_norm": 0.390317440032959, "learning_rate": 1.7330529519928436e-05, "loss": 0.5851, "step": 22530 }, { "epoch": 0.47784776568895676, "grad_norm": 0.3312895894050598, "learning_rate": 1.7330302681812504e-05, "loss": 0.5397, "step": 22531 }, { "epoch": 0.4778689741468898, "grad_norm": 0.35914355516433716, "learning_rate": 1.733007583554387e-05, "loss": 0.5329, "step": 22532 }, { "epoch": 0.4778901826048228, "grad_norm": 0.36100396513938904, "learning_rate": 1.7329848981122792e-05, "loss": 0.5688, "step": 22533 }, { "epoch": 0.4779113910627558, "grad_norm": 0.40096691250801086, "learning_rate": 1.732962211854952e-05, "loss": 0.6081, "step": 22534 }, { "epoch": 0.47793259952068884, "grad_norm": 0.3433147370815277, "learning_rate": 1.73293952478243e-05, "loss": 0.5184, "step": 22535 }, { "epoch": 0.47795380797862186, "grad_norm": 0.40625640749931335, "learning_rate": 1.73291683689474e-05, "loss": 0.5978, "step": 22536 }, { "epoch": 0.4779750164365549, "grad_norm": 0.5420207977294922, "learning_rate": 1.7328941481919055e-05, "loss": 0.5423, "step": 22537 }, { "epoch": 0.4779962248944879, "grad_norm": 0.365058958530426, "learning_rate": 1.7328714586739527e-05, "loss": 0.4085, "step": 22538 }, { "epoch": 0.478017433352421, "grad_norm": 0.3107488453388214, "learning_rate": 1.7328487683409068e-05, "loss": 0.4749, "step": 22539 }, { "epoch": 0.478038641810354, "grad_norm": 0.38002559542655945, "learning_rate": 1.7328260771927927e-05, "loss": 0.577, "step": 22540 }, { "epoch": 0.478059850268287, "grad_norm": 0.5173911452293396, "learning_rate": 1.7328033852296357e-05, "loss": 0.4464, "step": 22541 }, { "epoch": 0.47808105872622003, "grad_norm": 0.3198065757751465, "learning_rate": 1.7327806924514612e-05, "loss": 0.4484, "step": 22542 }, { "epoch": 0.47810226718415305, "grad_norm": 0.34788572788238525, "learning_rate": 1.7327579988582946e-05, "loss": 0.4716, "step": 22543 }, { "epoch": 0.47812347564208607, "grad_norm": 0.5892812013626099, "learning_rate": 1.7327353044501608e-05, "loss": 0.5257, "step": 22544 }, { "epoch": 0.4781446841000191, "grad_norm": 0.41187477111816406, "learning_rate": 1.7327126092270853e-05, "loss": 0.5796, "step": 22545 }, { "epoch": 0.4781658925579521, "grad_norm": 0.32907283306121826, "learning_rate": 1.7326899131890927e-05, "loss": 0.4685, "step": 22546 }, { "epoch": 0.47818710101588513, "grad_norm": 0.3344387412071228, "learning_rate": 1.7326672163362093e-05, "loss": 0.4729, "step": 22547 }, { "epoch": 0.47820830947381815, "grad_norm": 0.35119977593421936, "learning_rate": 1.7326445186684595e-05, "loss": 0.4661, "step": 22548 }, { "epoch": 0.47822951793175117, "grad_norm": 0.36361777782440186, "learning_rate": 1.7326218201858688e-05, "loss": 0.4882, "step": 22549 }, { "epoch": 0.4782507263896842, "grad_norm": 0.32172849774360657, "learning_rate": 1.7325991208884625e-05, "loss": 0.4676, "step": 22550 }, { "epoch": 0.4782719348476172, "grad_norm": 0.3616805672645569, "learning_rate": 1.7325764207762658e-05, "loss": 0.5378, "step": 22551 }, { "epoch": 0.47829314330555023, "grad_norm": 0.33400243520736694, "learning_rate": 1.732553719849304e-05, "loss": 0.4887, "step": 22552 }, { "epoch": 0.4783143517634833, "grad_norm": 0.31788069009780884, "learning_rate": 1.7325310181076026e-05, "loss": 0.483, "step": 22553 }, { "epoch": 0.4783355602214163, "grad_norm": 0.3371855914592743, "learning_rate": 1.7325083155511862e-05, "loss": 0.4773, "step": 22554 }, { "epoch": 0.47835676867934934, "grad_norm": 0.32762718200683594, "learning_rate": 1.7324856121800807e-05, "loss": 0.5574, "step": 22555 }, { "epoch": 0.47837797713728236, "grad_norm": 0.34166884422302246, "learning_rate": 1.7324629079943108e-05, "loss": 0.5236, "step": 22556 }, { "epoch": 0.4783991855952154, "grad_norm": 0.33932575583457947, "learning_rate": 1.7324402029939022e-05, "loss": 0.4915, "step": 22557 }, { "epoch": 0.4784203940531484, "grad_norm": 0.31798475980758667, "learning_rate": 1.73241749717888e-05, "loss": 0.4121, "step": 22558 }, { "epoch": 0.4784416025110814, "grad_norm": 0.3759889006614685, "learning_rate": 1.7323947905492692e-05, "loss": 0.491, "step": 22559 }, { "epoch": 0.47846281096901444, "grad_norm": 0.35096412897109985, "learning_rate": 1.7323720831050956e-05, "loss": 0.5026, "step": 22560 }, { "epoch": 0.47848401942694746, "grad_norm": 0.3377847969532013, "learning_rate": 1.7323493748463837e-05, "loss": 0.4406, "step": 22561 }, { "epoch": 0.4785052278848805, "grad_norm": 0.3406291604042053, "learning_rate": 1.7323266657731595e-05, "loss": 0.4729, "step": 22562 }, { "epoch": 0.4785264363428135, "grad_norm": 0.3435957431793213, "learning_rate": 1.7323039558854476e-05, "loss": 0.4339, "step": 22563 }, { "epoch": 0.4785476448007465, "grad_norm": 0.3416617512702942, "learning_rate": 1.7322812451832743e-05, "loss": 0.4373, "step": 22564 }, { "epoch": 0.47856885325867954, "grad_norm": 0.39754003286361694, "learning_rate": 1.7322585336666636e-05, "loss": 0.4986, "step": 22565 }, { "epoch": 0.47859006171661256, "grad_norm": 0.38502219319343567, "learning_rate": 1.7322358213356416e-05, "loss": 0.5616, "step": 22566 }, { "epoch": 0.47861127017454563, "grad_norm": 0.33378922939300537, "learning_rate": 1.7322131081902332e-05, "loss": 0.4353, "step": 22567 }, { "epoch": 0.47863247863247865, "grad_norm": 0.3108053505420685, "learning_rate": 1.7321903942304634e-05, "loss": 0.4458, "step": 22568 }, { "epoch": 0.4786536870904117, "grad_norm": 0.3543647229671478, "learning_rate": 1.732167679456358e-05, "loss": 0.4775, "step": 22569 }, { "epoch": 0.4786748955483447, "grad_norm": 0.32470613718032837, "learning_rate": 1.7321449638679424e-05, "loss": 0.4391, "step": 22570 }, { "epoch": 0.4786961040062777, "grad_norm": 0.33621281385421753, "learning_rate": 1.732122247465241e-05, "loss": 0.5198, "step": 22571 }, { "epoch": 0.47871731246421073, "grad_norm": 0.3986707925796509, "learning_rate": 1.73209953024828e-05, "loss": 0.4178, "step": 22572 }, { "epoch": 0.47873852092214375, "grad_norm": 0.3611195385456085, "learning_rate": 1.7320768122170843e-05, "loss": 0.4803, "step": 22573 }, { "epoch": 0.47875972938007677, "grad_norm": 0.3327696621417999, "learning_rate": 1.732054093371679e-05, "loss": 0.4752, "step": 22574 }, { "epoch": 0.4787809378380098, "grad_norm": 0.37736091017723083, "learning_rate": 1.732031373712089e-05, "loss": 0.5072, "step": 22575 }, { "epoch": 0.4788021462959428, "grad_norm": 0.34113702178001404, "learning_rate": 1.7320086532383408e-05, "loss": 0.4864, "step": 22576 }, { "epoch": 0.47882335475387583, "grad_norm": 0.367562860250473, "learning_rate": 1.7319859319504586e-05, "loss": 0.4586, "step": 22577 }, { "epoch": 0.47884456321180885, "grad_norm": 0.3111293315887451, "learning_rate": 1.731963209848468e-05, "loss": 0.4425, "step": 22578 }, { "epoch": 0.47886577166974187, "grad_norm": 0.399684339761734, "learning_rate": 1.7319404869323944e-05, "loss": 0.4966, "step": 22579 }, { "epoch": 0.47888698012767494, "grad_norm": 0.3176334500312805, "learning_rate": 1.731917763202263e-05, "loss": 0.5221, "step": 22580 }, { "epoch": 0.47890818858560796, "grad_norm": 0.3070693016052246, "learning_rate": 1.7318950386580987e-05, "loss": 0.4432, "step": 22581 }, { "epoch": 0.478929397043541, "grad_norm": 0.34015119075775146, "learning_rate": 1.7318723132999274e-05, "loss": 0.4688, "step": 22582 }, { "epoch": 0.478950605501474, "grad_norm": 0.40015876293182373, "learning_rate": 1.731849587127774e-05, "loss": 0.4924, "step": 22583 }, { "epoch": 0.478971813959407, "grad_norm": 0.4033641517162323, "learning_rate": 1.7318268601416637e-05, "loss": 0.4992, "step": 22584 }, { "epoch": 0.47899302241734004, "grad_norm": 0.34322789311408997, "learning_rate": 1.7318041323416222e-05, "loss": 0.5392, "step": 22585 }, { "epoch": 0.47901423087527306, "grad_norm": 0.36089780926704407, "learning_rate": 1.7317814037276745e-05, "loss": 0.5002, "step": 22586 }, { "epoch": 0.4790354393332061, "grad_norm": 0.3560360074043274, "learning_rate": 1.7317586742998457e-05, "loss": 0.4615, "step": 22587 }, { "epoch": 0.4790566477911391, "grad_norm": 0.37216678261756897, "learning_rate": 1.731735944058161e-05, "loss": 0.4599, "step": 22588 }, { "epoch": 0.4790778562490721, "grad_norm": 0.3219220042228699, "learning_rate": 1.7317132130026464e-05, "loss": 0.4979, "step": 22589 }, { "epoch": 0.47909906470700514, "grad_norm": 0.4055832028388977, "learning_rate": 1.7316904811333268e-05, "loss": 0.5664, "step": 22590 }, { "epoch": 0.47912027316493816, "grad_norm": 0.3447286784648895, "learning_rate": 1.731667748450227e-05, "loss": 0.5476, "step": 22591 }, { "epoch": 0.4791414816228712, "grad_norm": 0.37665605545043945, "learning_rate": 1.731645014953373e-05, "loss": 0.5491, "step": 22592 }, { "epoch": 0.4791626900808042, "grad_norm": 0.3549546003341675, "learning_rate": 1.7316222806427894e-05, "loss": 0.4971, "step": 22593 }, { "epoch": 0.4791838985387373, "grad_norm": 0.3395642340183258, "learning_rate": 1.731599545518502e-05, "loss": 0.4665, "step": 22594 }, { "epoch": 0.4792051069966703, "grad_norm": 0.31469354033470154, "learning_rate": 1.731576809580536e-05, "loss": 0.48, "step": 22595 }, { "epoch": 0.4792263154546033, "grad_norm": 0.353462278842926, "learning_rate": 1.7315540728289167e-05, "loss": 0.4715, "step": 22596 }, { "epoch": 0.47924752391253633, "grad_norm": 0.3282682001590729, "learning_rate": 1.731531335263669e-05, "loss": 0.4031, "step": 22597 }, { "epoch": 0.47926873237046935, "grad_norm": 0.38464903831481934, "learning_rate": 1.731508596884819e-05, "loss": 0.4933, "step": 22598 }, { "epoch": 0.47928994082840237, "grad_norm": 0.35649386048316956, "learning_rate": 1.7314858576923913e-05, "loss": 0.5401, "step": 22599 }, { "epoch": 0.4793111492863354, "grad_norm": 0.32331526279449463, "learning_rate": 1.731463117686411e-05, "loss": 0.4267, "step": 22600 }, { "epoch": 0.4793323577442684, "grad_norm": 0.3575242757797241, "learning_rate": 1.731440376866904e-05, "loss": 0.5199, "step": 22601 }, { "epoch": 0.47935356620220143, "grad_norm": 0.4186262786388397, "learning_rate": 1.731417635233896e-05, "loss": 0.5875, "step": 22602 }, { "epoch": 0.47937477466013445, "grad_norm": 0.3429969847202301, "learning_rate": 1.731394892787411e-05, "loss": 0.4802, "step": 22603 }, { "epoch": 0.47939598311806747, "grad_norm": 0.6439867615699768, "learning_rate": 1.7313721495274747e-05, "loss": 0.4847, "step": 22604 }, { "epoch": 0.4794171915760005, "grad_norm": 0.325278103351593, "learning_rate": 1.7313494054541132e-05, "loss": 0.4666, "step": 22605 }, { "epoch": 0.4794384000339335, "grad_norm": 0.4191083610057831, "learning_rate": 1.7313266605673514e-05, "loss": 0.5265, "step": 22606 }, { "epoch": 0.47945960849186653, "grad_norm": 0.40206509828567505, "learning_rate": 1.731303914867214e-05, "loss": 0.4496, "step": 22607 }, { "epoch": 0.4794808169497996, "grad_norm": 0.3551954925060272, "learning_rate": 1.7312811683537267e-05, "loss": 0.5317, "step": 22608 }, { "epoch": 0.4795020254077326, "grad_norm": 0.37803971767425537, "learning_rate": 1.731258421026915e-05, "loss": 0.569, "step": 22609 }, { "epoch": 0.47952323386566564, "grad_norm": 0.3563346564769745, "learning_rate": 1.731235672886804e-05, "loss": 0.5711, "step": 22610 }, { "epoch": 0.47954444232359866, "grad_norm": 0.4258837103843689, "learning_rate": 1.731212923933419e-05, "loss": 0.5688, "step": 22611 }, { "epoch": 0.4795656507815317, "grad_norm": 0.36377304792404175, "learning_rate": 1.7311901741667855e-05, "loss": 0.4449, "step": 22612 }, { "epoch": 0.4795868592394647, "grad_norm": 0.40332287549972534, "learning_rate": 1.7311674235869285e-05, "loss": 0.568, "step": 22613 }, { "epoch": 0.4796080676973977, "grad_norm": 0.5205784440040588, "learning_rate": 1.7311446721938735e-05, "loss": 0.4845, "step": 22614 }, { "epoch": 0.47962927615533074, "grad_norm": 0.3107813596725464, "learning_rate": 1.7311219199876458e-05, "loss": 0.3863, "step": 22615 }, { "epoch": 0.47965048461326376, "grad_norm": 0.39308691024780273, "learning_rate": 1.7310991669682706e-05, "loss": 0.5541, "step": 22616 }, { "epoch": 0.4796716930711968, "grad_norm": 0.3596649765968323, "learning_rate": 1.7310764131357732e-05, "loss": 0.5224, "step": 22617 }, { "epoch": 0.4796929015291298, "grad_norm": 0.39982402324676514, "learning_rate": 1.731053658490179e-05, "loss": 0.4548, "step": 22618 }, { "epoch": 0.4797141099870628, "grad_norm": 0.3788134753704071, "learning_rate": 1.7310309030315134e-05, "loss": 0.5263, "step": 22619 }, { "epoch": 0.47973531844499584, "grad_norm": 0.3470045328140259, "learning_rate": 1.7310081467598013e-05, "loss": 0.414, "step": 22620 }, { "epoch": 0.4797565269029289, "grad_norm": 0.40977853536605835, "learning_rate": 1.7309853896750684e-05, "loss": 0.4954, "step": 22621 }, { "epoch": 0.47977773536086193, "grad_norm": 0.3363671898841858, "learning_rate": 1.73096263177734e-05, "loss": 0.4704, "step": 22622 }, { "epoch": 0.47979894381879495, "grad_norm": 0.38801291584968567, "learning_rate": 1.7309398730666414e-05, "loss": 0.4703, "step": 22623 }, { "epoch": 0.479820152276728, "grad_norm": 0.3336307406425476, "learning_rate": 1.7309171135429978e-05, "loss": 0.4409, "step": 22624 }, { "epoch": 0.479841360734661, "grad_norm": 0.4699277877807617, "learning_rate": 1.7308943532064343e-05, "loss": 0.5797, "step": 22625 }, { "epoch": 0.479862569192594, "grad_norm": 0.3352266252040863, "learning_rate": 1.7308715920569766e-05, "loss": 0.5077, "step": 22626 }, { "epoch": 0.47988377765052703, "grad_norm": 0.33719438314437866, "learning_rate": 1.73084883009465e-05, "loss": 0.4365, "step": 22627 }, { "epoch": 0.47990498610846005, "grad_norm": 0.3727535307407379, "learning_rate": 1.7308260673194792e-05, "loss": 0.5206, "step": 22628 }, { "epoch": 0.47992619456639307, "grad_norm": 0.34423205256462097, "learning_rate": 1.7308033037314903e-05, "loss": 0.4559, "step": 22629 }, { "epoch": 0.4799474030243261, "grad_norm": 0.3796491026878357, "learning_rate": 1.7307805393307085e-05, "loss": 0.4324, "step": 22630 }, { "epoch": 0.4799686114822591, "grad_norm": 0.3413105010986328, "learning_rate": 1.7307577741171587e-05, "loss": 0.6118, "step": 22631 }, { "epoch": 0.47998981994019213, "grad_norm": 0.3952251076698303, "learning_rate": 1.7307350080908665e-05, "loss": 0.479, "step": 22632 }, { "epoch": 0.48001102839812515, "grad_norm": 0.48303815722465515, "learning_rate": 1.7307122412518573e-05, "loss": 0.4554, "step": 22633 }, { "epoch": 0.48003223685605817, "grad_norm": 0.32207199931144714, "learning_rate": 1.730689473600156e-05, "loss": 0.4386, "step": 22634 }, { "epoch": 0.48005344531399124, "grad_norm": 0.3912288248538971, "learning_rate": 1.7306667051357882e-05, "loss": 0.4605, "step": 22635 }, { "epoch": 0.48007465377192426, "grad_norm": 0.3845832943916321, "learning_rate": 1.7306439358587798e-05, "loss": 0.5773, "step": 22636 }, { "epoch": 0.4800958622298573, "grad_norm": 0.4220992922782898, "learning_rate": 1.7306211657691553e-05, "loss": 0.4872, "step": 22637 }, { "epoch": 0.4801170706877903, "grad_norm": 0.39026516675949097, "learning_rate": 1.73059839486694e-05, "loss": 0.5216, "step": 22638 }, { "epoch": 0.4801382791457233, "grad_norm": 0.35784590244293213, "learning_rate": 1.7305756231521595e-05, "loss": 0.5542, "step": 22639 }, { "epoch": 0.48015948760365634, "grad_norm": 0.4859812259674072, "learning_rate": 1.7305528506248396e-05, "loss": 0.5143, "step": 22640 }, { "epoch": 0.48018069606158936, "grad_norm": 0.33970654010772705, "learning_rate": 1.7305300772850048e-05, "loss": 0.5753, "step": 22641 }, { "epoch": 0.4802019045195224, "grad_norm": 0.32710954546928406, "learning_rate": 1.7305073031326806e-05, "loss": 0.5332, "step": 22642 }, { "epoch": 0.4802231129774554, "grad_norm": 0.40833646059036255, "learning_rate": 1.730484528167893e-05, "loss": 0.5129, "step": 22643 }, { "epoch": 0.4802443214353884, "grad_norm": 0.3061105012893677, "learning_rate": 1.730461752390667e-05, "loss": 0.4858, "step": 22644 }, { "epoch": 0.48026552989332144, "grad_norm": 0.2996680438518524, "learning_rate": 1.7304389758010272e-05, "loss": 0.4631, "step": 22645 }, { "epoch": 0.48028673835125446, "grad_norm": 0.37514665722846985, "learning_rate": 1.7304161983989995e-05, "loss": 0.4729, "step": 22646 }, { "epoch": 0.4803079468091875, "grad_norm": 0.6221082210540771, "learning_rate": 1.7303934201846096e-05, "loss": 0.3795, "step": 22647 }, { "epoch": 0.4803291552671205, "grad_norm": 0.3857212960720062, "learning_rate": 1.7303706411578826e-05, "loss": 0.5396, "step": 22648 }, { "epoch": 0.4803503637250536, "grad_norm": 0.3549514710903168, "learning_rate": 1.7303478613188436e-05, "loss": 0.5229, "step": 22649 }, { "epoch": 0.4803715721829866, "grad_norm": 0.3337722718715668, "learning_rate": 1.730325080667518e-05, "loss": 0.4324, "step": 22650 }, { "epoch": 0.4803927806409196, "grad_norm": 0.435798704624176, "learning_rate": 1.730302299203931e-05, "loss": 0.5125, "step": 22651 }, { "epoch": 0.48041398909885263, "grad_norm": 0.33013057708740234, "learning_rate": 1.7302795169281082e-05, "loss": 0.4777, "step": 22652 }, { "epoch": 0.48043519755678565, "grad_norm": 0.36991459131240845, "learning_rate": 1.7302567338400747e-05, "loss": 0.562, "step": 22653 }, { "epoch": 0.4804564060147187, "grad_norm": 0.43071702122688293, "learning_rate": 1.7302339499398563e-05, "loss": 0.5426, "step": 22654 }, { "epoch": 0.4804776144726517, "grad_norm": 0.41779565811157227, "learning_rate": 1.7302111652274782e-05, "loss": 0.5395, "step": 22655 }, { "epoch": 0.4804988229305847, "grad_norm": 0.34497854113578796, "learning_rate": 1.7301883797029653e-05, "loss": 0.5422, "step": 22656 }, { "epoch": 0.48052003138851773, "grad_norm": 0.29935067892074585, "learning_rate": 1.7301655933663436e-05, "loss": 0.4258, "step": 22657 }, { "epoch": 0.48054123984645075, "grad_norm": 0.6110793352127075, "learning_rate": 1.7301428062176376e-05, "loss": 0.4785, "step": 22658 }, { "epoch": 0.48056244830438377, "grad_norm": 0.3408561646938324, "learning_rate": 1.7301200182568732e-05, "loss": 0.4547, "step": 22659 }, { "epoch": 0.4805836567623168, "grad_norm": 0.33304959535598755, "learning_rate": 1.730097229484076e-05, "loss": 0.5985, "step": 22660 }, { "epoch": 0.4806048652202498, "grad_norm": 0.40399813652038574, "learning_rate": 1.7300744398992707e-05, "loss": 0.5316, "step": 22661 }, { "epoch": 0.4806260736781829, "grad_norm": 0.34167879819869995, "learning_rate": 1.730051649502483e-05, "loss": 0.5357, "step": 22662 }, { "epoch": 0.4806472821361159, "grad_norm": 0.34889164566993713, "learning_rate": 1.730028858293738e-05, "loss": 0.5816, "step": 22663 }, { "epoch": 0.4806684905940489, "grad_norm": 0.4168175458908081, "learning_rate": 1.7300060662730617e-05, "loss": 0.4468, "step": 22664 }, { "epoch": 0.48068969905198194, "grad_norm": 0.3036152720451355, "learning_rate": 1.7299832734404786e-05, "loss": 0.4449, "step": 22665 }, { "epoch": 0.48071090750991496, "grad_norm": 0.3782469630241394, "learning_rate": 1.7299604797960145e-05, "loss": 0.5337, "step": 22666 }, { "epoch": 0.480732115967848, "grad_norm": 0.31492164731025696, "learning_rate": 1.729937685339695e-05, "loss": 0.4721, "step": 22667 }, { "epoch": 0.480753324425781, "grad_norm": 0.41446760296821594, "learning_rate": 1.729914890071545e-05, "loss": 0.5466, "step": 22668 }, { "epoch": 0.480774532883714, "grad_norm": 0.40239155292510986, "learning_rate": 1.7298920939915896e-05, "loss": 0.5341, "step": 22669 }, { "epoch": 0.48079574134164704, "grad_norm": 0.6848930716514587, "learning_rate": 1.729869297099855e-05, "loss": 0.5595, "step": 22670 }, { "epoch": 0.48081694979958006, "grad_norm": 0.4392991065979004, "learning_rate": 1.729846499396366e-05, "loss": 0.6044, "step": 22671 }, { "epoch": 0.4808381582575131, "grad_norm": 0.3851355016231537, "learning_rate": 1.729823700881148e-05, "loss": 0.4644, "step": 22672 }, { "epoch": 0.4808593667154461, "grad_norm": 0.34280699491500854, "learning_rate": 1.7298009015542266e-05, "loss": 0.4766, "step": 22673 }, { "epoch": 0.4808805751733791, "grad_norm": 0.3460625112056732, "learning_rate": 1.729778101415627e-05, "loss": 0.4719, "step": 22674 }, { "epoch": 0.48090178363131214, "grad_norm": 0.35956835746765137, "learning_rate": 1.729755300465374e-05, "loss": 0.516, "step": 22675 }, { "epoch": 0.4809229920892452, "grad_norm": 0.31148263812065125, "learning_rate": 1.729732498703494e-05, "loss": 0.4537, "step": 22676 }, { "epoch": 0.48094420054717824, "grad_norm": 0.33581721782684326, "learning_rate": 1.729709696130012e-05, "loss": 0.5296, "step": 22677 }, { "epoch": 0.48096540900511126, "grad_norm": 0.3660254180431366, "learning_rate": 1.729686892744953e-05, "loss": 0.5608, "step": 22678 }, { "epoch": 0.4809866174630443, "grad_norm": 0.34180089831352234, "learning_rate": 1.729664088548342e-05, "loss": 0.5116, "step": 22679 }, { "epoch": 0.4810078259209773, "grad_norm": 0.3447584807872772, "learning_rate": 1.7296412835402055e-05, "loss": 0.4561, "step": 22680 }, { "epoch": 0.4810290343789103, "grad_norm": 0.6472638249397278, "learning_rate": 1.7296184777205684e-05, "loss": 0.5349, "step": 22681 }, { "epoch": 0.48105024283684333, "grad_norm": 0.4399069845676422, "learning_rate": 1.729595671089456e-05, "loss": 0.5263, "step": 22682 }, { "epoch": 0.48107145129477635, "grad_norm": 0.33261287212371826, "learning_rate": 1.7295728636468933e-05, "loss": 0.5187, "step": 22683 }, { "epoch": 0.4810926597527094, "grad_norm": 0.3296588957309723, "learning_rate": 1.7295500553929065e-05, "loss": 0.5359, "step": 22684 }, { "epoch": 0.4811138682106424, "grad_norm": 0.360185444355011, "learning_rate": 1.7295272463275198e-05, "loss": 0.5252, "step": 22685 }, { "epoch": 0.4811350766685754, "grad_norm": 0.327709436416626, "learning_rate": 1.7295044364507595e-05, "loss": 0.4459, "step": 22686 }, { "epoch": 0.48115628512650843, "grad_norm": 0.3685927093029022, "learning_rate": 1.729481625762651e-05, "loss": 0.4684, "step": 22687 }, { "epoch": 0.48117749358444145, "grad_norm": 0.5286216139793396, "learning_rate": 1.7294588142632192e-05, "loss": 0.4236, "step": 22688 }, { "epoch": 0.4811987020423745, "grad_norm": 0.32307836413383484, "learning_rate": 1.7294360019524898e-05, "loss": 0.4358, "step": 22689 }, { "epoch": 0.48121991050030755, "grad_norm": 0.46089082956314087, "learning_rate": 1.7294131888304874e-05, "loss": 0.5151, "step": 22690 }, { "epoch": 0.48124111895824057, "grad_norm": 0.3311198353767395, "learning_rate": 1.7293903748972386e-05, "loss": 0.4783, "step": 22691 }, { "epoch": 0.4812623274161736, "grad_norm": 0.371671587228775, "learning_rate": 1.729367560152768e-05, "loss": 0.4701, "step": 22692 }, { "epoch": 0.4812835358741066, "grad_norm": 0.32994934916496277, "learning_rate": 1.729344744597101e-05, "loss": 0.4717, "step": 22693 }, { "epoch": 0.4813047443320396, "grad_norm": 0.35612374544143677, "learning_rate": 1.729321928230263e-05, "loss": 0.5151, "step": 22694 }, { "epoch": 0.48132595278997264, "grad_norm": 0.4246103763580322, "learning_rate": 1.7292991110522798e-05, "loss": 0.5823, "step": 22695 }, { "epoch": 0.48134716124790566, "grad_norm": 0.3869462013244629, "learning_rate": 1.7292762930631766e-05, "loss": 0.4894, "step": 22696 }, { "epoch": 0.4813683697058387, "grad_norm": 0.3468122184276581, "learning_rate": 1.7292534742629784e-05, "loss": 0.6201, "step": 22697 }, { "epoch": 0.4813895781637717, "grad_norm": 0.3208240270614624, "learning_rate": 1.7292306546517106e-05, "loss": 0.4307, "step": 22698 }, { "epoch": 0.4814107866217047, "grad_norm": 0.4062129557132721, "learning_rate": 1.7292078342293993e-05, "loss": 0.4931, "step": 22699 }, { "epoch": 0.48143199507963774, "grad_norm": 0.36725470423698425, "learning_rate": 1.729185012996069e-05, "loss": 0.5481, "step": 22700 }, { "epoch": 0.48145320353757076, "grad_norm": 0.4108595550060272, "learning_rate": 1.7291621909517453e-05, "loss": 0.5044, "step": 22701 }, { "epoch": 0.4814744119955038, "grad_norm": 0.391262412071228, "learning_rate": 1.7291393680964544e-05, "loss": 0.5069, "step": 22702 }, { "epoch": 0.48149562045343686, "grad_norm": 0.29914477467536926, "learning_rate": 1.7291165444302204e-05, "loss": 0.4452, "step": 22703 }, { "epoch": 0.4815168289113699, "grad_norm": 0.36209508776664734, "learning_rate": 1.7290937199530695e-05, "loss": 0.4658, "step": 22704 }, { "epoch": 0.4815380373693029, "grad_norm": 0.36054983735084534, "learning_rate": 1.729070894665027e-05, "loss": 0.5844, "step": 22705 }, { "epoch": 0.4815592458272359, "grad_norm": 0.3326292634010315, "learning_rate": 1.7290480685661182e-05, "loss": 0.4933, "step": 22706 }, { "epoch": 0.48158045428516894, "grad_norm": 0.36601898074150085, "learning_rate": 1.7290252416563683e-05, "loss": 0.56, "step": 22707 }, { "epoch": 0.48160166274310195, "grad_norm": 0.40396979451179504, "learning_rate": 1.729002413935803e-05, "loss": 0.4485, "step": 22708 }, { "epoch": 0.481622871201035, "grad_norm": 0.3030455708503723, "learning_rate": 1.7289795854044474e-05, "loss": 0.4083, "step": 22709 }, { "epoch": 0.481644079658968, "grad_norm": 0.31753256916999817, "learning_rate": 1.7289567560623272e-05, "loss": 0.5048, "step": 22710 }, { "epoch": 0.481665288116901, "grad_norm": 0.358398973941803, "learning_rate": 1.7289339259094674e-05, "loss": 0.5445, "step": 22711 }, { "epoch": 0.48168649657483403, "grad_norm": 0.3133436441421509, "learning_rate": 1.728911094945894e-05, "loss": 0.5263, "step": 22712 }, { "epoch": 0.48170770503276705, "grad_norm": 0.3371727764606476, "learning_rate": 1.728888263171632e-05, "loss": 0.5435, "step": 22713 }, { "epoch": 0.4817289134907001, "grad_norm": 0.36309272050857544, "learning_rate": 1.7288654305867063e-05, "loss": 0.5177, "step": 22714 }, { "epoch": 0.4817501219486331, "grad_norm": 0.3311159312725067, "learning_rate": 1.7288425971911432e-05, "loss": 0.5152, "step": 22715 }, { "epoch": 0.4817713304065661, "grad_norm": 0.3401415944099426, "learning_rate": 1.7288197629849678e-05, "loss": 0.5021, "step": 22716 }, { "epoch": 0.4817925388644992, "grad_norm": 0.32602429389953613, "learning_rate": 1.7287969279682052e-05, "loss": 0.5399, "step": 22717 }, { "epoch": 0.4818137473224322, "grad_norm": 0.3247293531894684, "learning_rate": 1.728774092140881e-05, "loss": 0.4908, "step": 22718 }, { "epoch": 0.4818349557803652, "grad_norm": 0.35899561643600464, "learning_rate": 1.7287512555030207e-05, "loss": 0.5448, "step": 22719 }, { "epoch": 0.48185616423829825, "grad_norm": 0.38783931732177734, "learning_rate": 1.728728418054649e-05, "loss": 0.5066, "step": 22720 }, { "epoch": 0.48187737269623127, "grad_norm": 0.3237220048904419, "learning_rate": 1.7287055797957924e-05, "loss": 0.4527, "step": 22721 }, { "epoch": 0.4818985811541643, "grad_norm": 0.48903363943099976, "learning_rate": 1.728682740726476e-05, "loss": 0.5721, "step": 22722 }, { "epoch": 0.4819197896120973, "grad_norm": 0.3854469358921051, "learning_rate": 1.7286599008467245e-05, "loss": 0.461, "step": 22723 }, { "epoch": 0.4819409980700303, "grad_norm": 0.3446817398071289, "learning_rate": 1.728637060156564e-05, "loss": 0.5085, "step": 22724 }, { "epoch": 0.48196220652796334, "grad_norm": 0.41115206480026245, "learning_rate": 1.72861421865602e-05, "loss": 0.537, "step": 22725 }, { "epoch": 0.48198341498589636, "grad_norm": 0.370239794254303, "learning_rate": 1.728591376345117e-05, "loss": 0.5341, "step": 22726 }, { "epoch": 0.4820046234438294, "grad_norm": 0.4207627475261688, "learning_rate": 1.7285685332238813e-05, "loss": 0.5222, "step": 22727 }, { "epoch": 0.4820258319017624, "grad_norm": 0.33887726068496704, "learning_rate": 1.7285456892923382e-05, "loss": 0.4776, "step": 22728 }, { "epoch": 0.4820470403596954, "grad_norm": 0.3071367144584656, "learning_rate": 1.7285228445505125e-05, "loss": 0.5156, "step": 22729 }, { "epoch": 0.4820682488176285, "grad_norm": 0.41813746094703674, "learning_rate": 1.72849999899843e-05, "loss": 0.542, "step": 22730 }, { "epoch": 0.4820894572755615, "grad_norm": 0.3363457918167114, "learning_rate": 1.7284771526361166e-05, "loss": 0.5417, "step": 22731 }, { "epoch": 0.48211066573349454, "grad_norm": 0.37266552448272705, "learning_rate": 1.728454305463597e-05, "loss": 0.5552, "step": 22732 }, { "epoch": 0.48213187419142756, "grad_norm": 0.35063567757606506, "learning_rate": 1.728431457480897e-05, "loss": 0.4292, "step": 22733 }, { "epoch": 0.4821530826493606, "grad_norm": 0.3178236186504364, "learning_rate": 1.7284086086880414e-05, "loss": 0.5091, "step": 22734 }, { "epoch": 0.4821742911072936, "grad_norm": 0.35600605607032776, "learning_rate": 1.7283857590850565e-05, "loss": 0.4814, "step": 22735 }, { "epoch": 0.4821954995652266, "grad_norm": 0.3338378965854645, "learning_rate": 1.7283629086719675e-05, "loss": 0.444, "step": 22736 }, { "epoch": 0.48221670802315963, "grad_norm": 0.3782283067703247, "learning_rate": 1.728340057448799e-05, "loss": 0.5039, "step": 22737 }, { "epoch": 0.48223791648109265, "grad_norm": 0.38406237959861755, "learning_rate": 1.7283172054155774e-05, "loss": 0.4007, "step": 22738 }, { "epoch": 0.4822591249390257, "grad_norm": 0.315799742937088, "learning_rate": 1.7282943525723275e-05, "loss": 0.4873, "step": 22739 }, { "epoch": 0.4822803333969587, "grad_norm": 0.4085756242275238, "learning_rate": 1.728271498919075e-05, "loss": 0.4927, "step": 22740 }, { "epoch": 0.4823015418548917, "grad_norm": 0.41620299220085144, "learning_rate": 1.7282486444558454e-05, "loss": 0.4879, "step": 22741 }, { "epoch": 0.48232275031282473, "grad_norm": 0.33644533157348633, "learning_rate": 1.728225789182664e-05, "loss": 0.4603, "step": 22742 }, { "epoch": 0.48234395877075775, "grad_norm": 0.4233122169971466, "learning_rate": 1.728202933099556e-05, "loss": 0.4586, "step": 22743 }, { "epoch": 0.4823651672286908, "grad_norm": 0.38920438289642334, "learning_rate": 1.728180076206547e-05, "loss": 0.5896, "step": 22744 }, { "epoch": 0.48238637568662385, "grad_norm": 0.3990471661090851, "learning_rate": 1.728157218503663e-05, "loss": 0.4995, "step": 22745 }, { "epoch": 0.48240758414455687, "grad_norm": 0.33926552534103394, "learning_rate": 1.728134359990928e-05, "loss": 0.425, "step": 22746 }, { "epoch": 0.4824287926024899, "grad_norm": 0.3786810636520386, "learning_rate": 1.7281115006683687e-05, "loss": 0.5244, "step": 22747 }, { "epoch": 0.4824500010604229, "grad_norm": 0.34009426832199097, "learning_rate": 1.7280886405360103e-05, "loss": 0.5126, "step": 22748 }, { "epoch": 0.4824712095183559, "grad_norm": 0.5403890013694763, "learning_rate": 1.728065779593878e-05, "loss": 0.5167, "step": 22749 }, { "epoch": 0.48249241797628895, "grad_norm": 0.346853107213974, "learning_rate": 1.728042917841997e-05, "loss": 0.5062, "step": 22750 }, { "epoch": 0.48251362643422196, "grad_norm": 0.29304054379463196, "learning_rate": 1.728020055280393e-05, "loss": 0.4782, "step": 22751 }, { "epoch": 0.482534834892155, "grad_norm": 0.371501624584198, "learning_rate": 1.727997191909092e-05, "loss": 0.5352, "step": 22752 }, { "epoch": 0.482556043350088, "grad_norm": 0.3672788441181183, "learning_rate": 1.7279743277281183e-05, "loss": 0.6102, "step": 22753 }, { "epoch": 0.482577251808021, "grad_norm": 0.37861204147338867, "learning_rate": 1.727951462737498e-05, "loss": 0.461, "step": 22754 }, { "epoch": 0.48259846026595404, "grad_norm": 0.4167778491973877, "learning_rate": 1.7279285969372567e-05, "loss": 0.5578, "step": 22755 }, { "epoch": 0.48261966872388706, "grad_norm": 0.39698782563209534, "learning_rate": 1.7279057303274188e-05, "loss": 0.4026, "step": 22756 }, { "epoch": 0.4826408771818201, "grad_norm": 0.3472635746002197, "learning_rate": 1.7278828629080112e-05, "loss": 0.5709, "step": 22757 }, { "epoch": 0.48266208563975316, "grad_norm": 0.3467006981372833, "learning_rate": 1.727859994679058e-05, "loss": 0.4911, "step": 22758 }, { "epoch": 0.4826832940976862, "grad_norm": 0.3299865126609802, "learning_rate": 1.7278371256405856e-05, "loss": 0.3809, "step": 22759 }, { "epoch": 0.4827045025556192, "grad_norm": 0.37272655963897705, "learning_rate": 1.727814255792619e-05, "loss": 0.5593, "step": 22760 }, { "epoch": 0.4827257110135522, "grad_norm": 0.354225218296051, "learning_rate": 1.7277913851351838e-05, "loss": 0.5727, "step": 22761 }, { "epoch": 0.48274691947148524, "grad_norm": 0.3375678062438965, "learning_rate": 1.7277685136683055e-05, "loss": 0.4862, "step": 22762 }, { "epoch": 0.48276812792941826, "grad_norm": 0.3693816661834717, "learning_rate": 1.727745641392009e-05, "loss": 0.4809, "step": 22763 }, { "epoch": 0.4827893363873513, "grad_norm": 0.3753042221069336, "learning_rate": 1.7277227683063203e-05, "loss": 0.5078, "step": 22764 }, { "epoch": 0.4828105448452843, "grad_norm": 0.43437451124191284, "learning_rate": 1.7276998944112646e-05, "loss": 0.6258, "step": 22765 }, { "epoch": 0.4828317533032173, "grad_norm": 0.4287450909614563, "learning_rate": 1.7276770197068675e-05, "loss": 0.4816, "step": 22766 }, { "epoch": 0.48285296176115033, "grad_norm": 0.6555092334747314, "learning_rate": 1.727654144193154e-05, "loss": 0.5856, "step": 22767 }, { "epoch": 0.48287417021908335, "grad_norm": 0.3145110607147217, "learning_rate": 1.7276312678701502e-05, "loss": 0.5113, "step": 22768 }, { "epoch": 0.4828953786770164, "grad_norm": 0.36292192339897156, "learning_rate": 1.7276083907378812e-05, "loss": 0.5352, "step": 22769 }, { "epoch": 0.4829165871349494, "grad_norm": 0.3705780804157257, "learning_rate": 1.7275855127963726e-05, "loss": 0.4877, "step": 22770 }, { "epoch": 0.48293779559288247, "grad_norm": 0.3284079432487488, "learning_rate": 1.7275626340456496e-05, "loss": 0.4812, "step": 22771 }, { "epoch": 0.4829590040508155, "grad_norm": 0.5715965032577515, "learning_rate": 1.7275397544857378e-05, "loss": 0.4804, "step": 22772 }, { "epoch": 0.4829802125087485, "grad_norm": 0.39908283948898315, "learning_rate": 1.7275168741166625e-05, "loss": 0.4697, "step": 22773 }, { "epoch": 0.4830014209666815, "grad_norm": 0.3798135817050934, "learning_rate": 1.7274939929384492e-05, "loss": 0.4869, "step": 22774 }, { "epoch": 0.48302262942461455, "grad_norm": 0.4356047809123993, "learning_rate": 1.7274711109511236e-05, "loss": 0.5691, "step": 22775 }, { "epoch": 0.48304383788254757, "grad_norm": 0.374519944190979, "learning_rate": 1.7274482281547107e-05, "loss": 0.4692, "step": 22776 }, { "epoch": 0.4830650463404806, "grad_norm": 0.33448120951652527, "learning_rate": 1.7274253445492364e-05, "loss": 0.4807, "step": 22777 }, { "epoch": 0.4830862547984136, "grad_norm": 0.36735275387763977, "learning_rate": 1.7274024601347256e-05, "loss": 0.4941, "step": 22778 }, { "epoch": 0.4831074632563466, "grad_norm": 0.38555237650871277, "learning_rate": 1.7273795749112046e-05, "loss": 0.5026, "step": 22779 }, { "epoch": 0.48312867171427964, "grad_norm": 0.47647780179977417, "learning_rate": 1.7273566888786983e-05, "loss": 0.5569, "step": 22780 }, { "epoch": 0.48314988017221266, "grad_norm": 0.34629419445991516, "learning_rate": 1.7273338020372318e-05, "loss": 0.464, "step": 22781 }, { "epoch": 0.4831710886301457, "grad_norm": 0.41419440507888794, "learning_rate": 1.7273109143868313e-05, "loss": 0.559, "step": 22782 }, { "epoch": 0.4831922970880787, "grad_norm": 0.31792667508125305, "learning_rate": 1.7272880259275217e-05, "loss": 0.4341, "step": 22783 }, { "epoch": 0.4832135055460117, "grad_norm": 0.38201284408569336, "learning_rate": 1.727265136659329e-05, "loss": 0.5022, "step": 22784 }, { "epoch": 0.4832347140039448, "grad_norm": 0.48856058716773987, "learning_rate": 1.727242246582278e-05, "loss": 0.4804, "step": 22785 }, { "epoch": 0.4832559224618778, "grad_norm": 0.3291983902454376, "learning_rate": 1.7272193556963944e-05, "loss": 0.4448, "step": 22786 }, { "epoch": 0.48327713091981084, "grad_norm": 0.3340790867805481, "learning_rate": 1.7271964640017043e-05, "loss": 0.509, "step": 22787 }, { "epoch": 0.48329833937774386, "grad_norm": 0.35568979382514954, "learning_rate": 1.7271735714982322e-05, "loss": 0.5157, "step": 22788 }, { "epoch": 0.4833195478356769, "grad_norm": 0.4139629006385803, "learning_rate": 1.727150678186004e-05, "loss": 0.5692, "step": 22789 }, { "epoch": 0.4833407562936099, "grad_norm": 0.37091854214668274, "learning_rate": 1.7271277840650455e-05, "loss": 0.4662, "step": 22790 }, { "epoch": 0.4833619647515429, "grad_norm": 0.38396671414375305, "learning_rate": 1.7271048891353813e-05, "loss": 0.4873, "step": 22791 }, { "epoch": 0.48338317320947594, "grad_norm": 0.32231613993644714, "learning_rate": 1.7270819933970375e-05, "loss": 0.5197, "step": 22792 }, { "epoch": 0.48340438166740896, "grad_norm": 0.3237464427947998, "learning_rate": 1.7270590968500392e-05, "loss": 0.516, "step": 22793 }, { "epoch": 0.483425590125342, "grad_norm": 0.3527149260044098, "learning_rate": 1.7270361994944127e-05, "loss": 0.4741, "step": 22794 }, { "epoch": 0.483446798583275, "grad_norm": 0.3536287546157837, "learning_rate": 1.7270133013301825e-05, "loss": 0.5249, "step": 22795 }, { "epoch": 0.483468007041208, "grad_norm": 0.3166373372077942, "learning_rate": 1.7269904023573743e-05, "loss": 0.3922, "step": 22796 }, { "epoch": 0.48348921549914103, "grad_norm": 0.3936861753463745, "learning_rate": 1.726967502576014e-05, "loss": 0.5648, "step": 22797 }, { "epoch": 0.48351042395707405, "grad_norm": 0.39174503087997437, "learning_rate": 1.7269446019861266e-05, "loss": 0.4967, "step": 22798 }, { "epoch": 0.48353163241500713, "grad_norm": 0.35756126046180725, "learning_rate": 1.7269217005877376e-05, "loss": 0.5173, "step": 22799 }, { "epoch": 0.48355284087294015, "grad_norm": 0.3412017822265625, "learning_rate": 1.726898798380873e-05, "loss": 0.5438, "step": 22800 }, { "epoch": 0.48357404933087317, "grad_norm": 0.3413192331790924, "learning_rate": 1.7268758953655572e-05, "loss": 0.4906, "step": 22801 }, { "epoch": 0.4835952577888062, "grad_norm": 0.3563379645347595, "learning_rate": 1.7268529915418168e-05, "loss": 0.4549, "step": 22802 }, { "epoch": 0.4836164662467392, "grad_norm": 0.352584570646286, "learning_rate": 1.7268300869096765e-05, "loss": 0.531, "step": 22803 }, { "epoch": 0.4836376747046722, "grad_norm": 0.3531958758831024, "learning_rate": 1.7268071814691626e-05, "loss": 0.4963, "step": 22804 }, { "epoch": 0.48365888316260525, "grad_norm": 0.3225633502006531, "learning_rate": 1.7267842752203e-05, "loss": 0.5007, "step": 22805 }, { "epoch": 0.48368009162053827, "grad_norm": 0.3314983546733856, "learning_rate": 1.7267613681631136e-05, "loss": 0.4987, "step": 22806 }, { "epoch": 0.4837013000784713, "grad_norm": 0.385922908782959, "learning_rate": 1.72673846029763e-05, "loss": 0.459, "step": 22807 }, { "epoch": 0.4837225085364043, "grad_norm": 0.3387657105922699, "learning_rate": 1.7267155516238744e-05, "loss": 0.4713, "step": 22808 }, { "epoch": 0.4837437169943373, "grad_norm": 0.4212094843387604, "learning_rate": 1.726692642141872e-05, "loss": 0.4664, "step": 22809 }, { "epoch": 0.48376492545227034, "grad_norm": 0.3253103792667389, "learning_rate": 1.7266697318516478e-05, "loss": 0.4581, "step": 22810 }, { "epoch": 0.48378613391020336, "grad_norm": 0.35289663076400757, "learning_rate": 1.726646820753228e-05, "loss": 0.538, "step": 22811 }, { "epoch": 0.48380734236813644, "grad_norm": 0.3545025587081909, "learning_rate": 1.7266239088466385e-05, "loss": 0.4814, "step": 22812 }, { "epoch": 0.48382855082606946, "grad_norm": 0.3976883590221405, "learning_rate": 1.7266009961319035e-05, "loss": 0.5219, "step": 22813 }, { "epoch": 0.4838497592840025, "grad_norm": 0.31664833426475525, "learning_rate": 1.72657808260905e-05, "loss": 0.5298, "step": 22814 }, { "epoch": 0.4838709677419355, "grad_norm": 0.3983357548713684, "learning_rate": 1.726555168278102e-05, "loss": 0.5767, "step": 22815 }, { "epoch": 0.4838921761998685, "grad_norm": 0.3394678831100464, "learning_rate": 1.726532253139086e-05, "loss": 0.4976, "step": 22816 }, { "epoch": 0.48391338465780154, "grad_norm": 0.37788283824920654, "learning_rate": 1.726509337192027e-05, "loss": 0.6544, "step": 22817 }, { "epoch": 0.48393459311573456, "grad_norm": 0.3842824101448059, "learning_rate": 1.7264864204369507e-05, "loss": 0.4979, "step": 22818 }, { "epoch": 0.4839558015736676, "grad_norm": 0.33169305324554443, "learning_rate": 1.7264635028738825e-05, "loss": 0.4546, "step": 22819 }, { "epoch": 0.4839770100316006, "grad_norm": 0.38407576084136963, "learning_rate": 1.7264405845028475e-05, "loss": 0.5087, "step": 22820 }, { "epoch": 0.4839982184895336, "grad_norm": 0.3771771788597107, "learning_rate": 1.726417665323872e-05, "loss": 0.5151, "step": 22821 }, { "epoch": 0.48401942694746664, "grad_norm": 0.33627447485923767, "learning_rate": 1.7263947453369812e-05, "loss": 0.4425, "step": 22822 }, { "epoch": 0.48404063540539966, "grad_norm": 0.40111225843429565, "learning_rate": 1.7263718245422004e-05, "loss": 0.4927, "step": 22823 }, { "epoch": 0.4840618438633327, "grad_norm": 0.36497029662132263, "learning_rate": 1.7263489029395548e-05, "loss": 0.4534, "step": 22824 }, { "epoch": 0.4840830523212657, "grad_norm": 0.347788542509079, "learning_rate": 1.7263259805290705e-05, "loss": 0.5027, "step": 22825 }, { "epoch": 0.48410426077919877, "grad_norm": 0.35483795404434204, "learning_rate": 1.726303057310773e-05, "loss": 0.5119, "step": 22826 }, { "epoch": 0.4841254692371318, "grad_norm": 0.32986918091773987, "learning_rate": 1.726280133284687e-05, "loss": 0.5878, "step": 22827 }, { "epoch": 0.4841466776950648, "grad_norm": 0.36838504672050476, "learning_rate": 1.7262572084508392e-05, "loss": 0.5367, "step": 22828 }, { "epoch": 0.48416788615299783, "grad_norm": 0.37816041707992554, "learning_rate": 1.726234282809254e-05, "loss": 0.4853, "step": 22829 }, { "epoch": 0.48418909461093085, "grad_norm": 0.4265139698982239, "learning_rate": 1.7262113563599573e-05, "loss": 0.4948, "step": 22830 }, { "epoch": 0.48421030306886387, "grad_norm": 0.36752843856811523, "learning_rate": 1.7261884291029748e-05, "loss": 0.5276, "step": 22831 }, { "epoch": 0.4842315115267969, "grad_norm": 0.3374989628791809, "learning_rate": 1.7261655010383317e-05, "loss": 0.4951, "step": 22832 }, { "epoch": 0.4842527199847299, "grad_norm": 0.31510311365127563, "learning_rate": 1.726142572166054e-05, "loss": 0.4455, "step": 22833 }, { "epoch": 0.4842739284426629, "grad_norm": 0.3798932433128357, "learning_rate": 1.7261196424861663e-05, "loss": 0.6132, "step": 22834 }, { "epoch": 0.48429513690059595, "grad_norm": 0.37270084023475647, "learning_rate": 1.7260967119986953e-05, "loss": 0.5137, "step": 22835 }, { "epoch": 0.48431634535852897, "grad_norm": 0.36643561720848083, "learning_rate": 1.7260737807036655e-05, "loss": 0.4941, "step": 22836 }, { "epoch": 0.484337553816462, "grad_norm": 0.3209819793701172, "learning_rate": 1.7260508486011026e-05, "loss": 0.5232, "step": 22837 }, { "epoch": 0.484358762274395, "grad_norm": 0.3691156506538391, "learning_rate": 1.7260279156910323e-05, "loss": 0.5278, "step": 22838 }, { "epoch": 0.4843799707323281, "grad_norm": 0.4963928461074829, "learning_rate": 1.72600498197348e-05, "loss": 0.4573, "step": 22839 }, { "epoch": 0.4844011791902611, "grad_norm": 0.37374433875083923, "learning_rate": 1.7259820474484718e-05, "loss": 0.5011, "step": 22840 }, { "epoch": 0.4844223876481941, "grad_norm": 0.3209238648414612, "learning_rate": 1.7259591121160323e-05, "loss": 0.5204, "step": 22841 }, { "epoch": 0.48444359610612714, "grad_norm": 0.3827962577342987, "learning_rate": 1.725936175976187e-05, "loss": 0.4971, "step": 22842 }, { "epoch": 0.48446480456406016, "grad_norm": 0.8518117070198059, "learning_rate": 1.7259132390289624e-05, "loss": 0.4494, "step": 22843 }, { "epoch": 0.4844860130219932, "grad_norm": 0.3187805712223053, "learning_rate": 1.725890301274383e-05, "loss": 0.5231, "step": 22844 }, { "epoch": 0.4845072214799262, "grad_norm": 0.36870089173316956, "learning_rate": 1.725867362712475e-05, "loss": 0.5129, "step": 22845 }, { "epoch": 0.4845284299378592, "grad_norm": 0.37833133339881897, "learning_rate": 1.7258444233432635e-05, "loss": 0.4947, "step": 22846 }, { "epoch": 0.48454963839579224, "grad_norm": 0.9570022225379944, "learning_rate": 1.725821483166774e-05, "loss": 0.5173, "step": 22847 }, { "epoch": 0.48457084685372526, "grad_norm": 0.33559945225715637, "learning_rate": 1.7257985421830325e-05, "loss": 0.4399, "step": 22848 }, { "epoch": 0.4845920553116583, "grad_norm": 0.5103311538696289, "learning_rate": 1.7257756003920637e-05, "loss": 0.5244, "step": 22849 }, { "epoch": 0.4846132637695913, "grad_norm": 0.4009478986263275, "learning_rate": 1.725752657793894e-05, "loss": 0.4758, "step": 22850 }, { "epoch": 0.4846344722275243, "grad_norm": 0.3554525375366211, "learning_rate": 1.7257297143885483e-05, "loss": 0.4716, "step": 22851 }, { "epoch": 0.48465568068545734, "grad_norm": 0.32774144411087036, "learning_rate": 1.7257067701760523e-05, "loss": 0.5299, "step": 22852 }, { "epoch": 0.4846768891433904, "grad_norm": 0.3282228708267212, "learning_rate": 1.7256838251564318e-05, "loss": 0.5038, "step": 22853 }, { "epoch": 0.48469809760132343, "grad_norm": 0.39201849699020386, "learning_rate": 1.7256608793297118e-05, "loss": 0.601, "step": 22854 }, { "epoch": 0.48471930605925645, "grad_norm": 0.3525025546550751, "learning_rate": 1.725637932695918e-05, "loss": 0.4554, "step": 22855 }, { "epoch": 0.48474051451718947, "grad_norm": 0.3985605537891388, "learning_rate": 1.725614985255076e-05, "loss": 0.5786, "step": 22856 }, { "epoch": 0.4847617229751225, "grad_norm": 0.3573947250843048, "learning_rate": 1.7255920370072113e-05, "loss": 0.5172, "step": 22857 }, { "epoch": 0.4847829314330555, "grad_norm": 0.36325186491012573, "learning_rate": 1.7255690879523495e-05, "loss": 0.4495, "step": 22858 }, { "epoch": 0.48480413989098853, "grad_norm": 0.35084110498428345, "learning_rate": 1.7255461380905164e-05, "loss": 0.5557, "step": 22859 }, { "epoch": 0.48482534834892155, "grad_norm": 0.33681267499923706, "learning_rate": 1.7255231874217367e-05, "loss": 0.4847, "step": 22860 }, { "epoch": 0.48484655680685457, "grad_norm": 0.368521511554718, "learning_rate": 1.7255002359460368e-05, "loss": 0.4455, "step": 22861 }, { "epoch": 0.4848677652647876, "grad_norm": 0.33778947591781616, "learning_rate": 1.7254772836634418e-05, "loss": 0.4307, "step": 22862 }, { "epoch": 0.4848889737227206, "grad_norm": 0.33253565430641174, "learning_rate": 1.725454330573977e-05, "loss": 0.524, "step": 22863 }, { "epoch": 0.4849101821806536, "grad_norm": 0.530905544757843, "learning_rate": 1.725431376677668e-05, "loss": 0.4835, "step": 22864 }, { "epoch": 0.48493139063858665, "grad_norm": 0.3359641432762146, "learning_rate": 1.725408421974541e-05, "loss": 0.4929, "step": 22865 }, { "epoch": 0.48495259909651967, "grad_norm": 0.3882755637168884, "learning_rate": 1.725385466464621e-05, "loss": 0.6291, "step": 22866 }, { "epoch": 0.48497380755445274, "grad_norm": 0.29852887988090515, "learning_rate": 1.7253625101479337e-05, "loss": 0.442, "step": 22867 }, { "epoch": 0.48499501601238576, "grad_norm": 0.3685111999511719, "learning_rate": 1.725339553024504e-05, "loss": 0.5629, "step": 22868 }, { "epoch": 0.4850162244703188, "grad_norm": 0.33419114351272583, "learning_rate": 1.7253165950943585e-05, "loss": 0.5281, "step": 22869 }, { "epoch": 0.4850374329282518, "grad_norm": 0.39559322595596313, "learning_rate": 1.7252936363575222e-05, "loss": 0.5475, "step": 22870 }, { "epoch": 0.4850586413861848, "grad_norm": 0.32422128319740295, "learning_rate": 1.7252706768140202e-05, "loss": 0.4566, "step": 22871 }, { "epoch": 0.48507984984411784, "grad_norm": 0.412744402885437, "learning_rate": 1.7252477164638785e-05, "loss": 0.4275, "step": 22872 }, { "epoch": 0.48510105830205086, "grad_norm": 0.36466965079307556, "learning_rate": 1.7252247553071226e-05, "loss": 0.582, "step": 22873 }, { "epoch": 0.4851222667599839, "grad_norm": 0.3872871696949005, "learning_rate": 1.7252017933437782e-05, "loss": 0.4558, "step": 22874 }, { "epoch": 0.4851434752179169, "grad_norm": 0.314681738615036, "learning_rate": 1.7251788305738708e-05, "loss": 0.4758, "step": 22875 }, { "epoch": 0.4851646836758499, "grad_norm": 0.3232291638851166, "learning_rate": 1.7251558669974256e-05, "loss": 0.4618, "step": 22876 }, { "epoch": 0.48518589213378294, "grad_norm": 0.36178967356681824, "learning_rate": 1.7251329026144683e-05, "loss": 0.5827, "step": 22877 }, { "epoch": 0.48520710059171596, "grad_norm": 0.3505738079547882, "learning_rate": 1.7251099374250244e-05, "loss": 0.4288, "step": 22878 }, { "epoch": 0.485228309049649, "grad_norm": 0.34273403882980347, "learning_rate": 1.7250869714291195e-05, "loss": 0.5836, "step": 22879 }, { "epoch": 0.48524951750758205, "grad_norm": 0.3949706554412842, "learning_rate": 1.7250640046267796e-05, "loss": 0.4371, "step": 22880 }, { "epoch": 0.48527072596551507, "grad_norm": 0.3991253972053528, "learning_rate": 1.7250410370180294e-05, "loss": 0.5269, "step": 22881 }, { "epoch": 0.4852919344234481, "grad_norm": 0.33758488297462463, "learning_rate": 1.725018068602895e-05, "loss": 0.5065, "step": 22882 }, { "epoch": 0.4853131428813811, "grad_norm": 0.38062599301338196, "learning_rate": 1.724995099381402e-05, "loss": 0.511, "step": 22883 }, { "epoch": 0.48533435133931413, "grad_norm": 0.3772554099559784, "learning_rate": 1.7249721293535754e-05, "loss": 0.4935, "step": 22884 }, { "epoch": 0.48535555979724715, "grad_norm": 0.32111698389053345, "learning_rate": 1.7249491585194413e-05, "loss": 0.4862, "step": 22885 }, { "epoch": 0.48537676825518017, "grad_norm": 0.3292836546897888, "learning_rate": 1.724926186879025e-05, "loss": 0.4406, "step": 22886 }, { "epoch": 0.4853979767131132, "grad_norm": 0.6001608967781067, "learning_rate": 1.724903214432352e-05, "loss": 0.4639, "step": 22887 }, { "epoch": 0.4854191851710462, "grad_norm": 0.34083324670791626, "learning_rate": 1.724880241179448e-05, "loss": 0.4555, "step": 22888 }, { "epoch": 0.4854403936289792, "grad_norm": 0.29079729318618774, "learning_rate": 1.7248572671203384e-05, "loss": 0.4123, "step": 22889 }, { "epoch": 0.48546160208691225, "grad_norm": 0.2972559630870819, "learning_rate": 1.7248342922550492e-05, "loss": 0.4271, "step": 22890 }, { "epoch": 0.48548281054484527, "grad_norm": 0.31760650873184204, "learning_rate": 1.7248113165836055e-05, "loss": 0.5108, "step": 22891 }, { "epoch": 0.4855040190027783, "grad_norm": 0.34654027223587036, "learning_rate": 1.7247883401060327e-05, "loss": 0.5108, "step": 22892 }, { "epoch": 0.4855252274607113, "grad_norm": 0.3532865643501282, "learning_rate": 1.724765362822357e-05, "loss": 0.5253, "step": 22893 }, { "epoch": 0.4855464359186444, "grad_norm": 0.34656718373298645, "learning_rate": 1.7247423847326027e-05, "loss": 0.5221, "step": 22894 }, { "epoch": 0.4855676443765774, "grad_norm": 0.31749239563941956, "learning_rate": 1.724719405836797e-05, "loss": 0.3923, "step": 22895 }, { "epoch": 0.4855888528345104, "grad_norm": 0.3008556067943573, "learning_rate": 1.7246964261349645e-05, "loss": 0.5148, "step": 22896 }, { "epoch": 0.48561006129244344, "grad_norm": 0.3253381848335266, "learning_rate": 1.724673445627131e-05, "loss": 0.4431, "step": 22897 }, { "epoch": 0.48563126975037646, "grad_norm": 0.3198276162147522, "learning_rate": 1.7246504643133217e-05, "loss": 0.4933, "step": 22898 }, { "epoch": 0.4856524782083095, "grad_norm": 0.3276802599430084, "learning_rate": 1.7246274821935625e-05, "loss": 0.5027, "step": 22899 }, { "epoch": 0.4856736866662425, "grad_norm": 0.3733639121055603, "learning_rate": 1.7246044992678793e-05, "loss": 0.5009, "step": 22900 }, { "epoch": 0.4856948951241755, "grad_norm": 0.9081434011459351, "learning_rate": 1.724581515536297e-05, "loss": 0.5556, "step": 22901 }, { "epoch": 0.48571610358210854, "grad_norm": 0.39118605852127075, "learning_rate": 1.7245585309988416e-05, "loss": 0.5688, "step": 22902 }, { "epoch": 0.48573731204004156, "grad_norm": 0.3542806804180145, "learning_rate": 1.7245355456555383e-05, "loss": 0.5017, "step": 22903 }, { "epoch": 0.4857585204979746, "grad_norm": 0.3741813004016876, "learning_rate": 1.7245125595064126e-05, "loss": 0.5679, "step": 22904 }, { "epoch": 0.4857797289559076, "grad_norm": 0.3456479012966156, "learning_rate": 1.7244895725514908e-05, "loss": 0.5014, "step": 22905 }, { "epoch": 0.4858009374138406, "grad_norm": 0.35652726888656616, "learning_rate": 1.724466584790798e-05, "loss": 0.5061, "step": 22906 }, { "epoch": 0.48582214587177364, "grad_norm": 0.35613754391670227, "learning_rate": 1.7244435962243593e-05, "loss": 0.4701, "step": 22907 }, { "epoch": 0.4858433543297067, "grad_norm": 0.39092057943344116, "learning_rate": 1.724420606852201e-05, "loss": 0.5557, "step": 22908 }, { "epoch": 0.48586456278763973, "grad_norm": 0.3384590744972229, "learning_rate": 1.7243976166743485e-05, "loss": 0.4837, "step": 22909 }, { "epoch": 0.48588577124557275, "grad_norm": 0.3413425385951996, "learning_rate": 1.7243746256908272e-05, "loss": 0.4839, "step": 22910 }, { "epoch": 0.48590697970350577, "grad_norm": 0.3477714955806732, "learning_rate": 1.7243516339016625e-05, "loss": 0.4684, "step": 22911 }, { "epoch": 0.4859281881614388, "grad_norm": 0.319767028093338, "learning_rate": 1.7243286413068806e-05, "loss": 0.4719, "step": 22912 }, { "epoch": 0.4859493966193718, "grad_norm": 0.406442791223526, "learning_rate": 1.7243056479065064e-05, "loss": 0.5436, "step": 22913 }, { "epoch": 0.48597060507730483, "grad_norm": 0.32181450724601746, "learning_rate": 1.724282653700566e-05, "loss": 0.378, "step": 22914 }, { "epoch": 0.48599181353523785, "grad_norm": 0.3445764183998108, "learning_rate": 1.7242596586890842e-05, "loss": 0.4552, "step": 22915 }, { "epoch": 0.48601302199317087, "grad_norm": 0.33183297514915466, "learning_rate": 1.7242366628720874e-05, "loss": 0.4082, "step": 22916 }, { "epoch": 0.4860342304511039, "grad_norm": 0.3353906273841858, "learning_rate": 1.7242136662496012e-05, "loss": 0.4898, "step": 22917 }, { "epoch": 0.4860554389090369, "grad_norm": 0.43509793281555176, "learning_rate": 1.7241906688216504e-05, "loss": 0.5377, "step": 22918 }, { "epoch": 0.4860766473669699, "grad_norm": 0.32796186208724976, "learning_rate": 1.7241676705882614e-05, "loss": 0.4527, "step": 22919 }, { "epoch": 0.48609785582490295, "grad_norm": 0.4537426829338074, "learning_rate": 1.7241446715494592e-05, "loss": 0.4523, "step": 22920 }, { "epoch": 0.486119064282836, "grad_norm": 0.3547093868255615, "learning_rate": 1.7241216717052696e-05, "loss": 0.5074, "step": 22921 }, { "epoch": 0.48614027274076904, "grad_norm": 0.3401857316493988, "learning_rate": 1.724098671055718e-05, "loss": 0.4668, "step": 22922 }, { "epoch": 0.48616148119870206, "grad_norm": 0.335418701171875, "learning_rate": 1.7240756696008307e-05, "loss": 0.5556, "step": 22923 }, { "epoch": 0.4861826896566351, "grad_norm": 0.29061442613601685, "learning_rate": 1.724052667340632e-05, "loss": 0.4101, "step": 22924 }, { "epoch": 0.4862038981145681, "grad_norm": 0.3943864405155182, "learning_rate": 1.7240296642751488e-05, "loss": 0.4688, "step": 22925 }, { "epoch": 0.4862251065725011, "grad_norm": 0.3301348090171814, "learning_rate": 1.7240066604044058e-05, "loss": 0.4663, "step": 22926 }, { "epoch": 0.48624631503043414, "grad_norm": 0.3435601592063904, "learning_rate": 1.7239836557284286e-05, "loss": 0.4144, "step": 22927 }, { "epoch": 0.48626752348836716, "grad_norm": 0.3492754399776459, "learning_rate": 1.7239606502472436e-05, "loss": 0.4785, "step": 22928 }, { "epoch": 0.4862887319463002, "grad_norm": 0.32738950848579407, "learning_rate": 1.723937643960876e-05, "loss": 0.4641, "step": 22929 }, { "epoch": 0.4863099404042332, "grad_norm": 0.36207640171051025, "learning_rate": 1.7239146368693504e-05, "loss": 0.5429, "step": 22930 }, { "epoch": 0.4863311488621662, "grad_norm": 0.370965838432312, "learning_rate": 1.7238916289726938e-05, "loss": 0.5174, "step": 22931 }, { "epoch": 0.48635235732009924, "grad_norm": 0.37247782945632935, "learning_rate": 1.7238686202709313e-05, "loss": 0.5628, "step": 22932 }, { "epoch": 0.48637356577803226, "grad_norm": 0.32546499371528625, "learning_rate": 1.7238456107640883e-05, "loss": 0.4483, "step": 22933 }, { "epoch": 0.4863947742359653, "grad_norm": 0.3826020359992981, "learning_rate": 1.7238226004521904e-05, "loss": 0.4191, "step": 22934 }, { "epoch": 0.48641598269389835, "grad_norm": 0.36380937695503235, "learning_rate": 1.7237995893352632e-05, "loss": 0.5174, "step": 22935 }, { "epoch": 0.48643719115183137, "grad_norm": 0.44849321246147156, "learning_rate": 1.7237765774133325e-05, "loss": 0.5832, "step": 22936 }, { "epoch": 0.4864583996097644, "grad_norm": 0.35905149579048157, "learning_rate": 1.723753564686424e-05, "loss": 0.5569, "step": 22937 }, { "epoch": 0.4864796080676974, "grad_norm": 0.3330966830253601, "learning_rate": 1.723730551154563e-05, "loss": 0.4779, "step": 22938 }, { "epoch": 0.48650081652563043, "grad_norm": 0.31869569420814514, "learning_rate": 1.723707536817775e-05, "loss": 0.4705, "step": 22939 }, { "epoch": 0.48652202498356345, "grad_norm": 0.3478846251964569, "learning_rate": 1.7236845216760855e-05, "loss": 0.4796, "step": 22940 }, { "epoch": 0.48654323344149647, "grad_norm": 0.32296037673950195, "learning_rate": 1.723661505729521e-05, "loss": 0.4882, "step": 22941 }, { "epoch": 0.4865644418994295, "grad_norm": 0.3218270242214203, "learning_rate": 1.723638488978106e-05, "loss": 0.426, "step": 22942 }, { "epoch": 0.4865856503573625, "grad_norm": 0.3645649552345276, "learning_rate": 1.7236154714218666e-05, "loss": 0.4418, "step": 22943 }, { "epoch": 0.48660685881529553, "grad_norm": 0.337674081325531, "learning_rate": 1.7235924530608287e-05, "loss": 0.4642, "step": 22944 }, { "epoch": 0.48662806727322855, "grad_norm": 0.33474573493003845, "learning_rate": 1.7235694338950173e-05, "loss": 0.3826, "step": 22945 }, { "epoch": 0.48664927573116157, "grad_norm": 0.4283032715320587, "learning_rate": 1.7235464139244582e-05, "loss": 0.5431, "step": 22946 }, { "epoch": 0.4866704841890946, "grad_norm": 0.3666725754737854, "learning_rate": 1.723523393149177e-05, "loss": 0.6302, "step": 22947 }, { "epoch": 0.4866916926470276, "grad_norm": 0.29161006212234497, "learning_rate": 1.7235003715691996e-05, "loss": 0.4081, "step": 22948 }, { "epoch": 0.4867129011049607, "grad_norm": 0.39514511823654175, "learning_rate": 1.7234773491845513e-05, "loss": 0.4775, "step": 22949 }, { "epoch": 0.4867341095628937, "grad_norm": 0.35462838411331177, "learning_rate": 1.723454325995258e-05, "loss": 0.5159, "step": 22950 }, { "epoch": 0.4867553180208267, "grad_norm": 0.38541844487190247, "learning_rate": 1.7234313020013448e-05, "loss": 0.5514, "step": 22951 }, { "epoch": 0.48677652647875974, "grad_norm": 0.5029973387718201, "learning_rate": 1.7234082772028372e-05, "loss": 0.5233, "step": 22952 }, { "epoch": 0.48679773493669276, "grad_norm": 0.34085968136787415, "learning_rate": 1.723385251599762e-05, "loss": 0.569, "step": 22953 }, { "epoch": 0.4868189433946258, "grad_norm": 0.5495293736457825, "learning_rate": 1.7233622251921436e-05, "loss": 0.5088, "step": 22954 }, { "epoch": 0.4868401518525588, "grad_norm": 0.36893391609191895, "learning_rate": 1.723339197980008e-05, "loss": 0.5341, "step": 22955 }, { "epoch": 0.4868613603104918, "grad_norm": 0.39348065853118896, "learning_rate": 1.7233161699633808e-05, "loss": 0.5273, "step": 22956 }, { "epoch": 0.48688256876842484, "grad_norm": 0.34206831455230713, "learning_rate": 1.7232931411422876e-05, "loss": 0.5512, "step": 22957 }, { "epoch": 0.48690377722635786, "grad_norm": 0.3524110019207001, "learning_rate": 1.7232701115167544e-05, "loss": 0.4646, "step": 22958 }, { "epoch": 0.4869249856842909, "grad_norm": 0.3449957072734833, "learning_rate": 1.7232470810868062e-05, "loss": 0.4625, "step": 22959 }, { "epoch": 0.4869461941422239, "grad_norm": 0.3412373661994934, "learning_rate": 1.723224049852469e-05, "loss": 0.5383, "step": 22960 }, { "epoch": 0.4869674026001569, "grad_norm": 0.40448182821273804, "learning_rate": 1.723201017813768e-05, "loss": 0.5585, "step": 22961 }, { "epoch": 0.48698861105809, "grad_norm": 0.5126540064811707, "learning_rate": 1.7231779849707294e-05, "loss": 0.5863, "step": 22962 }, { "epoch": 0.487009819516023, "grad_norm": 0.3259783089160919, "learning_rate": 1.7231549513233787e-05, "loss": 0.4837, "step": 22963 }, { "epoch": 0.48703102797395603, "grad_norm": 0.3347579538822174, "learning_rate": 1.7231319168717407e-05, "loss": 0.4715, "step": 22964 }, { "epoch": 0.48705223643188905, "grad_norm": 0.4400855302810669, "learning_rate": 1.7231088816158422e-05, "loss": 0.4979, "step": 22965 }, { "epoch": 0.48707344488982207, "grad_norm": 0.34470173716545105, "learning_rate": 1.723085845555708e-05, "loss": 0.5655, "step": 22966 }, { "epoch": 0.4870946533477551, "grad_norm": 0.42352524399757385, "learning_rate": 1.7230628086913645e-05, "loss": 0.5306, "step": 22967 }, { "epoch": 0.4871158618056881, "grad_norm": 0.4070787727832794, "learning_rate": 1.7230397710228363e-05, "loss": 0.4745, "step": 22968 }, { "epoch": 0.48713707026362113, "grad_norm": 0.39660346508026123, "learning_rate": 1.7230167325501497e-05, "loss": 0.5526, "step": 22969 }, { "epoch": 0.48715827872155415, "grad_norm": 0.3635474741458893, "learning_rate": 1.7229936932733302e-05, "loss": 0.526, "step": 22970 }, { "epoch": 0.48717948717948717, "grad_norm": 0.3419181704521179, "learning_rate": 1.7229706531924035e-05, "loss": 0.5326, "step": 22971 }, { "epoch": 0.4872006956374202, "grad_norm": 0.3368246257305145, "learning_rate": 1.722947612307395e-05, "loss": 0.452, "step": 22972 }, { "epoch": 0.4872219040953532, "grad_norm": 0.3686148226261139, "learning_rate": 1.7229245706183305e-05, "loss": 0.4813, "step": 22973 }, { "epoch": 0.48724311255328623, "grad_norm": 0.3341217339038849, "learning_rate": 1.7229015281252352e-05, "loss": 0.5181, "step": 22974 }, { "epoch": 0.48726432101121925, "grad_norm": 0.3204389214515686, "learning_rate": 1.7228784848281355e-05, "loss": 0.4671, "step": 22975 }, { "epoch": 0.4872855294691523, "grad_norm": 0.33853235840797424, "learning_rate": 1.722855440727057e-05, "loss": 0.4771, "step": 22976 }, { "epoch": 0.48730673792708534, "grad_norm": 0.3786933720111847, "learning_rate": 1.722832395822024e-05, "loss": 0.5716, "step": 22977 }, { "epoch": 0.48732794638501836, "grad_norm": 0.3899040222167969, "learning_rate": 1.7228093501130637e-05, "loss": 0.541, "step": 22978 }, { "epoch": 0.4873491548429514, "grad_norm": 0.3390350043773651, "learning_rate": 1.722786303600201e-05, "loss": 0.5115, "step": 22979 }, { "epoch": 0.4873703633008844, "grad_norm": 0.30290117859840393, "learning_rate": 1.7227632562834618e-05, "loss": 0.4431, "step": 22980 }, { "epoch": 0.4873915717588174, "grad_norm": 0.41191762685775757, "learning_rate": 1.7227402081628713e-05, "loss": 0.5038, "step": 22981 }, { "epoch": 0.48741278021675044, "grad_norm": 0.4134022891521454, "learning_rate": 1.7227171592384557e-05, "loss": 0.4437, "step": 22982 }, { "epoch": 0.48743398867468346, "grad_norm": 0.33915817737579346, "learning_rate": 1.7226941095102404e-05, "loss": 0.4632, "step": 22983 }, { "epoch": 0.4874551971326165, "grad_norm": 0.3656495213508606, "learning_rate": 1.7226710589782505e-05, "loss": 0.5605, "step": 22984 }, { "epoch": 0.4874764055905495, "grad_norm": 0.32131242752075195, "learning_rate": 1.7226480076425124e-05, "loss": 0.4291, "step": 22985 }, { "epoch": 0.4874976140484825, "grad_norm": 0.3552132248878479, "learning_rate": 1.7226249555030518e-05, "loss": 0.5131, "step": 22986 }, { "epoch": 0.48751882250641554, "grad_norm": 0.34425774216651917, "learning_rate": 1.7226019025598938e-05, "loss": 0.4511, "step": 22987 }, { "epoch": 0.48754003096434856, "grad_norm": 0.3324364423751831, "learning_rate": 1.7225788488130638e-05, "loss": 0.4804, "step": 22988 }, { "epoch": 0.4875612394222816, "grad_norm": 0.33392342925071716, "learning_rate": 1.7225557942625883e-05, "loss": 0.5591, "step": 22989 }, { "epoch": 0.48758244788021465, "grad_norm": 0.3322916030883789, "learning_rate": 1.7225327389084926e-05, "loss": 0.537, "step": 22990 }, { "epoch": 0.4876036563381477, "grad_norm": 0.3398655652999878, "learning_rate": 1.722509682750802e-05, "loss": 0.4816, "step": 22991 }, { "epoch": 0.4876248647960807, "grad_norm": 0.32932528853416443, "learning_rate": 1.7224866257895426e-05, "loss": 0.4325, "step": 22992 }, { "epoch": 0.4876460732540137, "grad_norm": 0.4124816060066223, "learning_rate": 1.7224635680247396e-05, "loss": 0.5393, "step": 22993 }, { "epoch": 0.48766728171194673, "grad_norm": 0.4873153269290924, "learning_rate": 1.7224405094564192e-05, "loss": 0.4989, "step": 22994 }, { "epoch": 0.48768849016987975, "grad_norm": 0.35182926058769226, "learning_rate": 1.7224174500846068e-05, "loss": 0.5314, "step": 22995 }, { "epoch": 0.48770969862781277, "grad_norm": 0.4043065905570984, "learning_rate": 1.7223943899093277e-05, "loss": 0.4333, "step": 22996 }, { "epoch": 0.4877309070857458, "grad_norm": 0.34232616424560547, "learning_rate": 1.722371328930608e-05, "loss": 0.5135, "step": 22997 }, { "epoch": 0.4877521155436788, "grad_norm": 0.3305601179599762, "learning_rate": 1.722348267148473e-05, "loss": 0.4445, "step": 22998 }, { "epoch": 0.48777332400161183, "grad_norm": 0.35400575399398804, "learning_rate": 1.7223252045629486e-05, "loss": 0.4437, "step": 22999 }, { "epoch": 0.48779453245954485, "grad_norm": 0.3502272665500641, "learning_rate": 1.7223021411740607e-05, "loss": 0.5283, "step": 23000 }, { "epoch": 0.48781574091747787, "grad_norm": 0.3441048264503479, "learning_rate": 1.7222790769818342e-05, "loss": 0.5102, "step": 23001 }, { "epoch": 0.4878369493754109, "grad_norm": 0.34994640946388245, "learning_rate": 1.7222560119862955e-05, "loss": 0.4229, "step": 23002 }, { "epoch": 0.48785815783334396, "grad_norm": 0.3961144685745239, "learning_rate": 1.7222329461874695e-05, "loss": 0.503, "step": 23003 }, { "epoch": 0.487879366291277, "grad_norm": 0.3282129168510437, "learning_rate": 1.7222098795853828e-05, "loss": 0.4121, "step": 23004 }, { "epoch": 0.48790057474921, "grad_norm": 0.3681829869747162, "learning_rate": 1.72218681218006e-05, "loss": 0.4721, "step": 23005 }, { "epoch": 0.487921783207143, "grad_norm": 0.3374844491481781, "learning_rate": 1.7221637439715277e-05, "loss": 0.5204, "step": 23006 }, { "epoch": 0.48794299166507604, "grad_norm": 0.3281392455101013, "learning_rate": 1.7221406749598114e-05, "loss": 0.4079, "step": 23007 }, { "epoch": 0.48796420012300906, "grad_norm": 0.34310466051101685, "learning_rate": 1.722117605144936e-05, "loss": 0.4459, "step": 23008 }, { "epoch": 0.4879854085809421, "grad_norm": 0.3400535583496094, "learning_rate": 1.722094534526928e-05, "loss": 0.5131, "step": 23009 }, { "epoch": 0.4880066170388751, "grad_norm": 0.35839226841926575, "learning_rate": 1.7220714631058127e-05, "loss": 0.59, "step": 23010 }, { "epoch": 0.4880278254968081, "grad_norm": 0.3858317732810974, "learning_rate": 1.7220483908816156e-05, "loss": 0.48, "step": 23011 }, { "epoch": 0.48804903395474114, "grad_norm": 0.3837655782699585, "learning_rate": 1.7220253178543625e-05, "loss": 0.539, "step": 23012 }, { "epoch": 0.48807024241267416, "grad_norm": 0.3548399806022644, "learning_rate": 1.7220022440240795e-05, "loss": 0.5144, "step": 23013 }, { "epoch": 0.4880914508706072, "grad_norm": 0.34126579761505127, "learning_rate": 1.7219791693907916e-05, "loss": 0.5155, "step": 23014 }, { "epoch": 0.4881126593285402, "grad_norm": 0.41941824555397034, "learning_rate": 1.7219560939545246e-05, "loss": 0.5447, "step": 23015 }, { "epoch": 0.4881338677864732, "grad_norm": 0.33685070276260376, "learning_rate": 1.7219330177153043e-05, "loss": 0.475, "step": 23016 }, { "epoch": 0.4881550762444063, "grad_norm": 0.31186068058013916, "learning_rate": 1.7219099406731567e-05, "loss": 0.4211, "step": 23017 }, { "epoch": 0.4881762847023393, "grad_norm": 0.29379355907440186, "learning_rate": 1.7218868628281072e-05, "loss": 0.5045, "step": 23018 }, { "epoch": 0.48819749316027233, "grad_norm": 0.34483256936073303, "learning_rate": 1.721863784180181e-05, "loss": 0.5399, "step": 23019 }, { "epoch": 0.48821870161820535, "grad_norm": 0.3533920347690582, "learning_rate": 1.7218407047294044e-05, "loss": 0.553, "step": 23020 }, { "epoch": 0.4882399100761384, "grad_norm": 0.35079866647720337, "learning_rate": 1.721817624475803e-05, "loss": 0.4001, "step": 23021 }, { "epoch": 0.4882611185340714, "grad_norm": 0.33265864849090576, "learning_rate": 1.7217945434194018e-05, "loss": 0.4791, "step": 23022 }, { "epoch": 0.4882823269920044, "grad_norm": 0.35246819257736206, "learning_rate": 1.7217714615602273e-05, "loss": 0.5161, "step": 23023 }, { "epoch": 0.48830353544993743, "grad_norm": 0.3219921588897705, "learning_rate": 1.721748378898305e-05, "loss": 0.5278, "step": 23024 }, { "epoch": 0.48832474390787045, "grad_norm": 0.2945919334888458, "learning_rate": 1.72172529543366e-05, "loss": 0.4304, "step": 23025 }, { "epoch": 0.48834595236580347, "grad_norm": 0.38562673330307007, "learning_rate": 1.721702211166319e-05, "loss": 0.5445, "step": 23026 }, { "epoch": 0.4883671608237365, "grad_norm": 0.337785005569458, "learning_rate": 1.7216791260963065e-05, "loss": 0.5187, "step": 23027 }, { "epoch": 0.4883883692816695, "grad_norm": 0.33213967084884644, "learning_rate": 1.721656040223649e-05, "loss": 0.5186, "step": 23028 }, { "epoch": 0.48840957773960253, "grad_norm": 0.38687634468078613, "learning_rate": 1.7216329535483722e-05, "loss": 0.569, "step": 23029 }, { "epoch": 0.4884307861975356, "grad_norm": 0.41290581226348877, "learning_rate": 1.721609866070501e-05, "loss": 0.5079, "step": 23030 }, { "epoch": 0.4884519946554686, "grad_norm": 0.37083110213279724, "learning_rate": 1.7215867777900617e-05, "loss": 0.4436, "step": 23031 }, { "epoch": 0.48847320311340164, "grad_norm": 0.3571746349334717, "learning_rate": 1.72156368870708e-05, "loss": 0.4981, "step": 23032 }, { "epoch": 0.48849441157133466, "grad_norm": 0.32734790444374084, "learning_rate": 1.7215405988215814e-05, "loss": 0.4495, "step": 23033 }, { "epoch": 0.4885156200292677, "grad_norm": 0.394083708524704, "learning_rate": 1.7215175081335914e-05, "loss": 0.6083, "step": 23034 }, { "epoch": 0.4885368284872007, "grad_norm": 0.3518843948841095, "learning_rate": 1.721494416643136e-05, "loss": 0.5151, "step": 23035 }, { "epoch": 0.4885580369451337, "grad_norm": 0.40948227047920227, "learning_rate": 1.721471324350241e-05, "loss": 0.5155, "step": 23036 }, { "epoch": 0.48857924540306674, "grad_norm": 0.3337245285511017, "learning_rate": 1.7214482312549316e-05, "loss": 0.5605, "step": 23037 }, { "epoch": 0.48860045386099976, "grad_norm": 0.38080769777297974, "learning_rate": 1.721425137357234e-05, "loss": 0.4486, "step": 23038 }, { "epoch": 0.4886216623189328, "grad_norm": 0.3373243808746338, "learning_rate": 1.7214020426571735e-05, "loss": 0.4671, "step": 23039 }, { "epoch": 0.4886428707768658, "grad_norm": 0.3277476131916046, "learning_rate": 1.7213789471547756e-05, "loss": 0.4722, "step": 23040 }, { "epoch": 0.4886640792347988, "grad_norm": 0.34349995851516724, "learning_rate": 1.7213558508500667e-05, "loss": 0.4789, "step": 23041 }, { "epoch": 0.48868528769273184, "grad_norm": 0.5101171135902405, "learning_rate": 1.721332753743072e-05, "loss": 0.4934, "step": 23042 }, { "epoch": 0.48870649615066486, "grad_norm": 0.3448449671268463, "learning_rate": 1.7213096558338175e-05, "loss": 0.485, "step": 23043 }, { "epoch": 0.48872770460859793, "grad_norm": 0.33689776062965393, "learning_rate": 1.7212865571223282e-05, "loss": 0.4229, "step": 23044 }, { "epoch": 0.48874891306653095, "grad_norm": 0.39701342582702637, "learning_rate": 1.7212634576086306e-05, "loss": 0.443, "step": 23045 }, { "epoch": 0.488770121524464, "grad_norm": 0.3544449210166931, "learning_rate": 1.7212403572927498e-05, "loss": 0.495, "step": 23046 }, { "epoch": 0.488791329982397, "grad_norm": 0.3620293438434601, "learning_rate": 1.721217256174712e-05, "loss": 0.5043, "step": 23047 }, { "epoch": 0.48881253844033, "grad_norm": 0.37948691844940186, "learning_rate": 1.7211941542545422e-05, "loss": 0.482, "step": 23048 }, { "epoch": 0.48883374689826303, "grad_norm": 0.36450037360191345, "learning_rate": 1.7211710515322672e-05, "loss": 0.5318, "step": 23049 }, { "epoch": 0.48885495535619605, "grad_norm": 0.3040020167827606, "learning_rate": 1.7211479480079117e-05, "loss": 0.4783, "step": 23050 }, { "epoch": 0.48887616381412907, "grad_norm": 0.8335411548614502, "learning_rate": 1.7211248436815013e-05, "loss": 0.5, "step": 23051 }, { "epoch": 0.4888973722720621, "grad_norm": 0.3778313398361206, "learning_rate": 1.721101738553063e-05, "loss": 0.4843, "step": 23052 }, { "epoch": 0.4889185807299951, "grad_norm": 0.369612455368042, "learning_rate": 1.7210786326226208e-05, "loss": 0.4886, "step": 23053 }, { "epoch": 0.48893978918792813, "grad_norm": 0.3829372823238373, "learning_rate": 1.7210555258902015e-05, "loss": 0.5386, "step": 23054 }, { "epoch": 0.48896099764586115, "grad_norm": 0.3643324375152588, "learning_rate": 1.7210324183558304e-05, "loss": 0.4603, "step": 23055 }, { "epoch": 0.48898220610379417, "grad_norm": 0.3310984969139099, "learning_rate": 1.7210093100195335e-05, "loss": 0.4761, "step": 23056 }, { "epoch": 0.4890034145617272, "grad_norm": 0.3340027928352356, "learning_rate": 1.720986200881336e-05, "loss": 0.5014, "step": 23057 }, { "epoch": 0.48902462301966027, "grad_norm": 0.3613280653953552, "learning_rate": 1.7209630909412642e-05, "loss": 0.523, "step": 23058 }, { "epoch": 0.4890458314775933, "grad_norm": 0.3807298541069031, "learning_rate": 1.7209399801993436e-05, "loss": 0.4527, "step": 23059 }, { "epoch": 0.4890670399355263, "grad_norm": 0.42643430829048157, "learning_rate": 1.7209168686555996e-05, "loss": 0.5728, "step": 23060 }, { "epoch": 0.4890882483934593, "grad_norm": 0.48707544803619385, "learning_rate": 1.7208937563100583e-05, "loss": 0.5532, "step": 23061 }, { "epoch": 0.48910945685139234, "grad_norm": 0.34583571553230286, "learning_rate": 1.720870643162745e-05, "loss": 0.574, "step": 23062 }, { "epoch": 0.48913066530932536, "grad_norm": 0.3650684356689453, "learning_rate": 1.720847529213686e-05, "loss": 0.511, "step": 23063 }, { "epoch": 0.4891518737672584, "grad_norm": 0.5341081023216248, "learning_rate": 1.7208244144629062e-05, "loss": 0.6319, "step": 23064 }, { "epoch": 0.4891730822251914, "grad_norm": 0.7663185596466064, "learning_rate": 1.7208012989104318e-05, "loss": 0.5141, "step": 23065 }, { "epoch": 0.4891942906831244, "grad_norm": 0.3513672351837158, "learning_rate": 1.7207781825562888e-05, "loss": 0.4682, "step": 23066 }, { "epoch": 0.48921549914105744, "grad_norm": 0.4359533190727234, "learning_rate": 1.7207550654005022e-05, "loss": 0.5252, "step": 23067 }, { "epoch": 0.48923670759899046, "grad_norm": 0.3799944221973419, "learning_rate": 1.7207319474430983e-05, "loss": 0.502, "step": 23068 }, { "epoch": 0.4892579160569235, "grad_norm": 0.3155728280544281, "learning_rate": 1.7207088286841023e-05, "loss": 0.3689, "step": 23069 }, { "epoch": 0.4892791245148565, "grad_norm": 0.33700957894325256, "learning_rate": 1.7206857091235408e-05, "loss": 0.4994, "step": 23070 }, { "epoch": 0.4893003329727896, "grad_norm": 0.3133523762226105, "learning_rate": 1.7206625887614384e-05, "loss": 0.4556, "step": 23071 }, { "epoch": 0.4893215414307226, "grad_norm": 0.3587244749069214, "learning_rate": 1.7206394675978217e-05, "loss": 0.5124, "step": 23072 }, { "epoch": 0.4893427498886556, "grad_norm": 0.3286944627761841, "learning_rate": 1.7206163456327156e-05, "loss": 0.5027, "step": 23073 }, { "epoch": 0.48936395834658863, "grad_norm": 0.34157487750053406, "learning_rate": 1.7205932228661465e-05, "loss": 0.424, "step": 23074 }, { "epoch": 0.48938516680452165, "grad_norm": 0.8327723145484924, "learning_rate": 1.72057009929814e-05, "loss": 0.4683, "step": 23075 }, { "epoch": 0.4894063752624547, "grad_norm": 0.35186466574668884, "learning_rate": 1.7205469749287214e-05, "loss": 0.4612, "step": 23076 }, { "epoch": 0.4894275837203877, "grad_norm": 0.33753976225852966, "learning_rate": 1.7205238497579167e-05, "loss": 0.4297, "step": 23077 }, { "epoch": 0.4894487921783207, "grad_norm": 0.3682272136211395, "learning_rate": 1.720500723785752e-05, "loss": 0.538, "step": 23078 }, { "epoch": 0.48947000063625373, "grad_norm": 0.3424340784549713, "learning_rate": 1.7204775970122525e-05, "loss": 0.5366, "step": 23079 }, { "epoch": 0.48949120909418675, "grad_norm": 0.3283904790878296, "learning_rate": 1.720454469437444e-05, "loss": 0.4277, "step": 23080 }, { "epoch": 0.48951241755211977, "grad_norm": 0.3622850179672241, "learning_rate": 1.7204313410613522e-05, "loss": 0.5045, "step": 23081 }, { "epoch": 0.4895336260100528, "grad_norm": 0.3702410161495209, "learning_rate": 1.7204082118840035e-05, "loss": 0.5489, "step": 23082 }, { "epoch": 0.4895548344679858, "grad_norm": 0.37498846650123596, "learning_rate": 1.7203850819054222e-05, "loss": 0.5102, "step": 23083 }, { "epoch": 0.48957604292591883, "grad_norm": 0.32274118065834045, "learning_rate": 1.7203619511256352e-05, "loss": 0.5477, "step": 23084 }, { "epoch": 0.4895972513838519, "grad_norm": 0.39302751421928406, "learning_rate": 1.7203388195446682e-05, "loss": 0.5251, "step": 23085 }, { "epoch": 0.4896184598417849, "grad_norm": 0.3577645719051361, "learning_rate": 1.7203156871625462e-05, "loss": 0.6032, "step": 23086 }, { "epoch": 0.48963966829971795, "grad_norm": 0.3971128463745117, "learning_rate": 1.7202925539792955e-05, "loss": 0.4917, "step": 23087 }, { "epoch": 0.48966087675765096, "grad_norm": 0.36054208874702454, "learning_rate": 1.7202694199949416e-05, "loss": 0.5522, "step": 23088 }, { "epoch": 0.489682085215584, "grad_norm": 0.39210405945777893, "learning_rate": 1.7202462852095107e-05, "loss": 0.564, "step": 23089 }, { "epoch": 0.489703293673517, "grad_norm": 0.3529618978500366, "learning_rate": 1.720223149623028e-05, "loss": 0.3593, "step": 23090 }, { "epoch": 0.48972450213145, "grad_norm": 0.338015079498291, "learning_rate": 1.720200013235519e-05, "loss": 0.4966, "step": 23091 }, { "epoch": 0.48974571058938304, "grad_norm": 0.36779704689979553, "learning_rate": 1.72017687604701e-05, "loss": 0.5128, "step": 23092 }, { "epoch": 0.48976691904731606, "grad_norm": 0.41937199234962463, "learning_rate": 1.7201537380575267e-05, "loss": 0.503, "step": 23093 }, { "epoch": 0.4897881275052491, "grad_norm": 0.5845620632171631, "learning_rate": 1.7201305992670946e-05, "loss": 0.5416, "step": 23094 }, { "epoch": 0.4898093359631821, "grad_norm": 0.4639122188091278, "learning_rate": 1.720107459675739e-05, "loss": 0.5206, "step": 23095 }, { "epoch": 0.4898305444211151, "grad_norm": 0.4149288833141327, "learning_rate": 1.7200843192834868e-05, "loss": 0.5193, "step": 23096 }, { "epoch": 0.48985175287904814, "grad_norm": 0.5412771105766296, "learning_rate": 1.7200611780903628e-05, "loss": 0.5112, "step": 23097 }, { "epoch": 0.48987296133698116, "grad_norm": 0.34436094760894775, "learning_rate": 1.720038036096393e-05, "loss": 0.5133, "step": 23098 }, { "epoch": 0.48989416979491424, "grad_norm": 0.3682270646095276, "learning_rate": 1.7200148933016034e-05, "loss": 0.4996, "step": 23099 }, { "epoch": 0.48991537825284726, "grad_norm": 0.35509011149406433, "learning_rate": 1.719991749706019e-05, "loss": 0.5089, "step": 23100 }, { "epoch": 0.4899365867107803, "grad_norm": 0.34439346194267273, "learning_rate": 1.7199686053096666e-05, "loss": 0.4053, "step": 23101 }, { "epoch": 0.4899577951687133, "grad_norm": 0.372178316116333, "learning_rate": 1.719945460112571e-05, "loss": 0.5415, "step": 23102 }, { "epoch": 0.4899790036266463, "grad_norm": 0.45734450221061707, "learning_rate": 1.7199223141147584e-05, "loss": 0.6361, "step": 23103 }, { "epoch": 0.49000021208457933, "grad_norm": 0.3326556980609894, "learning_rate": 1.7198991673162546e-05, "loss": 0.484, "step": 23104 }, { "epoch": 0.49002142054251235, "grad_norm": 0.32150986790657043, "learning_rate": 1.7198760197170848e-05, "loss": 0.4657, "step": 23105 }, { "epoch": 0.4900426290004454, "grad_norm": 0.3388059139251709, "learning_rate": 1.7198528713172757e-05, "loss": 0.4791, "step": 23106 }, { "epoch": 0.4900638374583784, "grad_norm": 0.3506779968738556, "learning_rate": 1.719829722116852e-05, "loss": 0.4609, "step": 23107 }, { "epoch": 0.4900850459163114, "grad_norm": 0.3628285527229309, "learning_rate": 1.7198065721158405e-05, "loss": 0.5111, "step": 23108 }, { "epoch": 0.49010625437424443, "grad_norm": 0.3224738836288452, "learning_rate": 1.7197834213142658e-05, "loss": 0.4743, "step": 23109 }, { "epoch": 0.49012746283217745, "grad_norm": 0.3687083125114441, "learning_rate": 1.7197602697121544e-05, "loss": 0.553, "step": 23110 }, { "epoch": 0.49014867129011047, "grad_norm": 0.43698498606681824, "learning_rate": 1.719737117309532e-05, "loss": 0.5642, "step": 23111 }, { "epoch": 0.49016987974804355, "grad_norm": 0.36931076645851135, "learning_rate": 1.7197139641064243e-05, "loss": 0.4787, "step": 23112 }, { "epoch": 0.49019108820597657, "grad_norm": 0.3790413439273834, "learning_rate": 1.7196908101028567e-05, "loss": 0.5016, "step": 23113 }, { "epoch": 0.4902122966639096, "grad_norm": 0.3778464198112488, "learning_rate": 1.7196676552988555e-05, "loss": 0.5445, "step": 23114 }, { "epoch": 0.4902335051218426, "grad_norm": 0.44720903038978577, "learning_rate": 1.719644499694446e-05, "loss": 0.476, "step": 23115 }, { "epoch": 0.4902547135797756, "grad_norm": 0.3273213505744934, "learning_rate": 1.7196213432896542e-05, "loss": 0.5168, "step": 23116 }, { "epoch": 0.49027592203770864, "grad_norm": 0.36826738715171814, "learning_rate": 1.719598186084506e-05, "loss": 0.5213, "step": 23117 }, { "epoch": 0.49029713049564166, "grad_norm": 0.4347675144672394, "learning_rate": 1.7195750280790265e-05, "loss": 0.471, "step": 23118 }, { "epoch": 0.4903183389535747, "grad_norm": 0.3568721115589142, "learning_rate": 1.7195518692732422e-05, "loss": 0.5136, "step": 23119 }, { "epoch": 0.4903395474115077, "grad_norm": 0.3770647346973419, "learning_rate": 1.7195287096671785e-05, "loss": 0.5217, "step": 23120 }, { "epoch": 0.4903607558694407, "grad_norm": 0.346470445394516, "learning_rate": 1.7195055492608613e-05, "loss": 0.6051, "step": 23121 }, { "epoch": 0.49038196432737374, "grad_norm": 0.3777124881744385, "learning_rate": 1.7194823880543163e-05, "loss": 0.528, "step": 23122 }, { "epoch": 0.49040317278530676, "grad_norm": 0.3719475567340851, "learning_rate": 1.719459226047569e-05, "loss": 0.5254, "step": 23123 }, { "epoch": 0.4904243812432398, "grad_norm": 0.31581443548202515, "learning_rate": 1.7194360632406457e-05, "loss": 0.5243, "step": 23124 }, { "epoch": 0.4904455897011728, "grad_norm": 0.3701673150062561, "learning_rate": 1.7194128996335716e-05, "loss": 0.5492, "step": 23125 }, { "epoch": 0.4904667981591059, "grad_norm": 0.33972859382629395, "learning_rate": 1.719389735226373e-05, "loss": 0.5026, "step": 23126 }, { "epoch": 0.4904880066170389, "grad_norm": 0.4298374056816101, "learning_rate": 1.7193665700190752e-05, "loss": 0.5242, "step": 23127 }, { "epoch": 0.4905092150749719, "grad_norm": 0.3073230981826782, "learning_rate": 1.7193434040117042e-05, "loss": 0.4527, "step": 23128 }, { "epoch": 0.49053042353290494, "grad_norm": 0.35125046968460083, "learning_rate": 1.7193202372042853e-05, "loss": 0.5358, "step": 23129 }, { "epoch": 0.49055163199083796, "grad_norm": 0.35345858335494995, "learning_rate": 1.7192970695968454e-05, "loss": 0.5691, "step": 23130 }, { "epoch": 0.490572840448771, "grad_norm": 0.3730834424495697, "learning_rate": 1.719273901189409e-05, "loss": 0.5566, "step": 23131 }, { "epoch": 0.490594048906704, "grad_norm": 0.30984804034233093, "learning_rate": 1.7192507319820028e-05, "loss": 0.4786, "step": 23132 }, { "epoch": 0.490615257364637, "grad_norm": 0.3800467848777771, "learning_rate": 1.719227561974652e-05, "loss": 0.5246, "step": 23133 }, { "epoch": 0.49063646582257003, "grad_norm": 0.3165624141693115, "learning_rate": 1.7192043911673824e-05, "loss": 0.3878, "step": 23134 }, { "epoch": 0.49065767428050305, "grad_norm": 0.4371348023414612, "learning_rate": 1.71918121956022e-05, "loss": 0.4636, "step": 23135 }, { "epoch": 0.4906788827384361, "grad_norm": 0.44600316882133484, "learning_rate": 1.7191580471531907e-05, "loss": 0.5418, "step": 23136 }, { "epoch": 0.4907000911963691, "grad_norm": 0.35376185178756714, "learning_rate": 1.71913487394632e-05, "loss": 0.5355, "step": 23137 }, { "epoch": 0.4907212996543021, "grad_norm": 0.343641072511673, "learning_rate": 1.719111699939634e-05, "loss": 0.4864, "step": 23138 }, { "epoch": 0.49074250811223513, "grad_norm": 0.3400079905986786, "learning_rate": 1.7190885251331576e-05, "loss": 0.5523, "step": 23139 }, { "epoch": 0.4907637165701682, "grad_norm": 0.329113632440567, "learning_rate": 1.7190653495269176e-05, "loss": 0.5902, "step": 23140 }, { "epoch": 0.4907849250281012, "grad_norm": 0.32628685235977173, "learning_rate": 1.7190421731209392e-05, "loss": 0.4008, "step": 23141 }, { "epoch": 0.49080613348603425, "grad_norm": 0.32886430621147156, "learning_rate": 1.7190189959152483e-05, "loss": 0.4993, "step": 23142 }, { "epoch": 0.49082734194396727, "grad_norm": 0.37464508414268494, "learning_rate": 1.718995817909871e-05, "loss": 0.5253, "step": 23143 }, { "epoch": 0.4908485504019003, "grad_norm": 0.361092209815979, "learning_rate": 1.7189726391048325e-05, "loss": 0.5595, "step": 23144 }, { "epoch": 0.4908697588598333, "grad_norm": 0.34421417117118835, "learning_rate": 1.7189494595001593e-05, "loss": 0.5567, "step": 23145 }, { "epoch": 0.4908909673177663, "grad_norm": 0.3676380515098572, "learning_rate": 1.7189262790958764e-05, "loss": 0.5284, "step": 23146 }, { "epoch": 0.49091217577569934, "grad_norm": 0.364349365234375, "learning_rate": 1.7189030978920097e-05, "loss": 0.5284, "step": 23147 }, { "epoch": 0.49093338423363236, "grad_norm": 0.3830684721469879, "learning_rate": 1.718879915888586e-05, "loss": 0.5166, "step": 23148 }, { "epoch": 0.4909545926915654, "grad_norm": 0.3561485707759857, "learning_rate": 1.7188567330856293e-05, "loss": 0.452, "step": 23149 }, { "epoch": 0.4909758011494984, "grad_norm": 0.3561851978302002, "learning_rate": 1.7188335494831672e-05, "loss": 0.5774, "step": 23150 }, { "epoch": 0.4909970096074314, "grad_norm": 0.3932890295982361, "learning_rate": 1.7188103650812243e-05, "loss": 0.4726, "step": 23151 }, { "epoch": 0.49101821806536444, "grad_norm": 0.3472622036933899, "learning_rate": 1.718787179879827e-05, "loss": 0.4899, "step": 23152 }, { "epoch": 0.4910394265232975, "grad_norm": 0.31625601649284363, "learning_rate": 1.7187639938790005e-05, "loss": 0.4667, "step": 23153 }, { "epoch": 0.49106063498123054, "grad_norm": 0.35553163290023804, "learning_rate": 1.718740807078771e-05, "loss": 0.5121, "step": 23154 }, { "epoch": 0.49108184343916356, "grad_norm": 0.36528074741363525, "learning_rate": 1.7187176194791643e-05, "loss": 0.5126, "step": 23155 }, { "epoch": 0.4911030518970966, "grad_norm": 0.39750880002975464, "learning_rate": 1.7186944310802064e-05, "loss": 0.539, "step": 23156 }, { "epoch": 0.4911242603550296, "grad_norm": 4.0019612312316895, "learning_rate": 1.7186712418819224e-05, "loss": 0.5874, "step": 23157 }, { "epoch": 0.4911454688129626, "grad_norm": 0.3250342011451721, "learning_rate": 1.718648051884339e-05, "loss": 0.4372, "step": 23158 }, { "epoch": 0.49116667727089564, "grad_norm": 0.4339381456375122, "learning_rate": 1.7186248610874808e-05, "loss": 0.4836, "step": 23159 }, { "epoch": 0.49118788572882865, "grad_norm": 0.39319244027137756, "learning_rate": 1.7186016694913746e-05, "loss": 0.537, "step": 23160 }, { "epoch": 0.4912090941867617, "grad_norm": 0.30755218863487244, "learning_rate": 1.7185784770960458e-05, "loss": 0.4116, "step": 23161 }, { "epoch": 0.4912303026446947, "grad_norm": 0.33862751722335815, "learning_rate": 1.71855528390152e-05, "loss": 0.4625, "step": 23162 }, { "epoch": 0.4912515111026277, "grad_norm": 0.4398342967033386, "learning_rate": 1.7185320899078237e-05, "loss": 0.5233, "step": 23163 }, { "epoch": 0.49127271956056073, "grad_norm": 0.33150628209114075, "learning_rate": 1.718508895114982e-05, "loss": 0.4939, "step": 23164 }, { "epoch": 0.49129392801849375, "grad_norm": 0.34005841612815857, "learning_rate": 1.7184856995230213e-05, "loss": 0.5494, "step": 23165 }, { "epoch": 0.4913151364764268, "grad_norm": 0.3676151633262634, "learning_rate": 1.7184625031319666e-05, "loss": 0.45, "step": 23166 }, { "epoch": 0.49133634493435985, "grad_norm": 0.3373648226261139, "learning_rate": 1.718439305941844e-05, "loss": 0.4737, "step": 23167 }, { "epoch": 0.49135755339229287, "grad_norm": 0.33078768849372864, "learning_rate": 1.7184161079526803e-05, "loss": 0.5157, "step": 23168 }, { "epoch": 0.4913787618502259, "grad_norm": 0.4035869538784027, "learning_rate": 1.7183929091644997e-05, "loss": 0.5366, "step": 23169 }, { "epoch": 0.4913999703081589, "grad_norm": 0.3609924912452698, "learning_rate": 1.718369709577329e-05, "loss": 0.5046, "step": 23170 }, { "epoch": 0.4914211787660919, "grad_norm": 0.32601800560951233, "learning_rate": 1.7183465091911938e-05, "loss": 0.4188, "step": 23171 }, { "epoch": 0.49144238722402495, "grad_norm": 0.3433328866958618, "learning_rate": 1.7183233080061197e-05, "loss": 0.4295, "step": 23172 }, { "epoch": 0.49146359568195797, "grad_norm": 0.3066960275173187, "learning_rate": 1.7183001060221326e-05, "loss": 0.4553, "step": 23173 }, { "epoch": 0.491484804139891, "grad_norm": 0.33809196949005127, "learning_rate": 1.7182769032392586e-05, "loss": 0.454, "step": 23174 }, { "epoch": 0.491506012597824, "grad_norm": 0.30883529782295227, "learning_rate": 1.7182536996575234e-05, "loss": 0.4191, "step": 23175 }, { "epoch": 0.491527221055757, "grad_norm": 0.4031517505645752, "learning_rate": 1.718230495276952e-05, "loss": 0.5106, "step": 23176 }, { "epoch": 0.49154842951369004, "grad_norm": 1.2967530488967896, "learning_rate": 1.7182072900975715e-05, "loss": 0.4386, "step": 23177 }, { "epoch": 0.49156963797162306, "grad_norm": 0.3823862075805664, "learning_rate": 1.718184084119407e-05, "loss": 0.4628, "step": 23178 }, { "epoch": 0.4915908464295561, "grad_norm": 0.40569138526916504, "learning_rate": 1.7181608773424842e-05, "loss": 0.5597, "step": 23179 }, { "epoch": 0.49161205488748916, "grad_norm": 0.31215858459472656, "learning_rate": 1.718137669766829e-05, "loss": 0.4923, "step": 23180 }, { "epoch": 0.4916332633454222, "grad_norm": 0.3499312400817871, "learning_rate": 1.718114461392468e-05, "loss": 0.4705, "step": 23181 }, { "epoch": 0.4916544718033552, "grad_norm": 0.36497586965560913, "learning_rate": 1.7180912522194256e-05, "loss": 0.5604, "step": 23182 }, { "epoch": 0.4916756802612882, "grad_norm": 0.45075514912605286, "learning_rate": 1.7180680422477288e-05, "loss": 0.5342, "step": 23183 }, { "epoch": 0.49169688871922124, "grad_norm": 0.35054150223731995, "learning_rate": 1.718044831477403e-05, "loss": 0.532, "step": 23184 }, { "epoch": 0.49171809717715426, "grad_norm": 0.3782682418823242, "learning_rate": 1.7180216199084732e-05, "loss": 0.5802, "step": 23185 }, { "epoch": 0.4917393056350873, "grad_norm": 0.4515346586704254, "learning_rate": 1.717998407540967e-05, "loss": 0.5693, "step": 23186 }, { "epoch": 0.4917605140930203, "grad_norm": 0.39751264452934265, "learning_rate": 1.7179751943749087e-05, "loss": 0.5698, "step": 23187 }, { "epoch": 0.4917817225509533, "grad_norm": 0.5486707091331482, "learning_rate": 1.7179519804103246e-05, "loss": 0.4275, "step": 23188 }, { "epoch": 0.49180293100888633, "grad_norm": 0.4503745436668396, "learning_rate": 1.7179287656472406e-05, "loss": 0.5604, "step": 23189 }, { "epoch": 0.49182413946681935, "grad_norm": 0.4735540747642517, "learning_rate": 1.7179055500856826e-05, "loss": 0.4309, "step": 23190 }, { "epoch": 0.4918453479247524, "grad_norm": 0.3829251229763031, "learning_rate": 1.717882333725676e-05, "loss": 0.5355, "step": 23191 }, { "epoch": 0.4918665563826854, "grad_norm": 0.3526268005371094, "learning_rate": 1.7178591165672472e-05, "loss": 0.4587, "step": 23192 }, { "epoch": 0.4918877648406184, "grad_norm": 0.3324291408061981, "learning_rate": 1.7178358986104217e-05, "loss": 0.5098, "step": 23193 }, { "epoch": 0.4919089732985515, "grad_norm": 0.3554185628890991, "learning_rate": 1.7178126798552253e-05, "loss": 0.5402, "step": 23194 }, { "epoch": 0.4919301817564845, "grad_norm": 0.35402432084083557, "learning_rate": 1.7177894603016837e-05, "loss": 0.546, "step": 23195 }, { "epoch": 0.4919513902144175, "grad_norm": 0.3117591440677643, "learning_rate": 1.717766239949823e-05, "loss": 0.4981, "step": 23196 }, { "epoch": 0.49197259867235055, "grad_norm": 0.32362088561058044, "learning_rate": 1.717743018799669e-05, "loss": 0.5042, "step": 23197 }, { "epoch": 0.49199380713028357, "grad_norm": 0.34157371520996094, "learning_rate": 1.7177197968512475e-05, "loss": 0.5264, "step": 23198 }, { "epoch": 0.4920150155882166, "grad_norm": 0.35130739212036133, "learning_rate": 1.717696574104584e-05, "loss": 0.451, "step": 23199 }, { "epoch": 0.4920362240461496, "grad_norm": 0.3426859676837921, "learning_rate": 1.717673350559705e-05, "loss": 0.4715, "step": 23200 }, { "epoch": 0.4920574325040826, "grad_norm": 0.2967572510242462, "learning_rate": 1.7176501262166357e-05, "loss": 0.4948, "step": 23201 }, { "epoch": 0.49207864096201565, "grad_norm": 0.34032756090164185, "learning_rate": 1.717626901075402e-05, "loss": 0.5735, "step": 23202 }, { "epoch": 0.49209984941994867, "grad_norm": 0.3340514898300171, "learning_rate": 1.7176036751360303e-05, "loss": 0.505, "step": 23203 }, { "epoch": 0.4921210578778817, "grad_norm": 0.4421142339706421, "learning_rate": 1.7175804483985458e-05, "loss": 0.4285, "step": 23204 }, { "epoch": 0.4921422663358147, "grad_norm": 0.32334935665130615, "learning_rate": 1.7175572208629746e-05, "loss": 0.515, "step": 23205 }, { "epoch": 0.4921634747937477, "grad_norm": 0.3806854486465454, "learning_rate": 1.7175339925293423e-05, "loss": 0.5585, "step": 23206 }, { "epoch": 0.49218468325168074, "grad_norm": 0.3507549464702606, "learning_rate": 1.7175107633976755e-05, "loss": 0.4795, "step": 23207 }, { "epoch": 0.4922058917096138, "grad_norm": 0.3260243833065033, "learning_rate": 1.7174875334679986e-05, "loss": 0.5199, "step": 23208 }, { "epoch": 0.49222710016754684, "grad_norm": 0.33061641454696655, "learning_rate": 1.7174643027403387e-05, "loss": 0.4032, "step": 23209 }, { "epoch": 0.49224830862547986, "grad_norm": 0.3451598286628723, "learning_rate": 1.7174410712147213e-05, "loss": 0.4407, "step": 23210 }, { "epoch": 0.4922695170834129, "grad_norm": 0.3484772741794586, "learning_rate": 1.7174178388911724e-05, "loss": 0.5272, "step": 23211 }, { "epoch": 0.4922907255413459, "grad_norm": 0.3239080607891083, "learning_rate": 1.717394605769717e-05, "loss": 0.4694, "step": 23212 }, { "epoch": 0.4923119339992789, "grad_norm": 0.3300393223762512, "learning_rate": 1.7173713718503822e-05, "loss": 0.5036, "step": 23213 }, { "epoch": 0.49233314245721194, "grad_norm": 0.35074546933174133, "learning_rate": 1.7173481371331928e-05, "loss": 0.4653, "step": 23214 }, { "epoch": 0.49235435091514496, "grad_norm": 0.5022128224372864, "learning_rate": 1.7173249016181752e-05, "loss": 0.4149, "step": 23215 }, { "epoch": 0.492375559373078, "grad_norm": 0.34110262989997864, "learning_rate": 1.7173016653053553e-05, "loss": 0.4788, "step": 23216 }, { "epoch": 0.492396767831011, "grad_norm": 0.30344632267951965, "learning_rate": 1.7172784281947584e-05, "loss": 0.4652, "step": 23217 }, { "epoch": 0.492417976288944, "grad_norm": 0.3238033354282379, "learning_rate": 1.7172551902864106e-05, "loss": 0.4618, "step": 23218 }, { "epoch": 0.49243918474687703, "grad_norm": 0.4487276077270508, "learning_rate": 1.7172319515803377e-05, "loss": 0.4353, "step": 23219 }, { "epoch": 0.49246039320481005, "grad_norm": 0.39819714426994324, "learning_rate": 1.717208712076566e-05, "loss": 0.5213, "step": 23220 }, { "epoch": 0.49248160166274313, "grad_norm": 0.4333038032054901, "learning_rate": 1.7171854717751208e-05, "loss": 0.4921, "step": 23221 }, { "epoch": 0.49250281012067615, "grad_norm": 0.33838143944740295, "learning_rate": 1.7171622306760284e-05, "loss": 0.4459, "step": 23222 }, { "epoch": 0.49252401857860917, "grad_norm": 0.42003071308135986, "learning_rate": 1.717138988779314e-05, "loss": 0.4239, "step": 23223 }, { "epoch": 0.4925452270365422, "grad_norm": 0.32144421339035034, "learning_rate": 1.7171157460850042e-05, "loss": 0.5185, "step": 23224 }, { "epoch": 0.4925664354944752, "grad_norm": 0.3451744318008423, "learning_rate": 1.7170925025931243e-05, "loss": 0.4718, "step": 23225 }, { "epoch": 0.4925876439524082, "grad_norm": 0.41808438301086426, "learning_rate": 1.7170692583037002e-05, "loss": 0.4803, "step": 23226 }, { "epoch": 0.49260885241034125, "grad_norm": 0.4294314980506897, "learning_rate": 1.7170460132167584e-05, "loss": 0.4869, "step": 23227 }, { "epoch": 0.49263006086827427, "grad_norm": 0.6700830459594727, "learning_rate": 1.7170227673323238e-05, "loss": 0.5431, "step": 23228 }, { "epoch": 0.4926512693262073, "grad_norm": 0.40936797857284546, "learning_rate": 1.716999520650423e-05, "loss": 0.4321, "step": 23229 }, { "epoch": 0.4926724777841403, "grad_norm": 0.4615150988101959, "learning_rate": 1.7169762731710813e-05, "loss": 0.5149, "step": 23230 }, { "epoch": 0.4926936862420733, "grad_norm": 0.34991455078125, "learning_rate": 1.716953024894325e-05, "loss": 0.5116, "step": 23231 }, { "epoch": 0.49271489470000635, "grad_norm": 0.3835010230541229, "learning_rate": 1.7169297758201797e-05, "loss": 0.4561, "step": 23232 }, { "epoch": 0.49273610315793936, "grad_norm": 0.3339163362979889, "learning_rate": 1.7169065259486713e-05, "loss": 0.4982, "step": 23233 }, { "epoch": 0.4927573116158724, "grad_norm": 0.3822670876979828, "learning_rate": 1.716883275279826e-05, "loss": 0.5194, "step": 23234 }, { "epoch": 0.49277852007380546, "grad_norm": 0.3487812280654907, "learning_rate": 1.716860023813669e-05, "loss": 0.5198, "step": 23235 }, { "epoch": 0.4927997285317385, "grad_norm": 0.3369208872318268, "learning_rate": 1.7168367715502262e-05, "loss": 0.4937, "step": 23236 }, { "epoch": 0.4928209369896715, "grad_norm": 0.31966981291770935, "learning_rate": 1.7168135184895246e-05, "loss": 0.5107, "step": 23237 }, { "epoch": 0.4928421454476045, "grad_norm": 0.3624390959739685, "learning_rate": 1.7167902646315887e-05, "loss": 0.5339, "step": 23238 }, { "epoch": 0.49286335390553754, "grad_norm": 0.3708723783493042, "learning_rate": 1.7167670099764448e-05, "loss": 0.5273, "step": 23239 }, { "epoch": 0.49288456236347056, "grad_norm": 0.3524864912033081, "learning_rate": 1.716743754524119e-05, "loss": 0.5205, "step": 23240 }, { "epoch": 0.4929057708214036, "grad_norm": 0.35038334131240845, "learning_rate": 1.7167204982746373e-05, "loss": 0.522, "step": 23241 }, { "epoch": 0.4929269792793366, "grad_norm": 0.3847484588623047, "learning_rate": 1.716697241228025e-05, "loss": 0.5259, "step": 23242 }, { "epoch": 0.4929481877372696, "grad_norm": 0.39425036311149597, "learning_rate": 1.7166739833843085e-05, "loss": 0.4797, "step": 23243 }, { "epoch": 0.49296939619520264, "grad_norm": 0.3943764865398407, "learning_rate": 1.716650724743513e-05, "loss": 0.5381, "step": 23244 }, { "epoch": 0.49299060465313566, "grad_norm": 0.32644909620285034, "learning_rate": 1.716627465305665e-05, "loss": 0.471, "step": 23245 }, { "epoch": 0.4930118131110687, "grad_norm": 0.4375295042991638, "learning_rate": 1.71660420507079e-05, "loss": 0.449, "step": 23246 }, { "epoch": 0.4930330215690017, "grad_norm": 0.6689842939376831, "learning_rate": 1.7165809440389145e-05, "loss": 0.4575, "step": 23247 }, { "epoch": 0.4930542300269347, "grad_norm": 0.32395389676094055, "learning_rate": 1.7165576822100636e-05, "loss": 0.4989, "step": 23248 }, { "epoch": 0.4930754384848678, "grad_norm": 0.33739355206489563, "learning_rate": 1.7165344195842634e-05, "loss": 0.4736, "step": 23249 }, { "epoch": 0.4930966469428008, "grad_norm": 0.33908694982528687, "learning_rate": 1.71651115616154e-05, "loss": 0.5561, "step": 23250 }, { "epoch": 0.49311785540073383, "grad_norm": 0.3461097180843353, "learning_rate": 1.716487891941919e-05, "loss": 0.4885, "step": 23251 }, { "epoch": 0.49313906385866685, "grad_norm": 0.33819466829299927, "learning_rate": 1.7164646269254265e-05, "loss": 0.5045, "step": 23252 }, { "epoch": 0.49316027231659987, "grad_norm": 0.3619192838668823, "learning_rate": 1.716441361112088e-05, "loss": 0.4844, "step": 23253 }, { "epoch": 0.4931814807745329, "grad_norm": 0.3316228985786438, "learning_rate": 1.71641809450193e-05, "loss": 0.472, "step": 23254 }, { "epoch": 0.4932026892324659, "grad_norm": 0.327604204416275, "learning_rate": 1.7163948270949778e-05, "loss": 0.4922, "step": 23255 }, { "epoch": 0.4932238976903989, "grad_norm": 0.3233019709587097, "learning_rate": 1.7163715588912576e-05, "loss": 0.4303, "step": 23256 }, { "epoch": 0.49324510614833195, "grad_norm": 0.3467409610748291, "learning_rate": 1.7163482898907952e-05, "loss": 0.4464, "step": 23257 }, { "epoch": 0.49326631460626497, "grad_norm": 0.368319034576416, "learning_rate": 1.7163250200936163e-05, "loss": 0.6029, "step": 23258 }, { "epoch": 0.493287523064198, "grad_norm": 0.354923278093338, "learning_rate": 1.716301749499747e-05, "loss": 0.5386, "step": 23259 }, { "epoch": 0.493308731522131, "grad_norm": 0.35145092010498047, "learning_rate": 1.716278478109213e-05, "loss": 0.5651, "step": 23260 }, { "epoch": 0.493329939980064, "grad_norm": 0.327351450920105, "learning_rate": 1.7162552059220406e-05, "loss": 0.4892, "step": 23261 }, { "epoch": 0.4933511484379971, "grad_norm": 0.3257553279399872, "learning_rate": 1.7162319329382553e-05, "loss": 0.4922, "step": 23262 }, { "epoch": 0.4933723568959301, "grad_norm": 0.544582724571228, "learning_rate": 1.716208659157883e-05, "loss": 0.5813, "step": 23263 }, { "epoch": 0.49339356535386314, "grad_norm": 0.3844055235385895, "learning_rate": 1.7161853845809497e-05, "loss": 0.4857, "step": 23264 }, { "epoch": 0.49341477381179616, "grad_norm": 0.33478277921676636, "learning_rate": 1.716162109207481e-05, "loss": 0.5243, "step": 23265 }, { "epoch": 0.4934359822697292, "grad_norm": 0.32184407114982605, "learning_rate": 1.7161388330375032e-05, "loss": 0.4463, "step": 23266 }, { "epoch": 0.4934571907276622, "grad_norm": 0.35180723667144775, "learning_rate": 1.716115556071042e-05, "loss": 0.5455, "step": 23267 }, { "epoch": 0.4934783991855952, "grad_norm": 0.37763088941574097, "learning_rate": 1.7160922783081235e-05, "loss": 0.5465, "step": 23268 }, { "epoch": 0.49349960764352824, "grad_norm": 0.34028303623199463, "learning_rate": 1.716068999748773e-05, "loss": 0.4708, "step": 23269 }, { "epoch": 0.49352081610146126, "grad_norm": 0.36983099579811096, "learning_rate": 1.716045720393017e-05, "loss": 0.5148, "step": 23270 }, { "epoch": 0.4935420245593943, "grad_norm": 0.33353734016418457, "learning_rate": 1.716022440240881e-05, "loss": 0.4936, "step": 23271 }, { "epoch": 0.4935632330173273, "grad_norm": 0.349359393119812, "learning_rate": 1.7159991592923914e-05, "loss": 0.4944, "step": 23272 }, { "epoch": 0.4935844414752603, "grad_norm": 0.3276771008968353, "learning_rate": 1.7159758775475736e-05, "loss": 0.4909, "step": 23273 }, { "epoch": 0.49360564993319334, "grad_norm": 0.3749978244304657, "learning_rate": 1.7159525950064536e-05, "loss": 0.6228, "step": 23274 }, { "epoch": 0.49362685839112636, "grad_norm": 0.34272927045822144, "learning_rate": 1.7159293116690573e-05, "loss": 0.5392, "step": 23275 }, { "epoch": 0.49364806684905943, "grad_norm": 0.3884284198284149, "learning_rate": 1.7159060275354108e-05, "loss": 0.5442, "step": 23276 }, { "epoch": 0.49366927530699245, "grad_norm": 0.36206111311912537, "learning_rate": 1.7158827426055397e-05, "loss": 0.4581, "step": 23277 }, { "epoch": 0.49369048376492547, "grad_norm": 0.3338980972766876, "learning_rate": 1.7158594568794698e-05, "loss": 0.4928, "step": 23278 }, { "epoch": 0.4937116922228585, "grad_norm": 0.34760335087776184, "learning_rate": 1.7158361703572276e-05, "loss": 0.5018, "step": 23279 }, { "epoch": 0.4937329006807915, "grad_norm": 0.34824198484420776, "learning_rate": 1.7158128830388385e-05, "loss": 0.5112, "step": 23280 }, { "epoch": 0.49375410913872453, "grad_norm": 0.3506050705909729, "learning_rate": 1.7157895949243285e-05, "loss": 0.4551, "step": 23281 }, { "epoch": 0.49377531759665755, "grad_norm": 0.36324965953826904, "learning_rate": 1.7157663060137237e-05, "loss": 0.6181, "step": 23282 }, { "epoch": 0.49379652605459057, "grad_norm": 0.37800896167755127, "learning_rate": 1.71574301630705e-05, "loss": 0.5474, "step": 23283 }, { "epoch": 0.4938177345125236, "grad_norm": 0.3373326361179352, "learning_rate": 1.7157197258043326e-05, "loss": 0.5072, "step": 23284 }, { "epoch": 0.4938389429704566, "grad_norm": 0.3371220827102661, "learning_rate": 1.715696434505598e-05, "loss": 0.5161, "step": 23285 }, { "epoch": 0.4938601514283896, "grad_norm": 0.3821229636669159, "learning_rate": 1.7156731424108722e-05, "loss": 0.5737, "step": 23286 }, { "epoch": 0.49388135988632265, "grad_norm": 0.37043601274490356, "learning_rate": 1.7156498495201813e-05, "loss": 0.5343, "step": 23287 }, { "epoch": 0.49390256834425567, "grad_norm": 0.5581555962562561, "learning_rate": 1.7156265558335504e-05, "loss": 0.5484, "step": 23288 }, { "epoch": 0.4939237768021887, "grad_norm": 0.40972110629081726, "learning_rate": 1.715603261351006e-05, "loss": 0.4757, "step": 23289 }, { "epoch": 0.49394498526012176, "grad_norm": 0.37068912386894226, "learning_rate": 1.715579966072574e-05, "loss": 0.5221, "step": 23290 }, { "epoch": 0.4939661937180548, "grad_norm": 0.3326183557510376, "learning_rate": 1.71555666999828e-05, "loss": 0.4812, "step": 23291 }, { "epoch": 0.4939874021759878, "grad_norm": 0.33449041843414307, "learning_rate": 1.71553337312815e-05, "loss": 0.4773, "step": 23292 }, { "epoch": 0.4940086106339208, "grad_norm": 0.3699556589126587, "learning_rate": 1.71551007546221e-05, "loss": 0.5291, "step": 23293 }, { "epoch": 0.49402981909185384, "grad_norm": 0.35094207525253296, "learning_rate": 1.7154867770004862e-05, "loss": 0.5269, "step": 23294 }, { "epoch": 0.49405102754978686, "grad_norm": 0.3571987748146057, "learning_rate": 1.715463477743004e-05, "loss": 0.389, "step": 23295 }, { "epoch": 0.4940722360077199, "grad_norm": 0.34255489706993103, "learning_rate": 1.7154401776897897e-05, "loss": 0.5587, "step": 23296 }, { "epoch": 0.4940934444656529, "grad_norm": 0.3268367052078247, "learning_rate": 1.7154168768408688e-05, "loss": 0.4909, "step": 23297 }, { "epoch": 0.4941146529235859, "grad_norm": 0.5224561095237732, "learning_rate": 1.715393575196268e-05, "loss": 0.4804, "step": 23298 }, { "epoch": 0.49413586138151894, "grad_norm": 0.3716454803943634, "learning_rate": 1.715370272756012e-05, "loss": 0.5762, "step": 23299 }, { "epoch": 0.49415706983945196, "grad_norm": 0.3465481996536255, "learning_rate": 1.7153469695201278e-05, "loss": 0.5384, "step": 23300 }, { "epoch": 0.494178278297385, "grad_norm": 0.341149240732193, "learning_rate": 1.7153236654886405e-05, "loss": 0.53, "step": 23301 }, { "epoch": 0.494199486755318, "grad_norm": 0.3628823459148407, "learning_rate": 1.715300360661577e-05, "loss": 0.542, "step": 23302 }, { "epoch": 0.49422069521325107, "grad_norm": 0.41102129220962524, "learning_rate": 1.7152770550389623e-05, "loss": 0.6224, "step": 23303 }, { "epoch": 0.4942419036711841, "grad_norm": 0.3352089822292328, "learning_rate": 1.7152537486208228e-05, "loss": 0.5171, "step": 23304 }, { "epoch": 0.4942631121291171, "grad_norm": 0.3517802059650421, "learning_rate": 1.7152304414071842e-05, "loss": 0.5441, "step": 23305 }, { "epoch": 0.49428432058705013, "grad_norm": 0.34105315804481506, "learning_rate": 1.7152071333980726e-05, "loss": 0.4708, "step": 23306 }, { "epoch": 0.49430552904498315, "grad_norm": 0.5081616640090942, "learning_rate": 1.7151838245935137e-05, "loss": 0.567, "step": 23307 }, { "epoch": 0.49432673750291617, "grad_norm": 0.4015340209007263, "learning_rate": 1.715160514993534e-05, "loss": 0.4796, "step": 23308 }, { "epoch": 0.4943479459608492, "grad_norm": 0.3705061376094818, "learning_rate": 1.7151372045981583e-05, "loss": 0.5015, "step": 23309 }, { "epoch": 0.4943691544187822, "grad_norm": 0.39017176628112793, "learning_rate": 1.7151138934074137e-05, "loss": 0.5082, "step": 23310 }, { "epoch": 0.49439036287671523, "grad_norm": 0.4381525218486786, "learning_rate": 1.7150905814213255e-05, "loss": 0.4721, "step": 23311 }, { "epoch": 0.49441157133464825, "grad_norm": 0.3135865032672882, "learning_rate": 1.71506726863992e-05, "loss": 0.4224, "step": 23312 }, { "epoch": 0.49443277979258127, "grad_norm": 0.43229562044143677, "learning_rate": 1.7150439550632224e-05, "loss": 0.5235, "step": 23313 }, { "epoch": 0.4944539882505143, "grad_norm": 0.33958175778388977, "learning_rate": 1.7150206406912592e-05, "loss": 0.5459, "step": 23314 }, { "epoch": 0.4944751967084473, "grad_norm": 0.3633841574192047, "learning_rate": 1.7149973255240568e-05, "loss": 0.5241, "step": 23315 }, { "epoch": 0.4944964051663803, "grad_norm": 0.38719239830970764, "learning_rate": 1.7149740095616402e-05, "loss": 0.5667, "step": 23316 }, { "epoch": 0.4945176136243134, "grad_norm": 0.35103943943977356, "learning_rate": 1.7149506928040355e-05, "loss": 0.4855, "step": 23317 }, { "epoch": 0.4945388220822464, "grad_norm": 0.3540402948856354, "learning_rate": 1.714927375251269e-05, "loss": 0.4793, "step": 23318 }, { "epoch": 0.49456003054017944, "grad_norm": 0.33740806579589844, "learning_rate": 1.7149040569033672e-05, "loss": 0.5184, "step": 23319 }, { "epoch": 0.49458123899811246, "grad_norm": 0.37199461460113525, "learning_rate": 1.7148807377603544e-05, "loss": 0.3946, "step": 23320 }, { "epoch": 0.4946024474560455, "grad_norm": 0.3583105504512787, "learning_rate": 1.7148574178222575e-05, "loss": 0.4556, "step": 23321 }, { "epoch": 0.4946236559139785, "grad_norm": 0.3508039116859436, "learning_rate": 1.714834097089103e-05, "loss": 0.4419, "step": 23322 }, { "epoch": 0.4946448643719115, "grad_norm": 0.3686324954032898, "learning_rate": 1.714810775560916e-05, "loss": 0.4712, "step": 23323 }, { "epoch": 0.49466607282984454, "grad_norm": 0.3725837469100952, "learning_rate": 1.7147874532377224e-05, "loss": 0.5261, "step": 23324 }, { "epoch": 0.49468728128777756, "grad_norm": 0.3434586226940155, "learning_rate": 1.7147641301195487e-05, "loss": 0.5714, "step": 23325 }, { "epoch": 0.4947084897457106, "grad_norm": 0.33170172572135925, "learning_rate": 1.7147408062064204e-05, "loss": 0.5027, "step": 23326 }, { "epoch": 0.4947296982036436, "grad_norm": 0.3357720971107483, "learning_rate": 1.7147174814983632e-05, "loss": 0.4916, "step": 23327 }, { "epoch": 0.4947509066615766, "grad_norm": 0.32880261540412903, "learning_rate": 1.714694155995404e-05, "loss": 0.4349, "step": 23328 }, { "epoch": 0.49477211511950964, "grad_norm": 0.31570500135421753, "learning_rate": 1.714670829697568e-05, "loss": 0.5365, "step": 23329 }, { "epoch": 0.4947933235774427, "grad_norm": 0.3507966697216034, "learning_rate": 1.7146475026048812e-05, "loss": 0.498, "step": 23330 }, { "epoch": 0.49481453203537573, "grad_norm": 0.49967682361602783, "learning_rate": 1.7146241747173696e-05, "loss": 0.5479, "step": 23331 }, { "epoch": 0.49483574049330875, "grad_norm": 0.3553537130355835, "learning_rate": 1.7146008460350596e-05, "loss": 0.4453, "step": 23332 }, { "epoch": 0.49485694895124177, "grad_norm": 0.3397218883037567, "learning_rate": 1.714577516557976e-05, "loss": 0.4944, "step": 23333 }, { "epoch": 0.4948781574091748, "grad_norm": 0.32548797130584717, "learning_rate": 1.7145541862861465e-05, "loss": 0.4853, "step": 23334 }, { "epoch": 0.4948993658671078, "grad_norm": 0.369432270526886, "learning_rate": 1.7145308552195956e-05, "loss": 0.5483, "step": 23335 }, { "epoch": 0.49492057432504083, "grad_norm": 0.3248308598995209, "learning_rate": 1.7145075233583495e-05, "loss": 0.5255, "step": 23336 }, { "epoch": 0.49494178278297385, "grad_norm": 0.3588165044784546, "learning_rate": 1.7144841907024345e-05, "loss": 0.5503, "step": 23337 }, { "epoch": 0.49496299124090687, "grad_norm": 0.38118842244148254, "learning_rate": 1.714460857251876e-05, "loss": 0.5323, "step": 23338 }, { "epoch": 0.4949841996988399, "grad_norm": 0.3333231210708618, "learning_rate": 1.714437523006701e-05, "loss": 0.4171, "step": 23339 }, { "epoch": 0.4950054081567729, "grad_norm": 0.45092642307281494, "learning_rate": 1.7144141879669346e-05, "loss": 0.5293, "step": 23340 }, { "epoch": 0.4950266166147059, "grad_norm": 0.3336491882801056, "learning_rate": 1.7143908521326027e-05, "loss": 0.475, "step": 23341 }, { "epoch": 0.49504782507263895, "grad_norm": 0.35252833366394043, "learning_rate": 1.7143675155037317e-05, "loss": 0.472, "step": 23342 }, { "epoch": 0.49506903353057197, "grad_norm": 0.3314669728279114, "learning_rate": 1.7143441780803473e-05, "loss": 0.5513, "step": 23343 }, { "epoch": 0.49509024198850504, "grad_norm": 0.3928615152835846, "learning_rate": 1.7143208398624754e-05, "loss": 0.4844, "step": 23344 }, { "epoch": 0.49511145044643806, "grad_norm": 0.33188003301620483, "learning_rate": 1.7142975008501422e-05, "loss": 0.4745, "step": 23345 }, { "epoch": 0.4951326589043711, "grad_norm": 0.3444865643978119, "learning_rate": 1.7142741610433734e-05, "loss": 0.4538, "step": 23346 }, { "epoch": 0.4951538673623041, "grad_norm": 0.3252812325954437, "learning_rate": 1.714250820442195e-05, "loss": 0.4759, "step": 23347 }, { "epoch": 0.4951750758202371, "grad_norm": 0.3458339273929596, "learning_rate": 1.7142274790466334e-05, "loss": 0.5489, "step": 23348 }, { "epoch": 0.49519628427817014, "grad_norm": 0.35062557458877563, "learning_rate": 1.714204136856714e-05, "loss": 0.5599, "step": 23349 }, { "epoch": 0.49521749273610316, "grad_norm": 0.3657335340976715, "learning_rate": 1.7141807938724628e-05, "loss": 0.498, "step": 23350 }, { "epoch": 0.4952387011940362, "grad_norm": 0.41488122940063477, "learning_rate": 1.7141574500939064e-05, "loss": 0.5161, "step": 23351 }, { "epoch": 0.4952599096519692, "grad_norm": 0.3500421941280365, "learning_rate": 1.71413410552107e-05, "loss": 0.5621, "step": 23352 }, { "epoch": 0.4952811181099022, "grad_norm": 0.3787703812122345, "learning_rate": 1.7141107601539797e-05, "loss": 0.4353, "step": 23353 }, { "epoch": 0.49530232656783524, "grad_norm": 0.34241604804992676, "learning_rate": 1.7140874139926617e-05, "loss": 0.4767, "step": 23354 }, { "epoch": 0.49532353502576826, "grad_norm": 0.3588131070137024, "learning_rate": 1.7140640670371418e-05, "loss": 0.5732, "step": 23355 }, { "epoch": 0.4953447434837013, "grad_norm": 0.6628214120864868, "learning_rate": 1.7140407192874466e-05, "loss": 0.6362, "step": 23356 }, { "epoch": 0.4953659519416343, "grad_norm": 0.3568311631679535, "learning_rate": 1.714017370743601e-05, "loss": 0.4902, "step": 23357 }, { "epoch": 0.4953871603995674, "grad_norm": 0.5269098281860352, "learning_rate": 1.7139940214056316e-05, "loss": 0.4564, "step": 23358 }, { "epoch": 0.4954083688575004, "grad_norm": 0.30792099237442017, "learning_rate": 1.713970671273564e-05, "loss": 0.5189, "step": 23359 }, { "epoch": 0.4954295773154334, "grad_norm": 0.31337007880210876, "learning_rate": 1.7139473203474248e-05, "loss": 0.4394, "step": 23360 }, { "epoch": 0.49545078577336643, "grad_norm": 0.3858122229576111, "learning_rate": 1.7139239686272393e-05, "loss": 0.5016, "step": 23361 }, { "epoch": 0.49547199423129945, "grad_norm": 0.35507500171661377, "learning_rate": 1.713900616113034e-05, "loss": 0.4742, "step": 23362 }, { "epoch": 0.49549320268923247, "grad_norm": 0.3387058973312378, "learning_rate": 1.7138772628048346e-05, "loss": 0.4964, "step": 23363 }, { "epoch": 0.4955144111471655, "grad_norm": 0.36513781547546387, "learning_rate": 1.713853908702667e-05, "loss": 0.5079, "step": 23364 }, { "epoch": 0.4955356196050985, "grad_norm": 0.3314392566680908, "learning_rate": 1.7138305538065572e-05, "loss": 0.4911, "step": 23365 }, { "epoch": 0.49555682806303153, "grad_norm": 0.36690932512283325, "learning_rate": 1.7138071981165316e-05, "loss": 0.483, "step": 23366 }, { "epoch": 0.49557803652096455, "grad_norm": 0.3468766212463379, "learning_rate": 1.7137838416326154e-05, "loss": 0.4733, "step": 23367 }, { "epoch": 0.49559924497889757, "grad_norm": 0.34325939416885376, "learning_rate": 1.7137604843548353e-05, "loss": 0.5299, "step": 23368 }, { "epoch": 0.4956204534368306, "grad_norm": 0.5111086964607239, "learning_rate": 1.713737126283217e-05, "loss": 0.5439, "step": 23369 }, { "epoch": 0.4956416618947636, "grad_norm": 0.4225039482116699, "learning_rate": 1.7137137674177867e-05, "loss": 0.5031, "step": 23370 }, { "epoch": 0.4956628703526967, "grad_norm": 0.3474995493888855, "learning_rate": 1.7136904077585697e-05, "loss": 0.4674, "step": 23371 }, { "epoch": 0.4956840788106297, "grad_norm": 0.33966580033302307, "learning_rate": 1.7136670473055927e-05, "loss": 0.5435, "step": 23372 }, { "epoch": 0.4957052872685627, "grad_norm": 0.390306293964386, "learning_rate": 1.713643686058881e-05, "loss": 0.3873, "step": 23373 }, { "epoch": 0.49572649572649574, "grad_norm": 0.34344035387039185, "learning_rate": 1.7136203240184614e-05, "loss": 0.5371, "step": 23374 }, { "epoch": 0.49574770418442876, "grad_norm": 0.39318549633026123, "learning_rate": 1.7135969611843592e-05, "loss": 0.5656, "step": 23375 }, { "epoch": 0.4957689126423618, "grad_norm": 0.332258403301239, "learning_rate": 1.713573597556601e-05, "loss": 0.4294, "step": 23376 }, { "epoch": 0.4957901211002948, "grad_norm": 0.35273125767707825, "learning_rate": 1.7135502331352124e-05, "loss": 0.4379, "step": 23377 }, { "epoch": 0.4958113295582278, "grad_norm": 0.35109034180641174, "learning_rate": 1.7135268679202192e-05, "loss": 0.6002, "step": 23378 }, { "epoch": 0.49583253801616084, "grad_norm": 0.3254029154777527, "learning_rate": 1.7135035019116477e-05, "loss": 0.4762, "step": 23379 }, { "epoch": 0.49585374647409386, "grad_norm": 0.34190428256988525, "learning_rate": 1.7134801351095238e-05, "loss": 0.4214, "step": 23380 }, { "epoch": 0.4958749549320269, "grad_norm": 0.333980530500412, "learning_rate": 1.7134567675138734e-05, "loss": 0.522, "step": 23381 }, { "epoch": 0.4958961633899599, "grad_norm": 0.3544657528400421, "learning_rate": 1.713433399124723e-05, "loss": 0.596, "step": 23382 }, { "epoch": 0.4959173718478929, "grad_norm": 0.3489431142807007, "learning_rate": 1.7134100299420976e-05, "loss": 0.4307, "step": 23383 }, { "epoch": 0.49593858030582594, "grad_norm": 0.4236004054546356, "learning_rate": 1.713386659966024e-05, "loss": 0.5565, "step": 23384 }, { "epoch": 0.495959788763759, "grad_norm": 0.3648039996623993, "learning_rate": 1.713363289196528e-05, "loss": 0.4401, "step": 23385 }, { "epoch": 0.49598099722169203, "grad_norm": 0.34357884526252747, "learning_rate": 1.7133399176336354e-05, "loss": 0.5058, "step": 23386 }, { "epoch": 0.49600220567962505, "grad_norm": 0.35994529724121094, "learning_rate": 1.7133165452773725e-05, "loss": 0.4988, "step": 23387 }, { "epoch": 0.49602341413755807, "grad_norm": 0.439899206161499, "learning_rate": 1.7132931721277653e-05, "loss": 0.5265, "step": 23388 }, { "epoch": 0.4960446225954911, "grad_norm": 0.372527152299881, "learning_rate": 1.7132697981848393e-05, "loss": 0.5399, "step": 23389 }, { "epoch": 0.4960658310534241, "grad_norm": 0.34918567538261414, "learning_rate": 1.713246423448621e-05, "loss": 0.4805, "step": 23390 }, { "epoch": 0.49608703951135713, "grad_norm": 0.33402180671691895, "learning_rate": 1.713223047919136e-05, "loss": 0.4262, "step": 23391 }, { "epoch": 0.49610824796929015, "grad_norm": 0.3753513991832733, "learning_rate": 1.7131996715964106e-05, "loss": 0.565, "step": 23392 }, { "epoch": 0.49612945642722317, "grad_norm": 0.3448997735977173, "learning_rate": 1.713176294480471e-05, "loss": 0.4815, "step": 23393 }, { "epoch": 0.4961506648851562, "grad_norm": 0.3332602381706238, "learning_rate": 1.7131529165713427e-05, "loss": 0.5455, "step": 23394 }, { "epoch": 0.4961718733430892, "grad_norm": 0.3215668201446533, "learning_rate": 1.713129537869052e-05, "loss": 0.4686, "step": 23395 }, { "epoch": 0.49619308180102223, "grad_norm": 0.35028406977653503, "learning_rate": 1.7131061583736248e-05, "loss": 0.5629, "step": 23396 }, { "epoch": 0.49621429025895525, "grad_norm": 0.40697741508483887, "learning_rate": 1.713082778085087e-05, "loss": 0.5147, "step": 23397 }, { "epoch": 0.49623549871688827, "grad_norm": 0.3691597580909729, "learning_rate": 1.713059397003465e-05, "loss": 0.4509, "step": 23398 }, { "epoch": 0.49625670717482134, "grad_norm": 0.4206361174583435, "learning_rate": 1.7130360151287845e-05, "loss": 0.4664, "step": 23399 }, { "epoch": 0.49627791563275436, "grad_norm": 0.3874092102050781, "learning_rate": 1.713012632461071e-05, "loss": 0.4778, "step": 23400 }, { "epoch": 0.4962991240906874, "grad_norm": 0.32681405544281006, "learning_rate": 1.7129892490003517e-05, "loss": 0.4778, "step": 23401 }, { "epoch": 0.4963203325486204, "grad_norm": 0.3203781247138977, "learning_rate": 1.7129658647466518e-05, "loss": 0.408, "step": 23402 }, { "epoch": 0.4963415410065534, "grad_norm": 0.4491816759109497, "learning_rate": 1.7129424796999973e-05, "loss": 0.6326, "step": 23403 }, { "epoch": 0.49636274946448644, "grad_norm": 0.3638370633125305, "learning_rate": 1.7129190938604142e-05, "loss": 0.4248, "step": 23404 }, { "epoch": 0.49638395792241946, "grad_norm": 0.3834971785545349, "learning_rate": 1.712895707227929e-05, "loss": 0.5618, "step": 23405 }, { "epoch": 0.4964051663803525, "grad_norm": 0.3338744640350342, "learning_rate": 1.7128723198025675e-05, "loss": 0.4105, "step": 23406 }, { "epoch": 0.4964263748382855, "grad_norm": 0.37246084213256836, "learning_rate": 1.7128489315843553e-05, "loss": 0.5034, "step": 23407 }, { "epoch": 0.4964475832962185, "grad_norm": 0.3450522720813751, "learning_rate": 1.712825542573319e-05, "loss": 0.5721, "step": 23408 }, { "epoch": 0.49646879175415154, "grad_norm": 0.3526928424835205, "learning_rate": 1.7128021527694838e-05, "loss": 0.4294, "step": 23409 }, { "epoch": 0.49649000021208456, "grad_norm": 0.3354910612106323, "learning_rate": 1.7127787621728766e-05, "loss": 0.5089, "step": 23410 }, { "epoch": 0.4965112086700176, "grad_norm": 0.40296727418899536, "learning_rate": 1.7127553707835232e-05, "loss": 0.4967, "step": 23411 }, { "epoch": 0.49653241712795065, "grad_norm": 0.3423936069011688, "learning_rate": 1.7127319786014493e-05, "loss": 0.5067, "step": 23412 }, { "epoch": 0.4965536255858837, "grad_norm": 0.351104736328125, "learning_rate": 1.712708585626681e-05, "loss": 0.5243, "step": 23413 }, { "epoch": 0.4965748340438167, "grad_norm": 0.34038040041923523, "learning_rate": 1.712685191859244e-05, "loss": 0.5154, "step": 23414 }, { "epoch": 0.4965960425017497, "grad_norm": 0.34600159525871277, "learning_rate": 1.7126617972991655e-05, "loss": 0.5336, "step": 23415 }, { "epoch": 0.49661725095968273, "grad_norm": 0.34362709522247314, "learning_rate": 1.7126384019464704e-05, "loss": 0.4855, "step": 23416 }, { "epoch": 0.49663845941761575, "grad_norm": 0.3741110861301422, "learning_rate": 1.712615005801185e-05, "loss": 0.5406, "step": 23417 }, { "epoch": 0.49665966787554877, "grad_norm": 0.3486524224281311, "learning_rate": 1.7125916088633357e-05, "loss": 0.4595, "step": 23418 }, { "epoch": 0.4966808763334818, "grad_norm": 0.4327602684497833, "learning_rate": 1.712568211132948e-05, "loss": 0.4399, "step": 23419 }, { "epoch": 0.4967020847914148, "grad_norm": 0.33569103479385376, "learning_rate": 1.712544812610048e-05, "loss": 0.4359, "step": 23420 }, { "epoch": 0.49672329324934783, "grad_norm": 0.3392837941646576, "learning_rate": 1.712521413294662e-05, "loss": 0.4695, "step": 23421 }, { "epoch": 0.49674450170728085, "grad_norm": 0.3380248546600342, "learning_rate": 1.7124980131868157e-05, "loss": 0.4752, "step": 23422 }, { "epoch": 0.49676571016521387, "grad_norm": 0.336273729801178, "learning_rate": 1.7124746122865356e-05, "loss": 0.4535, "step": 23423 }, { "epoch": 0.4967869186231469, "grad_norm": 0.3549668788909912, "learning_rate": 1.712451210593847e-05, "loss": 0.5177, "step": 23424 }, { "epoch": 0.4968081270810799, "grad_norm": 0.4960460662841797, "learning_rate": 1.712427808108777e-05, "loss": 0.5295, "step": 23425 }, { "epoch": 0.496829335539013, "grad_norm": 0.3700742721557617, "learning_rate": 1.7124044048313503e-05, "loss": 0.5, "step": 23426 }, { "epoch": 0.496850543996946, "grad_norm": 0.35703757405281067, "learning_rate": 1.712381000761594e-05, "loss": 0.5427, "step": 23427 }, { "epoch": 0.496871752454879, "grad_norm": 0.3230714201927185, "learning_rate": 1.712357595899534e-05, "loss": 0.5288, "step": 23428 }, { "epoch": 0.49689296091281204, "grad_norm": 0.3506326377391815, "learning_rate": 1.7123341902451953e-05, "loss": 0.543, "step": 23429 }, { "epoch": 0.49691416937074506, "grad_norm": 0.3129708766937256, "learning_rate": 1.7123107837986054e-05, "loss": 0.477, "step": 23430 }, { "epoch": 0.4969353778286781, "grad_norm": 0.3542363941669464, "learning_rate": 1.7122873765597893e-05, "loss": 0.5027, "step": 23431 }, { "epoch": 0.4969565862866111, "grad_norm": 0.33156618475914, "learning_rate": 1.712263968528773e-05, "loss": 0.5212, "step": 23432 }, { "epoch": 0.4969777947445441, "grad_norm": 0.40900900959968567, "learning_rate": 1.7122405597055833e-05, "loss": 0.5531, "step": 23433 }, { "epoch": 0.49699900320247714, "grad_norm": 0.3753664791584015, "learning_rate": 1.712217150090246e-05, "loss": 0.6067, "step": 23434 }, { "epoch": 0.49702021166041016, "grad_norm": 0.3341591954231262, "learning_rate": 1.712193739682787e-05, "loss": 0.4675, "step": 23435 }, { "epoch": 0.4970414201183432, "grad_norm": 0.3433021306991577, "learning_rate": 1.712170328483232e-05, "loss": 0.5204, "step": 23436 }, { "epoch": 0.4970626285762762, "grad_norm": 0.33096563816070557, "learning_rate": 1.7121469164916072e-05, "loss": 0.4948, "step": 23437 }, { "epoch": 0.4970838370342092, "grad_norm": 0.34196221828460693, "learning_rate": 1.7121235037079393e-05, "loss": 0.4722, "step": 23438 }, { "epoch": 0.49710504549214224, "grad_norm": 0.3934701383113861, "learning_rate": 1.712100090132254e-05, "loss": 0.4988, "step": 23439 }, { "epoch": 0.4971262539500753, "grad_norm": 0.372845321893692, "learning_rate": 1.7120766757645764e-05, "loss": 0.4971, "step": 23440 }, { "epoch": 0.49714746240800833, "grad_norm": 0.35282865166664124, "learning_rate": 1.7120532606049337e-05, "loss": 0.5048, "step": 23441 }, { "epoch": 0.49716867086594135, "grad_norm": 0.3419942855834961, "learning_rate": 1.7120298446533514e-05, "loss": 0.4797, "step": 23442 }, { "epoch": 0.4971898793238744, "grad_norm": 0.3706781566143036, "learning_rate": 1.712006427909856e-05, "loss": 0.5334, "step": 23443 }, { "epoch": 0.4972110877818074, "grad_norm": 0.3230791389942169, "learning_rate": 1.711983010374473e-05, "loss": 0.4316, "step": 23444 }, { "epoch": 0.4972322962397404, "grad_norm": 0.3548179268836975, "learning_rate": 1.7119595920472284e-05, "loss": 0.5305, "step": 23445 }, { "epoch": 0.49725350469767343, "grad_norm": 0.3979722559452057, "learning_rate": 1.711936172928149e-05, "loss": 0.4805, "step": 23446 }, { "epoch": 0.49727471315560645, "grad_norm": 0.6826103329658508, "learning_rate": 1.71191275301726e-05, "loss": 0.4841, "step": 23447 }, { "epoch": 0.49729592161353947, "grad_norm": 0.3534676134586334, "learning_rate": 1.7118893323145882e-05, "loss": 0.553, "step": 23448 }, { "epoch": 0.4973171300714725, "grad_norm": 0.3513513505458832, "learning_rate": 1.7118659108201592e-05, "loss": 0.5023, "step": 23449 }, { "epoch": 0.4973383385294055, "grad_norm": 0.3774971663951874, "learning_rate": 1.711842488533999e-05, "loss": 0.5425, "step": 23450 }, { "epoch": 0.49735954698733853, "grad_norm": 0.30817684531211853, "learning_rate": 1.7118190654561336e-05, "loss": 0.4163, "step": 23451 }, { "epoch": 0.49738075544527155, "grad_norm": 0.3419051766395569, "learning_rate": 1.7117956415865897e-05, "loss": 0.4968, "step": 23452 }, { "epoch": 0.4974019639032046, "grad_norm": 0.3554830551147461, "learning_rate": 1.7117722169253924e-05, "loss": 0.5443, "step": 23453 }, { "epoch": 0.49742317236113764, "grad_norm": 0.3415902554988861, "learning_rate": 1.7117487914725687e-05, "loss": 0.4481, "step": 23454 }, { "epoch": 0.49744438081907066, "grad_norm": 0.35472041368484497, "learning_rate": 1.711725365228144e-05, "loss": 0.5351, "step": 23455 }, { "epoch": 0.4974655892770037, "grad_norm": 0.3657185435295105, "learning_rate": 1.711701938192144e-05, "loss": 0.4193, "step": 23456 }, { "epoch": 0.4974867977349367, "grad_norm": 0.5767422318458557, "learning_rate": 1.7116785103645958e-05, "loss": 0.5439, "step": 23457 }, { "epoch": 0.4975080061928697, "grad_norm": 0.38921070098876953, "learning_rate": 1.711655081745525e-05, "loss": 0.6096, "step": 23458 }, { "epoch": 0.49752921465080274, "grad_norm": 0.3638781011104584, "learning_rate": 1.7116316523349574e-05, "loss": 0.5258, "step": 23459 }, { "epoch": 0.49755042310873576, "grad_norm": 0.42112839221954346, "learning_rate": 1.7116082221329192e-05, "loss": 0.5067, "step": 23460 }, { "epoch": 0.4975716315666688, "grad_norm": 0.3408668637275696, "learning_rate": 1.7115847911394364e-05, "loss": 0.5012, "step": 23461 }, { "epoch": 0.4975928400246018, "grad_norm": 0.35391074419021606, "learning_rate": 1.7115613593545356e-05, "loss": 0.4973, "step": 23462 }, { "epoch": 0.4976140484825348, "grad_norm": 0.3492128849029541, "learning_rate": 1.7115379267782424e-05, "loss": 0.562, "step": 23463 }, { "epoch": 0.49763525694046784, "grad_norm": 0.3414810597896576, "learning_rate": 1.7115144934105825e-05, "loss": 0.5675, "step": 23464 }, { "epoch": 0.49765646539840086, "grad_norm": 0.41408291459083557, "learning_rate": 1.7114910592515825e-05, "loss": 0.4988, "step": 23465 }, { "epoch": 0.4976776738563339, "grad_norm": 0.3040057122707367, "learning_rate": 1.7114676243012684e-05, "loss": 0.4277, "step": 23466 }, { "epoch": 0.49769888231426695, "grad_norm": 0.31695863604545593, "learning_rate": 1.7114441885596665e-05, "loss": 0.5041, "step": 23467 }, { "epoch": 0.4977200907722, "grad_norm": 0.33002325892448425, "learning_rate": 1.711420752026802e-05, "loss": 0.4415, "step": 23468 }, { "epoch": 0.497741299230133, "grad_norm": 0.3980448246002197, "learning_rate": 1.7113973147027022e-05, "loss": 0.4856, "step": 23469 }, { "epoch": 0.497762507688066, "grad_norm": 0.37658724188804626, "learning_rate": 1.711373876587392e-05, "loss": 0.5026, "step": 23470 }, { "epoch": 0.49778371614599903, "grad_norm": 0.3644717335700989, "learning_rate": 1.711350437680898e-05, "loss": 0.4672, "step": 23471 }, { "epoch": 0.49780492460393205, "grad_norm": 0.32765448093414307, "learning_rate": 1.711326997983246e-05, "loss": 0.4148, "step": 23472 }, { "epoch": 0.4978261330618651, "grad_norm": 0.34843388199806213, "learning_rate": 1.711303557494463e-05, "loss": 0.5237, "step": 23473 }, { "epoch": 0.4978473415197981, "grad_norm": 0.39708590507507324, "learning_rate": 1.7112801162145734e-05, "loss": 0.5129, "step": 23474 }, { "epoch": 0.4978685499777311, "grad_norm": 0.3542082905769348, "learning_rate": 1.7112566741436046e-05, "loss": 0.5629, "step": 23475 }, { "epoch": 0.49788975843566413, "grad_norm": 0.50666344165802, "learning_rate": 1.7112332312815827e-05, "loss": 0.5319, "step": 23476 }, { "epoch": 0.49791096689359715, "grad_norm": 0.4175654947757721, "learning_rate": 1.7112097876285332e-05, "loss": 0.4877, "step": 23477 }, { "epoch": 0.49793217535153017, "grad_norm": 0.31449905037879944, "learning_rate": 1.7111863431844823e-05, "loss": 0.4822, "step": 23478 }, { "epoch": 0.4979533838094632, "grad_norm": 0.4287365674972534, "learning_rate": 1.7111628979494562e-05, "loss": 0.5799, "step": 23479 }, { "epoch": 0.4979745922673962, "grad_norm": 0.37553948163986206, "learning_rate": 1.711139451923481e-05, "loss": 0.4995, "step": 23480 }, { "epoch": 0.4979958007253293, "grad_norm": 0.367436945438385, "learning_rate": 1.711116005106582e-05, "loss": 0.5353, "step": 23481 }, { "epoch": 0.4980170091832623, "grad_norm": 0.34425610303878784, "learning_rate": 1.7110925574987866e-05, "loss": 0.4629, "step": 23482 }, { "epoch": 0.4980382176411953, "grad_norm": 0.3619399070739746, "learning_rate": 1.71106910910012e-05, "loss": 0.4609, "step": 23483 }, { "epoch": 0.49805942609912834, "grad_norm": 0.371063232421875, "learning_rate": 1.7110456599106086e-05, "loss": 0.4624, "step": 23484 }, { "epoch": 0.49808063455706136, "grad_norm": 0.3524593710899353, "learning_rate": 1.7110222099302786e-05, "loss": 0.4911, "step": 23485 }, { "epoch": 0.4981018430149944, "grad_norm": 0.38128942251205444, "learning_rate": 1.7109987591591556e-05, "loss": 0.5074, "step": 23486 }, { "epoch": 0.4981230514729274, "grad_norm": 0.3243086338043213, "learning_rate": 1.710975307597266e-05, "loss": 0.4938, "step": 23487 }, { "epoch": 0.4981442599308604, "grad_norm": 0.37139683961868286, "learning_rate": 1.710951855244636e-05, "loss": 0.3804, "step": 23488 }, { "epoch": 0.49816546838879344, "grad_norm": 0.39932623505592346, "learning_rate": 1.710928402101291e-05, "loss": 0.557, "step": 23489 }, { "epoch": 0.49818667684672646, "grad_norm": 0.40688398480415344, "learning_rate": 1.710904948167258e-05, "loss": 0.5051, "step": 23490 }, { "epoch": 0.4982078853046595, "grad_norm": 0.3699759542942047, "learning_rate": 1.710881493442563e-05, "loss": 0.5139, "step": 23491 }, { "epoch": 0.4982290937625925, "grad_norm": 0.35238879919052124, "learning_rate": 1.7108580379272314e-05, "loss": 0.4402, "step": 23492 }, { "epoch": 0.4982503022205255, "grad_norm": 0.4292575418949127, "learning_rate": 1.7108345816212894e-05, "loss": 0.5767, "step": 23493 }, { "epoch": 0.4982715106784586, "grad_norm": 0.33787259459495544, "learning_rate": 1.710811124524764e-05, "loss": 0.5099, "step": 23494 }, { "epoch": 0.4982927191363916, "grad_norm": 0.36561352014541626, "learning_rate": 1.7107876666376802e-05, "loss": 0.515, "step": 23495 }, { "epoch": 0.49831392759432463, "grad_norm": 0.34226271510124207, "learning_rate": 1.7107642079600646e-05, "loss": 0.4381, "step": 23496 }, { "epoch": 0.49833513605225765, "grad_norm": 0.3812273442745209, "learning_rate": 1.710740748491943e-05, "loss": 0.5426, "step": 23497 }, { "epoch": 0.4983563445101907, "grad_norm": 0.4599243402481079, "learning_rate": 1.7107172882333423e-05, "loss": 0.498, "step": 23498 }, { "epoch": 0.4983775529681237, "grad_norm": 0.32631954550743103, "learning_rate": 1.7106938271842876e-05, "loss": 0.4431, "step": 23499 }, { "epoch": 0.4983987614260567, "grad_norm": 0.3383275270462036, "learning_rate": 1.7106703653448055e-05, "loss": 0.4523, "step": 23500 }, { "epoch": 0.49841996988398973, "grad_norm": 0.31516551971435547, "learning_rate": 1.710646902714922e-05, "loss": 0.5385, "step": 23501 }, { "epoch": 0.49844117834192275, "grad_norm": 0.4306713044643402, "learning_rate": 1.710623439294663e-05, "loss": 0.5641, "step": 23502 }, { "epoch": 0.4984623867998558, "grad_norm": 0.45154574513435364, "learning_rate": 1.710599975084055e-05, "loss": 0.5476, "step": 23503 }, { "epoch": 0.4984835952577888, "grad_norm": 0.45044204592704773, "learning_rate": 1.7105765100831238e-05, "loss": 0.536, "step": 23504 }, { "epoch": 0.4985048037157218, "grad_norm": 0.32882553339004517, "learning_rate": 1.7105530442918955e-05, "loss": 0.4837, "step": 23505 }, { "epoch": 0.49852601217365483, "grad_norm": 0.4183397591114044, "learning_rate": 1.7105295777103967e-05, "loss": 0.5238, "step": 23506 }, { "epoch": 0.49854722063158785, "grad_norm": 0.34475430846214294, "learning_rate": 1.7105061103386527e-05, "loss": 0.457, "step": 23507 }, { "epoch": 0.4985684290895209, "grad_norm": 0.3301805555820465, "learning_rate": 1.71048264217669e-05, "loss": 0.4408, "step": 23508 }, { "epoch": 0.49858963754745395, "grad_norm": 0.3274770677089691, "learning_rate": 1.7104591732245347e-05, "loss": 0.5256, "step": 23509 }, { "epoch": 0.49861084600538697, "grad_norm": 0.3657338321208954, "learning_rate": 1.710435703482213e-05, "loss": 0.476, "step": 23510 }, { "epoch": 0.49863205446332, "grad_norm": 0.3178696930408478, "learning_rate": 1.710412232949751e-05, "loss": 0.4442, "step": 23511 }, { "epoch": 0.498653262921253, "grad_norm": 0.32956820726394653, "learning_rate": 1.7103887616271745e-05, "loss": 0.5123, "step": 23512 }, { "epoch": 0.498674471379186, "grad_norm": 0.40682438015937805, "learning_rate": 1.7103652895145092e-05, "loss": 0.5035, "step": 23513 }, { "epoch": 0.49869567983711904, "grad_norm": 0.3348884880542755, "learning_rate": 1.7103418166117824e-05, "loss": 0.5026, "step": 23514 }, { "epoch": 0.49871688829505206, "grad_norm": 0.3423367738723755, "learning_rate": 1.7103183429190198e-05, "loss": 0.522, "step": 23515 }, { "epoch": 0.4987380967529851, "grad_norm": 0.4053760766983032, "learning_rate": 1.7102948684362472e-05, "loss": 0.5795, "step": 23516 }, { "epoch": 0.4987593052109181, "grad_norm": 0.30573058128356934, "learning_rate": 1.7102713931634903e-05, "loss": 0.4902, "step": 23517 }, { "epoch": 0.4987805136688511, "grad_norm": 0.3330196142196655, "learning_rate": 1.7102479171007763e-05, "loss": 0.448, "step": 23518 }, { "epoch": 0.49880172212678414, "grad_norm": 0.3656782805919647, "learning_rate": 1.7102244402481306e-05, "loss": 0.533, "step": 23519 }, { "epoch": 0.49882293058471716, "grad_norm": 0.36399781703948975, "learning_rate": 1.7102009626055795e-05, "loss": 0.5106, "step": 23520 }, { "epoch": 0.49884413904265024, "grad_norm": 0.3489419221878052, "learning_rate": 1.7101774841731485e-05, "loss": 0.5182, "step": 23521 }, { "epoch": 0.49886534750058326, "grad_norm": 0.35340842604637146, "learning_rate": 1.710154004950865e-05, "loss": 0.5435, "step": 23522 }, { "epoch": 0.4988865559585163, "grad_norm": 0.323405385017395, "learning_rate": 1.7101305249387542e-05, "loss": 0.4642, "step": 23523 }, { "epoch": 0.4989077644164493, "grad_norm": 0.39829307794570923, "learning_rate": 1.710107044136842e-05, "loss": 0.7094, "step": 23524 }, { "epoch": 0.4989289728743823, "grad_norm": 0.3338161110877991, "learning_rate": 1.7100835625451554e-05, "loss": 0.5636, "step": 23525 }, { "epoch": 0.49895018133231533, "grad_norm": 0.3909370005130768, "learning_rate": 1.71006008016372e-05, "loss": 0.5777, "step": 23526 }, { "epoch": 0.49897138979024835, "grad_norm": 0.4638720154762268, "learning_rate": 1.7100365969925618e-05, "loss": 0.5071, "step": 23527 }, { "epoch": 0.4989925982481814, "grad_norm": 0.3542589247226715, "learning_rate": 1.7100131130317066e-05, "loss": 0.5294, "step": 23528 }, { "epoch": 0.4990138067061144, "grad_norm": 0.323236882686615, "learning_rate": 1.7099896282811817e-05, "loss": 0.4469, "step": 23529 }, { "epoch": 0.4990350151640474, "grad_norm": 0.3812256455421448, "learning_rate": 1.7099661427410123e-05, "loss": 0.5894, "step": 23530 }, { "epoch": 0.49905622362198043, "grad_norm": 0.34188249707221985, "learning_rate": 1.7099426564112246e-05, "loss": 0.4837, "step": 23531 }, { "epoch": 0.49907743207991345, "grad_norm": 0.3740796744823456, "learning_rate": 1.709919169291845e-05, "loss": 0.4897, "step": 23532 }, { "epoch": 0.49909864053784647, "grad_norm": 0.34982070326805115, "learning_rate": 1.7098956813828995e-05, "loss": 0.5736, "step": 23533 }, { "epoch": 0.4991198489957795, "grad_norm": 0.3393505811691284, "learning_rate": 1.709872192684414e-05, "loss": 0.4586, "step": 23534 }, { "epoch": 0.49914105745371257, "grad_norm": 0.534433901309967, "learning_rate": 1.709848703196415e-05, "loss": 0.5023, "step": 23535 }, { "epoch": 0.4991622659116456, "grad_norm": 0.4552099406719208, "learning_rate": 1.7098252129189283e-05, "loss": 0.4648, "step": 23536 }, { "epoch": 0.4991834743695786, "grad_norm": 0.33433210849761963, "learning_rate": 1.7098017218519804e-05, "loss": 0.6021, "step": 23537 }, { "epoch": 0.4992046828275116, "grad_norm": 0.342538446187973, "learning_rate": 1.7097782299955968e-05, "loss": 0.4409, "step": 23538 }, { "epoch": 0.49922589128544465, "grad_norm": 0.3721531331539154, "learning_rate": 1.7097547373498042e-05, "loss": 0.5494, "step": 23539 }, { "epoch": 0.49924709974337766, "grad_norm": 0.3496073782444, "learning_rate": 1.7097312439146284e-05, "loss": 0.5697, "step": 23540 }, { "epoch": 0.4992683082013107, "grad_norm": 0.3430817425251007, "learning_rate": 1.709707749690096e-05, "loss": 0.4963, "step": 23541 }, { "epoch": 0.4992895166592437, "grad_norm": 0.3400651514530182, "learning_rate": 1.7096842546762326e-05, "loss": 0.4539, "step": 23542 }, { "epoch": 0.4993107251171767, "grad_norm": 0.35095566511154175, "learning_rate": 1.7096607588730644e-05, "loss": 0.5358, "step": 23543 }, { "epoch": 0.49933193357510974, "grad_norm": 0.3920583426952362, "learning_rate": 1.709637262280618e-05, "loss": 0.4901, "step": 23544 }, { "epoch": 0.49935314203304276, "grad_norm": 0.32272762060165405, "learning_rate": 1.7096137648989192e-05, "loss": 0.4708, "step": 23545 }, { "epoch": 0.4993743504909758, "grad_norm": 0.32356104254722595, "learning_rate": 1.7095902667279937e-05, "loss": 0.5408, "step": 23546 }, { "epoch": 0.4993955589489088, "grad_norm": 0.41050565242767334, "learning_rate": 1.7095667677678683e-05, "loss": 0.4474, "step": 23547 }, { "epoch": 0.4994167674068418, "grad_norm": 0.34819158911705017, "learning_rate": 1.7095432680185688e-05, "loss": 0.4848, "step": 23548 }, { "epoch": 0.4994379758647749, "grad_norm": 0.34445658326148987, "learning_rate": 1.709519767480122e-05, "loss": 0.3884, "step": 23549 }, { "epoch": 0.4994591843227079, "grad_norm": 0.35978180170059204, "learning_rate": 1.709496266152553e-05, "loss": 0.5532, "step": 23550 }, { "epoch": 0.49948039278064094, "grad_norm": 0.3092770576477051, "learning_rate": 1.7094727640358885e-05, "loss": 0.4628, "step": 23551 }, { "epoch": 0.49950160123857396, "grad_norm": 0.35816705226898193, "learning_rate": 1.7094492611301542e-05, "loss": 0.4825, "step": 23552 }, { "epoch": 0.499522809696507, "grad_norm": 0.31211763620376587, "learning_rate": 1.7094257574353772e-05, "loss": 0.5175, "step": 23553 }, { "epoch": 0.49954401815444, "grad_norm": 0.3496638536453247, "learning_rate": 1.709402252951583e-05, "loss": 0.4819, "step": 23554 }, { "epoch": 0.499565226612373, "grad_norm": 0.34998583793640137, "learning_rate": 1.709378747678797e-05, "loss": 0.5577, "step": 23555 }, { "epoch": 0.49958643507030603, "grad_norm": 0.369471937417984, "learning_rate": 1.709355241617047e-05, "loss": 0.5381, "step": 23556 }, { "epoch": 0.49960764352823905, "grad_norm": 0.36159205436706543, "learning_rate": 1.709331734766358e-05, "loss": 0.5279, "step": 23557 }, { "epoch": 0.4996288519861721, "grad_norm": 0.3493255376815796, "learning_rate": 1.709308227126756e-05, "loss": 0.4813, "step": 23558 }, { "epoch": 0.4996500604441051, "grad_norm": 0.3280947208404541, "learning_rate": 1.709284718698268e-05, "loss": 0.5705, "step": 23559 }, { "epoch": 0.4996712689020381, "grad_norm": 0.36914628744125366, "learning_rate": 1.7092612094809196e-05, "loss": 0.5385, "step": 23560 }, { "epoch": 0.49969247735997113, "grad_norm": 0.3760623633861542, "learning_rate": 1.7092376994747372e-05, "loss": 0.4435, "step": 23561 }, { "epoch": 0.4997136858179042, "grad_norm": 0.3199475407600403, "learning_rate": 1.7092141886797465e-05, "loss": 0.4677, "step": 23562 }, { "epoch": 0.4997348942758372, "grad_norm": 0.33260563015937805, "learning_rate": 1.709190677095974e-05, "loss": 0.4485, "step": 23563 }, { "epoch": 0.49975610273377025, "grad_norm": 0.34710827469825745, "learning_rate": 1.709167164723446e-05, "loss": 0.4998, "step": 23564 }, { "epoch": 0.49977731119170327, "grad_norm": 0.34804457426071167, "learning_rate": 1.7091436515621883e-05, "loss": 0.5842, "step": 23565 }, { "epoch": 0.4997985196496363, "grad_norm": 0.3668680489063263, "learning_rate": 1.709120137612227e-05, "loss": 0.4832, "step": 23566 }, { "epoch": 0.4998197281075693, "grad_norm": 0.40911608934402466, "learning_rate": 1.7090966228735887e-05, "loss": 0.453, "step": 23567 }, { "epoch": 0.4998409365655023, "grad_norm": 0.524588406085968, "learning_rate": 1.709073107346299e-05, "loss": 0.5279, "step": 23568 }, { "epoch": 0.49986214502343534, "grad_norm": 0.37967216968536377, "learning_rate": 1.7090495910303847e-05, "loss": 0.5934, "step": 23569 }, { "epoch": 0.49988335348136836, "grad_norm": 0.3623723089694977, "learning_rate": 1.7090260739258717e-05, "loss": 0.6127, "step": 23570 }, { "epoch": 0.4999045619393014, "grad_norm": 0.3726411461830139, "learning_rate": 1.7090025560327855e-05, "loss": 0.4519, "step": 23571 }, { "epoch": 0.4999257703972344, "grad_norm": 0.49610984325408936, "learning_rate": 1.708979037351153e-05, "loss": 0.4957, "step": 23572 }, { "epoch": 0.4999469788551674, "grad_norm": 0.35201019048690796, "learning_rate": 1.7089555178810007e-05, "loss": 0.4521, "step": 23573 }, { "epoch": 0.49996818731310044, "grad_norm": 0.42769476771354675, "learning_rate": 1.708931997622354e-05, "loss": 0.5331, "step": 23574 }, { "epoch": 0.49998939577103346, "grad_norm": 0.3681672513484955, "learning_rate": 1.708908476575239e-05, "loss": 0.451, "step": 23575 }, { "epoch": 0.5000106042289665, "grad_norm": 0.3459397852420807, "learning_rate": 1.7088849547396823e-05, "loss": 0.5104, "step": 23576 }, { "epoch": 0.5000318126868996, "grad_norm": 0.3580016493797302, "learning_rate": 1.7088614321157098e-05, "loss": 0.5772, "step": 23577 }, { "epoch": 0.5000530211448325, "grad_norm": 0.38424691557884216, "learning_rate": 1.708837908703348e-05, "loss": 0.4952, "step": 23578 }, { "epoch": 0.5000742296027656, "grad_norm": 0.3286208510398865, "learning_rate": 1.7088143845026225e-05, "loss": 0.4093, "step": 23579 }, { "epoch": 0.5000954380606986, "grad_norm": 0.43355682492256165, "learning_rate": 1.70879085951356e-05, "loss": 0.3823, "step": 23580 }, { "epoch": 0.5001166465186316, "grad_norm": 0.36882078647613525, "learning_rate": 1.7087673337361868e-05, "loss": 0.4794, "step": 23581 }, { "epoch": 0.5001378549765646, "grad_norm": 0.33643263578414917, "learning_rate": 1.7087438071705285e-05, "loss": 0.5388, "step": 23582 }, { "epoch": 0.5001590634344977, "grad_norm": 0.3526858389377594, "learning_rate": 1.7087202798166112e-05, "loss": 0.4685, "step": 23583 }, { "epoch": 0.5001802718924308, "grad_norm": 0.3088624179363251, "learning_rate": 1.7086967516744617e-05, "loss": 0.4424, "step": 23584 }, { "epoch": 0.5002014803503637, "grad_norm": 0.3896002173423767, "learning_rate": 1.7086732227441056e-05, "loss": 0.4753, "step": 23585 }, { "epoch": 0.5002226888082968, "grad_norm": 0.3300662040710449, "learning_rate": 1.7086496930255695e-05, "loss": 0.4413, "step": 23586 }, { "epoch": 0.5002438972662298, "grad_norm": 0.32385900616645813, "learning_rate": 1.708626162518879e-05, "loss": 0.4282, "step": 23587 }, { "epoch": 0.5002651057241628, "grad_norm": 0.3345881402492523, "learning_rate": 1.708602631224061e-05, "loss": 0.546, "step": 23588 }, { "epoch": 0.5002863141820958, "grad_norm": 0.38424646854400635, "learning_rate": 1.708579099141141e-05, "loss": 0.5255, "step": 23589 }, { "epoch": 0.5003075226400289, "grad_norm": 0.40299487113952637, "learning_rate": 1.708555566270146e-05, "loss": 0.5477, "step": 23590 }, { "epoch": 0.5003287310979618, "grad_norm": 0.4650416374206543, "learning_rate": 1.7085320326111014e-05, "loss": 0.4872, "step": 23591 }, { "epoch": 0.5003499395558949, "grad_norm": 0.3213195204734802, "learning_rate": 1.7085084981640333e-05, "loss": 0.5349, "step": 23592 }, { "epoch": 0.5003711480138279, "grad_norm": 0.3674124479293823, "learning_rate": 1.7084849629289688e-05, "loss": 0.4038, "step": 23593 }, { "epoch": 0.500392356471761, "grad_norm": 0.3517632782459259, "learning_rate": 1.708461426905933e-05, "loss": 0.538, "step": 23594 }, { "epoch": 0.5004135649296939, "grad_norm": 0.32242944836616516, "learning_rate": 1.7084378900949527e-05, "loss": 0.4406, "step": 23595 }, { "epoch": 0.500434773387627, "grad_norm": 0.3714275658130646, "learning_rate": 1.7084143524960537e-05, "loss": 0.5025, "step": 23596 }, { "epoch": 0.5004559818455601, "grad_norm": 0.3148531913757324, "learning_rate": 1.708390814109263e-05, "loss": 0.4991, "step": 23597 }, { "epoch": 0.500477190303493, "grad_norm": 0.36361217498779297, "learning_rate": 1.7083672749346058e-05, "loss": 0.481, "step": 23598 }, { "epoch": 0.5004983987614261, "grad_norm": 0.36141225695610046, "learning_rate": 1.7083437349721087e-05, "loss": 0.5501, "step": 23599 }, { "epoch": 0.5005196072193591, "grad_norm": 0.35322305560112, "learning_rate": 1.708320194221798e-05, "loss": 0.5105, "step": 23600 }, { "epoch": 0.5005408156772921, "grad_norm": 0.318125456571579, "learning_rate": 1.7082966526836996e-05, "loss": 0.4937, "step": 23601 }, { "epoch": 0.5005620241352251, "grad_norm": 0.30786940455436707, "learning_rate": 1.7082731103578397e-05, "loss": 0.4752, "step": 23602 }, { "epoch": 0.5005832325931582, "grad_norm": 0.3419357240200043, "learning_rate": 1.7082495672442446e-05, "loss": 0.5011, "step": 23603 }, { "epoch": 0.5006044410510911, "grad_norm": 0.33780622482299805, "learning_rate": 1.7082260233429407e-05, "loss": 0.4384, "step": 23604 }, { "epoch": 0.5006256495090242, "grad_norm": 0.40627676248550415, "learning_rate": 1.708202478653954e-05, "loss": 0.5106, "step": 23605 }, { "epoch": 0.5006468579669572, "grad_norm": 0.6926013827323914, "learning_rate": 1.70817893317731e-05, "loss": 0.5198, "step": 23606 }, { "epoch": 0.5006680664248903, "grad_norm": 0.38771092891693115, "learning_rate": 1.7081553869130364e-05, "loss": 0.5406, "step": 23607 }, { "epoch": 0.5006892748828232, "grad_norm": 0.31587129831314087, "learning_rate": 1.7081318398611582e-05, "loss": 0.4238, "step": 23608 }, { "epoch": 0.5007104833407563, "grad_norm": 0.35807836055755615, "learning_rate": 1.708108292021702e-05, "loss": 0.4099, "step": 23609 }, { "epoch": 0.5007316917986894, "grad_norm": 0.32100406289100647, "learning_rate": 1.7080847433946935e-05, "loss": 0.4638, "step": 23610 }, { "epoch": 0.5007529002566223, "grad_norm": 0.37043648958206177, "learning_rate": 1.70806119398016e-05, "loss": 0.528, "step": 23611 }, { "epoch": 0.5007741087145554, "grad_norm": 0.3680100440979004, "learning_rate": 1.7080376437781265e-05, "loss": 0.4795, "step": 23612 }, { "epoch": 0.5007953171724884, "grad_norm": 0.45280954241752625, "learning_rate": 1.7080140927886198e-05, "loss": 0.4116, "step": 23613 }, { "epoch": 0.5008165256304214, "grad_norm": 0.31641024351119995, "learning_rate": 1.707990541011666e-05, "loss": 0.4456, "step": 23614 }, { "epoch": 0.5008377340883544, "grad_norm": 0.3342326879501343, "learning_rate": 1.7079669884472912e-05, "loss": 0.5111, "step": 23615 }, { "epoch": 0.5008589425462875, "grad_norm": 0.3506360948085785, "learning_rate": 1.7079434350955217e-05, "loss": 0.5239, "step": 23616 }, { "epoch": 0.5008801510042205, "grad_norm": 0.37777388095855713, "learning_rate": 1.7079198809563834e-05, "loss": 0.465, "step": 23617 }, { "epoch": 0.5009013594621535, "grad_norm": 0.42487791180610657, "learning_rate": 1.707896326029903e-05, "loss": 0.4991, "step": 23618 }, { "epoch": 0.5009225679200865, "grad_norm": 0.6910281777381897, "learning_rate": 1.7078727703161067e-05, "loss": 0.5052, "step": 23619 }, { "epoch": 0.5009437763780196, "grad_norm": 0.3650079667568207, "learning_rate": 1.7078492138150204e-05, "loss": 0.4375, "step": 23620 }, { "epoch": 0.5009649848359525, "grad_norm": 0.3808799684047699, "learning_rate": 1.70782565652667e-05, "loss": 0.5806, "step": 23621 }, { "epoch": 0.5009861932938856, "grad_norm": 0.3759278953075409, "learning_rate": 1.7078020984510822e-05, "loss": 0.5929, "step": 23622 }, { "epoch": 0.5010074017518186, "grad_norm": 0.3302828371524811, "learning_rate": 1.7077785395882833e-05, "loss": 0.4659, "step": 23623 }, { "epoch": 0.5010286102097516, "grad_norm": 0.3690362274646759, "learning_rate": 1.707754979938299e-05, "loss": 0.4672, "step": 23624 }, { "epoch": 0.5010498186676847, "grad_norm": 0.4135272800922394, "learning_rate": 1.7077314195011557e-05, "loss": 0.4991, "step": 23625 }, { "epoch": 0.5010710271256177, "grad_norm": 0.3360489010810852, "learning_rate": 1.70770785827688e-05, "loss": 0.5028, "step": 23626 }, { "epoch": 0.5010922355835508, "grad_norm": 0.35804808139801025, "learning_rate": 1.7076842962654974e-05, "loss": 0.5904, "step": 23627 }, { "epoch": 0.5011134440414837, "grad_norm": 0.33579757809638977, "learning_rate": 1.7076607334670345e-05, "loss": 0.527, "step": 23628 }, { "epoch": 0.5011346524994168, "grad_norm": 0.384922057390213, "learning_rate": 1.7076371698815177e-05, "loss": 0.5003, "step": 23629 }, { "epoch": 0.5011558609573498, "grad_norm": 0.48581212759017944, "learning_rate": 1.7076136055089728e-05, "loss": 0.5324, "step": 23630 }, { "epoch": 0.5011770694152828, "grad_norm": 0.3024391233921051, "learning_rate": 1.707590040349426e-05, "loss": 0.4446, "step": 23631 }, { "epoch": 0.5011982778732158, "grad_norm": 0.35754528641700745, "learning_rate": 1.7075664744029046e-05, "loss": 0.3946, "step": 23632 }, { "epoch": 0.5012194863311489, "grad_norm": 0.34030765295028687, "learning_rate": 1.7075429076694332e-05, "loss": 0.5457, "step": 23633 }, { "epoch": 0.5012406947890818, "grad_norm": 0.3775424063205719, "learning_rate": 1.7075193401490388e-05, "loss": 0.4995, "step": 23634 }, { "epoch": 0.5012619032470149, "grad_norm": 0.3474002778530121, "learning_rate": 1.7074957718417476e-05, "loss": 0.5172, "step": 23635 }, { "epoch": 0.5012831117049479, "grad_norm": 0.39029645919799805, "learning_rate": 1.7074722027475856e-05, "loss": 0.5827, "step": 23636 }, { "epoch": 0.501304320162881, "grad_norm": 0.3307945132255554, "learning_rate": 1.7074486328665794e-05, "loss": 0.5262, "step": 23637 }, { "epoch": 0.501325528620814, "grad_norm": 0.4350346624851227, "learning_rate": 1.707425062198755e-05, "loss": 0.5634, "step": 23638 }, { "epoch": 0.501346737078747, "grad_norm": 0.3480948507785797, "learning_rate": 1.7074014907441383e-05, "loss": 0.5271, "step": 23639 }, { "epoch": 0.5013679455366801, "grad_norm": 0.35713502764701843, "learning_rate": 1.707377918502756e-05, "loss": 0.4497, "step": 23640 }, { "epoch": 0.501389153994613, "grad_norm": 0.346059650182724, "learning_rate": 1.707354345474634e-05, "loss": 0.4711, "step": 23641 }, { "epoch": 0.5014103624525461, "grad_norm": 0.38421735167503357, "learning_rate": 1.7073307716597985e-05, "loss": 0.4939, "step": 23642 }, { "epoch": 0.5014315709104791, "grad_norm": 0.49778446555137634, "learning_rate": 1.7073071970582764e-05, "loss": 0.4823, "step": 23643 }, { "epoch": 0.5014527793684121, "grad_norm": 0.39616233110427856, "learning_rate": 1.7072836216700927e-05, "loss": 0.522, "step": 23644 }, { "epoch": 0.5014739878263451, "grad_norm": 0.3274792730808258, "learning_rate": 1.707260045495275e-05, "loss": 0.5063, "step": 23645 }, { "epoch": 0.5014951962842782, "grad_norm": 0.3575374484062195, "learning_rate": 1.7072364685338482e-05, "loss": 0.5237, "step": 23646 }, { "epoch": 0.5015164047422112, "grad_norm": 0.3361317813396454, "learning_rate": 1.7072128907858395e-05, "loss": 0.5113, "step": 23647 }, { "epoch": 0.5015376132001442, "grad_norm": 0.36874082684516907, "learning_rate": 1.7071893122512747e-05, "loss": 0.5003, "step": 23648 }, { "epoch": 0.5015588216580772, "grad_norm": 0.3372516930103302, "learning_rate": 1.7071657329301802e-05, "loss": 0.476, "step": 23649 }, { "epoch": 0.5015800301160103, "grad_norm": 0.30414918065071106, "learning_rate": 1.7071421528225817e-05, "loss": 0.4635, "step": 23650 }, { "epoch": 0.5016012385739433, "grad_norm": 0.3138447403907776, "learning_rate": 1.7071185719285064e-05, "loss": 0.5336, "step": 23651 }, { "epoch": 0.5016224470318763, "grad_norm": 0.3614797592163086, "learning_rate": 1.7070949902479795e-05, "loss": 0.5542, "step": 23652 }, { "epoch": 0.5016436554898094, "grad_norm": 0.32897183299064636, "learning_rate": 1.707071407781028e-05, "loss": 0.4466, "step": 23653 }, { "epoch": 0.5016648639477423, "grad_norm": 0.3446449935436249, "learning_rate": 1.7070478245276776e-05, "loss": 0.4974, "step": 23654 }, { "epoch": 0.5016860724056754, "grad_norm": 0.39933985471725464, "learning_rate": 1.707024240487955e-05, "loss": 0.4847, "step": 23655 }, { "epoch": 0.5017072808636084, "grad_norm": 0.3649958372116089, "learning_rate": 1.7070006556618862e-05, "loss": 0.4357, "step": 23656 }, { "epoch": 0.5017284893215415, "grad_norm": 0.3703306317329407, "learning_rate": 1.7069770700494972e-05, "loss": 0.4746, "step": 23657 }, { "epoch": 0.5017496977794744, "grad_norm": 0.34623005986213684, "learning_rate": 1.7069534836508142e-05, "loss": 0.4092, "step": 23658 }, { "epoch": 0.5017709062374075, "grad_norm": 0.3225097954273224, "learning_rate": 1.706929896465864e-05, "loss": 0.4852, "step": 23659 }, { "epoch": 0.5017921146953405, "grad_norm": 0.34576255083084106, "learning_rate": 1.7069063084946726e-05, "loss": 0.4922, "step": 23660 }, { "epoch": 0.5018133231532735, "grad_norm": 0.349124938249588, "learning_rate": 1.7068827197372662e-05, "loss": 0.502, "step": 23661 }, { "epoch": 0.5018345316112065, "grad_norm": 0.6698277592658997, "learning_rate": 1.706859130193671e-05, "loss": 0.5269, "step": 23662 }, { "epoch": 0.5018557400691396, "grad_norm": 0.341652512550354, "learning_rate": 1.7068355398639127e-05, "loss": 0.4574, "step": 23663 }, { "epoch": 0.5018769485270725, "grad_norm": 0.5138251781463623, "learning_rate": 1.7068119487480182e-05, "loss": 0.521, "step": 23664 }, { "epoch": 0.5018981569850056, "grad_norm": 0.3328310251235962, "learning_rate": 1.7067883568460138e-05, "loss": 0.4677, "step": 23665 }, { "epoch": 0.5019193654429387, "grad_norm": 0.32807281613349915, "learning_rate": 1.7067647641579256e-05, "loss": 0.4536, "step": 23666 }, { "epoch": 0.5019405739008717, "grad_norm": 0.3639613687992096, "learning_rate": 1.70674117068378e-05, "loss": 0.538, "step": 23667 }, { "epoch": 0.5019617823588047, "grad_norm": 0.3688909411430359, "learning_rate": 1.7067175764236025e-05, "loss": 0.5789, "step": 23668 }, { "epoch": 0.5019829908167377, "grad_norm": 0.3815086781978607, "learning_rate": 1.70669398137742e-05, "loss": 0.4776, "step": 23669 }, { "epoch": 0.5020041992746708, "grad_norm": 0.3494490385055542, "learning_rate": 1.7066703855452585e-05, "loss": 0.4601, "step": 23670 }, { "epoch": 0.5020254077326037, "grad_norm": 0.47134634852409363, "learning_rate": 1.7066467889271448e-05, "loss": 0.5035, "step": 23671 }, { "epoch": 0.5020466161905368, "grad_norm": 0.3377094268798828, "learning_rate": 1.7066231915231043e-05, "loss": 0.4576, "step": 23672 }, { "epoch": 0.5020678246484698, "grad_norm": 0.34242504835128784, "learning_rate": 1.7065995933331637e-05, "loss": 0.4385, "step": 23673 }, { "epoch": 0.5020890331064028, "grad_norm": 0.36372247338294983, "learning_rate": 1.706575994357349e-05, "loss": 0.5117, "step": 23674 }, { "epoch": 0.5021102415643358, "grad_norm": 0.34995129704475403, "learning_rate": 1.706552394595687e-05, "loss": 0.4804, "step": 23675 }, { "epoch": 0.5021314500222689, "grad_norm": 0.3269703984260559, "learning_rate": 1.706528794048203e-05, "loss": 0.5274, "step": 23676 }, { "epoch": 0.5021526584802019, "grad_norm": 0.35739895701408386, "learning_rate": 1.706505192714924e-05, "loss": 0.5705, "step": 23677 }, { "epoch": 0.5021738669381349, "grad_norm": 0.35830092430114746, "learning_rate": 1.7064815905958763e-05, "loss": 0.5641, "step": 23678 }, { "epoch": 0.502195075396068, "grad_norm": 0.2881336510181427, "learning_rate": 1.7064579876910857e-05, "loss": 0.456, "step": 23679 }, { "epoch": 0.502216283854001, "grad_norm": 0.34012776613235474, "learning_rate": 1.7064343840005792e-05, "loss": 0.5148, "step": 23680 }, { "epoch": 0.502237492311934, "grad_norm": 0.36045053601264954, "learning_rate": 1.706410779524382e-05, "loss": 0.5073, "step": 23681 }, { "epoch": 0.502258700769867, "grad_norm": 0.30903545022010803, "learning_rate": 1.7063871742625208e-05, "loss": 0.488, "step": 23682 }, { "epoch": 0.5022799092278001, "grad_norm": 0.49707624316215515, "learning_rate": 1.706363568215022e-05, "loss": 0.4139, "step": 23683 }, { "epoch": 0.502301117685733, "grad_norm": 0.31289437413215637, "learning_rate": 1.706339961381912e-05, "loss": 0.4739, "step": 23684 }, { "epoch": 0.5023223261436661, "grad_norm": 0.3958890736103058, "learning_rate": 1.706316353763217e-05, "loss": 0.5176, "step": 23685 }, { "epoch": 0.5023435346015991, "grad_norm": 0.3622133731842041, "learning_rate": 1.7062927453589622e-05, "loss": 0.4807, "step": 23686 }, { "epoch": 0.5023647430595322, "grad_norm": 0.455578088760376, "learning_rate": 1.7062691361691753e-05, "loss": 0.4932, "step": 23687 }, { "epoch": 0.5023859515174651, "grad_norm": 0.3921433389186859, "learning_rate": 1.706245526193882e-05, "loss": 0.515, "step": 23688 }, { "epoch": 0.5024071599753982, "grad_norm": 0.38247713446617126, "learning_rate": 1.7062219154331086e-05, "loss": 0.5161, "step": 23689 }, { "epoch": 0.5024283684333312, "grad_norm": 0.5814939737319946, "learning_rate": 1.7061983038868818e-05, "loss": 0.5027, "step": 23690 }, { "epoch": 0.5024495768912642, "grad_norm": 0.37899187207221985, "learning_rate": 1.7061746915552267e-05, "loss": 0.543, "step": 23691 }, { "epoch": 0.5024707853491973, "grad_norm": 0.3755878210067749, "learning_rate": 1.7061510784381705e-05, "loss": 0.5168, "step": 23692 }, { "epoch": 0.5024919938071303, "grad_norm": 0.3444148898124695, "learning_rate": 1.706127464535739e-05, "loss": 0.5139, "step": 23693 }, { "epoch": 0.5025132022650634, "grad_norm": 0.37654227018356323, "learning_rate": 1.7061038498479586e-05, "loss": 0.445, "step": 23694 }, { "epoch": 0.5025344107229963, "grad_norm": 0.3368363678455353, "learning_rate": 1.706080234374856e-05, "loss": 0.5719, "step": 23695 }, { "epoch": 0.5025556191809294, "grad_norm": 0.3701934814453125, "learning_rate": 1.706056618116457e-05, "loss": 0.4839, "step": 23696 }, { "epoch": 0.5025768276388624, "grad_norm": 0.3780977725982666, "learning_rate": 1.7060330010727878e-05, "loss": 0.5527, "step": 23697 }, { "epoch": 0.5025980360967954, "grad_norm": 0.3376989960670471, "learning_rate": 1.706009383243875e-05, "loss": 0.4845, "step": 23698 }, { "epoch": 0.5026192445547284, "grad_norm": 0.3294442892074585, "learning_rate": 1.7059857646297446e-05, "loss": 0.4265, "step": 23699 }, { "epoch": 0.5026404530126615, "grad_norm": 0.37979164719581604, "learning_rate": 1.705962145230423e-05, "loss": 0.4788, "step": 23700 }, { "epoch": 0.5026616614705944, "grad_norm": 0.3782939612865448, "learning_rate": 1.7059385250459362e-05, "loss": 0.5182, "step": 23701 }, { "epoch": 0.5026828699285275, "grad_norm": 0.3291650116443634, "learning_rate": 1.705914904076311e-05, "loss": 0.4469, "step": 23702 }, { "epoch": 0.5027040783864605, "grad_norm": 0.3759717345237732, "learning_rate": 1.7058912823215732e-05, "loss": 0.5633, "step": 23703 }, { "epoch": 0.5027252868443935, "grad_norm": 0.3644501864910126, "learning_rate": 1.7058676597817495e-05, "loss": 0.5538, "step": 23704 }, { "epoch": 0.5027464953023265, "grad_norm": 0.3850196301937103, "learning_rate": 1.7058440364568658e-05, "loss": 0.4155, "step": 23705 }, { "epoch": 0.5027677037602596, "grad_norm": 0.33075782656669617, "learning_rate": 1.7058204123469485e-05, "loss": 0.4434, "step": 23706 }, { "epoch": 0.5027889122181927, "grad_norm": 0.3505517244338989, "learning_rate": 1.7057967874520235e-05, "loss": 0.5316, "step": 23707 }, { "epoch": 0.5028101206761256, "grad_norm": 0.3236154615879059, "learning_rate": 1.7057731617721178e-05, "loss": 0.5366, "step": 23708 }, { "epoch": 0.5028313291340587, "grad_norm": 0.3593384325504303, "learning_rate": 1.7057495353072576e-05, "loss": 0.4468, "step": 23709 }, { "epoch": 0.5028525375919917, "grad_norm": 0.36180317401885986, "learning_rate": 1.7057259080574685e-05, "loss": 0.5177, "step": 23710 }, { "epoch": 0.5028737460499247, "grad_norm": 0.37279650568962097, "learning_rate": 1.7057022800227773e-05, "loss": 0.511, "step": 23711 }, { "epoch": 0.5028949545078577, "grad_norm": 0.34351009130477905, "learning_rate": 1.70567865120321e-05, "loss": 0.5182, "step": 23712 }, { "epoch": 0.5029161629657908, "grad_norm": 0.3452273905277252, "learning_rate": 1.705655021598793e-05, "loss": 0.5074, "step": 23713 }, { "epoch": 0.5029373714237237, "grad_norm": 0.3161263167858124, "learning_rate": 1.705631391209553e-05, "loss": 0.4598, "step": 23714 }, { "epoch": 0.5029585798816568, "grad_norm": 0.3435898721218109, "learning_rate": 1.7056077600355155e-05, "loss": 0.5282, "step": 23715 }, { "epoch": 0.5029797883395898, "grad_norm": 0.8718681931495667, "learning_rate": 1.7055841280767077e-05, "loss": 0.5808, "step": 23716 }, { "epoch": 0.5030009967975229, "grad_norm": 0.33453431725502014, "learning_rate": 1.7055604953331548e-05, "loss": 0.4798, "step": 23717 }, { "epoch": 0.5030222052554558, "grad_norm": 0.3972095549106598, "learning_rate": 1.705536861804884e-05, "loss": 0.5803, "step": 23718 }, { "epoch": 0.5030434137133889, "grad_norm": 0.37299686670303345, "learning_rate": 1.705513227491921e-05, "loss": 0.5711, "step": 23719 }, { "epoch": 0.503064622171322, "grad_norm": 0.4051089882850647, "learning_rate": 1.7054895923942924e-05, "loss": 0.5288, "step": 23720 }, { "epoch": 0.5030858306292549, "grad_norm": 0.33541396260261536, "learning_rate": 1.7054659565120242e-05, "loss": 0.4728, "step": 23721 }, { "epoch": 0.503107039087188, "grad_norm": 0.3296944499015808, "learning_rate": 1.7054423198451434e-05, "loss": 0.426, "step": 23722 }, { "epoch": 0.503128247545121, "grad_norm": 0.34063729643821716, "learning_rate": 1.7054186823936753e-05, "loss": 0.5086, "step": 23723 }, { "epoch": 0.503149456003054, "grad_norm": 0.37469759583473206, "learning_rate": 1.705395044157647e-05, "loss": 0.546, "step": 23724 }, { "epoch": 0.503170664460987, "grad_norm": 0.2904433608055115, "learning_rate": 1.7053714051370845e-05, "loss": 0.4431, "step": 23725 }, { "epoch": 0.5031918729189201, "grad_norm": 0.345956951379776, "learning_rate": 1.7053477653320137e-05, "loss": 0.5298, "step": 23726 }, { "epoch": 0.503213081376853, "grad_norm": 0.41619041562080383, "learning_rate": 1.7053241247424612e-05, "loss": 0.5187, "step": 23727 }, { "epoch": 0.5032342898347861, "grad_norm": 0.34686702489852905, "learning_rate": 1.7053004833684537e-05, "loss": 0.4449, "step": 23728 }, { "epoch": 0.5032554982927191, "grad_norm": 0.33806782960891724, "learning_rate": 1.705276841210017e-05, "loss": 0.4863, "step": 23729 }, { "epoch": 0.5032767067506522, "grad_norm": 0.31154483556747437, "learning_rate": 1.7052531982671773e-05, "loss": 0.4071, "step": 23730 }, { "epoch": 0.5032979152085851, "grad_norm": 0.3725748360157013, "learning_rate": 1.7052295545399614e-05, "loss": 0.5628, "step": 23731 }, { "epoch": 0.5033191236665182, "grad_norm": 0.3176414966583252, "learning_rate": 1.705205910028395e-05, "loss": 0.5056, "step": 23732 }, { "epoch": 0.5033403321244513, "grad_norm": 0.8898536562919617, "learning_rate": 1.705182264732505e-05, "loss": 0.5816, "step": 23733 }, { "epoch": 0.5033615405823842, "grad_norm": 0.351076602935791, "learning_rate": 1.7051586186523173e-05, "loss": 0.4655, "step": 23734 }, { "epoch": 0.5033827490403173, "grad_norm": 0.3399287164211273, "learning_rate": 1.7051349717878584e-05, "loss": 0.4833, "step": 23735 }, { "epoch": 0.5034039574982503, "grad_norm": 0.3314957320690155, "learning_rate": 1.7051113241391545e-05, "loss": 0.51, "step": 23736 }, { "epoch": 0.5034251659561834, "grad_norm": 0.34463295340538025, "learning_rate": 1.705087675706232e-05, "loss": 0.4611, "step": 23737 }, { "epoch": 0.5034463744141163, "grad_norm": 0.36189091205596924, "learning_rate": 1.705064026489117e-05, "loss": 0.5216, "step": 23738 }, { "epoch": 0.5034675828720494, "grad_norm": 0.37208306789398193, "learning_rate": 1.7050403764878358e-05, "loss": 0.5157, "step": 23739 }, { "epoch": 0.5034887913299824, "grad_norm": 0.38027623295783997, "learning_rate": 1.705016725702415e-05, "loss": 0.5174, "step": 23740 }, { "epoch": 0.5035099997879154, "grad_norm": 0.3252016007900238, "learning_rate": 1.704993074132881e-05, "loss": 0.4723, "step": 23741 }, { "epoch": 0.5035312082458484, "grad_norm": 0.3611429035663605, "learning_rate": 1.704969421779259e-05, "loss": 0.4957, "step": 23742 }, { "epoch": 0.5035524167037815, "grad_norm": 0.3721880614757538, "learning_rate": 1.7049457686415767e-05, "loss": 0.4969, "step": 23743 }, { "epoch": 0.5035736251617144, "grad_norm": 0.3367881774902344, "learning_rate": 1.70492211471986e-05, "loss": 0.4903, "step": 23744 }, { "epoch": 0.5035948336196475, "grad_norm": 0.33859071135520935, "learning_rate": 1.7048984600141344e-05, "loss": 0.5305, "step": 23745 }, { "epoch": 0.5036160420775806, "grad_norm": 0.34837448596954346, "learning_rate": 1.7048748045244275e-05, "loss": 0.4951, "step": 23746 }, { "epoch": 0.5036372505355136, "grad_norm": 0.3546563982963562, "learning_rate": 1.7048511482507647e-05, "loss": 0.4955, "step": 23747 }, { "epoch": 0.5036584589934466, "grad_norm": 0.3568904995918274, "learning_rate": 1.7048274911931727e-05, "loss": 0.4787, "step": 23748 }, { "epoch": 0.5036796674513796, "grad_norm": 0.3285125195980072, "learning_rate": 1.7048038333516777e-05, "loss": 0.4176, "step": 23749 }, { "epoch": 0.5037008759093127, "grad_norm": 0.33214133977890015, "learning_rate": 1.7047801747263056e-05, "loss": 0.5214, "step": 23750 }, { "epoch": 0.5037220843672456, "grad_norm": 0.7168301939964294, "learning_rate": 1.7047565153170835e-05, "loss": 0.4773, "step": 23751 }, { "epoch": 0.5037432928251787, "grad_norm": 0.34649139642715454, "learning_rate": 1.704732855124037e-05, "loss": 0.4781, "step": 23752 }, { "epoch": 0.5037645012831117, "grad_norm": 0.31527793407440186, "learning_rate": 1.704709194147193e-05, "loss": 0.4717, "step": 23753 }, { "epoch": 0.5037857097410448, "grad_norm": 0.40781956911087036, "learning_rate": 1.7046855323865776e-05, "loss": 0.5178, "step": 23754 }, { "epoch": 0.5038069181989777, "grad_norm": 0.3377666175365448, "learning_rate": 1.704661869842217e-05, "loss": 0.4074, "step": 23755 }, { "epoch": 0.5038281266569108, "grad_norm": 0.29991474747657776, "learning_rate": 1.7046382065141374e-05, "loss": 0.4057, "step": 23756 }, { "epoch": 0.5038493351148438, "grad_norm": 0.37272486090660095, "learning_rate": 1.7046145424023656e-05, "loss": 0.5153, "step": 23757 }, { "epoch": 0.5038705435727768, "grad_norm": 0.31757670640945435, "learning_rate": 1.7045908775069276e-05, "loss": 0.5068, "step": 23758 }, { "epoch": 0.5038917520307098, "grad_norm": 0.4588712453842163, "learning_rate": 1.7045672118278497e-05, "loss": 0.573, "step": 23759 }, { "epoch": 0.5039129604886429, "grad_norm": 0.3443124294281006, "learning_rate": 1.7045435453651582e-05, "loss": 0.5461, "step": 23760 }, { "epoch": 0.5039341689465759, "grad_norm": 0.34013405442237854, "learning_rate": 1.7045198781188796e-05, "loss": 0.437, "step": 23761 }, { "epoch": 0.5039553774045089, "grad_norm": 0.357393741607666, "learning_rate": 1.70449621008904e-05, "loss": 0.4358, "step": 23762 }, { "epoch": 0.503976585862442, "grad_norm": 0.6767836213111877, "learning_rate": 1.7044725412756656e-05, "loss": 0.4905, "step": 23763 }, { "epoch": 0.503997794320375, "grad_norm": 0.3547448217868805, "learning_rate": 1.704448871678783e-05, "loss": 0.5176, "step": 23764 }, { "epoch": 0.504019002778308, "grad_norm": 0.36081400513648987, "learning_rate": 1.704425201298419e-05, "loss": 0.483, "step": 23765 }, { "epoch": 0.504040211236241, "grad_norm": 0.34526708722114563, "learning_rate": 1.7044015301345994e-05, "loss": 0.4615, "step": 23766 }, { "epoch": 0.5040614196941741, "grad_norm": 0.4139554798603058, "learning_rate": 1.7043778581873502e-05, "loss": 0.5478, "step": 23767 }, { "epoch": 0.504082628152107, "grad_norm": 0.33684486150741577, "learning_rate": 1.704354185456698e-05, "loss": 0.4789, "step": 23768 }, { "epoch": 0.5041038366100401, "grad_norm": 0.38947099447250366, "learning_rate": 1.7043305119426692e-05, "loss": 0.5471, "step": 23769 }, { "epoch": 0.5041250450679731, "grad_norm": 0.43806958198547363, "learning_rate": 1.7043068376452906e-05, "loss": 0.5187, "step": 23770 }, { "epoch": 0.5041462535259061, "grad_norm": 0.3431659936904907, "learning_rate": 1.7042831625645874e-05, "loss": 0.4654, "step": 23771 }, { "epoch": 0.5041674619838391, "grad_norm": 0.41445088386535645, "learning_rate": 1.704259486700587e-05, "loss": 0.5482, "step": 23772 }, { "epoch": 0.5041886704417722, "grad_norm": 0.3379647135734558, "learning_rate": 1.704235810053315e-05, "loss": 0.5727, "step": 23773 }, { "epoch": 0.5042098788997053, "grad_norm": 0.34374529123306274, "learning_rate": 1.7042121326227982e-05, "loss": 0.4639, "step": 23774 }, { "epoch": 0.5042310873576382, "grad_norm": 0.38972440361976624, "learning_rate": 1.704188454409063e-05, "loss": 0.4994, "step": 23775 }, { "epoch": 0.5042522958155713, "grad_norm": 0.3603232204914093, "learning_rate": 1.7041647754121356e-05, "loss": 0.4739, "step": 23776 }, { "epoch": 0.5042735042735043, "grad_norm": 0.3238140642642975, "learning_rate": 1.7041410956320418e-05, "loss": 0.4917, "step": 23777 }, { "epoch": 0.5042947127314373, "grad_norm": 0.3285573720932007, "learning_rate": 1.7041174150688084e-05, "loss": 0.4577, "step": 23778 }, { "epoch": 0.5043159211893703, "grad_norm": 0.3896530568599701, "learning_rate": 1.704093733722462e-05, "loss": 0.4646, "step": 23779 }, { "epoch": 0.5043371296473034, "grad_norm": 0.39151865243911743, "learning_rate": 1.7040700515930287e-05, "loss": 0.5079, "step": 23780 }, { "epoch": 0.5043583381052363, "grad_norm": 0.35283446311950684, "learning_rate": 1.7040463686805345e-05, "loss": 0.3826, "step": 23781 }, { "epoch": 0.5043795465631694, "grad_norm": 0.32055574655532837, "learning_rate": 1.7040226849850065e-05, "loss": 0.527, "step": 23782 }, { "epoch": 0.5044007550211024, "grad_norm": 0.36726149916648865, "learning_rate": 1.70399900050647e-05, "loss": 0.4446, "step": 23783 }, { "epoch": 0.5044219634790355, "grad_norm": 0.43222177028656006, "learning_rate": 1.7039753152449522e-05, "loss": 0.6333, "step": 23784 }, { "epoch": 0.5044431719369684, "grad_norm": 0.3667967617511749, "learning_rate": 1.7039516292004795e-05, "loss": 0.5049, "step": 23785 }, { "epoch": 0.5044643803949015, "grad_norm": 0.3724752962589264, "learning_rate": 1.7039279423730777e-05, "loss": 0.5015, "step": 23786 }, { "epoch": 0.5044855888528346, "grad_norm": 0.3425365388393402, "learning_rate": 1.703904254762773e-05, "loss": 0.4515, "step": 23787 }, { "epoch": 0.5045067973107675, "grad_norm": 0.3486579656600952, "learning_rate": 1.7038805663695926e-05, "loss": 0.5833, "step": 23788 }, { "epoch": 0.5045280057687006, "grad_norm": 0.40783095359802246, "learning_rate": 1.703856877193562e-05, "loss": 0.4674, "step": 23789 }, { "epoch": 0.5045492142266336, "grad_norm": 0.33100056648254395, "learning_rate": 1.703833187234708e-05, "loss": 0.4812, "step": 23790 }, { "epoch": 0.5045704226845666, "grad_norm": 0.3760044276714325, "learning_rate": 1.7038094964930568e-05, "loss": 0.5307, "step": 23791 }, { "epoch": 0.5045916311424996, "grad_norm": 0.3434157073497772, "learning_rate": 1.703785804968635e-05, "loss": 0.4649, "step": 23792 }, { "epoch": 0.5046128396004327, "grad_norm": 0.36406269669532776, "learning_rate": 1.7037621126614687e-05, "loss": 0.4767, "step": 23793 }, { "epoch": 0.5046340480583656, "grad_norm": 0.3711755871772766, "learning_rate": 1.7037384195715843e-05, "loss": 0.5738, "step": 23794 }, { "epoch": 0.5046552565162987, "grad_norm": 0.3486919105052948, "learning_rate": 1.703714725699008e-05, "loss": 0.5841, "step": 23795 }, { "epoch": 0.5046764649742317, "grad_norm": 0.37037065625190735, "learning_rate": 1.7036910310437663e-05, "loss": 0.537, "step": 23796 }, { "epoch": 0.5046976734321648, "grad_norm": 0.414058119058609, "learning_rate": 1.7036673356058856e-05, "loss": 0.5459, "step": 23797 }, { "epoch": 0.5047188818900977, "grad_norm": 0.37736496329307556, "learning_rate": 1.703643639385392e-05, "loss": 0.5598, "step": 23798 }, { "epoch": 0.5047400903480308, "grad_norm": 0.33625760674476624, "learning_rate": 1.7036199423823122e-05, "loss": 0.4758, "step": 23799 }, { "epoch": 0.5047612988059638, "grad_norm": 0.32451823353767395, "learning_rate": 1.7035962445966726e-05, "loss": 0.5065, "step": 23800 }, { "epoch": 0.5047825072638968, "grad_norm": 0.3205770254135132, "learning_rate": 1.7035725460284993e-05, "loss": 0.5353, "step": 23801 }, { "epoch": 0.5048037157218299, "grad_norm": 0.517002284526825, "learning_rate": 1.7035488466778187e-05, "loss": 0.4608, "step": 23802 }, { "epoch": 0.5048249241797629, "grad_norm": 0.3468732237815857, "learning_rate": 1.7035251465446568e-05, "loss": 0.4653, "step": 23803 }, { "epoch": 0.504846132637696, "grad_norm": 0.3583667278289795, "learning_rate": 1.7035014456290408e-05, "loss": 0.5128, "step": 23804 }, { "epoch": 0.5048673410956289, "grad_norm": 0.35861074924468994, "learning_rate": 1.7034777439309967e-05, "loss": 0.4687, "step": 23805 }, { "epoch": 0.504888549553562, "grad_norm": 0.3749921917915344, "learning_rate": 1.7034540414505504e-05, "loss": 0.4682, "step": 23806 }, { "epoch": 0.504909758011495, "grad_norm": 0.34991535544395447, "learning_rate": 1.7034303381877288e-05, "loss": 0.462, "step": 23807 }, { "epoch": 0.504930966469428, "grad_norm": 0.34695518016815186, "learning_rate": 1.703406634142558e-05, "loss": 0.5064, "step": 23808 }, { "epoch": 0.504952174927361, "grad_norm": 0.32811540365219116, "learning_rate": 1.7033829293150645e-05, "loss": 0.5777, "step": 23809 }, { "epoch": 0.5049733833852941, "grad_norm": 0.39009416103363037, "learning_rate": 1.703359223705275e-05, "loss": 0.4761, "step": 23810 }, { "epoch": 0.504994591843227, "grad_norm": 0.3326217830181122, "learning_rate": 1.7033355173132147e-05, "loss": 0.5084, "step": 23811 }, { "epoch": 0.5050158003011601, "grad_norm": 0.4077088236808777, "learning_rate": 1.7033118101389116e-05, "loss": 0.6367, "step": 23812 }, { "epoch": 0.5050370087590931, "grad_norm": 0.38764896988868713, "learning_rate": 1.7032881021823904e-05, "loss": 0.4943, "step": 23813 }, { "epoch": 0.5050582172170262, "grad_norm": 0.3505224883556366, "learning_rate": 1.703264393443679e-05, "loss": 0.4573, "step": 23814 }, { "epoch": 0.5050794256749592, "grad_norm": 0.3746696710586548, "learning_rate": 1.7032406839228026e-05, "loss": 0.4783, "step": 23815 }, { "epoch": 0.5051006341328922, "grad_norm": 0.29988208413124084, "learning_rate": 1.7032169736197878e-05, "loss": 0.4789, "step": 23816 }, { "epoch": 0.5051218425908253, "grad_norm": 0.39095091819763184, "learning_rate": 1.703193262534662e-05, "loss": 0.5267, "step": 23817 }, { "epoch": 0.5051430510487582, "grad_norm": 0.34352272748947144, "learning_rate": 1.70316955066745e-05, "loss": 0.5177, "step": 23818 }, { "epoch": 0.5051642595066913, "grad_norm": 0.3999212384223938, "learning_rate": 1.703145838018179e-05, "loss": 0.4648, "step": 23819 }, { "epoch": 0.5051854679646243, "grad_norm": 0.32898667454719543, "learning_rate": 1.7031221245868756e-05, "loss": 0.5101, "step": 23820 }, { "epoch": 0.5052066764225573, "grad_norm": 0.33756569027900696, "learning_rate": 1.703098410373566e-05, "loss": 0.5126, "step": 23821 }, { "epoch": 0.5052278848804903, "grad_norm": 0.31898361444473267, "learning_rate": 1.7030746953782763e-05, "loss": 0.4403, "step": 23822 }, { "epoch": 0.5052490933384234, "grad_norm": 0.3619305193424225, "learning_rate": 1.703050979601033e-05, "loss": 0.5053, "step": 23823 }, { "epoch": 0.5052703017963563, "grad_norm": 0.3390045166015625, "learning_rate": 1.7030272630418622e-05, "loss": 0.5112, "step": 23824 }, { "epoch": 0.5052915102542894, "grad_norm": 0.36591070890426636, "learning_rate": 1.7030035457007913e-05, "loss": 0.4631, "step": 23825 }, { "epoch": 0.5053127187122224, "grad_norm": 0.3419143259525299, "learning_rate": 1.7029798275778456e-05, "loss": 0.3905, "step": 23826 }, { "epoch": 0.5053339271701555, "grad_norm": 0.3980434536933899, "learning_rate": 1.7029561086730516e-05, "loss": 0.4947, "step": 23827 }, { "epoch": 0.5053551356280885, "grad_norm": 0.3348785638809204, "learning_rate": 1.702932388986436e-05, "loss": 0.4364, "step": 23828 }, { "epoch": 0.5053763440860215, "grad_norm": 0.32256218791007996, "learning_rate": 1.7029086685180253e-05, "loss": 0.4505, "step": 23829 }, { "epoch": 0.5053975525439546, "grad_norm": 0.9674863219261169, "learning_rate": 1.702884947267846e-05, "loss": 0.5441, "step": 23830 }, { "epoch": 0.5054187610018875, "grad_norm": 0.37273016571998596, "learning_rate": 1.7028612252359234e-05, "loss": 0.5317, "step": 23831 }, { "epoch": 0.5054399694598206, "grad_norm": 0.32129332423210144, "learning_rate": 1.7028375024222848e-05, "loss": 0.4665, "step": 23832 }, { "epoch": 0.5054611779177536, "grad_norm": 0.35383114218711853, "learning_rate": 1.7028137788269567e-05, "loss": 0.4371, "step": 23833 }, { "epoch": 0.5054823863756867, "grad_norm": 0.3512742817401886, "learning_rate": 1.7027900544499653e-05, "loss": 0.4602, "step": 23834 }, { "epoch": 0.5055035948336196, "grad_norm": 0.3548210561275482, "learning_rate": 1.7027663292913367e-05, "loss": 0.5326, "step": 23835 }, { "epoch": 0.5055248032915527, "grad_norm": 0.3461947739124298, "learning_rate": 1.7027426033510975e-05, "loss": 0.4663, "step": 23836 }, { "epoch": 0.5055460117494857, "grad_norm": 0.3153938055038452, "learning_rate": 1.7027188766292743e-05, "loss": 0.4738, "step": 23837 }, { "epoch": 0.5055672202074187, "grad_norm": 0.3383946716785431, "learning_rate": 1.702695149125893e-05, "loss": 0.4849, "step": 23838 }, { "epoch": 0.5055884286653517, "grad_norm": 0.3709925413131714, "learning_rate": 1.7026714208409806e-05, "loss": 0.5094, "step": 23839 }, { "epoch": 0.5056096371232848, "grad_norm": 0.3451330065727234, "learning_rate": 1.7026476917745627e-05, "loss": 0.433, "step": 23840 }, { "epoch": 0.5056308455812177, "grad_norm": 0.37146782875061035, "learning_rate": 1.7026239619266665e-05, "loss": 0.374, "step": 23841 }, { "epoch": 0.5056520540391508, "grad_norm": 0.46782585978507996, "learning_rate": 1.702600231297318e-05, "loss": 0.5118, "step": 23842 }, { "epoch": 0.5056732624970839, "grad_norm": 0.35279059410095215, "learning_rate": 1.7025764998865436e-05, "loss": 0.5075, "step": 23843 }, { "epoch": 0.5056944709550169, "grad_norm": 0.598928689956665, "learning_rate": 1.7025527676943695e-05, "loss": 0.4062, "step": 23844 }, { "epoch": 0.5057156794129499, "grad_norm": 0.48825696110725403, "learning_rate": 1.7025290347208225e-05, "loss": 0.5073, "step": 23845 }, { "epoch": 0.5057368878708829, "grad_norm": 0.407523512840271, "learning_rate": 1.7025053009659288e-05, "loss": 0.4804, "step": 23846 }, { "epoch": 0.505758096328816, "grad_norm": 0.4933997690677643, "learning_rate": 1.702481566429715e-05, "loss": 0.4702, "step": 23847 }, { "epoch": 0.5057793047867489, "grad_norm": 0.3418499827384949, "learning_rate": 1.7024578311122072e-05, "loss": 0.4417, "step": 23848 }, { "epoch": 0.505800513244682, "grad_norm": 0.39286020398139954, "learning_rate": 1.7024340950134318e-05, "loss": 0.5288, "step": 23849 }, { "epoch": 0.505821721702615, "grad_norm": 0.3491438925266266, "learning_rate": 1.702410358133415e-05, "loss": 0.4675, "step": 23850 }, { "epoch": 0.505842930160548, "grad_norm": 0.3304291069507599, "learning_rate": 1.702386620472184e-05, "loss": 0.4559, "step": 23851 }, { "epoch": 0.505864138618481, "grad_norm": 0.3496127426624298, "learning_rate": 1.7023628820297644e-05, "loss": 0.5417, "step": 23852 }, { "epoch": 0.5058853470764141, "grad_norm": 0.29710355401039124, "learning_rate": 1.702339142806183e-05, "loss": 0.461, "step": 23853 }, { "epoch": 0.505906555534347, "grad_norm": 0.34405943751335144, "learning_rate": 1.702315402801466e-05, "loss": 0.5122, "step": 23854 }, { "epoch": 0.5059277639922801, "grad_norm": 0.401713490486145, "learning_rate": 1.70229166201564e-05, "loss": 0.4784, "step": 23855 }, { "epoch": 0.5059489724502132, "grad_norm": 0.41365891695022583, "learning_rate": 1.7022679204487314e-05, "loss": 0.4933, "step": 23856 }, { "epoch": 0.5059701809081462, "grad_norm": 0.32110050320625305, "learning_rate": 1.7022441781007664e-05, "loss": 0.5002, "step": 23857 }, { "epoch": 0.5059913893660792, "grad_norm": 0.4022311866283417, "learning_rate": 1.7022204349717717e-05, "loss": 0.52, "step": 23858 }, { "epoch": 0.5060125978240122, "grad_norm": 0.3541546165943146, "learning_rate": 1.7021966910617736e-05, "loss": 0.4726, "step": 23859 }, { "epoch": 0.5060338062819453, "grad_norm": 0.37849679589271545, "learning_rate": 1.702172946370798e-05, "loss": 0.554, "step": 23860 }, { "epoch": 0.5060550147398782, "grad_norm": 0.3500203788280487, "learning_rate": 1.702149200898872e-05, "loss": 0.454, "step": 23861 }, { "epoch": 0.5060762231978113, "grad_norm": 0.3451883792877197, "learning_rate": 1.702125454646022e-05, "loss": 0.5372, "step": 23862 }, { "epoch": 0.5060974316557443, "grad_norm": 0.5078198313713074, "learning_rate": 1.7021017076122737e-05, "loss": 0.5277, "step": 23863 }, { "epoch": 0.5061186401136774, "grad_norm": 0.3329896032810211, "learning_rate": 1.7020779597976544e-05, "loss": 0.4974, "step": 23864 }, { "epoch": 0.5061398485716103, "grad_norm": 0.3261415958404541, "learning_rate": 1.7020542112021902e-05, "loss": 0.4583, "step": 23865 }, { "epoch": 0.5061610570295434, "grad_norm": 0.3444364070892334, "learning_rate": 1.7020304618259067e-05, "loss": 0.4798, "step": 23866 }, { "epoch": 0.5061822654874764, "grad_norm": 0.3757239282131195, "learning_rate": 1.7020067116688316e-05, "loss": 0.5237, "step": 23867 }, { "epoch": 0.5062034739454094, "grad_norm": 0.33824992179870605, "learning_rate": 1.7019829607309906e-05, "loss": 0.4685, "step": 23868 }, { "epoch": 0.5062246824033425, "grad_norm": 0.398188978433609, "learning_rate": 1.7019592090124103e-05, "loss": 0.5448, "step": 23869 }, { "epoch": 0.5062458908612755, "grad_norm": 0.41097235679626465, "learning_rate": 1.7019354565131166e-05, "loss": 0.5418, "step": 23870 }, { "epoch": 0.5062670993192085, "grad_norm": 0.3438713550567627, "learning_rate": 1.701911703233137e-05, "loss": 0.4912, "step": 23871 }, { "epoch": 0.5062883077771415, "grad_norm": 0.3808785676956177, "learning_rate": 1.701887949172497e-05, "loss": 0.5623, "step": 23872 }, { "epoch": 0.5063095162350746, "grad_norm": 0.3035070300102234, "learning_rate": 1.7018641943312233e-05, "loss": 0.4539, "step": 23873 }, { "epoch": 0.5063307246930076, "grad_norm": 0.39949968457221985, "learning_rate": 1.7018404387093427e-05, "loss": 0.5693, "step": 23874 }, { "epoch": 0.5063519331509406, "grad_norm": 0.351199746131897, "learning_rate": 1.701816682306881e-05, "loss": 0.4865, "step": 23875 }, { "epoch": 0.5063731416088736, "grad_norm": 0.4019261300563812, "learning_rate": 1.7017929251238644e-05, "loss": 0.4592, "step": 23876 }, { "epoch": 0.5063943500668067, "grad_norm": 0.33300521969795227, "learning_rate": 1.7017691671603205e-05, "loss": 0.4636, "step": 23877 }, { "epoch": 0.5064155585247396, "grad_norm": 0.33220118284225464, "learning_rate": 1.7017454084162747e-05, "loss": 0.4913, "step": 23878 }, { "epoch": 0.5064367669826727, "grad_norm": 0.3133813738822937, "learning_rate": 1.701721648891754e-05, "loss": 0.4326, "step": 23879 }, { "epoch": 0.5064579754406057, "grad_norm": 0.37405434250831604, "learning_rate": 1.7016978885867842e-05, "loss": 0.4524, "step": 23880 }, { "epoch": 0.5064791838985387, "grad_norm": 0.4754702150821686, "learning_rate": 1.7016741275013923e-05, "loss": 0.5067, "step": 23881 }, { "epoch": 0.5065003923564717, "grad_norm": 0.44620856642723083, "learning_rate": 1.701650365635605e-05, "loss": 0.4523, "step": 23882 }, { "epoch": 0.5065216008144048, "grad_norm": 0.40135061740875244, "learning_rate": 1.7016266029894478e-05, "loss": 0.6474, "step": 23883 }, { "epoch": 0.5065428092723379, "grad_norm": 0.39637067914009094, "learning_rate": 1.7016028395629477e-05, "loss": 0.5215, "step": 23884 }, { "epoch": 0.5065640177302708, "grad_norm": 0.3291856348514557, "learning_rate": 1.7015790753561306e-05, "loss": 0.4787, "step": 23885 }, { "epoch": 0.5065852261882039, "grad_norm": 0.36285921931266785, "learning_rate": 1.7015553103690238e-05, "loss": 0.5013, "step": 23886 }, { "epoch": 0.5066064346461369, "grad_norm": 0.3705821931362152, "learning_rate": 1.701531544601653e-05, "loss": 0.4515, "step": 23887 }, { "epoch": 0.5066276431040699, "grad_norm": 0.44699355959892273, "learning_rate": 1.701507778054045e-05, "loss": 0.5367, "step": 23888 }, { "epoch": 0.5066488515620029, "grad_norm": 0.4439098536968231, "learning_rate": 1.7014840107262263e-05, "loss": 0.5044, "step": 23889 }, { "epoch": 0.506670060019936, "grad_norm": 0.3550218641757965, "learning_rate": 1.7014602426182232e-05, "loss": 0.4638, "step": 23890 }, { "epoch": 0.5066912684778689, "grad_norm": 0.35030603408813477, "learning_rate": 1.701436473730062e-05, "loss": 0.4571, "step": 23891 }, { "epoch": 0.506712476935802, "grad_norm": 0.5322487950325012, "learning_rate": 1.7014127040617694e-05, "loss": 0.6096, "step": 23892 }, { "epoch": 0.506733685393735, "grad_norm": 0.3833467662334442, "learning_rate": 1.7013889336133712e-05, "loss": 0.534, "step": 23893 }, { "epoch": 0.506754893851668, "grad_norm": 0.3573286235332489, "learning_rate": 1.701365162384895e-05, "loss": 0.4127, "step": 23894 }, { "epoch": 0.506776102309601, "grad_norm": 0.44521045684814453, "learning_rate": 1.7013413903763658e-05, "loss": 0.5056, "step": 23895 }, { "epoch": 0.5067973107675341, "grad_norm": 0.37327998876571655, "learning_rate": 1.7013176175878114e-05, "loss": 0.4558, "step": 23896 }, { "epoch": 0.5068185192254672, "grad_norm": 0.3278709948062897, "learning_rate": 1.7012938440192575e-05, "loss": 0.5189, "step": 23897 }, { "epoch": 0.5068397276834001, "grad_norm": 0.32920345664024353, "learning_rate": 1.7012700696707307e-05, "loss": 0.5427, "step": 23898 }, { "epoch": 0.5068609361413332, "grad_norm": 0.3308176100254059, "learning_rate": 1.7012462945422573e-05, "loss": 0.4313, "step": 23899 }, { "epoch": 0.5068821445992662, "grad_norm": 0.3710384666919708, "learning_rate": 1.7012225186338637e-05, "loss": 0.5502, "step": 23900 }, { "epoch": 0.5069033530571992, "grad_norm": 0.34475910663604736, "learning_rate": 1.7011987419455767e-05, "loss": 0.5106, "step": 23901 }, { "epoch": 0.5069245615151322, "grad_norm": 0.362039715051651, "learning_rate": 1.7011749644774228e-05, "loss": 0.508, "step": 23902 }, { "epoch": 0.5069457699730653, "grad_norm": 0.37048161029815674, "learning_rate": 1.701151186229428e-05, "loss": 0.5696, "step": 23903 }, { "epoch": 0.5069669784309982, "grad_norm": 0.35276293754577637, "learning_rate": 1.7011274072016188e-05, "loss": 0.5352, "step": 23904 }, { "epoch": 0.5069881868889313, "grad_norm": 0.37493109703063965, "learning_rate": 1.7011036273940222e-05, "loss": 0.4834, "step": 23905 }, { "epoch": 0.5070093953468643, "grad_norm": 0.33990299701690674, "learning_rate": 1.7010798468066637e-05, "loss": 0.4659, "step": 23906 }, { "epoch": 0.5070306038047974, "grad_norm": 1.2778013944625854, "learning_rate": 1.7010560654395703e-05, "loss": 0.4924, "step": 23907 }, { "epoch": 0.5070518122627303, "grad_norm": 0.3743440806865692, "learning_rate": 1.7010322832927686e-05, "loss": 0.5331, "step": 23908 }, { "epoch": 0.5070730207206634, "grad_norm": 0.33300554752349854, "learning_rate": 1.7010085003662852e-05, "loss": 0.4701, "step": 23909 }, { "epoch": 0.5070942291785965, "grad_norm": 0.3643559515476227, "learning_rate": 1.700984716660146e-05, "loss": 0.4748, "step": 23910 }, { "epoch": 0.5071154376365294, "grad_norm": 0.3425655663013458, "learning_rate": 1.7009609321743776e-05, "loss": 0.6306, "step": 23911 }, { "epoch": 0.5071366460944625, "grad_norm": 0.36651280522346497, "learning_rate": 1.7009371469090066e-05, "loss": 0.5647, "step": 23912 }, { "epoch": 0.5071578545523955, "grad_norm": 0.40789180994033813, "learning_rate": 1.7009133608640593e-05, "loss": 0.5177, "step": 23913 }, { "epoch": 0.5071790630103286, "grad_norm": 0.310336709022522, "learning_rate": 1.7008895740395625e-05, "loss": 0.5304, "step": 23914 }, { "epoch": 0.5072002714682615, "grad_norm": 0.3255733549594879, "learning_rate": 1.700865786435542e-05, "loss": 0.5399, "step": 23915 }, { "epoch": 0.5072214799261946, "grad_norm": 0.40281298756599426, "learning_rate": 1.700841998052025e-05, "loss": 0.4363, "step": 23916 }, { "epoch": 0.5072426883841276, "grad_norm": 0.37167227268218994, "learning_rate": 1.7008182088890377e-05, "loss": 0.5436, "step": 23917 }, { "epoch": 0.5072638968420606, "grad_norm": 0.351531445980072, "learning_rate": 1.700794418946606e-05, "loss": 0.4648, "step": 23918 }, { "epoch": 0.5072851052999936, "grad_norm": 0.3495105504989624, "learning_rate": 1.7007706282247573e-05, "loss": 0.483, "step": 23919 }, { "epoch": 0.5073063137579267, "grad_norm": 0.33531108498573303, "learning_rate": 1.7007468367235175e-05, "loss": 0.4663, "step": 23920 }, { "epoch": 0.5073275222158596, "grad_norm": 0.34143924713134766, "learning_rate": 1.7007230444429132e-05, "loss": 0.4883, "step": 23921 }, { "epoch": 0.5073487306737927, "grad_norm": 0.4024932384490967, "learning_rate": 1.7006992513829705e-05, "loss": 0.521, "step": 23922 }, { "epoch": 0.5073699391317257, "grad_norm": 0.3581543564796448, "learning_rate": 1.7006754575437164e-05, "loss": 0.55, "step": 23923 }, { "epoch": 0.5073911475896588, "grad_norm": 0.35923388600349426, "learning_rate": 1.7006516629251772e-05, "loss": 0.515, "step": 23924 }, { "epoch": 0.5074123560475918, "grad_norm": 0.46517959237098694, "learning_rate": 1.7006278675273788e-05, "loss": 0.5305, "step": 23925 }, { "epoch": 0.5074335645055248, "grad_norm": 0.3846279978752136, "learning_rate": 1.7006040713503487e-05, "loss": 0.5246, "step": 23926 }, { "epoch": 0.5074547729634579, "grad_norm": 0.33729347586631775, "learning_rate": 1.7005802743941127e-05, "loss": 0.4038, "step": 23927 }, { "epoch": 0.5074759814213908, "grad_norm": 0.34913069009780884, "learning_rate": 1.7005564766586976e-05, "loss": 0.5443, "step": 23928 }, { "epoch": 0.5074971898793239, "grad_norm": 0.5069276094436646, "learning_rate": 1.7005326781441297e-05, "loss": 0.469, "step": 23929 }, { "epoch": 0.5075183983372569, "grad_norm": 0.3495362102985382, "learning_rate": 1.700508878850435e-05, "loss": 0.5133, "step": 23930 }, { "epoch": 0.50753960679519, "grad_norm": 0.37334033846855164, "learning_rate": 1.7004850787776406e-05, "loss": 0.5766, "step": 23931 }, { "epoch": 0.5075608152531229, "grad_norm": 0.33715155720710754, "learning_rate": 1.700461277925773e-05, "loss": 0.5215, "step": 23932 }, { "epoch": 0.507582023711056, "grad_norm": 0.4183861315250397, "learning_rate": 1.7004374762948584e-05, "loss": 0.5397, "step": 23933 }, { "epoch": 0.507603232168989, "grad_norm": 0.2970009446144104, "learning_rate": 1.7004136738849232e-05, "loss": 0.4739, "step": 23934 }, { "epoch": 0.507624440626922, "grad_norm": 0.3224962055683136, "learning_rate": 1.7003898706959942e-05, "loss": 0.475, "step": 23935 }, { "epoch": 0.507645649084855, "grad_norm": 0.3391035795211792, "learning_rate": 1.7003660667280974e-05, "loss": 0.5043, "step": 23936 }, { "epoch": 0.5076668575427881, "grad_norm": 0.34945034980773926, "learning_rate": 1.7003422619812596e-05, "loss": 0.4931, "step": 23937 }, { "epoch": 0.5076880660007211, "grad_norm": 0.38237136602401733, "learning_rate": 1.7003184564555072e-05, "loss": 0.4987, "step": 23938 }, { "epoch": 0.5077092744586541, "grad_norm": 0.38762933015823364, "learning_rate": 1.7002946501508667e-05, "loss": 0.5526, "step": 23939 }, { "epoch": 0.5077304829165872, "grad_norm": 0.5663357973098755, "learning_rate": 1.7002708430673645e-05, "loss": 0.5696, "step": 23940 }, { "epoch": 0.5077516913745201, "grad_norm": 0.3660379648208618, "learning_rate": 1.7002470352050274e-05, "loss": 0.4865, "step": 23941 }, { "epoch": 0.5077728998324532, "grad_norm": 0.3532770276069641, "learning_rate": 1.7002232265638815e-05, "loss": 0.4931, "step": 23942 }, { "epoch": 0.5077941082903862, "grad_norm": 0.3445926308631897, "learning_rate": 1.7001994171439534e-05, "loss": 0.5434, "step": 23943 }, { "epoch": 0.5078153167483193, "grad_norm": 0.360652357339859, "learning_rate": 1.7001756069452694e-05, "loss": 0.5095, "step": 23944 }, { "epoch": 0.5078365252062522, "grad_norm": 0.35710540413856506, "learning_rate": 1.7001517959678568e-05, "loss": 0.4688, "step": 23945 }, { "epoch": 0.5078577336641853, "grad_norm": 0.3319423794746399, "learning_rate": 1.7001279842117407e-05, "loss": 0.4177, "step": 23946 }, { "epoch": 0.5078789421221183, "grad_norm": 0.3879842758178711, "learning_rate": 1.700104171676949e-05, "loss": 0.4947, "step": 23947 }, { "epoch": 0.5079001505800513, "grad_norm": 0.36488550901412964, "learning_rate": 1.700080358363507e-05, "loss": 0.5693, "step": 23948 }, { "epoch": 0.5079213590379843, "grad_norm": 0.36767250299453735, "learning_rate": 1.700056544271442e-05, "loss": 0.538, "step": 23949 }, { "epoch": 0.5079425674959174, "grad_norm": 0.3320056200027466, "learning_rate": 1.70003272940078e-05, "loss": 0.478, "step": 23950 }, { "epoch": 0.5079637759538504, "grad_norm": 0.34298139810562134, "learning_rate": 1.7000089137515477e-05, "loss": 0.4996, "step": 23951 }, { "epoch": 0.5079849844117834, "grad_norm": 0.31611528992652893, "learning_rate": 1.699985097323772e-05, "loss": 0.5016, "step": 23952 }, { "epoch": 0.5080061928697165, "grad_norm": 0.3248741328716278, "learning_rate": 1.6999612801174782e-05, "loss": 0.4859, "step": 23953 }, { "epoch": 0.5080274013276495, "grad_norm": 0.39220261573791504, "learning_rate": 1.699937462132694e-05, "loss": 0.4928, "step": 23954 }, { "epoch": 0.5080486097855825, "grad_norm": 0.439992219209671, "learning_rate": 1.6999136433694455e-05, "loss": 0.5818, "step": 23955 }, { "epoch": 0.5080698182435155, "grad_norm": 0.46963080763816833, "learning_rate": 1.699889823827759e-05, "loss": 0.4802, "step": 23956 }, { "epoch": 0.5080910267014486, "grad_norm": 0.39645084738731384, "learning_rate": 1.699866003507661e-05, "loss": 0.5967, "step": 23957 }, { "epoch": 0.5081122351593815, "grad_norm": 0.3241472542285919, "learning_rate": 1.6998421824091785e-05, "loss": 0.4385, "step": 23958 }, { "epoch": 0.5081334436173146, "grad_norm": 0.31786325573921204, "learning_rate": 1.699818360532337e-05, "loss": 0.5346, "step": 23959 }, { "epoch": 0.5081546520752476, "grad_norm": 0.5167014598846436, "learning_rate": 1.699794537877164e-05, "loss": 0.5498, "step": 23960 }, { "epoch": 0.5081758605331806, "grad_norm": 0.7072324752807617, "learning_rate": 1.699770714443686e-05, "loss": 0.447, "step": 23961 }, { "epoch": 0.5081970689911136, "grad_norm": 0.37233832478523254, "learning_rate": 1.6997468902319283e-05, "loss": 0.5163, "step": 23962 }, { "epoch": 0.5082182774490467, "grad_norm": 0.3760254979133606, "learning_rate": 1.699723065241918e-05, "loss": 0.5733, "step": 23963 }, { "epoch": 0.5082394859069796, "grad_norm": 0.4268667995929718, "learning_rate": 1.6996992394736827e-05, "loss": 0.4892, "step": 23964 }, { "epoch": 0.5082606943649127, "grad_norm": 0.3855462670326233, "learning_rate": 1.6996754129272478e-05, "loss": 0.5144, "step": 23965 }, { "epoch": 0.5082819028228458, "grad_norm": 0.46684029698371887, "learning_rate": 1.6996515856026398e-05, "loss": 0.4565, "step": 23966 }, { "epoch": 0.5083031112807788, "grad_norm": 0.3716457188129425, "learning_rate": 1.6996277574998852e-05, "loss": 0.5223, "step": 23967 }, { "epoch": 0.5083243197387118, "grad_norm": 0.3479277491569519, "learning_rate": 1.699603928619011e-05, "loss": 0.4891, "step": 23968 }, { "epoch": 0.5083455281966448, "grad_norm": 0.334115207195282, "learning_rate": 1.699580098960043e-05, "loss": 0.5129, "step": 23969 }, { "epoch": 0.5083667366545779, "grad_norm": 0.34288063645362854, "learning_rate": 1.6995562685230084e-05, "loss": 0.4673, "step": 23970 }, { "epoch": 0.5083879451125108, "grad_norm": 0.3892131745815277, "learning_rate": 1.6995324373079333e-05, "loss": 0.4854, "step": 23971 }, { "epoch": 0.5084091535704439, "grad_norm": 0.4076315760612488, "learning_rate": 1.6995086053148445e-05, "loss": 0.5264, "step": 23972 }, { "epoch": 0.5084303620283769, "grad_norm": 0.3093315064907074, "learning_rate": 1.699484772543768e-05, "loss": 0.418, "step": 23973 }, { "epoch": 0.50845157048631, "grad_norm": 0.4568336606025696, "learning_rate": 1.6994609389947307e-05, "loss": 0.5151, "step": 23974 }, { "epoch": 0.5084727789442429, "grad_norm": 0.32478705048561096, "learning_rate": 1.6994371046677592e-05, "loss": 0.418, "step": 23975 }, { "epoch": 0.508493987402176, "grad_norm": 0.3450486660003662, "learning_rate": 1.6994132695628794e-05, "loss": 0.492, "step": 23976 }, { "epoch": 0.508515195860109, "grad_norm": 0.367423415184021, "learning_rate": 1.6993894336801185e-05, "loss": 0.4571, "step": 23977 }, { "epoch": 0.508536404318042, "grad_norm": 0.3241652250289917, "learning_rate": 1.6993655970195027e-05, "loss": 0.4285, "step": 23978 }, { "epoch": 0.5085576127759751, "grad_norm": 0.3482360541820526, "learning_rate": 1.6993417595810587e-05, "loss": 0.5266, "step": 23979 }, { "epoch": 0.5085788212339081, "grad_norm": 0.3652661144733429, "learning_rate": 1.6993179213648128e-05, "loss": 0.432, "step": 23980 }, { "epoch": 0.5086000296918411, "grad_norm": 0.34241920709609985, "learning_rate": 1.6992940823707914e-05, "loss": 0.4831, "step": 23981 }, { "epoch": 0.5086212381497741, "grad_norm": 0.3466145396232605, "learning_rate": 1.6992702425990213e-05, "loss": 0.5006, "step": 23982 }, { "epoch": 0.5086424466077072, "grad_norm": 0.33944040536880493, "learning_rate": 1.6992464020495292e-05, "loss": 0.4939, "step": 23983 }, { "epoch": 0.5086636550656402, "grad_norm": 0.30616241693496704, "learning_rate": 1.699222560722341e-05, "loss": 0.4416, "step": 23984 }, { "epoch": 0.5086848635235732, "grad_norm": 0.3693360388278961, "learning_rate": 1.699198718617484e-05, "loss": 0.551, "step": 23985 }, { "epoch": 0.5087060719815062, "grad_norm": 0.42097094655036926, "learning_rate": 1.6991748757349834e-05, "loss": 0.5213, "step": 23986 }, { "epoch": 0.5087272804394393, "grad_norm": 0.3666554391384125, "learning_rate": 1.6991510320748676e-05, "loss": 0.5332, "step": 23987 }, { "epoch": 0.5087484888973722, "grad_norm": 0.3177017867565155, "learning_rate": 1.6991271876371612e-05, "loss": 0.4325, "step": 23988 }, { "epoch": 0.5087696973553053, "grad_norm": 0.3514336347579956, "learning_rate": 1.699103342421892e-05, "loss": 0.5778, "step": 23989 }, { "epoch": 0.5087909058132383, "grad_norm": 0.3394243121147156, "learning_rate": 1.6990794964290863e-05, "loss": 0.4682, "step": 23990 }, { "epoch": 0.5088121142711713, "grad_norm": 0.3509517312049866, "learning_rate": 1.69905564965877e-05, "loss": 0.5087, "step": 23991 }, { "epoch": 0.5088333227291044, "grad_norm": 0.3478221297264099, "learning_rate": 1.6990318021109706e-05, "loss": 0.509, "step": 23992 }, { "epoch": 0.5088545311870374, "grad_norm": 0.3221423625946045, "learning_rate": 1.6990079537857137e-05, "loss": 0.4386, "step": 23993 }, { "epoch": 0.5088757396449705, "grad_norm": 0.35509490966796875, "learning_rate": 1.6989841046830266e-05, "loss": 0.4726, "step": 23994 }, { "epoch": 0.5088969481029034, "grad_norm": 0.3922939598560333, "learning_rate": 1.698960254802935e-05, "loss": 0.424, "step": 23995 }, { "epoch": 0.5089181565608365, "grad_norm": 0.4601665437221527, "learning_rate": 1.6989364041454665e-05, "loss": 0.541, "step": 23996 }, { "epoch": 0.5089393650187695, "grad_norm": 0.3431468904018402, "learning_rate": 1.6989125527106463e-05, "loss": 0.5126, "step": 23997 }, { "epoch": 0.5089605734767025, "grad_norm": 0.3320397734642029, "learning_rate": 1.6988887004985023e-05, "loss": 0.4962, "step": 23998 }, { "epoch": 0.5089817819346355, "grad_norm": 0.3572269678115845, "learning_rate": 1.69886484750906e-05, "loss": 0.5232, "step": 23999 }, { "epoch": 0.5090029903925686, "grad_norm": 0.3689531981945038, "learning_rate": 1.698840993742346e-05, "loss": 0.5665, "step": 24000 }, { "epoch": 0.5090241988505015, "grad_norm": 0.3500406742095947, "learning_rate": 1.6988171391983875e-05, "loss": 0.4626, "step": 24001 }, { "epoch": 0.5090454073084346, "grad_norm": 0.414652556180954, "learning_rate": 1.6987932838772104e-05, "loss": 0.4552, "step": 24002 }, { "epoch": 0.5090666157663676, "grad_norm": 0.514643132686615, "learning_rate": 1.698769427778842e-05, "loss": 0.4809, "step": 24003 }, { "epoch": 0.5090878242243007, "grad_norm": 0.41715070605278015, "learning_rate": 1.6987455709033076e-05, "loss": 0.4787, "step": 24004 }, { "epoch": 0.5091090326822336, "grad_norm": 0.36072438955307007, "learning_rate": 1.698721713250635e-05, "loss": 0.4528, "step": 24005 }, { "epoch": 0.5091302411401667, "grad_norm": 0.509743869304657, "learning_rate": 1.69869785482085e-05, "loss": 0.6321, "step": 24006 }, { "epoch": 0.5091514495980998, "grad_norm": 0.32037439942359924, "learning_rate": 1.6986739956139794e-05, "loss": 0.4129, "step": 24007 }, { "epoch": 0.5091726580560327, "grad_norm": 0.6813808083534241, "learning_rate": 1.6986501356300493e-05, "loss": 0.4401, "step": 24008 }, { "epoch": 0.5091938665139658, "grad_norm": 0.33602792024612427, "learning_rate": 1.698626274869087e-05, "loss": 0.4479, "step": 24009 }, { "epoch": 0.5092150749718988, "grad_norm": 0.8071281909942627, "learning_rate": 1.6986024133311184e-05, "loss": 0.5107, "step": 24010 }, { "epoch": 0.5092362834298318, "grad_norm": 0.32700616121292114, "learning_rate": 1.6985785510161705e-05, "loss": 0.4182, "step": 24011 }, { "epoch": 0.5092574918877648, "grad_norm": 0.37245669960975647, "learning_rate": 1.6985546879242694e-05, "loss": 0.4773, "step": 24012 }, { "epoch": 0.5092787003456979, "grad_norm": 0.39681485295295715, "learning_rate": 1.6985308240554416e-05, "loss": 0.5242, "step": 24013 }, { "epoch": 0.5092999088036309, "grad_norm": 0.34324875473976135, "learning_rate": 1.6985069594097142e-05, "loss": 0.4813, "step": 24014 }, { "epoch": 0.5093211172615639, "grad_norm": 0.349731981754303, "learning_rate": 1.6984830939871135e-05, "loss": 0.4544, "step": 24015 }, { "epoch": 0.5093423257194969, "grad_norm": 0.3642917573451996, "learning_rate": 1.6984592277876658e-05, "loss": 0.4406, "step": 24016 }, { "epoch": 0.50936353417743, "grad_norm": 0.3295716345310211, "learning_rate": 1.698435360811398e-05, "loss": 0.4688, "step": 24017 }, { "epoch": 0.5093847426353629, "grad_norm": 0.3518664538860321, "learning_rate": 1.6984114930583363e-05, "loss": 0.4681, "step": 24018 }, { "epoch": 0.509405951093296, "grad_norm": 0.3541351854801178, "learning_rate": 1.6983876245285074e-05, "loss": 0.5278, "step": 24019 }, { "epoch": 0.5094271595512291, "grad_norm": 0.4039244055747986, "learning_rate": 1.6983637552219382e-05, "loss": 0.5449, "step": 24020 }, { "epoch": 0.509448368009162, "grad_norm": 0.4154355823993683, "learning_rate": 1.6983398851386543e-05, "loss": 0.5298, "step": 24021 }, { "epoch": 0.5094695764670951, "grad_norm": 0.34447750449180603, "learning_rate": 1.6983160142786832e-05, "loss": 0.4959, "step": 24022 }, { "epoch": 0.5094907849250281, "grad_norm": 0.3132549524307251, "learning_rate": 1.698292142642051e-05, "loss": 0.4637, "step": 24023 }, { "epoch": 0.5095119933829612, "grad_norm": 0.3857247531414032, "learning_rate": 1.6982682702287844e-05, "loss": 0.5, "step": 24024 }, { "epoch": 0.5095332018408941, "grad_norm": 0.41561663150787354, "learning_rate": 1.6982443970389096e-05, "loss": 0.5327, "step": 24025 }, { "epoch": 0.5095544102988272, "grad_norm": 0.35912543535232544, "learning_rate": 1.698220523072454e-05, "loss": 0.5911, "step": 24026 }, { "epoch": 0.5095756187567602, "grad_norm": 0.32674285769462585, "learning_rate": 1.6981966483294433e-05, "loss": 0.4892, "step": 24027 }, { "epoch": 0.5095968272146932, "grad_norm": 0.4434579312801361, "learning_rate": 1.6981727728099043e-05, "loss": 0.5409, "step": 24028 }, { "epoch": 0.5096180356726262, "grad_norm": 0.30523553490638733, "learning_rate": 1.6981488965138638e-05, "loss": 0.4349, "step": 24029 }, { "epoch": 0.5096392441305593, "grad_norm": 0.312114953994751, "learning_rate": 1.6981250194413478e-05, "loss": 0.4453, "step": 24030 }, { "epoch": 0.5096604525884922, "grad_norm": 0.3390597105026245, "learning_rate": 1.6981011415923835e-05, "loss": 0.5802, "step": 24031 }, { "epoch": 0.5096816610464253, "grad_norm": 0.410908043384552, "learning_rate": 1.6980772629669973e-05, "loss": 0.4678, "step": 24032 }, { "epoch": 0.5097028695043584, "grad_norm": 0.36148199439048767, "learning_rate": 1.6980533835652156e-05, "loss": 0.515, "step": 24033 }, { "epoch": 0.5097240779622914, "grad_norm": 0.33226945996284485, "learning_rate": 1.6980295033870646e-05, "loss": 0.5603, "step": 24034 }, { "epoch": 0.5097452864202244, "grad_norm": 0.3745843768119812, "learning_rate": 1.6980056224325715e-05, "loss": 0.5261, "step": 24035 }, { "epoch": 0.5097664948781574, "grad_norm": 0.3763401210308075, "learning_rate": 1.6979817407017627e-05, "loss": 0.5118, "step": 24036 }, { "epoch": 0.5097877033360905, "grad_norm": 0.31611931324005127, "learning_rate": 1.6979578581946647e-05, "loss": 0.4487, "step": 24037 }, { "epoch": 0.5098089117940234, "grad_norm": 0.599380373954773, "learning_rate": 1.6979339749113037e-05, "loss": 0.4948, "step": 24038 }, { "epoch": 0.5098301202519565, "grad_norm": 0.4580881595611572, "learning_rate": 1.697910090851707e-05, "loss": 0.4827, "step": 24039 }, { "epoch": 0.5098513287098895, "grad_norm": 0.3109760284423828, "learning_rate": 1.6978862060159004e-05, "loss": 0.4709, "step": 24040 }, { "epoch": 0.5098725371678225, "grad_norm": 0.5057511329650879, "learning_rate": 1.697862320403911e-05, "loss": 0.5851, "step": 24041 }, { "epoch": 0.5098937456257555, "grad_norm": 0.3830801248550415, "learning_rate": 1.6978384340157653e-05, "loss": 0.5377, "step": 24042 }, { "epoch": 0.5099149540836886, "grad_norm": 0.3316431939601898, "learning_rate": 1.6978145468514898e-05, "loss": 0.4484, "step": 24043 }, { "epoch": 0.5099361625416216, "grad_norm": 0.3430261015892029, "learning_rate": 1.697790658911111e-05, "loss": 0.5519, "step": 24044 }, { "epoch": 0.5099573709995546, "grad_norm": 0.497887521982193, "learning_rate": 1.6977667701946552e-05, "loss": 0.5414, "step": 24045 }, { "epoch": 0.5099785794574876, "grad_norm": 0.3250488042831421, "learning_rate": 1.6977428807021494e-05, "loss": 0.4712, "step": 24046 }, { "epoch": 0.5099997879154207, "grad_norm": 0.3793598413467407, "learning_rate": 1.69771899043362e-05, "loss": 0.5167, "step": 24047 }, { "epoch": 0.5100209963733537, "grad_norm": 0.4099922180175781, "learning_rate": 1.6976950993890934e-05, "loss": 0.5522, "step": 24048 }, { "epoch": 0.5100422048312867, "grad_norm": 0.37349802255630493, "learning_rate": 1.6976712075685967e-05, "loss": 0.5205, "step": 24049 }, { "epoch": 0.5100634132892198, "grad_norm": 0.3531222641468048, "learning_rate": 1.6976473149721563e-05, "loss": 0.5163, "step": 24050 }, { "epoch": 0.5100846217471527, "grad_norm": 0.36229023337364197, "learning_rate": 1.6976234215997984e-05, "loss": 0.5306, "step": 24051 }, { "epoch": 0.5101058302050858, "grad_norm": 0.3497246205806732, "learning_rate": 1.69759952745155e-05, "loss": 0.483, "step": 24052 }, { "epoch": 0.5101270386630188, "grad_norm": 0.42509594559669495, "learning_rate": 1.6975756325274368e-05, "loss": 0.6095, "step": 24053 }, { "epoch": 0.5101482471209519, "grad_norm": 0.31354600191116333, "learning_rate": 1.6975517368274865e-05, "loss": 0.4789, "step": 24054 }, { "epoch": 0.5101694555788848, "grad_norm": 0.3356640934944153, "learning_rate": 1.6975278403517253e-05, "loss": 0.5103, "step": 24055 }, { "epoch": 0.5101906640368179, "grad_norm": 0.37756338715553284, "learning_rate": 1.6975039431001798e-05, "loss": 0.5444, "step": 24056 }, { "epoch": 0.5102118724947509, "grad_norm": 0.34028327465057373, "learning_rate": 1.697480045072876e-05, "loss": 0.5264, "step": 24057 }, { "epoch": 0.5102330809526839, "grad_norm": 0.3644777238368988, "learning_rate": 1.6974561462698414e-05, "loss": 0.4841, "step": 24058 }, { "epoch": 0.5102542894106169, "grad_norm": 0.34847837686538696, "learning_rate": 1.6974322466911016e-05, "loss": 0.5633, "step": 24059 }, { "epoch": 0.51027549786855, "grad_norm": 0.33816415071487427, "learning_rate": 1.6974083463366844e-05, "loss": 0.5617, "step": 24060 }, { "epoch": 0.510296706326483, "grad_norm": 0.3617038428783417, "learning_rate": 1.6973844452066153e-05, "loss": 0.4676, "step": 24061 }, { "epoch": 0.510317914784416, "grad_norm": 0.376295804977417, "learning_rate": 1.6973605433009216e-05, "loss": 0.5061, "step": 24062 }, { "epoch": 0.5103391232423491, "grad_norm": 0.3685571849346161, "learning_rate": 1.697336640619629e-05, "loss": 0.5569, "step": 24063 }, { "epoch": 0.510360331700282, "grad_norm": 0.4165017604827881, "learning_rate": 1.697312737162765e-05, "loss": 0.4404, "step": 24064 }, { "epoch": 0.5103815401582151, "grad_norm": 0.410562664270401, "learning_rate": 1.697288832930356e-05, "loss": 0.5663, "step": 24065 }, { "epoch": 0.5104027486161481, "grad_norm": 0.3565189242362976, "learning_rate": 1.697264927922428e-05, "loss": 0.5061, "step": 24066 }, { "epoch": 0.5104239570740812, "grad_norm": 0.3715647757053375, "learning_rate": 1.697241022139008e-05, "loss": 0.5038, "step": 24067 }, { "epoch": 0.5104451655320141, "grad_norm": 0.3260492980480194, "learning_rate": 1.6972171155801232e-05, "loss": 0.5, "step": 24068 }, { "epoch": 0.5104663739899472, "grad_norm": 0.5237982273101807, "learning_rate": 1.697193208245799e-05, "loss": 0.5047, "step": 24069 }, { "epoch": 0.5104875824478802, "grad_norm": 0.3509705662727356, "learning_rate": 1.6971693001360626e-05, "loss": 0.5512, "step": 24070 }, { "epoch": 0.5105087909058132, "grad_norm": 0.3660627603530884, "learning_rate": 1.697145391250941e-05, "loss": 0.5708, "step": 24071 }, { "epoch": 0.5105299993637462, "grad_norm": 0.356381356716156, "learning_rate": 1.6971214815904598e-05, "loss": 0.5868, "step": 24072 }, { "epoch": 0.5105512078216793, "grad_norm": 0.35658034682273865, "learning_rate": 1.6970975711546465e-05, "loss": 0.5261, "step": 24073 }, { "epoch": 0.5105724162796124, "grad_norm": 0.3459807336330414, "learning_rate": 1.697073659943527e-05, "loss": 0.4476, "step": 24074 }, { "epoch": 0.5105936247375453, "grad_norm": 0.3562619388103485, "learning_rate": 1.6970497479571286e-05, "loss": 0.4806, "step": 24075 }, { "epoch": 0.5106148331954784, "grad_norm": 0.3565114140510559, "learning_rate": 1.6970258351954774e-05, "loss": 0.4821, "step": 24076 }, { "epoch": 0.5106360416534114, "grad_norm": 0.3508813977241516, "learning_rate": 1.6970019216586003e-05, "loss": 0.584, "step": 24077 }, { "epoch": 0.5106572501113444, "grad_norm": 0.49002954363822937, "learning_rate": 1.6969780073465238e-05, "loss": 0.5019, "step": 24078 }, { "epoch": 0.5106784585692774, "grad_norm": 0.3352416753768921, "learning_rate": 1.696954092259274e-05, "loss": 0.5102, "step": 24079 }, { "epoch": 0.5106996670272105, "grad_norm": 0.33357974886894226, "learning_rate": 1.6969301763968776e-05, "loss": 0.5291, "step": 24080 }, { "epoch": 0.5107208754851434, "grad_norm": 0.324860543012619, "learning_rate": 1.6969062597593626e-05, "loss": 0.4883, "step": 24081 }, { "epoch": 0.5107420839430765, "grad_norm": 0.4035913050174713, "learning_rate": 1.6968823423467534e-05, "loss": 0.4724, "step": 24082 }, { "epoch": 0.5107632924010095, "grad_norm": 0.3512118458747864, "learning_rate": 1.6968584241590783e-05, "loss": 0.5227, "step": 24083 }, { "epoch": 0.5107845008589426, "grad_norm": 0.3311575651168823, "learning_rate": 1.6968345051963633e-05, "loss": 0.4971, "step": 24084 }, { "epoch": 0.5108057093168755, "grad_norm": 0.4181104302406311, "learning_rate": 1.6968105854586348e-05, "loss": 0.4557, "step": 24085 }, { "epoch": 0.5108269177748086, "grad_norm": 0.415290892124176, "learning_rate": 1.6967866649459196e-05, "loss": 0.4645, "step": 24086 }, { "epoch": 0.5108481262327417, "grad_norm": 0.31282028555870056, "learning_rate": 1.6967627436582445e-05, "loss": 0.453, "step": 24087 }, { "epoch": 0.5108693346906746, "grad_norm": 0.36805903911590576, "learning_rate": 1.6967388215956363e-05, "loss": 0.4322, "step": 24088 }, { "epoch": 0.5108905431486077, "grad_norm": 0.31653207540512085, "learning_rate": 1.6967148987581206e-05, "loss": 0.4919, "step": 24089 }, { "epoch": 0.5109117516065407, "grad_norm": 0.344723641872406, "learning_rate": 1.696690975145725e-05, "loss": 0.4338, "step": 24090 }, { "epoch": 0.5109329600644738, "grad_norm": 0.39065369963645935, "learning_rate": 1.6966670507584754e-05, "loss": 0.4896, "step": 24091 }, { "epoch": 0.5109541685224067, "grad_norm": 0.3185330331325531, "learning_rate": 1.696643125596399e-05, "loss": 0.4511, "step": 24092 }, { "epoch": 0.5109753769803398, "grad_norm": 0.3109932839870453, "learning_rate": 1.6966191996595224e-05, "loss": 0.4283, "step": 24093 }, { "epoch": 0.5109965854382728, "grad_norm": 0.3198055326938629, "learning_rate": 1.6965952729478715e-05, "loss": 0.4403, "step": 24094 }, { "epoch": 0.5110177938962058, "grad_norm": 0.36802154779434204, "learning_rate": 1.6965713454614737e-05, "loss": 0.5163, "step": 24095 }, { "epoch": 0.5110390023541388, "grad_norm": 0.4173874855041504, "learning_rate": 1.6965474172003556e-05, "loss": 0.4491, "step": 24096 }, { "epoch": 0.5110602108120719, "grad_norm": 0.35052651166915894, "learning_rate": 1.696523488164543e-05, "loss": 0.4433, "step": 24097 }, { "epoch": 0.5110814192700048, "grad_norm": 0.2994253635406494, "learning_rate": 1.696499558354063e-05, "loss": 0.4352, "step": 24098 }, { "epoch": 0.5111026277279379, "grad_norm": 0.4464426040649414, "learning_rate": 1.696475627768942e-05, "loss": 0.4488, "step": 24099 }, { "epoch": 0.5111238361858709, "grad_norm": 0.40115535259246826, "learning_rate": 1.6964516964092078e-05, "loss": 0.4855, "step": 24100 }, { "epoch": 0.511145044643804, "grad_norm": 0.36962059140205383, "learning_rate": 1.6964277642748855e-05, "loss": 0.5472, "step": 24101 }, { "epoch": 0.511166253101737, "grad_norm": 0.3580581247806549, "learning_rate": 1.6964038313660023e-05, "loss": 0.452, "step": 24102 }, { "epoch": 0.51118746155967, "grad_norm": 0.3733231723308563, "learning_rate": 1.696379897682585e-05, "loss": 0.5306, "step": 24103 }, { "epoch": 0.5112086700176031, "grad_norm": 0.33142247796058655, "learning_rate": 1.6963559632246597e-05, "loss": 0.5145, "step": 24104 }, { "epoch": 0.511229878475536, "grad_norm": 0.34511467814445496, "learning_rate": 1.6963320279922534e-05, "loss": 0.5017, "step": 24105 }, { "epoch": 0.5112510869334691, "grad_norm": 0.33204638957977295, "learning_rate": 1.696308091985393e-05, "loss": 0.4777, "step": 24106 }, { "epoch": 0.5112722953914021, "grad_norm": 0.33671456575393677, "learning_rate": 1.6962841552041043e-05, "loss": 0.56, "step": 24107 }, { "epoch": 0.5112935038493351, "grad_norm": 0.4732075333595276, "learning_rate": 1.6962602176484147e-05, "loss": 0.5186, "step": 24108 }, { "epoch": 0.5113147123072681, "grad_norm": 0.3237072229385376, "learning_rate": 1.6962362793183507e-05, "loss": 0.4348, "step": 24109 }, { "epoch": 0.5113359207652012, "grad_norm": 0.34849393367767334, "learning_rate": 1.6962123402139386e-05, "loss": 0.5298, "step": 24110 }, { "epoch": 0.5113571292231341, "grad_norm": 0.3341308534145355, "learning_rate": 1.696188400335205e-05, "loss": 0.504, "step": 24111 }, { "epoch": 0.5113783376810672, "grad_norm": 0.3527941107749939, "learning_rate": 1.696164459682177e-05, "loss": 0.4114, "step": 24112 }, { "epoch": 0.5113995461390002, "grad_norm": 0.3752477467060089, "learning_rate": 1.6961405182548807e-05, "loss": 0.458, "step": 24113 }, { "epoch": 0.5114207545969333, "grad_norm": 0.33076250553131104, "learning_rate": 1.696116576053343e-05, "loss": 0.5161, "step": 24114 }, { "epoch": 0.5114419630548663, "grad_norm": 0.39261680841445923, "learning_rate": 1.6960926330775907e-05, "loss": 0.5088, "step": 24115 }, { "epoch": 0.5114631715127993, "grad_norm": 0.34148839116096497, "learning_rate": 1.69606868932765e-05, "loss": 0.4837, "step": 24116 }, { "epoch": 0.5114843799707324, "grad_norm": 0.34109199047088623, "learning_rate": 1.696044744803548e-05, "loss": 0.46, "step": 24117 }, { "epoch": 0.5115055884286653, "grad_norm": 0.34630605578422546, "learning_rate": 1.696020799505311e-05, "loss": 0.4891, "step": 24118 }, { "epoch": 0.5115267968865984, "grad_norm": 0.3378058075904846, "learning_rate": 1.6959968534329653e-05, "loss": 0.4242, "step": 24119 }, { "epoch": 0.5115480053445314, "grad_norm": 0.305425763130188, "learning_rate": 1.6959729065865383e-05, "loss": 0.4114, "step": 24120 }, { "epoch": 0.5115692138024645, "grad_norm": 0.3650916814804077, "learning_rate": 1.6959489589660562e-05, "loss": 0.484, "step": 24121 }, { "epoch": 0.5115904222603974, "grad_norm": 0.38012799620628357, "learning_rate": 1.695925010571546e-05, "loss": 0.5191, "step": 24122 }, { "epoch": 0.5116116307183305, "grad_norm": 0.5261760354042053, "learning_rate": 1.6959010614030337e-05, "loss": 0.4457, "step": 24123 }, { "epoch": 0.5116328391762635, "grad_norm": 0.40877625346183777, "learning_rate": 1.6958771114605464e-05, "loss": 0.5747, "step": 24124 }, { "epoch": 0.5116540476341965, "grad_norm": 0.4271441400051117, "learning_rate": 1.6958531607441108e-05, "loss": 0.4943, "step": 24125 }, { "epoch": 0.5116752560921295, "grad_norm": 0.3691461384296417, "learning_rate": 1.695829209253753e-05, "loss": 0.4095, "step": 24126 }, { "epoch": 0.5116964645500626, "grad_norm": 0.331216961145401, "learning_rate": 1.6958052569895e-05, "loss": 0.4964, "step": 24127 }, { "epoch": 0.5117176730079956, "grad_norm": 0.36450380086898804, "learning_rate": 1.6957813039513787e-05, "loss": 0.5207, "step": 24128 }, { "epoch": 0.5117388814659286, "grad_norm": 0.30665937066078186, "learning_rate": 1.6957573501394154e-05, "loss": 0.4708, "step": 24129 }, { "epoch": 0.5117600899238617, "grad_norm": 0.47309184074401855, "learning_rate": 1.6957333955536368e-05, "loss": 0.5173, "step": 24130 }, { "epoch": 0.5117812983817946, "grad_norm": 0.33510205149650574, "learning_rate": 1.6957094401940695e-05, "loss": 0.578, "step": 24131 }, { "epoch": 0.5118025068397277, "grad_norm": 0.3225564956665039, "learning_rate": 1.6956854840607404e-05, "loss": 0.4427, "step": 24132 }, { "epoch": 0.5118237152976607, "grad_norm": 0.3540536165237427, "learning_rate": 1.6956615271536758e-05, "loss": 0.4856, "step": 24133 }, { "epoch": 0.5118449237555938, "grad_norm": 0.38366076350212097, "learning_rate": 1.6956375694729024e-05, "loss": 0.4713, "step": 24134 }, { "epoch": 0.5118661322135267, "grad_norm": 0.37998491525650024, "learning_rate": 1.6956136110184472e-05, "loss": 0.4553, "step": 24135 }, { "epoch": 0.5118873406714598, "grad_norm": 0.3074820935726166, "learning_rate": 1.6955896517903365e-05, "loss": 0.4636, "step": 24136 }, { "epoch": 0.5119085491293928, "grad_norm": 0.35489538311958313, "learning_rate": 1.695565691788597e-05, "loss": 0.5539, "step": 24137 }, { "epoch": 0.5119297575873258, "grad_norm": 0.3718009889125824, "learning_rate": 1.6955417310132552e-05, "loss": 0.4752, "step": 24138 }, { "epoch": 0.5119509660452588, "grad_norm": 0.3655658960342407, "learning_rate": 1.6955177694643382e-05, "loss": 0.5059, "step": 24139 }, { "epoch": 0.5119721745031919, "grad_norm": 0.32000529766082764, "learning_rate": 1.695493807141872e-05, "loss": 0.4726, "step": 24140 }, { "epoch": 0.5119933829611248, "grad_norm": 0.3984990119934082, "learning_rate": 1.695469844045884e-05, "loss": 0.5283, "step": 24141 }, { "epoch": 0.5120145914190579, "grad_norm": 0.3641442358493805, "learning_rate": 1.6954458801764003e-05, "loss": 0.5071, "step": 24142 }, { "epoch": 0.512035799876991, "grad_norm": 0.3453551232814789, "learning_rate": 1.695421915533448e-05, "loss": 0.498, "step": 24143 }, { "epoch": 0.512057008334924, "grad_norm": 0.6131729483604431, "learning_rate": 1.6953979501170533e-05, "loss": 0.5017, "step": 24144 }, { "epoch": 0.512078216792857, "grad_norm": 0.3312472999095917, "learning_rate": 1.6953739839272428e-05, "loss": 0.5346, "step": 24145 }, { "epoch": 0.51209942525079, "grad_norm": 0.3344484865665436, "learning_rate": 1.6953500169640438e-05, "loss": 0.5245, "step": 24146 }, { "epoch": 0.5121206337087231, "grad_norm": 0.33239248394966125, "learning_rate": 1.6953260492274823e-05, "loss": 0.5266, "step": 24147 }, { "epoch": 0.512141842166656, "grad_norm": 0.42217040061950684, "learning_rate": 1.6953020807175856e-05, "loss": 0.4353, "step": 24148 }, { "epoch": 0.5121630506245891, "grad_norm": 0.38557907938957214, "learning_rate": 1.6952781114343792e-05, "loss": 0.4918, "step": 24149 }, { "epoch": 0.5121842590825221, "grad_norm": 0.3581673502922058, "learning_rate": 1.695254141377891e-05, "loss": 0.4935, "step": 24150 }, { "epoch": 0.5122054675404552, "grad_norm": 0.3946520686149597, "learning_rate": 1.6952301705481473e-05, "loss": 0.5113, "step": 24151 }, { "epoch": 0.5122266759983881, "grad_norm": 0.3407593369483948, "learning_rate": 1.6952061989451745e-05, "loss": 0.555, "step": 24152 }, { "epoch": 0.5122478844563212, "grad_norm": 0.4162907004356384, "learning_rate": 1.6951822265689993e-05, "loss": 0.5559, "step": 24153 }, { "epoch": 0.5122690929142542, "grad_norm": 0.34823721647262573, "learning_rate": 1.6951582534196486e-05, "loss": 0.4911, "step": 24154 }, { "epoch": 0.5122903013721872, "grad_norm": 0.3812665343284607, "learning_rate": 1.695134279497149e-05, "loss": 0.4884, "step": 24155 }, { "epoch": 0.5123115098301203, "grad_norm": 0.5020971894264221, "learning_rate": 1.6951103048015268e-05, "loss": 0.5381, "step": 24156 }, { "epoch": 0.5123327182880533, "grad_norm": 0.35120776295661926, "learning_rate": 1.6950863293328095e-05, "loss": 0.5041, "step": 24157 }, { "epoch": 0.5123539267459863, "grad_norm": 0.38304418325424194, "learning_rate": 1.695062353091023e-05, "loss": 0.5572, "step": 24158 }, { "epoch": 0.5123751352039193, "grad_norm": 0.35753780603408813, "learning_rate": 1.6950383760761937e-05, "loss": 0.55, "step": 24159 }, { "epoch": 0.5123963436618524, "grad_norm": 0.3041490316390991, "learning_rate": 1.6950143982883493e-05, "loss": 0.4217, "step": 24160 }, { "epoch": 0.5124175521197853, "grad_norm": 0.3277336657047272, "learning_rate": 1.694990419727516e-05, "loss": 0.4689, "step": 24161 }, { "epoch": 0.5124387605777184, "grad_norm": 0.3530193269252777, "learning_rate": 1.6949664403937198e-05, "loss": 0.449, "step": 24162 }, { "epoch": 0.5124599690356514, "grad_norm": 0.34335091710090637, "learning_rate": 1.6949424602869882e-05, "loss": 0.5344, "step": 24163 }, { "epoch": 0.5124811774935845, "grad_norm": 0.5471710562705994, "learning_rate": 1.6949184794073476e-05, "loss": 0.3819, "step": 24164 }, { "epoch": 0.5125023859515174, "grad_norm": 0.3522590100765228, "learning_rate": 1.6948944977548253e-05, "loss": 0.498, "step": 24165 }, { "epoch": 0.5125235944094505, "grad_norm": 0.3454212546348572, "learning_rate": 1.6948705153294467e-05, "loss": 0.527, "step": 24166 }, { "epoch": 0.5125448028673835, "grad_norm": 0.3319118916988373, "learning_rate": 1.6948465321312394e-05, "loss": 0.4847, "step": 24167 }, { "epoch": 0.5125660113253165, "grad_norm": 0.3396573066711426, "learning_rate": 1.6948225481602297e-05, "loss": 0.5043, "step": 24168 }, { "epoch": 0.5125872197832496, "grad_norm": 0.31711673736572266, "learning_rate": 1.6947985634164443e-05, "loss": 0.5277, "step": 24169 }, { "epoch": 0.5126084282411826, "grad_norm": 0.3368145227432251, "learning_rate": 1.6947745778999105e-05, "loss": 0.4773, "step": 24170 }, { "epoch": 0.5126296366991157, "grad_norm": 0.2801728844642639, "learning_rate": 1.694750591610654e-05, "loss": 0.3501, "step": 24171 }, { "epoch": 0.5126508451570486, "grad_norm": 0.3341038227081299, "learning_rate": 1.694726604548702e-05, "loss": 0.5089, "step": 24172 }, { "epoch": 0.5126720536149817, "grad_norm": 0.321409672498703, "learning_rate": 1.6947026167140814e-05, "loss": 0.5493, "step": 24173 }, { "epoch": 0.5126932620729147, "grad_norm": 0.35307157039642334, "learning_rate": 1.6946786281068182e-05, "loss": 0.4687, "step": 24174 }, { "epoch": 0.5127144705308477, "grad_norm": 0.31561192870140076, "learning_rate": 1.6946546387269393e-05, "loss": 0.4453, "step": 24175 }, { "epoch": 0.5127356789887807, "grad_norm": 0.3448420763015747, "learning_rate": 1.694630648574472e-05, "loss": 0.4876, "step": 24176 }, { "epoch": 0.5127568874467138, "grad_norm": 0.36925992369651794, "learning_rate": 1.6946066576494426e-05, "loss": 0.4703, "step": 24177 }, { "epoch": 0.5127780959046467, "grad_norm": 0.32548144459724426, "learning_rate": 1.6945826659518773e-05, "loss": 0.5121, "step": 24178 }, { "epoch": 0.5127993043625798, "grad_norm": 0.3168368637561798, "learning_rate": 1.694558673481803e-05, "loss": 0.4538, "step": 24179 }, { "epoch": 0.5128205128205128, "grad_norm": 0.6239485740661621, "learning_rate": 1.6945346802392472e-05, "loss": 0.525, "step": 24180 }, { "epoch": 0.5128417212784459, "grad_norm": 0.3658601939678192, "learning_rate": 1.6945106862242357e-05, "loss": 0.4869, "step": 24181 }, { "epoch": 0.5128629297363788, "grad_norm": 0.36405208706855774, "learning_rate": 1.6944866914367955e-05, "loss": 0.4946, "step": 24182 }, { "epoch": 0.5128841381943119, "grad_norm": 0.3281669616699219, "learning_rate": 1.6944626958769534e-05, "loss": 0.4825, "step": 24183 }, { "epoch": 0.512905346652245, "grad_norm": 0.3288736641407013, "learning_rate": 1.6944386995447355e-05, "loss": 0.4733, "step": 24184 }, { "epoch": 0.5129265551101779, "grad_norm": 0.3515680730342865, "learning_rate": 1.6944147024401692e-05, "loss": 0.5074, "step": 24185 }, { "epoch": 0.512947763568111, "grad_norm": 0.3329757750034332, "learning_rate": 1.6943907045632807e-05, "loss": 0.5853, "step": 24186 }, { "epoch": 0.512968972026044, "grad_norm": 0.34258556365966797, "learning_rate": 1.694366705914097e-05, "loss": 0.4498, "step": 24187 }, { "epoch": 0.512990180483977, "grad_norm": 0.3721481263637543, "learning_rate": 1.694342706492645e-05, "loss": 0.5372, "step": 24188 }, { "epoch": 0.51301138894191, "grad_norm": 0.34163907170295715, "learning_rate": 1.6943187062989505e-05, "loss": 0.5673, "step": 24189 }, { "epoch": 0.5130325973998431, "grad_norm": 0.4940662384033203, "learning_rate": 1.694294705333041e-05, "loss": 0.4349, "step": 24190 }, { "epoch": 0.513053805857776, "grad_norm": 0.4994544982910156, "learning_rate": 1.694270703594943e-05, "loss": 0.5574, "step": 24191 }, { "epoch": 0.5130750143157091, "grad_norm": 0.4194124937057495, "learning_rate": 1.6942467010846832e-05, "loss": 0.4956, "step": 24192 }, { "epoch": 0.5130962227736421, "grad_norm": 0.3167189955711365, "learning_rate": 1.6942226978022882e-05, "loss": 0.4497, "step": 24193 }, { "epoch": 0.5131174312315752, "grad_norm": 0.3302208483219147, "learning_rate": 1.6941986937477848e-05, "loss": 0.4428, "step": 24194 }, { "epoch": 0.5131386396895081, "grad_norm": 0.36252740025520325, "learning_rate": 1.6941746889211993e-05, "loss": 0.425, "step": 24195 }, { "epoch": 0.5131598481474412, "grad_norm": 0.3313008248806, "learning_rate": 1.6941506833225592e-05, "loss": 0.4525, "step": 24196 }, { "epoch": 0.5131810566053743, "grad_norm": 0.44784119725227356, "learning_rate": 1.6941266769518906e-05, "loss": 0.5236, "step": 24197 }, { "epoch": 0.5132022650633072, "grad_norm": 0.34254127740859985, "learning_rate": 1.6941026698092202e-05, "loss": 0.5315, "step": 24198 }, { "epoch": 0.5132234735212403, "grad_norm": 0.3319684565067291, "learning_rate": 1.694078661894575e-05, "loss": 0.4517, "step": 24199 }, { "epoch": 0.5132446819791733, "grad_norm": 0.40230900049209595, "learning_rate": 1.6940546532079816e-05, "loss": 0.5112, "step": 24200 }, { "epoch": 0.5132658904371064, "grad_norm": 0.35167163610458374, "learning_rate": 1.6940306437494666e-05, "loss": 0.5422, "step": 24201 }, { "epoch": 0.5132870988950393, "grad_norm": 0.31765443086624146, "learning_rate": 1.6940066335190562e-05, "loss": 0.477, "step": 24202 }, { "epoch": 0.5133083073529724, "grad_norm": 0.3670811057090759, "learning_rate": 1.6939826225167783e-05, "loss": 0.4893, "step": 24203 }, { "epoch": 0.5133295158109054, "grad_norm": 0.35123467445373535, "learning_rate": 1.6939586107426588e-05, "loss": 0.4766, "step": 24204 }, { "epoch": 0.5133507242688384, "grad_norm": 0.3524448871612549, "learning_rate": 1.693934598196725e-05, "loss": 0.5032, "step": 24205 }, { "epoch": 0.5133719327267714, "grad_norm": 0.31091779470443726, "learning_rate": 1.6939105848790024e-05, "loss": 0.4592, "step": 24206 }, { "epoch": 0.5133931411847045, "grad_norm": 0.42252832651138306, "learning_rate": 1.6938865707895188e-05, "loss": 0.5218, "step": 24207 }, { "epoch": 0.5134143496426374, "grad_norm": 0.41588982939720154, "learning_rate": 1.6938625559283006e-05, "loss": 0.5311, "step": 24208 }, { "epoch": 0.5134355581005705, "grad_norm": 0.354688823223114, "learning_rate": 1.6938385402953742e-05, "loss": 0.479, "step": 24209 }, { "epoch": 0.5134567665585036, "grad_norm": 0.36842215061187744, "learning_rate": 1.693814523890767e-05, "loss": 0.4459, "step": 24210 }, { "epoch": 0.5134779750164365, "grad_norm": 0.43466269969940186, "learning_rate": 1.6937905067145053e-05, "loss": 0.5401, "step": 24211 }, { "epoch": 0.5134991834743696, "grad_norm": 0.3583771586418152, "learning_rate": 1.6937664887666158e-05, "loss": 0.4499, "step": 24212 }, { "epoch": 0.5135203919323026, "grad_norm": 0.3826955556869507, "learning_rate": 1.693742470047125e-05, "loss": 0.477, "step": 24213 }, { "epoch": 0.5135416003902357, "grad_norm": 0.404262900352478, "learning_rate": 1.69371845055606e-05, "loss": 0.4452, "step": 24214 }, { "epoch": 0.5135628088481686, "grad_norm": 0.38871443271636963, "learning_rate": 1.6936944302934473e-05, "loss": 0.4855, "step": 24215 }, { "epoch": 0.5135840173061017, "grad_norm": 0.345237672328949, "learning_rate": 1.693670409259314e-05, "loss": 0.4879, "step": 24216 }, { "epoch": 0.5136052257640347, "grad_norm": 0.32214051485061646, "learning_rate": 1.693646387453686e-05, "loss": 0.5411, "step": 24217 }, { "epoch": 0.5136264342219677, "grad_norm": 0.34253406524658203, "learning_rate": 1.6936223648765907e-05, "loss": 0.4888, "step": 24218 }, { "epoch": 0.5136476426799007, "grad_norm": 0.3556446433067322, "learning_rate": 1.693598341528055e-05, "loss": 0.4932, "step": 24219 }, { "epoch": 0.5136688511378338, "grad_norm": 0.3735678791999817, "learning_rate": 1.6935743174081048e-05, "loss": 0.4916, "step": 24220 }, { "epoch": 0.5136900595957667, "grad_norm": 0.32087257504463196, "learning_rate": 1.6935502925167674e-05, "loss": 0.4104, "step": 24221 }, { "epoch": 0.5137112680536998, "grad_norm": 0.34216341376304626, "learning_rate": 1.6935262668540696e-05, "loss": 0.4763, "step": 24222 }, { "epoch": 0.5137324765116328, "grad_norm": 0.3516525328159332, "learning_rate": 1.6935022404200375e-05, "loss": 0.5084, "step": 24223 }, { "epoch": 0.5137536849695659, "grad_norm": 0.34562352299690247, "learning_rate": 1.6934782132146986e-05, "loss": 0.5013, "step": 24224 }, { "epoch": 0.5137748934274989, "grad_norm": 0.5349461436271667, "learning_rate": 1.693454185238079e-05, "loss": 0.4166, "step": 24225 }, { "epoch": 0.5137961018854319, "grad_norm": 0.3302318751811981, "learning_rate": 1.6934301564902055e-05, "loss": 0.5013, "step": 24226 }, { "epoch": 0.513817310343365, "grad_norm": 0.38729268312454224, "learning_rate": 1.6934061269711056e-05, "loss": 0.5317, "step": 24227 }, { "epoch": 0.5138385188012979, "grad_norm": 0.3291477859020233, "learning_rate": 1.693382096680805e-05, "loss": 0.4712, "step": 24228 }, { "epoch": 0.513859727259231, "grad_norm": 0.35329845547676086, "learning_rate": 1.693358065619331e-05, "loss": 0.5321, "step": 24229 }, { "epoch": 0.513880935717164, "grad_norm": 0.3440024256706238, "learning_rate": 1.6933340337867103e-05, "loss": 0.4923, "step": 24230 }, { "epoch": 0.513902144175097, "grad_norm": 0.3428502082824707, "learning_rate": 1.693310001182969e-05, "loss": 0.5268, "step": 24231 }, { "epoch": 0.51392335263303, "grad_norm": 0.3533422648906708, "learning_rate": 1.693285967808135e-05, "loss": 0.5223, "step": 24232 }, { "epoch": 0.5139445610909631, "grad_norm": 0.3322238326072693, "learning_rate": 1.693261933662234e-05, "loss": 0.4021, "step": 24233 }, { "epoch": 0.5139657695488961, "grad_norm": 0.3668895363807678, "learning_rate": 1.693237898745293e-05, "loss": 0.5532, "step": 24234 }, { "epoch": 0.5139869780068291, "grad_norm": 0.36707234382629395, "learning_rate": 1.693213863057339e-05, "loss": 0.4815, "step": 24235 }, { "epoch": 0.5140081864647621, "grad_norm": 0.3481951653957367, "learning_rate": 1.693189826598399e-05, "loss": 0.4757, "step": 24236 }, { "epoch": 0.5140293949226952, "grad_norm": 0.3403433859348297, "learning_rate": 1.6931657893684986e-05, "loss": 0.5808, "step": 24237 }, { "epoch": 0.5140506033806282, "grad_norm": 0.3398060202598572, "learning_rate": 1.6931417513676655e-05, "loss": 0.5123, "step": 24238 }, { "epoch": 0.5140718118385612, "grad_norm": 0.36003077030181885, "learning_rate": 1.693117712595926e-05, "loss": 0.5448, "step": 24239 }, { "epoch": 0.5140930202964943, "grad_norm": 0.38266029953956604, "learning_rate": 1.6930936730533072e-05, "loss": 0.49, "step": 24240 }, { "epoch": 0.5141142287544272, "grad_norm": 0.32649046182632446, "learning_rate": 1.6930696327398356e-05, "loss": 0.5311, "step": 24241 }, { "epoch": 0.5141354372123603, "grad_norm": 0.3724692761898041, "learning_rate": 1.6930455916555382e-05, "loss": 0.4553, "step": 24242 }, { "epoch": 0.5141566456702933, "grad_norm": 0.33032456040382385, "learning_rate": 1.6930215498004412e-05, "loss": 0.4974, "step": 24243 }, { "epoch": 0.5141778541282264, "grad_norm": 0.3769734799861908, "learning_rate": 1.6929975071745717e-05, "loss": 0.5278, "step": 24244 }, { "epoch": 0.5141990625861593, "grad_norm": 0.40088585019111633, "learning_rate": 1.6929734637779564e-05, "loss": 0.5022, "step": 24245 }, { "epoch": 0.5142202710440924, "grad_norm": 0.3629809021949768, "learning_rate": 1.692949419610622e-05, "loss": 0.4667, "step": 24246 }, { "epoch": 0.5142414795020254, "grad_norm": 0.3307502567768097, "learning_rate": 1.6929253746725953e-05, "loss": 0.4852, "step": 24247 }, { "epoch": 0.5142626879599584, "grad_norm": 0.3544943332672119, "learning_rate": 1.6929013289639032e-05, "loss": 0.4941, "step": 24248 }, { "epoch": 0.5142838964178914, "grad_norm": 0.43778350949287415, "learning_rate": 1.6928772824845722e-05, "loss": 0.5711, "step": 24249 }, { "epoch": 0.5143051048758245, "grad_norm": 0.37609997391700745, "learning_rate": 1.692853235234629e-05, "loss": 0.5521, "step": 24250 }, { "epoch": 0.5143263133337576, "grad_norm": 0.3312770426273346, "learning_rate": 1.6928291872141003e-05, "loss": 0.472, "step": 24251 }, { "epoch": 0.5143475217916905, "grad_norm": 0.34429219365119934, "learning_rate": 1.692805138423013e-05, "loss": 0.4959, "step": 24252 }, { "epoch": 0.5143687302496236, "grad_norm": 0.37294477224349976, "learning_rate": 1.6927810888613942e-05, "loss": 0.5737, "step": 24253 }, { "epoch": 0.5143899387075566, "grad_norm": 0.37656113505363464, "learning_rate": 1.69275703852927e-05, "loss": 0.5432, "step": 24254 }, { "epoch": 0.5144111471654896, "grad_norm": 0.5555508136749268, "learning_rate": 1.6927329874266675e-05, "loss": 0.4988, "step": 24255 }, { "epoch": 0.5144323556234226, "grad_norm": 0.34421616792678833, "learning_rate": 1.6927089355536136e-05, "loss": 0.4761, "step": 24256 }, { "epoch": 0.5144535640813557, "grad_norm": 0.37278062105178833, "learning_rate": 1.6926848829101348e-05, "loss": 0.5106, "step": 24257 }, { "epoch": 0.5144747725392886, "grad_norm": 0.3466072678565979, "learning_rate": 1.6926608294962578e-05, "loss": 0.5116, "step": 24258 }, { "epoch": 0.5144959809972217, "grad_norm": 0.34966418147087097, "learning_rate": 1.692636775312009e-05, "loss": 0.4817, "step": 24259 }, { "epoch": 0.5145171894551547, "grad_norm": 0.4095510244369507, "learning_rate": 1.692612720357416e-05, "loss": 0.4564, "step": 24260 }, { "epoch": 0.5145383979130878, "grad_norm": 0.4834118187427521, "learning_rate": 1.6925886646325053e-05, "loss": 0.5017, "step": 24261 }, { "epoch": 0.5145596063710207, "grad_norm": 0.37597671151161194, "learning_rate": 1.6925646081373034e-05, "loss": 0.5468, "step": 24262 }, { "epoch": 0.5145808148289538, "grad_norm": 0.4977172017097473, "learning_rate": 1.6925405508718372e-05, "loss": 0.4869, "step": 24263 }, { "epoch": 0.5146020232868868, "grad_norm": 0.416737824678421, "learning_rate": 1.6925164928361334e-05, "loss": 0.5937, "step": 24264 }, { "epoch": 0.5146232317448198, "grad_norm": 0.3631339371204376, "learning_rate": 1.692492434030219e-05, "loss": 0.552, "step": 24265 }, { "epoch": 0.5146444402027529, "grad_norm": 0.33253082633018494, "learning_rate": 1.6924683744541196e-05, "loss": 0.4946, "step": 24266 }, { "epoch": 0.5146656486606859, "grad_norm": 0.3519536852836609, "learning_rate": 1.6924443141078637e-05, "loss": 0.5449, "step": 24267 }, { "epoch": 0.514686857118619, "grad_norm": 0.3337816298007965, "learning_rate": 1.692420252991477e-05, "loss": 0.4739, "step": 24268 }, { "epoch": 0.5147080655765519, "grad_norm": 0.3318333923816681, "learning_rate": 1.6923961911049867e-05, "loss": 0.4564, "step": 24269 }, { "epoch": 0.514729274034485, "grad_norm": 0.35806554555892944, "learning_rate": 1.6923721284484194e-05, "loss": 0.5357, "step": 24270 }, { "epoch": 0.514750482492418, "grad_norm": 0.3868769705295563, "learning_rate": 1.6923480650218015e-05, "loss": 0.5315, "step": 24271 }, { "epoch": 0.514771690950351, "grad_norm": 0.3549007773399353, "learning_rate": 1.6923240008251602e-05, "loss": 0.5712, "step": 24272 }, { "epoch": 0.514792899408284, "grad_norm": 0.4124910235404968, "learning_rate": 1.6922999358585223e-05, "loss": 0.5794, "step": 24273 }, { "epoch": 0.5148141078662171, "grad_norm": 0.33831366896629333, "learning_rate": 1.6922758701219144e-05, "loss": 0.5857, "step": 24274 }, { "epoch": 0.51483531632415, "grad_norm": 0.37239712476730347, "learning_rate": 1.6922518036153635e-05, "loss": 0.6174, "step": 24275 }, { "epoch": 0.5148565247820831, "grad_norm": 0.3913726508617401, "learning_rate": 1.6922277363388958e-05, "loss": 0.5451, "step": 24276 }, { "epoch": 0.5148777332400161, "grad_norm": 0.34642139077186584, "learning_rate": 1.6922036682925386e-05, "loss": 0.4531, "step": 24277 }, { "epoch": 0.5148989416979491, "grad_norm": 0.3515939712524414, "learning_rate": 1.6921795994763182e-05, "loss": 0.4446, "step": 24278 }, { "epoch": 0.5149201501558822, "grad_norm": 0.3857630789279938, "learning_rate": 1.692155529890262e-05, "loss": 0.4292, "step": 24279 }, { "epoch": 0.5149413586138152, "grad_norm": 0.37052688002586365, "learning_rate": 1.6921314595343963e-05, "loss": 0.5114, "step": 24280 }, { "epoch": 0.5149625670717483, "grad_norm": 0.3736927807331085, "learning_rate": 1.692107388408748e-05, "loss": 0.5325, "step": 24281 }, { "epoch": 0.5149837755296812, "grad_norm": 0.40857401490211487, "learning_rate": 1.692083316513344e-05, "loss": 0.6276, "step": 24282 }, { "epoch": 0.5150049839876143, "grad_norm": 0.38305285573005676, "learning_rate": 1.692059243848211e-05, "loss": 0.4763, "step": 24283 }, { "epoch": 0.5150261924455473, "grad_norm": 0.35123369097709656, "learning_rate": 1.6920351704133753e-05, "loss": 0.5029, "step": 24284 }, { "epoch": 0.5150474009034803, "grad_norm": 0.3544837534427643, "learning_rate": 1.6920110962088646e-05, "loss": 0.4986, "step": 24285 }, { "epoch": 0.5150686093614133, "grad_norm": 0.3590765595436096, "learning_rate": 1.691987021234705e-05, "loss": 0.5194, "step": 24286 }, { "epoch": 0.5150898178193464, "grad_norm": 0.39474016427993774, "learning_rate": 1.691962945490923e-05, "loss": 0.476, "step": 24287 }, { "epoch": 0.5151110262772793, "grad_norm": 0.42019543051719666, "learning_rate": 1.6919388689775463e-05, "loss": 0.5085, "step": 24288 }, { "epoch": 0.5151322347352124, "grad_norm": 0.41733652353286743, "learning_rate": 1.6919147916946014e-05, "loss": 0.4534, "step": 24289 }, { "epoch": 0.5151534431931454, "grad_norm": 0.3654938340187073, "learning_rate": 1.6918907136421142e-05, "loss": 0.4991, "step": 24290 }, { "epoch": 0.5151746516510785, "grad_norm": 0.32570481300354004, "learning_rate": 1.6918666348201128e-05, "loss": 0.5077, "step": 24291 }, { "epoch": 0.5151958601090115, "grad_norm": 0.3296359181404114, "learning_rate": 1.691842555228623e-05, "loss": 0.4623, "step": 24292 }, { "epoch": 0.5152170685669445, "grad_norm": 0.4485642611980438, "learning_rate": 1.691818474867672e-05, "loss": 0.536, "step": 24293 }, { "epoch": 0.5152382770248776, "grad_norm": 0.3316824734210968, "learning_rate": 1.6917943937372867e-05, "loss": 0.5593, "step": 24294 }, { "epoch": 0.5152594854828105, "grad_norm": 0.3365350365638733, "learning_rate": 1.6917703118374932e-05, "loss": 0.486, "step": 24295 }, { "epoch": 0.5152806939407436, "grad_norm": 0.3923151195049286, "learning_rate": 1.6917462291683193e-05, "loss": 0.5195, "step": 24296 }, { "epoch": 0.5153019023986766, "grad_norm": 0.4347579777240753, "learning_rate": 1.691722145729791e-05, "loss": 0.5675, "step": 24297 }, { "epoch": 0.5153231108566096, "grad_norm": 0.4626774787902832, "learning_rate": 1.6916980615219355e-05, "loss": 0.6343, "step": 24298 }, { "epoch": 0.5153443193145426, "grad_norm": 0.36416807770729065, "learning_rate": 1.691673976544779e-05, "loss": 0.4834, "step": 24299 }, { "epoch": 0.5153655277724757, "grad_norm": 0.3443073034286499, "learning_rate": 1.691649890798349e-05, "loss": 0.4319, "step": 24300 }, { "epoch": 0.5153867362304086, "grad_norm": 0.3393535315990448, "learning_rate": 1.6916258042826722e-05, "loss": 0.5573, "step": 24301 }, { "epoch": 0.5154079446883417, "grad_norm": 0.32049253582954407, "learning_rate": 1.691601716997775e-05, "loss": 0.4679, "step": 24302 }, { "epoch": 0.5154291531462747, "grad_norm": 0.3456656038761139, "learning_rate": 1.691577628943684e-05, "loss": 0.4606, "step": 24303 }, { "epoch": 0.5154503616042078, "grad_norm": 0.36800360679626465, "learning_rate": 1.6915535401204267e-05, "loss": 0.495, "step": 24304 }, { "epoch": 0.5154715700621407, "grad_norm": 0.35618382692337036, "learning_rate": 1.6915294505280297e-05, "loss": 0.5564, "step": 24305 }, { "epoch": 0.5154927785200738, "grad_norm": 0.345499724149704, "learning_rate": 1.6915053601665197e-05, "loss": 0.5134, "step": 24306 }, { "epoch": 0.5155139869780069, "grad_norm": 0.3719995319843292, "learning_rate": 1.6914812690359234e-05, "loss": 0.5253, "step": 24307 }, { "epoch": 0.5155351954359398, "grad_norm": 0.3707355260848999, "learning_rate": 1.6914571771362672e-05, "loss": 0.5762, "step": 24308 }, { "epoch": 0.5155564038938729, "grad_norm": 0.4030649662017822, "learning_rate": 1.6914330844675787e-05, "loss": 0.5081, "step": 24309 }, { "epoch": 0.5155776123518059, "grad_norm": 0.35787054896354675, "learning_rate": 1.6914089910298842e-05, "loss": 0.5642, "step": 24310 }, { "epoch": 0.515598820809739, "grad_norm": 0.34686318039894104, "learning_rate": 1.691384896823211e-05, "loss": 0.4962, "step": 24311 }, { "epoch": 0.5156200292676719, "grad_norm": 0.3241226077079773, "learning_rate": 1.691360801847585e-05, "loss": 0.494, "step": 24312 }, { "epoch": 0.515641237725605, "grad_norm": 0.3409567177295685, "learning_rate": 1.691336706103034e-05, "loss": 0.4767, "step": 24313 }, { "epoch": 0.515662446183538, "grad_norm": 0.35032349824905396, "learning_rate": 1.691312609589584e-05, "loss": 0.4477, "step": 24314 }, { "epoch": 0.515683654641471, "grad_norm": 0.32421883940696716, "learning_rate": 1.691288512307262e-05, "loss": 0.4776, "step": 24315 }, { "epoch": 0.515704863099404, "grad_norm": 0.37269720435142517, "learning_rate": 1.6912644142560953e-05, "loss": 0.5153, "step": 24316 }, { "epoch": 0.5157260715573371, "grad_norm": 0.48473548889160156, "learning_rate": 1.69124031543611e-05, "loss": 0.5137, "step": 24317 }, { "epoch": 0.51574728001527, "grad_norm": 0.34647661447525024, "learning_rate": 1.6912162158473335e-05, "loss": 0.5115, "step": 24318 }, { "epoch": 0.5157684884732031, "grad_norm": 0.40878480672836304, "learning_rate": 1.6911921154897922e-05, "loss": 0.4836, "step": 24319 }, { "epoch": 0.5157896969311362, "grad_norm": 0.33146440982818604, "learning_rate": 1.691168014363513e-05, "loss": 0.3828, "step": 24320 }, { "epoch": 0.5158109053890692, "grad_norm": 0.33426567912101746, "learning_rate": 1.691143912468523e-05, "loss": 0.5478, "step": 24321 }, { "epoch": 0.5158321138470022, "grad_norm": 0.365881085395813, "learning_rate": 1.691119809804848e-05, "loss": 0.5908, "step": 24322 }, { "epoch": 0.5158533223049352, "grad_norm": 0.37003180384635925, "learning_rate": 1.6910957063725165e-05, "loss": 0.521, "step": 24323 }, { "epoch": 0.5158745307628683, "grad_norm": 0.37962377071380615, "learning_rate": 1.6910716021715537e-05, "loss": 0.5895, "step": 24324 }, { "epoch": 0.5158957392208012, "grad_norm": 0.4359201192855835, "learning_rate": 1.6910474972019874e-05, "loss": 0.4876, "step": 24325 }, { "epoch": 0.5159169476787343, "grad_norm": 0.33750513195991516, "learning_rate": 1.691023391463844e-05, "loss": 0.5202, "step": 24326 }, { "epoch": 0.5159381561366673, "grad_norm": 0.34975379705429077, "learning_rate": 1.6909992849571506e-05, "loss": 0.482, "step": 24327 }, { "epoch": 0.5159593645946003, "grad_norm": 0.3462061882019043, "learning_rate": 1.6909751776819335e-05, "loss": 0.4601, "step": 24328 }, { "epoch": 0.5159805730525333, "grad_norm": 0.4678274989128113, "learning_rate": 1.69095106963822e-05, "loss": 0.5573, "step": 24329 }, { "epoch": 0.5160017815104664, "grad_norm": 0.36347514390945435, "learning_rate": 1.6909269608260363e-05, "loss": 0.441, "step": 24330 }, { "epoch": 0.5160229899683993, "grad_norm": 0.35745909810066223, "learning_rate": 1.69090285124541e-05, "loss": 0.5202, "step": 24331 }, { "epoch": 0.5160441984263324, "grad_norm": 0.34917405247688293, "learning_rate": 1.6908787408963676e-05, "loss": 0.5122, "step": 24332 }, { "epoch": 0.5160654068842655, "grad_norm": 0.3905350863933563, "learning_rate": 1.6908546297789356e-05, "loss": 0.4987, "step": 24333 }, { "epoch": 0.5160866153421985, "grad_norm": 0.36498555541038513, "learning_rate": 1.6908305178931413e-05, "loss": 0.5832, "step": 24334 }, { "epoch": 0.5161078238001315, "grad_norm": 0.3669390380382538, "learning_rate": 1.6908064052390112e-05, "loss": 0.4483, "step": 24335 }, { "epoch": 0.5161290322580645, "grad_norm": 0.39580702781677246, "learning_rate": 1.6907822918165725e-05, "loss": 0.4849, "step": 24336 }, { "epoch": 0.5161502407159976, "grad_norm": 0.361244797706604, "learning_rate": 1.690758177625851e-05, "loss": 0.4849, "step": 24337 }, { "epoch": 0.5161714491739305, "grad_norm": 0.3525974452495575, "learning_rate": 1.690734062666875e-05, "loss": 0.5291, "step": 24338 }, { "epoch": 0.5161926576318636, "grad_norm": 0.36444422602653503, "learning_rate": 1.6907099469396704e-05, "loss": 0.5517, "step": 24339 }, { "epoch": 0.5162138660897966, "grad_norm": 0.39249473810195923, "learning_rate": 1.690685830444264e-05, "loss": 0.4727, "step": 24340 }, { "epoch": 0.5162350745477297, "grad_norm": 0.353853315114975, "learning_rate": 1.690661713180683e-05, "loss": 0.4966, "step": 24341 }, { "epoch": 0.5162562830056626, "grad_norm": 0.3532062768936157, "learning_rate": 1.690637595148954e-05, "loss": 0.4053, "step": 24342 }, { "epoch": 0.5162774914635957, "grad_norm": 0.3106303811073303, "learning_rate": 1.6906134763491037e-05, "loss": 0.5225, "step": 24343 }, { "epoch": 0.5162986999215287, "grad_norm": 0.3437125086784363, "learning_rate": 1.690589356781159e-05, "loss": 0.4865, "step": 24344 }, { "epoch": 0.5163199083794617, "grad_norm": 0.37241530418395996, "learning_rate": 1.6905652364451476e-05, "loss": 0.4967, "step": 24345 }, { "epoch": 0.5163411168373947, "grad_norm": 0.36538007855415344, "learning_rate": 1.6905411153410947e-05, "loss": 0.5429, "step": 24346 }, { "epoch": 0.5163623252953278, "grad_norm": 0.8405922055244446, "learning_rate": 1.6905169934690284e-05, "loss": 0.5312, "step": 24347 }, { "epoch": 0.5163835337532608, "grad_norm": 0.33344656229019165, "learning_rate": 1.6904928708289748e-05, "loss": 0.5521, "step": 24348 }, { "epoch": 0.5164047422111938, "grad_norm": 0.4586902856826782, "learning_rate": 1.6904687474209612e-05, "loss": 0.5401, "step": 24349 }, { "epoch": 0.5164259506691269, "grad_norm": 0.30434155464172363, "learning_rate": 1.6904446232450143e-05, "loss": 0.5067, "step": 24350 }, { "epoch": 0.5164471591270599, "grad_norm": 0.35566702485084534, "learning_rate": 1.690420498301161e-05, "loss": 0.5348, "step": 24351 }, { "epoch": 0.5164683675849929, "grad_norm": 0.3376133143901825, "learning_rate": 1.6903963725894278e-05, "loss": 0.4287, "step": 24352 }, { "epoch": 0.5164895760429259, "grad_norm": 0.3491053283214569, "learning_rate": 1.6903722461098416e-05, "loss": 0.5186, "step": 24353 }, { "epoch": 0.516510784500859, "grad_norm": 0.3339567184448242, "learning_rate": 1.6903481188624296e-05, "loss": 0.4555, "step": 24354 }, { "epoch": 0.5165319929587919, "grad_norm": 0.3518167734146118, "learning_rate": 1.6903239908472185e-05, "loss": 0.5463, "step": 24355 }, { "epoch": 0.516553201416725, "grad_norm": 0.3335835337638855, "learning_rate": 1.6902998620642347e-05, "loss": 0.4581, "step": 24356 }, { "epoch": 0.516574409874658, "grad_norm": 0.3471009433269501, "learning_rate": 1.6902757325135056e-05, "loss": 0.5911, "step": 24357 }, { "epoch": 0.516595618332591, "grad_norm": 0.45590370893478394, "learning_rate": 1.690251602195058e-05, "loss": 0.5083, "step": 24358 }, { "epoch": 0.516616826790524, "grad_norm": 0.33207666873931885, "learning_rate": 1.6902274711089185e-05, "loss": 0.5339, "step": 24359 }, { "epoch": 0.5166380352484571, "grad_norm": 0.3829156160354614, "learning_rate": 1.6902033392551143e-05, "loss": 0.4844, "step": 24360 }, { "epoch": 0.5166592437063902, "grad_norm": 0.3611108362674713, "learning_rate": 1.6901792066336713e-05, "loss": 0.5155, "step": 24361 }, { "epoch": 0.5166804521643231, "grad_norm": 0.3613027334213257, "learning_rate": 1.6901550732446172e-05, "loss": 0.533, "step": 24362 }, { "epoch": 0.5167016606222562, "grad_norm": 0.3385714590549469, "learning_rate": 1.6901309390879788e-05, "loss": 0.4519, "step": 24363 }, { "epoch": 0.5167228690801892, "grad_norm": 0.3675207197666168, "learning_rate": 1.6901068041637828e-05, "loss": 0.4885, "step": 24364 }, { "epoch": 0.5167440775381222, "grad_norm": 0.38686633110046387, "learning_rate": 1.6900826684720557e-05, "loss": 0.567, "step": 24365 }, { "epoch": 0.5167652859960552, "grad_norm": 0.3521665334701538, "learning_rate": 1.690058532012825e-05, "loss": 0.522, "step": 24366 }, { "epoch": 0.5167864944539883, "grad_norm": 0.35102781653404236, "learning_rate": 1.6900343947861167e-05, "loss": 0.5863, "step": 24367 }, { "epoch": 0.5168077029119212, "grad_norm": 0.3647853434085846, "learning_rate": 1.6900102567919585e-05, "loss": 0.4952, "step": 24368 }, { "epoch": 0.5168289113698543, "grad_norm": 0.3482424318790436, "learning_rate": 1.689986118030377e-05, "loss": 0.5266, "step": 24369 }, { "epoch": 0.5168501198277873, "grad_norm": 0.3305182456970215, "learning_rate": 1.6899619785013986e-05, "loss": 0.5559, "step": 24370 }, { "epoch": 0.5168713282857204, "grad_norm": 0.3832628130912781, "learning_rate": 1.6899378382050507e-05, "loss": 0.4316, "step": 24371 }, { "epoch": 0.5168925367436533, "grad_norm": 0.3626602292060852, "learning_rate": 1.68991369714136e-05, "loss": 0.5253, "step": 24372 }, { "epoch": 0.5169137452015864, "grad_norm": 0.3493719696998596, "learning_rate": 1.689889555310353e-05, "loss": 0.441, "step": 24373 }, { "epoch": 0.5169349536595195, "grad_norm": 0.3789310157299042, "learning_rate": 1.689865412712057e-05, "loss": 0.4807, "step": 24374 }, { "epoch": 0.5169561621174524, "grad_norm": 0.38993075489997864, "learning_rate": 1.6898412693464986e-05, "loss": 0.5105, "step": 24375 }, { "epoch": 0.5169773705753855, "grad_norm": 0.3475490212440491, "learning_rate": 1.689817125213705e-05, "loss": 0.427, "step": 24376 }, { "epoch": 0.5169985790333185, "grad_norm": 0.34259718656539917, "learning_rate": 1.6897929803137026e-05, "loss": 0.4161, "step": 24377 }, { "epoch": 0.5170197874912515, "grad_norm": 0.36280831694602966, "learning_rate": 1.6897688346465183e-05, "loss": 0.4731, "step": 24378 }, { "epoch": 0.5170409959491845, "grad_norm": 0.4087311327457428, "learning_rate": 1.6897446882121796e-05, "loss": 0.5111, "step": 24379 }, { "epoch": 0.5170622044071176, "grad_norm": 0.3464944362640381, "learning_rate": 1.6897205410107124e-05, "loss": 0.4637, "step": 24380 }, { "epoch": 0.5170834128650506, "grad_norm": 0.3897828161716461, "learning_rate": 1.689696393042144e-05, "loss": 0.4904, "step": 24381 }, { "epoch": 0.5171046213229836, "grad_norm": 0.3193042278289795, "learning_rate": 1.6896722443065012e-05, "loss": 0.4812, "step": 24382 }, { "epoch": 0.5171258297809166, "grad_norm": 0.4745887815952301, "learning_rate": 1.689648094803811e-05, "loss": 0.5204, "step": 24383 }, { "epoch": 0.5171470382388497, "grad_norm": 0.35626134276390076, "learning_rate": 1.6896239445341005e-05, "loss": 0.5773, "step": 24384 }, { "epoch": 0.5171682466967826, "grad_norm": 0.33631080389022827, "learning_rate": 1.689599793497396e-05, "loss": 0.4664, "step": 24385 }, { "epoch": 0.5171894551547157, "grad_norm": 0.32083696126937866, "learning_rate": 1.6895756416937245e-05, "loss": 0.5253, "step": 24386 }, { "epoch": 0.5172106636126487, "grad_norm": 0.35796597599983215, "learning_rate": 1.689551489123113e-05, "loss": 0.4814, "step": 24387 }, { "epoch": 0.5172318720705817, "grad_norm": 0.3072333335876465, "learning_rate": 1.6895273357855882e-05, "loss": 0.4239, "step": 24388 }, { "epoch": 0.5172530805285148, "grad_norm": 0.3315678536891937, "learning_rate": 1.6895031816811772e-05, "loss": 0.5179, "step": 24389 }, { "epoch": 0.5172742889864478, "grad_norm": 0.439438134431839, "learning_rate": 1.689479026809907e-05, "loss": 0.4552, "step": 24390 }, { "epoch": 0.5172954974443809, "grad_norm": 0.3995344042778015, "learning_rate": 1.689454871171804e-05, "loss": 0.4941, "step": 24391 }, { "epoch": 0.5173167059023138, "grad_norm": 0.33416277170181274, "learning_rate": 1.6894307147668954e-05, "loss": 0.4584, "step": 24392 }, { "epoch": 0.5173379143602469, "grad_norm": 0.4046228229999542, "learning_rate": 1.689406557595208e-05, "loss": 0.5186, "step": 24393 }, { "epoch": 0.5173591228181799, "grad_norm": 0.4014247953891754, "learning_rate": 1.6893823996567683e-05, "loss": 0.5247, "step": 24394 }, { "epoch": 0.5173803312761129, "grad_norm": 0.3232157230377197, "learning_rate": 1.6893582409516037e-05, "loss": 0.4658, "step": 24395 }, { "epoch": 0.5174015397340459, "grad_norm": 0.3370423913002014, "learning_rate": 1.6893340814797407e-05, "loss": 0.546, "step": 24396 }, { "epoch": 0.517422748191979, "grad_norm": 0.3491877615451813, "learning_rate": 1.6893099212412065e-05, "loss": 0.489, "step": 24397 }, { "epoch": 0.5174439566499119, "grad_norm": 0.3617490530014038, "learning_rate": 1.689285760236028e-05, "loss": 0.5062, "step": 24398 }, { "epoch": 0.517465165107845, "grad_norm": 0.3694002628326416, "learning_rate": 1.6892615984642312e-05, "loss": 0.5685, "step": 24399 }, { "epoch": 0.517486373565778, "grad_norm": 0.3807823956012726, "learning_rate": 1.689237435925844e-05, "loss": 0.4655, "step": 24400 }, { "epoch": 0.517507582023711, "grad_norm": 0.3108760416507721, "learning_rate": 1.689213272620893e-05, "loss": 0.4877, "step": 24401 }, { "epoch": 0.5175287904816441, "grad_norm": 0.37575626373291016, "learning_rate": 1.689189108549405e-05, "loss": 0.5635, "step": 24402 }, { "epoch": 0.5175499989395771, "grad_norm": 0.31037458777427673, "learning_rate": 1.689164943711407e-05, "loss": 0.418, "step": 24403 }, { "epoch": 0.5175712073975102, "grad_norm": 0.36538276076316833, "learning_rate": 1.6891407781069255e-05, "loss": 0.5752, "step": 24404 }, { "epoch": 0.5175924158554431, "grad_norm": 0.37986820936203003, "learning_rate": 1.6891166117359876e-05, "loss": 0.5336, "step": 24405 }, { "epoch": 0.5176136243133762, "grad_norm": 0.33906471729278564, "learning_rate": 1.6890924445986198e-05, "loss": 0.5224, "step": 24406 }, { "epoch": 0.5176348327713092, "grad_norm": 0.3116832673549652, "learning_rate": 1.68906827669485e-05, "loss": 0.4642, "step": 24407 }, { "epoch": 0.5176560412292422, "grad_norm": 0.3662385940551758, "learning_rate": 1.6890441080247043e-05, "loss": 0.4685, "step": 24408 }, { "epoch": 0.5176772496871752, "grad_norm": 0.47997790575027466, "learning_rate": 1.6890199385882095e-05, "loss": 0.4689, "step": 24409 }, { "epoch": 0.5176984581451083, "grad_norm": 0.32209229469299316, "learning_rate": 1.688995768385393e-05, "loss": 0.4963, "step": 24410 }, { "epoch": 0.5177196666030413, "grad_norm": 0.331805557012558, "learning_rate": 1.6889715974162814e-05, "loss": 0.4967, "step": 24411 }, { "epoch": 0.5177408750609743, "grad_norm": 0.34189674258232117, "learning_rate": 1.688947425680901e-05, "loss": 0.4912, "step": 24412 }, { "epoch": 0.5177620835189073, "grad_norm": 0.3287118673324585, "learning_rate": 1.68892325317928e-05, "loss": 0.5076, "step": 24413 }, { "epoch": 0.5177832919768404, "grad_norm": 0.32813921570777893, "learning_rate": 1.6888990799114443e-05, "loss": 0.4851, "step": 24414 }, { "epoch": 0.5178045004347734, "grad_norm": 0.3396347761154175, "learning_rate": 1.688874905877421e-05, "loss": 0.4405, "step": 24415 }, { "epoch": 0.5178257088927064, "grad_norm": 0.3586401343345642, "learning_rate": 1.6888507310772372e-05, "loss": 0.5996, "step": 24416 }, { "epoch": 0.5178469173506395, "grad_norm": 0.3889777958393097, "learning_rate": 1.688826555510919e-05, "loss": 0.5402, "step": 24417 }, { "epoch": 0.5178681258085724, "grad_norm": 0.35672613978385925, "learning_rate": 1.6888023791784943e-05, "loss": 0.4769, "step": 24418 }, { "epoch": 0.5178893342665055, "grad_norm": 0.3440295159816742, "learning_rate": 1.6887782020799895e-05, "loss": 0.5279, "step": 24419 }, { "epoch": 0.5179105427244385, "grad_norm": 0.33346953988075256, "learning_rate": 1.688754024215432e-05, "loss": 0.4749, "step": 24420 }, { "epoch": 0.5179317511823716, "grad_norm": 0.49594685435295105, "learning_rate": 1.6887298455848475e-05, "loss": 0.5941, "step": 24421 }, { "epoch": 0.5179529596403045, "grad_norm": 0.32853975892066956, "learning_rate": 1.6887056661882644e-05, "loss": 0.4908, "step": 24422 }, { "epoch": 0.5179741680982376, "grad_norm": 0.3438211679458618, "learning_rate": 1.6886814860257084e-05, "loss": 0.4885, "step": 24423 }, { "epoch": 0.5179953765561706, "grad_norm": 0.37285366654396057, "learning_rate": 1.688657305097207e-05, "loss": 0.5693, "step": 24424 }, { "epoch": 0.5180165850141036, "grad_norm": 0.36851465702056885, "learning_rate": 1.6886331234027867e-05, "loss": 0.4419, "step": 24425 }, { "epoch": 0.5180377934720366, "grad_norm": 0.3361597955226898, "learning_rate": 1.6886089409424746e-05, "loss": 0.4559, "step": 24426 }, { "epoch": 0.5180590019299697, "grad_norm": 0.37528395652770996, "learning_rate": 1.688584757716298e-05, "loss": 0.5107, "step": 24427 }, { "epoch": 0.5180802103879028, "grad_norm": 0.3493989109992981, "learning_rate": 1.688560573724283e-05, "loss": 0.4599, "step": 24428 }, { "epoch": 0.5181014188458357, "grad_norm": 0.39390531182289124, "learning_rate": 1.688536388966457e-05, "loss": 0.5597, "step": 24429 }, { "epoch": 0.5181226273037688, "grad_norm": 0.332042932510376, "learning_rate": 1.688512203442847e-05, "loss": 0.492, "step": 24430 }, { "epoch": 0.5181438357617018, "grad_norm": 0.34448668360710144, "learning_rate": 1.6884880171534797e-05, "loss": 0.5308, "step": 24431 }, { "epoch": 0.5181650442196348, "grad_norm": 0.36936336755752563, "learning_rate": 1.6884638300983818e-05, "loss": 0.5636, "step": 24432 }, { "epoch": 0.5181862526775678, "grad_norm": 0.4923154413700104, "learning_rate": 1.6884396422775807e-05, "loss": 0.5561, "step": 24433 }, { "epoch": 0.5182074611355009, "grad_norm": 0.3356402516365051, "learning_rate": 1.6884154536911025e-05, "loss": 0.4837, "step": 24434 }, { "epoch": 0.5182286695934338, "grad_norm": 0.3746882677078247, "learning_rate": 1.6883912643389753e-05, "loss": 0.49, "step": 24435 }, { "epoch": 0.5182498780513669, "grad_norm": 0.3840426206588745, "learning_rate": 1.6883670742212248e-05, "loss": 0.5507, "step": 24436 }, { "epoch": 0.5182710865092999, "grad_norm": 0.35430383682250977, "learning_rate": 1.6883428833378788e-05, "loss": 0.6079, "step": 24437 }, { "epoch": 0.518292294967233, "grad_norm": 0.29458290338516235, "learning_rate": 1.6883186916889634e-05, "loss": 0.4057, "step": 24438 }, { "epoch": 0.5183135034251659, "grad_norm": 0.3609381914138794, "learning_rate": 1.688294499274506e-05, "loss": 0.478, "step": 24439 }, { "epoch": 0.518334711883099, "grad_norm": 0.3388388156890869, "learning_rate": 1.6882703060945337e-05, "loss": 0.6014, "step": 24440 }, { "epoch": 0.518355920341032, "grad_norm": 0.3277880549430847, "learning_rate": 1.688246112149073e-05, "loss": 0.4926, "step": 24441 }, { "epoch": 0.518377128798965, "grad_norm": 0.3155069947242737, "learning_rate": 1.6882219174381513e-05, "loss": 0.4527, "step": 24442 }, { "epoch": 0.5183983372568981, "grad_norm": 0.39009347558021545, "learning_rate": 1.6881977219617946e-05, "loss": 0.4871, "step": 24443 }, { "epoch": 0.5184195457148311, "grad_norm": 0.4269215166568756, "learning_rate": 1.6881735257200308e-05, "loss": 0.525, "step": 24444 }, { "epoch": 0.5184407541727641, "grad_norm": 0.42020732164382935, "learning_rate": 1.688149328712886e-05, "loss": 0.5021, "step": 24445 }, { "epoch": 0.5184619626306971, "grad_norm": 0.45267266035079956, "learning_rate": 1.688125130940388e-05, "loss": 0.5398, "step": 24446 }, { "epoch": 0.5184831710886302, "grad_norm": 0.35638222098350525, "learning_rate": 1.6881009324025628e-05, "loss": 0.4857, "step": 24447 }, { "epoch": 0.5185043795465631, "grad_norm": 0.40710166096687317, "learning_rate": 1.688076733099438e-05, "loss": 0.4876, "step": 24448 }, { "epoch": 0.5185255880044962, "grad_norm": 0.37293800711631775, "learning_rate": 1.68805253303104e-05, "loss": 0.5251, "step": 24449 }, { "epoch": 0.5185467964624292, "grad_norm": 0.34213411808013916, "learning_rate": 1.6880283321973956e-05, "loss": 0.5088, "step": 24450 }, { "epoch": 0.5185680049203623, "grad_norm": 0.3449724018573761, "learning_rate": 1.6880041305985326e-05, "loss": 0.4815, "step": 24451 }, { "epoch": 0.5185892133782952, "grad_norm": 0.3620275855064392, "learning_rate": 1.6879799282344773e-05, "loss": 0.5234, "step": 24452 }, { "epoch": 0.5186104218362283, "grad_norm": 0.3143174350261688, "learning_rate": 1.687955725105257e-05, "loss": 0.3782, "step": 24453 }, { "epoch": 0.5186316302941613, "grad_norm": 0.34477004408836365, "learning_rate": 1.6879315212108977e-05, "loss": 0.5422, "step": 24454 }, { "epoch": 0.5186528387520943, "grad_norm": 0.3598352074623108, "learning_rate": 1.687907316551427e-05, "loss": 0.4982, "step": 24455 }, { "epoch": 0.5186740472100274, "grad_norm": 0.3219492733478546, "learning_rate": 1.687883111126872e-05, "loss": 0.4855, "step": 24456 }, { "epoch": 0.5186952556679604, "grad_norm": 0.378473162651062, "learning_rate": 1.6878589049372593e-05, "loss": 0.4876, "step": 24457 }, { "epoch": 0.5187164641258935, "grad_norm": 0.3808363378047943, "learning_rate": 1.687834697982616e-05, "loss": 0.5628, "step": 24458 }, { "epoch": 0.5187376725838264, "grad_norm": 0.31705188751220703, "learning_rate": 1.687810490262969e-05, "loss": 0.4696, "step": 24459 }, { "epoch": 0.5187588810417595, "grad_norm": 0.3715481460094452, "learning_rate": 1.687786281778345e-05, "loss": 0.4569, "step": 24460 }, { "epoch": 0.5187800894996925, "grad_norm": 0.3727363348007202, "learning_rate": 1.6877620725287713e-05, "loss": 0.4691, "step": 24461 }, { "epoch": 0.5188012979576255, "grad_norm": 0.31974825263023376, "learning_rate": 1.687737862514274e-05, "loss": 0.424, "step": 24462 }, { "epoch": 0.5188225064155585, "grad_norm": 0.3849071264266968, "learning_rate": 1.6877136517348815e-05, "loss": 0.4458, "step": 24463 }, { "epoch": 0.5188437148734916, "grad_norm": 0.39345040917396545, "learning_rate": 1.687689440190619e-05, "loss": 0.5188, "step": 24464 }, { "epoch": 0.5188649233314245, "grad_norm": 0.37036165595054626, "learning_rate": 1.6876652278815148e-05, "loss": 0.5056, "step": 24465 }, { "epoch": 0.5188861317893576, "grad_norm": 0.34293684363365173, "learning_rate": 1.687641014807595e-05, "loss": 0.5263, "step": 24466 }, { "epoch": 0.5189073402472906, "grad_norm": 0.4052608907222748, "learning_rate": 1.6876168009688873e-05, "loss": 0.6094, "step": 24467 }, { "epoch": 0.5189285487052236, "grad_norm": 0.34811463952064514, "learning_rate": 1.6875925863654177e-05, "loss": 0.5124, "step": 24468 }, { "epoch": 0.5189497571631567, "grad_norm": 0.32454243302345276, "learning_rate": 1.6875683709972136e-05, "loss": 0.5261, "step": 24469 }, { "epoch": 0.5189709656210897, "grad_norm": 0.3422263562679291, "learning_rate": 1.6875441548643023e-05, "loss": 0.5033, "step": 24470 }, { "epoch": 0.5189921740790228, "grad_norm": 0.3553815186023712, "learning_rate": 1.68751993796671e-05, "loss": 0.5169, "step": 24471 }, { "epoch": 0.5190133825369557, "grad_norm": 0.3334367871284485, "learning_rate": 1.6874957203044645e-05, "loss": 0.4777, "step": 24472 }, { "epoch": 0.5190345909948888, "grad_norm": 0.37388911843299866, "learning_rate": 1.6874715018775916e-05, "loss": 0.4183, "step": 24473 }, { "epoch": 0.5190557994528218, "grad_norm": 0.3601967394351959, "learning_rate": 1.6874472826861192e-05, "loss": 0.5814, "step": 24474 }, { "epoch": 0.5190770079107548, "grad_norm": 0.3583998680114746, "learning_rate": 1.687423062730074e-05, "loss": 0.485, "step": 24475 }, { "epoch": 0.5190982163686878, "grad_norm": 6.632079124450684, "learning_rate": 1.6873988420094825e-05, "loss": 0.5873, "step": 24476 }, { "epoch": 0.5191194248266209, "grad_norm": 0.3660140633583069, "learning_rate": 1.6873746205243723e-05, "loss": 0.4933, "step": 24477 }, { "epoch": 0.5191406332845538, "grad_norm": 0.35898134112358093, "learning_rate": 1.6873503982747695e-05, "loss": 0.5131, "step": 24478 }, { "epoch": 0.5191618417424869, "grad_norm": 0.35863417387008667, "learning_rate": 1.6873261752607022e-05, "loss": 0.5531, "step": 24479 }, { "epoch": 0.5191830502004199, "grad_norm": 0.35512399673461914, "learning_rate": 1.6873019514821964e-05, "loss": 0.5257, "step": 24480 }, { "epoch": 0.519204258658353, "grad_norm": 0.3309022784233093, "learning_rate": 1.6872777269392794e-05, "loss": 0.429, "step": 24481 }, { "epoch": 0.5192254671162859, "grad_norm": 0.356850802898407, "learning_rate": 1.687253501631978e-05, "loss": 0.4114, "step": 24482 }, { "epoch": 0.519246675574219, "grad_norm": 0.3849973678588867, "learning_rate": 1.6872292755603194e-05, "loss": 0.5543, "step": 24483 }, { "epoch": 0.5192678840321521, "grad_norm": 0.3607647716999054, "learning_rate": 1.6872050487243303e-05, "loss": 0.5242, "step": 24484 }, { "epoch": 0.519289092490085, "grad_norm": 0.3384208381175995, "learning_rate": 1.6871808211240376e-05, "loss": 0.4964, "step": 24485 }, { "epoch": 0.5193103009480181, "grad_norm": 0.48683181405067444, "learning_rate": 1.6871565927594685e-05, "loss": 0.5028, "step": 24486 }, { "epoch": 0.5193315094059511, "grad_norm": 0.33557024598121643, "learning_rate": 1.6871323636306497e-05, "loss": 0.5285, "step": 24487 }, { "epoch": 0.5193527178638842, "grad_norm": 0.3787350654602051, "learning_rate": 1.6871081337376086e-05, "loss": 0.4677, "step": 24488 }, { "epoch": 0.5193739263218171, "grad_norm": 0.349437415599823, "learning_rate": 1.6870839030803713e-05, "loss": 0.4629, "step": 24489 }, { "epoch": 0.5193951347797502, "grad_norm": 0.40636757016181946, "learning_rate": 1.6870596716589656e-05, "loss": 0.5329, "step": 24490 }, { "epoch": 0.5194163432376832, "grad_norm": 0.34752771258354187, "learning_rate": 1.687035439473418e-05, "loss": 0.4626, "step": 24491 }, { "epoch": 0.5194375516956162, "grad_norm": 0.3330979347229004, "learning_rate": 1.6870112065237555e-05, "loss": 0.4409, "step": 24492 }, { "epoch": 0.5194587601535492, "grad_norm": 0.34547534584999084, "learning_rate": 1.686986972810005e-05, "loss": 0.5026, "step": 24493 }, { "epoch": 0.5194799686114823, "grad_norm": 0.3523366451263428, "learning_rate": 1.6869627383321937e-05, "loss": 0.4352, "step": 24494 }, { "epoch": 0.5195011770694152, "grad_norm": 0.3490135073661804, "learning_rate": 1.6869385030903483e-05, "loss": 0.5016, "step": 24495 }, { "epoch": 0.5195223855273483, "grad_norm": 0.4268478751182556, "learning_rate": 1.686914267084496e-05, "loss": 0.4991, "step": 24496 }, { "epoch": 0.5195435939852814, "grad_norm": 0.364474892616272, "learning_rate": 1.6868900303146634e-05, "loss": 0.4792, "step": 24497 }, { "epoch": 0.5195648024432143, "grad_norm": 0.34248462319374084, "learning_rate": 1.686865792780878e-05, "loss": 0.4096, "step": 24498 }, { "epoch": 0.5195860109011474, "grad_norm": 0.3147601783275604, "learning_rate": 1.6868415544831663e-05, "loss": 0.4561, "step": 24499 }, { "epoch": 0.5196072193590804, "grad_norm": 0.38389649987220764, "learning_rate": 1.6868173154215553e-05, "loss": 0.4822, "step": 24500 }, { "epoch": 0.5196284278170135, "grad_norm": 0.35765403509140015, "learning_rate": 1.686793075596072e-05, "loss": 0.4737, "step": 24501 }, { "epoch": 0.5196496362749464, "grad_norm": 0.37200698256492615, "learning_rate": 1.6867688350067436e-05, "loss": 0.5348, "step": 24502 }, { "epoch": 0.5196708447328795, "grad_norm": 0.34618276357650757, "learning_rate": 1.6867445936535968e-05, "loss": 0.4981, "step": 24503 }, { "epoch": 0.5196920531908125, "grad_norm": 0.32634249329566956, "learning_rate": 1.6867203515366587e-05, "loss": 0.5479, "step": 24504 }, { "epoch": 0.5197132616487455, "grad_norm": 0.3622477054595947, "learning_rate": 1.686696108655956e-05, "loss": 0.461, "step": 24505 }, { "epoch": 0.5197344701066785, "grad_norm": 0.3291093409061432, "learning_rate": 1.686671865011516e-05, "loss": 0.4883, "step": 24506 }, { "epoch": 0.5197556785646116, "grad_norm": 0.3398072421550751, "learning_rate": 1.6866476206033656e-05, "loss": 0.5038, "step": 24507 }, { "epoch": 0.5197768870225445, "grad_norm": 0.3025577962398529, "learning_rate": 1.6866233754315313e-05, "loss": 0.4479, "step": 24508 }, { "epoch": 0.5197980954804776, "grad_norm": 0.3454284965991974, "learning_rate": 1.686599129496041e-05, "loss": 0.3793, "step": 24509 }, { "epoch": 0.5198193039384107, "grad_norm": 0.3065987229347229, "learning_rate": 1.686574882796921e-05, "loss": 0.4585, "step": 24510 }, { "epoch": 0.5198405123963437, "grad_norm": 0.3549298942089081, "learning_rate": 1.686550635334198e-05, "loss": 0.5445, "step": 24511 }, { "epoch": 0.5198617208542767, "grad_norm": 0.33863914012908936, "learning_rate": 1.6865263871078994e-05, "loss": 0.4771, "step": 24512 }, { "epoch": 0.5198829293122097, "grad_norm": 0.8629035949707031, "learning_rate": 1.6865021381180523e-05, "loss": 0.4256, "step": 24513 }, { "epoch": 0.5199041377701428, "grad_norm": 0.32453373074531555, "learning_rate": 1.6864778883646835e-05, "loss": 0.471, "step": 24514 }, { "epoch": 0.5199253462280757, "grad_norm": 0.3501061499118805, "learning_rate": 1.6864536378478196e-05, "loss": 0.4549, "step": 24515 }, { "epoch": 0.5199465546860088, "grad_norm": 0.36996376514434814, "learning_rate": 1.6864293865674883e-05, "loss": 0.5453, "step": 24516 }, { "epoch": 0.5199677631439418, "grad_norm": 0.43692052364349365, "learning_rate": 1.6864051345237164e-05, "loss": 0.5581, "step": 24517 }, { "epoch": 0.5199889716018748, "grad_norm": 0.3532302975654602, "learning_rate": 1.6863808817165303e-05, "loss": 0.4902, "step": 24518 }, { "epoch": 0.5200101800598078, "grad_norm": 0.3512994349002838, "learning_rate": 1.6863566281459574e-05, "loss": 0.5079, "step": 24519 }, { "epoch": 0.5200313885177409, "grad_norm": 0.3376225531101227, "learning_rate": 1.6863323738120246e-05, "loss": 0.4729, "step": 24520 }, { "epoch": 0.5200525969756739, "grad_norm": 0.347011923789978, "learning_rate": 1.686308118714759e-05, "loss": 0.5279, "step": 24521 }, { "epoch": 0.5200738054336069, "grad_norm": 0.3540281653404236, "learning_rate": 1.6862838628541877e-05, "loss": 0.4978, "step": 24522 }, { "epoch": 0.5200950138915399, "grad_norm": 0.3415423631668091, "learning_rate": 1.686259606230337e-05, "loss": 0.5151, "step": 24523 }, { "epoch": 0.520116222349473, "grad_norm": 0.3130210041999817, "learning_rate": 1.6862353488432345e-05, "loss": 0.528, "step": 24524 }, { "epoch": 0.520137430807406, "grad_norm": 0.3081137239933014, "learning_rate": 1.6862110906929073e-05, "loss": 0.4441, "step": 24525 }, { "epoch": 0.520158639265339, "grad_norm": 0.3654871881008148, "learning_rate": 1.6861868317793817e-05, "loss": 0.5267, "step": 24526 }, { "epoch": 0.5201798477232721, "grad_norm": 0.7429081797599792, "learning_rate": 1.686162572102685e-05, "loss": 0.5809, "step": 24527 }, { "epoch": 0.520201056181205, "grad_norm": 0.33473527431488037, "learning_rate": 1.6861383116628445e-05, "loss": 0.4989, "step": 24528 }, { "epoch": 0.5202222646391381, "grad_norm": 0.4503649175167084, "learning_rate": 1.6861140504598868e-05, "loss": 0.5091, "step": 24529 }, { "epoch": 0.5202434730970711, "grad_norm": 0.38596636056900024, "learning_rate": 1.686089788493839e-05, "loss": 0.4711, "step": 24530 }, { "epoch": 0.5202646815550042, "grad_norm": 0.3475891947746277, "learning_rate": 1.6860655257647283e-05, "loss": 0.4599, "step": 24531 }, { "epoch": 0.5202858900129371, "grad_norm": 0.374640554189682, "learning_rate": 1.6860412622725814e-05, "loss": 0.5464, "step": 24532 }, { "epoch": 0.5203070984708702, "grad_norm": 0.39100584387779236, "learning_rate": 1.686016998017425e-05, "loss": 0.5691, "step": 24533 }, { "epoch": 0.5203283069288032, "grad_norm": 0.32230690121650696, "learning_rate": 1.685992732999287e-05, "loss": 0.4661, "step": 24534 }, { "epoch": 0.5203495153867362, "grad_norm": 0.39350998401641846, "learning_rate": 1.6859684672181935e-05, "loss": 0.5861, "step": 24535 }, { "epoch": 0.5203707238446692, "grad_norm": 0.38562309741973877, "learning_rate": 1.685944200674172e-05, "loss": 0.5213, "step": 24536 }, { "epoch": 0.5203919323026023, "grad_norm": 0.3148820102214813, "learning_rate": 1.6859199333672492e-05, "loss": 0.4213, "step": 24537 }, { "epoch": 0.5204131407605354, "grad_norm": 0.3439684808254242, "learning_rate": 1.6858956652974523e-05, "loss": 0.4913, "step": 24538 }, { "epoch": 0.5204343492184683, "grad_norm": 0.3436272442340851, "learning_rate": 1.6858713964648084e-05, "loss": 0.5036, "step": 24539 }, { "epoch": 0.5204555576764014, "grad_norm": 0.31709474325180054, "learning_rate": 1.685847126869344e-05, "loss": 0.4876, "step": 24540 }, { "epoch": 0.5204767661343344, "grad_norm": 0.35615047812461853, "learning_rate": 1.6858228565110862e-05, "loss": 0.5543, "step": 24541 }, { "epoch": 0.5204979745922674, "grad_norm": 0.3333158493041992, "learning_rate": 1.6857985853900625e-05, "loss": 0.5124, "step": 24542 }, { "epoch": 0.5205191830502004, "grad_norm": 0.32430902123451233, "learning_rate": 1.6857743135062995e-05, "loss": 0.4804, "step": 24543 }, { "epoch": 0.5205403915081335, "grad_norm": 0.34524884819984436, "learning_rate": 1.685750040859824e-05, "loss": 0.5196, "step": 24544 }, { "epoch": 0.5205615999660664, "grad_norm": 0.34633636474609375, "learning_rate": 1.6857257674506638e-05, "loss": 0.4627, "step": 24545 }, { "epoch": 0.5205828084239995, "grad_norm": 0.3564949035644531, "learning_rate": 1.685701493278845e-05, "loss": 0.5127, "step": 24546 }, { "epoch": 0.5206040168819325, "grad_norm": 0.31959691643714905, "learning_rate": 1.6856772183443953e-05, "loss": 0.491, "step": 24547 }, { "epoch": 0.5206252253398655, "grad_norm": 0.3831186890602112, "learning_rate": 1.6856529426473408e-05, "loss": 0.6074, "step": 24548 }, { "epoch": 0.5206464337977985, "grad_norm": 0.3608410060405731, "learning_rate": 1.6856286661877096e-05, "loss": 0.5003, "step": 24549 }, { "epoch": 0.5206676422557316, "grad_norm": 0.31552496552467346, "learning_rate": 1.6856043889655276e-05, "loss": 0.4834, "step": 24550 }, { "epoch": 0.5206888507136647, "grad_norm": 0.3373660147190094, "learning_rate": 1.6855801109808228e-05, "loss": 0.4746, "step": 24551 }, { "epoch": 0.5207100591715976, "grad_norm": 0.3349967896938324, "learning_rate": 1.6855558322336216e-05, "loss": 0.4358, "step": 24552 }, { "epoch": 0.5207312676295307, "grad_norm": 0.38643985986709595, "learning_rate": 1.6855315527239512e-05, "loss": 0.5556, "step": 24553 }, { "epoch": 0.5207524760874637, "grad_norm": 0.35113659501075745, "learning_rate": 1.6855072724518384e-05, "loss": 0.5223, "step": 24554 }, { "epoch": 0.5207736845453967, "grad_norm": 0.4451735019683838, "learning_rate": 1.6854829914173105e-05, "loss": 0.5908, "step": 24555 }, { "epoch": 0.5207948930033297, "grad_norm": 0.4341809153556824, "learning_rate": 1.6854587096203943e-05, "loss": 0.5374, "step": 24556 }, { "epoch": 0.5208161014612628, "grad_norm": 0.36665043234825134, "learning_rate": 1.6854344270611173e-05, "loss": 0.5647, "step": 24557 }, { "epoch": 0.5208373099191957, "grad_norm": 0.3567237854003906, "learning_rate": 1.6854101437395055e-05, "loss": 0.5394, "step": 24558 }, { "epoch": 0.5208585183771288, "grad_norm": 0.3841574788093567, "learning_rate": 1.685385859655587e-05, "loss": 0.4976, "step": 24559 }, { "epoch": 0.5208797268350618, "grad_norm": 0.38276955485343933, "learning_rate": 1.6853615748093876e-05, "loss": 0.4618, "step": 24560 }, { "epoch": 0.5209009352929949, "grad_norm": 0.36524561047554016, "learning_rate": 1.6853372892009354e-05, "loss": 0.5257, "step": 24561 }, { "epoch": 0.5209221437509278, "grad_norm": 0.3245401084423065, "learning_rate": 1.6853130028302574e-05, "loss": 0.4804, "step": 24562 }, { "epoch": 0.5209433522088609, "grad_norm": 0.378171443939209, "learning_rate": 1.6852887156973796e-05, "loss": 0.5137, "step": 24563 }, { "epoch": 0.5209645606667939, "grad_norm": 0.3289392590522766, "learning_rate": 1.68526442780233e-05, "loss": 0.4851, "step": 24564 }, { "epoch": 0.5209857691247269, "grad_norm": 0.3188803493976593, "learning_rate": 1.685240139145135e-05, "loss": 0.4643, "step": 24565 }, { "epoch": 0.52100697758266, "grad_norm": 0.3261485993862152, "learning_rate": 1.6852158497258223e-05, "loss": 0.5733, "step": 24566 }, { "epoch": 0.521028186040593, "grad_norm": 0.34796762466430664, "learning_rate": 1.685191559544418e-05, "loss": 0.4841, "step": 24567 }, { "epoch": 0.521049394498526, "grad_norm": 0.3292388319969177, "learning_rate": 1.68516726860095e-05, "loss": 0.4636, "step": 24568 }, { "epoch": 0.521070602956459, "grad_norm": 0.3361574113368988, "learning_rate": 1.6851429768954445e-05, "loss": 0.4699, "step": 24569 }, { "epoch": 0.5210918114143921, "grad_norm": 0.35527199506759644, "learning_rate": 1.6851186844279295e-05, "loss": 0.5482, "step": 24570 }, { "epoch": 0.5211130198723251, "grad_norm": 0.3383486568927765, "learning_rate": 1.685094391198431e-05, "loss": 0.4821, "step": 24571 }, { "epoch": 0.5211342283302581, "grad_norm": 0.4404747486114502, "learning_rate": 1.6850700972069763e-05, "loss": 0.4534, "step": 24572 }, { "epoch": 0.5211554367881911, "grad_norm": 0.34129229187965393, "learning_rate": 1.685045802453593e-05, "loss": 0.534, "step": 24573 }, { "epoch": 0.5211766452461242, "grad_norm": 0.35484638810157776, "learning_rate": 1.685021506938308e-05, "loss": 0.4521, "step": 24574 }, { "epoch": 0.5211978537040571, "grad_norm": 0.33552947640419006, "learning_rate": 1.6849972106611473e-05, "loss": 0.5115, "step": 24575 }, { "epoch": 0.5212190621619902, "grad_norm": 0.33272063732147217, "learning_rate": 1.6849729136221392e-05, "loss": 0.5012, "step": 24576 }, { "epoch": 0.5212402706199232, "grad_norm": 0.4115836024284363, "learning_rate": 1.68494861582131e-05, "loss": 0.6059, "step": 24577 }, { "epoch": 0.5212614790778562, "grad_norm": 0.3412705957889557, "learning_rate": 1.6849243172586865e-05, "loss": 0.5886, "step": 24578 }, { "epoch": 0.5212826875357893, "grad_norm": 0.3552699089050293, "learning_rate": 1.6849000179342968e-05, "loss": 0.5384, "step": 24579 }, { "epoch": 0.5213038959937223, "grad_norm": 0.450836718082428, "learning_rate": 1.6848757178481666e-05, "loss": 0.4392, "step": 24580 }, { "epoch": 0.5213251044516554, "grad_norm": 0.3442123532295227, "learning_rate": 1.6848514170003244e-05, "loss": 0.5403, "step": 24581 }, { "epoch": 0.5213463129095883, "grad_norm": 0.34727999567985535, "learning_rate": 1.6848271153907957e-05, "loss": 0.448, "step": 24582 }, { "epoch": 0.5213675213675214, "grad_norm": 0.31955307722091675, "learning_rate": 1.6848028130196084e-05, "loss": 0.4992, "step": 24583 }, { "epoch": 0.5213887298254544, "grad_norm": 0.354945570230484, "learning_rate": 1.6847785098867895e-05, "loss": 0.5128, "step": 24584 }, { "epoch": 0.5214099382833874, "grad_norm": 0.42757534980773926, "learning_rate": 1.6847542059923657e-05, "loss": 0.5449, "step": 24585 }, { "epoch": 0.5214311467413204, "grad_norm": 0.34177765250205994, "learning_rate": 1.6847299013363643e-05, "loss": 0.455, "step": 24586 }, { "epoch": 0.5214523551992535, "grad_norm": 0.36199918389320374, "learning_rate": 1.6847055959188123e-05, "loss": 0.502, "step": 24587 }, { "epoch": 0.5214735636571864, "grad_norm": 0.4146857261657715, "learning_rate": 1.6846812897397366e-05, "loss": 0.483, "step": 24588 }, { "epoch": 0.5214947721151195, "grad_norm": 0.3042758107185364, "learning_rate": 1.6846569827991644e-05, "loss": 0.4657, "step": 24589 }, { "epoch": 0.5215159805730525, "grad_norm": 0.3667251169681549, "learning_rate": 1.6846326750971228e-05, "loss": 0.5618, "step": 24590 }, { "epoch": 0.5215371890309856, "grad_norm": 0.3816215395927429, "learning_rate": 1.6846083666336386e-05, "loss": 0.5321, "step": 24591 }, { "epoch": 0.5215583974889186, "grad_norm": 0.3351539969444275, "learning_rate": 1.6845840574087388e-05, "loss": 0.4755, "step": 24592 }, { "epoch": 0.5215796059468516, "grad_norm": 0.41495513916015625, "learning_rate": 1.6845597474224503e-05, "loss": 0.5036, "step": 24593 }, { "epoch": 0.5216008144047847, "grad_norm": 0.42742201685905457, "learning_rate": 1.6845354366748008e-05, "loss": 0.4767, "step": 24594 }, { "epoch": 0.5216220228627176, "grad_norm": 0.35417261719703674, "learning_rate": 1.6845111251658167e-05, "loss": 0.4654, "step": 24595 }, { "epoch": 0.5216432313206507, "grad_norm": 0.3615190088748932, "learning_rate": 1.6844868128955255e-05, "loss": 0.4794, "step": 24596 }, { "epoch": 0.5216644397785837, "grad_norm": 0.3530692458152771, "learning_rate": 1.684462499863954e-05, "loss": 0.5658, "step": 24597 }, { "epoch": 0.5216856482365168, "grad_norm": 0.3904530704021454, "learning_rate": 1.6844381860711288e-05, "loss": 0.5305, "step": 24598 }, { "epoch": 0.5217068566944497, "grad_norm": 0.35111209750175476, "learning_rate": 1.684413871517078e-05, "loss": 0.4809, "step": 24599 }, { "epoch": 0.5217280651523828, "grad_norm": 0.338619202375412, "learning_rate": 1.6843895562018276e-05, "loss": 0.4565, "step": 24600 }, { "epoch": 0.5217492736103158, "grad_norm": 0.3942035138607025, "learning_rate": 1.6843652401254057e-05, "loss": 0.4441, "step": 24601 }, { "epoch": 0.5217704820682488, "grad_norm": 0.40283066034317017, "learning_rate": 1.684340923287838e-05, "loss": 0.4788, "step": 24602 }, { "epoch": 0.5217916905261818, "grad_norm": 0.3384839594364166, "learning_rate": 1.6843166056891526e-05, "loss": 0.5101, "step": 24603 }, { "epoch": 0.5218128989841149, "grad_norm": 0.3542580008506775, "learning_rate": 1.684292287329376e-05, "loss": 0.5058, "step": 24604 }, { "epoch": 0.5218341074420478, "grad_norm": 0.3728371262550354, "learning_rate": 1.684267968208536e-05, "loss": 0.5408, "step": 24605 }, { "epoch": 0.5218553158999809, "grad_norm": 0.3240850567817688, "learning_rate": 1.6842436483266586e-05, "loss": 0.4692, "step": 24606 }, { "epoch": 0.521876524357914, "grad_norm": 0.387949675321579, "learning_rate": 1.6842193276837716e-05, "loss": 0.5861, "step": 24607 }, { "epoch": 0.521897732815847, "grad_norm": 0.323560893535614, "learning_rate": 1.6841950062799018e-05, "loss": 0.4603, "step": 24608 }, { "epoch": 0.52191894127378, "grad_norm": 0.3262631297111511, "learning_rate": 1.6841706841150762e-05, "loss": 0.4718, "step": 24609 }, { "epoch": 0.521940149731713, "grad_norm": 0.3255750238895416, "learning_rate": 1.684146361189322e-05, "loss": 0.4458, "step": 24610 }, { "epoch": 0.5219613581896461, "grad_norm": 0.3631690740585327, "learning_rate": 1.684122037502666e-05, "loss": 0.5343, "step": 24611 }, { "epoch": 0.521982566647579, "grad_norm": 0.34829846024513245, "learning_rate": 1.6840977130551352e-05, "loss": 0.5343, "step": 24612 }, { "epoch": 0.5220037751055121, "grad_norm": 0.33566635847091675, "learning_rate": 1.6840733878467573e-05, "loss": 0.5113, "step": 24613 }, { "epoch": 0.5220249835634451, "grad_norm": 0.35113897919654846, "learning_rate": 1.6840490618775588e-05, "loss": 0.4579, "step": 24614 }, { "epoch": 0.5220461920213781, "grad_norm": 0.38248568773269653, "learning_rate": 1.6840247351475668e-05, "loss": 0.4608, "step": 24615 }, { "epoch": 0.5220674004793111, "grad_norm": 0.30950066447257996, "learning_rate": 1.6840004076568084e-05, "loss": 0.3958, "step": 24616 }, { "epoch": 0.5220886089372442, "grad_norm": 0.4394163489341736, "learning_rate": 1.6839760794053105e-05, "loss": 0.5511, "step": 24617 }, { "epoch": 0.5221098173951771, "grad_norm": 0.32565566897392273, "learning_rate": 1.6839517503931007e-05, "loss": 0.4823, "step": 24618 }, { "epoch": 0.5221310258531102, "grad_norm": 0.36716774106025696, "learning_rate": 1.6839274206202055e-05, "loss": 0.5811, "step": 24619 }, { "epoch": 0.5221522343110433, "grad_norm": 0.39189258217811584, "learning_rate": 1.6839030900866524e-05, "loss": 0.5546, "step": 24620 }, { "epoch": 0.5221734427689763, "grad_norm": 0.37480199337005615, "learning_rate": 1.683878758792468e-05, "loss": 0.4037, "step": 24621 }, { "epoch": 0.5221946512269093, "grad_norm": 0.3085726201534271, "learning_rate": 1.6838544267376797e-05, "loss": 0.4511, "step": 24622 }, { "epoch": 0.5222158596848423, "grad_norm": 0.3232758343219757, "learning_rate": 1.6838300939223144e-05, "loss": 0.4374, "step": 24623 }, { "epoch": 0.5222370681427754, "grad_norm": 0.360538125038147, "learning_rate": 1.6838057603463995e-05, "loss": 0.4911, "step": 24624 }, { "epoch": 0.5222582766007083, "grad_norm": 0.33214670419692993, "learning_rate": 1.6837814260099615e-05, "loss": 0.5446, "step": 24625 }, { "epoch": 0.5222794850586414, "grad_norm": 0.36527305841445923, "learning_rate": 1.6837570909130275e-05, "loss": 0.5268, "step": 24626 }, { "epoch": 0.5223006935165744, "grad_norm": 0.32757115364074707, "learning_rate": 1.6837327550556253e-05, "loss": 0.4039, "step": 24627 }, { "epoch": 0.5223219019745075, "grad_norm": 0.3546088933944702, "learning_rate": 1.6837084184377813e-05, "loss": 0.4898, "step": 24628 }, { "epoch": 0.5223431104324404, "grad_norm": 0.382236123085022, "learning_rate": 1.6836840810595224e-05, "loss": 0.4398, "step": 24629 }, { "epoch": 0.5223643188903735, "grad_norm": 0.3328450918197632, "learning_rate": 1.683659742920876e-05, "loss": 0.5294, "step": 24630 }, { "epoch": 0.5223855273483065, "grad_norm": 0.35478097200393677, "learning_rate": 1.6836354040218698e-05, "loss": 0.5238, "step": 24631 }, { "epoch": 0.5224067358062395, "grad_norm": 0.34087100625038147, "learning_rate": 1.6836110643625297e-05, "loss": 0.5148, "step": 24632 }, { "epoch": 0.5224279442641726, "grad_norm": 0.34053710103034973, "learning_rate": 1.6835867239428837e-05, "loss": 0.5665, "step": 24633 }, { "epoch": 0.5224491527221056, "grad_norm": 0.33742377161979675, "learning_rate": 1.6835623827629582e-05, "loss": 0.5048, "step": 24634 }, { "epoch": 0.5224703611800386, "grad_norm": 0.36894169449806213, "learning_rate": 1.6835380408227803e-05, "loss": 0.4866, "step": 24635 }, { "epoch": 0.5224915696379716, "grad_norm": 0.34178346395492554, "learning_rate": 1.683513698122378e-05, "loss": 0.5859, "step": 24636 }, { "epoch": 0.5225127780959047, "grad_norm": 0.3507869839668274, "learning_rate": 1.6834893546617772e-05, "loss": 0.502, "step": 24637 }, { "epoch": 0.5225339865538376, "grad_norm": 0.548009991645813, "learning_rate": 1.6834650104410055e-05, "loss": 0.5039, "step": 24638 }, { "epoch": 0.5225551950117707, "grad_norm": 0.3653446137905121, "learning_rate": 1.68344066546009e-05, "loss": 0.4297, "step": 24639 }, { "epoch": 0.5225764034697037, "grad_norm": 0.32818159461021423, "learning_rate": 1.683416319719058e-05, "loss": 0.5015, "step": 24640 }, { "epoch": 0.5225976119276368, "grad_norm": 0.3553450107574463, "learning_rate": 1.683391973217936e-05, "loss": 0.4859, "step": 24641 }, { "epoch": 0.5226188203855697, "grad_norm": 0.32372647523880005, "learning_rate": 1.6833676259567516e-05, "loss": 0.5244, "step": 24642 }, { "epoch": 0.5226400288435028, "grad_norm": 0.32133185863494873, "learning_rate": 1.6833432779355314e-05, "loss": 0.4868, "step": 24643 }, { "epoch": 0.5226612373014358, "grad_norm": 0.33325493335723877, "learning_rate": 1.683318929154303e-05, "loss": 0.5109, "step": 24644 }, { "epoch": 0.5226824457593688, "grad_norm": 0.6116818785667419, "learning_rate": 1.6832945796130932e-05, "loss": 0.5501, "step": 24645 }, { "epoch": 0.5227036542173018, "grad_norm": 0.40014412999153137, "learning_rate": 1.683270229311929e-05, "loss": 0.5392, "step": 24646 }, { "epoch": 0.5227248626752349, "grad_norm": 0.3497685194015503, "learning_rate": 1.6832458782508376e-05, "loss": 0.5382, "step": 24647 }, { "epoch": 0.522746071133168, "grad_norm": 0.33585965633392334, "learning_rate": 1.6832215264298463e-05, "loss": 0.5097, "step": 24648 }, { "epoch": 0.5227672795911009, "grad_norm": 0.34396907687187195, "learning_rate": 1.6831971738489813e-05, "loss": 0.4693, "step": 24649 }, { "epoch": 0.522788488049034, "grad_norm": 0.3072563409805298, "learning_rate": 1.6831728205082713e-05, "loss": 0.4528, "step": 24650 }, { "epoch": 0.522809696506967, "grad_norm": 0.3901631534099579, "learning_rate": 1.6831484664077415e-05, "loss": 0.4717, "step": 24651 }, { "epoch": 0.5228309049649, "grad_norm": 0.4501500427722931, "learning_rate": 1.6831241115474207e-05, "loss": 0.4889, "step": 24652 }, { "epoch": 0.522852113422833, "grad_norm": 0.33679842948913574, "learning_rate": 1.6830997559273345e-05, "loss": 0.5181, "step": 24653 }, { "epoch": 0.5228733218807661, "grad_norm": 0.34411191940307617, "learning_rate": 1.6830753995475112e-05, "loss": 0.5691, "step": 24654 }, { "epoch": 0.522894530338699, "grad_norm": 0.33504238724708557, "learning_rate": 1.6830510424079772e-05, "loss": 0.4778, "step": 24655 }, { "epoch": 0.5229157387966321, "grad_norm": 0.33266884088516235, "learning_rate": 1.6830266845087595e-05, "loss": 0.5179, "step": 24656 }, { "epoch": 0.5229369472545651, "grad_norm": 0.35714060068130493, "learning_rate": 1.683002325849886e-05, "loss": 0.4881, "step": 24657 }, { "epoch": 0.5229581557124982, "grad_norm": 0.3572918772697449, "learning_rate": 1.6829779664313828e-05, "loss": 0.5979, "step": 24658 }, { "epoch": 0.5229793641704311, "grad_norm": 0.3414393365383148, "learning_rate": 1.6829536062532774e-05, "loss": 0.5742, "step": 24659 }, { "epoch": 0.5230005726283642, "grad_norm": 0.2867007553577423, "learning_rate": 1.6829292453155972e-05, "loss": 0.4063, "step": 24660 }, { "epoch": 0.5230217810862973, "grad_norm": 0.33630719780921936, "learning_rate": 1.682904883618369e-05, "loss": 0.5416, "step": 24661 }, { "epoch": 0.5230429895442302, "grad_norm": 0.3284934461116791, "learning_rate": 1.6828805211616197e-05, "loss": 0.4683, "step": 24662 }, { "epoch": 0.5230641980021633, "grad_norm": 0.3729150891304016, "learning_rate": 1.682856157945377e-05, "loss": 0.4, "step": 24663 }, { "epoch": 0.5230854064600963, "grad_norm": 0.3760964274406433, "learning_rate": 1.6828317939696674e-05, "loss": 0.484, "step": 24664 }, { "epoch": 0.5231066149180293, "grad_norm": 0.4185829758644104, "learning_rate": 1.6828074292345183e-05, "loss": 0.412, "step": 24665 }, { "epoch": 0.5231278233759623, "grad_norm": 0.3458523452281952, "learning_rate": 1.6827830637399564e-05, "loss": 0.4678, "step": 24666 }, { "epoch": 0.5231490318338954, "grad_norm": 0.32784345746040344, "learning_rate": 1.682758697486009e-05, "loss": 0.4531, "step": 24667 }, { "epoch": 0.5231702402918283, "grad_norm": 0.3479897975921631, "learning_rate": 1.682734330472704e-05, "loss": 0.5916, "step": 24668 }, { "epoch": 0.5231914487497614, "grad_norm": 0.33802369236946106, "learning_rate": 1.6827099627000673e-05, "loss": 0.4312, "step": 24669 }, { "epoch": 0.5232126572076944, "grad_norm": 0.47412970662117004, "learning_rate": 1.6826855941681266e-05, "loss": 0.5433, "step": 24670 }, { "epoch": 0.5232338656656275, "grad_norm": 0.3446863889694214, "learning_rate": 1.6826612248769093e-05, "loss": 0.4493, "step": 24671 }, { "epoch": 0.5232550741235604, "grad_norm": 0.38164472579956055, "learning_rate": 1.6826368548264414e-05, "loss": 0.4857, "step": 24672 }, { "epoch": 0.5232762825814935, "grad_norm": 0.36327338218688965, "learning_rate": 1.6826124840167512e-05, "loss": 0.5126, "step": 24673 }, { "epoch": 0.5232974910394266, "grad_norm": 0.3771217167377472, "learning_rate": 1.6825881124478653e-05, "loss": 0.4668, "step": 24674 }, { "epoch": 0.5233186994973595, "grad_norm": 0.34503817558288574, "learning_rate": 1.6825637401198106e-05, "loss": 0.5115, "step": 24675 }, { "epoch": 0.5233399079552926, "grad_norm": 0.3475930988788605, "learning_rate": 1.6825393670326144e-05, "loss": 0.512, "step": 24676 }, { "epoch": 0.5233611164132256, "grad_norm": 0.31688427925109863, "learning_rate": 1.6825149931863043e-05, "loss": 0.424, "step": 24677 }, { "epoch": 0.5233823248711587, "grad_norm": 0.3338611423969269, "learning_rate": 1.6824906185809066e-05, "loss": 0.4887, "step": 24678 }, { "epoch": 0.5234035333290916, "grad_norm": 0.3412904143333435, "learning_rate": 1.6824662432164488e-05, "loss": 0.5652, "step": 24679 }, { "epoch": 0.5234247417870247, "grad_norm": 0.3538728952407837, "learning_rate": 1.682441867092958e-05, "loss": 0.4955, "step": 24680 }, { "epoch": 0.5234459502449577, "grad_norm": 0.33140552043914795, "learning_rate": 1.6824174902104613e-05, "loss": 0.456, "step": 24681 }, { "epoch": 0.5234671587028907, "grad_norm": 0.3396276831626892, "learning_rate": 1.682393112568986e-05, "loss": 0.5204, "step": 24682 }, { "epoch": 0.5234883671608237, "grad_norm": 0.3754558265209198, "learning_rate": 1.6823687341685587e-05, "loss": 0.4474, "step": 24683 }, { "epoch": 0.5235095756187568, "grad_norm": 0.32354632019996643, "learning_rate": 1.682344355009207e-05, "loss": 0.4395, "step": 24684 }, { "epoch": 0.5235307840766897, "grad_norm": 0.42664995789527893, "learning_rate": 1.6823199750909577e-05, "loss": 0.4652, "step": 24685 }, { "epoch": 0.5235519925346228, "grad_norm": 0.3392978012561798, "learning_rate": 1.682295594413838e-05, "loss": 0.5348, "step": 24686 }, { "epoch": 0.5235732009925558, "grad_norm": 0.37707263231277466, "learning_rate": 1.6822712129778753e-05, "loss": 0.5524, "step": 24687 }, { "epoch": 0.5235944094504889, "grad_norm": 0.5629155039787292, "learning_rate": 1.6822468307830963e-05, "loss": 0.4836, "step": 24688 }, { "epoch": 0.5236156179084219, "grad_norm": 0.399316668510437, "learning_rate": 1.6822224478295282e-05, "loss": 0.5362, "step": 24689 }, { "epoch": 0.5236368263663549, "grad_norm": 0.34524860978126526, "learning_rate": 1.6821980641171985e-05, "loss": 0.4956, "step": 24690 }, { "epoch": 0.523658034824288, "grad_norm": 0.3058571219444275, "learning_rate": 1.682173679646134e-05, "loss": 0.4573, "step": 24691 }, { "epoch": 0.5236792432822209, "grad_norm": 0.38252323865890503, "learning_rate": 1.682149294416362e-05, "loss": 0.4996, "step": 24692 }, { "epoch": 0.523700451740154, "grad_norm": 0.32171866297721863, "learning_rate": 1.682124908427909e-05, "loss": 0.4475, "step": 24693 }, { "epoch": 0.523721660198087, "grad_norm": 0.38076251745224, "learning_rate": 1.6821005216808028e-05, "loss": 0.5867, "step": 24694 }, { "epoch": 0.52374286865602, "grad_norm": 0.35452139377593994, "learning_rate": 1.6820761341750706e-05, "loss": 0.451, "step": 24695 }, { "epoch": 0.523764077113953, "grad_norm": 0.4688127040863037, "learning_rate": 1.682051745910739e-05, "loss": 0.4691, "step": 24696 }, { "epoch": 0.5237852855718861, "grad_norm": 0.37848931550979614, "learning_rate": 1.6820273568878354e-05, "loss": 0.4997, "step": 24697 }, { "epoch": 0.523806494029819, "grad_norm": 0.37365397810935974, "learning_rate": 1.682002967106387e-05, "loss": 0.5329, "step": 24698 }, { "epoch": 0.5238277024877521, "grad_norm": 0.3355223536491394, "learning_rate": 1.6819785765664208e-05, "loss": 0.4602, "step": 24699 }, { "epoch": 0.5238489109456851, "grad_norm": 0.3190319538116455, "learning_rate": 1.681954185267964e-05, "loss": 0.5296, "step": 24700 }, { "epoch": 0.5238701194036182, "grad_norm": 0.3374349772930145, "learning_rate": 1.6819297932110433e-05, "loss": 0.4823, "step": 24701 }, { "epoch": 0.5238913278615512, "grad_norm": 0.4091576635837555, "learning_rate": 1.6819054003956864e-05, "loss": 0.5693, "step": 24702 }, { "epoch": 0.5239125363194842, "grad_norm": 0.33990392088890076, "learning_rate": 1.6818810068219203e-05, "loss": 0.4671, "step": 24703 }, { "epoch": 0.5239337447774173, "grad_norm": 0.3716084063053131, "learning_rate": 1.681856612489772e-05, "loss": 0.4587, "step": 24704 }, { "epoch": 0.5239549532353502, "grad_norm": 0.4403804540634155, "learning_rate": 1.6818322173992688e-05, "loss": 0.5555, "step": 24705 }, { "epoch": 0.5239761616932833, "grad_norm": 0.33325013518333435, "learning_rate": 1.681807821550438e-05, "loss": 0.437, "step": 24706 }, { "epoch": 0.5239973701512163, "grad_norm": 0.33478936553001404, "learning_rate": 1.681783424943306e-05, "loss": 0.4851, "step": 24707 }, { "epoch": 0.5240185786091494, "grad_norm": 0.360074520111084, "learning_rate": 1.6817590275779005e-05, "loss": 0.5361, "step": 24708 }, { "epoch": 0.5240397870670823, "grad_norm": 0.726527214050293, "learning_rate": 1.681734629454249e-05, "loss": 0.5997, "step": 24709 }, { "epoch": 0.5240609955250154, "grad_norm": 0.3392208218574524, "learning_rate": 1.6817102305723777e-05, "loss": 0.4489, "step": 24710 }, { "epoch": 0.5240822039829484, "grad_norm": 0.5092384219169617, "learning_rate": 1.681685830932314e-05, "loss": 0.4655, "step": 24711 }, { "epoch": 0.5241034124408814, "grad_norm": 0.3287309408187866, "learning_rate": 1.6816614305340855e-05, "loss": 0.5073, "step": 24712 }, { "epoch": 0.5241246208988144, "grad_norm": 0.35076168179512024, "learning_rate": 1.681637029377719e-05, "loss": 0.6027, "step": 24713 }, { "epoch": 0.5241458293567475, "grad_norm": 0.34080183506011963, "learning_rate": 1.681612627463242e-05, "loss": 0.4758, "step": 24714 }, { "epoch": 0.5241670378146805, "grad_norm": 0.3550184667110443, "learning_rate": 1.6815882247906808e-05, "loss": 0.5024, "step": 24715 }, { "epoch": 0.5241882462726135, "grad_norm": 0.353542685508728, "learning_rate": 1.6815638213600638e-05, "loss": 0.4789, "step": 24716 }, { "epoch": 0.5242094547305466, "grad_norm": 0.390057772397995, "learning_rate": 1.6815394171714173e-05, "loss": 0.5418, "step": 24717 }, { "epoch": 0.5242306631884796, "grad_norm": 0.35554036498069763, "learning_rate": 1.6815150122247682e-05, "loss": 0.4876, "step": 24718 }, { "epoch": 0.5242518716464126, "grad_norm": 0.34597447514533997, "learning_rate": 1.6814906065201442e-05, "loss": 0.5351, "step": 24719 }, { "epoch": 0.5242730801043456, "grad_norm": 0.3624374568462372, "learning_rate": 1.6814662000575724e-05, "loss": 0.5336, "step": 24720 }, { "epoch": 0.5242942885622787, "grad_norm": 0.3907054364681244, "learning_rate": 1.6814417928370796e-05, "loss": 0.5345, "step": 24721 }, { "epoch": 0.5243154970202116, "grad_norm": 0.3656211197376251, "learning_rate": 1.6814173848586933e-05, "loss": 0.4913, "step": 24722 }, { "epoch": 0.5243367054781447, "grad_norm": 0.380309134721756, "learning_rate": 1.6813929761224404e-05, "loss": 0.5334, "step": 24723 }, { "epoch": 0.5243579139360777, "grad_norm": 0.3148137927055359, "learning_rate": 1.6813685666283482e-05, "loss": 0.4149, "step": 24724 }, { "epoch": 0.5243791223940107, "grad_norm": 1.2469439506530762, "learning_rate": 1.6813441563764438e-05, "loss": 0.5121, "step": 24725 }, { "epoch": 0.5244003308519437, "grad_norm": 0.3644062578678131, "learning_rate": 1.6813197453667545e-05, "loss": 0.5404, "step": 24726 }, { "epoch": 0.5244215393098768, "grad_norm": 0.4088383615016937, "learning_rate": 1.681295333599307e-05, "loss": 0.4791, "step": 24727 }, { "epoch": 0.5244427477678097, "grad_norm": 0.3380660116672516, "learning_rate": 1.681270921074129e-05, "loss": 0.5528, "step": 24728 }, { "epoch": 0.5244639562257428, "grad_norm": 0.36020132899284363, "learning_rate": 1.6812465077912472e-05, "loss": 0.5414, "step": 24729 }, { "epoch": 0.5244851646836759, "grad_norm": 0.4142802357673645, "learning_rate": 1.681222093750689e-05, "loss": 0.5151, "step": 24730 }, { "epoch": 0.5245063731416089, "grad_norm": 0.33996984362602234, "learning_rate": 1.6811976789524816e-05, "loss": 0.5572, "step": 24731 }, { "epoch": 0.5245275815995419, "grad_norm": 0.3194458782672882, "learning_rate": 1.681173263396652e-05, "loss": 0.5177, "step": 24732 }, { "epoch": 0.5245487900574749, "grad_norm": 0.34109002351760864, "learning_rate": 1.6811488470832277e-05, "loss": 0.547, "step": 24733 }, { "epoch": 0.524569998515408, "grad_norm": 0.3546395003795624, "learning_rate": 1.681124430012235e-05, "loss": 0.5034, "step": 24734 }, { "epoch": 0.5245912069733409, "grad_norm": 0.36525651812553406, "learning_rate": 1.681100012183702e-05, "loss": 0.5228, "step": 24735 }, { "epoch": 0.524612415431274, "grad_norm": 0.3668507933616638, "learning_rate": 1.6810755935976555e-05, "loss": 0.5311, "step": 24736 }, { "epoch": 0.524633623889207, "grad_norm": 0.37394624948501587, "learning_rate": 1.6810511742541226e-05, "loss": 0.5912, "step": 24737 }, { "epoch": 0.52465483234714, "grad_norm": 0.36112016439437866, "learning_rate": 1.6810267541531304e-05, "loss": 0.5823, "step": 24738 }, { "epoch": 0.524676040805073, "grad_norm": 0.3610911965370178, "learning_rate": 1.6810023332947062e-05, "loss": 0.5912, "step": 24739 }, { "epoch": 0.5246972492630061, "grad_norm": 0.3413807451725006, "learning_rate": 1.680977911678877e-05, "loss": 0.435, "step": 24740 }, { "epoch": 0.5247184577209391, "grad_norm": 0.9768849015235901, "learning_rate": 1.68095348930567e-05, "loss": 0.493, "step": 24741 }, { "epoch": 0.5247396661788721, "grad_norm": 0.32024475932121277, "learning_rate": 1.680929066175113e-05, "loss": 0.4799, "step": 24742 }, { "epoch": 0.5247608746368052, "grad_norm": 0.37624868750572205, "learning_rate": 1.6809046422872322e-05, "loss": 0.532, "step": 24743 }, { "epoch": 0.5247820830947382, "grad_norm": 0.36174970865249634, "learning_rate": 1.6808802176420554e-05, "loss": 0.4878, "step": 24744 }, { "epoch": 0.5248032915526712, "grad_norm": 0.34749892354011536, "learning_rate": 1.6808557922396094e-05, "loss": 0.5691, "step": 24745 }, { "epoch": 0.5248245000106042, "grad_norm": 0.35152918100357056, "learning_rate": 1.680831366079921e-05, "loss": 0.4842, "step": 24746 }, { "epoch": 0.5248457084685373, "grad_norm": 0.33239632844924927, "learning_rate": 1.6808069391630187e-05, "loss": 0.5074, "step": 24747 }, { "epoch": 0.5248669169264703, "grad_norm": 0.36804336309432983, "learning_rate": 1.6807825114889286e-05, "loss": 0.539, "step": 24748 }, { "epoch": 0.5248881253844033, "grad_norm": 0.36555254459381104, "learning_rate": 1.6807580830576778e-05, "loss": 0.4719, "step": 24749 }, { "epoch": 0.5249093338423363, "grad_norm": 0.3479044437408447, "learning_rate": 1.680733653869294e-05, "loss": 0.483, "step": 24750 }, { "epoch": 0.5249305423002694, "grad_norm": 0.33479616045951843, "learning_rate": 1.680709223923804e-05, "loss": 0.4664, "step": 24751 }, { "epoch": 0.5249517507582023, "grad_norm": 0.35902461409568787, "learning_rate": 1.680684793221235e-05, "loss": 0.4825, "step": 24752 }, { "epoch": 0.5249729592161354, "grad_norm": 0.41074100136756897, "learning_rate": 1.6806603617616148e-05, "loss": 0.5113, "step": 24753 }, { "epoch": 0.5249941676740684, "grad_norm": 0.3089948892593384, "learning_rate": 1.6806359295449697e-05, "loss": 0.4922, "step": 24754 }, { "epoch": 0.5250153761320014, "grad_norm": 0.3693418502807617, "learning_rate": 1.680611496571327e-05, "loss": 0.4999, "step": 24755 }, { "epoch": 0.5250365845899345, "grad_norm": 0.34840962290763855, "learning_rate": 1.6805870628407146e-05, "loss": 0.4575, "step": 24756 }, { "epoch": 0.5250577930478675, "grad_norm": 0.3588205575942993, "learning_rate": 1.6805626283531592e-05, "loss": 0.5264, "step": 24757 }, { "epoch": 0.5250790015058006, "grad_norm": 0.39311668276786804, "learning_rate": 1.6805381931086877e-05, "loss": 0.4875, "step": 24758 }, { "epoch": 0.5251002099637335, "grad_norm": 0.34677815437316895, "learning_rate": 1.6805137571073276e-05, "loss": 0.4842, "step": 24759 }, { "epoch": 0.5251214184216666, "grad_norm": 0.3336278200149536, "learning_rate": 1.6804893203491056e-05, "loss": 0.4742, "step": 24760 }, { "epoch": 0.5251426268795996, "grad_norm": 0.35310545563697815, "learning_rate": 1.68046488283405e-05, "loss": 0.4809, "step": 24761 }, { "epoch": 0.5251638353375326, "grad_norm": 0.29333260655403137, "learning_rate": 1.680440444562187e-05, "loss": 0.4672, "step": 24762 }, { "epoch": 0.5251850437954656, "grad_norm": 0.3581005036830902, "learning_rate": 1.6804160055335437e-05, "loss": 0.4377, "step": 24763 }, { "epoch": 0.5252062522533987, "grad_norm": 0.35120105743408203, "learning_rate": 1.6803915657481483e-05, "loss": 0.4779, "step": 24764 }, { "epoch": 0.5252274607113316, "grad_norm": 0.34324097633361816, "learning_rate": 1.6803671252060265e-05, "loss": 0.5395, "step": 24765 }, { "epoch": 0.5252486691692647, "grad_norm": 0.38749203085899353, "learning_rate": 1.680342683907207e-05, "loss": 0.4742, "step": 24766 }, { "epoch": 0.5252698776271977, "grad_norm": 0.3251829445362091, "learning_rate": 1.680318241851716e-05, "loss": 0.46, "step": 24767 }, { "epoch": 0.5252910860851308, "grad_norm": 0.31179186701774597, "learning_rate": 1.680293799039581e-05, "loss": 0.4336, "step": 24768 }, { "epoch": 0.5253122945430638, "grad_norm": 0.3469710648059845, "learning_rate": 1.680269355470829e-05, "loss": 0.4064, "step": 24769 }, { "epoch": 0.5253335030009968, "grad_norm": 0.34313932061195374, "learning_rate": 1.6802449111454876e-05, "loss": 0.4628, "step": 24770 }, { "epoch": 0.5253547114589299, "grad_norm": 0.33952245116233826, "learning_rate": 1.6802204660635833e-05, "loss": 0.4924, "step": 24771 }, { "epoch": 0.5253759199168628, "grad_norm": 0.336729496717453, "learning_rate": 1.680196020225144e-05, "loss": 0.5256, "step": 24772 }, { "epoch": 0.5253971283747959, "grad_norm": 0.3214571177959442, "learning_rate": 1.6801715736301963e-05, "loss": 0.484, "step": 24773 }, { "epoch": 0.5254183368327289, "grad_norm": 0.34488150477409363, "learning_rate": 1.680147126278768e-05, "loss": 0.5112, "step": 24774 }, { "epoch": 0.525439545290662, "grad_norm": 0.36314624547958374, "learning_rate": 1.680122678170886e-05, "loss": 0.5255, "step": 24775 }, { "epoch": 0.5254607537485949, "grad_norm": 0.37811872363090515, "learning_rate": 1.6800982293065774e-05, "loss": 0.4826, "step": 24776 }, { "epoch": 0.525481962206528, "grad_norm": 0.35518020391464233, "learning_rate": 1.6800737796858698e-05, "loss": 0.541, "step": 24777 }, { "epoch": 0.525503170664461, "grad_norm": 0.3622719645500183, "learning_rate": 1.6800493293087895e-05, "loss": 0.4991, "step": 24778 }, { "epoch": 0.525524379122394, "grad_norm": 0.3866766691207886, "learning_rate": 1.6800248781753646e-05, "loss": 0.4829, "step": 24779 }, { "epoch": 0.525545587580327, "grad_norm": 0.3686644434928894, "learning_rate": 1.6800004262856217e-05, "loss": 0.5414, "step": 24780 }, { "epoch": 0.5255667960382601, "grad_norm": 0.39630937576293945, "learning_rate": 1.679975973639588e-05, "loss": 0.5089, "step": 24781 }, { "epoch": 0.525588004496193, "grad_norm": 0.3440263569355011, "learning_rate": 1.6799515202372917e-05, "loss": 0.5237, "step": 24782 }, { "epoch": 0.5256092129541261, "grad_norm": 0.3772749900817871, "learning_rate": 1.6799270660787588e-05, "loss": 0.4928, "step": 24783 }, { "epoch": 0.5256304214120592, "grad_norm": 0.3780164122581482, "learning_rate": 1.6799026111640166e-05, "loss": 0.4345, "step": 24784 }, { "epoch": 0.5256516298699921, "grad_norm": 0.34635114669799805, "learning_rate": 1.679878155493093e-05, "loss": 0.5268, "step": 24785 }, { "epoch": 0.5256728383279252, "grad_norm": 0.36657318472862244, "learning_rate": 1.6798536990660147e-05, "loss": 0.5662, "step": 24786 }, { "epoch": 0.5256940467858582, "grad_norm": 0.33835020661354065, "learning_rate": 1.679829241882809e-05, "loss": 0.444, "step": 24787 }, { "epoch": 0.5257152552437913, "grad_norm": 0.3485039472579956, "learning_rate": 1.679804783943503e-05, "loss": 0.4711, "step": 24788 }, { "epoch": 0.5257364637017242, "grad_norm": 0.34621965885162354, "learning_rate": 1.6797803252481245e-05, "loss": 0.573, "step": 24789 }, { "epoch": 0.5257576721596573, "grad_norm": 0.4154742360115051, "learning_rate": 1.6797558657967e-05, "loss": 0.5753, "step": 24790 }, { "epoch": 0.5257788806175903, "grad_norm": 0.32901567220687866, "learning_rate": 1.6797314055892568e-05, "loss": 0.5047, "step": 24791 }, { "epoch": 0.5258000890755233, "grad_norm": 0.3867145776748657, "learning_rate": 1.679706944625822e-05, "loss": 0.5498, "step": 24792 }, { "epoch": 0.5258212975334563, "grad_norm": 0.33189722895622253, "learning_rate": 1.6796824829064235e-05, "loss": 0.4889, "step": 24793 }, { "epoch": 0.5258425059913894, "grad_norm": 0.44023236632347107, "learning_rate": 1.679658020431088e-05, "loss": 0.5806, "step": 24794 }, { "epoch": 0.5258637144493223, "grad_norm": 0.36762988567352295, "learning_rate": 1.6796335571998425e-05, "loss": 0.4716, "step": 24795 }, { "epoch": 0.5258849229072554, "grad_norm": 0.3441390097141266, "learning_rate": 1.6796090932127147e-05, "loss": 0.4766, "step": 24796 }, { "epoch": 0.5259061313651885, "grad_norm": 0.3188089430332184, "learning_rate": 1.6795846284697313e-05, "loss": 0.4866, "step": 24797 }, { "epoch": 0.5259273398231215, "grad_norm": 0.3453240394592285, "learning_rate": 1.67956016297092e-05, "loss": 0.5134, "step": 24798 }, { "epoch": 0.5259485482810545, "grad_norm": 0.3068098723888397, "learning_rate": 1.6795356967163078e-05, "loss": 0.4984, "step": 24799 }, { "epoch": 0.5259697567389875, "grad_norm": 0.607132077217102, "learning_rate": 1.679511229705922e-05, "loss": 0.4331, "step": 24800 }, { "epoch": 0.5259909651969206, "grad_norm": 0.4092714786529541, "learning_rate": 1.6794867619397895e-05, "loss": 0.5313, "step": 24801 }, { "epoch": 0.5260121736548535, "grad_norm": 0.3373439610004425, "learning_rate": 1.6794622934179377e-05, "loss": 0.4974, "step": 24802 }, { "epoch": 0.5260333821127866, "grad_norm": 0.3923284709453583, "learning_rate": 1.679437824140394e-05, "loss": 0.5687, "step": 24803 }, { "epoch": 0.5260545905707196, "grad_norm": 0.3755936026573181, "learning_rate": 1.6794133541071852e-05, "loss": 0.5558, "step": 24804 }, { "epoch": 0.5260757990286526, "grad_norm": 0.3436315953731537, "learning_rate": 1.679388883318339e-05, "loss": 0.6035, "step": 24805 }, { "epoch": 0.5260970074865856, "grad_norm": 0.3528759777545929, "learning_rate": 1.6793644117738825e-05, "loss": 0.498, "step": 24806 }, { "epoch": 0.5261182159445187, "grad_norm": 0.3800971210002899, "learning_rate": 1.6793399394738426e-05, "loss": 0.5967, "step": 24807 }, { "epoch": 0.5261394244024516, "grad_norm": 0.31099534034729004, "learning_rate": 1.6793154664182466e-05, "loss": 0.4726, "step": 24808 }, { "epoch": 0.5261606328603847, "grad_norm": 0.4717264473438263, "learning_rate": 1.679290992607122e-05, "loss": 0.5115, "step": 24809 }, { "epoch": 0.5261818413183178, "grad_norm": 0.35067978501319885, "learning_rate": 1.6792665180404957e-05, "loss": 0.5221, "step": 24810 }, { "epoch": 0.5262030497762508, "grad_norm": 0.36009281873703003, "learning_rate": 1.6792420427183957e-05, "loss": 0.5018, "step": 24811 }, { "epoch": 0.5262242582341838, "grad_norm": 0.3709874153137207, "learning_rate": 1.679217566640848e-05, "loss": 0.5254, "step": 24812 }, { "epoch": 0.5262454666921168, "grad_norm": 0.34592801332473755, "learning_rate": 1.6791930898078805e-05, "loss": 0.5369, "step": 24813 }, { "epoch": 0.5262666751500499, "grad_norm": 0.315753698348999, "learning_rate": 1.6791686122195205e-05, "loss": 0.4669, "step": 24814 }, { "epoch": 0.5262878836079828, "grad_norm": 0.41706371307373047, "learning_rate": 1.6791441338757948e-05, "loss": 0.5148, "step": 24815 }, { "epoch": 0.5263090920659159, "grad_norm": 0.3804023265838623, "learning_rate": 1.679119654776731e-05, "loss": 0.4678, "step": 24816 }, { "epoch": 0.5263303005238489, "grad_norm": 0.4500369429588318, "learning_rate": 1.6790951749223564e-05, "loss": 0.5256, "step": 24817 }, { "epoch": 0.526351508981782, "grad_norm": 0.3366890549659729, "learning_rate": 1.679070694312698e-05, "loss": 0.5447, "step": 24818 }, { "epoch": 0.5263727174397149, "grad_norm": 0.35560473799705505, "learning_rate": 1.679046212947783e-05, "loss": 0.4805, "step": 24819 }, { "epoch": 0.526393925897648, "grad_norm": 0.3238409161567688, "learning_rate": 1.6790217308276388e-05, "loss": 0.4258, "step": 24820 }, { "epoch": 0.526415134355581, "grad_norm": 0.34185791015625, "learning_rate": 1.6789972479522923e-05, "loss": 0.4604, "step": 24821 }, { "epoch": 0.526436342813514, "grad_norm": 0.3774576187133789, "learning_rate": 1.6789727643217708e-05, "loss": 0.5147, "step": 24822 }, { "epoch": 0.526457551271447, "grad_norm": 0.3731847405433655, "learning_rate": 1.678948279936102e-05, "loss": 0.5778, "step": 24823 }, { "epoch": 0.5264787597293801, "grad_norm": 0.34466788172721863, "learning_rate": 1.678923794795313e-05, "loss": 0.4869, "step": 24824 }, { "epoch": 0.5264999681873131, "grad_norm": 0.38655033707618713, "learning_rate": 1.6788993088994305e-05, "loss": 0.4516, "step": 24825 }, { "epoch": 0.5265211766452461, "grad_norm": 0.3876034915447235, "learning_rate": 1.6788748222484824e-05, "loss": 0.5045, "step": 24826 }, { "epoch": 0.5265423851031792, "grad_norm": 0.31043416261672974, "learning_rate": 1.6788503348424955e-05, "loss": 0.4579, "step": 24827 }, { "epoch": 0.5265635935611122, "grad_norm": 0.4004048705101013, "learning_rate": 1.678825846681497e-05, "loss": 0.5454, "step": 24828 }, { "epoch": 0.5265848020190452, "grad_norm": 0.5772414207458496, "learning_rate": 1.6788013577655143e-05, "loss": 0.5101, "step": 24829 }, { "epoch": 0.5266060104769782, "grad_norm": 0.3547689616680145, "learning_rate": 1.678776868094575e-05, "loss": 0.511, "step": 24830 }, { "epoch": 0.5266272189349113, "grad_norm": 0.37551888823509216, "learning_rate": 1.6787523776687055e-05, "loss": 0.434, "step": 24831 }, { "epoch": 0.5266484273928442, "grad_norm": 0.3563912510871887, "learning_rate": 1.6787278864879336e-05, "loss": 0.525, "step": 24832 }, { "epoch": 0.5266696358507773, "grad_norm": 0.3797503113746643, "learning_rate": 1.678703394552287e-05, "loss": 0.5094, "step": 24833 }, { "epoch": 0.5266908443087103, "grad_norm": 0.319328248500824, "learning_rate": 1.6786789018617916e-05, "loss": 0.4788, "step": 24834 }, { "epoch": 0.5267120527666433, "grad_norm": 0.35891255736351013, "learning_rate": 1.6786544084164758e-05, "loss": 0.5112, "step": 24835 }, { "epoch": 0.5267332612245763, "grad_norm": 0.4065916836261749, "learning_rate": 1.6786299142163665e-05, "loss": 0.5801, "step": 24836 }, { "epoch": 0.5267544696825094, "grad_norm": 0.35825595259666443, "learning_rate": 1.6786054192614905e-05, "loss": 0.5673, "step": 24837 }, { "epoch": 0.5267756781404425, "grad_norm": 0.3258097469806671, "learning_rate": 1.678580923551876e-05, "loss": 0.5244, "step": 24838 }, { "epoch": 0.5267968865983754, "grad_norm": 0.3871210515499115, "learning_rate": 1.6785564270875494e-05, "loss": 0.4777, "step": 24839 }, { "epoch": 0.5268180950563085, "grad_norm": 0.4898615777492523, "learning_rate": 1.678531929868538e-05, "loss": 0.6137, "step": 24840 }, { "epoch": 0.5268393035142415, "grad_norm": 0.3495033085346222, "learning_rate": 1.67850743189487e-05, "loss": 0.4523, "step": 24841 }, { "epoch": 0.5268605119721745, "grad_norm": 0.32341527938842773, "learning_rate": 1.6784829331665713e-05, "loss": 0.4444, "step": 24842 }, { "epoch": 0.5268817204301075, "grad_norm": 0.3975869417190552, "learning_rate": 1.67845843368367e-05, "loss": 0.5574, "step": 24843 }, { "epoch": 0.5269029288880406, "grad_norm": 0.3914264142513275, "learning_rate": 1.678433933446193e-05, "loss": 0.5074, "step": 24844 }, { "epoch": 0.5269241373459735, "grad_norm": 0.34936365485191345, "learning_rate": 1.678409432454168e-05, "loss": 0.4729, "step": 24845 }, { "epoch": 0.5269453458039066, "grad_norm": 0.34681951999664307, "learning_rate": 1.6783849307076217e-05, "loss": 0.4666, "step": 24846 }, { "epoch": 0.5269665542618396, "grad_norm": 0.4123966693878174, "learning_rate": 1.6783604282065814e-05, "loss": 0.5518, "step": 24847 }, { "epoch": 0.5269877627197727, "grad_norm": 0.3633950352668762, "learning_rate": 1.6783359249510746e-05, "loss": 0.4839, "step": 24848 }, { "epoch": 0.5270089711777056, "grad_norm": 0.3183019757270813, "learning_rate": 1.6783114209411285e-05, "loss": 0.4751, "step": 24849 }, { "epoch": 0.5270301796356387, "grad_norm": 0.37747254967689514, "learning_rate": 1.6782869161767704e-05, "loss": 0.5647, "step": 24850 }, { "epoch": 0.5270513880935718, "grad_norm": 0.3394860327243805, "learning_rate": 1.678262410658028e-05, "loss": 0.4516, "step": 24851 }, { "epoch": 0.5270725965515047, "grad_norm": 0.30908092856407166, "learning_rate": 1.6782379043849272e-05, "loss": 0.5255, "step": 24852 }, { "epoch": 0.5270938050094378, "grad_norm": 0.41254574060440063, "learning_rate": 1.6782133973574964e-05, "loss": 0.5026, "step": 24853 }, { "epoch": 0.5271150134673708, "grad_norm": 0.3606894612312317, "learning_rate": 1.6781888895757626e-05, "loss": 0.4832, "step": 24854 }, { "epoch": 0.5271362219253038, "grad_norm": 0.32050618529319763, "learning_rate": 1.678164381039753e-05, "loss": 0.4413, "step": 24855 }, { "epoch": 0.5271574303832368, "grad_norm": 0.3926742672920227, "learning_rate": 1.6781398717494948e-05, "loss": 0.4839, "step": 24856 }, { "epoch": 0.5271786388411699, "grad_norm": 0.33877742290496826, "learning_rate": 1.6781153617050156e-05, "loss": 0.5242, "step": 24857 }, { "epoch": 0.5271998472991029, "grad_norm": 0.4004586935043335, "learning_rate": 1.678090850906342e-05, "loss": 0.5067, "step": 24858 }, { "epoch": 0.5272210557570359, "grad_norm": 0.3716793954372406, "learning_rate": 1.6780663393535016e-05, "loss": 0.4816, "step": 24859 }, { "epoch": 0.5272422642149689, "grad_norm": 0.3272491991519928, "learning_rate": 1.678041827046522e-05, "loss": 0.463, "step": 24860 }, { "epoch": 0.527263472672902, "grad_norm": 0.3508225083351135, "learning_rate": 1.67801731398543e-05, "loss": 0.5255, "step": 24861 }, { "epoch": 0.5272846811308349, "grad_norm": 0.41837069392204285, "learning_rate": 1.677992800170253e-05, "loss": 0.5755, "step": 24862 }, { "epoch": 0.527305889588768, "grad_norm": 0.33145076036453247, "learning_rate": 1.6779682856010184e-05, "loss": 0.4983, "step": 24863 }, { "epoch": 0.527327098046701, "grad_norm": 0.42156916856765747, "learning_rate": 1.6779437702777536e-05, "loss": 0.4696, "step": 24864 }, { "epoch": 0.527348306504634, "grad_norm": 0.3944879472255707, "learning_rate": 1.6779192542004855e-05, "loss": 0.5672, "step": 24865 }, { "epoch": 0.5273695149625671, "grad_norm": 0.34662193059921265, "learning_rate": 1.6778947373692415e-05, "loss": 0.5226, "step": 24866 }, { "epoch": 0.5273907234205001, "grad_norm": 0.42810264229774475, "learning_rate": 1.6778702197840484e-05, "loss": 0.5005, "step": 24867 }, { "epoch": 0.5274119318784332, "grad_norm": 0.3430715501308441, "learning_rate": 1.6778457014449343e-05, "loss": 0.5037, "step": 24868 }, { "epoch": 0.5274331403363661, "grad_norm": 0.3461070656776428, "learning_rate": 1.677821182351926e-05, "loss": 0.4114, "step": 24869 }, { "epoch": 0.5274543487942992, "grad_norm": 0.3573819398880005, "learning_rate": 1.6777966625050512e-05, "loss": 0.5343, "step": 24870 }, { "epoch": 0.5274755572522322, "grad_norm": 0.7220950722694397, "learning_rate": 1.6777721419043365e-05, "loss": 0.555, "step": 24871 }, { "epoch": 0.5274967657101652, "grad_norm": 0.4205801784992218, "learning_rate": 1.67774762054981e-05, "loss": 0.606, "step": 24872 }, { "epoch": 0.5275179741680982, "grad_norm": 0.3464650809764862, "learning_rate": 1.6777230984414977e-05, "loss": 0.5118, "step": 24873 }, { "epoch": 0.5275391826260313, "grad_norm": 0.401591032743454, "learning_rate": 1.6776985755794283e-05, "loss": 0.4795, "step": 24874 }, { "epoch": 0.5275603910839642, "grad_norm": 0.34698018431663513, "learning_rate": 1.677674051963628e-05, "loss": 0.5366, "step": 24875 }, { "epoch": 0.5275815995418973, "grad_norm": 0.3495291769504547, "learning_rate": 1.6776495275941248e-05, "loss": 0.5636, "step": 24876 }, { "epoch": 0.5276028079998303, "grad_norm": 0.3927842080593109, "learning_rate": 1.6776250024709453e-05, "loss": 0.4621, "step": 24877 }, { "epoch": 0.5276240164577634, "grad_norm": 0.3475950062274933, "learning_rate": 1.6776004765941175e-05, "loss": 0.5002, "step": 24878 }, { "epoch": 0.5276452249156964, "grad_norm": 0.35301652550697327, "learning_rate": 1.6775759499636683e-05, "loss": 0.5329, "step": 24879 }, { "epoch": 0.5276664333736294, "grad_norm": 0.3695700168609619, "learning_rate": 1.677551422579625e-05, "loss": 0.5153, "step": 24880 }, { "epoch": 0.5276876418315625, "grad_norm": 0.34679871797561646, "learning_rate": 1.6775268944420148e-05, "loss": 0.4684, "step": 24881 }, { "epoch": 0.5277088502894954, "grad_norm": 0.3446223735809326, "learning_rate": 1.6775023655508655e-05, "loss": 0.5051, "step": 24882 }, { "epoch": 0.5277300587474285, "grad_norm": 0.3747636079788208, "learning_rate": 1.677477835906203e-05, "loss": 0.5343, "step": 24883 }, { "epoch": 0.5277512672053615, "grad_norm": 0.3230270445346832, "learning_rate": 1.6774533055080564e-05, "loss": 0.5041, "step": 24884 }, { "epoch": 0.5277724756632945, "grad_norm": 0.38835641741752625, "learning_rate": 1.677428774356452e-05, "loss": 0.564, "step": 24885 }, { "epoch": 0.5277936841212275, "grad_norm": 0.3662276864051819, "learning_rate": 1.677404242451417e-05, "loss": 0.4935, "step": 24886 }, { "epoch": 0.5278148925791606, "grad_norm": 0.35400819778442383, "learning_rate": 1.677379709792979e-05, "loss": 0.5116, "step": 24887 }, { "epoch": 0.5278361010370936, "grad_norm": 0.40251103043556213, "learning_rate": 1.677355176381165e-05, "loss": 0.5021, "step": 24888 }, { "epoch": 0.5278573094950266, "grad_norm": 0.3223749101161957, "learning_rate": 1.677330642216003e-05, "loss": 0.4049, "step": 24889 }, { "epoch": 0.5278785179529596, "grad_norm": 0.36344510316848755, "learning_rate": 1.677306107297519e-05, "loss": 0.5105, "step": 24890 }, { "epoch": 0.5278997264108927, "grad_norm": 0.3666945695877075, "learning_rate": 1.6772815716257414e-05, "loss": 0.4667, "step": 24891 }, { "epoch": 0.5279209348688257, "grad_norm": 0.32132285833358765, "learning_rate": 1.6772570352006968e-05, "loss": 0.3334, "step": 24892 }, { "epoch": 0.5279421433267587, "grad_norm": 0.41167089343070984, "learning_rate": 1.677232498022413e-05, "loss": 0.6763, "step": 24893 }, { "epoch": 0.5279633517846918, "grad_norm": 0.3720662593841553, "learning_rate": 1.6772079600909172e-05, "loss": 0.5456, "step": 24894 }, { "epoch": 0.5279845602426247, "grad_norm": 0.3639043867588043, "learning_rate": 1.6771834214062367e-05, "loss": 0.53, "step": 24895 }, { "epoch": 0.5280057687005578, "grad_norm": 0.346027135848999, "learning_rate": 1.6771588819683986e-05, "loss": 0.5432, "step": 24896 }, { "epoch": 0.5280269771584908, "grad_norm": 0.37347692251205444, "learning_rate": 1.6771343417774303e-05, "loss": 0.5197, "step": 24897 }, { "epoch": 0.5280481856164239, "grad_norm": 0.35141241550445557, "learning_rate": 1.677109800833359e-05, "loss": 0.5464, "step": 24898 }, { "epoch": 0.5280693940743568, "grad_norm": 0.8537619709968567, "learning_rate": 1.6770852591362118e-05, "loss": 0.457, "step": 24899 }, { "epoch": 0.5280906025322899, "grad_norm": 0.34980037808418274, "learning_rate": 1.6770607166860165e-05, "loss": 0.4611, "step": 24900 }, { "epoch": 0.5281118109902229, "grad_norm": 0.36978641152381897, "learning_rate": 1.6770361734828e-05, "loss": 0.427, "step": 24901 }, { "epoch": 0.5281330194481559, "grad_norm": 0.3400766849517822, "learning_rate": 1.67701162952659e-05, "loss": 0.4414, "step": 24902 }, { "epoch": 0.5281542279060889, "grad_norm": 0.3791864514350891, "learning_rate": 1.6769870848174136e-05, "loss": 0.5133, "step": 24903 }, { "epoch": 0.528175436364022, "grad_norm": 0.34178420901298523, "learning_rate": 1.676962539355298e-05, "loss": 0.4904, "step": 24904 }, { "epoch": 0.5281966448219549, "grad_norm": 0.3332952558994293, "learning_rate": 1.6769379931402702e-05, "loss": 0.5429, "step": 24905 }, { "epoch": 0.528217853279888, "grad_norm": 0.3845137059688568, "learning_rate": 1.6769134461723583e-05, "loss": 0.5198, "step": 24906 }, { "epoch": 0.5282390617378211, "grad_norm": 0.30776482820510864, "learning_rate": 1.6768888984515887e-05, "loss": 0.4503, "step": 24907 }, { "epoch": 0.5282602701957541, "grad_norm": 0.3733970522880554, "learning_rate": 1.6768643499779896e-05, "loss": 0.5289, "step": 24908 }, { "epoch": 0.5282814786536871, "grad_norm": 0.3242715001106262, "learning_rate": 1.6768398007515877e-05, "loss": 0.4476, "step": 24909 }, { "epoch": 0.5283026871116201, "grad_norm": 0.37535709142684937, "learning_rate": 1.6768152507724103e-05, "loss": 0.4738, "step": 24910 }, { "epoch": 0.5283238955695532, "grad_norm": 0.32556629180908203, "learning_rate": 1.676790700040485e-05, "loss": 0.5643, "step": 24911 }, { "epoch": 0.5283451040274861, "grad_norm": 0.3203125, "learning_rate": 1.6767661485558386e-05, "loss": 0.4139, "step": 24912 }, { "epoch": 0.5283663124854192, "grad_norm": 0.3539738655090332, "learning_rate": 1.6767415963184992e-05, "loss": 0.456, "step": 24913 }, { "epoch": 0.5283875209433522, "grad_norm": 0.3930286169052124, "learning_rate": 1.6767170433284938e-05, "loss": 0.4972, "step": 24914 }, { "epoch": 0.5284087294012852, "grad_norm": 0.32125934958457947, "learning_rate": 1.676692489585849e-05, "loss": 0.5453, "step": 24915 }, { "epoch": 0.5284299378592182, "grad_norm": 0.37026190757751465, "learning_rate": 1.6766679350905932e-05, "loss": 0.4891, "step": 24916 }, { "epoch": 0.5284511463171513, "grad_norm": 0.3576694130897522, "learning_rate": 1.676643379842753e-05, "loss": 0.5629, "step": 24917 }, { "epoch": 0.5284723547750843, "grad_norm": 0.3874550461769104, "learning_rate": 1.676618823842356e-05, "loss": 0.5155, "step": 24918 }, { "epoch": 0.5284935632330173, "grad_norm": 0.9013611674308777, "learning_rate": 1.6765942670894292e-05, "loss": 0.3931, "step": 24919 }, { "epoch": 0.5285147716909504, "grad_norm": 0.3555580675601959, "learning_rate": 1.6765697095840002e-05, "loss": 0.5378, "step": 24920 }, { "epoch": 0.5285359801488834, "grad_norm": 0.32815051078796387, "learning_rate": 1.6765451513260966e-05, "loss": 0.4528, "step": 24921 }, { "epoch": 0.5285571886068164, "grad_norm": 0.3996489644050598, "learning_rate": 1.6765205923157452e-05, "loss": 0.5544, "step": 24922 }, { "epoch": 0.5285783970647494, "grad_norm": 0.35854071378707886, "learning_rate": 1.676496032552973e-05, "loss": 0.5673, "step": 24923 }, { "epoch": 0.5285996055226825, "grad_norm": 0.41219836473464966, "learning_rate": 1.6764714720378083e-05, "loss": 0.4357, "step": 24924 }, { "epoch": 0.5286208139806154, "grad_norm": 0.41814157366752625, "learning_rate": 1.6764469107702777e-05, "loss": 0.5463, "step": 24925 }, { "epoch": 0.5286420224385485, "grad_norm": 0.3106827735900879, "learning_rate": 1.6764223487504088e-05, "loss": 0.4783, "step": 24926 }, { "epoch": 0.5286632308964815, "grad_norm": 0.3406463861465454, "learning_rate": 1.676397785978229e-05, "loss": 0.5295, "step": 24927 }, { "epoch": 0.5286844393544146, "grad_norm": 0.36367523670196533, "learning_rate": 1.676373222453765e-05, "loss": 0.4156, "step": 24928 }, { "epoch": 0.5287056478123475, "grad_norm": 0.4064890444278717, "learning_rate": 1.676348658177045e-05, "loss": 0.5227, "step": 24929 }, { "epoch": 0.5287268562702806, "grad_norm": 0.37154313921928406, "learning_rate": 1.6763240931480958e-05, "loss": 0.5438, "step": 24930 }, { "epoch": 0.5287480647282136, "grad_norm": 0.4899107813835144, "learning_rate": 1.676299527366945e-05, "loss": 0.446, "step": 24931 }, { "epoch": 0.5287692731861466, "grad_norm": 0.3519585430622101, "learning_rate": 1.676274960833619e-05, "loss": 0.5209, "step": 24932 }, { "epoch": 0.5287904816440797, "grad_norm": 0.3432108163833618, "learning_rate": 1.676250393548147e-05, "loss": 0.4607, "step": 24933 }, { "epoch": 0.5288116901020127, "grad_norm": 0.2998257577419281, "learning_rate": 1.6762258255105543e-05, "loss": 0.4028, "step": 24934 }, { "epoch": 0.5288328985599458, "grad_norm": 0.3503738343715668, "learning_rate": 1.6762012567208694e-05, "loss": 0.5239, "step": 24935 }, { "epoch": 0.5288541070178787, "grad_norm": 0.3618089258670807, "learning_rate": 1.6761766871791195e-05, "loss": 0.5571, "step": 24936 }, { "epoch": 0.5288753154758118, "grad_norm": 0.36366909742355347, "learning_rate": 1.6761521168853314e-05, "loss": 0.6019, "step": 24937 }, { "epoch": 0.5288965239337448, "grad_norm": 0.37508219480514526, "learning_rate": 1.676127545839533e-05, "loss": 0.4457, "step": 24938 }, { "epoch": 0.5289177323916778, "grad_norm": 0.3600708544254303, "learning_rate": 1.676102974041751e-05, "loss": 0.5881, "step": 24939 }, { "epoch": 0.5289389408496108, "grad_norm": 0.3298630118370056, "learning_rate": 1.676078401492014e-05, "loss": 0.4066, "step": 24940 }, { "epoch": 0.5289601493075439, "grad_norm": 0.35968369245529175, "learning_rate": 1.676053828190348e-05, "loss": 0.5151, "step": 24941 }, { "epoch": 0.5289813577654768, "grad_norm": 0.40490585565567017, "learning_rate": 1.6760292541367807e-05, "loss": 0.4141, "step": 24942 }, { "epoch": 0.5290025662234099, "grad_norm": 0.39259615540504456, "learning_rate": 1.6760046793313397e-05, "loss": 0.3791, "step": 24943 }, { "epoch": 0.5290237746813429, "grad_norm": 0.3140206038951874, "learning_rate": 1.675980103774052e-05, "loss": 0.5007, "step": 24944 }, { "epoch": 0.529044983139276, "grad_norm": 0.3546760082244873, "learning_rate": 1.6759555274649452e-05, "loss": 0.454, "step": 24945 }, { "epoch": 0.5290661915972089, "grad_norm": 0.3538499176502228, "learning_rate": 1.6759309504040466e-05, "loss": 0.5155, "step": 24946 }, { "epoch": 0.529087400055142, "grad_norm": 0.3490936756134033, "learning_rate": 1.6759063725913835e-05, "loss": 0.5609, "step": 24947 }, { "epoch": 0.5291086085130751, "grad_norm": 0.333791047334671, "learning_rate": 1.675881794026983e-05, "loss": 0.4472, "step": 24948 }, { "epoch": 0.529129816971008, "grad_norm": 0.306960791349411, "learning_rate": 1.675857214710873e-05, "loss": 0.5015, "step": 24949 }, { "epoch": 0.5291510254289411, "grad_norm": 0.3308663070201874, "learning_rate": 1.6758326346430803e-05, "loss": 0.4134, "step": 24950 }, { "epoch": 0.5291722338868741, "grad_norm": 0.3524719476699829, "learning_rate": 1.675808053823632e-05, "loss": 0.4465, "step": 24951 }, { "epoch": 0.5291934423448071, "grad_norm": 0.41594111919403076, "learning_rate": 1.6757834722525562e-05, "loss": 0.5586, "step": 24952 }, { "epoch": 0.5292146508027401, "grad_norm": 0.4212774634361267, "learning_rate": 1.67575888992988e-05, "loss": 0.505, "step": 24953 }, { "epoch": 0.5292358592606732, "grad_norm": 0.32967686653137207, "learning_rate": 1.6757343068556306e-05, "loss": 0.4986, "step": 24954 }, { "epoch": 0.5292570677186061, "grad_norm": 0.36957940459251404, "learning_rate": 1.6757097230298352e-05, "loss": 0.5101, "step": 24955 }, { "epoch": 0.5292782761765392, "grad_norm": 0.3965345621109009, "learning_rate": 1.6756851384525214e-05, "loss": 0.6084, "step": 24956 }, { "epoch": 0.5292994846344722, "grad_norm": 0.420987993478775, "learning_rate": 1.6756605531237165e-05, "loss": 0.6488, "step": 24957 }, { "epoch": 0.5293206930924053, "grad_norm": 0.321855753660202, "learning_rate": 1.6756359670434478e-05, "loss": 0.4185, "step": 24958 }, { "epoch": 0.5293419015503382, "grad_norm": 0.3405437171459198, "learning_rate": 1.6756113802117425e-05, "loss": 0.4133, "step": 24959 }, { "epoch": 0.5293631100082713, "grad_norm": 0.3445031940937042, "learning_rate": 1.6755867926286286e-05, "loss": 0.4824, "step": 24960 }, { "epoch": 0.5293843184662044, "grad_norm": 0.341829389333725, "learning_rate": 1.6755622042941324e-05, "loss": 0.4654, "step": 24961 }, { "epoch": 0.5294055269241373, "grad_norm": 0.32621851563453674, "learning_rate": 1.675537615208282e-05, "loss": 0.5127, "step": 24962 }, { "epoch": 0.5294267353820704, "grad_norm": 0.3654763102531433, "learning_rate": 1.6755130253711044e-05, "loss": 0.5206, "step": 24963 }, { "epoch": 0.5294479438400034, "grad_norm": 0.3672308623790741, "learning_rate": 1.675488434782627e-05, "loss": 0.4424, "step": 24964 }, { "epoch": 0.5294691522979365, "grad_norm": 0.39575570821762085, "learning_rate": 1.6754638434428776e-05, "loss": 0.5052, "step": 24965 }, { "epoch": 0.5294903607558694, "grad_norm": 0.34592965245246887, "learning_rate": 1.675439251351883e-05, "loss": 0.498, "step": 24966 }, { "epoch": 0.5295115692138025, "grad_norm": 0.3956534266471863, "learning_rate": 1.6754146585096708e-05, "loss": 0.5275, "step": 24967 }, { "epoch": 0.5295327776717355, "grad_norm": 0.3538864552974701, "learning_rate": 1.6753900649162682e-05, "loss": 0.4677, "step": 24968 }, { "epoch": 0.5295539861296685, "grad_norm": 0.3449319899082184, "learning_rate": 1.6753654705717022e-05, "loss": 0.5739, "step": 24969 }, { "epoch": 0.5295751945876015, "grad_norm": 0.8657916188240051, "learning_rate": 1.6753408754760014e-05, "loss": 0.6236, "step": 24970 }, { "epoch": 0.5295964030455346, "grad_norm": 0.34318166971206665, "learning_rate": 1.675316279629192e-05, "loss": 0.46, "step": 24971 }, { "epoch": 0.5296176115034675, "grad_norm": 0.3641717731952667, "learning_rate": 1.6752916830313016e-05, "loss": 0.4467, "step": 24972 }, { "epoch": 0.5296388199614006, "grad_norm": 0.35962042212486267, "learning_rate": 1.6752670856823575e-05, "loss": 0.5522, "step": 24973 }, { "epoch": 0.5296600284193337, "grad_norm": 0.3710939586162567, "learning_rate": 1.6752424875823876e-05, "loss": 0.5715, "step": 24974 }, { "epoch": 0.5296812368772666, "grad_norm": 0.41707050800323486, "learning_rate": 1.6752178887314187e-05, "loss": 0.6095, "step": 24975 }, { "epoch": 0.5297024453351997, "grad_norm": 0.3635544180870056, "learning_rate": 1.6751932891294785e-05, "loss": 0.4979, "step": 24976 }, { "epoch": 0.5297236537931327, "grad_norm": 0.3955596089363098, "learning_rate": 1.6751686887765938e-05, "loss": 0.4507, "step": 24977 }, { "epoch": 0.5297448622510658, "grad_norm": 0.3353755474090576, "learning_rate": 1.6751440876727926e-05, "loss": 0.4155, "step": 24978 }, { "epoch": 0.5297660707089987, "grad_norm": 0.35731977224349976, "learning_rate": 1.675119485818102e-05, "loss": 0.4494, "step": 24979 }, { "epoch": 0.5297872791669318, "grad_norm": 0.5556153655052185, "learning_rate": 1.6750948832125493e-05, "loss": 0.498, "step": 24980 }, { "epoch": 0.5298084876248648, "grad_norm": 0.32756632566452026, "learning_rate": 1.6750702798561623e-05, "loss": 0.4999, "step": 24981 }, { "epoch": 0.5298296960827978, "grad_norm": 0.36030614376068115, "learning_rate": 1.6750456757489674e-05, "loss": 0.516, "step": 24982 }, { "epoch": 0.5298509045407308, "grad_norm": 0.3418475091457367, "learning_rate": 1.675021070890993e-05, "loss": 0.5313, "step": 24983 }, { "epoch": 0.5298721129986639, "grad_norm": 0.39880749583244324, "learning_rate": 1.674996465282266e-05, "loss": 0.5171, "step": 24984 }, { "epoch": 0.5298933214565968, "grad_norm": 0.40217122435569763, "learning_rate": 1.6749718589228135e-05, "loss": 0.5679, "step": 24985 }, { "epoch": 0.5299145299145299, "grad_norm": 0.3846181333065033, "learning_rate": 1.674947251812663e-05, "loss": 0.4754, "step": 24986 }, { "epoch": 0.5299357383724629, "grad_norm": 0.32870370149612427, "learning_rate": 1.6749226439518427e-05, "loss": 0.5135, "step": 24987 }, { "epoch": 0.529956946830396, "grad_norm": 0.38948681950569153, "learning_rate": 1.674898035340379e-05, "loss": 0.5659, "step": 24988 }, { "epoch": 0.529978155288329, "grad_norm": 0.3546160161495209, "learning_rate": 1.6748734259782993e-05, "loss": 0.5114, "step": 24989 }, { "epoch": 0.529999363746262, "grad_norm": 0.3214131295681, "learning_rate": 1.6748488158656316e-05, "loss": 0.3939, "step": 24990 }, { "epoch": 0.5300205722041951, "grad_norm": 0.3494708240032196, "learning_rate": 1.674824205002403e-05, "loss": 0.5233, "step": 24991 }, { "epoch": 0.530041780662128, "grad_norm": 0.37109309434890747, "learning_rate": 1.6747995933886404e-05, "loss": 0.5269, "step": 24992 }, { "epoch": 0.5300629891200611, "grad_norm": 0.322544127702713, "learning_rate": 1.6747749810243717e-05, "loss": 0.5306, "step": 24993 }, { "epoch": 0.5300841975779941, "grad_norm": 0.33778083324432373, "learning_rate": 1.674750367909624e-05, "loss": 0.5156, "step": 24994 }, { "epoch": 0.5301054060359272, "grad_norm": 0.3520197868347168, "learning_rate": 1.6747257540444253e-05, "loss": 0.4248, "step": 24995 }, { "epoch": 0.5301266144938601, "grad_norm": 0.32989010214805603, "learning_rate": 1.6747011394288017e-05, "loss": 0.5169, "step": 24996 }, { "epoch": 0.5301478229517932, "grad_norm": 0.3522103726863861, "learning_rate": 1.674676524062782e-05, "loss": 0.4475, "step": 24997 }, { "epoch": 0.5301690314097262, "grad_norm": 0.43718865513801575, "learning_rate": 1.6746519079463927e-05, "loss": 0.5578, "step": 24998 }, { "epoch": 0.5301902398676592, "grad_norm": 0.3582250475883484, "learning_rate": 1.6746272910796614e-05, "loss": 0.5204, "step": 24999 }, { "epoch": 0.5302114483255922, "grad_norm": 0.40992632508277893, "learning_rate": 1.674602673462616e-05, "loss": 0.3627, "step": 25000 }, { "epoch": 0.5302326567835253, "grad_norm": 0.41222575306892395, "learning_rate": 1.6745780550952825e-05, "loss": 0.5002, "step": 25001 }, { "epoch": 0.5302538652414583, "grad_norm": 0.3573327362537384, "learning_rate": 1.6745534359776896e-05, "loss": 0.4759, "step": 25002 }, { "epoch": 0.5302750736993913, "grad_norm": 0.35294318199157715, "learning_rate": 1.6745288161098637e-05, "loss": 0.5502, "step": 25003 }, { "epoch": 0.5302962821573244, "grad_norm": 0.35470104217529297, "learning_rate": 1.6745041954918336e-05, "loss": 0.4908, "step": 25004 }, { "epoch": 0.5303174906152573, "grad_norm": 0.3364293873310089, "learning_rate": 1.6744795741236254e-05, "loss": 0.4366, "step": 25005 }, { "epoch": 0.5303386990731904, "grad_norm": 0.35359853506088257, "learning_rate": 1.6744549520052666e-05, "loss": 0.5074, "step": 25006 }, { "epoch": 0.5303599075311234, "grad_norm": 0.35522520542144775, "learning_rate": 1.674430329136785e-05, "loss": 0.5508, "step": 25007 }, { "epoch": 0.5303811159890565, "grad_norm": 0.3695806860923767, "learning_rate": 1.6744057055182083e-05, "loss": 0.5227, "step": 25008 }, { "epoch": 0.5304023244469894, "grad_norm": 0.37323155999183655, "learning_rate": 1.674381081149563e-05, "loss": 0.5015, "step": 25009 }, { "epoch": 0.5304235329049225, "grad_norm": 0.34415337443351746, "learning_rate": 1.6743564560308766e-05, "loss": 0.4281, "step": 25010 }, { "epoch": 0.5304447413628555, "grad_norm": 0.334490567445755, "learning_rate": 1.6743318301621777e-05, "loss": 0.4728, "step": 25011 }, { "epoch": 0.5304659498207885, "grad_norm": 0.33257851004600525, "learning_rate": 1.674307203543492e-05, "loss": 0.4422, "step": 25012 }, { "epoch": 0.5304871582787215, "grad_norm": 0.35619646310806274, "learning_rate": 1.6742825761748483e-05, "loss": 0.5345, "step": 25013 }, { "epoch": 0.5305083667366546, "grad_norm": 0.3918742537498474, "learning_rate": 1.674257948056273e-05, "loss": 0.5328, "step": 25014 }, { "epoch": 0.5305295751945877, "grad_norm": 0.35912543535232544, "learning_rate": 1.6742333191877937e-05, "loss": 0.5381, "step": 25015 }, { "epoch": 0.5305507836525206, "grad_norm": 0.36176812648773193, "learning_rate": 1.6742086895694382e-05, "loss": 0.5781, "step": 25016 }, { "epoch": 0.5305719921104537, "grad_norm": 0.33758899569511414, "learning_rate": 1.6741840592012336e-05, "loss": 0.5245, "step": 25017 }, { "epoch": 0.5305932005683867, "grad_norm": 0.3273986577987671, "learning_rate": 1.6741594280832076e-05, "loss": 0.4829, "step": 25018 }, { "epoch": 0.5306144090263197, "grad_norm": 0.32857653498649597, "learning_rate": 1.674134796215387e-05, "loss": 0.381, "step": 25019 }, { "epoch": 0.5306356174842527, "grad_norm": 0.39975765347480774, "learning_rate": 1.6741101635977996e-05, "loss": 0.5722, "step": 25020 }, { "epoch": 0.5306568259421858, "grad_norm": 0.33139553666114807, "learning_rate": 1.674085530230473e-05, "loss": 0.5324, "step": 25021 }, { "epoch": 0.5306780344001187, "grad_norm": 0.3583250939846039, "learning_rate": 1.6740608961134342e-05, "loss": 0.5298, "step": 25022 }, { "epoch": 0.5306992428580518, "grad_norm": 0.4367169439792633, "learning_rate": 1.6740362612467104e-05, "loss": 0.5365, "step": 25023 }, { "epoch": 0.5307204513159848, "grad_norm": 0.3651023805141449, "learning_rate": 1.6740116256303296e-05, "loss": 0.5432, "step": 25024 }, { "epoch": 0.5307416597739179, "grad_norm": 0.3712666630744934, "learning_rate": 1.673986989264319e-05, "loss": 0.5696, "step": 25025 }, { "epoch": 0.5307628682318508, "grad_norm": 0.31522488594055176, "learning_rate": 1.673962352148706e-05, "loss": 0.4422, "step": 25026 }, { "epoch": 0.5307840766897839, "grad_norm": 0.3651939332485199, "learning_rate": 1.6739377142835178e-05, "loss": 0.5542, "step": 25027 }, { "epoch": 0.5308052851477169, "grad_norm": 0.3872290551662445, "learning_rate": 1.6739130756687816e-05, "loss": 0.6219, "step": 25028 }, { "epoch": 0.5308264936056499, "grad_norm": 0.37052738666534424, "learning_rate": 1.6738884363045253e-05, "loss": 0.5051, "step": 25029 }, { "epoch": 0.530847702063583, "grad_norm": 0.3215128481388092, "learning_rate": 1.6738637961907763e-05, "loss": 0.5168, "step": 25030 }, { "epoch": 0.530868910521516, "grad_norm": 0.35689085721969604, "learning_rate": 1.673839155327562e-05, "loss": 0.5975, "step": 25031 }, { "epoch": 0.530890118979449, "grad_norm": 0.41313108801841736, "learning_rate": 1.6738145137149096e-05, "loss": 0.5, "step": 25032 }, { "epoch": 0.530911327437382, "grad_norm": 0.34325021505355835, "learning_rate": 1.673789871352846e-05, "loss": 0.4897, "step": 25033 }, { "epoch": 0.5309325358953151, "grad_norm": 0.3465160131454468, "learning_rate": 1.6737652282413998e-05, "loss": 0.5071, "step": 25034 }, { "epoch": 0.530953744353248, "grad_norm": 0.3391932547092438, "learning_rate": 1.6737405843805972e-05, "loss": 0.4916, "step": 25035 }, { "epoch": 0.5309749528111811, "grad_norm": 0.37925878167152405, "learning_rate": 1.6737159397704667e-05, "loss": 0.5112, "step": 25036 }, { "epoch": 0.5309961612691141, "grad_norm": 0.3716791570186615, "learning_rate": 1.673691294411035e-05, "loss": 0.4042, "step": 25037 }, { "epoch": 0.5310173697270472, "grad_norm": 0.3737754225730896, "learning_rate": 1.6736666483023295e-05, "loss": 0.6002, "step": 25038 }, { "epoch": 0.5310385781849801, "grad_norm": 0.3998448848724365, "learning_rate": 1.673642001444378e-05, "loss": 0.5802, "step": 25039 }, { "epoch": 0.5310597866429132, "grad_norm": 0.35302090644836426, "learning_rate": 1.6736173538372078e-05, "loss": 0.4613, "step": 25040 }, { "epoch": 0.5310809951008462, "grad_norm": 0.3525281548500061, "learning_rate": 1.673592705480846e-05, "loss": 0.4624, "step": 25041 }, { "epoch": 0.5311022035587792, "grad_norm": 0.31125393509864807, "learning_rate": 1.6735680563753205e-05, "loss": 0.4033, "step": 25042 }, { "epoch": 0.5311234120167123, "grad_norm": 0.35133644938468933, "learning_rate": 1.673543406520658e-05, "loss": 0.4153, "step": 25043 }, { "epoch": 0.5311446204746453, "grad_norm": 0.37210309505462646, "learning_rate": 1.673518755916887e-05, "loss": 0.5201, "step": 25044 }, { "epoch": 0.5311658289325784, "grad_norm": 0.3197748064994812, "learning_rate": 1.6734941045640338e-05, "loss": 0.4786, "step": 25045 }, { "epoch": 0.5311870373905113, "grad_norm": 0.32925155758857727, "learning_rate": 1.6734694524621268e-05, "loss": 0.482, "step": 25046 }, { "epoch": 0.5312082458484444, "grad_norm": 0.392803430557251, "learning_rate": 1.6734447996111924e-05, "loss": 0.5085, "step": 25047 }, { "epoch": 0.5312294543063774, "grad_norm": 0.4229685366153717, "learning_rate": 1.673420146011259e-05, "loss": 0.579, "step": 25048 }, { "epoch": 0.5312506627643104, "grad_norm": 0.3798835277557373, "learning_rate": 1.673395491662353e-05, "loss": 0.3676, "step": 25049 }, { "epoch": 0.5312718712222434, "grad_norm": 0.4624849855899811, "learning_rate": 1.673370836564503e-05, "loss": 0.6515, "step": 25050 }, { "epoch": 0.5312930796801765, "grad_norm": 0.35256800055503845, "learning_rate": 1.6733461807177355e-05, "loss": 0.4427, "step": 25051 }, { "epoch": 0.5313142881381094, "grad_norm": 0.3811183273792267, "learning_rate": 1.6733215241220782e-05, "loss": 0.4669, "step": 25052 }, { "epoch": 0.5313354965960425, "grad_norm": 0.3317178785800934, "learning_rate": 1.6732968667775585e-05, "loss": 0.4443, "step": 25053 }, { "epoch": 0.5313567050539755, "grad_norm": 0.35255834460258484, "learning_rate": 1.673272208684204e-05, "loss": 0.4726, "step": 25054 }, { "epoch": 0.5313779135119086, "grad_norm": 0.35507094860076904, "learning_rate": 1.6732475498420418e-05, "loss": 0.5083, "step": 25055 }, { "epoch": 0.5313991219698416, "grad_norm": 0.32051709294319153, "learning_rate": 1.6732228902511e-05, "loss": 0.4248, "step": 25056 }, { "epoch": 0.5314203304277746, "grad_norm": 0.3306621015071869, "learning_rate": 1.673198229911405e-05, "loss": 0.4788, "step": 25057 }, { "epoch": 0.5314415388857077, "grad_norm": 0.3514266908168793, "learning_rate": 1.6731735688229852e-05, "loss": 0.4919, "step": 25058 }, { "epoch": 0.5314627473436406, "grad_norm": 0.3514968156814575, "learning_rate": 1.6731489069858674e-05, "loss": 0.4487, "step": 25059 }, { "epoch": 0.5314839558015737, "grad_norm": 0.3219815790653229, "learning_rate": 1.673124244400079e-05, "loss": 0.4547, "step": 25060 }, { "epoch": 0.5315051642595067, "grad_norm": 0.34042948484420776, "learning_rate": 1.673099581065648e-05, "loss": 0.4887, "step": 25061 }, { "epoch": 0.5315263727174397, "grad_norm": 0.3393819332122803, "learning_rate": 1.6730749169826016e-05, "loss": 0.4935, "step": 25062 }, { "epoch": 0.5315475811753727, "grad_norm": 0.33806005120277405, "learning_rate": 1.673050252150967e-05, "loss": 0.4845, "step": 25063 }, { "epoch": 0.5315687896333058, "grad_norm": 0.3590039014816284, "learning_rate": 1.6730255865707715e-05, "loss": 0.4659, "step": 25064 }, { "epoch": 0.5315899980912387, "grad_norm": 0.33326685428619385, "learning_rate": 1.6730009202420432e-05, "loss": 0.4732, "step": 25065 }, { "epoch": 0.5316112065491718, "grad_norm": 0.35379311442375183, "learning_rate": 1.6729762531648086e-05, "loss": 0.5348, "step": 25066 }, { "epoch": 0.5316324150071048, "grad_norm": 0.32714977860450745, "learning_rate": 1.672951585339096e-05, "loss": 0.4262, "step": 25067 }, { "epoch": 0.5316536234650379, "grad_norm": 0.3700876832008362, "learning_rate": 1.6729269167649326e-05, "loss": 0.4384, "step": 25068 }, { "epoch": 0.5316748319229708, "grad_norm": 0.36465081572532654, "learning_rate": 1.6729022474423454e-05, "loss": 0.6204, "step": 25069 }, { "epoch": 0.5316960403809039, "grad_norm": 0.338932603597641, "learning_rate": 1.6728775773713626e-05, "loss": 0.4852, "step": 25070 }, { "epoch": 0.531717248838837, "grad_norm": 0.44272947311401367, "learning_rate": 1.672852906552011e-05, "loss": 0.5275, "step": 25071 }, { "epoch": 0.5317384572967699, "grad_norm": 0.3075029253959656, "learning_rate": 1.6728282349843182e-05, "loss": 0.5151, "step": 25072 }, { "epoch": 0.531759665754703, "grad_norm": 0.3837626278400421, "learning_rate": 1.6728035626683115e-05, "loss": 0.5024, "step": 25073 }, { "epoch": 0.531780874212636, "grad_norm": 0.40729469060897827, "learning_rate": 1.6727788896040186e-05, "loss": 0.4258, "step": 25074 }, { "epoch": 0.531802082670569, "grad_norm": 0.36154329776763916, "learning_rate": 1.672754215791467e-05, "loss": 0.4825, "step": 25075 }, { "epoch": 0.531823291128502, "grad_norm": 0.40284043550491333, "learning_rate": 1.6727295412306837e-05, "loss": 0.5559, "step": 25076 }, { "epoch": 0.5318444995864351, "grad_norm": 0.34434974193573, "learning_rate": 1.672704865921697e-05, "loss": 0.4893, "step": 25077 }, { "epoch": 0.5318657080443681, "grad_norm": 0.3644145131111145, "learning_rate": 1.6726801898645338e-05, "loss": 0.4938, "step": 25078 }, { "epoch": 0.5318869165023011, "grad_norm": 0.4065638482570648, "learning_rate": 1.672655513059221e-05, "loss": 0.4514, "step": 25079 }, { "epoch": 0.5319081249602341, "grad_norm": 0.36714842915534973, "learning_rate": 1.6726308355057866e-05, "loss": 0.5167, "step": 25080 }, { "epoch": 0.5319293334181672, "grad_norm": 0.6823353171348572, "learning_rate": 1.6726061572042585e-05, "loss": 0.5367, "step": 25081 }, { "epoch": 0.5319505418761001, "grad_norm": 0.3114808201789856, "learning_rate": 1.6725814781546635e-05, "loss": 0.4587, "step": 25082 }, { "epoch": 0.5319717503340332, "grad_norm": 0.35023558139801025, "learning_rate": 1.6725567983570292e-05, "loss": 0.5054, "step": 25083 }, { "epoch": 0.5319929587919663, "grad_norm": 0.34407737851142883, "learning_rate": 1.672532117811383e-05, "loss": 0.5054, "step": 25084 }, { "epoch": 0.5320141672498993, "grad_norm": 0.3626153767108917, "learning_rate": 1.6725074365177522e-05, "loss": 0.5191, "step": 25085 }, { "epoch": 0.5320353757078323, "grad_norm": 0.38274067640304565, "learning_rate": 1.6724827544761647e-05, "loss": 0.5329, "step": 25086 }, { "epoch": 0.5320565841657653, "grad_norm": 0.3301200568675995, "learning_rate": 1.6724580716866478e-05, "loss": 0.4591, "step": 25087 }, { "epoch": 0.5320777926236984, "grad_norm": 0.35882121324539185, "learning_rate": 1.672433388149229e-05, "loss": 0.5374, "step": 25088 }, { "epoch": 0.5320990010816313, "grad_norm": 0.37092190980911255, "learning_rate": 1.6724087038639355e-05, "loss": 0.5127, "step": 25089 }, { "epoch": 0.5321202095395644, "grad_norm": 0.3364301919937134, "learning_rate": 1.6723840188307947e-05, "loss": 0.5243, "step": 25090 }, { "epoch": 0.5321414179974974, "grad_norm": 0.3580099940299988, "learning_rate": 1.6723593330498345e-05, "loss": 0.522, "step": 25091 }, { "epoch": 0.5321626264554304, "grad_norm": 0.3773246109485626, "learning_rate": 1.6723346465210815e-05, "loss": 0.4594, "step": 25092 }, { "epoch": 0.5321838349133634, "grad_norm": 0.35000917315483093, "learning_rate": 1.6723099592445645e-05, "loss": 0.5647, "step": 25093 }, { "epoch": 0.5322050433712965, "grad_norm": 0.3525944650173187, "learning_rate": 1.67228527122031e-05, "loss": 0.4987, "step": 25094 }, { "epoch": 0.5322262518292294, "grad_norm": 0.32494285702705383, "learning_rate": 1.6722605824483455e-05, "loss": 0.4471, "step": 25095 }, { "epoch": 0.5322474602871625, "grad_norm": 0.3430114984512329, "learning_rate": 1.6722358929286985e-05, "loss": 0.5221, "step": 25096 }, { "epoch": 0.5322686687450956, "grad_norm": 0.34948745369911194, "learning_rate": 1.672211202661397e-05, "loss": 0.4799, "step": 25097 }, { "epoch": 0.5322898772030286, "grad_norm": 0.43880850076675415, "learning_rate": 1.6721865116464676e-05, "loss": 0.4437, "step": 25098 }, { "epoch": 0.5323110856609616, "grad_norm": 0.3374752700328827, "learning_rate": 1.6721618198839384e-05, "loss": 0.5462, "step": 25099 }, { "epoch": 0.5323322941188946, "grad_norm": 0.4059065282344818, "learning_rate": 1.6721371273738368e-05, "loss": 0.5571, "step": 25100 }, { "epoch": 0.5323535025768277, "grad_norm": 0.3643791973590851, "learning_rate": 1.67211243411619e-05, "loss": 0.5042, "step": 25101 }, { "epoch": 0.5323747110347606, "grad_norm": 0.32539188861846924, "learning_rate": 1.6720877401110255e-05, "loss": 0.4803, "step": 25102 }, { "epoch": 0.5323959194926937, "grad_norm": 0.3272566795349121, "learning_rate": 1.672063045358371e-05, "loss": 0.5206, "step": 25103 }, { "epoch": 0.5324171279506267, "grad_norm": 0.35613343119621277, "learning_rate": 1.6720383498582536e-05, "loss": 0.4918, "step": 25104 }, { "epoch": 0.5324383364085598, "grad_norm": 0.3643748462200165, "learning_rate": 1.672013653610701e-05, "loss": 0.4636, "step": 25105 }, { "epoch": 0.5324595448664927, "grad_norm": 0.3987645208835602, "learning_rate": 1.671988956615741e-05, "loss": 0.5219, "step": 25106 }, { "epoch": 0.5324807533244258, "grad_norm": 0.3674446940422058, "learning_rate": 1.6719642588734007e-05, "loss": 0.5472, "step": 25107 }, { "epoch": 0.5325019617823588, "grad_norm": 0.3475748598575592, "learning_rate": 1.6719395603837073e-05, "loss": 0.4541, "step": 25108 }, { "epoch": 0.5325231702402918, "grad_norm": 0.3668786287307739, "learning_rate": 1.6719148611466886e-05, "loss": 0.4561, "step": 25109 }, { "epoch": 0.5325443786982249, "grad_norm": 0.33887097239494324, "learning_rate": 1.671890161162372e-05, "loss": 0.3935, "step": 25110 }, { "epoch": 0.5325655871561579, "grad_norm": 0.31018078327178955, "learning_rate": 1.671865460430785e-05, "loss": 0.4471, "step": 25111 }, { "epoch": 0.532586795614091, "grad_norm": 0.40428289771080017, "learning_rate": 1.671840758951955e-05, "loss": 0.4265, "step": 25112 }, { "epoch": 0.5326080040720239, "grad_norm": 0.39523738622665405, "learning_rate": 1.6718160567259102e-05, "loss": 0.5382, "step": 25113 }, { "epoch": 0.532629212529957, "grad_norm": 0.3468462824821472, "learning_rate": 1.671791353752677e-05, "loss": 0.5229, "step": 25114 }, { "epoch": 0.53265042098789, "grad_norm": 0.4004567265510559, "learning_rate": 1.671766650032283e-05, "loss": 0.5749, "step": 25115 }, { "epoch": 0.532671629445823, "grad_norm": 0.3541828989982605, "learning_rate": 1.6717419455647563e-05, "loss": 0.6076, "step": 25116 }, { "epoch": 0.532692837903756, "grad_norm": 0.37343278527259827, "learning_rate": 1.6717172403501237e-05, "loss": 0.5027, "step": 25117 }, { "epoch": 0.5327140463616891, "grad_norm": 0.3148949146270752, "learning_rate": 1.6716925343884132e-05, "loss": 0.5313, "step": 25118 }, { "epoch": 0.532735254819622, "grad_norm": 0.3888109624385834, "learning_rate": 1.6716678276796522e-05, "loss": 0.5825, "step": 25119 }, { "epoch": 0.5327564632775551, "grad_norm": 0.31580960750579834, "learning_rate": 1.671643120223868e-05, "loss": 0.486, "step": 25120 }, { "epoch": 0.5327776717354881, "grad_norm": 0.4000284969806671, "learning_rate": 1.6716184120210883e-05, "loss": 0.507, "step": 25121 }, { "epoch": 0.5327988801934211, "grad_norm": 0.35751715302467346, "learning_rate": 1.6715937030713403e-05, "loss": 0.4453, "step": 25122 }, { "epoch": 0.5328200886513541, "grad_norm": 0.5839021801948547, "learning_rate": 1.671568993374652e-05, "loss": 0.4187, "step": 25123 }, { "epoch": 0.5328412971092872, "grad_norm": 0.35310912132263184, "learning_rate": 1.6715442829310498e-05, "loss": 0.5998, "step": 25124 }, { "epoch": 0.5328625055672203, "grad_norm": 0.35361984372138977, "learning_rate": 1.6715195717405622e-05, "loss": 0.5097, "step": 25125 }, { "epoch": 0.5328837140251532, "grad_norm": 0.36632463335990906, "learning_rate": 1.6714948598032162e-05, "loss": 0.5495, "step": 25126 }, { "epoch": 0.5329049224830863, "grad_norm": 0.39841127395629883, "learning_rate": 1.6714701471190395e-05, "loss": 0.5341, "step": 25127 }, { "epoch": 0.5329261309410193, "grad_norm": 0.3913792073726654, "learning_rate": 1.67144543368806e-05, "loss": 0.5688, "step": 25128 }, { "epoch": 0.5329473393989523, "grad_norm": 0.40326443314552307, "learning_rate": 1.6714207195103044e-05, "loss": 0.5228, "step": 25129 }, { "epoch": 0.5329685478568853, "grad_norm": 0.37385034561157227, "learning_rate": 1.6713960045858e-05, "loss": 0.4633, "step": 25130 }, { "epoch": 0.5329897563148184, "grad_norm": 0.37275850772857666, "learning_rate": 1.6713712889145757e-05, "loss": 0.4719, "step": 25131 }, { "epoch": 0.5330109647727513, "grad_norm": 0.3593407869338989, "learning_rate": 1.6713465724966575e-05, "loss": 0.4567, "step": 25132 }, { "epoch": 0.5330321732306844, "grad_norm": 0.36369356513023376, "learning_rate": 1.6713218553320736e-05, "loss": 0.5065, "step": 25133 }, { "epoch": 0.5330533816886174, "grad_norm": 0.34622347354888916, "learning_rate": 1.6712971374208515e-05, "loss": 0.4467, "step": 25134 }, { "epoch": 0.5330745901465505, "grad_norm": 0.35883378982543945, "learning_rate": 1.6712724187630187e-05, "loss": 0.463, "step": 25135 }, { "epoch": 0.5330957986044834, "grad_norm": 0.3218708336353302, "learning_rate": 1.6712476993586023e-05, "loss": 0.4962, "step": 25136 }, { "epoch": 0.5331170070624165, "grad_norm": 0.3474145233631134, "learning_rate": 1.67122297920763e-05, "loss": 0.5888, "step": 25137 }, { "epoch": 0.5331382155203496, "grad_norm": 0.37260377407073975, "learning_rate": 1.6711982583101295e-05, "loss": 0.5178, "step": 25138 }, { "epoch": 0.5331594239782825, "grad_norm": 0.3718339502811432, "learning_rate": 1.6711735366661278e-05, "loss": 0.5427, "step": 25139 }, { "epoch": 0.5331806324362156, "grad_norm": 0.391647607088089, "learning_rate": 1.671148814275653e-05, "loss": 0.4666, "step": 25140 }, { "epoch": 0.5332018408941486, "grad_norm": 0.3296283185482025, "learning_rate": 1.6711240911387322e-05, "loss": 0.4799, "step": 25141 }, { "epoch": 0.5332230493520816, "grad_norm": 0.3661785423755646, "learning_rate": 1.6710993672553932e-05, "loss": 0.5469, "step": 25142 }, { "epoch": 0.5332442578100146, "grad_norm": 0.36766308546066284, "learning_rate": 1.671074642625663e-05, "loss": 0.4485, "step": 25143 }, { "epoch": 0.5332654662679477, "grad_norm": 0.3034818470478058, "learning_rate": 1.6710499172495694e-05, "loss": 0.4374, "step": 25144 }, { "epoch": 0.5332866747258806, "grad_norm": 0.38957497477531433, "learning_rate": 1.6710251911271402e-05, "loss": 0.6203, "step": 25145 }, { "epoch": 0.5333078831838137, "grad_norm": 0.36548447608947754, "learning_rate": 1.6710004642584027e-05, "loss": 0.5783, "step": 25146 }, { "epoch": 0.5333290916417467, "grad_norm": 0.3352898061275482, "learning_rate": 1.6709757366433842e-05, "loss": 0.4897, "step": 25147 }, { "epoch": 0.5333503000996798, "grad_norm": 0.3350827097892761, "learning_rate": 1.670951008282112e-05, "loss": 0.4896, "step": 25148 }, { "epoch": 0.5333715085576127, "grad_norm": 0.3579352796077728, "learning_rate": 1.6709262791746143e-05, "loss": 0.4888, "step": 25149 }, { "epoch": 0.5333927170155458, "grad_norm": 0.337798535823822, "learning_rate": 1.670901549320918e-05, "loss": 0.5477, "step": 25150 }, { "epoch": 0.5334139254734789, "grad_norm": 0.36908718943595886, "learning_rate": 1.6708768187210508e-05, "loss": 0.5489, "step": 25151 }, { "epoch": 0.5334351339314118, "grad_norm": 0.3421795666217804, "learning_rate": 1.6708520873750405e-05, "loss": 0.5359, "step": 25152 }, { "epoch": 0.5334563423893449, "grad_norm": 0.4212627112865448, "learning_rate": 1.6708273552829145e-05, "loss": 0.5217, "step": 25153 }, { "epoch": 0.5334775508472779, "grad_norm": 0.38899165391921997, "learning_rate": 1.6708026224446995e-05, "loss": 0.5685, "step": 25154 }, { "epoch": 0.533498759305211, "grad_norm": 0.6870083212852478, "learning_rate": 1.670777888860424e-05, "loss": 0.5246, "step": 25155 }, { "epoch": 0.5335199677631439, "grad_norm": 0.39843451976776123, "learning_rate": 1.670753154530115e-05, "loss": 0.528, "step": 25156 }, { "epoch": 0.533541176221077, "grad_norm": 0.33737871050834656, "learning_rate": 1.6707284194538004e-05, "loss": 0.4494, "step": 25157 }, { "epoch": 0.53356238467901, "grad_norm": 0.37518882751464844, "learning_rate": 1.6707036836315073e-05, "loss": 0.5091, "step": 25158 }, { "epoch": 0.533583593136943, "grad_norm": 0.36622583866119385, "learning_rate": 1.6706789470632635e-05, "loss": 0.4586, "step": 25159 }, { "epoch": 0.533604801594876, "grad_norm": 0.35338515043258667, "learning_rate": 1.6706542097490966e-05, "loss": 0.4875, "step": 25160 }, { "epoch": 0.5336260100528091, "grad_norm": 0.3810703456401825, "learning_rate": 1.6706294716890336e-05, "loss": 0.5584, "step": 25161 }, { "epoch": 0.533647218510742, "grad_norm": 0.3633541464805603, "learning_rate": 1.6706047328831022e-05, "loss": 0.518, "step": 25162 }, { "epoch": 0.5336684269686751, "grad_norm": 0.37989842891693115, "learning_rate": 1.67057999333133e-05, "loss": 0.5398, "step": 25163 }, { "epoch": 0.5336896354266081, "grad_norm": 0.379578173160553, "learning_rate": 1.6705552530337452e-05, "loss": 0.5568, "step": 25164 }, { "epoch": 0.5337108438845412, "grad_norm": 0.36107343435287476, "learning_rate": 1.6705305119903742e-05, "loss": 0.5099, "step": 25165 }, { "epoch": 0.5337320523424742, "grad_norm": 0.3485427796840668, "learning_rate": 1.670505770201245e-05, "loss": 0.461, "step": 25166 }, { "epoch": 0.5337532608004072, "grad_norm": 0.3513471186161041, "learning_rate": 1.6704810276663855e-05, "loss": 0.4897, "step": 25167 }, { "epoch": 0.5337744692583403, "grad_norm": 0.3504864573478699, "learning_rate": 1.6704562843858222e-05, "loss": 0.4426, "step": 25168 }, { "epoch": 0.5337956777162732, "grad_norm": 0.32178056240081787, "learning_rate": 1.6704315403595837e-05, "loss": 0.5065, "step": 25169 }, { "epoch": 0.5338168861742063, "grad_norm": 0.3383137285709381, "learning_rate": 1.670406795587697e-05, "loss": 0.4376, "step": 25170 }, { "epoch": 0.5338380946321393, "grad_norm": 0.3821168541908264, "learning_rate": 1.6703820500701896e-05, "loss": 0.5534, "step": 25171 }, { "epoch": 0.5338593030900723, "grad_norm": 0.32378295063972473, "learning_rate": 1.6703573038070892e-05, "loss": 0.384, "step": 25172 }, { "epoch": 0.5338805115480053, "grad_norm": 0.33040112257003784, "learning_rate": 1.670332556798423e-05, "loss": 0.4119, "step": 25173 }, { "epoch": 0.5339017200059384, "grad_norm": 0.40737783908843994, "learning_rate": 1.670307809044219e-05, "loss": 0.5461, "step": 25174 }, { "epoch": 0.5339229284638713, "grad_norm": 0.3256734609603882, "learning_rate": 1.6702830605445043e-05, "loss": 0.5477, "step": 25175 }, { "epoch": 0.5339441369218044, "grad_norm": 0.3756769597530365, "learning_rate": 1.6702583112993065e-05, "loss": 0.5138, "step": 25176 }, { "epoch": 0.5339653453797374, "grad_norm": 0.43879157304763794, "learning_rate": 1.6702335613086537e-05, "loss": 0.5519, "step": 25177 }, { "epoch": 0.5339865538376705, "grad_norm": 0.40166565775871277, "learning_rate": 1.6702088105725725e-05, "loss": 0.4708, "step": 25178 }, { "epoch": 0.5340077622956035, "grad_norm": 0.4246881306171417, "learning_rate": 1.670184059091091e-05, "loss": 0.5306, "step": 25179 }, { "epoch": 0.5340289707535365, "grad_norm": 0.3816951811313629, "learning_rate": 1.6701593068642368e-05, "loss": 0.4708, "step": 25180 }, { "epoch": 0.5340501792114696, "grad_norm": 0.3883533775806427, "learning_rate": 1.6701345538920368e-05, "loss": 0.4686, "step": 25181 }, { "epoch": 0.5340713876694025, "grad_norm": 0.35067760944366455, "learning_rate": 1.6701098001745192e-05, "loss": 0.5119, "step": 25182 }, { "epoch": 0.5340925961273356, "grad_norm": 0.3485373854637146, "learning_rate": 1.6700850457117117e-05, "loss": 0.5205, "step": 25183 }, { "epoch": 0.5341138045852686, "grad_norm": 0.3048170506954193, "learning_rate": 1.6700602905036408e-05, "loss": 0.3824, "step": 25184 }, { "epoch": 0.5341350130432017, "grad_norm": 0.33720019459724426, "learning_rate": 1.670035534550335e-05, "loss": 0.4975, "step": 25185 }, { "epoch": 0.5341562215011346, "grad_norm": 0.36239558458328247, "learning_rate": 1.6700107778518216e-05, "loss": 0.4861, "step": 25186 }, { "epoch": 0.5341774299590677, "grad_norm": 0.3343341648578644, "learning_rate": 1.669986020408128e-05, "loss": 0.4687, "step": 25187 }, { "epoch": 0.5341986384170007, "grad_norm": 0.43761584162712097, "learning_rate": 1.6699612622192813e-05, "loss": 0.5257, "step": 25188 }, { "epoch": 0.5342198468749337, "grad_norm": 0.3392449617385864, "learning_rate": 1.66993650328531e-05, "loss": 0.4573, "step": 25189 }, { "epoch": 0.5342410553328667, "grad_norm": 0.35163551568984985, "learning_rate": 1.669911743606241e-05, "loss": 0.4734, "step": 25190 }, { "epoch": 0.5342622637907998, "grad_norm": 0.3639889657497406, "learning_rate": 1.669886983182102e-05, "loss": 0.4924, "step": 25191 }, { "epoch": 0.5342834722487328, "grad_norm": 0.47193437814712524, "learning_rate": 1.6698622220129206e-05, "loss": 0.5398, "step": 25192 }, { "epoch": 0.5343046807066658, "grad_norm": 0.38807326555252075, "learning_rate": 1.6698374600987242e-05, "loss": 0.488, "step": 25193 }, { "epoch": 0.5343258891645989, "grad_norm": 0.3684656322002411, "learning_rate": 1.6698126974395403e-05, "loss": 0.4875, "step": 25194 }, { "epoch": 0.5343470976225319, "grad_norm": 0.3803641200065613, "learning_rate": 1.6697879340353966e-05, "loss": 0.5023, "step": 25195 }, { "epoch": 0.5343683060804649, "grad_norm": 0.3481042683124542, "learning_rate": 1.6697631698863207e-05, "loss": 0.5136, "step": 25196 }, { "epoch": 0.5343895145383979, "grad_norm": 1.1481099128723145, "learning_rate": 1.6697384049923397e-05, "loss": 0.4638, "step": 25197 }, { "epoch": 0.534410722996331, "grad_norm": 0.38045814633369446, "learning_rate": 1.6697136393534816e-05, "loss": 0.5216, "step": 25198 }, { "epoch": 0.5344319314542639, "grad_norm": 0.34101933240890503, "learning_rate": 1.669688872969774e-05, "loss": 0.4977, "step": 25199 }, { "epoch": 0.534453139912197, "grad_norm": 0.37053224444389343, "learning_rate": 1.669664105841244e-05, "loss": 0.4853, "step": 25200 }, { "epoch": 0.53447434837013, "grad_norm": 0.3050804138183594, "learning_rate": 1.6696393379679194e-05, "loss": 0.5039, "step": 25201 }, { "epoch": 0.534495556828063, "grad_norm": 0.3398895561695099, "learning_rate": 1.669614569349828e-05, "loss": 0.5203, "step": 25202 }, { "epoch": 0.534516765285996, "grad_norm": 0.36888736486434937, "learning_rate": 1.669589799986997e-05, "loss": 0.5153, "step": 25203 }, { "epoch": 0.5345379737439291, "grad_norm": 0.3874187767505646, "learning_rate": 1.669565029879454e-05, "loss": 0.5193, "step": 25204 }, { "epoch": 0.534559182201862, "grad_norm": 0.3495143949985504, "learning_rate": 1.6695402590272265e-05, "loss": 0.5477, "step": 25205 }, { "epoch": 0.5345803906597951, "grad_norm": 0.37147220969200134, "learning_rate": 1.6695154874303424e-05, "loss": 0.5255, "step": 25206 }, { "epoch": 0.5346015991177282, "grad_norm": 0.33514833450317383, "learning_rate": 1.6694907150888285e-05, "loss": 0.5098, "step": 25207 }, { "epoch": 0.5346228075756612, "grad_norm": 0.3403821885585785, "learning_rate": 1.6694659420027134e-05, "loss": 0.522, "step": 25208 }, { "epoch": 0.5346440160335942, "grad_norm": 0.3814702332019806, "learning_rate": 1.6694411681720238e-05, "loss": 0.4982, "step": 25209 }, { "epoch": 0.5346652244915272, "grad_norm": 0.34070342779159546, "learning_rate": 1.669416393596788e-05, "loss": 0.5076, "step": 25210 }, { "epoch": 0.5346864329494603, "grad_norm": 0.575082540512085, "learning_rate": 1.6693916182770323e-05, "loss": 0.5835, "step": 25211 }, { "epoch": 0.5347076414073932, "grad_norm": 0.33850452303886414, "learning_rate": 1.6693668422127856e-05, "loss": 0.5204, "step": 25212 }, { "epoch": 0.5347288498653263, "grad_norm": 0.3314131498336792, "learning_rate": 1.6693420654040746e-05, "loss": 0.5057, "step": 25213 }, { "epoch": 0.5347500583232593, "grad_norm": 0.37265026569366455, "learning_rate": 1.6693172878509272e-05, "loss": 0.5167, "step": 25214 }, { "epoch": 0.5347712667811924, "grad_norm": 0.4133365750312805, "learning_rate": 1.669292509553371e-05, "loss": 0.5065, "step": 25215 }, { "epoch": 0.5347924752391253, "grad_norm": 0.3595001697540283, "learning_rate": 1.6692677305114336e-05, "loss": 0.4382, "step": 25216 }, { "epoch": 0.5348136836970584, "grad_norm": 0.31392496824264526, "learning_rate": 1.6692429507251426e-05, "loss": 0.4522, "step": 25217 }, { "epoch": 0.5348348921549914, "grad_norm": 0.37787118554115295, "learning_rate": 1.669218170194525e-05, "loss": 0.4935, "step": 25218 }, { "epoch": 0.5348561006129244, "grad_norm": 0.3787069618701935, "learning_rate": 1.669193388919609e-05, "loss": 0.5442, "step": 25219 }, { "epoch": 0.5348773090708575, "grad_norm": 0.33606523275375366, "learning_rate": 1.669168606900422e-05, "loss": 0.5024, "step": 25220 }, { "epoch": 0.5348985175287905, "grad_norm": 0.37673747539520264, "learning_rate": 1.6691438241369914e-05, "loss": 0.6035, "step": 25221 }, { "epoch": 0.5349197259867235, "grad_norm": 0.3563058078289032, "learning_rate": 1.6691190406293447e-05, "loss": 0.5162, "step": 25222 }, { "epoch": 0.5349409344446565, "grad_norm": 0.36482447385787964, "learning_rate": 1.66909425637751e-05, "loss": 0.4197, "step": 25223 }, { "epoch": 0.5349621429025896, "grad_norm": 0.40550556778907776, "learning_rate": 1.6690694713815142e-05, "loss": 0.4605, "step": 25224 }, { "epoch": 0.5349833513605226, "grad_norm": 0.36631402373313904, "learning_rate": 1.6690446856413852e-05, "loss": 0.5393, "step": 25225 }, { "epoch": 0.5350045598184556, "grad_norm": 0.4195137619972229, "learning_rate": 1.6690198991571506e-05, "loss": 0.5091, "step": 25226 }, { "epoch": 0.5350257682763886, "grad_norm": 0.3284880816936493, "learning_rate": 1.6689951119288375e-05, "loss": 0.4687, "step": 25227 }, { "epoch": 0.5350469767343217, "grad_norm": 0.3653446137905121, "learning_rate": 1.668970323956474e-05, "loss": 0.5237, "step": 25228 }, { "epoch": 0.5350681851922546, "grad_norm": 0.3590875566005707, "learning_rate": 1.668945535240088e-05, "loss": 0.5631, "step": 25229 }, { "epoch": 0.5350893936501877, "grad_norm": 0.3990679681301117, "learning_rate": 1.668920745779706e-05, "loss": 0.5071, "step": 25230 }, { "epoch": 0.5351106021081207, "grad_norm": 0.30696502327919006, "learning_rate": 1.6688959555753563e-05, "loss": 0.4584, "step": 25231 }, { "epoch": 0.5351318105660537, "grad_norm": 0.4228651225566864, "learning_rate": 1.6688711646270667e-05, "loss": 0.5134, "step": 25232 }, { "epoch": 0.5351530190239868, "grad_norm": 0.3932109475135803, "learning_rate": 1.668846372934864e-05, "loss": 0.4481, "step": 25233 }, { "epoch": 0.5351742274819198, "grad_norm": 0.39851611852645874, "learning_rate": 1.668821580498776e-05, "loss": 0.5327, "step": 25234 }, { "epoch": 0.5351954359398529, "grad_norm": 0.36717715859413147, "learning_rate": 1.668796787318831e-05, "loss": 0.5152, "step": 25235 }, { "epoch": 0.5352166443977858, "grad_norm": 0.3641062080860138, "learning_rate": 1.6687719933950557e-05, "loss": 0.5142, "step": 25236 }, { "epoch": 0.5352378528557189, "grad_norm": 0.4325173497200012, "learning_rate": 1.668747198727478e-05, "loss": 0.5144, "step": 25237 }, { "epoch": 0.5352590613136519, "grad_norm": 0.3279412090778351, "learning_rate": 1.668722403316126e-05, "loss": 0.4895, "step": 25238 }, { "epoch": 0.5352802697715849, "grad_norm": 0.33187851309776306, "learning_rate": 1.6686976071610258e-05, "loss": 0.5243, "step": 25239 }, { "epoch": 0.5353014782295179, "grad_norm": 0.33918577432632446, "learning_rate": 1.6686728102622064e-05, "loss": 0.5604, "step": 25240 }, { "epoch": 0.535322686687451, "grad_norm": 0.38822197914123535, "learning_rate": 1.6686480126196947e-05, "loss": 0.5055, "step": 25241 }, { "epoch": 0.5353438951453839, "grad_norm": 0.381072998046875, "learning_rate": 1.668623214233519e-05, "loss": 0.5309, "step": 25242 }, { "epoch": 0.535365103603317, "grad_norm": 0.35367706418037415, "learning_rate": 1.668598415103706e-05, "loss": 0.4893, "step": 25243 }, { "epoch": 0.53538631206125, "grad_norm": 0.3490546643733978, "learning_rate": 1.668573615230284e-05, "loss": 0.5017, "step": 25244 }, { "epoch": 0.5354075205191831, "grad_norm": 0.32600709795951843, "learning_rate": 1.66854881461328e-05, "loss": 0.4205, "step": 25245 }, { "epoch": 0.535428728977116, "grad_norm": 0.35518109798431396, "learning_rate": 1.6685240132527216e-05, "loss": 0.5136, "step": 25246 }, { "epoch": 0.5354499374350491, "grad_norm": 0.35978877544403076, "learning_rate": 1.6684992111486367e-05, "loss": 0.4685, "step": 25247 }, { "epoch": 0.5354711458929822, "grad_norm": 0.3327558934688568, "learning_rate": 1.668474408301053e-05, "loss": 0.4325, "step": 25248 }, { "epoch": 0.5354923543509151, "grad_norm": 0.35352545976638794, "learning_rate": 1.6684496047099977e-05, "loss": 0.4898, "step": 25249 }, { "epoch": 0.5355135628088482, "grad_norm": 0.31111952662467957, "learning_rate": 1.6684248003754985e-05, "loss": 0.5067, "step": 25250 }, { "epoch": 0.5355347712667812, "grad_norm": 0.3389623463153839, "learning_rate": 1.6683999952975832e-05, "loss": 0.4854, "step": 25251 }, { "epoch": 0.5355559797247142, "grad_norm": 0.3341521918773651, "learning_rate": 1.668375189476279e-05, "loss": 0.472, "step": 25252 }, { "epoch": 0.5355771881826472, "grad_norm": 0.3587053418159485, "learning_rate": 1.668350382911614e-05, "loss": 0.5027, "step": 25253 }, { "epoch": 0.5355983966405803, "grad_norm": 0.34538012742996216, "learning_rate": 1.6683255756036154e-05, "loss": 0.5498, "step": 25254 }, { "epoch": 0.5356196050985133, "grad_norm": 0.5923030972480774, "learning_rate": 1.668300767552311e-05, "loss": 0.5577, "step": 25255 }, { "epoch": 0.5356408135564463, "grad_norm": 0.31142333149909973, "learning_rate": 1.668275958757728e-05, "loss": 0.4555, "step": 25256 }, { "epoch": 0.5356620220143793, "grad_norm": 0.3289494216442108, "learning_rate": 1.6682511492198945e-05, "loss": 0.5198, "step": 25257 }, { "epoch": 0.5356832304723124, "grad_norm": 0.3303506374359131, "learning_rate": 1.6682263389388378e-05, "loss": 0.5185, "step": 25258 }, { "epoch": 0.5357044389302453, "grad_norm": 0.3582816421985626, "learning_rate": 1.668201527914586e-05, "loss": 0.5676, "step": 25259 }, { "epoch": 0.5357256473881784, "grad_norm": 0.33934351801872253, "learning_rate": 1.6681767161471656e-05, "loss": 0.5895, "step": 25260 }, { "epoch": 0.5357468558461115, "grad_norm": 0.3522714078426361, "learning_rate": 1.6681519036366053e-05, "loss": 0.4637, "step": 25261 }, { "epoch": 0.5357680643040444, "grad_norm": 0.32905593514442444, "learning_rate": 1.6681270903829322e-05, "loss": 0.4701, "step": 25262 }, { "epoch": 0.5357892727619775, "grad_norm": 0.32396116852760315, "learning_rate": 1.6681022763861737e-05, "loss": 0.4993, "step": 25263 }, { "epoch": 0.5358104812199105, "grad_norm": 0.3222532570362091, "learning_rate": 1.6680774616463576e-05, "loss": 0.5181, "step": 25264 }, { "epoch": 0.5358316896778436, "grad_norm": 0.3612724244594574, "learning_rate": 1.668052646163512e-05, "loss": 0.4508, "step": 25265 }, { "epoch": 0.5358528981357765, "grad_norm": 0.34648001194000244, "learning_rate": 1.6680278299376636e-05, "loss": 0.5166, "step": 25266 }, { "epoch": 0.5358741065937096, "grad_norm": 0.3533266484737396, "learning_rate": 1.6680030129688406e-05, "loss": 0.48, "step": 25267 }, { "epoch": 0.5358953150516426, "grad_norm": 0.33352699875831604, "learning_rate": 1.6679781952570705e-05, "loss": 0.4175, "step": 25268 }, { "epoch": 0.5359165235095756, "grad_norm": 0.3543286919593811, "learning_rate": 1.6679533768023813e-05, "loss": 0.5134, "step": 25269 }, { "epoch": 0.5359377319675086, "grad_norm": 0.39857208728790283, "learning_rate": 1.6679285576047995e-05, "loss": 0.4929, "step": 25270 }, { "epoch": 0.5359589404254417, "grad_norm": 0.3299086093902588, "learning_rate": 1.6679037376643536e-05, "loss": 0.4438, "step": 25271 }, { "epoch": 0.5359801488833746, "grad_norm": 0.3352210521697998, "learning_rate": 1.667878916981071e-05, "loss": 0.4894, "step": 25272 }, { "epoch": 0.5360013573413077, "grad_norm": 0.36382362246513367, "learning_rate": 1.667854095554979e-05, "loss": 0.4848, "step": 25273 }, { "epoch": 0.5360225657992408, "grad_norm": 0.3313089609146118, "learning_rate": 1.6678292733861055e-05, "loss": 0.6251, "step": 25274 }, { "epoch": 0.5360437742571738, "grad_norm": 0.31869757175445557, "learning_rate": 1.6678044504744787e-05, "loss": 0.4131, "step": 25275 }, { "epoch": 0.5360649827151068, "grad_norm": 0.36496594548225403, "learning_rate": 1.667779626820125e-05, "loss": 0.544, "step": 25276 }, { "epoch": 0.5360861911730398, "grad_norm": 0.33319106698036194, "learning_rate": 1.6677548024230728e-05, "loss": 0.53, "step": 25277 }, { "epoch": 0.5361073996309729, "grad_norm": 0.3286595046520233, "learning_rate": 1.6677299772833496e-05, "loss": 0.6335, "step": 25278 }, { "epoch": 0.5361286080889058, "grad_norm": 0.365958571434021, "learning_rate": 1.6677051514009827e-05, "loss": 0.4972, "step": 25279 }, { "epoch": 0.5361498165468389, "grad_norm": 0.36614760756492615, "learning_rate": 1.667680324776e-05, "loss": 0.4789, "step": 25280 }, { "epoch": 0.5361710250047719, "grad_norm": 0.36642777919769287, "learning_rate": 1.667655497408429e-05, "loss": 0.5813, "step": 25281 }, { "epoch": 0.536192233462705, "grad_norm": 0.3007955551147461, "learning_rate": 1.6676306692982975e-05, "loss": 0.4612, "step": 25282 }, { "epoch": 0.5362134419206379, "grad_norm": 0.3400924801826477, "learning_rate": 1.667605840445633e-05, "loss": 0.5599, "step": 25283 }, { "epoch": 0.536234650378571, "grad_norm": 0.3511846363544464, "learning_rate": 1.667581010850463e-05, "loss": 0.5612, "step": 25284 }, { "epoch": 0.536255858836504, "grad_norm": 0.39439812302589417, "learning_rate": 1.6675561805128147e-05, "loss": 0.4356, "step": 25285 }, { "epoch": 0.536277067294437, "grad_norm": 0.3360115587711334, "learning_rate": 1.667531349432717e-05, "loss": 0.4691, "step": 25286 }, { "epoch": 0.53629827575237, "grad_norm": 0.4529353380203247, "learning_rate": 1.6675065176101965e-05, "loss": 0.4596, "step": 25287 }, { "epoch": 0.5363194842103031, "grad_norm": 0.38060906529426575, "learning_rate": 1.667481685045281e-05, "loss": 0.5248, "step": 25288 }, { "epoch": 0.5363406926682361, "grad_norm": 0.41083985567092896, "learning_rate": 1.667456851737998e-05, "loss": 0.5852, "step": 25289 }, { "epoch": 0.5363619011261691, "grad_norm": 0.3536822497844696, "learning_rate": 1.6674320176883754e-05, "loss": 0.5114, "step": 25290 }, { "epoch": 0.5363831095841022, "grad_norm": 0.36521849036216736, "learning_rate": 1.6674071828964407e-05, "loss": 0.5367, "step": 25291 }, { "epoch": 0.5364043180420351, "grad_norm": 0.34027808904647827, "learning_rate": 1.6673823473622215e-05, "loss": 0.5396, "step": 25292 }, { "epoch": 0.5364255264999682, "grad_norm": 0.37860003113746643, "learning_rate": 1.6673575110857457e-05, "loss": 0.5167, "step": 25293 }, { "epoch": 0.5364467349579012, "grad_norm": 0.3974398970603943, "learning_rate": 1.6673326740670406e-05, "loss": 0.4635, "step": 25294 }, { "epoch": 0.5364679434158343, "grad_norm": 0.323275089263916, "learning_rate": 1.6673078363061338e-05, "loss": 0.5673, "step": 25295 }, { "epoch": 0.5364891518737672, "grad_norm": 0.3308406174182892, "learning_rate": 1.667282997803053e-05, "loss": 0.5067, "step": 25296 }, { "epoch": 0.5365103603317003, "grad_norm": 0.32701489329338074, "learning_rate": 1.667258158557826e-05, "loss": 0.5011, "step": 25297 }, { "epoch": 0.5365315687896333, "grad_norm": 0.3515970706939697, "learning_rate": 1.6672333185704797e-05, "loss": 0.5286, "step": 25298 }, { "epoch": 0.5365527772475663, "grad_norm": 0.33119872212409973, "learning_rate": 1.667208477841043e-05, "loss": 0.4513, "step": 25299 }, { "epoch": 0.5365739857054993, "grad_norm": 0.3557886481285095, "learning_rate": 1.6671836363695426e-05, "loss": 0.4579, "step": 25300 }, { "epoch": 0.5365951941634324, "grad_norm": 0.32513678073883057, "learning_rate": 1.6671587941560065e-05, "loss": 0.5672, "step": 25301 }, { "epoch": 0.5366164026213655, "grad_norm": 0.6939427852630615, "learning_rate": 1.6671339512004617e-05, "loss": 0.5374, "step": 25302 }, { "epoch": 0.5366376110792984, "grad_norm": 0.3487434983253479, "learning_rate": 1.6671091075029368e-05, "loss": 0.5221, "step": 25303 }, { "epoch": 0.5366588195372315, "grad_norm": 0.35581979155540466, "learning_rate": 1.6670842630634587e-05, "loss": 0.5076, "step": 25304 }, { "epoch": 0.5366800279951645, "grad_norm": 0.36344900727272034, "learning_rate": 1.6670594178820554e-05, "loss": 0.4975, "step": 25305 }, { "epoch": 0.5367012364530975, "grad_norm": 0.36574602127075195, "learning_rate": 1.6670345719587545e-05, "loss": 0.4921, "step": 25306 }, { "epoch": 0.5367224449110305, "grad_norm": 0.3942148983478546, "learning_rate": 1.6670097252935832e-05, "loss": 0.5993, "step": 25307 }, { "epoch": 0.5367436533689636, "grad_norm": 0.42896768450737, "learning_rate": 1.6669848778865696e-05, "loss": 0.4673, "step": 25308 }, { "epoch": 0.5367648618268965, "grad_norm": 0.30586761236190796, "learning_rate": 1.6669600297377412e-05, "loss": 0.5055, "step": 25309 }, { "epoch": 0.5367860702848296, "grad_norm": 0.3600478172302246, "learning_rate": 1.666935180847126e-05, "loss": 0.5054, "step": 25310 }, { "epoch": 0.5368072787427626, "grad_norm": 0.37993091344833374, "learning_rate": 1.666910331214751e-05, "loss": 0.4717, "step": 25311 }, { "epoch": 0.5368284872006956, "grad_norm": 0.36625468730926514, "learning_rate": 1.6668854808406442e-05, "loss": 0.5312, "step": 25312 }, { "epoch": 0.5368496956586286, "grad_norm": 0.3731822669506073, "learning_rate": 1.666860629724833e-05, "loss": 0.5446, "step": 25313 }, { "epoch": 0.5368709041165617, "grad_norm": 0.35796359181404114, "learning_rate": 1.666835777867345e-05, "loss": 0.43, "step": 25314 }, { "epoch": 0.5368921125744948, "grad_norm": 0.3439551889896393, "learning_rate": 1.6668109252682087e-05, "loss": 0.4586, "step": 25315 }, { "epoch": 0.5369133210324277, "grad_norm": 0.35199660062789917, "learning_rate": 1.6667860719274504e-05, "loss": 0.464, "step": 25316 }, { "epoch": 0.5369345294903608, "grad_norm": 0.3102402687072754, "learning_rate": 1.666761217845099e-05, "loss": 0.4745, "step": 25317 }, { "epoch": 0.5369557379482938, "grad_norm": 0.32465386390686035, "learning_rate": 1.6667363630211813e-05, "loss": 0.4687, "step": 25318 }, { "epoch": 0.5369769464062268, "grad_norm": 0.3586457669734955, "learning_rate": 1.6667115074557252e-05, "loss": 0.5631, "step": 25319 }, { "epoch": 0.5369981548641598, "grad_norm": 0.3494618237018585, "learning_rate": 1.6666866511487586e-05, "loss": 0.4833, "step": 25320 }, { "epoch": 0.5370193633220929, "grad_norm": 0.4678605794906616, "learning_rate": 1.6666617941003088e-05, "loss": 0.4379, "step": 25321 }, { "epoch": 0.5370405717800258, "grad_norm": 0.4202677607536316, "learning_rate": 1.6666369363104033e-05, "loss": 0.5981, "step": 25322 }, { "epoch": 0.5370617802379589, "grad_norm": 0.40496301651000977, "learning_rate": 1.66661207777907e-05, "loss": 0.5322, "step": 25323 }, { "epoch": 0.5370829886958919, "grad_norm": 0.37391120195388794, "learning_rate": 1.6665872185063366e-05, "loss": 0.5363, "step": 25324 }, { "epoch": 0.537104197153825, "grad_norm": 0.34572842717170715, "learning_rate": 1.6665623584922306e-05, "loss": 0.4682, "step": 25325 }, { "epoch": 0.5371254056117579, "grad_norm": 0.34234967827796936, "learning_rate": 1.6665374977367798e-05, "loss": 0.4737, "step": 25326 }, { "epoch": 0.537146614069691, "grad_norm": 0.3892197012901306, "learning_rate": 1.6665126362400118e-05, "loss": 0.4891, "step": 25327 }, { "epoch": 0.537167822527624, "grad_norm": 0.34104329347610474, "learning_rate": 1.6664877740019547e-05, "loss": 0.5151, "step": 25328 }, { "epoch": 0.537189030985557, "grad_norm": 0.3137524425983429, "learning_rate": 1.6664629110226348e-05, "loss": 0.4328, "step": 25329 }, { "epoch": 0.5372102394434901, "grad_norm": 0.36782506108283997, "learning_rate": 1.6664380473020814e-05, "loss": 0.4511, "step": 25330 }, { "epoch": 0.5372314479014231, "grad_norm": 0.3709847629070282, "learning_rate": 1.6664131828403213e-05, "loss": 0.4608, "step": 25331 }, { "epoch": 0.5372526563593562, "grad_norm": 0.3723719120025635, "learning_rate": 1.666388317637382e-05, "loss": 0.5187, "step": 25332 }, { "epoch": 0.5372738648172891, "grad_norm": 0.3533393442630768, "learning_rate": 1.6663634516932916e-05, "loss": 0.4701, "step": 25333 }, { "epoch": 0.5372950732752222, "grad_norm": 0.38011249899864197, "learning_rate": 1.6663385850080774e-05, "loss": 0.4936, "step": 25334 }, { "epoch": 0.5373162817331552, "grad_norm": 0.36372455954551697, "learning_rate": 1.666313717581767e-05, "loss": 0.4758, "step": 25335 }, { "epoch": 0.5373374901910882, "grad_norm": 0.36560919880867004, "learning_rate": 1.6662888494143886e-05, "loss": 0.5212, "step": 25336 }, { "epoch": 0.5373586986490212, "grad_norm": 0.4107073247432709, "learning_rate": 1.6662639805059692e-05, "loss": 0.5017, "step": 25337 }, { "epoch": 0.5373799071069543, "grad_norm": 0.3852400481700897, "learning_rate": 1.6662391108565372e-05, "loss": 0.5345, "step": 25338 }, { "epoch": 0.5374011155648872, "grad_norm": 0.3399050831794739, "learning_rate": 1.6662142404661197e-05, "loss": 0.4465, "step": 25339 }, { "epoch": 0.5374223240228203, "grad_norm": 0.3700256943702698, "learning_rate": 1.6661893693347443e-05, "loss": 0.4966, "step": 25340 }, { "epoch": 0.5374435324807533, "grad_norm": 0.3432415723800659, "learning_rate": 1.666164497462439e-05, "loss": 0.4479, "step": 25341 }, { "epoch": 0.5374647409386863, "grad_norm": 0.35646799206733704, "learning_rate": 1.6661396248492313e-05, "loss": 0.4862, "step": 25342 }, { "epoch": 0.5374859493966194, "grad_norm": 0.3342593312263489, "learning_rate": 1.6661147514951492e-05, "loss": 0.5507, "step": 25343 }, { "epoch": 0.5375071578545524, "grad_norm": 0.39815813302993774, "learning_rate": 1.66608987740022e-05, "loss": 0.5879, "step": 25344 }, { "epoch": 0.5375283663124855, "grad_norm": 0.38619375228881836, "learning_rate": 1.666065002564471e-05, "loss": 0.4893, "step": 25345 }, { "epoch": 0.5375495747704184, "grad_norm": 0.3421049118041992, "learning_rate": 1.6660401269879307e-05, "loss": 0.5288, "step": 25346 }, { "epoch": 0.5375707832283515, "grad_norm": 0.43760231137275696, "learning_rate": 1.666015250670626e-05, "loss": 0.4779, "step": 25347 }, { "epoch": 0.5375919916862845, "grad_norm": 0.3486179709434509, "learning_rate": 1.6659903736125853e-05, "loss": 0.5499, "step": 25348 }, { "epoch": 0.5376132001442175, "grad_norm": 0.3382026255130768, "learning_rate": 1.6659654958138357e-05, "loss": 0.5337, "step": 25349 }, { "epoch": 0.5376344086021505, "grad_norm": 0.3450939953327179, "learning_rate": 1.665940617274405e-05, "loss": 0.5539, "step": 25350 }, { "epoch": 0.5376556170600836, "grad_norm": 0.3617558181285858, "learning_rate": 1.6659157379943215e-05, "loss": 0.5473, "step": 25351 }, { "epoch": 0.5376768255180165, "grad_norm": 0.38920941948890686, "learning_rate": 1.6658908579736115e-05, "loss": 0.5152, "step": 25352 }, { "epoch": 0.5376980339759496, "grad_norm": 0.39848896861076355, "learning_rate": 1.665865977212304e-05, "loss": 0.5162, "step": 25353 }, { "epoch": 0.5377192424338826, "grad_norm": 0.32572293281555176, "learning_rate": 1.665841095710426e-05, "loss": 0.4198, "step": 25354 }, { "epoch": 0.5377404508918157, "grad_norm": 0.41062667965888977, "learning_rate": 1.665816213468005e-05, "loss": 0.5237, "step": 25355 }, { "epoch": 0.5377616593497487, "grad_norm": 0.3862508535385132, "learning_rate": 1.6657913304850692e-05, "loss": 0.4936, "step": 25356 }, { "epoch": 0.5377828678076817, "grad_norm": 0.3791061043739319, "learning_rate": 1.665766446761646e-05, "loss": 0.5423, "step": 25357 }, { "epoch": 0.5378040762656148, "grad_norm": 0.36392807960510254, "learning_rate": 1.6657415622977634e-05, "loss": 0.5411, "step": 25358 }, { "epoch": 0.5378252847235477, "grad_norm": 0.39029741287231445, "learning_rate": 1.6657166770934488e-05, "loss": 0.5113, "step": 25359 }, { "epoch": 0.5378464931814808, "grad_norm": 0.3334375023841858, "learning_rate": 1.6656917911487296e-05, "loss": 0.4873, "step": 25360 }, { "epoch": 0.5378677016394138, "grad_norm": 0.340920090675354, "learning_rate": 1.665666904463634e-05, "loss": 0.4747, "step": 25361 }, { "epoch": 0.5378889100973469, "grad_norm": 0.34377095103263855, "learning_rate": 1.6656420170381894e-05, "loss": 0.5452, "step": 25362 }, { "epoch": 0.5379101185552798, "grad_norm": 0.3444557189941406, "learning_rate": 1.6656171288724233e-05, "loss": 0.5029, "step": 25363 }, { "epoch": 0.5379313270132129, "grad_norm": 0.3389633595943451, "learning_rate": 1.665592239966364e-05, "loss": 0.5263, "step": 25364 }, { "epoch": 0.5379525354711459, "grad_norm": 0.4241020977497101, "learning_rate": 1.6655673503200387e-05, "loss": 0.5433, "step": 25365 }, { "epoch": 0.5379737439290789, "grad_norm": 0.34783506393432617, "learning_rate": 1.6655424599334752e-05, "loss": 0.4877, "step": 25366 }, { "epoch": 0.5379949523870119, "grad_norm": 0.2979125380516052, "learning_rate": 1.665517568806701e-05, "loss": 0.4391, "step": 25367 }, { "epoch": 0.538016160844945, "grad_norm": 0.369130939245224, "learning_rate": 1.6654926769397438e-05, "loss": 0.5182, "step": 25368 }, { "epoch": 0.5380373693028779, "grad_norm": 0.3410288989543915, "learning_rate": 1.665467784332632e-05, "loss": 0.4595, "step": 25369 }, { "epoch": 0.538058577760811, "grad_norm": 0.4968820810317993, "learning_rate": 1.665442890985392e-05, "loss": 0.5096, "step": 25370 }, { "epoch": 0.5380797862187441, "grad_norm": 0.4770686626434326, "learning_rate": 1.6654179968980528e-05, "loss": 0.5459, "step": 25371 }, { "epoch": 0.538100994676677, "grad_norm": 0.31824231147766113, "learning_rate": 1.665393102070641e-05, "loss": 0.4716, "step": 25372 }, { "epoch": 0.5381222031346101, "grad_norm": 0.4099403917789459, "learning_rate": 1.665368206503185e-05, "loss": 0.491, "step": 25373 }, { "epoch": 0.5381434115925431, "grad_norm": 0.4764125943183899, "learning_rate": 1.665343310195712e-05, "loss": 0.5271, "step": 25374 }, { "epoch": 0.5381646200504762, "grad_norm": 0.320917546749115, "learning_rate": 1.6653184131482503e-05, "loss": 0.4528, "step": 25375 }, { "epoch": 0.5381858285084091, "grad_norm": 0.4065081477165222, "learning_rate": 1.6652935153608273e-05, "loss": 0.5018, "step": 25376 }, { "epoch": 0.5382070369663422, "grad_norm": 0.3772004544734955, "learning_rate": 1.6652686168334706e-05, "loss": 0.5805, "step": 25377 }, { "epoch": 0.5382282454242752, "grad_norm": 0.35005342960357666, "learning_rate": 1.6652437175662078e-05, "loss": 0.5033, "step": 25378 }, { "epoch": 0.5382494538822082, "grad_norm": 0.3183036148548126, "learning_rate": 1.6652188175590665e-05, "loss": 0.4482, "step": 25379 }, { "epoch": 0.5382706623401412, "grad_norm": 0.35062670707702637, "learning_rate": 1.665193916812075e-05, "loss": 0.4293, "step": 25380 }, { "epoch": 0.5382918707980743, "grad_norm": 0.38394781947135925, "learning_rate": 1.6651690153252605e-05, "loss": 0.499, "step": 25381 }, { "epoch": 0.5383130792560072, "grad_norm": 0.3448401093482971, "learning_rate": 1.6651441130986507e-05, "loss": 0.4699, "step": 25382 }, { "epoch": 0.5383342877139403, "grad_norm": 0.3739127814769745, "learning_rate": 1.6651192101322734e-05, "loss": 0.4788, "step": 25383 }, { "epoch": 0.5383554961718734, "grad_norm": 0.4115130603313446, "learning_rate": 1.6650943064261562e-05, "loss": 0.5377, "step": 25384 }, { "epoch": 0.5383767046298064, "grad_norm": 0.36732542514801025, "learning_rate": 1.665069401980327e-05, "loss": 0.5096, "step": 25385 }, { "epoch": 0.5383979130877394, "grad_norm": 0.3490311801433563, "learning_rate": 1.6650444967948133e-05, "loss": 0.4847, "step": 25386 }, { "epoch": 0.5384191215456724, "grad_norm": 0.3513452410697937, "learning_rate": 1.6650195908696432e-05, "loss": 0.4692, "step": 25387 }, { "epoch": 0.5384403300036055, "grad_norm": 0.3259214162826538, "learning_rate": 1.6649946842048437e-05, "loss": 0.465, "step": 25388 }, { "epoch": 0.5384615384615384, "grad_norm": 0.3985467553138733, "learning_rate": 1.664969776800443e-05, "loss": 0.5668, "step": 25389 }, { "epoch": 0.5384827469194715, "grad_norm": 0.37527939677238464, "learning_rate": 1.6649448686564688e-05, "loss": 0.5806, "step": 25390 }, { "epoch": 0.5385039553774045, "grad_norm": 0.3939899206161499, "learning_rate": 1.6649199597729488e-05, "loss": 0.5139, "step": 25391 }, { "epoch": 0.5385251638353376, "grad_norm": 0.3518364131450653, "learning_rate": 1.6648950501499107e-05, "loss": 0.4865, "step": 25392 }, { "epoch": 0.5385463722932705, "grad_norm": 0.3250337541103363, "learning_rate": 1.6648701397873813e-05, "loss": 0.5443, "step": 25393 }, { "epoch": 0.5385675807512036, "grad_norm": 0.3554430902004242, "learning_rate": 1.66484522868539e-05, "loss": 0.5611, "step": 25394 }, { "epoch": 0.5385887892091366, "grad_norm": 0.43080615997314453, "learning_rate": 1.6648203168439633e-05, "loss": 0.4915, "step": 25395 }, { "epoch": 0.5386099976670696, "grad_norm": 0.4026864469051361, "learning_rate": 1.664795404263129e-05, "loss": 0.5715, "step": 25396 }, { "epoch": 0.5386312061250027, "grad_norm": 0.35959142446517944, "learning_rate": 1.6647704909429152e-05, "loss": 0.5432, "step": 25397 }, { "epoch": 0.5386524145829357, "grad_norm": 0.38302600383758545, "learning_rate": 1.6647455768833496e-05, "loss": 0.5427, "step": 25398 }, { "epoch": 0.5386736230408687, "grad_norm": 0.3949384391307831, "learning_rate": 1.6647206620844595e-05, "loss": 0.5725, "step": 25399 }, { "epoch": 0.5386948314988017, "grad_norm": 0.38455966114997864, "learning_rate": 1.664695746546273e-05, "loss": 0.5613, "step": 25400 }, { "epoch": 0.5387160399567348, "grad_norm": 0.3946421146392822, "learning_rate": 1.6646708302688175e-05, "loss": 0.5576, "step": 25401 }, { "epoch": 0.5387372484146677, "grad_norm": 0.3965626657009125, "learning_rate": 1.664645913252121e-05, "loss": 0.533, "step": 25402 }, { "epoch": 0.5387584568726008, "grad_norm": 0.37252819538116455, "learning_rate": 1.664620995496211e-05, "loss": 0.504, "step": 25403 }, { "epoch": 0.5387796653305338, "grad_norm": 0.3738250732421875, "learning_rate": 1.6645960770011154e-05, "loss": 0.4843, "step": 25404 }, { "epoch": 0.5388008737884669, "grad_norm": 0.33149996399879456, "learning_rate": 1.664571157766862e-05, "loss": 0.4858, "step": 25405 }, { "epoch": 0.5388220822463998, "grad_norm": 0.33190566301345825, "learning_rate": 1.664546237793478e-05, "loss": 0.476, "step": 25406 }, { "epoch": 0.5388432907043329, "grad_norm": 0.3579590916633606, "learning_rate": 1.6645213170809916e-05, "loss": 0.5387, "step": 25407 }, { "epoch": 0.5388644991622659, "grad_norm": 0.36674854159355164, "learning_rate": 1.66449639562943e-05, "loss": 0.5693, "step": 25408 }, { "epoch": 0.5388857076201989, "grad_norm": 0.4073835611343384, "learning_rate": 1.664471473438822e-05, "loss": 0.4472, "step": 25409 }, { "epoch": 0.538906916078132, "grad_norm": 0.35757946968078613, "learning_rate": 1.6644465505091944e-05, "loss": 0.5044, "step": 25410 }, { "epoch": 0.538928124536065, "grad_norm": 0.36296480894088745, "learning_rate": 1.6644216268405747e-05, "loss": 0.5093, "step": 25411 }, { "epoch": 0.538949332993998, "grad_norm": 0.36666420102119446, "learning_rate": 1.6643967024329912e-05, "loss": 0.5279, "step": 25412 }, { "epoch": 0.538970541451931, "grad_norm": 0.3620469570159912, "learning_rate": 1.6643717772864714e-05, "loss": 0.5587, "step": 25413 }, { "epoch": 0.5389917499098641, "grad_norm": 0.5569602251052856, "learning_rate": 1.664346851401043e-05, "loss": 0.5336, "step": 25414 }, { "epoch": 0.5390129583677971, "grad_norm": 0.41491368412971497, "learning_rate": 1.6643219247767344e-05, "loss": 0.5274, "step": 25415 }, { "epoch": 0.5390341668257301, "grad_norm": 0.4223612844944, "learning_rate": 1.6642969974135725e-05, "loss": 0.435, "step": 25416 }, { "epoch": 0.5390553752836631, "grad_norm": 0.3348017632961273, "learning_rate": 1.664272069311585e-05, "loss": 0.4942, "step": 25417 }, { "epoch": 0.5390765837415962, "grad_norm": 0.3576321303844452, "learning_rate": 1.6642471404708e-05, "loss": 0.5023, "step": 25418 }, { "epoch": 0.5390977921995291, "grad_norm": 0.35206857323646545, "learning_rate": 1.664222210891245e-05, "loss": 0.4317, "step": 25419 }, { "epoch": 0.5391190006574622, "grad_norm": 0.3364231288433075, "learning_rate": 1.6641972805729477e-05, "loss": 0.4719, "step": 25420 }, { "epoch": 0.5391402091153952, "grad_norm": 0.33874407410621643, "learning_rate": 1.664172349515936e-05, "loss": 0.4793, "step": 25421 }, { "epoch": 0.5391614175733282, "grad_norm": 0.36662471294403076, "learning_rate": 1.6641474177202378e-05, "loss": 0.4993, "step": 25422 }, { "epoch": 0.5391826260312612, "grad_norm": 0.35475602746009827, "learning_rate": 1.6641224851858807e-05, "loss": 0.4646, "step": 25423 }, { "epoch": 0.5392038344891943, "grad_norm": 0.40671229362487793, "learning_rate": 1.6640975519128923e-05, "loss": 0.5053, "step": 25424 }, { "epoch": 0.5392250429471274, "grad_norm": 0.3132641017436981, "learning_rate": 1.6640726179013e-05, "loss": 0.4484, "step": 25425 }, { "epoch": 0.5392462514050603, "grad_norm": 0.39348891377449036, "learning_rate": 1.6640476831511325e-05, "loss": 0.5485, "step": 25426 }, { "epoch": 0.5392674598629934, "grad_norm": 0.34773170948028564, "learning_rate": 1.6640227476624163e-05, "loss": 0.5043, "step": 25427 }, { "epoch": 0.5392886683209264, "grad_norm": 0.38497790694236755, "learning_rate": 1.6639978114351802e-05, "loss": 0.5031, "step": 25428 }, { "epoch": 0.5393098767788594, "grad_norm": 0.3760392963886261, "learning_rate": 1.6639728744694514e-05, "loss": 0.4979, "step": 25429 }, { "epoch": 0.5393310852367924, "grad_norm": 0.3946022391319275, "learning_rate": 1.663947936765258e-05, "loss": 0.5551, "step": 25430 }, { "epoch": 0.5393522936947255, "grad_norm": 0.3643615245819092, "learning_rate": 1.6639229983226268e-05, "loss": 0.4616, "step": 25431 }, { "epoch": 0.5393735021526584, "grad_norm": 0.35639023780822754, "learning_rate": 1.6638980591415866e-05, "loss": 0.4268, "step": 25432 }, { "epoch": 0.5393947106105915, "grad_norm": 0.4004722535610199, "learning_rate": 1.6638731192221647e-05, "loss": 0.6033, "step": 25433 }, { "epoch": 0.5394159190685245, "grad_norm": 0.3500030040740967, "learning_rate": 1.663848178564389e-05, "loss": 0.4675, "step": 25434 }, { "epoch": 0.5394371275264576, "grad_norm": 0.3743577003479004, "learning_rate": 1.663823237168287e-05, "loss": 0.4646, "step": 25435 }, { "epoch": 0.5394583359843905, "grad_norm": 0.32487940788269043, "learning_rate": 1.6637982950338867e-05, "loss": 0.5478, "step": 25436 }, { "epoch": 0.5394795444423236, "grad_norm": 0.38762184977531433, "learning_rate": 1.6637733521612154e-05, "loss": 0.4602, "step": 25437 }, { "epoch": 0.5395007529002567, "grad_norm": 0.746353268623352, "learning_rate": 1.6637484085503013e-05, "loss": 0.5049, "step": 25438 }, { "epoch": 0.5395219613581896, "grad_norm": 0.3391467034816742, "learning_rate": 1.6637234642011723e-05, "loss": 0.4348, "step": 25439 }, { "epoch": 0.5395431698161227, "grad_norm": 0.36440134048461914, "learning_rate": 1.6636985191138554e-05, "loss": 0.6249, "step": 25440 }, { "epoch": 0.5395643782740557, "grad_norm": 0.922325849533081, "learning_rate": 1.6636735732883787e-05, "loss": 0.5403, "step": 25441 }, { "epoch": 0.5395855867319888, "grad_norm": 0.3279753625392914, "learning_rate": 1.6636486267247708e-05, "loss": 0.4699, "step": 25442 }, { "epoch": 0.5396067951899217, "grad_norm": 0.41130614280700684, "learning_rate": 1.6636236794230577e-05, "loss": 0.5665, "step": 25443 }, { "epoch": 0.5396280036478548, "grad_norm": 0.3527686297893524, "learning_rate": 1.6635987313832686e-05, "loss": 0.4369, "step": 25444 }, { "epoch": 0.5396492121057878, "grad_norm": 0.2938883304595947, "learning_rate": 1.6635737826054308e-05, "loss": 0.401, "step": 25445 }, { "epoch": 0.5396704205637208, "grad_norm": 0.3396279513835907, "learning_rate": 1.663548833089572e-05, "loss": 0.5097, "step": 25446 }, { "epoch": 0.5396916290216538, "grad_norm": 0.33734622597694397, "learning_rate": 1.6635238828357196e-05, "loss": 0.4304, "step": 25447 }, { "epoch": 0.5397128374795869, "grad_norm": 0.4244663119316101, "learning_rate": 1.663498931843902e-05, "loss": 0.5465, "step": 25448 }, { "epoch": 0.5397340459375198, "grad_norm": 0.3884080946445465, "learning_rate": 1.6634739801141466e-05, "loss": 0.4922, "step": 25449 }, { "epoch": 0.5397552543954529, "grad_norm": 0.3597645163536072, "learning_rate": 1.6634490276464812e-05, "loss": 0.4688, "step": 25450 }, { "epoch": 0.539776462853386, "grad_norm": 0.34332701563835144, "learning_rate": 1.6634240744409336e-05, "loss": 0.5076, "step": 25451 }, { "epoch": 0.539797671311319, "grad_norm": 0.3144432306289673, "learning_rate": 1.663399120497531e-05, "loss": 0.4225, "step": 25452 }, { "epoch": 0.539818879769252, "grad_norm": 0.3636758625507355, "learning_rate": 1.6633741658163023e-05, "loss": 0.5233, "step": 25453 }, { "epoch": 0.539840088227185, "grad_norm": 0.3492283523082733, "learning_rate": 1.6633492103972746e-05, "loss": 0.38, "step": 25454 }, { "epoch": 0.5398612966851181, "grad_norm": 0.557531476020813, "learning_rate": 1.6633242542404754e-05, "loss": 0.4564, "step": 25455 }, { "epoch": 0.539882505143051, "grad_norm": 0.34352269768714905, "learning_rate": 1.663299297345933e-05, "loss": 0.5706, "step": 25456 }, { "epoch": 0.5399037136009841, "grad_norm": 0.33015432953834534, "learning_rate": 1.6632743397136746e-05, "loss": 0.5697, "step": 25457 }, { "epoch": 0.5399249220589171, "grad_norm": 0.7164980173110962, "learning_rate": 1.6632493813437284e-05, "loss": 0.4413, "step": 25458 }, { "epoch": 0.5399461305168501, "grad_norm": 0.3732883930206299, "learning_rate": 1.663224422236122e-05, "loss": 0.5362, "step": 25459 }, { "epoch": 0.5399673389747831, "grad_norm": 0.3176955580711365, "learning_rate": 1.6631994623908832e-05, "loss": 0.5141, "step": 25460 }, { "epoch": 0.5399885474327162, "grad_norm": 0.3542892336845398, "learning_rate": 1.6631745018080394e-05, "loss": 0.4962, "step": 25461 }, { "epoch": 0.5400097558906491, "grad_norm": 0.3230418860912323, "learning_rate": 1.663149540487619e-05, "loss": 0.5357, "step": 25462 }, { "epoch": 0.5400309643485822, "grad_norm": 0.3451632857322693, "learning_rate": 1.6631245784296494e-05, "loss": 0.4579, "step": 25463 }, { "epoch": 0.5400521728065152, "grad_norm": 0.3774917721748352, "learning_rate": 1.6630996156341584e-05, "loss": 0.526, "step": 25464 }, { "epoch": 0.5400733812644483, "grad_norm": 0.40015673637390137, "learning_rate": 1.6630746521011737e-05, "loss": 0.5021, "step": 25465 }, { "epoch": 0.5400945897223813, "grad_norm": 0.3504539728164673, "learning_rate": 1.663049687830723e-05, "loss": 0.4465, "step": 25466 }, { "epoch": 0.5401157981803143, "grad_norm": 0.46296951174736023, "learning_rate": 1.6630247228228343e-05, "loss": 0.446, "step": 25467 }, { "epoch": 0.5401370066382474, "grad_norm": 0.3598388433456421, "learning_rate": 1.6629997570775357e-05, "loss": 0.4762, "step": 25468 }, { "epoch": 0.5401582150961803, "grad_norm": 0.35526710748672485, "learning_rate": 1.662974790594854e-05, "loss": 0.5759, "step": 25469 }, { "epoch": 0.5401794235541134, "grad_norm": 0.32555559277534485, "learning_rate": 1.6629498233748173e-05, "loss": 0.447, "step": 25470 }, { "epoch": 0.5402006320120464, "grad_norm": 0.3263533413410187, "learning_rate": 1.6629248554174543e-05, "loss": 0.4997, "step": 25471 }, { "epoch": 0.5402218404699795, "grad_norm": 0.400236040353775, "learning_rate": 1.6628998867227914e-05, "loss": 0.4787, "step": 25472 }, { "epoch": 0.5402430489279124, "grad_norm": 0.32831043004989624, "learning_rate": 1.6628749172908572e-05, "loss": 0.4761, "step": 25473 }, { "epoch": 0.5402642573858455, "grad_norm": 0.39506861567497253, "learning_rate": 1.6628499471216794e-05, "loss": 0.5687, "step": 25474 }, { "epoch": 0.5402854658437785, "grad_norm": 0.3700112998485565, "learning_rate": 1.6628249762152858e-05, "loss": 0.5595, "step": 25475 }, { "epoch": 0.5403066743017115, "grad_norm": 0.3538037836551666, "learning_rate": 1.6628000045717035e-05, "loss": 0.4961, "step": 25476 }, { "epoch": 0.5403278827596445, "grad_norm": 0.3725162744522095, "learning_rate": 1.662775032190961e-05, "loss": 0.5243, "step": 25477 }, { "epoch": 0.5403490912175776, "grad_norm": 0.3603973686695099, "learning_rate": 1.662750059073086e-05, "loss": 0.4544, "step": 25478 }, { "epoch": 0.5403702996755106, "grad_norm": 0.35457703471183777, "learning_rate": 1.662725085218106e-05, "loss": 0.4798, "step": 25479 }, { "epoch": 0.5403915081334436, "grad_norm": 0.3849449157714844, "learning_rate": 1.662700110626049e-05, "loss": 0.4547, "step": 25480 }, { "epoch": 0.5404127165913767, "grad_norm": 0.3406699299812317, "learning_rate": 1.6626751352969428e-05, "loss": 0.51, "step": 25481 }, { "epoch": 0.5404339250493096, "grad_norm": 0.31475332379341125, "learning_rate": 1.662650159230815e-05, "loss": 0.5213, "step": 25482 }, { "epoch": 0.5404551335072427, "grad_norm": 0.3223179280757904, "learning_rate": 1.6626251824276932e-05, "loss": 0.5058, "step": 25483 }, { "epoch": 0.5404763419651757, "grad_norm": 0.4158872067928314, "learning_rate": 1.662600204887606e-05, "loss": 0.5066, "step": 25484 }, { "epoch": 0.5404975504231088, "grad_norm": 0.3671872019767761, "learning_rate": 1.66257522661058e-05, "loss": 0.4837, "step": 25485 }, { "epoch": 0.5405187588810417, "grad_norm": 0.34552741050720215, "learning_rate": 1.6625502475966442e-05, "loss": 0.4693, "step": 25486 }, { "epoch": 0.5405399673389748, "grad_norm": 0.35238760709762573, "learning_rate": 1.6625252678458253e-05, "loss": 0.5961, "step": 25487 }, { "epoch": 0.5405611757969078, "grad_norm": 0.3253527283668518, "learning_rate": 1.6625002873581518e-05, "loss": 0.5122, "step": 25488 }, { "epoch": 0.5405823842548408, "grad_norm": 0.3492546081542969, "learning_rate": 1.6624753061336507e-05, "loss": 0.5383, "step": 25489 }, { "epoch": 0.5406035927127738, "grad_norm": 0.3691394627094269, "learning_rate": 1.662450324172351e-05, "loss": 0.5049, "step": 25490 }, { "epoch": 0.5406248011707069, "grad_norm": 0.3947259783744812, "learning_rate": 1.6624253414742795e-05, "loss": 0.5097, "step": 25491 }, { "epoch": 0.54064600962864, "grad_norm": 0.3636123538017273, "learning_rate": 1.6624003580394643e-05, "loss": 0.5562, "step": 25492 }, { "epoch": 0.5406672180865729, "grad_norm": 0.36030206084251404, "learning_rate": 1.662375373867933e-05, "loss": 0.4588, "step": 25493 }, { "epoch": 0.540688426544506, "grad_norm": 0.38890212774276733, "learning_rate": 1.6623503889597138e-05, "loss": 0.5143, "step": 25494 }, { "epoch": 0.540709635002439, "grad_norm": 0.4376645088195801, "learning_rate": 1.6623254033148344e-05, "loss": 0.4684, "step": 25495 }, { "epoch": 0.540730843460372, "grad_norm": 0.43167924880981445, "learning_rate": 1.662300416933322e-05, "loss": 0.4628, "step": 25496 }, { "epoch": 0.540752051918305, "grad_norm": 0.3479679822921753, "learning_rate": 1.6622754298152054e-05, "loss": 0.5673, "step": 25497 }, { "epoch": 0.5407732603762381, "grad_norm": 0.39593440294265747, "learning_rate": 1.6622504419605112e-05, "loss": 0.5308, "step": 25498 }, { "epoch": 0.540794468834171, "grad_norm": 0.3775053024291992, "learning_rate": 1.6622254533692684e-05, "loss": 0.5119, "step": 25499 }, { "epoch": 0.5408156772921041, "grad_norm": 0.3463537395000458, "learning_rate": 1.662200464041504e-05, "loss": 0.5091, "step": 25500 }, { "epoch": 0.5408368857500371, "grad_norm": 1.5529975891113281, "learning_rate": 1.662175473977246e-05, "loss": 0.4921, "step": 25501 }, { "epoch": 0.5408580942079702, "grad_norm": 0.41178518533706665, "learning_rate": 1.6621504831765217e-05, "loss": 0.4751, "step": 25502 }, { "epoch": 0.5408793026659031, "grad_norm": 0.401226282119751, "learning_rate": 1.66212549163936e-05, "loss": 0.4876, "step": 25503 }, { "epoch": 0.5409005111238362, "grad_norm": 0.3299093246459961, "learning_rate": 1.6621004993657877e-05, "loss": 0.4467, "step": 25504 }, { "epoch": 0.5409217195817692, "grad_norm": 0.36460116505622864, "learning_rate": 1.6620755063558332e-05, "loss": 0.5513, "step": 25505 }, { "epoch": 0.5409429280397022, "grad_norm": 0.36162224411964417, "learning_rate": 1.6620505126095242e-05, "loss": 0.5287, "step": 25506 }, { "epoch": 0.5409641364976353, "grad_norm": 0.359670490026474, "learning_rate": 1.662025518126888e-05, "loss": 0.5275, "step": 25507 }, { "epoch": 0.5409853449555683, "grad_norm": 0.3030189275741577, "learning_rate": 1.662000522907953e-05, "loss": 0.482, "step": 25508 }, { "epoch": 0.5410065534135013, "grad_norm": 0.31918442249298096, "learning_rate": 1.6619755269527466e-05, "loss": 0.4925, "step": 25509 }, { "epoch": 0.5410277618714343, "grad_norm": 0.3349458873271942, "learning_rate": 1.661950530261297e-05, "loss": 0.496, "step": 25510 }, { "epoch": 0.5410489703293674, "grad_norm": 0.4207737147808075, "learning_rate": 1.6619255328336315e-05, "loss": 0.5578, "step": 25511 }, { "epoch": 0.5410701787873003, "grad_norm": 0.37692490220069885, "learning_rate": 1.6619005346697787e-05, "loss": 0.5206, "step": 25512 }, { "epoch": 0.5410913872452334, "grad_norm": 0.31457820534706116, "learning_rate": 1.6618755357697654e-05, "loss": 0.3962, "step": 25513 }, { "epoch": 0.5411125957031664, "grad_norm": 0.3766908645629883, "learning_rate": 1.6618505361336202e-05, "loss": 0.534, "step": 25514 }, { "epoch": 0.5411338041610995, "grad_norm": 0.4272698163986206, "learning_rate": 1.66182553576137e-05, "loss": 0.4376, "step": 25515 }, { "epoch": 0.5411550126190324, "grad_norm": 0.32927924394607544, "learning_rate": 1.6618005346530438e-05, "loss": 0.4178, "step": 25516 }, { "epoch": 0.5411762210769655, "grad_norm": 0.40731945633888245, "learning_rate": 1.6617755328086685e-05, "loss": 0.526, "step": 25517 }, { "epoch": 0.5411974295348985, "grad_norm": 0.3309917747974396, "learning_rate": 1.6617505302282723e-05, "loss": 0.5778, "step": 25518 }, { "epoch": 0.5412186379928315, "grad_norm": 0.35693106055259705, "learning_rate": 1.6617255269118828e-05, "loss": 0.5485, "step": 25519 }, { "epoch": 0.5412398464507646, "grad_norm": 0.37691155076026917, "learning_rate": 1.661700522859528e-05, "loss": 0.4717, "step": 25520 }, { "epoch": 0.5412610549086976, "grad_norm": 0.40166717767715454, "learning_rate": 1.6616755180712355e-05, "loss": 0.5197, "step": 25521 }, { "epoch": 0.5412822633666307, "grad_norm": 0.48858997225761414, "learning_rate": 1.6616505125470333e-05, "loss": 0.5471, "step": 25522 }, { "epoch": 0.5413034718245636, "grad_norm": 0.3512541353702545, "learning_rate": 1.661625506286949e-05, "loss": 0.5516, "step": 25523 }, { "epoch": 0.5413246802824967, "grad_norm": 0.3918294906616211, "learning_rate": 1.6616004992910112e-05, "loss": 0.4941, "step": 25524 }, { "epoch": 0.5413458887404297, "grad_norm": 0.34751561284065247, "learning_rate": 1.6615754915592463e-05, "loss": 0.4878, "step": 25525 }, { "epoch": 0.5413670971983627, "grad_norm": 0.3861120939254761, "learning_rate": 1.6615504830916832e-05, "loss": 0.4995, "step": 25526 }, { "epoch": 0.5413883056562957, "grad_norm": 0.3750368356704712, "learning_rate": 1.6615254738883497e-05, "loss": 0.5542, "step": 25527 }, { "epoch": 0.5414095141142288, "grad_norm": 0.36660036444664, "learning_rate": 1.6615004639492728e-05, "loss": 0.6303, "step": 25528 }, { "epoch": 0.5414307225721617, "grad_norm": 0.3640822768211365, "learning_rate": 1.661475453274481e-05, "loss": 0.4586, "step": 25529 }, { "epoch": 0.5414519310300948, "grad_norm": 0.41317662596702576, "learning_rate": 1.6614504418640023e-05, "loss": 0.5248, "step": 25530 }, { "epoch": 0.5414731394880278, "grad_norm": 0.39532390236854553, "learning_rate": 1.661425429717864e-05, "loss": 0.4866, "step": 25531 }, { "epoch": 0.5414943479459609, "grad_norm": 0.32930803298950195, "learning_rate": 1.661400416836094e-05, "loss": 0.4831, "step": 25532 }, { "epoch": 0.5415155564038939, "grad_norm": 0.37208378314971924, "learning_rate": 1.66137540321872e-05, "loss": 0.5502, "step": 25533 }, { "epoch": 0.5415367648618269, "grad_norm": 0.31357139348983765, "learning_rate": 1.66135038886577e-05, "loss": 0.4352, "step": 25534 }, { "epoch": 0.54155797331976, "grad_norm": 0.3085504472255707, "learning_rate": 1.661325373777272e-05, "loss": 0.4622, "step": 25535 }, { "epoch": 0.5415791817776929, "grad_norm": 0.4316502809524536, "learning_rate": 1.6613003579532536e-05, "loss": 0.4983, "step": 25536 }, { "epoch": 0.541600390235626, "grad_norm": 1.0858855247497559, "learning_rate": 1.661275341393743e-05, "loss": 0.513, "step": 25537 }, { "epoch": 0.541621598693559, "grad_norm": 0.38534700870513916, "learning_rate": 1.6612503240987676e-05, "loss": 0.5092, "step": 25538 }, { "epoch": 0.541642807151492, "grad_norm": 0.33129921555519104, "learning_rate": 1.6612253060683553e-05, "loss": 0.4933, "step": 25539 }, { "epoch": 0.541664015609425, "grad_norm": 0.34355637431144714, "learning_rate": 1.6612002873025334e-05, "loss": 0.5871, "step": 25540 }, { "epoch": 0.5416852240673581, "grad_norm": 0.38461995124816895, "learning_rate": 1.6611752678013307e-05, "loss": 0.4922, "step": 25541 }, { "epoch": 0.541706432525291, "grad_norm": 0.3492242097854614, "learning_rate": 1.6611502475647747e-05, "loss": 0.4904, "step": 25542 }, { "epoch": 0.5417276409832241, "grad_norm": 0.4038306176662445, "learning_rate": 1.661125226592893e-05, "loss": 0.4955, "step": 25543 }, { "epoch": 0.5417488494411571, "grad_norm": 0.34473857283592224, "learning_rate": 1.661100204885714e-05, "loss": 0.478, "step": 25544 }, { "epoch": 0.5417700578990902, "grad_norm": 0.9025671482086182, "learning_rate": 1.661075182443265e-05, "loss": 0.5353, "step": 25545 }, { "epoch": 0.5417912663570231, "grad_norm": 0.36003032326698303, "learning_rate": 1.6610501592655735e-05, "loss": 0.5166, "step": 25546 }, { "epoch": 0.5418124748149562, "grad_norm": 0.32941874861717224, "learning_rate": 1.661025135352668e-05, "loss": 0.5066, "step": 25547 }, { "epoch": 0.5418336832728893, "grad_norm": 0.3707023859024048, "learning_rate": 1.661000110704576e-05, "loss": 0.5098, "step": 25548 }, { "epoch": 0.5418548917308222, "grad_norm": 0.3324768841266632, "learning_rate": 1.6609750853213253e-05, "loss": 0.4309, "step": 25549 }, { "epoch": 0.5418761001887553, "grad_norm": 0.3889928460121155, "learning_rate": 1.660950059202944e-05, "loss": 0.5041, "step": 25550 }, { "epoch": 0.5418973086466883, "grad_norm": 0.34762075543403625, "learning_rate": 1.66092503234946e-05, "loss": 0.4406, "step": 25551 }, { "epoch": 0.5419185171046214, "grad_norm": 0.3387458324432373, "learning_rate": 1.6609000047609004e-05, "loss": 0.5, "step": 25552 }, { "epoch": 0.5419397255625543, "grad_norm": 0.3391335904598236, "learning_rate": 1.660874976437294e-05, "loss": 0.5237, "step": 25553 }, { "epoch": 0.5419609340204874, "grad_norm": 0.37391966581344604, "learning_rate": 1.6608499473786682e-05, "loss": 0.4768, "step": 25554 }, { "epoch": 0.5419821424784204, "grad_norm": 0.7194785475730896, "learning_rate": 1.6608249175850506e-05, "loss": 0.4436, "step": 25555 }, { "epoch": 0.5420033509363534, "grad_norm": 0.40308836102485657, "learning_rate": 1.6607998870564694e-05, "loss": 0.6082, "step": 25556 }, { "epoch": 0.5420245593942864, "grad_norm": 0.42190685868263245, "learning_rate": 1.660774855792952e-05, "loss": 0.5169, "step": 25557 }, { "epoch": 0.5420457678522195, "grad_norm": 0.3689579963684082, "learning_rate": 1.660749823794527e-05, "loss": 0.5375, "step": 25558 }, { "epoch": 0.5420669763101524, "grad_norm": 0.34109458327293396, "learning_rate": 1.6607247910612217e-05, "loss": 0.5294, "step": 25559 }, { "epoch": 0.5420881847680855, "grad_norm": 0.47967275977134705, "learning_rate": 1.6606997575930637e-05, "loss": 0.5001, "step": 25560 }, { "epoch": 0.5421093932260186, "grad_norm": 0.3431667685508728, "learning_rate": 1.6606747233900816e-05, "loss": 0.4691, "step": 25561 }, { "epoch": 0.5421306016839516, "grad_norm": 0.3782481849193573, "learning_rate": 1.6606496884523025e-05, "loss": 0.5158, "step": 25562 }, { "epoch": 0.5421518101418846, "grad_norm": 0.3746800720691681, "learning_rate": 1.660624652779755e-05, "loss": 0.4917, "step": 25563 }, { "epoch": 0.5421730185998176, "grad_norm": 0.3617687225341797, "learning_rate": 1.660599616372466e-05, "loss": 0.5849, "step": 25564 }, { "epoch": 0.5421942270577507, "grad_norm": 0.4101332426071167, "learning_rate": 1.660574579230464e-05, "loss": 0.5528, "step": 25565 }, { "epoch": 0.5422154355156836, "grad_norm": 0.33707424998283386, "learning_rate": 1.6605495413537767e-05, "loss": 0.4374, "step": 25566 }, { "epoch": 0.5422366439736167, "grad_norm": 0.4478713572025299, "learning_rate": 1.660524502742432e-05, "loss": 0.4773, "step": 25567 }, { "epoch": 0.5422578524315497, "grad_norm": 0.39065277576446533, "learning_rate": 1.6604994633964575e-05, "loss": 0.4753, "step": 25568 }, { "epoch": 0.5422790608894827, "grad_norm": 0.3665043115615845, "learning_rate": 1.6604744233158813e-05, "loss": 0.4494, "step": 25569 }, { "epoch": 0.5423002693474157, "grad_norm": 0.3332363963127136, "learning_rate": 1.6604493825007313e-05, "loss": 0.4502, "step": 25570 }, { "epoch": 0.5423214778053488, "grad_norm": 0.34345772862434387, "learning_rate": 1.660424340951035e-05, "loss": 0.4708, "step": 25571 }, { "epoch": 0.5423426862632817, "grad_norm": 1.2219460010528564, "learning_rate": 1.660399298666821e-05, "loss": 0.5431, "step": 25572 }, { "epoch": 0.5423638947212148, "grad_norm": 0.3245224058628082, "learning_rate": 1.660374255648116e-05, "loss": 0.5032, "step": 25573 }, { "epoch": 0.5423851031791479, "grad_norm": 0.3295259475708008, "learning_rate": 1.660349211894949e-05, "loss": 0.4532, "step": 25574 }, { "epoch": 0.5424063116370809, "grad_norm": 0.40402212738990784, "learning_rate": 1.660324167407347e-05, "loss": 0.574, "step": 25575 }, { "epoch": 0.5424275200950139, "grad_norm": 0.3257415294647217, "learning_rate": 1.6602991221853384e-05, "loss": 0.5027, "step": 25576 }, { "epoch": 0.5424487285529469, "grad_norm": 0.3934735059738159, "learning_rate": 1.6602740762289505e-05, "loss": 0.6092, "step": 25577 }, { "epoch": 0.54246993701088, "grad_norm": 0.34999343752861023, "learning_rate": 1.6602490295382122e-05, "loss": 0.4946, "step": 25578 }, { "epoch": 0.5424911454688129, "grad_norm": 0.3652677536010742, "learning_rate": 1.6602239821131498e-05, "loss": 0.5533, "step": 25579 }, { "epoch": 0.542512353926746, "grad_norm": 0.3725440800189972, "learning_rate": 1.660198933953793e-05, "loss": 0.494, "step": 25580 }, { "epoch": 0.542533562384679, "grad_norm": 0.33899548649787903, "learning_rate": 1.6601738850601678e-05, "loss": 0.4132, "step": 25581 }, { "epoch": 0.542554770842612, "grad_norm": 0.39237552881240845, "learning_rate": 1.6601488354323034e-05, "loss": 0.5396, "step": 25582 }, { "epoch": 0.542575979300545, "grad_norm": 0.3418261408805847, "learning_rate": 1.660123785070227e-05, "loss": 0.5733, "step": 25583 }, { "epoch": 0.5425971877584781, "grad_norm": 0.323344349861145, "learning_rate": 1.6600987339739667e-05, "loss": 0.5155, "step": 25584 }, { "epoch": 0.5426183962164111, "grad_norm": 0.6483067870140076, "learning_rate": 1.6600736821435507e-05, "loss": 0.4766, "step": 25585 }, { "epoch": 0.5426396046743441, "grad_norm": 0.38506415486335754, "learning_rate": 1.6600486295790058e-05, "loss": 0.5629, "step": 25586 }, { "epoch": 0.5426608131322771, "grad_norm": 0.37093478441238403, "learning_rate": 1.660023576280361e-05, "loss": 0.4855, "step": 25587 }, { "epoch": 0.5426820215902102, "grad_norm": 0.3372739851474762, "learning_rate": 1.659998522247644e-05, "loss": 0.4407, "step": 25588 }, { "epoch": 0.5427032300481432, "grad_norm": 0.3481745719909668, "learning_rate": 1.6599734674808817e-05, "loss": 0.5498, "step": 25589 }, { "epoch": 0.5427244385060762, "grad_norm": 0.34171900153160095, "learning_rate": 1.6599484119801032e-05, "loss": 0.5403, "step": 25590 }, { "epoch": 0.5427456469640093, "grad_norm": 0.35479867458343506, "learning_rate": 1.6599233557453354e-05, "loss": 0.5168, "step": 25591 }, { "epoch": 0.5427668554219423, "grad_norm": 0.3727724254131317, "learning_rate": 1.6598982987766064e-05, "loss": 0.4686, "step": 25592 }, { "epoch": 0.5427880638798753, "grad_norm": 0.33365827798843384, "learning_rate": 1.6598732410739448e-05, "loss": 0.5147, "step": 25593 }, { "epoch": 0.5428092723378083, "grad_norm": 0.33882373571395874, "learning_rate": 1.6598481826373775e-05, "loss": 0.4596, "step": 25594 }, { "epoch": 0.5428304807957414, "grad_norm": 0.3569566607475281, "learning_rate": 1.659823123466933e-05, "loss": 0.4944, "step": 25595 }, { "epoch": 0.5428516892536743, "grad_norm": 0.40908554196357727, "learning_rate": 1.6597980635626393e-05, "loss": 0.4806, "step": 25596 }, { "epoch": 0.5428728977116074, "grad_norm": 0.36022189259529114, "learning_rate": 1.6597730029245233e-05, "loss": 0.5436, "step": 25597 }, { "epoch": 0.5428941061695404, "grad_norm": 0.7014786601066589, "learning_rate": 1.6597479415526138e-05, "loss": 0.4331, "step": 25598 }, { "epoch": 0.5429153146274734, "grad_norm": 0.6801043748855591, "learning_rate": 1.6597228794469383e-05, "loss": 0.5064, "step": 25599 }, { "epoch": 0.5429365230854064, "grad_norm": 0.3403759300708771, "learning_rate": 1.659697816607525e-05, "loss": 0.53, "step": 25600 }, { "epoch": 0.5429577315433395, "grad_norm": 0.3557280898094177, "learning_rate": 1.659672753034401e-05, "loss": 0.5734, "step": 25601 }, { "epoch": 0.5429789400012726, "grad_norm": 0.38852664828300476, "learning_rate": 1.659647688727595e-05, "loss": 0.529, "step": 25602 }, { "epoch": 0.5430001484592055, "grad_norm": 0.4094688594341278, "learning_rate": 1.6596226236871347e-05, "loss": 0.4345, "step": 25603 }, { "epoch": 0.5430213569171386, "grad_norm": 0.37288132309913635, "learning_rate": 1.6595975579130477e-05, "loss": 0.4586, "step": 25604 }, { "epoch": 0.5430425653750716, "grad_norm": 0.3567338287830353, "learning_rate": 1.659572491405362e-05, "loss": 0.5354, "step": 25605 }, { "epoch": 0.5430637738330046, "grad_norm": 0.4161599278450012, "learning_rate": 1.6595474241641054e-05, "loss": 0.5868, "step": 25606 }, { "epoch": 0.5430849822909376, "grad_norm": 0.40185415744781494, "learning_rate": 1.659522356189306e-05, "loss": 0.5086, "step": 25607 }, { "epoch": 0.5431061907488707, "grad_norm": 0.4627181887626648, "learning_rate": 1.6594972874809915e-05, "loss": 0.4644, "step": 25608 }, { "epoch": 0.5431273992068036, "grad_norm": 0.3530500829219818, "learning_rate": 1.65947221803919e-05, "loss": 0.4842, "step": 25609 }, { "epoch": 0.5431486076647367, "grad_norm": 0.3827725052833557, "learning_rate": 1.6594471478639293e-05, "loss": 0.512, "step": 25610 }, { "epoch": 0.5431698161226697, "grad_norm": 0.33090445399284363, "learning_rate": 1.659422076955237e-05, "loss": 0.4487, "step": 25611 }, { "epoch": 0.5431910245806028, "grad_norm": 0.3808774948120117, "learning_rate": 1.659397005313141e-05, "loss": 0.5938, "step": 25612 }, { "epoch": 0.5432122330385357, "grad_norm": 0.3266225755214691, "learning_rate": 1.65937193293767e-05, "loss": 0.4794, "step": 25613 }, { "epoch": 0.5432334414964688, "grad_norm": 0.3320620656013489, "learning_rate": 1.6593468598288507e-05, "loss": 0.512, "step": 25614 }, { "epoch": 0.5432546499544019, "grad_norm": 0.32553350925445557, "learning_rate": 1.6593217859867118e-05, "loss": 0.5085, "step": 25615 }, { "epoch": 0.5432758584123348, "grad_norm": 0.3556833863258362, "learning_rate": 1.659296711411281e-05, "loss": 0.5727, "step": 25616 }, { "epoch": 0.5432970668702679, "grad_norm": 0.36833229660987854, "learning_rate": 1.659271636102586e-05, "loss": 0.45, "step": 25617 }, { "epoch": 0.5433182753282009, "grad_norm": 0.3165169954299927, "learning_rate": 1.6592465600606548e-05, "loss": 0.438, "step": 25618 }, { "epoch": 0.543339483786134, "grad_norm": 0.3702920079231262, "learning_rate": 1.6592214832855152e-05, "loss": 0.5801, "step": 25619 }, { "epoch": 0.5433606922440669, "grad_norm": 0.34247931838035583, "learning_rate": 1.659196405777195e-05, "loss": 0.4948, "step": 25620 }, { "epoch": 0.543381900702, "grad_norm": 0.33183249831199646, "learning_rate": 1.659171327535723e-05, "loss": 0.4779, "step": 25621 }, { "epoch": 0.543403109159933, "grad_norm": 0.35006052255630493, "learning_rate": 1.6591462485611258e-05, "loss": 0.4941, "step": 25622 }, { "epoch": 0.543424317617866, "grad_norm": 0.4542349874973297, "learning_rate": 1.6591211688534324e-05, "loss": 0.4629, "step": 25623 }, { "epoch": 0.543445526075799, "grad_norm": 0.33862996101379395, "learning_rate": 1.6590960884126697e-05, "loss": 0.4513, "step": 25624 }, { "epoch": 0.5434667345337321, "grad_norm": 0.3864597678184509, "learning_rate": 1.659071007238866e-05, "loss": 0.4999, "step": 25625 }, { "epoch": 0.543487942991665, "grad_norm": 0.38578394055366516, "learning_rate": 1.6590459253320495e-05, "loss": 0.5745, "step": 25626 }, { "epoch": 0.5435091514495981, "grad_norm": 0.37234267592430115, "learning_rate": 1.6590208426922475e-05, "loss": 0.5241, "step": 25627 }, { "epoch": 0.5435303599075311, "grad_norm": 0.33310428261756897, "learning_rate": 1.6589957593194887e-05, "loss": 0.4648, "step": 25628 }, { "epoch": 0.5435515683654641, "grad_norm": 0.3249748945236206, "learning_rate": 1.6589706752138004e-05, "loss": 0.4956, "step": 25629 }, { "epoch": 0.5435727768233972, "grad_norm": 0.5302243828773499, "learning_rate": 1.6589455903752108e-05, "loss": 0.4863, "step": 25630 }, { "epoch": 0.5435939852813302, "grad_norm": 0.3090478777885437, "learning_rate": 1.6589205048037472e-05, "loss": 0.4865, "step": 25631 }, { "epoch": 0.5436151937392633, "grad_norm": 0.4676155745983124, "learning_rate": 1.658895418499438e-05, "loss": 0.4675, "step": 25632 }, { "epoch": 0.5436364021971962, "grad_norm": 0.33540135622024536, "learning_rate": 1.6588703314623115e-05, "loss": 0.4396, "step": 25633 }, { "epoch": 0.5436576106551293, "grad_norm": 0.41528916358947754, "learning_rate": 1.658845243692395e-05, "loss": 0.4261, "step": 25634 }, { "epoch": 0.5436788191130623, "grad_norm": 0.3419639468193054, "learning_rate": 1.6588201551897162e-05, "loss": 0.5552, "step": 25635 }, { "epoch": 0.5437000275709953, "grad_norm": 0.39847898483276367, "learning_rate": 1.6587950659543035e-05, "loss": 0.4979, "step": 25636 }, { "epoch": 0.5437212360289283, "grad_norm": 0.36856427788734436, "learning_rate": 1.6587699759861847e-05, "loss": 0.5074, "step": 25637 }, { "epoch": 0.5437424444868614, "grad_norm": 0.3421643078327179, "learning_rate": 1.6587448852853877e-05, "loss": 0.5112, "step": 25638 }, { "epoch": 0.5437636529447943, "grad_norm": 0.3760182857513428, "learning_rate": 1.6587197938519405e-05, "loss": 0.5913, "step": 25639 }, { "epoch": 0.5437848614027274, "grad_norm": 0.40160486102104187, "learning_rate": 1.6586947016858704e-05, "loss": 0.5739, "step": 25640 }, { "epoch": 0.5438060698606604, "grad_norm": 0.3619864881038666, "learning_rate": 1.658669608787206e-05, "loss": 0.4945, "step": 25641 }, { "epoch": 0.5438272783185935, "grad_norm": 0.3550651967525482, "learning_rate": 1.658644515155975e-05, "loss": 0.4982, "step": 25642 }, { "epoch": 0.5438484867765265, "grad_norm": 0.2941374182701111, "learning_rate": 1.6586194207922052e-05, "loss": 0.4197, "step": 25643 }, { "epoch": 0.5438696952344595, "grad_norm": 0.35150933265686035, "learning_rate": 1.658594325695925e-05, "loss": 0.5503, "step": 25644 }, { "epoch": 0.5438909036923926, "grad_norm": 0.37511029839515686, "learning_rate": 1.6585692298671614e-05, "loss": 0.5214, "step": 25645 }, { "epoch": 0.5439121121503255, "grad_norm": 0.3536311089992523, "learning_rate": 1.6585441333059434e-05, "loss": 0.5078, "step": 25646 }, { "epoch": 0.5439333206082586, "grad_norm": 0.33386772871017456, "learning_rate": 1.6585190360122977e-05, "loss": 0.5223, "step": 25647 }, { "epoch": 0.5439545290661916, "grad_norm": 0.41144922375679016, "learning_rate": 1.658493937986253e-05, "loss": 0.5416, "step": 25648 }, { "epoch": 0.5439757375241246, "grad_norm": 0.34475255012512207, "learning_rate": 1.6584688392278377e-05, "loss": 0.4717, "step": 25649 }, { "epoch": 0.5439969459820576, "grad_norm": 0.37530288100242615, "learning_rate": 1.6584437397370784e-05, "loss": 0.542, "step": 25650 }, { "epoch": 0.5440181544399907, "grad_norm": 0.3443559408187866, "learning_rate": 1.6584186395140038e-05, "loss": 0.5075, "step": 25651 }, { "epoch": 0.5440393628979237, "grad_norm": 0.3307485580444336, "learning_rate": 1.6583935385586418e-05, "loss": 0.4046, "step": 25652 }, { "epoch": 0.5440605713558567, "grad_norm": 0.38037195801734924, "learning_rate": 1.65836843687102e-05, "loss": 0.5214, "step": 25653 }, { "epoch": 0.5440817798137897, "grad_norm": 0.3471587300300598, "learning_rate": 1.658343334451167e-05, "loss": 0.4828, "step": 25654 }, { "epoch": 0.5441029882717228, "grad_norm": 0.4751342236995697, "learning_rate": 1.65831823129911e-05, "loss": 0.5583, "step": 25655 }, { "epoch": 0.5441241967296558, "grad_norm": 0.35579875111579895, "learning_rate": 1.658293127414877e-05, "loss": 0.5099, "step": 25656 }, { "epoch": 0.5441454051875888, "grad_norm": 0.3615850508213043, "learning_rate": 1.6582680227984964e-05, "loss": 0.4913, "step": 25657 }, { "epoch": 0.5441666136455219, "grad_norm": 0.33074745535850525, "learning_rate": 1.6582429174499956e-05, "loss": 0.4172, "step": 25658 }, { "epoch": 0.5441878221034548, "grad_norm": 0.3862382471561432, "learning_rate": 1.658217811369403e-05, "loss": 0.4502, "step": 25659 }, { "epoch": 0.5442090305613879, "grad_norm": 0.39145132899284363, "learning_rate": 1.6581927045567464e-05, "loss": 0.5637, "step": 25660 }, { "epoch": 0.5442302390193209, "grad_norm": 0.32562994956970215, "learning_rate": 1.6581675970120532e-05, "loss": 0.4912, "step": 25661 }, { "epoch": 0.544251447477254, "grad_norm": 0.347533643245697, "learning_rate": 1.6581424887353517e-05, "loss": 0.4694, "step": 25662 }, { "epoch": 0.5442726559351869, "grad_norm": 0.36369049549102783, "learning_rate": 1.65811737972667e-05, "loss": 0.5099, "step": 25663 }, { "epoch": 0.54429386439312, "grad_norm": 0.32613998651504517, "learning_rate": 1.658092269986036e-05, "loss": 0.5092, "step": 25664 }, { "epoch": 0.544315072851053, "grad_norm": 0.49432483315467834, "learning_rate": 1.658067159513477e-05, "loss": 0.4595, "step": 25665 }, { "epoch": 0.544336281308986, "grad_norm": 0.3828275501728058, "learning_rate": 1.658042048309022e-05, "loss": 0.5537, "step": 25666 }, { "epoch": 0.544357489766919, "grad_norm": 0.33988168835639954, "learning_rate": 1.6580169363726982e-05, "loss": 0.5376, "step": 25667 }, { "epoch": 0.5443786982248521, "grad_norm": 0.32454055547714233, "learning_rate": 1.6579918237045334e-05, "loss": 0.3823, "step": 25668 }, { "epoch": 0.544399906682785, "grad_norm": 0.37964993715286255, "learning_rate": 1.657966710304556e-05, "loss": 0.5481, "step": 25669 }, { "epoch": 0.5444211151407181, "grad_norm": 0.3645167052745819, "learning_rate": 1.657941596172794e-05, "loss": 0.5201, "step": 25670 }, { "epoch": 0.5444423235986512, "grad_norm": 0.3591403663158417, "learning_rate": 1.6579164813092747e-05, "loss": 0.5554, "step": 25671 }, { "epoch": 0.5444635320565842, "grad_norm": 0.34933850169181824, "learning_rate": 1.6578913657140267e-05, "loss": 0.4442, "step": 25672 }, { "epoch": 0.5444847405145172, "grad_norm": 0.3621234893798828, "learning_rate": 1.6578662493870773e-05, "loss": 0.4902, "step": 25673 }, { "epoch": 0.5445059489724502, "grad_norm": 0.3818506896495819, "learning_rate": 1.6578411323284548e-05, "loss": 0.595, "step": 25674 }, { "epoch": 0.5445271574303833, "grad_norm": 0.3410865068435669, "learning_rate": 1.6578160145381873e-05, "loss": 0.6015, "step": 25675 }, { "epoch": 0.5445483658883162, "grad_norm": 0.31691768765449524, "learning_rate": 1.6577908960163025e-05, "loss": 0.5129, "step": 25676 }, { "epoch": 0.5445695743462493, "grad_norm": 0.41735848784446716, "learning_rate": 1.6577657767628282e-05, "loss": 0.4989, "step": 25677 }, { "epoch": 0.5445907828041823, "grad_norm": 0.3274226188659668, "learning_rate": 1.657740656777793e-05, "loss": 0.5647, "step": 25678 }, { "epoch": 0.5446119912621153, "grad_norm": 0.4278508424758911, "learning_rate": 1.657715536061224e-05, "loss": 0.4924, "step": 25679 }, { "epoch": 0.5446331997200483, "grad_norm": 0.4126835763454437, "learning_rate": 1.6576904146131494e-05, "loss": 0.5616, "step": 25680 }, { "epoch": 0.5446544081779814, "grad_norm": 0.3896533250808716, "learning_rate": 1.6576652924335975e-05, "loss": 0.5004, "step": 25681 }, { "epoch": 0.5446756166359144, "grad_norm": 0.3452974855899811, "learning_rate": 1.657640169522596e-05, "loss": 0.4698, "step": 25682 }, { "epoch": 0.5446968250938474, "grad_norm": 0.33814331889152527, "learning_rate": 1.6576150458801724e-05, "loss": 0.4658, "step": 25683 }, { "epoch": 0.5447180335517805, "grad_norm": 0.41175299882888794, "learning_rate": 1.6575899215063555e-05, "loss": 0.5255, "step": 25684 }, { "epoch": 0.5447392420097135, "grad_norm": 0.3778455853462219, "learning_rate": 1.6575647964011724e-05, "loss": 0.5361, "step": 25685 }, { "epoch": 0.5447604504676465, "grad_norm": 0.42898744344711304, "learning_rate": 1.657539670564652e-05, "loss": 0.5834, "step": 25686 }, { "epoch": 0.5447816589255795, "grad_norm": 0.3406603932380676, "learning_rate": 1.657514543996821e-05, "loss": 0.4373, "step": 25687 }, { "epoch": 0.5448028673835126, "grad_norm": 0.3293513059616089, "learning_rate": 1.6574894166977085e-05, "loss": 0.5271, "step": 25688 }, { "epoch": 0.5448240758414455, "grad_norm": 0.3785044550895691, "learning_rate": 1.657464288667342e-05, "loss": 0.6549, "step": 25689 }, { "epoch": 0.5448452842993786, "grad_norm": 0.3106648921966553, "learning_rate": 1.6574391599057493e-05, "loss": 0.4058, "step": 25690 }, { "epoch": 0.5448664927573116, "grad_norm": 0.37246042490005493, "learning_rate": 1.6574140304129586e-05, "loss": 0.484, "step": 25691 }, { "epoch": 0.5448877012152447, "grad_norm": 0.316277414560318, "learning_rate": 1.6573889001889975e-05, "loss": 0.4983, "step": 25692 }, { "epoch": 0.5449089096731776, "grad_norm": 0.4288129210472107, "learning_rate": 1.6573637692338942e-05, "loss": 0.5223, "step": 25693 }, { "epoch": 0.5449301181311107, "grad_norm": 0.38624218106269836, "learning_rate": 1.6573386375476768e-05, "loss": 0.484, "step": 25694 }, { "epoch": 0.5449513265890437, "grad_norm": 0.3839998245239258, "learning_rate": 1.657313505130373e-05, "loss": 0.4976, "step": 25695 }, { "epoch": 0.5449725350469767, "grad_norm": 0.4103288948535919, "learning_rate": 1.6572883719820107e-05, "loss": 0.5378, "step": 25696 }, { "epoch": 0.5449937435049098, "grad_norm": 0.3424244821071625, "learning_rate": 1.6572632381026183e-05, "loss": 0.4741, "step": 25697 }, { "epoch": 0.5450149519628428, "grad_norm": 0.3694012463092804, "learning_rate": 1.6572381034922232e-05, "loss": 0.4705, "step": 25698 }, { "epoch": 0.5450361604207759, "grad_norm": 0.8084296584129333, "learning_rate": 1.6572129681508536e-05, "loss": 0.462, "step": 25699 }, { "epoch": 0.5450573688787088, "grad_norm": 0.3339475989341736, "learning_rate": 1.6571878320785376e-05, "loss": 0.4743, "step": 25700 }, { "epoch": 0.5450785773366419, "grad_norm": 0.3462445139884949, "learning_rate": 1.6571626952753027e-05, "loss": 0.4817, "step": 25701 }, { "epoch": 0.5450997857945749, "grad_norm": 0.3666283190250397, "learning_rate": 1.6571375577411776e-05, "loss": 0.5251, "step": 25702 }, { "epoch": 0.5451209942525079, "grad_norm": 0.3966488540172577, "learning_rate": 1.657112419476189e-05, "loss": 0.3727, "step": 25703 }, { "epoch": 0.5451422027104409, "grad_norm": 0.347184956073761, "learning_rate": 1.6570872804803663e-05, "loss": 0.3913, "step": 25704 }, { "epoch": 0.545163411168374, "grad_norm": 0.45406782627105713, "learning_rate": 1.657062140753737e-05, "loss": 0.5562, "step": 25705 }, { "epoch": 0.5451846196263069, "grad_norm": 0.48988208174705505, "learning_rate": 1.6570370002963288e-05, "loss": 0.5671, "step": 25706 }, { "epoch": 0.54520582808424, "grad_norm": 0.36316099762916565, "learning_rate": 1.6570118591081693e-05, "loss": 0.5262, "step": 25707 }, { "epoch": 0.545227036542173, "grad_norm": 0.48288121819496155, "learning_rate": 1.6569867171892874e-05, "loss": 0.3996, "step": 25708 }, { "epoch": 0.545248245000106, "grad_norm": 0.3117002844810486, "learning_rate": 1.6569615745397104e-05, "loss": 0.4561, "step": 25709 }, { "epoch": 0.545269453458039, "grad_norm": 0.38947078585624695, "learning_rate": 1.6569364311594665e-05, "loss": 0.3899, "step": 25710 }, { "epoch": 0.5452906619159721, "grad_norm": 0.4422842264175415, "learning_rate": 1.6569112870485836e-05, "loss": 0.5675, "step": 25711 }, { "epoch": 0.5453118703739052, "grad_norm": 0.3686985969543457, "learning_rate": 1.6568861422070897e-05, "loss": 0.4562, "step": 25712 }, { "epoch": 0.5453330788318381, "grad_norm": 0.49140337109565735, "learning_rate": 1.6568609966350125e-05, "loss": 0.5403, "step": 25713 }, { "epoch": 0.5453542872897712, "grad_norm": 0.33643051981925964, "learning_rate": 1.6568358503323803e-05, "loss": 0.4634, "step": 25714 }, { "epoch": 0.5453754957477042, "grad_norm": 0.3567512631416321, "learning_rate": 1.6568107032992212e-05, "loss": 0.5391, "step": 25715 }, { "epoch": 0.5453967042056372, "grad_norm": 0.4808482825756073, "learning_rate": 1.656785555535563e-05, "loss": 0.548, "step": 25716 }, { "epoch": 0.5454179126635702, "grad_norm": 0.3657069206237793, "learning_rate": 1.656760407041433e-05, "loss": 0.4577, "step": 25717 }, { "epoch": 0.5454391211215033, "grad_norm": 0.3402407467365265, "learning_rate": 1.65673525781686e-05, "loss": 0.5515, "step": 25718 }, { "epoch": 0.5454603295794362, "grad_norm": 0.392851322889328, "learning_rate": 1.6567101078618724e-05, "loss": 0.4566, "step": 25719 }, { "epoch": 0.5454815380373693, "grad_norm": 0.4221416711807251, "learning_rate": 1.656684957176497e-05, "loss": 0.5458, "step": 25720 }, { "epoch": 0.5455027464953023, "grad_norm": 0.341254860162735, "learning_rate": 1.6566598057607625e-05, "loss": 0.5129, "step": 25721 }, { "epoch": 0.5455239549532354, "grad_norm": 0.3666943311691284, "learning_rate": 1.6566346536146963e-05, "loss": 0.5548, "step": 25722 }, { "epoch": 0.5455451634111683, "grad_norm": 0.4560462236404419, "learning_rate": 1.6566095007383268e-05, "loss": 0.5552, "step": 25723 }, { "epoch": 0.5455663718691014, "grad_norm": 0.3966984748840332, "learning_rate": 1.6565843471316824e-05, "loss": 0.5496, "step": 25724 }, { "epoch": 0.5455875803270345, "grad_norm": 0.3450133502483368, "learning_rate": 1.6565591927947902e-05, "loss": 0.4942, "step": 25725 }, { "epoch": 0.5456087887849674, "grad_norm": 0.35897666215896606, "learning_rate": 1.6565340377276787e-05, "loss": 0.4473, "step": 25726 }, { "epoch": 0.5456299972429005, "grad_norm": 0.3874274492263794, "learning_rate": 1.6565088819303757e-05, "loss": 0.5118, "step": 25727 }, { "epoch": 0.5456512057008335, "grad_norm": 0.37300944328308105, "learning_rate": 1.656483725402909e-05, "loss": 0.5134, "step": 25728 }, { "epoch": 0.5456724141587665, "grad_norm": 0.31390541791915894, "learning_rate": 1.6564585681453073e-05, "loss": 0.4685, "step": 25729 }, { "epoch": 0.5456936226166995, "grad_norm": 0.3520384728908539, "learning_rate": 1.6564334101575977e-05, "loss": 0.4207, "step": 25730 }, { "epoch": 0.5457148310746326, "grad_norm": 0.3332113027572632, "learning_rate": 1.6564082514398085e-05, "loss": 0.4794, "step": 25731 }, { "epoch": 0.5457360395325656, "grad_norm": 0.7374812960624695, "learning_rate": 1.6563830919919683e-05, "loss": 0.5035, "step": 25732 }, { "epoch": 0.5457572479904986, "grad_norm": 0.3221180737018585, "learning_rate": 1.656357931814104e-05, "loss": 0.4651, "step": 25733 }, { "epoch": 0.5457784564484316, "grad_norm": 0.3097998797893524, "learning_rate": 1.6563327709062442e-05, "loss": 0.4582, "step": 25734 }, { "epoch": 0.5457996649063647, "grad_norm": 0.36932799220085144, "learning_rate": 1.6563076092684166e-05, "loss": 0.5265, "step": 25735 }, { "epoch": 0.5458208733642976, "grad_norm": 0.3483108580112457, "learning_rate": 1.65628244690065e-05, "loss": 0.4857, "step": 25736 }, { "epoch": 0.5458420818222307, "grad_norm": 0.3452743589878082, "learning_rate": 1.6562572838029712e-05, "loss": 0.5236, "step": 25737 }, { "epoch": 0.5458632902801638, "grad_norm": 0.3372702896595001, "learning_rate": 1.656232119975409e-05, "loss": 0.4933, "step": 25738 }, { "epoch": 0.5458844987380967, "grad_norm": 0.3453715145587921, "learning_rate": 1.6562069554179912e-05, "loss": 0.4938, "step": 25739 }, { "epoch": 0.5459057071960298, "grad_norm": 0.34405264258384705, "learning_rate": 1.6561817901307455e-05, "loss": 0.4984, "step": 25740 }, { "epoch": 0.5459269156539628, "grad_norm": 0.4367802143096924, "learning_rate": 1.6561566241137003e-05, "loss": 0.5539, "step": 25741 }, { "epoch": 0.5459481241118959, "grad_norm": 0.33841443061828613, "learning_rate": 1.6561314573668834e-05, "loss": 0.4797, "step": 25742 }, { "epoch": 0.5459693325698288, "grad_norm": 0.3513657748699188, "learning_rate": 1.6561062898903225e-05, "loss": 0.5644, "step": 25743 }, { "epoch": 0.5459905410277619, "grad_norm": 1.5163801908493042, "learning_rate": 1.656081121684046e-05, "loss": 0.4924, "step": 25744 }, { "epoch": 0.5460117494856949, "grad_norm": 0.37398263812065125, "learning_rate": 1.6560559527480823e-05, "loss": 0.527, "step": 25745 }, { "epoch": 0.5460329579436279, "grad_norm": 0.47623103857040405, "learning_rate": 1.656030783082458e-05, "loss": 0.552, "step": 25746 }, { "epoch": 0.5460541664015609, "grad_norm": 0.3485317826271057, "learning_rate": 1.6560056126872024e-05, "loss": 0.4605, "step": 25747 }, { "epoch": 0.546075374859494, "grad_norm": 0.36610275506973267, "learning_rate": 1.6559804415623432e-05, "loss": 0.5348, "step": 25748 }, { "epoch": 0.5460965833174269, "grad_norm": 0.4787466526031494, "learning_rate": 1.655955269707908e-05, "loss": 0.4418, "step": 25749 }, { "epoch": 0.54611779177536, "grad_norm": 0.4153136610984802, "learning_rate": 1.6559300971239252e-05, "loss": 0.5659, "step": 25750 }, { "epoch": 0.5461390002332931, "grad_norm": 0.4662277102470398, "learning_rate": 1.6559049238104227e-05, "loss": 0.5606, "step": 25751 }, { "epoch": 0.5461602086912261, "grad_norm": 0.3644927442073822, "learning_rate": 1.6558797497674283e-05, "loss": 0.5189, "step": 25752 }, { "epoch": 0.5461814171491591, "grad_norm": 0.3418596088886261, "learning_rate": 1.65585457499497e-05, "loss": 0.5006, "step": 25753 }, { "epoch": 0.5462026256070921, "grad_norm": 0.3289814591407776, "learning_rate": 1.6558293994930763e-05, "loss": 0.4795, "step": 25754 }, { "epoch": 0.5462238340650252, "grad_norm": 0.38242021203041077, "learning_rate": 1.6558042232617746e-05, "loss": 0.4803, "step": 25755 }, { "epoch": 0.5462450425229581, "grad_norm": 0.3697129786014557, "learning_rate": 1.6557790463010932e-05, "loss": 0.4454, "step": 25756 }, { "epoch": 0.5462662509808912, "grad_norm": 0.40758654475212097, "learning_rate": 1.65575386861106e-05, "loss": 0.4511, "step": 25757 }, { "epoch": 0.5462874594388242, "grad_norm": 0.3318195641040802, "learning_rate": 1.6557286901917033e-05, "loss": 0.457, "step": 25758 }, { "epoch": 0.5463086678967572, "grad_norm": 0.36993318796157837, "learning_rate": 1.6557035110430508e-05, "loss": 0.5029, "step": 25759 }, { "epoch": 0.5463298763546902, "grad_norm": 0.3466576337814331, "learning_rate": 1.65567833116513e-05, "loss": 0.5143, "step": 25760 }, { "epoch": 0.5463510848126233, "grad_norm": 0.3632461428642273, "learning_rate": 1.6556531505579697e-05, "loss": 0.541, "step": 25761 }, { "epoch": 0.5463722932705563, "grad_norm": 0.5274688601493835, "learning_rate": 1.655627969221598e-05, "loss": 0.4563, "step": 25762 }, { "epoch": 0.5463935017284893, "grad_norm": 0.3452245593070984, "learning_rate": 1.6556027871560425e-05, "loss": 0.4821, "step": 25763 }, { "epoch": 0.5464147101864223, "grad_norm": 0.5236344337463379, "learning_rate": 1.655577604361331e-05, "loss": 0.4597, "step": 25764 }, { "epoch": 0.5464359186443554, "grad_norm": 0.3436899185180664, "learning_rate": 1.655552420837492e-05, "loss": 0.4211, "step": 25765 }, { "epoch": 0.5464571271022884, "grad_norm": 0.4954198896884918, "learning_rate": 1.655527236584553e-05, "loss": 0.5686, "step": 25766 }, { "epoch": 0.5464783355602214, "grad_norm": 0.3813568651676178, "learning_rate": 1.6555020516025426e-05, "loss": 0.5198, "step": 25767 }, { "epoch": 0.5464995440181545, "grad_norm": 0.35877880454063416, "learning_rate": 1.6554768658914887e-05, "loss": 0.5134, "step": 25768 }, { "epoch": 0.5465207524760874, "grad_norm": 0.5578936338424683, "learning_rate": 1.6554516794514186e-05, "loss": 0.5442, "step": 25769 }, { "epoch": 0.5465419609340205, "grad_norm": 0.34569570422172546, "learning_rate": 1.6554264922823612e-05, "loss": 0.5192, "step": 25770 }, { "epoch": 0.5465631693919535, "grad_norm": 0.3477497696876526, "learning_rate": 1.655401304384344e-05, "loss": 0.4671, "step": 25771 }, { "epoch": 0.5465843778498866, "grad_norm": 0.40730369091033936, "learning_rate": 1.6553761157573953e-05, "loss": 0.5045, "step": 25772 }, { "epoch": 0.5466055863078195, "grad_norm": 0.3395410478115082, "learning_rate": 1.6553509264015428e-05, "loss": 0.5228, "step": 25773 }, { "epoch": 0.5466267947657526, "grad_norm": 0.3571432828903198, "learning_rate": 1.6553257363168146e-05, "loss": 0.5107, "step": 25774 }, { "epoch": 0.5466480032236856, "grad_norm": 0.3973500728607178, "learning_rate": 1.655300545503239e-05, "loss": 0.5046, "step": 25775 }, { "epoch": 0.5466692116816186, "grad_norm": 0.32682621479034424, "learning_rate": 1.655275353960844e-05, "loss": 0.5078, "step": 25776 }, { "epoch": 0.5466904201395516, "grad_norm": 0.7901997566223145, "learning_rate": 1.6552501616896576e-05, "loss": 0.456, "step": 25777 }, { "epoch": 0.5467116285974847, "grad_norm": 0.32162728905677795, "learning_rate": 1.6552249686897067e-05, "loss": 0.475, "step": 25778 }, { "epoch": 0.5467328370554178, "grad_norm": 0.3521847426891327, "learning_rate": 1.655199774961021e-05, "loss": 0.5739, "step": 25779 }, { "epoch": 0.5467540455133507, "grad_norm": 0.41873565316200256, "learning_rate": 1.655174580503628e-05, "loss": 0.5564, "step": 25780 }, { "epoch": 0.5467752539712838, "grad_norm": 0.39534687995910645, "learning_rate": 1.6551493853175552e-05, "loss": 0.528, "step": 25781 }, { "epoch": 0.5467964624292168, "grad_norm": 0.32694342732429504, "learning_rate": 1.6551241894028308e-05, "loss": 0.4544, "step": 25782 }, { "epoch": 0.5468176708871498, "grad_norm": 0.34704962372779846, "learning_rate": 1.6550989927594835e-05, "loss": 0.528, "step": 25783 }, { "epoch": 0.5468388793450828, "grad_norm": 0.4074017405509949, "learning_rate": 1.65507379538754e-05, "loss": 0.5242, "step": 25784 }, { "epoch": 0.5468600878030159, "grad_norm": 0.38396814465522766, "learning_rate": 1.6550485972870296e-05, "loss": 0.6038, "step": 25785 }, { "epoch": 0.5468812962609488, "grad_norm": 0.36175045371055603, "learning_rate": 1.6550233984579798e-05, "loss": 0.4847, "step": 25786 }, { "epoch": 0.5469025047188819, "grad_norm": 0.3636843264102936, "learning_rate": 1.6549981989004187e-05, "loss": 0.5618, "step": 25787 }, { "epoch": 0.5469237131768149, "grad_norm": 0.3402590751647949, "learning_rate": 1.6549729986143744e-05, "loss": 0.4714, "step": 25788 }, { "epoch": 0.546944921634748, "grad_norm": 0.3287971317768097, "learning_rate": 1.6549477975998747e-05, "loss": 0.5837, "step": 25789 }, { "epoch": 0.5469661300926809, "grad_norm": 0.3281221389770508, "learning_rate": 1.6549225958569476e-05, "loss": 0.4388, "step": 25790 }, { "epoch": 0.546987338550614, "grad_norm": 0.36480051279067993, "learning_rate": 1.6548973933856213e-05, "loss": 0.604, "step": 25791 }, { "epoch": 0.5470085470085471, "grad_norm": 0.6213334202766418, "learning_rate": 1.654872190185924e-05, "loss": 0.5522, "step": 25792 }, { "epoch": 0.54702975546648, "grad_norm": 0.44180983304977417, "learning_rate": 1.6548469862578832e-05, "loss": 0.5087, "step": 25793 }, { "epoch": 0.5470509639244131, "grad_norm": 0.38019120693206787, "learning_rate": 1.6548217816015277e-05, "loss": 0.5817, "step": 25794 }, { "epoch": 0.5470721723823461, "grad_norm": 0.40999671816825867, "learning_rate": 1.654796576216885e-05, "loss": 0.4859, "step": 25795 }, { "epoch": 0.5470933808402791, "grad_norm": 0.4094287157058716, "learning_rate": 1.6547713701039832e-05, "loss": 0.4967, "step": 25796 }, { "epoch": 0.5471145892982121, "grad_norm": 0.3499890863895416, "learning_rate": 1.6547461632628502e-05, "loss": 0.4836, "step": 25797 }, { "epoch": 0.5471357977561452, "grad_norm": 0.4306204915046692, "learning_rate": 1.6547209556935145e-05, "loss": 0.4746, "step": 25798 }, { "epoch": 0.5471570062140781, "grad_norm": 0.37288519740104675, "learning_rate": 1.6546957473960035e-05, "loss": 0.4441, "step": 25799 }, { "epoch": 0.5471782146720112, "grad_norm": 0.35181528329849243, "learning_rate": 1.6546705383703458e-05, "loss": 0.5153, "step": 25800 }, { "epoch": 0.5471994231299442, "grad_norm": 0.35706207156181335, "learning_rate": 1.654645328616569e-05, "loss": 0.4722, "step": 25801 }, { "epoch": 0.5472206315878773, "grad_norm": 0.3861708641052246, "learning_rate": 1.6546201181347017e-05, "loss": 0.5264, "step": 25802 }, { "epoch": 0.5472418400458102, "grad_norm": 0.3337053954601288, "learning_rate": 1.6545949069247713e-05, "loss": 0.4867, "step": 25803 }, { "epoch": 0.5472630485037433, "grad_norm": 0.35618025064468384, "learning_rate": 1.6545696949868062e-05, "loss": 0.5439, "step": 25804 }, { "epoch": 0.5472842569616763, "grad_norm": 0.35992276668548584, "learning_rate": 1.6545444823208346e-05, "loss": 0.4755, "step": 25805 }, { "epoch": 0.5473054654196093, "grad_norm": 0.3547429144382477, "learning_rate": 1.654519268926884e-05, "loss": 0.5003, "step": 25806 }, { "epoch": 0.5473266738775424, "grad_norm": 0.3095438778400421, "learning_rate": 1.6544940548049827e-05, "loss": 0.4713, "step": 25807 }, { "epoch": 0.5473478823354754, "grad_norm": 0.7153695225715637, "learning_rate": 1.6544688399551592e-05, "loss": 0.4505, "step": 25808 }, { "epoch": 0.5473690907934085, "grad_norm": 0.38272547721862793, "learning_rate": 1.6544436243774407e-05, "loss": 0.5023, "step": 25809 }, { "epoch": 0.5473902992513414, "grad_norm": 0.3419692814350128, "learning_rate": 1.654418408071856e-05, "loss": 0.4276, "step": 25810 }, { "epoch": 0.5474115077092745, "grad_norm": 0.3262450098991394, "learning_rate": 1.6543931910384328e-05, "loss": 0.4743, "step": 25811 }, { "epoch": 0.5474327161672075, "grad_norm": 0.5641870498657227, "learning_rate": 1.6543679732771992e-05, "loss": 0.5619, "step": 25812 }, { "epoch": 0.5474539246251405, "grad_norm": 0.3899506628513336, "learning_rate": 1.654342754788183e-05, "loss": 0.4795, "step": 25813 }, { "epoch": 0.5474751330830735, "grad_norm": 0.3250650465488434, "learning_rate": 1.6543175355714125e-05, "loss": 0.4806, "step": 25814 }, { "epoch": 0.5474963415410066, "grad_norm": 0.3825104832649231, "learning_rate": 1.654292315626916e-05, "loss": 0.5054, "step": 25815 }, { "epoch": 0.5475175499989395, "grad_norm": 0.5475152134895325, "learning_rate": 1.654267094954721e-05, "loss": 0.5292, "step": 25816 }, { "epoch": 0.5475387584568726, "grad_norm": 0.39253026247024536, "learning_rate": 1.6542418735548556e-05, "loss": 0.5219, "step": 25817 }, { "epoch": 0.5475599669148056, "grad_norm": 0.33178943395614624, "learning_rate": 1.6542166514273485e-05, "loss": 0.5153, "step": 25818 }, { "epoch": 0.5475811753727386, "grad_norm": 0.4214859902858734, "learning_rate": 1.6541914285722275e-05, "loss": 0.4369, "step": 25819 }, { "epoch": 0.5476023838306717, "grad_norm": 0.3529644310474396, "learning_rate": 1.65416620498952e-05, "loss": 0.5432, "step": 25820 }, { "epoch": 0.5476235922886047, "grad_norm": 0.3564179837703705, "learning_rate": 1.654140980679255e-05, "loss": 0.4567, "step": 25821 }, { "epoch": 0.5476448007465378, "grad_norm": 0.3867756426334381, "learning_rate": 1.6541157556414597e-05, "loss": 0.5001, "step": 25822 }, { "epoch": 0.5476660092044707, "grad_norm": 0.3742397129535675, "learning_rate": 1.6540905298761626e-05, "loss": 0.5216, "step": 25823 }, { "epoch": 0.5476872176624038, "grad_norm": 0.372761607170105, "learning_rate": 1.6540653033833915e-05, "loss": 0.4458, "step": 25824 }, { "epoch": 0.5477084261203368, "grad_norm": 0.3365301489830017, "learning_rate": 1.6540400761631753e-05, "loss": 0.5143, "step": 25825 }, { "epoch": 0.5477296345782698, "grad_norm": 0.37169918417930603, "learning_rate": 1.6540148482155408e-05, "loss": 0.3766, "step": 25826 }, { "epoch": 0.5477508430362028, "grad_norm": 0.32449662685394287, "learning_rate": 1.653989619540517e-05, "loss": 0.5232, "step": 25827 }, { "epoch": 0.5477720514941359, "grad_norm": 0.37944263219833374, "learning_rate": 1.6539643901381316e-05, "loss": 0.4667, "step": 25828 }, { "epoch": 0.5477932599520688, "grad_norm": 0.446251779794693, "learning_rate": 1.6539391600084122e-05, "loss": 0.6021, "step": 25829 }, { "epoch": 0.5478144684100019, "grad_norm": 0.32295307517051697, "learning_rate": 1.6539139291513875e-05, "loss": 0.4558, "step": 25830 }, { "epoch": 0.5478356768679349, "grad_norm": 0.45004233717918396, "learning_rate": 1.653888697567086e-05, "loss": 0.5001, "step": 25831 }, { "epoch": 0.547856885325868, "grad_norm": 0.37357041239738464, "learning_rate": 1.653863465255535e-05, "loss": 0.5826, "step": 25832 }, { "epoch": 0.547878093783801, "grad_norm": 0.35623469948768616, "learning_rate": 1.6538382322167624e-05, "loss": 0.5076, "step": 25833 }, { "epoch": 0.547899302241734, "grad_norm": 0.34364938735961914, "learning_rate": 1.6538129984507966e-05, "loss": 0.5245, "step": 25834 }, { "epoch": 0.5479205106996671, "grad_norm": 0.34393033385276794, "learning_rate": 1.653787763957666e-05, "loss": 0.4755, "step": 25835 }, { "epoch": 0.5479417191576, "grad_norm": 0.38235247135162354, "learning_rate": 1.653762528737398e-05, "loss": 0.5443, "step": 25836 }, { "epoch": 0.5479629276155331, "grad_norm": 0.35055920481681824, "learning_rate": 1.653737292790021e-05, "loss": 0.523, "step": 25837 }, { "epoch": 0.5479841360734661, "grad_norm": 0.3842514157295227, "learning_rate": 1.653712056115563e-05, "loss": 0.4855, "step": 25838 }, { "epoch": 0.5480053445313992, "grad_norm": 0.32453852891921997, "learning_rate": 1.6536868187140525e-05, "loss": 0.5104, "step": 25839 }, { "epoch": 0.5480265529893321, "grad_norm": 0.3573947548866272, "learning_rate": 1.653661580585517e-05, "loss": 0.5481, "step": 25840 }, { "epoch": 0.5480477614472652, "grad_norm": 0.33549070358276367, "learning_rate": 1.653636341729985e-05, "loss": 0.5299, "step": 25841 }, { "epoch": 0.5480689699051982, "grad_norm": 0.3675192594528198, "learning_rate": 1.653611102147484e-05, "loss": 0.5581, "step": 25842 }, { "epoch": 0.5480901783631312, "grad_norm": 0.295846164226532, "learning_rate": 1.6535858618380425e-05, "loss": 0.4059, "step": 25843 }, { "epoch": 0.5481113868210642, "grad_norm": 0.3296491801738739, "learning_rate": 1.6535606208016887e-05, "loss": 0.4706, "step": 25844 }, { "epoch": 0.5481325952789973, "grad_norm": 0.35742008686065674, "learning_rate": 1.6535353790384497e-05, "loss": 0.5423, "step": 25845 }, { "epoch": 0.5481538037369302, "grad_norm": 0.37130996584892273, "learning_rate": 1.6535101365483553e-05, "loss": 0.4818, "step": 25846 }, { "epoch": 0.5481750121948633, "grad_norm": 0.42394834756851196, "learning_rate": 1.6534848933314323e-05, "loss": 0.4818, "step": 25847 }, { "epoch": 0.5481962206527964, "grad_norm": 0.3458147346973419, "learning_rate": 1.653459649387709e-05, "loss": 0.4553, "step": 25848 }, { "epoch": 0.5482174291107293, "grad_norm": 0.5129666924476624, "learning_rate": 1.6534344047172134e-05, "loss": 0.4721, "step": 25849 }, { "epoch": 0.5482386375686624, "grad_norm": 0.4466383159160614, "learning_rate": 1.6534091593199738e-05, "loss": 0.4798, "step": 25850 }, { "epoch": 0.5482598460265954, "grad_norm": 0.3488399088382721, "learning_rate": 1.6533839131960182e-05, "loss": 0.4627, "step": 25851 }, { "epoch": 0.5482810544845285, "grad_norm": 0.3744499981403351, "learning_rate": 1.653358666345375e-05, "loss": 0.5554, "step": 25852 }, { "epoch": 0.5483022629424614, "grad_norm": 0.46406614780426025, "learning_rate": 1.6533334187680715e-05, "loss": 0.4725, "step": 25853 }, { "epoch": 0.5483234714003945, "grad_norm": 0.33364686369895935, "learning_rate": 1.6533081704641366e-05, "loss": 0.4916, "step": 25854 }, { "epoch": 0.5483446798583275, "grad_norm": 0.3146216869354248, "learning_rate": 1.653282921433598e-05, "loss": 0.4471, "step": 25855 }, { "epoch": 0.5483658883162605, "grad_norm": 0.3432253301143646, "learning_rate": 1.6532576716764836e-05, "loss": 0.4544, "step": 25856 }, { "epoch": 0.5483870967741935, "grad_norm": 0.3403053879737854, "learning_rate": 1.6532324211928216e-05, "loss": 0.484, "step": 25857 }, { "epoch": 0.5484083052321266, "grad_norm": 0.32543882727622986, "learning_rate": 1.6532071699826405e-05, "loss": 0.5035, "step": 25858 }, { "epoch": 0.5484295136900595, "grad_norm": 0.36514660716056824, "learning_rate": 1.6531819180459677e-05, "loss": 0.4952, "step": 25859 }, { "epoch": 0.5484507221479926, "grad_norm": 0.362628698348999, "learning_rate": 1.6531566653828323e-05, "loss": 0.4911, "step": 25860 }, { "epoch": 0.5484719306059257, "grad_norm": 0.34010645747184753, "learning_rate": 1.6531314119932612e-05, "loss": 0.4689, "step": 25861 }, { "epoch": 0.5484931390638587, "grad_norm": 0.3356383740901947, "learning_rate": 1.653106157877283e-05, "loss": 0.4852, "step": 25862 }, { "epoch": 0.5485143475217917, "grad_norm": 0.3696678876876831, "learning_rate": 1.653080903034926e-05, "loss": 0.5245, "step": 25863 }, { "epoch": 0.5485355559797247, "grad_norm": 0.3272886872291565, "learning_rate": 1.653055647466218e-05, "loss": 0.4325, "step": 25864 }, { "epoch": 0.5485567644376578, "grad_norm": 0.35221198201179504, "learning_rate": 1.6530303911711873e-05, "loss": 0.4628, "step": 25865 }, { "epoch": 0.5485779728955907, "grad_norm": 0.3339512348175049, "learning_rate": 1.6530051341498618e-05, "loss": 0.5348, "step": 25866 }, { "epoch": 0.5485991813535238, "grad_norm": 0.33552131056785583, "learning_rate": 1.65297987640227e-05, "loss": 0.4883, "step": 25867 }, { "epoch": 0.5486203898114568, "grad_norm": 0.3495352268218994, "learning_rate": 1.6529546179284387e-05, "loss": 0.4752, "step": 25868 }, { "epoch": 0.5486415982693899, "grad_norm": 0.3649557828903198, "learning_rate": 1.6529293587283977e-05, "loss": 0.5601, "step": 25869 }, { "epoch": 0.5486628067273228, "grad_norm": 0.34962907433509827, "learning_rate": 1.6529040988021744e-05, "loss": 0.4944, "step": 25870 }, { "epoch": 0.5486840151852559, "grad_norm": 0.3759896755218506, "learning_rate": 1.6528788381497965e-05, "loss": 0.4881, "step": 25871 }, { "epoch": 0.5487052236431889, "grad_norm": 0.35523292422294617, "learning_rate": 1.6528535767712925e-05, "loss": 0.4671, "step": 25872 }, { "epoch": 0.5487264321011219, "grad_norm": 0.3987213373184204, "learning_rate": 1.6528283146666905e-05, "loss": 0.4726, "step": 25873 }, { "epoch": 0.548747640559055, "grad_norm": 0.3895815312862396, "learning_rate": 1.6528030518360183e-05, "loss": 0.4867, "step": 25874 }, { "epoch": 0.548768849016988, "grad_norm": 0.36214762926101685, "learning_rate": 1.6527777882793047e-05, "loss": 0.5104, "step": 25875 }, { "epoch": 0.548790057474921, "grad_norm": 0.31032106280326843, "learning_rate": 1.6527525239965768e-05, "loss": 0.4538, "step": 25876 }, { "epoch": 0.548811265932854, "grad_norm": 0.33626025915145874, "learning_rate": 1.6527272589878637e-05, "loss": 0.447, "step": 25877 }, { "epoch": 0.5488324743907871, "grad_norm": 0.3183002173900604, "learning_rate": 1.6527019932531926e-05, "loss": 0.4536, "step": 25878 }, { "epoch": 0.54885368284872, "grad_norm": 0.4203079640865326, "learning_rate": 1.6526767267925922e-05, "loss": 0.4904, "step": 25879 }, { "epoch": 0.5488748913066531, "grad_norm": 0.3426753580570221, "learning_rate": 1.6526514596060903e-05, "loss": 0.4672, "step": 25880 }, { "epoch": 0.5488960997645861, "grad_norm": 0.3379870355129242, "learning_rate": 1.652626191693715e-05, "loss": 0.5036, "step": 25881 }, { "epoch": 0.5489173082225192, "grad_norm": 0.3647773861885071, "learning_rate": 1.652600923055495e-05, "loss": 0.5514, "step": 25882 }, { "epoch": 0.5489385166804521, "grad_norm": 0.3127833604812622, "learning_rate": 1.6525756536914578e-05, "loss": 0.4523, "step": 25883 }, { "epoch": 0.5489597251383852, "grad_norm": 0.3627774119377136, "learning_rate": 1.6525503836016313e-05, "loss": 0.4953, "step": 25884 }, { "epoch": 0.5489809335963182, "grad_norm": 0.37524887919425964, "learning_rate": 1.652525112786044e-05, "loss": 0.5236, "step": 25885 }, { "epoch": 0.5490021420542512, "grad_norm": 0.3560594916343689, "learning_rate": 1.652499841244724e-05, "loss": 0.576, "step": 25886 }, { "epoch": 0.5490233505121842, "grad_norm": 0.34005990624427795, "learning_rate": 1.6524745689776992e-05, "loss": 0.4536, "step": 25887 }, { "epoch": 0.5490445589701173, "grad_norm": 0.36059853434562683, "learning_rate": 1.6524492959849983e-05, "loss": 0.5327, "step": 25888 }, { "epoch": 0.5490657674280504, "grad_norm": 0.3645534813404083, "learning_rate": 1.6524240222666486e-05, "loss": 0.4568, "step": 25889 }, { "epoch": 0.5490869758859833, "grad_norm": 0.39132386445999146, "learning_rate": 1.652398747822679e-05, "loss": 0.5311, "step": 25890 }, { "epoch": 0.5491081843439164, "grad_norm": 0.3413692116737366, "learning_rate": 1.6523734726531166e-05, "loss": 0.4878, "step": 25891 }, { "epoch": 0.5491293928018494, "grad_norm": 0.33181032538414, "learning_rate": 1.6523481967579905e-05, "loss": 0.4982, "step": 25892 }, { "epoch": 0.5491506012597824, "grad_norm": 0.3455926179885864, "learning_rate": 1.652322920137328e-05, "loss": 0.5164, "step": 25893 }, { "epoch": 0.5491718097177154, "grad_norm": 0.32443487644195557, "learning_rate": 1.6522976427911577e-05, "loss": 0.457, "step": 25894 }, { "epoch": 0.5491930181756485, "grad_norm": 0.3613309860229492, "learning_rate": 1.652272364719508e-05, "loss": 0.5335, "step": 25895 }, { "epoch": 0.5492142266335814, "grad_norm": 0.4761742353439331, "learning_rate": 1.6522470859224063e-05, "loss": 0.5381, "step": 25896 }, { "epoch": 0.5492354350915145, "grad_norm": 0.336731880903244, "learning_rate": 1.6522218063998813e-05, "loss": 0.5468, "step": 25897 }, { "epoch": 0.5492566435494475, "grad_norm": 0.36347678303718567, "learning_rate": 1.652196526151961e-05, "loss": 0.5564, "step": 25898 }, { "epoch": 0.5492778520073806, "grad_norm": 0.3268340826034546, "learning_rate": 1.6521712451786728e-05, "loss": 0.4878, "step": 25899 }, { "epoch": 0.5492990604653135, "grad_norm": 0.38824957609176636, "learning_rate": 1.6521459634800455e-05, "loss": 0.4679, "step": 25900 }, { "epoch": 0.5493202689232466, "grad_norm": 0.3571928143501282, "learning_rate": 1.6521206810561077e-05, "loss": 0.5017, "step": 25901 }, { "epoch": 0.5493414773811797, "grad_norm": 0.37529152631759644, "learning_rate": 1.6520953979068865e-05, "loss": 0.4393, "step": 25902 }, { "epoch": 0.5493626858391126, "grad_norm": 0.34245777130126953, "learning_rate": 1.6520701140324105e-05, "loss": 0.4045, "step": 25903 }, { "epoch": 0.5493838942970457, "grad_norm": 0.3564486503601074, "learning_rate": 1.652044829432708e-05, "loss": 0.5421, "step": 25904 }, { "epoch": 0.5494051027549787, "grad_norm": 0.36928778886795044, "learning_rate": 1.6520195441078066e-05, "loss": 0.5206, "step": 25905 }, { "epoch": 0.5494263112129117, "grad_norm": 0.34606701135635376, "learning_rate": 1.6519942580577347e-05, "loss": 0.5181, "step": 25906 }, { "epoch": 0.5494475196708447, "grad_norm": 0.3927702009677887, "learning_rate": 1.651968971282521e-05, "loss": 0.5169, "step": 25907 }, { "epoch": 0.5494687281287778, "grad_norm": 0.3884887099266052, "learning_rate": 1.6519436837821926e-05, "loss": 0.4228, "step": 25908 }, { "epoch": 0.5494899365867107, "grad_norm": 0.33044323325157166, "learning_rate": 1.6519183955567784e-05, "loss": 0.5863, "step": 25909 }, { "epoch": 0.5495111450446438, "grad_norm": 0.3590768873691559, "learning_rate": 1.651893106606306e-05, "loss": 0.5025, "step": 25910 }, { "epoch": 0.5495323535025768, "grad_norm": 0.32075008749961853, "learning_rate": 1.6518678169308037e-05, "loss": 0.4223, "step": 25911 }, { "epoch": 0.5495535619605099, "grad_norm": 0.36473163962364197, "learning_rate": 1.6518425265302997e-05, "loss": 0.6692, "step": 25912 }, { "epoch": 0.5495747704184428, "grad_norm": 0.38305792212486267, "learning_rate": 1.6518172354048222e-05, "loss": 0.4633, "step": 25913 }, { "epoch": 0.5495959788763759, "grad_norm": 0.34838685393333435, "learning_rate": 1.651791943554399e-05, "loss": 0.4019, "step": 25914 }, { "epoch": 0.549617187334309, "grad_norm": 0.5116326212882996, "learning_rate": 1.6517666509790587e-05, "loss": 0.4992, "step": 25915 }, { "epoch": 0.5496383957922419, "grad_norm": 0.3698384463787079, "learning_rate": 1.6517413576788293e-05, "loss": 0.5998, "step": 25916 }, { "epoch": 0.549659604250175, "grad_norm": 0.33319902420043945, "learning_rate": 1.6517160636537388e-05, "loss": 0.4635, "step": 25917 }, { "epoch": 0.549680812708108, "grad_norm": 0.5056675672531128, "learning_rate": 1.651690768903815e-05, "loss": 0.4796, "step": 25918 }, { "epoch": 0.549702021166041, "grad_norm": 0.35574471950531006, "learning_rate": 1.6516654734290868e-05, "loss": 0.5195, "step": 25919 }, { "epoch": 0.549723229623974, "grad_norm": 0.32910439372062683, "learning_rate": 1.6516401772295814e-05, "loss": 0.4535, "step": 25920 }, { "epoch": 0.5497444380819071, "grad_norm": 0.33159127831459045, "learning_rate": 1.6516148803053283e-05, "loss": 0.4952, "step": 25921 }, { "epoch": 0.5497656465398401, "grad_norm": 0.36169105768203735, "learning_rate": 1.651589582656354e-05, "loss": 0.4932, "step": 25922 }, { "epoch": 0.5497868549977731, "grad_norm": 0.3282920718193054, "learning_rate": 1.6515642842826876e-05, "loss": 0.544, "step": 25923 }, { "epoch": 0.5498080634557061, "grad_norm": 0.3861982822418213, "learning_rate": 1.6515389851843574e-05, "loss": 0.539, "step": 25924 }, { "epoch": 0.5498292719136392, "grad_norm": 0.47301071882247925, "learning_rate": 1.651513685361391e-05, "loss": 0.5326, "step": 25925 }, { "epoch": 0.5498504803715721, "grad_norm": 0.40065816044807434, "learning_rate": 1.6514883848138166e-05, "loss": 0.4984, "step": 25926 }, { "epoch": 0.5498716888295052, "grad_norm": 0.3794741630554199, "learning_rate": 1.6514630835416627e-05, "loss": 0.5448, "step": 25927 }, { "epoch": 0.5498928972874382, "grad_norm": 0.3353230655193329, "learning_rate": 1.651437781544957e-05, "loss": 0.4939, "step": 25928 }, { "epoch": 0.5499141057453713, "grad_norm": 0.31950581073760986, "learning_rate": 1.651412478823728e-05, "loss": 0.4605, "step": 25929 }, { "epoch": 0.5499353142033043, "grad_norm": 0.3435351848602295, "learning_rate": 1.6513871753780036e-05, "loss": 0.5128, "step": 25930 }, { "epoch": 0.5499565226612373, "grad_norm": 0.3560033142566681, "learning_rate": 1.6513618712078122e-05, "loss": 0.5386, "step": 25931 }, { "epoch": 0.5499777311191704, "grad_norm": 0.43936511874198914, "learning_rate": 1.6513365663131816e-05, "loss": 0.484, "step": 25932 }, { "epoch": 0.5499989395771033, "grad_norm": 0.41000550985336304, "learning_rate": 1.6513112606941403e-05, "loss": 0.5312, "step": 25933 }, { "epoch": 0.5500201480350364, "grad_norm": 0.3710049092769623, "learning_rate": 1.651285954350716e-05, "loss": 0.4667, "step": 25934 }, { "epoch": 0.5500413564929694, "grad_norm": 0.37450265884399414, "learning_rate": 1.6512606472829378e-05, "loss": 0.5049, "step": 25935 }, { "epoch": 0.5500625649509024, "grad_norm": 0.3520229756832123, "learning_rate": 1.6512353394908328e-05, "loss": 0.4587, "step": 25936 }, { "epoch": 0.5500837734088354, "grad_norm": 0.3620515465736389, "learning_rate": 1.6512100309744293e-05, "loss": 0.5004, "step": 25937 }, { "epoch": 0.5501049818667685, "grad_norm": 0.3453475832939148, "learning_rate": 1.6511847217337557e-05, "loss": 0.4864, "step": 25938 }, { "epoch": 0.5501261903247014, "grad_norm": 0.3665992319583893, "learning_rate": 1.6511594117688397e-05, "loss": 0.4448, "step": 25939 }, { "epoch": 0.5501473987826345, "grad_norm": 0.35295018553733826, "learning_rate": 1.6511341010797106e-05, "loss": 0.4756, "step": 25940 }, { "epoch": 0.5501686072405675, "grad_norm": 0.34890586137771606, "learning_rate": 1.6511087896663958e-05, "loss": 0.5723, "step": 25941 }, { "epoch": 0.5501898156985006, "grad_norm": 0.38674432039260864, "learning_rate": 1.651083477528923e-05, "loss": 0.5711, "step": 25942 }, { "epoch": 0.5502110241564336, "grad_norm": 0.36350953578948975, "learning_rate": 1.651058164667321e-05, "loss": 0.5147, "step": 25943 }, { "epoch": 0.5502322326143666, "grad_norm": 0.5905322432518005, "learning_rate": 1.6510328510816182e-05, "loss": 0.555, "step": 25944 }, { "epoch": 0.5502534410722997, "grad_norm": 0.3401528000831604, "learning_rate": 1.6510075367718417e-05, "loss": 0.4457, "step": 25945 }, { "epoch": 0.5502746495302326, "grad_norm": 0.36274126172065735, "learning_rate": 1.6509822217380204e-05, "loss": 0.5388, "step": 25946 }, { "epoch": 0.5502958579881657, "grad_norm": 0.3575633764266968, "learning_rate": 1.6509569059801824e-05, "loss": 0.5615, "step": 25947 }, { "epoch": 0.5503170664460987, "grad_norm": 0.3066645562648773, "learning_rate": 1.6509315894983556e-05, "loss": 0.4327, "step": 25948 }, { "epoch": 0.5503382749040318, "grad_norm": 0.5592600703239441, "learning_rate": 1.6509062722925687e-05, "loss": 0.4769, "step": 25949 }, { "epoch": 0.5503594833619647, "grad_norm": 0.3436990976333618, "learning_rate": 1.650880954362849e-05, "loss": 0.5092, "step": 25950 }, { "epoch": 0.5503806918198978, "grad_norm": 0.3111898601055145, "learning_rate": 1.6508556357092258e-05, "loss": 0.44, "step": 25951 }, { "epoch": 0.5504019002778308, "grad_norm": 0.35810256004333496, "learning_rate": 1.6508303163317264e-05, "loss": 0.4697, "step": 25952 }, { "epoch": 0.5504231087357638, "grad_norm": 0.3192276358604431, "learning_rate": 1.6508049962303787e-05, "loss": 0.4642, "step": 25953 }, { "epoch": 0.5504443171936968, "grad_norm": 0.33279260993003845, "learning_rate": 1.650779675405212e-05, "loss": 0.4544, "step": 25954 }, { "epoch": 0.5504655256516299, "grad_norm": 0.4140267074108124, "learning_rate": 1.6507543538562533e-05, "loss": 0.5116, "step": 25955 }, { "epoch": 0.550486734109563, "grad_norm": 0.32576489448547363, "learning_rate": 1.6507290315835316e-05, "loss": 0.4817, "step": 25956 }, { "epoch": 0.5505079425674959, "grad_norm": 0.3663465678691864, "learning_rate": 1.6507037085870744e-05, "loss": 0.5813, "step": 25957 }, { "epoch": 0.550529151025429, "grad_norm": 0.3574652373790741, "learning_rate": 1.6506783848669106e-05, "loss": 0.4977, "step": 25958 }, { "epoch": 0.550550359483362, "grad_norm": 0.3567464053630829, "learning_rate": 1.6506530604230676e-05, "loss": 0.5011, "step": 25959 }, { "epoch": 0.550571567941295, "grad_norm": 0.3172592222690582, "learning_rate": 1.6506277352555738e-05, "loss": 0.4742, "step": 25960 }, { "epoch": 0.550592776399228, "grad_norm": 0.3129233717918396, "learning_rate": 1.650602409364458e-05, "loss": 0.4646, "step": 25961 }, { "epoch": 0.5506139848571611, "grad_norm": 0.33264124393463135, "learning_rate": 1.6505770827497476e-05, "loss": 0.4757, "step": 25962 }, { "epoch": 0.550635193315094, "grad_norm": 0.33020490407943726, "learning_rate": 1.650551755411471e-05, "loss": 0.4881, "step": 25963 }, { "epoch": 0.5506564017730271, "grad_norm": 0.3376465141773224, "learning_rate": 1.6505264273496565e-05, "loss": 0.487, "step": 25964 }, { "epoch": 0.5506776102309601, "grad_norm": 0.3800882399082184, "learning_rate": 1.6505010985643322e-05, "loss": 0.5611, "step": 25965 }, { "epoch": 0.5506988186888931, "grad_norm": 0.3722570836544037, "learning_rate": 1.6504757690555256e-05, "loss": 0.473, "step": 25966 }, { "epoch": 0.5507200271468261, "grad_norm": 0.3409077823162079, "learning_rate": 1.6504504388232664e-05, "loss": 0.4911, "step": 25967 }, { "epoch": 0.5507412356047592, "grad_norm": 0.49831873178482056, "learning_rate": 1.6504251078675815e-05, "loss": 0.534, "step": 25968 }, { "epoch": 0.5507624440626921, "grad_norm": 0.49009400606155396, "learning_rate": 1.6503997761884992e-05, "loss": 0.5187, "step": 25969 }, { "epoch": 0.5507836525206252, "grad_norm": 0.3514074385166168, "learning_rate": 1.650374443786048e-05, "loss": 0.4848, "step": 25970 }, { "epoch": 0.5508048609785583, "grad_norm": 0.4397019147872925, "learning_rate": 1.650349110660256e-05, "loss": 0.5156, "step": 25971 }, { "epoch": 0.5508260694364913, "grad_norm": 0.31679415702819824, "learning_rate": 1.650323776811152e-05, "loss": 0.4097, "step": 25972 }, { "epoch": 0.5508472778944243, "grad_norm": 0.865492582321167, "learning_rate": 1.650298442238763e-05, "loss": 0.5007, "step": 25973 }, { "epoch": 0.5508684863523573, "grad_norm": 0.3723769783973694, "learning_rate": 1.6502731069431174e-05, "loss": 0.4768, "step": 25974 }, { "epoch": 0.5508896948102904, "grad_norm": 0.3680575489997864, "learning_rate": 1.6502477709242442e-05, "loss": 0.5598, "step": 25975 }, { "epoch": 0.5509109032682233, "grad_norm": 0.31485500931739807, "learning_rate": 1.650222434182171e-05, "loss": 0.4575, "step": 25976 }, { "epoch": 0.5509321117261564, "grad_norm": 0.32066866755485535, "learning_rate": 1.650197096716926e-05, "loss": 0.4852, "step": 25977 }, { "epoch": 0.5509533201840894, "grad_norm": 0.3432852327823639, "learning_rate": 1.6501717585285372e-05, "loss": 0.5541, "step": 25978 }, { "epoch": 0.5509745286420225, "grad_norm": 0.338076114654541, "learning_rate": 1.6501464196170333e-05, "loss": 0.4232, "step": 25979 }, { "epoch": 0.5509957370999554, "grad_norm": 0.47496461868286133, "learning_rate": 1.650121079982442e-05, "loss": 0.5288, "step": 25980 }, { "epoch": 0.5510169455578885, "grad_norm": 0.4376106560230255, "learning_rate": 1.650095739624792e-05, "loss": 0.5875, "step": 25981 }, { "epoch": 0.5510381540158215, "grad_norm": 0.37791651487350464, "learning_rate": 1.650070398544111e-05, "loss": 0.5278, "step": 25982 }, { "epoch": 0.5510593624737545, "grad_norm": 0.33338019251823425, "learning_rate": 1.650045056740427e-05, "loss": 0.4725, "step": 25983 }, { "epoch": 0.5510805709316876, "grad_norm": 0.36491379141807556, "learning_rate": 1.6500197142137692e-05, "loss": 0.5374, "step": 25984 }, { "epoch": 0.5511017793896206, "grad_norm": 0.31625232100486755, "learning_rate": 1.6499943709641647e-05, "loss": 0.5064, "step": 25985 }, { "epoch": 0.5511229878475536, "grad_norm": 0.31637081503868103, "learning_rate": 1.6499690269916424e-05, "loss": 0.4042, "step": 25986 }, { "epoch": 0.5511441963054866, "grad_norm": 0.3367144465446472, "learning_rate": 1.64994368229623e-05, "loss": 0.5218, "step": 25987 }, { "epoch": 0.5511654047634197, "grad_norm": 0.3751540780067444, "learning_rate": 1.6499183368779558e-05, "loss": 0.5732, "step": 25988 }, { "epoch": 0.5511866132213527, "grad_norm": 0.3924674689769745, "learning_rate": 1.649892990736848e-05, "loss": 0.5065, "step": 25989 }, { "epoch": 0.5512078216792857, "grad_norm": 0.3391173481941223, "learning_rate": 1.649867643872935e-05, "loss": 0.5262, "step": 25990 }, { "epoch": 0.5512290301372187, "grad_norm": 0.33355531096458435, "learning_rate": 1.649842296286245e-05, "loss": 0.4221, "step": 25991 }, { "epoch": 0.5512502385951518, "grad_norm": 0.3452758491039276, "learning_rate": 1.649816947976806e-05, "loss": 0.4678, "step": 25992 }, { "epoch": 0.5512714470530847, "grad_norm": 0.3655000925064087, "learning_rate": 1.649791598944646e-05, "loss": 0.5858, "step": 25993 }, { "epoch": 0.5512926555110178, "grad_norm": 0.3593049645423889, "learning_rate": 1.6497662491897937e-05, "loss": 0.4524, "step": 25994 }, { "epoch": 0.5513138639689508, "grad_norm": 0.33866196870803833, "learning_rate": 1.649740898712277e-05, "loss": 0.5208, "step": 25995 }, { "epoch": 0.5513350724268838, "grad_norm": 0.33268454670906067, "learning_rate": 1.649715547512124e-05, "loss": 0.4132, "step": 25996 }, { "epoch": 0.5513562808848169, "grad_norm": 0.4017459452152252, "learning_rate": 1.6496901955893626e-05, "loss": 0.5328, "step": 25997 }, { "epoch": 0.5513774893427499, "grad_norm": 0.34253624081611633, "learning_rate": 1.649664842944022e-05, "loss": 0.4356, "step": 25998 }, { "epoch": 0.551398697800683, "grad_norm": 0.3692229390144348, "learning_rate": 1.6496394895761296e-05, "loss": 0.5018, "step": 25999 }, { "epoch": 0.5514199062586159, "grad_norm": 0.5616862773895264, "learning_rate": 1.649614135485714e-05, "loss": 0.5416, "step": 26000 }, { "epoch": 0.551441114716549, "grad_norm": 0.3540482819080353, "learning_rate": 1.649588780672803e-05, "loss": 0.5145, "step": 26001 }, { "epoch": 0.551462323174482, "grad_norm": 0.36781346797943115, "learning_rate": 1.649563425137425e-05, "loss": 0.5429, "step": 26002 }, { "epoch": 0.551483531632415, "grad_norm": 0.33899450302124023, "learning_rate": 1.6495380688796083e-05, "loss": 0.5028, "step": 26003 }, { "epoch": 0.551504740090348, "grad_norm": 0.32246193289756775, "learning_rate": 1.649512711899381e-05, "loss": 0.4619, "step": 26004 }, { "epoch": 0.5515259485482811, "grad_norm": 0.37983956933021545, "learning_rate": 1.6494873541967712e-05, "loss": 0.5277, "step": 26005 }, { "epoch": 0.551547157006214, "grad_norm": 0.33597686886787415, "learning_rate": 1.649461995771807e-05, "loss": 0.5777, "step": 26006 }, { "epoch": 0.5515683654641471, "grad_norm": 0.34382492303848267, "learning_rate": 1.6494366366245174e-05, "loss": 0.4376, "step": 26007 }, { "epoch": 0.5515895739220801, "grad_norm": 0.3364375829696655, "learning_rate": 1.6494112767549295e-05, "loss": 0.4882, "step": 26008 }, { "epoch": 0.5516107823800132, "grad_norm": 2.0626471042633057, "learning_rate": 1.6493859161630725e-05, "loss": 0.5357, "step": 26009 }, { "epoch": 0.5516319908379461, "grad_norm": 0.35164105892181396, "learning_rate": 1.649360554848974e-05, "loss": 0.5387, "step": 26010 }, { "epoch": 0.5516531992958792, "grad_norm": 0.31888240575790405, "learning_rate": 1.6493351928126617e-05, "loss": 0.3961, "step": 26011 }, { "epoch": 0.5516744077538123, "grad_norm": 0.33900895714759827, "learning_rate": 1.649309830054165e-05, "loss": 0.5518, "step": 26012 }, { "epoch": 0.5516956162117452, "grad_norm": 0.41621458530426025, "learning_rate": 1.6492844665735112e-05, "loss": 0.559, "step": 26013 }, { "epoch": 0.5517168246696783, "grad_norm": 0.35814812779426575, "learning_rate": 1.6492591023707294e-05, "loss": 0.4593, "step": 26014 }, { "epoch": 0.5517380331276113, "grad_norm": 0.3751277029514313, "learning_rate": 1.649233737445847e-05, "loss": 0.5139, "step": 26015 }, { "epoch": 0.5517592415855443, "grad_norm": 0.30967798829078674, "learning_rate": 1.6492083717988922e-05, "loss": 0.4663, "step": 26016 }, { "epoch": 0.5517804500434773, "grad_norm": 0.37227925658226013, "learning_rate": 1.6491830054298938e-05, "loss": 0.4885, "step": 26017 }, { "epoch": 0.5518016585014104, "grad_norm": 0.3507155776023865, "learning_rate": 1.6491576383388797e-05, "loss": 0.5064, "step": 26018 }, { "epoch": 0.5518228669593433, "grad_norm": 0.3694625198841095, "learning_rate": 1.649132270525878e-05, "loss": 0.5011, "step": 26019 }, { "epoch": 0.5518440754172764, "grad_norm": 0.443472683429718, "learning_rate": 1.649106901990917e-05, "loss": 0.4591, "step": 26020 }, { "epoch": 0.5518652838752094, "grad_norm": 0.432860404253006, "learning_rate": 1.6490815327340252e-05, "loss": 0.4825, "step": 26021 }, { "epoch": 0.5518864923331425, "grad_norm": 0.3782559633255005, "learning_rate": 1.6490561627552302e-05, "loss": 0.5267, "step": 26022 }, { "epoch": 0.5519077007910754, "grad_norm": 0.37004655599594116, "learning_rate": 1.6490307920545608e-05, "loss": 0.4951, "step": 26023 }, { "epoch": 0.5519289092490085, "grad_norm": 0.3690352141857147, "learning_rate": 1.6490054206320452e-05, "loss": 0.5197, "step": 26024 }, { "epoch": 0.5519501177069416, "grad_norm": 0.34737566113471985, "learning_rate": 1.6489800484877108e-05, "loss": 0.506, "step": 26025 }, { "epoch": 0.5519713261648745, "grad_norm": 0.38988855481147766, "learning_rate": 1.648954675621587e-05, "loss": 0.5735, "step": 26026 }, { "epoch": 0.5519925346228076, "grad_norm": 0.3533034324645996, "learning_rate": 1.6489293020337013e-05, "loss": 0.5414, "step": 26027 }, { "epoch": 0.5520137430807406, "grad_norm": 0.34032920002937317, "learning_rate": 1.648903927724082e-05, "loss": 0.4761, "step": 26028 }, { "epoch": 0.5520349515386737, "grad_norm": 0.34961041808128357, "learning_rate": 1.6488785526927575e-05, "loss": 0.5113, "step": 26029 }, { "epoch": 0.5520561599966066, "grad_norm": 0.3210674524307251, "learning_rate": 1.6488531769397554e-05, "loss": 0.4471, "step": 26030 }, { "epoch": 0.5520773684545397, "grad_norm": 0.35915258526802063, "learning_rate": 1.648827800465105e-05, "loss": 0.5212, "step": 26031 }, { "epoch": 0.5520985769124727, "grad_norm": 0.331335186958313, "learning_rate": 1.6488024232688337e-05, "loss": 0.567, "step": 26032 }, { "epoch": 0.5521197853704057, "grad_norm": 0.31884172558784485, "learning_rate": 1.6487770453509703e-05, "loss": 0.3828, "step": 26033 }, { "epoch": 0.5521409938283387, "grad_norm": 0.3477497696876526, "learning_rate": 1.6487516667115426e-05, "loss": 0.4399, "step": 26034 }, { "epoch": 0.5521622022862718, "grad_norm": 0.37459596991539, "learning_rate": 1.6487262873505786e-05, "loss": 0.5175, "step": 26035 }, { "epoch": 0.5521834107442047, "grad_norm": 0.3399779200553894, "learning_rate": 1.648700907268107e-05, "loss": 0.5, "step": 26036 }, { "epoch": 0.5522046192021378, "grad_norm": 0.3581697642803192, "learning_rate": 1.648675526464156e-05, "loss": 0.5656, "step": 26037 }, { "epoch": 0.5522258276600709, "grad_norm": 0.34453466534614563, "learning_rate": 1.6486501449387535e-05, "loss": 0.4381, "step": 26038 }, { "epoch": 0.5522470361180039, "grad_norm": 0.3585110604763031, "learning_rate": 1.6486247626919283e-05, "loss": 0.5146, "step": 26039 }, { "epoch": 0.5522682445759369, "grad_norm": 0.3617022633552551, "learning_rate": 1.648599379723708e-05, "loss": 0.4834, "step": 26040 }, { "epoch": 0.5522894530338699, "grad_norm": 0.35433778166770935, "learning_rate": 1.648573996034121e-05, "loss": 0.5519, "step": 26041 }, { "epoch": 0.552310661491803, "grad_norm": 0.3997710645198822, "learning_rate": 1.648548611623196e-05, "loss": 0.5704, "step": 26042 }, { "epoch": 0.5523318699497359, "grad_norm": 0.3794000744819641, "learning_rate": 1.648523226490961e-05, "loss": 0.4708, "step": 26043 }, { "epoch": 0.552353078407669, "grad_norm": 0.34137633442878723, "learning_rate": 1.6484978406374433e-05, "loss": 0.4807, "step": 26044 }, { "epoch": 0.552374286865602, "grad_norm": 0.3891817629337311, "learning_rate": 1.6484724540626728e-05, "loss": 0.4867, "step": 26045 }, { "epoch": 0.552395495323535, "grad_norm": 0.4927288293838501, "learning_rate": 1.6484470667666764e-05, "loss": 0.5307, "step": 26046 }, { "epoch": 0.552416703781468, "grad_norm": 0.4192056357860565, "learning_rate": 1.6484216787494826e-05, "loss": 0.4443, "step": 26047 }, { "epoch": 0.5524379122394011, "grad_norm": 0.4195973575115204, "learning_rate": 1.6483962900111202e-05, "loss": 0.556, "step": 26048 }, { "epoch": 0.552459120697334, "grad_norm": 0.38149842619895935, "learning_rate": 1.648370900551617e-05, "loss": 0.4656, "step": 26049 }, { "epoch": 0.5524803291552671, "grad_norm": 0.39314332604408264, "learning_rate": 1.6483455103710015e-05, "loss": 0.5241, "step": 26050 }, { "epoch": 0.5525015376132001, "grad_norm": 0.36459094285964966, "learning_rate": 1.6483201194693014e-05, "loss": 0.5466, "step": 26051 }, { "epoch": 0.5525227460711332, "grad_norm": 0.3267619013786316, "learning_rate": 1.648294727846546e-05, "loss": 0.5205, "step": 26052 }, { "epoch": 0.5525439545290662, "grad_norm": 0.33981871604919434, "learning_rate": 1.648269335502762e-05, "loss": 0.4869, "step": 26053 }, { "epoch": 0.5525651629869992, "grad_norm": 0.33571669459342957, "learning_rate": 1.6482439424379787e-05, "loss": 0.5051, "step": 26054 }, { "epoch": 0.5525863714449323, "grad_norm": 0.36415958404541016, "learning_rate": 1.6482185486522242e-05, "loss": 0.4766, "step": 26055 }, { "epoch": 0.5526075799028652, "grad_norm": 0.36981281638145447, "learning_rate": 1.6481931541455268e-05, "loss": 0.5467, "step": 26056 }, { "epoch": 0.5526287883607983, "grad_norm": 0.390663743019104, "learning_rate": 1.6481677589179142e-05, "loss": 0.543, "step": 26057 }, { "epoch": 0.5526499968187313, "grad_norm": 0.3878817856311798, "learning_rate": 1.6481423629694157e-05, "loss": 0.5545, "step": 26058 }, { "epoch": 0.5526712052766644, "grad_norm": 0.3493661880493164, "learning_rate": 1.6481169663000585e-05, "loss": 0.3875, "step": 26059 }, { "epoch": 0.5526924137345973, "grad_norm": 0.32175055146217346, "learning_rate": 1.6480915689098712e-05, "loss": 0.528, "step": 26060 }, { "epoch": 0.5527136221925304, "grad_norm": 0.49382156133651733, "learning_rate": 1.6480661707988823e-05, "loss": 0.504, "step": 26061 }, { "epoch": 0.5527348306504634, "grad_norm": 0.390666127204895, "learning_rate": 1.6480407719671198e-05, "loss": 0.4304, "step": 26062 }, { "epoch": 0.5527560391083964, "grad_norm": 0.36890435218811035, "learning_rate": 1.6480153724146115e-05, "loss": 0.5593, "step": 26063 }, { "epoch": 0.5527772475663294, "grad_norm": 0.3111740052700043, "learning_rate": 1.6479899721413867e-05, "loss": 0.4678, "step": 26064 }, { "epoch": 0.5527984560242625, "grad_norm": 0.3650941550731659, "learning_rate": 1.6479645711474726e-05, "loss": 0.5348, "step": 26065 }, { "epoch": 0.5528196644821955, "grad_norm": 0.3923475742340088, "learning_rate": 1.6479391694328986e-05, "loss": 0.562, "step": 26066 }, { "epoch": 0.5528408729401285, "grad_norm": 0.3244357407093048, "learning_rate": 1.6479137669976916e-05, "loss": 0.5616, "step": 26067 }, { "epoch": 0.5528620813980616, "grad_norm": 0.3431836664676666, "learning_rate": 1.6478883638418808e-05, "loss": 0.5061, "step": 26068 }, { "epoch": 0.5528832898559946, "grad_norm": 0.35094040632247925, "learning_rate": 1.6478629599654943e-05, "loss": 0.4816, "step": 26069 }, { "epoch": 0.5529044983139276, "grad_norm": 0.34854695200920105, "learning_rate": 1.6478375553685603e-05, "loss": 0.4905, "step": 26070 }, { "epoch": 0.5529257067718606, "grad_norm": 0.3447343409061432, "learning_rate": 1.647812150051107e-05, "loss": 0.4935, "step": 26071 }, { "epoch": 0.5529469152297937, "grad_norm": 0.33155331015586853, "learning_rate": 1.6477867440131625e-05, "loss": 0.4408, "step": 26072 }, { "epoch": 0.5529681236877266, "grad_norm": 0.3621249496936798, "learning_rate": 1.647761337254755e-05, "loss": 0.6303, "step": 26073 }, { "epoch": 0.5529893321456597, "grad_norm": 0.4050738215446472, "learning_rate": 1.6477359297759132e-05, "loss": 0.5052, "step": 26074 }, { "epoch": 0.5530105406035927, "grad_norm": 0.5429657697677612, "learning_rate": 1.647710521576665e-05, "loss": 0.4856, "step": 26075 }, { "epoch": 0.5530317490615257, "grad_norm": 0.3697175979614258, "learning_rate": 1.647685112657039e-05, "loss": 0.487, "step": 26076 }, { "epoch": 0.5530529575194587, "grad_norm": 0.45110446214675903, "learning_rate": 1.6476597030170635e-05, "loss": 0.5901, "step": 26077 }, { "epoch": 0.5530741659773918, "grad_norm": 0.3325071930885315, "learning_rate": 1.6476342926567662e-05, "loss": 0.5462, "step": 26078 }, { "epoch": 0.5530953744353249, "grad_norm": 0.3409852385520935, "learning_rate": 1.647608881576175e-05, "loss": 0.4149, "step": 26079 }, { "epoch": 0.5531165828932578, "grad_norm": 0.299445241689682, "learning_rate": 1.6475834697753198e-05, "loss": 0.4273, "step": 26080 }, { "epoch": 0.5531377913511909, "grad_norm": 0.3336265981197357, "learning_rate": 1.6475580572542275e-05, "loss": 0.5216, "step": 26081 }, { "epoch": 0.5531589998091239, "grad_norm": 0.43966421484947205, "learning_rate": 1.647532644012927e-05, "loss": 0.5851, "step": 26082 }, { "epoch": 0.5531802082670569, "grad_norm": 0.38936343789100647, "learning_rate": 1.647507230051446e-05, "loss": 0.499, "step": 26083 }, { "epoch": 0.5532014167249899, "grad_norm": 0.3511214256286621, "learning_rate": 1.6474818153698132e-05, "loss": 0.5202, "step": 26084 }, { "epoch": 0.553222625182923, "grad_norm": 0.3173108696937561, "learning_rate": 1.6474563999680568e-05, "loss": 0.4995, "step": 26085 }, { "epoch": 0.5532438336408559, "grad_norm": 0.33745330572128296, "learning_rate": 1.647430983846205e-05, "loss": 0.4923, "step": 26086 }, { "epoch": 0.553265042098789, "grad_norm": 0.3509015738964081, "learning_rate": 1.6474055670042857e-05, "loss": 0.602, "step": 26087 }, { "epoch": 0.553286250556722, "grad_norm": 0.3402586877346039, "learning_rate": 1.647380149442328e-05, "loss": 0.5027, "step": 26088 }, { "epoch": 0.5533074590146551, "grad_norm": 0.3284984230995178, "learning_rate": 1.64735473116036e-05, "loss": 0.5138, "step": 26089 }, { "epoch": 0.553328667472588, "grad_norm": 0.3460480570793152, "learning_rate": 1.647329312158409e-05, "loss": 0.4364, "step": 26090 }, { "epoch": 0.5533498759305211, "grad_norm": 0.33505338430404663, "learning_rate": 1.6473038924365043e-05, "loss": 0.5712, "step": 26091 }, { "epoch": 0.5533710843884542, "grad_norm": 0.35256990790367126, "learning_rate": 1.6472784719946738e-05, "loss": 0.4746, "step": 26092 }, { "epoch": 0.5533922928463871, "grad_norm": 0.4332021176815033, "learning_rate": 1.647253050832946e-05, "loss": 0.4879, "step": 26093 }, { "epoch": 0.5534135013043202, "grad_norm": 0.3619594871997833, "learning_rate": 1.647227628951349e-05, "loss": 0.5655, "step": 26094 }, { "epoch": 0.5534347097622532, "grad_norm": 0.31270790100097656, "learning_rate": 1.6472022063499107e-05, "loss": 0.4736, "step": 26095 }, { "epoch": 0.5534559182201862, "grad_norm": 0.3484829068183899, "learning_rate": 1.6471767830286598e-05, "loss": 0.5345, "step": 26096 }, { "epoch": 0.5534771266781192, "grad_norm": 0.3499463200569153, "learning_rate": 1.6471513589876247e-05, "loss": 0.5539, "step": 26097 }, { "epoch": 0.5534983351360523, "grad_norm": 0.3412824273109436, "learning_rate": 1.6471259342268332e-05, "loss": 0.4505, "step": 26098 }, { "epoch": 0.5535195435939853, "grad_norm": 0.414326548576355, "learning_rate": 1.647100508746314e-05, "loss": 0.5439, "step": 26099 }, { "epoch": 0.5535407520519183, "grad_norm": 0.3663597106933594, "learning_rate": 1.6470750825460954e-05, "loss": 0.4093, "step": 26100 }, { "epoch": 0.5535619605098513, "grad_norm": 0.7684645056724548, "learning_rate": 1.6470496556262054e-05, "loss": 0.5674, "step": 26101 }, { "epoch": 0.5535831689677844, "grad_norm": 0.541172444820404, "learning_rate": 1.6470242279866725e-05, "loss": 0.5977, "step": 26102 }, { "epoch": 0.5536043774257173, "grad_norm": 0.3759738504886627, "learning_rate": 1.6469987996275248e-05, "loss": 0.5283, "step": 26103 }, { "epoch": 0.5536255858836504, "grad_norm": 0.407953679561615, "learning_rate": 1.6469733705487903e-05, "loss": 0.4826, "step": 26104 }, { "epoch": 0.5536467943415834, "grad_norm": 0.38231566548347473, "learning_rate": 1.646947940750498e-05, "loss": 0.4839, "step": 26105 }, { "epoch": 0.5536680027995164, "grad_norm": 0.3547327220439911, "learning_rate": 1.6469225102326757e-05, "loss": 0.5484, "step": 26106 }, { "epoch": 0.5536892112574495, "grad_norm": 0.4430289566516876, "learning_rate": 1.646897078995352e-05, "loss": 0.5205, "step": 26107 }, { "epoch": 0.5537104197153825, "grad_norm": 0.3851686418056488, "learning_rate": 1.646871647038555e-05, "loss": 0.617, "step": 26108 }, { "epoch": 0.5537316281733156, "grad_norm": 0.3623250722885132, "learning_rate": 1.6468462143623126e-05, "loss": 0.4995, "step": 26109 }, { "epoch": 0.5537528366312485, "grad_norm": 0.33556896448135376, "learning_rate": 1.6468207809666538e-05, "loss": 0.459, "step": 26110 }, { "epoch": 0.5537740450891816, "grad_norm": 0.3793432414531708, "learning_rate": 1.6467953468516065e-05, "loss": 0.5295, "step": 26111 }, { "epoch": 0.5537952535471146, "grad_norm": 0.4140298366546631, "learning_rate": 1.646769912017199e-05, "loss": 0.463, "step": 26112 }, { "epoch": 0.5538164620050476, "grad_norm": 0.4546594023704529, "learning_rate": 1.6467444764634593e-05, "loss": 0.4982, "step": 26113 }, { "epoch": 0.5538376704629806, "grad_norm": 0.3693879246711731, "learning_rate": 1.6467190401904163e-05, "loss": 0.4906, "step": 26114 }, { "epoch": 0.5538588789209137, "grad_norm": 0.33851945400238037, "learning_rate": 1.646693603198098e-05, "loss": 0.3763, "step": 26115 }, { "epoch": 0.5538800873788466, "grad_norm": 0.3927631676197052, "learning_rate": 1.646668165486533e-05, "loss": 0.5287, "step": 26116 }, { "epoch": 0.5539012958367797, "grad_norm": 0.32633310556411743, "learning_rate": 1.646642727055749e-05, "loss": 0.5122, "step": 26117 }, { "epoch": 0.5539225042947127, "grad_norm": 0.36059147119522095, "learning_rate": 1.6466172879057743e-05, "loss": 0.4592, "step": 26118 }, { "epoch": 0.5539437127526458, "grad_norm": 0.37925055623054504, "learning_rate": 1.6465918480366378e-05, "loss": 0.4812, "step": 26119 }, { "epoch": 0.5539649212105788, "grad_norm": 0.3166307806968689, "learning_rate": 1.6465664074483675e-05, "loss": 0.4725, "step": 26120 }, { "epoch": 0.5539861296685118, "grad_norm": 0.3411501348018646, "learning_rate": 1.6465409661409913e-05, "loss": 0.4493, "step": 26121 }, { "epoch": 0.5540073381264449, "grad_norm": 0.3574542999267578, "learning_rate": 1.646515524114538e-05, "loss": 0.4486, "step": 26122 }, { "epoch": 0.5540285465843778, "grad_norm": 0.34250104427337646, "learning_rate": 1.646490081369036e-05, "loss": 0.465, "step": 26123 }, { "epoch": 0.5540497550423109, "grad_norm": 0.32867565751075745, "learning_rate": 1.6464646379045134e-05, "loss": 0.5016, "step": 26124 }, { "epoch": 0.5540709635002439, "grad_norm": 0.378542959690094, "learning_rate": 1.646439193720998e-05, "loss": 0.492, "step": 26125 }, { "epoch": 0.554092171958177, "grad_norm": 0.38102254271507263, "learning_rate": 1.6464137488185188e-05, "loss": 0.3667, "step": 26126 }, { "epoch": 0.5541133804161099, "grad_norm": 0.37222471833229065, "learning_rate": 1.646388303197104e-05, "loss": 0.533, "step": 26127 }, { "epoch": 0.554134588874043, "grad_norm": 0.3446711003780365, "learning_rate": 1.6463628568567816e-05, "loss": 0.5376, "step": 26128 }, { "epoch": 0.554155797331976, "grad_norm": 0.40716350078582764, "learning_rate": 1.64633740979758e-05, "loss": 0.5644, "step": 26129 }, { "epoch": 0.554177005789909, "grad_norm": 0.3579237461090088, "learning_rate": 1.6463119620195275e-05, "loss": 0.5151, "step": 26130 }, { "epoch": 0.554198214247842, "grad_norm": 0.35148605704307556, "learning_rate": 1.6462865135226525e-05, "loss": 0.5309, "step": 26131 }, { "epoch": 0.5542194227057751, "grad_norm": 0.36935216188430786, "learning_rate": 1.6462610643069834e-05, "loss": 0.4987, "step": 26132 }, { "epoch": 0.5542406311637081, "grad_norm": 0.37342125177383423, "learning_rate": 1.646235614372548e-05, "loss": 0.5409, "step": 26133 }, { "epoch": 0.5542618396216411, "grad_norm": 0.33808568120002747, "learning_rate": 1.6462101637193754e-05, "loss": 0.4453, "step": 26134 }, { "epoch": 0.5542830480795742, "grad_norm": 0.3578205108642578, "learning_rate": 1.646184712347493e-05, "loss": 0.5022, "step": 26135 }, { "epoch": 0.5543042565375071, "grad_norm": 0.34765011072158813, "learning_rate": 1.64615926025693e-05, "loss": 0.5118, "step": 26136 }, { "epoch": 0.5543254649954402, "grad_norm": 0.3620522916316986, "learning_rate": 1.646133807447714e-05, "loss": 0.5621, "step": 26137 }, { "epoch": 0.5543466734533732, "grad_norm": 0.3748644292354584, "learning_rate": 1.646108353919874e-05, "loss": 0.5698, "step": 26138 }, { "epoch": 0.5543678819113063, "grad_norm": 0.3361847996711731, "learning_rate": 1.6460828996734376e-05, "loss": 0.4955, "step": 26139 }, { "epoch": 0.5543890903692392, "grad_norm": 0.3669563829898834, "learning_rate": 1.6460574447084334e-05, "loss": 0.4934, "step": 26140 }, { "epoch": 0.5544102988271723, "grad_norm": 0.3039288818836212, "learning_rate": 1.6460319890248898e-05, "loss": 0.4027, "step": 26141 }, { "epoch": 0.5544315072851053, "grad_norm": 0.42165878415107727, "learning_rate": 1.646006532622835e-05, "loss": 0.4694, "step": 26142 }, { "epoch": 0.5544527157430383, "grad_norm": 0.36861488223075867, "learning_rate": 1.6459810755022972e-05, "loss": 0.4896, "step": 26143 }, { "epoch": 0.5544739242009713, "grad_norm": 0.42359593510627747, "learning_rate": 1.6459556176633048e-05, "loss": 0.5195, "step": 26144 }, { "epoch": 0.5544951326589044, "grad_norm": 0.3465729057788849, "learning_rate": 1.6459301591058865e-05, "loss": 0.4317, "step": 26145 }, { "epoch": 0.5545163411168373, "grad_norm": 0.6028974652290344, "learning_rate": 1.6459046998300704e-05, "loss": 0.5572, "step": 26146 }, { "epoch": 0.5545375495747704, "grad_norm": 0.30201634764671326, "learning_rate": 1.6458792398358843e-05, "loss": 0.4429, "step": 26147 }, { "epoch": 0.5545587580327035, "grad_norm": 0.4544169008731842, "learning_rate": 1.645853779123357e-05, "loss": 0.5577, "step": 26148 }, { "epoch": 0.5545799664906365, "grad_norm": 0.3710257411003113, "learning_rate": 1.645828317692517e-05, "loss": 0.5638, "step": 26149 }, { "epoch": 0.5546011749485695, "grad_norm": 0.34008482098579407, "learning_rate": 1.645802855543392e-05, "loss": 0.4999, "step": 26150 }, { "epoch": 0.5546223834065025, "grad_norm": 0.31547024846076965, "learning_rate": 1.645777392676011e-05, "loss": 0.5756, "step": 26151 }, { "epoch": 0.5546435918644356, "grad_norm": 0.32109585404396057, "learning_rate": 1.6457519290904018e-05, "loss": 0.4453, "step": 26152 }, { "epoch": 0.5546648003223685, "grad_norm": 0.34145087003707886, "learning_rate": 1.645726464786593e-05, "loss": 0.537, "step": 26153 }, { "epoch": 0.5546860087803016, "grad_norm": 0.3641207814216614, "learning_rate": 1.645700999764613e-05, "loss": 0.3976, "step": 26154 }, { "epoch": 0.5547072172382346, "grad_norm": 0.3418794274330139, "learning_rate": 1.6456755340244893e-05, "loss": 0.5389, "step": 26155 }, { "epoch": 0.5547284256961676, "grad_norm": 0.5294424295425415, "learning_rate": 1.6456500675662513e-05, "loss": 0.5032, "step": 26156 }, { "epoch": 0.5547496341541006, "grad_norm": 0.39257684350013733, "learning_rate": 1.645624600389927e-05, "loss": 0.4709, "step": 26157 }, { "epoch": 0.5547708426120337, "grad_norm": 0.3523404598236084, "learning_rate": 1.6455991324955447e-05, "loss": 0.4394, "step": 26158 }, { "epoch": 0.5547920510699667, "grad_norm": 0.3325341045856476, "learning_rate": 1.6455736638831325e-05, "loss": 0.4631, "step": 26159 }, { "epoch": 0.5548132595278997, "grad_norm": 0.3345317244529724, "learning_rate": 1.645548194552719e-05, "loss": 0.4761, "step": 26160 }, { "epoch": 0.5548344679858328, "grad_norm": 0.3502922058105469, "learning_rate": 1.645522724504332e-05, "loss": 0.5435, "step": 26161 }, { "epoch": 0.5548556764437658, "grad_norm": 0.38647446036338806, "learning_rate": 1.6454972537380007e-05, "loss": 0.4535, "step": 26162 }, { "epoch": 0.5548768849016988, "grad_norm": 0.3785003125667572, "learning_rate": 1.6454717822537527e-05, "loss": 0.4925, "step": 26163 }, { "epoch": 0.5548980933596318, "grad_norm": 0.34557265043258667, "learning_rate": 1.645446310051617e-05, "loss": 0.5304, "step": 26164 }, { "epoch": 0.5549193018175649, "grad_norm": 0.33351778984069824, "learning_rate": 1.645420837131621e-05, "loss": 0.459, "step": 26165 }, { "epoch": 0.5549405102754978, "grad_norm": 0.3851461410522461, "learning_rate": 1.6453953634937937e-05, "loss": 0.5137, "step": 26166 }, { "epoch": 0.5549617187334309, "grad_norm": 0.33388349413871765, "learning_rate": 1.6453698891381633e-05, "loss": 0.491, "step": 26167 }, { "epoch": 0.5549829271913639, "grad_norm": 0.34576839208602905, "learning_rate": 1.6453444140647582e-05, "loss": 0.502, "step": 26168 }, { "epoch": 0.555004135649297, "grad_norm": 0.32218557596206665, "learning_rate": 1.6453189382736066e-05, "loss": 0.4786, "step": 26169 }, { "epoch": 0.5550253441072299, "grad_norm": 0.3457070589065552, "learning_rate": 1.6452934617647368e-05, "loss": 0.4919, "step": 26170 }, { "epoch": 0.555046552565163, "grad_norm": 0.34609735012054443, "learning_rate": 1.6452679845381774e-05, "loss": 0.481, "step": 26171 }, { "epoch": 0.555067761023096, "grad_norm": 0.636630654335022, "learning_rate": 1.6452425065939564e-05, "loss": 0.5692, "step": 26172 }, { "epoch": 0.555088969481029, "grad_norm": 0.33131757378578186, "learning_rate": 1.6452170279321026e-05, "loss": 0.4868, "step": 26173 }, { "epoch": 0.5551101779389621, "grad_norm": 0.3771301805973053, "learning_rate": 1.6451915485526436e-05, "loss": 0.6215, "step": 26174 }, { "epoch": 0.5551313863968951, "grad_norm": 0.36234161257743835, "learning_rate": 1.6451660684556084e-05, "loss": 0.5338, "step": 26175 }, { "epoch": 0.5551525948548282, "grad_norm": 0.3251207768917084, "learning_rate": 1.645140587641025e-05, "loss": 0.489, "step": 26176 }, { "epoch": 0.5551738033127611, "grad_norm": 0.38737496733665466, "learning_rate": 1.6451151061089218e-05, "loss": 0.4718, "step": 26177 }, { "epoch": 0.5551950117706942, "grad_norm": 0.34524402022361755, "learning_rate": 1.6450896238593275e-05, "loss": 0.455, "step": 26178 }, { "epoch": 0.5552162202286272, "grad_norm": 0.4180423617362976, "learning_rate": 1.6450641408922696e-05, "loss": 0.548, "step": 26179 }, { "epoch": 0.5552374286865602, "grad_norm": 0.3770727515220642, "learning_rate": 1.6450386572077775e-05, "loss": 0.5335, "step": 26180 }, { "epoch": 0.5552586371444932, "grad_norm": 0.36898818612098694, "learning_rate": 1.6450131728058786e-05, "loss": 0.5405, "step": 26181 }, { "epoch": 0.5552798456024263, "grad_norm": 0.4028586149215698, "learning_rate": 1.6449876876866018e-05, "loss": 0.4877, "step": 26182 }, { "epoch": 0.5553010540603592, "grad_norm": 0.5543770790100098, "learning_rate": 1.6449622018499755e-05, "loss": 0.4788, "step": 26183 }, { "epoch": 0.5553222625182923, "grad_norm": 0.37397322058677673, "learning_rate": 1.6449367152960274e-05, "loss": 0.4757, "step": 26184 }, { "epoch": 0.5553434709762253, "grad_norm": 0.4509284496307373, "learning_rate": 1.644911228024787e-05, "loss": 0.5394, "step": 26185 }, { "epoch": 0.5553646794341583, "grad_norm": 0.35581693053245544, "learning_rate": 1.644885740036281e-05, "loss": 0.5812, "step": 26186 }, { "epoch": 0.5553858878920913, "grad_norm": 0.37125787138938904, "learning_rate": 1.6448602513305394e-05, "loss": 0.5256, "step": 26187 }, { "epoch": 0.5554070963500244, "grad_norm": 0.35787689685821533, "learning_rate": 1.6448347619075895e-05, "loss": 0.5093, "step": 26188 }, { "epoch": 0.5554283048079575, "grad_norm": 0.34844887256622314, "learning_rate": 1.64480927176746e-05, "loss": 0.4533, "step": 26189 }, { "epoch": 0.5554495132658904, "grad_norm": 0.32578709721565247, "learning_rate": 1.6447837809101792e-05, "loss": 0.4365, "step": 26190 }, { "epoch": 0.5554707217238235, "grad_norm": 0.3686126470565796, "learning_rate": 1.6447582893357757e-05, "loss": 0.5284, "step": 26191 }, { "epoch": 0.5554919301817565, "grad_norm": 0.31630218029022217, "learning_rate": 1.6447327970442776e-05, "loss": 0.4727, "step": 26192 }, { "epoch": 0.5555131386396895, "grad_norm": 0.34422722458839417, "learning_rate": 1.6447073040357132e-05, "loss": 0.4593, "step": 26193 }, { "epoch": 0.5555343470976225, "grad_norm": 0.335712194442749, "learning_rate": 1.644681810310111e-05, "loss": 0.521, "step": 26194 }, { "epoch": 0.5555555555555556, "grad_norm": 0.4111298620700836, "learning_rate": 1.6446563158674992e-05, "loss": 0.4747, "step": 26195 }, { "epoch": 0.5555767640134885, "grad_norm": 0.307262659072876, "learning_rate": 1.644630820707906e-05, "loss": 0.4946, "step": 26196 }, { "epoch": 0.5555979724714216, "grad_norm": 0.36919376254081726, "learning_rate": 1.6446053248313604e-05, "loss": 0.5007, "step": 26197 }, { "epoch": 0.5556191809293546, "grad_norm": 0.375229150056839, "learning_rate": 1.6445798282378903e-05, "loss": 0.505, "step": 26198 }, { "epoch": 0.5556403893872877, "grad_norm": 0.33434367179870605, "learning_rate": 1.644554330927524e-05, "loss": 0.4157, "step": 26199 }, { "epoch": 0.5556615978452206, "grad_norm": 0.3289220631122589, "learning_rate": 1.64452883290029e-05, "loss": 0.4345, "step": 26200 }, { "epoch": 0.5556828063031537, "grad_norm": 0.3273840844631195, "learning_rate": 1.6445033341562166e-05, "loss": 0.4508, "step": 26201 }, { "epoch": 0.5557040147610868, "grad_norm": 0.3298206031322479, "learning_rate": 1.6444778346953323e-05, "loss": 0.4818, "step": 26202 }, { "epoch": 0.5557252232190197, "grad_norm": 0.3593917787075043, "learning_rate": 1.6444523345176654e-05, "loss": 0.5533, "step": 26203 }, { "epoch": 0.5557464316769528, "grad_norm": 0.36424368619918823, "learning_rate": 1.644426833623244e-05, "loss": 0.4547, "step": 26204 }, { "epoch": 0.5557676401348858, "grad_norm": 0.34808701276779175, "learning_rate": 1.6444013320120967e-05, "loss": 0.5187, "step": 26205 }, { "epoch": 0.5557888485928189, "grad_norm": 0.3198654353618622, "learning_rate": 1.6443758296842522e-05, "loss": 0.4269, "step": 26206 }, { "epoch": 0.5558100570507518, "grad_norm": 0.9182539582252502, "learning_rate": 1.6443503266397382e-05, "loss": 0.5423, "step": 26207 }, { "epoch": 0.5558312655086849, "grad_norm": 0.3490324020385742, "learning_rate": 1.6443248228785832e-05, "loss": 0.551, "step": 26208 }, { "epoch": 0.5558524739666179, "grad_norm": 0.3463590145111084, "learning_rate": 1.644299318400816e-05, "loss": 0.5362, "step": 26209 }, { "epoch": 0.5558736824245509, "grad_norm": 0.3451627790927887, "learning_rate": 1.6442738132064646e-05, "loss": 0.4861, "step": 26210 }, { "epoch": 0.5558948908824839, "grad_norm": 0.3736935257911682, "learning_rate": 1.6442483072955572e-05, "loss": 0.467, "step": 26211 }, { "epoch": 0.555916099340417, "grad_norm": 0.29177919030189514, "learning_rate": 1.644222800668123e-05, "loss": 0.3826, "step": 26212 }, { "epoch": 0.5559373077983499, "grad_norm": 0.31525179743766785, "learning_rate": 1.6441972933241897e-05, "loss": 0.5507, "step": 26213 }, { "epoch": 0.555958516256283, "grad_norm": 0.3873656988143921, "learning_rate": 1.6441717852637854e-05, "loss": 0.5058, "step": 26214 }, { "epoch": 0.5559797247142161, "grad_norm": 0.30531802773475647, "learning_rate": 1.644146276486939e-05, "loss": 0.4807, "step": 26215 }, { "epoch": 0.556000933172149, "grad_norm": 0.34529924392700195, "learning_rate": 1.6441207669936786e-05, "loss": 0.4063, "step": 26216 }, { "epoch": 0.5560221416300821, "grad_norm": 0.46115800738334656, "learning_rate": 1.6440952567840326e-05, "loss": 0.4525, "step": 26217 }, { "epoch": 0.5560433500880151, "grad_norm": 0.36011791229248047, "learning_rate": 1.6440697458580295e-05, "loss": 0.5211, "step": 26218 }, { "epoch": 0.5560645585459482, "grad_norm": 0.3532218337059021, "learning_rate": 1.644044234215698e-05, "loss": 0.5173, "step": 26219 }, { "epoch": 0.5560857670038811, "grad_norm": 0.371849000453949, "learning_rate": 1.644018721857066e-05, "loss": 0.4206, "step": 26220 }, { "epoch": 0.5561069754618142, "grad_norm": 0.3283291459083557, "learning_rate": 1.6439932087821615e-05, "loss": 0.4579, "step": 26221 }, { "epoch": 0.5561281839197472, "grad_norm": 0.3651250898838043, "learning_rate": 1.6439676949910136e-05, "loss": 0.4735, "step": 26222 }, { "epoch": 0.5561493923776802, "grad_norm": 0.37091052532196045, "learning_rate": 1.6439421804836508e-05, "loss": 0.4601, "step": 26223 }, { "epoch": 0.5561706008356132, "grad_norm": 0.36819127202033997, "learning_rate": 1.6439166652601007e-05, "loss": 0.5224, "step": 26224 }, { "epoch": 0.5561918092935463, "grad_norm": 0.3612998425960541, "learning_rate": 1.643891149320392e-05, "loss": 0.4885, "step": 26225 }, { "epoch": 0.5562130177514792, "grad_norm": 0.3785262405872345, "learning_rate": 1.6438656326645536e-05, "loss": 0.4545, "step": 26226 }, { "epoch": 0.5562342262094123, "grad_norm": 0.48418787121772766, "learning_rate": 1.6438401152926128e-05, "loss": 0.535, "step": 26227 }, { "epoch": 0.5562554346673453, "grad_norm": 0.43059977889060974, "learning_rate": 1.6438145972045992e-05, "loss": 0.4461, "step": 26228 }, { "epoch": 0.5562766431252784, "grad_norm": 0.3490172028541565, "learning_rate": 1.6437890784005405e-05, "loss": 0.5813, "step": 26229 }, { "epoch": 0.5562978515832114, "grad_norm": 0.42553162574768066, "learning_rate": 1.643763558880465e-05, "loss": 0.4517, "step": 26230 }, { "epoch": 0.5563190600411444, "grad_norm": 0.35318490862846375, "learning_rate": 1.6437380386444015e-05, "loss": 0.5049, "step": 26231 }, { "epoch": 0.5563402684990775, "grad_norm": 0.3629903793334961, "learning_rate": 1.6437125176923777e-05, "loss": 0.4603, "step": 26232 }, { "epoch": 0.5563614769570104, "grad_norm": 0.40030473470687866, "learning_rate": 1.6436869960244227e-05, "loss": 0.5685, "step": 26233 }, { "epoch": 0.5563826854149435, "grad_norm": 0.3507813513278961, "learning_rate": 1.6436614736405646e-05, "loss": 0.493, "step": 26234 }, { "epoch": 0.5564038938728765, "grad_norm": 0.32421958446502686, "learning_rate": 1.643635950540832e-05, "loss": 0.4625, "step": 26235 }, { "epoch": 0.5564251023308096, "grad_norm": 0.4002203345298767, "learning_rate": 1.643610426725253e-05, "loss": 0.6069, "step": 26236 }, { "epoch": 0.5564463107887425, "grad_norm": 0.3647759258747101, "learning_rate": 1.643584902193856e-05, "loss": 0.5685, "step": 26237 }, { "epoch": 0.5564675192466756, "grad_norm": 0.3321765959262848, "learning_rate": 1.6435593769466694e-05, "loss": 0.4959, "step": 26238 }, { "epoch": 0.5564887277046086, "grad_norm": 0.3140076994895935, "learning_rate": 1.6435338509837214e-05, "loss": 0.4791, "step": 26239 }, { "epoch": 0.5565099361625416, "grad_norm": 0.32540076971054077, "learning_rate": 1.643508324305041e-05, "loss": 0.4743, "step": 26240 }, { "epoch": 0.5565311446204746, "grad_norm": 0.33747419714927673, "learning_rate": 1.6434827969106564e-05, "loss": 0.4545, "step": 26241 }, { "epoch": 0.5565523530784077, "grad_norm": 0.34364578127861023, "learning_rate": 1.6434572688005954e-05, "loss": 0.489, "step": 26242 }, { "epoch": 0.5565735615363407, "grad_norm": 0.3731178939342499, "learning_rate": 1.643431739974887e-05, "loss": 0.4947, "step": 26243 }, { "epoch": 0.5565947699942737, "grad_norm": 0.40102437138557434, "learning_rate": 1.6434062104335596e-05, "loss": 0.4305, "step": 26244 }, { "epoch": 0.5566159784522068, "grad_norm": 0.35385411977767944, "learning_rate": 1.643380680176641e-05, "loss": 0.4489, "step": 26245 }, { "epoch": 0.5566371869101397, "grad_norm": 1.0256181955337524, "learning_rate": 1.6433551492041602e-05, "loss": 0.4739, "step": 26246 }, { "epoch": 0.5566583953680728, "grad_norm": 0.4361423850059509, "learning_rate": 1.6433296175161456e-05, "loss": 0.5447, "step": 26247 }, { "epoch": 0.5566796038260058, "grad_norm": 0.3580736815929413, "learning_rate": 1.643304085112625e-05, "loss": 0.5171, "step": 26248 }, { "epoch": 0.5567008122839389, "grad_norm": 0.3414851725101471, "learning_rate": 1.6432785519936276e-05, "loss": 0.49, "step": 26249 }, { "epoch": 0.5567220207418718, "grad_norm": 0.35336223244667053, "learning_rate": 1.643253018159181e-05, "loss": 0.4797, "step": 26250 }, { "epoch": 0.5567432291998049, "grad_norm": 0.35908201336860657, "learning_rate": 1.643227483609314e-05, "loss": 0.5062, "step": 26251 }, { "epoch": 0.5567644376577379, "grad_norm": 0.39637139439582825, "learning_rate": 1.6432019483440555e-05, "loss": 0.4573, "step": 26252 }, { "epoch": 0.5567856461156709, "grad_norm": 0.34400323033332825, "learning_rate": 1.6431764123634327e-05, "loss": 0.4877, "step": 26253 }, { "epoch": 0.5568068545736039, "grad_norm": 0.33702772855758667, "learning_rate": 1.643150875667475e-05, "loss": 0.4426, "step": 26254 }, { "epoch": 0.556828063031537, "grad_norm": 0.34851518273353577, "learning_rate": 1.6431253382562103e-05, "loss": 0.5076, "step": 26255 }, { "epoch": 0.55684927148947, "grad_norm": 0.3729669451713562, "learning_rate": 1.6430998001296676e-05, "loss": 0.5279, "step": 26256 }, { "epoch": 0.556870479947403, "grad_norm": 0.3453899621963501, "learning_rate": 1.6430742612878746e-05, "loss": 0.5212, "step": 26257 }, { "epoch": 0.5568916884053361, "grad_norm": 0.3382466435432434, "learning_rate": 1.64304872173086e-05, "loss": 0.4331, "step": 26258 }, { "epoch": 0.5569128968632691, "grad_norm": 0.3556317090988159, "learning_rate": 1.6430231814586522e-05, "loss": 0.4894, "step": 26259 }, { "epoch": 0.5569341053212021, "grad_norm": 0.3792656362056732, "learning_rate": 1.6429976404712794e-05, "loss": 0.5776, "step": 26260 }, { "epoch": 0.5569553137791351, "grad_norm": 0.3781844973564148, "learning_rate": 1.6429720987687706e-05, "loss": 0.4911, "step": 26261 }, { "epoch": 0.5569765222370682, "grad_norm": 0.3533262014389038, "learning_rate": 1.6429465563511538e-05, "loss": 0.5621, "step": 26262 }, { "epoch": 0.5569977306950011, "grad_norm": 0.35657602548599243, "learning_rate": 1.6429210132184573e-05, "loss": 0.495, "step": 26263 }, { "epoch": 0.5570189391529342, "grad_norm": 0.4666098654270172, "learning_rate": 1.6428954693707094e-05, "loss": 0.5024, "step": 26264 }, { "epoch": 0.5570401476108672, "grad_norm": 0.37343889474868774, "learning_rate": 1.6428699248079394e-05, "loss": 0.3964, "step": 26265 }, { "epoch": 0.5570613560688003, "grad_norm": 0.36400675773620605, "learning_rate": 1.6428443795301743e-05, "loss": 0.5498, "step": 26266 }, { "epoch": 0.5570825645267332, "grad_norm": 0.36698025465011597, "learning_rate": 1.642818833537444e-05, "loss": 0.5316, "step": 26267 }, { "epoch": 0.5571037729846663, "grad_norm": 0.3918614685535431, "learning_rate": 1.6427932868297753e-05, "loss": 0.4771, "step": 26268 }, { "epoch": 0.5571249814425993, "grad_norm": 0.435217022895813, "learning_rate": 1.6427677394071978e-05, "loss": 0.4551, "step": 26269 }, { "epoch": 0.5571461899005323, "grad_norm": 0.5965576171875, "learning_rate": 1.64274219126974e-05, "loss": 0.5137, "step": 26270 }, { "epoch": 0.5571673983584654, "grad_norm": 0.3485095500946045, "learning_rate": 1.6427166424174298e-05, "loss": 0.4457, "step": 26271 }, { "epoch": 0.5571886068163984, "grad_norm": 0.3496206998825073, "learning_rate": 1.6426910928502956e-05, "loss": 0.4816, "step": 26272 }, { "epoch": 0.5572098152743314, "grad_norm": 0.3581596910953522, "learning_rate": 1.6426655425683658e-05, "loss": 0.5099, "step": 26273 }, { "epoch": 0.5572310237322644, "grad_norm": 0.3535913825035095, "learning_rate": 1.6426399915716694e-05, "loss": 0.4625, "step": 26274 }, { "epoch": 0.5572522321901975, "grad_norm": 0.4092918932437897, "learning_rate": 1.642614439860234e-05, "loss": 0.4746, "step": 26275 }, { "epoch": 0.5572734406481304, "grad_norm": 0.3610139787197113, "learning_rate": 1.6425888874340885e-05, "loss": 0.4772, "step": 26276 }, { "epoch": 0.5572946491060635, "grad_norm": 0.34695515036582947, "learning_rate": 1.6425633342932613e-05, "loss": 0.4641, "step": 26277 }, { "epoch": 0.5573158575639965, "grad_norm": 0.38939717411994934, "learning_rate": 1.6425377804377805e-05, "loss": 0.4838, "step": 26278 }, { "epoch": 0.5573370660219296, "grad_norm": 0.45790281891822815, "learning_rate": 1.642512225867675e-05, "loss": 0.5347, "step": 26279 }, { "epoch": 0.5573582744798625, "grad_norm": 0.366333931684494, "learning_rate": 1.642486670582973e-05, "loss": 0.5198, "step": 26280 }, { "epoch": 0.5573794829377956, "grad_norm": 0.42572131752967834, "learning_rate": 1.6424611145837026e-05, "loss": 0.4723, "step": 26281 }, { "epoch": 0.5574006913957286, "grad_norm": 0.5697081685066223, "learning_rate": 1.642435557869893e-05, "loss": 0.4509, "step": 26282 }, { "epoch": 0.5574218998536616, "grad_norm": 0.3681119978427887, "learning_rate": 1.6424100004415716e-05, "loss": 0.5546, "step": 26283 }, { "epoch": 0.5574431083115947, "grad_norm": 0.3328079879283905, "learning_rate": 1.6423844422987678e-05, "loss": 0.4741, "step": 26284 }, { "epoch": 0.5574643167695277, "grad_norm": 0.35133787989616394, "learning_rate": 1.6423588834415098e-05, "loss": 0.5492, "step": 26285 }, { "epoch": 0.5574855252274608, "grad_norm": 0.34807276725769043, "learning_rate": 1.642333323869825e-05, "loss": 0.5882, "step": 26286 }, { "epoch": 0.5575067336853937, "grad_norm": 0.3160560131072998, "learning_rate": 1.642307763583743e-05, "loss": 0.4514, "step": 26287 }, { "epoch": 0.5575279421433268, "grad_norm": 0.3287157118320465, "learning_rate": 1.6422822025832924e-05, "loss": 0.5354, "step": 26288 }, { "epoch": 0.5575491506012598, "grad_norm": 0.3359868824481964, "learning_rate": 1.642256640868501e-05, "loss": 0.4028, "step": 26289 }, { "epoch": 0.5575703590591928, "grad_norm": 0.41006338596343994, "learning_rate": 1.642231078439397e-05, "loss": 0.5362, "step": 26290 }, { "epoch": 0.5575915675171258, "grad_norm": 0.331368625164032, "learning_rate": 1.6422055152960093e-05, "loss": 0.434, "step": 26291 }, { "epoch": 0.5576127759750589, "grad_norm": 0.3705586791038513, "learning_rate": 1.642179951438366e-05, "loss": 0.5046, "step": 26292 }, { "epoch": 0.5576339844329918, "grad_norm": 0.3418329656124115, "learning_rate": 1.642154386866496e-05, "loss": 0.4822, "step": 26293 }, { "epoch": 0.5576551928909249, "grad_norm": 0.4313074052333832, "learning_rate": 1.6421288215804275e-05, "loss": 0.5109, "step": 26294 }, { "epoch": 0.5576764013488579, "grad_norm": 0.40899738669395447, "learning_rate": 1.6421032555801887e-05, "loss": 0.5317, "step": 26295 }, { "epoch": 0.557697609806791, "grad_norm": 0.3459702432155609, "learning_rate": 1.6420776888658086e-05, "loss": 0.4142, "step": 26296 }, { "epoch": 0.557718818264724, "grad_norm": 0.8409751653671265, "learning_rate": 1.642052121437315e-05, "loss": 0.4999, "step": 26297 }, { "epoch": 0.557740026722657, "grad_norm": 0.37169674038887024, "learning_rate": 1.6420265532947364e-05, "loss": 0.453, "step": 26298 }, { "epoch": 0.5577612351805901, "grad_norm": 0.3222944736480713, "learning_rate": 1.6420009844381016e-05, "loss": 0.4795, "step": 26299 }, { "epoch": 0.557782443638523, "grad_norm": 0.43045079708099365, "learning_rate": 1.6419754148674393e-05, "loss": 0.4812, "step": 26300 }, { "epoch": 0.5578036520964561, "grad_norm": 0.5889450907707214, "learning_rate": 1.641949844582777e-05, "loss": 0.4998, "step": 26301 }, { "epoch": 0.5578248605543891, "grad_norm": 0.4287126958370209, "learning_rate": 1.641924273584144e-05, "loss": 0.5181, "step": 26302 }, { "epoch": 0.5578460690123221, "grad_norm": 0.35890933871269226, "learning_rate": 1.6418987018715682e-05, "loss": 0.58, "step": 26303 }, { "epoch": 0.5578672774702551, "grad_norm": 0.34338289499282837, "learning_rate": 1.641873129445078e-05, "loss": 0.4965, "step": 26304 }, { "epoch": 0.5578884859281882, "grad_norm": 0.31548482179641724, "learning_rate": 1.6418475563047026e-05, "loss": 0.4225, "step": 26305 }, { "epoch": 0.5579096943861211, "grad_norm": 0.427654504776001, "learning_rate": 1.6418219824504697e-05, "loss": 0.5191, "step": 26306 }, { "epoch": 0.5579309028440542, "grad_norm": 0.31780996918678284, "learning_rate": 1.641796407882408e-05, "loss": 0.5148, "step": 26307 }, { "epoch": 0.5579521113019872, "grad_norm": 0.35627004504203796, "learning_rate": 1.6417708326005457e-05, "loss": 0.485, "step": 26308 }, { "epoch": 0.5579733197599203, "grad_norm": 0.3272639811038971, "learning_rate": 1.6417452566049115e-05, "loss": 0.4358, "step": 26309 }, { "epoch": 0.5579945282178532, "grad_norm": 0.36191219091415405, "learning_rate": 1.641719679895534e-05, "loss": 0.523, "step": 26310 }, { "epoch": 0.5580157366757863, "grad_norm": 0.5242385268211365, "learning_rate": 1.641694102472441e-05, "loss": 0.534, "step": 26311 }, { "epoch": 0.5580369451337194, "grad_norm": 0.36995694041252136, "learning_rate": 1.6416685243356622e-05, "loss": 0.4849, "step": 26312 }, { "epoch": 0.5580581535916523, "grad_norm": 0.4440765082836151, "learning_rate": 1.6416429454852248e-05, "loss": 0.4469, "step": 26313 }, { "epoch": 0.5580793620495854, "grad_norm": 0.3438839316368103, "learning_rate": 1.6416173659211577e-05, "loss": 0.4954, "step": 26314 }, { "epoch": 0.5581005705075184, "grad_norm": 0.39915114641189575, "learning_rate": 1.641591785643489e-05, "loss": 0.5185, "step": 26315 }, { "epoch": 0.5581217789654515, "grad_norm": 0.34799957275390625, "learning_rate": 1.641566204652248e-05, "loss": 0.6559, "step": 26316 }, { "epoch": 0.5581429874233844, "grad_norm": 0.3484496772289276, "learning_rate": 1.6415406229474624e-05, "loss": 0.5059, "step": 26317 }, { "epoch": 0.5581641958813175, "grad_norm": 0.38334304094314575, "learning_rate": 1.641515040529161e-05, "loss": 0.5, "step": 26318 }, { "epoch": 0.5581854043392505, "grad_norm": 0.3539603054523468, "learning_rate": 1.641489457397372e-05, "loss": 0.4748, "step": 26319 }, { "epoch": 0.5582066127971835, "grad_norm": 0.3844497799873352, "learning_rate": 1.6414638735521244e-05, "loss": 0.5184, "step": 26320 }, { "epoch": 0.5582278212551165, "grad_norm": 0.4048475921154022, "learning_rate": 1.641438288993446e-05, "loss": 0.4434, "step": 26321 }, { "epoch": 0.5582490297130496, "grad_norm": 0.5226022005081177, "learning_rate": 1.641412703721365e-05, "loss": 0.5389, "step": 26322 }, { "epoch": 0.5582702381709825, "grad_norm": 0.31640809774398804, "learning_rate": 1.641387117735911e-05, "loss": 0.4312, "step": 26323 }, { "epoch": 0.5582914466289156, "grad_norm": 0.4093831479549408, "learning_rate": 1.6413615310371116e-05, "loss": 0.5111, "step": 26324 }, { "epoch": 0.5583126550868487, "grad_norm": 0.3865559697151184, "learning_rate": 1.6413359436249953e-05, "loss": 0.5306, "step": 26325 }, { "epoch": 0.5583338635447817, "grad_norm": 0.34357407689094543, "learning_rate": 1.641310355499591e-05, "loss": 0.5169, "step": 26326 }, { "epoch": 0.5583550720027147, "grad_norm": 0.3722721338272095, "learning_rate": 1.641284766660927e-05, "loss": 0.4917, "step": 26327 }, { "epoch": 0.5583762804606477, "grad_norm": 0.3821725845336914, "learning_rate": 1.6412591771090312e-05, "loss": 0.535, "step": 26328 }, { "epoch": 0.5583974889185808, "grad_norm": 0.3505961000919342, "learning_rate": 1.641233586843933e-05, "loss": 0.4765, "step": 26329 }, { "epoch": 0.5584186973765137, "grad_norm": 0.31823936104774475, "learning_rate": 1.64120799586566e-05, "loss": 0.4644, "step": 26330 }, { "epoch": 0.5584399058344468, "grad_norm": 0.3357677459716797, "learning_rate": 1.6411824041742414e-05, "loss": 0.4505, "step": 26331 }, { "epoch": 0.5584611142923798, "grad_norm": 0.36409515142440796, "learning_rate": 1.641156811769705e-05, "loss": 0.4974, "step": 26332 }, { "epoch": 0.5584823227503128, "grad_norm": 0.4107241630554199, "learning_rate": 1.6411312186520798e-05, "loss": 0.5567, "step": 26333 }, { "epoch": 0.5585035312082458, "grad_norm": 0.4028595983982086, "learning_rate": 1.6411056248213938e-05, "loss": 0.4783, "step": 26334 }, { "epoch": 0.5585247396661789, "grad_norm": 0.3714854419231415, "learning_rate": 1.641080030277676e-05, "loss": 0.4674, "step": 26335 }, { "epoch": 0.5585459481241118, "grad_norm": 0.36121848225593567, "learning_rate": 1.641054435020954e-05, "loss": 0.5122, "step": 26336 }, { "epoch": 0.5585671565820449, "grad_norm": 0.42273563146591187, "learning_rate": 1.6410288390512575e-05, "loss": 0.5347, "step": 26337 }, { "epoch": 0.558588365039978, "grad_norm": 0.35031506419181824, "learning_rate": 1.641003242368614e-05, "loss": 0.4553, "step": 26338 }, { "epoch": 0.558609573497911, "grad_norm": 0.3884219825267792, "learning_rate": 1.640977644973052e-05, "loss": 0.6068, "step": 26339 }, { "epoch": 0.558630781955844, "grad_norm": 0.4127833843231201, "learning_rate": 1.6409520468646007e-05, "loss": 0.5091, "step": 26340 }, { "epoch": 0.558651990413777, "grad_norm": 0.3404582738876343, "learning_rate": 1.6409264480432877e-05, "loss": 0.4598, "step": 26341 }, { "epoch": 0.5586731988717101, "grad_norm": 0.336220920085907, "learning_rate": 1.640900848509142e-05, "loss": 0.5122, "step": 26342 }, { "epoch": 0.558694407329643, "grad_norm": 0.31488722562789917, "learning_rate": 1.640875248262192e-05, "loss": 0.4852, "step": 26343 }, { "epoch": 0.5587156157875761, "grad_norm": 0.7515076994895935, "learning_rate": 1.640849647302466e-05, "loss": 0.5016, "step": 26344 }, { "epoch": 0.5587368242455091, "grad_norm": 0.38182175159454346, "learning_rate": 1.640824045629993e-05, "loss": 0.5024, "step": 26345 }, { "epoch": 0.5587580327034422, "grad_norm": 0.3365572392940521, "learning_rate": 1.6407984432448008e-05, "loss": 0.5137, "step": 26346 }, { "epoch": 0.5587792411613751, "grad_norm": 0.4208427369594574, "learning_rate": 1.640772840146918e-05, "loss": 0.5437, "step": 26347 }, { "epoch": 0.5588004496193082, "grad_norm": 0.5900400280952454, "learning_rate": 1.6407472363363736e-05, "loss": 0.5223, "step": 26348 }, { "epoch": 0.5588216580772412, "grad_norm": 0.3820459842681885, "learning_rate": 1.640721631813195e-05, "loss": 0.4784, "step": 26349 }, { "epoch": 0.5588428665351742, "grad_norm": 0.36800384521484375, "learning_rate": 1.640696026577412e-05, "loss": 0.4392, "step": 26350 }, { "epoch": 0.5588640749931072, "grad_norm": 0.3297988474369049, "learning_rate": 1.640670420629052e-05, "loss": 0.4981, "step": 26351 }, { "epoch": 0.5588852834510403, "grad_norm": 0.32714664936065674, "learning_rate": 1.6406448139681443e-05, "loss": 0.5134, "step": 26352 }, { "epoch": 0.5589064919089733, "grad_norm": 0.3752257823944092, "learning_rate": 1.6406192065947167e-05, "loss": 0.4864, "step": 26353 }, { "epoch": 0.5589277003669063, "grad_norm": 0.3500429391860962, "learning_rate": 1.640593598508798e-05, "loss": 0.4817, "step": 26354 }, { "epoch": 0.5589489088248394, "grad_norm": 0.35557398200035095, "learning_rate": 1.640567989710417e-05, "loss": 0.5418, "step": 26355 }, { "epoch": 0.5589701172827723, "grad_norm": 0.4591216742992401, "learning_rate": 1.6405423801996016e-05, "loss": 0.4152, "step": 26356 }, { "epoch": 0.5589913257407054, "grad_norm": 0.41816315054893494, "learning_rate": 1.6405167699763804e-05, "loss": 0.4852, "step": 26357 }, { "epoch": 0.5590125341986384, "grad_norm": 0.3571065366268158, "learning_rate": 1.6404911590407825e-05, "loss": 0.4605, "step": 26358 }, { "epoch": 0.5590337426565715, "grad_norm": 0.35004231333732605, "learning_rate": 1.6404655473928354e-05, "loss": 0.5282, "step": 26359 }, { "epoch": 0.5590549511145044, "grad_norm": 0.3614386320114136, "learning_rate": 1.640439935032568e-05, "loss": 0.4979, "step": 26360 }, { "epoch": 0.5590761595724375, "grad_norm": 0.42446258664131165, "learning_rate": 1.6404143219600094e-05, "loss": 0.5259, "step": 26361 }, { "epoch": 0.5590973680303705, "grad_norm": 0.2951759994029999, "learning_rate": 1.640388708175187e-05, "loss": 0.4999, "step": 26362 }, { "epoch": 0.5591185764883035, "grad_norm": 0.35478663444519043, "learning_rate": 1.6403630936781304e-05, "loss": 0.4976, "step": 26363 }, { "epoch": 0.5591397849462365, "grad_norm": 0.38718029856681824, "learning_rate": 1.640337478468867e-05, "loss": 0.5486, "step": 26364 }, { "epoch": 0.5591609934041696, "grad_norm": 0.38266199827194214, "learning_rate": 1.6403118625474263e-05, "loss": 0.519, "step": 26365 }, { "epoch": 0.5591822018621027, "grad_norm": 0.3136531412601471, "learning_rate": 1.640286245913836e-05, "loss": 0.4349, "step": 26366 }, { "epoch": 0.5592034103200356, "grad_norm": 0.33653178811073303, "learning_rate": 1.640260628568125e-05, "loss": 0.4827, "step": 26367 }, { "epoch": 0.5592246187779687, "grad_norm": 0.34383153915405273, "learning_rate": 1.6402350105103216e-05, "loss": 0.5202, "step": 26368 }, { "epoch": 0.5592458272359017, "grad_norm": 0.4032869338989258, "learning_rate": 1.6402093917404544e-05, "loss": 0.4957, "step": 26369 }, { "epoch": 0.5592670356938347, "grad_norm": 0.34025636315345764, "learning_rate": 1.6401837722585522e-05, "loss": 0.4986, "step": 26370 }, { "epoch": 0.5592882441517677, "grad_norm": 0.32241490483283997, "learning_rate": 1.6401581520646426e-05, "loss": 0.5496, "step": 26371 }, { "epoch": 0.5593094526097008, "grad_norm": 0.3703995943069458, "learning_rate": 1.640132531158755e-05, "loss": 0.5918, "step": 26372 }, { "epoch": 0.5593306610676337, "grad_norm": 0.346249520778656, "learning_rate": 1.6401069095409178e-05, "loss": 0.5659, "step": 26373 }, { "epoch": 0.5593518695255668, "grad_norm": 0.3635110855102539, "learning_rate": 1.640081287211159e-05, "loss": 0.5322, "step": 26374 }, { "epoch": 0.5593730779834998, "grad_norm": 0.37154167890548706, "learning_rate": 1.640055664169507e-05, "loss": 0.5764, "step": 26375 }, { "epoch": 0.5593942864414329, "grad_norm": 0.3300092816352844, "learning_rate": 1.640030040415991e-05, "loss": 0.4737, "step": 26376 }, { "epoch": 0.5594154948993658, "grad_norm": 0.3717063367366791, "learning_rate": 1.6400044159506393e-05, "loss": 0.5016, "step": 26377 }, { "epoch": 0.5594367033572989, "grad_norm": 0.3495892584323883, "learning_rate": 1.63997879077348e-05, "loss": 0.4535, "step": 26378 }, { "epoch": 0.559457911815232, "grad_norm": 0.352365642786026, "learning_rate": 1.639953164884542e-05, "loss": 0.5227, "step": 26379 }, { "epoch": 0.5594791202731649, "grad_norm": 0.33446455001831055, "learning_rate": 1.6399275382838535e-05, "loss": 0.4423, "step": 26380 }, { "epoch": 0.559500328731098, "grad_norm": 0.40315619111061096, "learning_rate": 1.6399019109714435e-05, "loss": 0.5627, "step": 26381 }, { "epoch": 0.559521537189031, "grad_norm": 0.3949218690395355, "learning_rate": 1.6398762829473394e-05, "loss": 0.5277, "step": 26382 }, { "epoch": 0.559542745646964, "grad_norm": 0.31225624680519104, "learning_rate": 1.639850654211571e-05, "loss": 0.5554, "step": 26383 }, { "epoch": 0.559563954104897, "grad_norm": 0.381160706281662, "learning_rate": 1.6398250247641665e-05, "loss": 0.4086, "step": 26384 }, { "epoch": 0.5595851625628301, "grad_norm": 0.3132828176021576, "learning_rate": 1.6397993946051538e-05, "loss": 0.4242, "step": 26385 }, { "epoch": 0.559606371020763, "grad_norm": 0.42772340774536133, "learning_rate": 1.639773763734562e-05, "loss": 0.523, "step": 26386 }, { "epoch": 0.5596275794786961, "grad_norm": 0.41573378443717957, "learning_rate": 1.6397481321524188e-05, "loss": 0.5733, "step": 26387 }, { "epoch": 0.5596487879366291, "grad_norm": 0.327021986246109, "learning_rate": 1.639722499858754e-05, "loss": 0.5354, "step": 26388 }, { "epoch": 0.5596699963945622, "grad_norm": 0.3259614408016205, "learning_rate": 1.639696866853595e-05, "loss": 0.457, "step": 26389 }, { "epoch": 0.5596912048524951, "grad_norm": 0.3163834810256958, "learning_rate": 1.6396712331369705e-05, "loss": 0.5475, "step": 26390 }, { "epoch": 0.5597124133104282, "grad_norm": 0.3660813868045807, "learning_rate": 1.6396455987089098e-05, "loss": 0.5568, "step": 26391 }, { "epoch": 0.5597336217683612, "grad_norm": 0.35929885506629944, "learning_rate": 1.6396199635694406e-05, "loss": 0.4727, "step": 26392 }, { "epoch": 0.5597548302262942, "grad_norm": 0.36284133791923523, "learning_rate": 1.6395943277185915e-05, "loss": 0.4787, "step": 26393 }, { "epoch": 0.5597760386842273, "grad_norm": 0.6011926531791687, "learning_rate": 1.639568691156391e-05, "loss": 0.5103, "step": 26394 }, { "epoch": 0.5597972471421603, "grad_norm": 0.3805588483810425, "learning_rate": 1.639543053882868e-05, "loss": 0.5341, "step": 26395 }, { "epoch": 0.5598184556000934, "grad_norm": 0.3566505014896393, "learning_rate": 1.639517415898051e-05, "loss": 0.5405, "step": 26396 }, { "epoch": 0.5598396640580263, "grad_norm": 0.3614058196544647, "learning_rate": 1.639491777201968e-05, "loss": 0.4889, "step": 26397 }, { "epoch": 0.5598608725159594, "grad_norm": 0.3707912564277649, "learning_rate": 1.6394661377946478e-05, "loss": 0.5915, "step": 26398 }, { "epoch": 0.5598820809738924, "grad_norm": 0.3633699417114258, "learning_rate": 1.6394404976761187e-05, "loss": 0.4744, "step": 26399 }, { "epoch": 0.5599032894318254, "grad_norm": 0.42087066173553467, "learning_rate": 1.6394148568464096e-05, "loss": 0.5242, "step": 26400 }, { "epoch": 0.5599244978897584, "grad_norm": 0.39551684260368347, "learning_rate": 1.639389215305549e-05, "loss": 0.5251, "step": 26401 }, { "epoch": 0.5599457063476915, "grad_norm": 0.30530935525894165, "learning_rate": 1.639363573053565e-05, "loss": 0.4009, "step": 26402 }, { "epoch": 0.5599669148056244, "grad_norm": 0.37125763297080994, "learning_rate": 1.6393379300904867e-05, "loss": 0.4817, "step": 26403 }, { "epoch": 0.5599881232635575, "grad_norm": 0.36673927307128906, "learning_rate": 1.6393122864163424e-05, "loss": 0.5025, "step": 26404 }, { "epoch": 0.5600093317214905, "grad_norm": 0.39021798968315125, "learning_rate": 1.6392866420311603e-05, "loss": 0.4842, "step": 26405 }, { "epoch": 0.5600305401794236, "grad_norm": 0.4450500011444092, "learning_rate": 1.639260996934969e-05, "loss": 0.5557, "step": 26406 }, { "epoch": 0.5600517486373566, "grad_norm": 0.3707832396030426, "learning_rate": 1.6392353511277975e-05, "loss": 0.4569, "step": 26407 }, { "epoch": 0.5600729570952896, "grad_norm": 0.38657304644584656, "learning_rate": 1.6392097046096736e-05, "loss": 0.445, "step": 26408 }, { "epoch": 0.5600941655532227, "grad_norm": 0.3390060365200043, "learning_rate": 1.6391840573806265e-05, "loss": 0.5568, "step": 26409 }, { "epoch": 0.5601153740111556, "grad_norm": 0.3452792167663574, "learning_rate": 1.6391584094406843e-05, "loss": 0.5205, "step": 26410 }, { "epoch": 0.5601365824690887, "grad_norm": 0.31202855706214905, "learning_rate": 1.639132760789876e-05, "loss": 0.4536, "step": 26411 }, { "epoch": 0.5601577909270217, "grad_norm": 0.40336596965789795, "learning_rate": 1.6391071114282295e-05, "loss": 0.5766, "step": 26412 }, { "epoch": 0.5601789993849547, "grad_norm": 0.44272297620773315, "learning_rate": 1.639081461355774e-05, "loss": 0.4566, "step": 26413 }, { "epoch": 0.5602002078428877, "grad_norm": 0.31287243962287903, "learning_rate": 1.639055810572537e-05, "loss": 0.4548, "step": 26414 }, { "epoch": 0.5602214163008208, "grad_norm": 0.35069283843040466, "learning_rate": 1.639030159078548e-05, "loss": 0.4908, "step": 26415 }, { "epoch": 0.5602426247587537, "grad_norm": 0.3655075132846832, "learning_rate": 1.6390045068738354e-05, "loss": 0.422, "step": 26416 }, { "epoch": 0.5602638332166868, "grad_norm": 0.4286648631095886, "learning_rate": 1.6389788539584274e-05, "loss": 0.5265, "step": 26417 }, { "epoch": 0.5602850416746198, "grad_norm": 0.38854891061782837, "learning_rate": 1.6389532003323524e-05, "loss": 0.4826, "step": 26418 }, { "epoch": 0.5603062501325529, "grad_norm": 0.34871286153793335, "learning_rate": 1.6389275459956394e-05, "loss": 0.5163, "step": 26419 }, { "epoch": 0.5603274585904859, "grad_norm": 0.3539466857910156, "learning_rate": 1.6389018909483166e-05, "loss": 0.4124, "step": 26420 }, { "epoch": 0.5603486670484189, "grad_norm": 0.34923139214515686, "learning_rate": 1.638876235190413e-05, "loss": 0.567, "step": 26421 }, { "epoch": 0.560369875506352, "grad_norm": 0.41486412286758423, "learning_rate": 1.6388505787219568e-05, "loss": 0.5132, "step": 26422 }, { "epoch": 0.5603910839642849, "grad_norm": 0.4805951416492462, "learning_rate": 1.6388249215429763e-05, "loss": 0.4968, "step": 26423 }, { "epoch": 0.560412292422218, "grad_norm": 0.34902510046958923, "learning_rate": 1.6387992636535002e-05, "loss": 0.5949, "step": 26424 }, { "epoch": 0.560433500880151, "grad_norm": 0.5250771045684814, "learning_rate": 1.6387736050535575e-05, "loss": 0.483, "step": 26425 }, { "epoch": 0.5604547093380841, "grad_norm": 0.40100806951522827, "learning_rate": 1.638747945743176e-05, "loss": 0.469, "step": 26426 }, { "epoch": 0.560475917796017, "grad_norm": 0.38386213779449463, "learning_rate": 1.6387222857223846e-05, "loss": 0.5329, "step": 26427 }, { "epoch": 0.5604971262539501, "grad_norm": 0.38140177726745605, "learning_rate": 1.638696624991212e-05, "loss": 0.5224, "step": 26428 }, { "epoch": 0.5605183347118831, "grad_norm": 0.38495129346847534, "learning_rate": 1.6386709635496865e-05, "loss": 0.4617, "step": 26429 }, { "epoch": 0.5605395431698161, "grad_norm": 0.3613131642341614, "learning_rate": 1.6386453013978366e-05, "loss": 0.4958, "step": 26430 }, { "epoch": 0.5605607516277491, "grad_norm": 0.35364794731140137, "learning_rate": 1.638619638535691e-05, "loss": 0.4509, "step": 26431 }, { "epoch": 0.5605819600856822, "grad_norm": 0.36452293395996094, "learning_rate": 1.638593974963278e-05, "loss": 0.5097, "step": 26432 }, { "epoch": 0.5606031685436152, "grad_norm": 0.31651613116264343, "learning_rate": 1.6385683106806265e-05, "loss": 0.5544, "step": 26433 }, { "epoch": 0.5606243770015482, "grad_norm": 0.354055792093277, "learning_rate": 1.6385426456877647e-05, "loss": 0.503, "step": 26434 }, { "epoch": 0.5606455854594813, "grad_norm": 0.34397587180137634, "learning_rate": 1.6385169799847217e-05, "loss": 0.4926, "step": 26435 }, { "epoch": 0.5606667939174143, "grad_norm": 0.4096224009990692, "learning_rate": 1.6384913135715256e-05, "loss": 0.5016, "step": 26436 }, { "epoch": 0.5606880023753473, "grad_norm": 0.4226188361644745, "learning_rate": 1.638465646448205e-05, "loss": 0.5057, "step": 26437 }, { "epoch": 0.5607092108332803, "grad_norm": 0.3509581387042999, "learning_rate": 1.6384399786147883e-05, "loss": 0.5874, "step": 26438 }, { "epoch": 0.5607304192912134, "grad_norm": 0.3527773320674896, "learning_rate": 1.6384143100713043e-05, "loss": 0.5049, "step": 26439 }, { "epoch": 0.5607516277491463, "grad_norm": 0.31547272205352783, "learning_rate": 1.638388640817781e-05, "loss": 0.5084, "step": 26440 }, { "epoch": 0.5607728362070794, "grad_norm": 0.9381645917892456, "learning_rate": 1.638362970854248e-05, "loss": 0.5323, "step": 26441 }, { "epoch": 0.5607940446650124, "grad_norm": 0.3242174983024597, "learning_rate": 1.638337300180733e-05, "loss": 0.5256, "step": 26442 }, { "epoch": 0.5608152531229454, "grad_norm": 0.3116357922554016, "learning_rate": 1.6383116287972648e-05, "loss": 0.42, "step": 26443 }, { "epoch": 0.5608364615808784, "grad_norm": 0.3387364149093628, "learning_rate": 1.6382859567038722e-05, "loss": 0.5552, "step": 26444 }, { "epoch": 0.5608576700388115, "grad_norm": 0.34707197546958923, "learning_rate": 1.638260283900583e-05, "loss": 0.5452, "step": 26445 }, { "epoch": 0.5608788784967444, "grad_norm": 0.36548158526420593, "learning_rate": 1.6382346103874266e-05, "loss": 0.4677, "step": 26446 }, { "epoch": 0.5609000869546775, "grad_norm": 0.36425748467445374, "learning_rate": 1.6382089361644312e-05, "loss": 0.5371, "step": 26447 }, { "epoch": 0.5609212954126106, "grad_norm": 0.37355804443359375, "learning_rate": 1.6381832612316256e-05, "loss": 0.5842, "step": 26448 }, { "epoch": 0.5609425038705436, "grad_norm": 0.32818523049354553, "learning_rate": 1.6381575855890375e-05, "loss": 0.5303, "step": 26449 }, { "epoch": 0.5609637123284766, "grad_norm": 0.3672690987586975, "learning_rate": 1.6381319092366968e-05, "loss": 0.6078, "step": 26450 }, { "epoch": 0.5609849207864096, "grad_norm": 0.34673404693603516, "learning_rate": 1.638106232174631e-05, "loss": 0.4711, "step": 26451 }, { "epoch": 0.5610061292443427, "grad_norm": 0.3280484974384308, "learning_rate": 1.6380805544028687e-05, "loss": 0.5295, "step": 26452 }, { "epoch": 0.5610273377022756, "grad_norm": 0.32434341311454773, "learning_rate": 1.638054875921439e-05, "loss": 0.4935, "step": 26453 }, { "epoch": 0.5610485461602087, "grad_norm": 0.3546122908592224, "learning_rate": 1.6380291967303703e-05, "loss": 0.4383, "step": 26454 }, { "epoch": 0.5610697546181417, "grad_norm": 0.3606146574020386, "learning_rate": 1.638003516829691e-05, "loss": 0.5191, "step": 26455 }, { "epoch": 0.5610909630760748, "grad_norm": 0.43052634596824646, "learning_rate": 1.6379778362194295e-05, "loss": 0.5144, "step": 26456 }, { "epoch": 0.5611121715340077, "grad_norm": 0.34979763627052307, "learning_rate": 1.637952154899615e-05, "loss": 0.5084, "step": 26457 }, { "epoch": 0.5611333799919408, "grad_norm": 0.4315277934074402, "learning_rate": 1.6379264728702752e-05, "loss": 0.4797, "step": 26458 }, { "epoch": 0.5611545884498738, "grad_norm": 0.3719259798526764, "learning_rate": 1.6379007901314396e-05, "loss": 0.5522, "step": 26459 }, { "epoch": 0.5611757969078068, "grad_norm": 0.35542774200439453, "learning_rate": 1.637875106683136e-05, "loss": 0.4884, "step": 26460 }, { "epoch": 0.5611970053657399, "grad_norm": 0.32145461440086365, "learning_rate": 1.637849422525393e-05, "loss": 0.5411, "step": 26461 }, { "epoch": 0.5612182138236729, "grad_norm": 0.3031504154205322, "learning_rate": 1.63782373765824e-05, "loss": 0.5157, "step": 26462 }, { "epoch": 0.561239422281606, "grad_norm": 0.3118165135383606, "learning_rate": 1.6377980520817046e-05, "loss": 0.4785, "step": 26463 }, { "epoch": 0.5612606307395389, "grad_norm": 0.3473362624645233, "learning_rate": 1.637772365795816e-05, "loss": 0.5266, "step": 26464 }, { "epoch": 0.561281839197472, "grad_norm": 0.3622778058052063, "learning_rate": 1.6377466788006024e-05, "loss": 0.5451, "step": 26465 }, { "epoch": 0.561303047655405, "grad_norm": 0.30042675137519836, "learning_rate": 1.6377209910960924e-05, "loss": 0.4508, "step": 26466 }, { "epoch": 0.561324256113338, "grad_norm": 0.31844037771224976, "learning_rate": 1.6376953026823145e-05, "loss": 0.5128, "step": 26467 }, { "epoch": 0.561345464571271, "grad_norm": 1.458619236946106, "learning_rate": 1.6376696135592978e-05, "loss": 0.4356, "step": 26468 }, { "epoch": 0.5613666730292041, "grad_norm": 0.41468334197998047, "learning_rate": 1.63764392372707e-05, "loss": 0.5252, "step": 26469 }, { "epoch": 0.561387881487137, "grad_norm": 0.3425244390964508, "learning_rate": 1.6376182331856607e-05, "loss": 0.556, "step": 26470 }, { "epoch": 0.5614090899450701, "grad_norm": 0.34363600611686707, "learning_rate": 1.6375925419350974e-05, "loss": 0.5876, "step": 26471 }, { "epoch": 0.5614302984030031, "grad_norm": 0.41866981983184814, "learning_rate": 1.63756684997541e-05, "loss": 0.4295, "step": 26472 }, { "epoch": 0.5614515068609361, "grad_norm": 0.3618416488170624, "learning_rate": 1.637541157306626e-05, "loss": 0.5053, "step": 26473 }, { "epoch": 0.5614727153188692, "grad_norm": 0.3395978808403015, "learning_rate": 1.637515463928774e-05, "loss": 0.4122, "step": 26474 }, { "epoch": 0.5614939237768022, "grad_norm": 0.32520854473114014, "learning_rate": 1.6374897698418828e-05, "loss": 0.4669, "step": 26475 }, { "epoch": 0.5615151322347353, "grad_norm": 0.8666102886199951, "learning_rate": 1.637464075045981e-05, "loss": 0.4918, "step": 26476 }, { "epoch": 0.5615363406926682, "grad_norm": 0.33167344331741333, "learning_rate": 1.6374383795410972e-05, "loss": 0.5066, "step": 26477 }, { "epoch": 0.5615575491506013, "grad_norm": 0.3832596242427826, "learning_rate": 1.6374126833272603e-05, "loss": 0.589, "step": 26478 }, { "epoch": 0.5615787576085343, "grad_norm": 0.35062137246131897, "learning_rate": 1.6373869864044985e-05, "loss": 0.4611, "step": 26479 }, { "epoch": 0.5615999660664673, "grad_norm": 0.39334622025489807, "learning_rate": 1.6373612887728403e-05, "loss": 0.4933, "step": 26480 }, { "epoch": 0.5616211745244003, "grad_norm": 0.3361923098564148, "learning_rate": 1.6373355904323144e-05, "loss": 0.4957, "step": 26481 }, { "epoch": 0.5616423829823334, "grad_norm": 0.375750333070755, "learning_rate": 1.6373098913829493e-05, "loss": 0.4831, "step": 26482 }, { "epoch": 0.5616635914402663, "grad_norm": 0.3709706664085388, "learning_rate": 1.637284191624774e-05, "loss": 0.5376, "step": 26483 }, { "epoch": 0.5616847998981994, "grad_norm": 0.3328874409198761, "learning_rate": 1.637258491157816e-05, "loss": 0.496, "step": 26484 }, { "epoch": 0.5617060083561324, "grad_norm": 0.356801837682724, "learning_rate": 1.6372327899821053e-05, "loss": 0.4841, "step": 26485 }, { "epoch": 0.5617272168140655, "grad_norm": 0.3643970489501953, "learning_rate": 1.6372070880976698e-05, "loss": 0.4913, "step": 26486 }, { "epoch": 0.5617484252719984, "grad_norm": 0.35088494420051575, "learning_rate": 1.637181385504538e-05, "loss": 0.3865, "step": 26487 }, { "epoch": 0.5617696337299315, "grad_norm": 0.34923937916755676, "learning_rate": 1.6371556822027387e-05, "loss": 0.5626, "step": 26488 }, { "epoch": 0.5617908421878646, "grad_norm": 0.43312427401542664, "learning_rate": 1.6371299781923006e-05, "loss": 0.4906, "step": 26489 }, { "epoch": 0.5618120506457975, "grad_norm": 0.3451838791370392, "learning_rate": 1.6371042734732514e-05, "loss": 0.5691, "step": 26490 }, { "epoch": 0.5618332591037306, "grad_norm": 0.35074782371520996, "learning_rate": 1.637078568045621e-05, "loss": 0.4857, "step": 26491 }, { "epoch": 0.5618544675616636, "grad_norm": 0.41020524501800537, "learning_rate": 1.6370528619094367e-05, "loss": 0.5773, "step": 26492 }, { "epoch": 0.5618756760195966, "grad_norm": 0.3654206097126007, "learning_rate": 1.637027155064728e-05, "loss": 0.5169, "step": 26493 }, { "epoch": 0.5618968844775296, "grad_norm": 0.36019930243492126, "learning_rate": 1.6370014475115236e-05, "loss": 0.5763, "step": 26494 }, { "epoch": 0.5619180929354627, "grad_norm": 0.32131892442703247, "learning_rate": 1.6369757392498516e-05, "loss": 0.5026, "step": 26495 }, { "epoch": 0.5619393013933957, "grad_norm": 0.38027462363243103, "learning_rate": 1.6369500302797402e-05, "loss": 0.4986, "step": 26496 }, { "epoch": 0.5619605098513287, "grad_norm": 0.31368911266326904, "learning_rate": 1.6369243206012192e-05, "loss": 0.4515, "step": 26497 }, { "epoch": 0.5619817183092617, "grad_norm": 0.3921819031238556, "learning_rate": 1.636898610214316e-05, "loss": 0.4875, "step": 26498 }, { "epoch": 0.5620029267671948, "grad_norm": 0.34225034713745117, "learning_rate": 1.63687289911906e-05, "loss": 0.4499, "step": 26499 }, { "epoch": 0.5620241352251277, "grad_norm": 0.3428260385990143, "learning_rate": 1.6368471873154794e-05, "loss": 0.4617, "step": 26500 }, { "epoch": 0.5620453436830608, "grad_norm": 0.3200680613517761, "learning_rate": 1.636821474803603e-05, "loss": 0.5084, "step": 26501 }, { "epoch": 0.5620665521409939, "grad_norm": 0.4469069242477417, "learning_rate": 1.636795761583459e-05, "loss": 0.5008, "step": 26502 }, { "epoch": 0.5620877605989268, "grad_norm": 0.4132828414440155, "learning_rate": 1.6367700476550764e-05, "loss": 0.5477, "step": 26503 }, { "epoch": 0.5621089690568599, "grad_norm": 0.37820911407470703, "learning_rate": 1.6367443330184837e-05, "loss": 0.4913, "step": 26504 }, { "epoch": 0.5621301775147929, "grad_norm": 0.3679064214229584, "learning_rate": 1.6367186176737097e-05, "loss": 0.5535, "step": 26505 }, { "epoch": 0.562151385972726, "grad_norm": 0.37420809268951416, "learning_rate": 1.6366929016207826e-05, "loss": 0.5142, "step": 26506 }, { "epoch": 0.5621725944306589, "grad_norm": 0.32889649271965027, "learning_rate": 1.6366671848597313e-05, "loss": 0.46, "step": 26507 }, { "epoch": 0.562193802888592, "grad_norm": 0.3805384933948517, "learning_rate": 1.636641467390584e-05, "loss": 0.535, "step": 26508 }, { "epoch": 0.562215011346525, "grad_norm": 0.3251386284828186, "learning_rate": 1.63661574921337e-05, "loss": 0.5067, "step": 26509 }, { "epoch": 0.562236219804458, "grad_norm": 0.321351557970047, "learning_rate": 1.6365900303281172e-05, "loss": 0.4335, "step": 26510 }, { "epoch": 0.562257428262391, "grad_norm": 0.3866548538208008, "learning_rate": 1.6365643107348547e-05, "loss": 0.5046, "step": 26511 }, { "epoch": 0.5622786367203241, "grad_norm": 0.32526665925979614, "learning_rate": 1.6365385904336105e-05, "loss": 0.4755, "step": 26512 }, { "epoch": 0.562299845178257, "grad_norm": 0.45233720541000366, "learning_rate": 1.636512869424414e-05, "loss": 0.5625, "step": 26513 }, { "epoch": 0.5623210536361901, "grad_norm": 0.3333665728569031, "learning_rate": 1.6364871477072934e-05, "loss": 0.5649, "step": 26514 }, { "epoch": 0.5623422620941232, "grad_norm": 0.33066582679748535, "learning_rate": 1.636461425282277e-05, "loss": 0.5063, "step": 26515 }, { "epoch": 0.5623634705520562, "grad_norm": 0.3507932722568512, "learning_rate": 1.636435702149394e-05, "loss": 0.5355, "step": 26516 }, { "epoch": 0.5623846790099892, "grad_norm": 0.3415625989437103, "learning_rate": 1.636409978308673e-05, "loss": 0.5257, "step": 26517 }, { "epoch": 0.5624058874679222, "grad_norm": 0.37254053354263306, "learning_rate": 1.636384253760142e-05, "loss": 0.5013, "step": 26518 }, { "epoch": 0.5624270959258553, "grad_norm": 0.39631977677345276, "learning_rate": 1.63635852850383e-05, "loss": 0.57, "step": 26519 }, { "epoch": 0.5624483043837882, "grad_norm": 0.32835787534713745, "learning_rate": 1.6363328025397652e-05, "loss": 0.5895, "step": 26520 }, { "epoch": 0.5624695128417213, "grad_norm": 0.4094112515449524, "learning_rate": 1.6363070758679775e-05, "loss": 0.4913, "step": 26521 }, { "epoch": 0.5624907212996543, "grad_norm": 0.3514667749404907, "learning_rate": 1.6362813484884936e-05, "loss": 0.4786, "step": 26522 }, { "epoch": 0.5625119297575873, "grad_norm": 0.35687196254730225, "learning_rate": 1.6362556204013436e-05, "loss": 0.4485, "step": 26523 }, { "epoch": 0.5625331382155203, "grad_norm": 0.33891475200653076, "learning_rate": 1.636229891606556e-05, "loss": 0.427, "step": 26524 }, { "epoch": 0.5625543466734534, "grad_norm": 0.3982011377811432, "learning_rate": 1.6362041621041585e-05, "loss": 0.48, "step": 26525 }, { "epoch": 0.5625755551313864, "grad_norm": 0.3393004238605499, "learning_rate": 1.6361784318941802e-05, "loss": 0.4951, "step": 26526 }, { "epoch": 0.5625967635893194, "grad_norm": 0.3947117030620575, "learning_rate": 1.6361527009766503e-05, "loss": 0.536, "step": 26527 }, { "epoch": 0.5626179720472524, "grad_norm": 0.39602792263031006, "learning_rate": 1.6361269693515965e-05, "loss": 0.5526, "step": 26528 }, { "epoch": 0.5626391805051855, "grad_norm": 0.3543040454387665, "learning_rate": 1.6361012370190475e-05, "loss": 0.5129, "step": 26529 }, { "epoch": 0.5626603889631185, "grad_norm": 0.3675987422466278, "learning_rate": 1.636075503979033e-05, "loss": 0.5661, "step": 26530 }, { "epoch": 0.5626815974210515, "grad_norm": 0.5212661623954773, "learning_rate": 1.63604977023158e-05, "loss": 0.5131, "step": 26531 }, { "epoch": 0.5627028058789846, "grad_norm": 0.389661967754364, "learning_rate": 1.6360240357767187e-05, "loss": 0.5209, "step": 26532 }, { "epoch": 0.5627240143369175, "grad_norm": 0.39676570892333984, "learning_rate": 1.635998300614477e-05, "loss": 0.5463, "step": 26533 }, { "epoch": 0.5627452227948506, "grad_norm": 0.3602531850337982, "learning_rate": 1.635972564744883e-05, "loss": 0.5214, "step": 26534 }, { "epoch": 0.5627664312527836, "grad_norm": 0.3094250559806824, "learning_rate": 1.635946828167966e-05, "loss": 0.4574, "step": 26535 }, { "epoch": 0.5627876397107167, "grad_norm": 0.4896319806575775, "learning_rate": 1.6359210908837545e-05, "loss": 0.5084, "step": 26536 }, { "epoch": 0.5628088481686496, "grad_norm": 0.3544275164604187, "learning_rate": 1.6358953528922773e-05, "loss": 0.4111, "step": 26537 }, { "epoch": 0.5628300566265827, "grad_norm": 0.35946252942085266, "learning_rate": 1.6358696141935622e-05, "loss": 0.5118, "step": 26538 }, { "epoch": 0.5628512650845157, "grad_norm": 0.32446226477622986, "learning_rate": 1.6358438747876394e-05, "loss": 0.4159, "step": 26539 }, { "epoch": 0.5628724735424487, "grad_norm": 0.3322995901107788, "learning_rate": 1.635818134674536e-05, "loss": 0.4859, "step": 26540 }, { "epoch": 0.5628936820003817, "grad_norm": 0.3752819001674652, "learning_rate": 1.635792393854281e-05, "loss": 0.4943, "step": 26541 }, { "epoch": 0.5629148904583148, "grad_norm": 0.45270419120788574, "learning_rate": 1.6357666523269035e-05, "loss": 0.534, "step": 26542 }, { "epoch": 0.5629360989162479, "grad_norm": 0.4020910859107971, "learning_rate": 1.635740910092432e-05, "loss": 0.5417, "step": 26543 }, { "epoch": 0.5629573073741808, "grad_norm": 0.35858938097953796, "learning_rate": 1.635715167150895e-05, "loss": 0.4745, "step": 26544 }, { "epoch": 0.5629785158321139, "grad_norm": 0.37102028727531433, "learning_rate": 1.6356894235023206e-05, "loss": 0.5138, "step": 26545 }, { "epoch": 0.5629997242900469, "grad_norm": 0.3699454069137573, "learning_rate": 1.6356636791467383e-05, "loss": 0.512, "step": 26546 }, { "epoch": 0.5630209327479799, "grad_norm": 0.349001407623291, "learning_rate": 1.6356379340841763e-05, "loss": 0.5174, "step": 26547 }, { "epoch": 0.5630421412059129, "grad_norm": 0.309763103723526, "learning_rate": 1.6356121883146632e-05, "loss": 0.4842, "step": 26548 }, { "epoch": 0.563063349663846, "grad_norm": 0.3134148418903351, "learning_rate": 1.635586441838228e-05, "loss": 0.4772, "step": 26549 }, { "epoch": 0.5630845581217789, "grad_norm": 0.32001036405563354, "learning_rate": 1.635560694654899e-05, "loss": 0.5483, "step": 26550 }, { "epoch": 0.563105766579712, "grad_norm": 0.44164422154426575, "learning_rate": 1.635534946764705e-05, "loss": 0.5461, "step": 26551 }, { "epoch": 0.563126975037645, "grad_norm": 0.35569390654563904, "learning_rate": 1.6355091981676743e-05, "loss": 0.5572, "step": 26552 }, { "epoch": 0.563148183495578, "grad_norm": 0.4037708640098572, "learning_rate": 1.635483448863836e-05, "loss": 0.4725, "step": 26553 }, { "epoch": 0.563169391953511, "grad_norm": 0.37742623686790466, "learning_rate": 1.635457698853218e-05, "loss": 0.5181, "step": 26554 }, { "epoch": 0.5631906004114441, "grad_norm": 0.32684069871902466, "learning_rate": 1.6354319481358503e-05, "loss": 0.5046, "step": 26555 }, { "epoch": 0.5632118088693772, "grad_norm": 0.3456457555294037, "learning_rate": 1.6354061967117604e-05, "loss": 0.5772, "step": 26556 }, { "epoch": 0.5632330173273101, "grad_norm": 0.3531799912452698, "learning_rate": 1.635380444580977e-05, "loss": 0.4691, "step": 26557 }, { "epoch": 0.5632542257852432, "grad_norm": 0.35643601417541504, "learning_rate": 1.635354691743529e-05, "loss": 0.52, "step": 26558 }, { "epoch": 0.5632754342431762, "grad_norm": 0.34232524037361145, "learning_rate": 1.6353289381994456e-05, "loss": 0.385, "step": 26559 }, { "epoch": 0.5632966427011092, "grad_norm": 0.3451620936393738, "learning_rate": 1.6353031839487546e-05, "loss": 0.5102, "step": 26560 }, { "epoch": 0.5633178511590422, "grad_norm": 0.33702054619789124, "learning_rate": 1.6352774289914847e-05, "loss": 0.5222, "step": 26561 }, { "epoch": 0.5633390596169753, "grad_norm": 0.3688179552555084, "learning_rate": 1.6352516733276646e-05, "loss": 0.4815, "step": 26562 }, { "epoch": 0.5633602680749082, "grad_norm": 0.3509567975997925, "learning_rate": 1.6352259169573235e-05, "loss": 0.4757, "step": 26563 }, { "epoch": 0.5633814765328413, "grad_norm": 0.31948423385620117, "learning_rate": 1.6352001598804895e-05, "loss": 0.406, "step": 26564 }, { "epoch": 0.5634026849907743, "grad_norm": 0.3286997675895691, "learning_rate": 1.6351744020971914e-05, "loss": 0.5081, "step": 26565 }, { "epoch": 0.5634238934487074, "grad_norm": 0.3320055902004242, "learning_rate": 1.635148643607458e-05, "loss": 0.4843, "step": 26566 }, { "epoch": 0.5634451019066403, "grad_norm": 0.39683565497398376, "learning_rate": 1.6351228844113177e-05, "loss": 0.4305, "step": 26567 }, { "epoch": 0.5634663103645734, "grad_norm": 0.4088260233402252, "learning_rate": 1.635097124508799e-05, "loss": 0.6894, "step": 26568 }, { "epoch": 0.5634875188225064, "grad_norm": 0.3361545503139496, "learning_rate": 1.6350713638999312e-05, "loss": 0.5227, "step": 26569 }, { "epoch": 0.5635087272804394, "grad_norm": 0.3380080461502075, "learning_rate": 1.6350456025847422e-05, "loss": 0.3574, "step": 26570 }, { "epoch": 0.5635299357383725, "grad_norm": 0.3176071345806122, "learning_rate": 1.6350198405632612e-05, "loss": 0.4519, "step": 26571 }, { "epoch": 0.5635511441963055, "grad_norm": 0.32681992650032043, "learning_rate": 1.6349940778355167e-05, "loss": 0.4951, "step": 26572 }, { "epoch": 0.5635723526542386, "grad_norm": 0.35549911856651306, "learning_rate": 1.634968314401537e-05, "loss": 0.4807, "step": 26573 }, { "epoch": 0.5635935611121715, "grad_norm": 0.4059560298919678, "learning_rate": 1.6349425502613513e-05, "loss": 0.5449, "step": 26574 }, { "epoch": 0.5636147695701046, "grad_norm": 0.3529997169971466, "learning_rate": 1.634916785414988e-05, "loss": 0.4214, "step": 26575 }, { "epoch": 0.5636359780280376, "grad_norm": 0.4141976237297058, "learning_rate": 1.6348910198624756e-05, "loss": 0.4594, "step": 26576 }, { "epoch": 0.5636571864859706, "grad_norm": 0.3698638081550598, "learning_rate": 1.634865253603843e-05, "loss": 0.4941, "step": 26577 }, { "epoch": 0.5636783949439036, "grad_norm": 0.3588966727256775, "learning_rate": 1.6348394866391192e-05, "loss": 0.4465, "step": 26578 }, { "epoch": 0.5636996034018367, "grad_norm": 0.3312433958053589, "learning_rate": 1.634813718968332e-05, "loss": 0.5387, "step": 26579 }, { "epoch": 0.5637208118597696, "grad_norm": 0.38106727600097656, "learning_rate": 1.6347879505915103e-05, "loss": 0.5668, "step": 26580 }, { "epoch": 0.5637420203177027, "grad_norm": 0.3341476619243622, "learning_rate": 1.6347621815086832e-05, "loss": 0.5156, "step": 26581 }, { "epoch": 0.5637632287756357, "grad_norm": 0.3735010027885437, "learning_rate": 1.6347364117198792e-05, "loss": 0.5026, "step": 26582 }, { "epoch": 0.5637844372335687, "grad_norm": 0.3227123022079468, "learning_rate": 1.634710641225127e-05, "loss": 0.4952, "step": 26583 }, { "epoch": 0.5638056456915018, "grad_norm": 0.3637520670890808, "learning_rate": 1.634684870024455e-05, "loss": 0.5392, "step": 26584 }, { "epoch": 0.5638268541494348, "grad_norm": 0.31367582082748413, "learning_rate": 1.6346590981178917e-05, "loss": 0.464, "step": 26585 }, { "epoch": 0.5638480626073679, "grad_norm": 0.3330087959766388, "learning_rate": 1.634633325505466e-05, "loss": 0.4645, "step": 26586 }, { "epoch": 0.5638692710653008, "grad_norm": 0.3028488755226135, "learning_rate": 1.6346075521872073e-05, "loss": 0.4303, "step": 26587 }, { "epoch": 0.5638904795232339, "grad_norm": 0.5002396106719971, "learning_rate": 1.634581778163143e-05, "loss": 0.5702, "step": 26588 }, { "epoch": 0.5639116879811669, "grad_norm": 0.36194562911987305, "learning_rate": 1.634556003433303e-05, "loss": 0.5346, "step": 26589 }, { "epoch": 0.5639328964390999, "grad_norm": 0.3651304543018341, "learning_rate": 1.634530227997715e-05, "loss": 0.4617, "step": 26590 }, { "epoch": 0.5639541048970329, "grad_norm": 0.3325619399547577, "learning_rate": 1.6345044518564077e-05, "loss": 0.4435, "step": 26591 }, { "epoch": 0.563975313354966, "grad_norm": 0.3250575363636017, "learning_rate": 1.6344786750094104e-05, "loss": 0.4464, "step": 26592 }, { "epoch": 0.5639965218128989, "grad_norm": 0.37390002608299255, "learning_rate": 1.6344528974567512e-05, "loss": 0.5672, "step": 26593 }, { "epoch": 0.564017730270832, "grad_norm": 0.36121881008148193, "learning_rate": 1.6344271191984594e-05, "loss": 0.4856, "step": 26594 }, { "epoch": 0.564038938728765, "grad_norm": 0.3257361650466919, "learning_rate": 1.6344013402345627e-05, "loss": 0.4969, "step": 26595 }, { "epoch": 0.5640601471866981, "grad_norm": 0.376460999250412, "learning_rate": 1.634375560565091e-05, "loss": 0.5189, "step": 26596 }, { "epoch": 0.5640813556446311, "grad_norm": 0.36668580770492554, "learning_rate": 1.6343497801900716e-05, "loss": 0.5832, "step": 26597 }, { "epoch": 0.5641025641025641, "grad_norm": 0.45006242394447327, "learning_rate": 1.634323999109534e-05, "loss": 0.5595, "step": 26598 }, { "epoch": 0.5641237725604972, "grad_norm": 0.34559741616249084, "learning_rate": 1.634298217323507e-05, "loss": 0.4556, "step": 26599 }, { "epoch": 0.5641449810184301, "grad_norm": 0.37642085552215576, "learning_rate": 1.6342724348320193e-05, "loss": 0.5901, "step": 26600 }, { "epoch": 0.5641661894763632, "grad_norm": 0.3772923946380615, "learning_rate": 1.634246651635099e-05, "loss": 0.4643, "step": 26601 }, { "epoch": 0.5641873979342962, "grad_norm": 0.4687124192714691, "learning_rate": 1.634220867732775e-05, "loss": 0.487, "step": 26602 }, { "epoch": 0.5642086063922293, "grad_norm": 0.34401100873947144, "learning_rate": 1.6341950831250762e-05, "loss": 0.4597, "step": 26603 }, { "epoch": 0.5642298148501622, "grad_norm": 0.34466975927352905, "learning_rate": 1.6341692978120313e-05, "loss": 0.5055, "step": 26604 }, { "epoch": 0.5642510233080953, "grad_norm": 0.3396870791912079, "learning_rate": 1.6341435117936685e-05, "loss": 0.4352, "step": 26605 }, { "epoch": 0.5642722317660283, "grad_norm": 0.3833342492580414, "learning_rate": 1.634117725070017e-05, "loss": 0.5748, "step": 26606 }, { "epoch": 0.5642934402239613, "grad_norm": 0.31706827878952026, "learning_rate": 1.634091937641105e-05, "loss": 0.3961, "step": 26607 }, { "epoch": 0.5643146486818943, "grad_norm": 0.35721489787101746, "learning_rate": 1.6340661495069617e-05, "loss": 0.4523, "step": 26608 }, { "epoch": 0.5643358571398274, "grad_norm": 0.3407203257083893, "learning_rate": 1.6340403606676153e-05, "loss": 0.4831, "step": 26609 }, { "epoch": 0.5643570655977603, "grad_norm": 0.3825824558734894, "learning_rate": 1.634014571123095e-05, "loss": 0.5488, "step": 26610 }, { "epoch": 0.5643782740556934, "grad_norm": 0.39548319578170776, "learning_rate": 1.6339887808734292e-05, "loss": 0.5662, "step": 26611 }, { "epoch": 0.5643994825136265, "grad_norm": 0.3457048833370209, "learning_rate": 1.633962989918646e-05, "loss": 0.4617, "step": 26612 }, { "epoch": 0.5644206909715594, "grad_norm": 0.34993693232536316, "learning_rate": 1.6339371982587755e-05, "loss": 0.5498, "step": 26613 }, { "epoch": 0.5644418994294925, "grad_norm": 0.36241137981414795, "learning_rate": 1.6339114058938452e-05, "loss": 0.4885, "step": 26614 }, { "epoch": 0.5644631078874255, "grad_norm": 0.3578217327594757, "learning_rate": 1.6338856128238843e-05, "loss": 0.464, "step": 26615 }, { "epoch": 0.5644843163453586, "grad_norm": 0.4010336101055145, "learning_rate": 1.633859819048921e-05, "loss": 0.4965, "step": 26616 }, { "epoch": 0.5645055248032915, "grad_norm": 0.35682132840156555, "learning_rate": 1.6338340245689847e-05, "loss": 0.4507, "step": 26617 }, { "epoch": 0.5645267332612246, "grad_norm": 0.39026880264282227, "learning_rate": 1.6338082293841035e-05, "loss": 0.5045, "step": 26618 }, { "epoch": 0.5645479417191576, "grad_norm": 0.3556429147720337, "learning_rate": 1.633782433494306e-05, "loss": 0.5247, "step": 26619 }, { "epoch": 0.5645691501770906, "grad_norm": 0.4406346082687378, "learning_rate": 1.6337566368996217e-05, "loss": 0.4983, "step": 26620 }, { "epoch": 0.5645903586350236, "grad_norm": 0.46984249353408813, "learning_rate": 1.6337308396000783e-05, "loss": 0.5084, "step": 26621 }, { "epoch": 0.5646115670929567, "grad_norm": 0.3749528229236603, "learning_rate": 1.6337050415957053e-05, "loss": 0.4119, "step": 26622 }, { "epoch": 0.5646327755508896, "grad_norm": 0.3756754696369171, "learning_rate": 1.633679242886531e-05, "loss": 0.5668, "step": 26623 }, { "epoch": 0.5646539840088227, "grad_norm": 0.33577701449394226, "learning_rate": 1.6336534434725845e-05, "loss": 0.4782, "step": 26624 }, { "epoch": 0.5646751924667558, "grad_norm": 0.3939625024795532, "learning_rate": 1.6336276433538936e-05, "loss": 0.5208, "step": 26625 }, { "epoch": 0.5646964009246888, "grad_norm": 0.3281297981739044, "learning_rate": 1.6336018425304875e-05, "loss": 0.4756, "step": 26626 }, { "epoch": 0.5647176093826218, "grad_norm": 0.370466411113739, "learning_rate": 1.633576041002395e-05, "loss": 0.5809, "step": 26627 }, { "epoch": 0.5647388178405548, "grad_norm": 0.36990007758140564, "learning_rate": 1.633550238769645e-05, "loss": 0.5677, "step": 26628 }, { "epoch": 0.5647600262984879, "grad_norm": 0.3396408259868622, "learning_rate": 1.6335244358322658e-05, "loss": 0.5344, "step": 26629 }, { "epoch": 0.5647812347564208, "grad_norm": 0.351553350687027, "learning_rate": 1.6334986321902864e-05, "loss": 0.4798, "step": 26630 }, { "epoch": 0.5648024432143539, "grad_norm": 0.3625502586364746, "learning_rate": 1.633472827843735e-05, "loss": 0.5891, "step": 26631 }, { "epoch": 0.5648236516722869, "grad_norm": 0.3681681156158447, "learning_rate": 1.6334470227926407e-05, "loss": 0.4789, "step": 26632 }, { "epoch": 0.56484486013022, "grad_norm": 0.34439074993133545, "learning_rate": 1.6334212170370323e-05, "loss": 0.4993, "step": 26633 }, { "epoch": 0.5648660685881529, "grad_norm": 0.4023473560810089, "learning_rate": 1.633395410576938e-05, "loss": 0.4908, "step": 26634 }, { "epoch": 0.564887277046086, "grad_norm": 0.34717464447021484, "learning_rate": 1.6333696034123872e-05, "loss": 0.422, "step": 26635 }, { "epoch": 0.564908485504019, "grad_norm": 0.3561907112598419, "learning_rate": 1.6333437955434082e-05, "loss": 0.5099, "step": 26636 }, { "epoch": 0.564929693961952, "grad_norm": 0.43393588066101074, "learning_rate": 1.63331798697003e-05, "loss": 0.4565, "step": 26637 }, { "epoch": 0.5649509024198851, "grad_norm": 0.3669906556606293, "learning_rate": 1.6332921776922802e-05, "loss": 0.5232, "step": 26638 }, { "epoch": 0.5649721108778181, "grad_norm": 0.3614374101161957, "learning_rate": 1.6332663677101888e-05, "loss": 0.5477, "step": 26639 }, { "epoch": 0.5649933193357511, "grad_norm": 0.3189513087272644, "learning_rate": 1.633240557023784e-05, "loss": 0.5403, "step": 26640 }, { "epoch": 0.5650145277936841, "grad_norm": 0.34536013007164, "learning_rate": 1.6332147456330948e-05, "loss": 0.4979, "step": 26641 }, { "epoch": 0.5650357362516172, "grad_norm": 0.4143597185611725, "learning_rate": 1.633188933538149e-05, "loss": 0.4947, "step": 26642 }, { "epoch": 0.5650569447095501, "grad_norm": 0.3339044451713562, "learning_rate": 1.633163120738977e-05, "loss": 0.5259, "step": 26643 }, { "epoch": 0.5650781531674832, "grad_norm": 0.38020482659339905, "learning_rate": 1.6331373072356055e-05, "loss": 0.533, "step": 26644 }, { "epoch": 0.5650993616254162, "grad_norm": 0.44471868872642517, "learning_rate": 1.6331114930280648e-05, "loss": 0.5325, "step": 26645 }, { "epoch": 0.5651205700833493, "grad_norm": 0.3433522582054138, "learning_rate": 1.6330856781163826e-05, "loss": 0.5605, "step": 26646 }, { "epoch": 0.5651417785412822, "grad_norm": 0.3443980813026428, "learning_rate": 1.6330598625005883e-05, "loss": 0.5218, "step": 26647 }, { "epoch": 0.5651629869992153, "grad_norm": 0.4426563084125519, "learning_rate": 1.63303404618071e-05, "loss": 0.5657, "step": 26648 }, { "epoch": 0.5651841954571483, "grad_norm": 0.38676726818084717, "learning_rate": 1.6330082291567774e-05, "loss": 0.453, "step": 26649 }, { "epoch": 0.5652054039150813, "grad_norm": 0.3433012068271637, "learning_rate": 1.6329824114288177e-05, "loss": 0.5051, "step": 26650 }, { "epoch": 0.5652266123730143, "grad_norm": 0.35259678959846497, "learning_rate": 1.632956592996861e-05, "loss": 0.4843, "step": 26651 }, { "epoch": 0.5652478208309474, "grad_norm": 0.34236395359039307, "learning_rate": 1.6329307738609354e-05, "loss": 0.5178, "step": 26652 }, { "epoch": 0.5652690292888805, "grad_norm": 0.35333219170570374, "learning_rate": 1.6329049540210695e-05, "loss": 0.4952, "step": 26653 }, { "epoch": 0.5652902377468134, "grad_norm": 0.38801950216293335, "learning_rate": 1.6328791334772924e-05, "loss": 0.4962, "step": 26654 }, { "epoch": 0.5653114462047465, "grad_norm": 0.3394413888454437, "learning_rate": 1.6328533122296323e-05, "loss": 0.4761, "step": 26655 }, { "epoch": 0.5653326546626795, "grad_norm": 0.34231865406036377, "learning_rate": 1.632827490278119e-05, "loss": 0.4575, "step": 26656 }, { "epoch": 0.5653538631206125, "grad_norm": 0.42292720079421997, "learning_rate": 1.6328016676227795e-05, "loss": 0.5397, "step": 26657 }, { "epoch": 0.5653750715785455, "grad_norm": 0.32935965061187744, "learning_rate": 1.632775844263644e-05, "loss": 0.4906, "step": 26658 }, { "epoch": 0.5653962800364786, "grad_norm": 0.3429949879646301, "learning_rate": 1.6327500202007406e-05, "loss": 0.5258, "step": 26659 }, { "epoch": 0.5654174884944115, "grad_norm": 0.39081689715385437, "learning_rate": 1.6327241954340984e-05, "loss": 0.461, "step": 26660 }, { "epoch": 0.5654386969523446, "grad_norm": 0.3347660005092621, "learning_rate": 1.6326983699637456e-05, "loss": 0.47, "step": 26661 }, { "epoch": 0.5654599054102776, "grad_norm": 0.31369009613990784, "learning_rate": 1.632672543789711e-05, "loss": 0.4894, "step": 26662 }, { "epoch": 0.5654811138682106, "grad_norm": 0.34607139229774475, "learning_rate": 1.6326467169120238e-05, "loss": 0.4172, "step": 26663 }, { "epoch": 0.5655023223261436, "grad_norm": 0.4529338777065277, "learning_rate": 1.632620889330712e-05, "loss": 0.5525, "step": 26664 }, { "epoch": 0.5655235307840767, "grad_norm": 0.374176949262619, "learning_rate": 1.6325950610458054e-05, "loss": 0.451, "step": 26665 }, { "epoch": 0.5655447392420098, "grad_norm": 0.3672068119049072, "learning_rate": 1.6325692320573317e-05, "loss": 0.4189, "step": 26666 }, { "epoch": 0.5655659476999427, "grad_norm": 0.3819902837276459, "learning_rate": 1.63254340236532e-05, "loss": 0.4921, "step": 26667 }, { "epoch": 0.5655871561578758, "grad_norm": 0.3636837303638458, "learning_rate": 1.632517571969799e-05, "loss": 0.4883, "step": 26668 }, { "epoch": 0.5656083646158088, "grad_norm": 0.35168755054473877, "learning_rate": 1.6324917408707973e-05, "loss": 0.5016, "step": 26669 }, { "epoch": 0.5656295730737418, "grad_norm": 0.35286974906921387, "learning_rate": 1.632465909068344e-05, "loss": 0.4807, "step": 26670 }, { "epoch": 0.5656507815316748, "grad_norm": 0.5603315830230713, "learning_rate": 1.6324400765624674e-05, "loss": 0.5263, "step": 26671 }, { "epoch": 0.5656719899896079, "grad_norm": 0.3880581855773926, "learning_rate": 1.6324142433531967e-05, "loss": 0.3522, "step": 26672 }, { "epoch": 0.5656931984475408, "grad_norm": 0.4746516942977905, "learning_rate": 1.6323884094405604e-05, "loss": 0.5167, "step": 26673 }, { "epoch": 0.5657144069054739, "grad_norm": 0.34529098868370056, "learning_rate": 1.6323625748245872e-05, "loss": 0.569, "step": 26674 }, { "epoch": 0.5657356153634069, "grad_norm": 0.35584157705307007, "learning_rate": 1.6323367395053056e-05, "loss": 0.4479, "step": 26675 }, { "epoch": 0.56575682382134, "grad_norm": 0.38414424657821655, "learning_rate": 1.6323109034827448e-05, "loss": 0.4596, "step": 26676 }, { "epoch": 0.5657780322792729, "grad_norm": 0.39303159713745117, "learning_rate": 1.632285066756933e-05, "loss": 0.5202, "step": 26677 }, { "epoch": 0.565799240737206, "grad_norm": 0.3302772045135498, "learning_rate": 1.6322592293278995e-05, "loss": 0.4286, "step": 26678 }, { "epoch": 0.5658204491951391, "grad_norm": 0.345306932926178, "learning_rate": 1.6322333911956727e-05, "loss": 0.485, "step": 26679 }, { "epoch": 0.565841657653072, "grad_norm": 0.3460221290588379, "learning_rate": 1.6322075523602814e-05, "loss": 0.4985, "step": 26680 }, { "epoch": 0.5658628661110051, "grad_norm": 0.41153889894485474, "learning_rate": 1.6321817128217546e-05, "loss": 0.5681, "step": 26681 }, { "epoch": 0.5658840745689381, "grad_norm": 0.35788869857788086, "learning_rate": 1.6321558725801202e-05, "loss": 0.5708, "step": 26682 }, { "epoch": 0.5659052830268712, "grad_norm": 0.33838868141174316, "learning_rate": 1.632130031635408e-05, "loss": 0.5447, "step": 26683 }, { "epoch": 0.5659264914848041, "grad_norm": 0.3644130527973175, "learning_rate": 1.632104189987646e-05, "loss": 0.472, "step": 26684 }, { "epoch": 0.5659476999427372, "grad_norm": 0.38109737634658813, "learning_rate": 1.6320783476368637e-05, "loss": 0.454, "step": 26685 }, { "epoch": 0.5659689084006702, "grad_norm": 0.36394762992858887, "learning_rate": 1.6320525045830892e-05, "loss": 0.5285, "step": 26686 }, { "epoch": 0.5659901168586032, "grad_norm": 0.32640209794044495, "learning_rate": 1.6320266608263515e-05, "loss": 0.4568, "step": 26687 }, { "epoch": 0.5660113253165362, "grad_norm": 0.32658204436302185, "learning_rate": 1.6320008163666784e-05, "loss": 0.4699, "step": 26688 }, { "epoch": 0.5660325337744693, "grad_norm": 0.3964617848396301, "learning_rate": 1.6319749712041e-05, "loss": 0.4904, "step": 26689 }, { "epoch": 0.5660537422324022, "grad_norm": 0.3450622856616974, "learning_rate": 1.6319491253386447e-05, "loss": 0.4581, "step": 26690 }, { "epoch": 0.5660749506903353, "grad_norm": 0.3457476496696472, "learning_rate": 1.631923278770341e-05, "loss": 0.4884, "step": 26691 }, { "epoch": 0.5660961591482683, "grad_norm": 0.35064825415611267, "learning_rate": 1.6318974314992176e-05, "loss": 0.436, "step": 26692 }, { "epoch": 0.5661173676062013, "grad_norm": 0.383703351020813, "learning_rate": 1.6318715835253038e-05, "loss": 0.4403, "step": 26693 }, { "epoch": 0.5661385760641344, "grad_norm": 0.3600885570049286, "learning_rate": 1.6318457348486274e-05, "loss": 0.482, "step": 26694 }, { "epoch": 0.5661597845220674, "grad_norm": 0.3285703659057617, "learning_rate": 1.631819885469218e-05, "loss": 0.4459, "step": 26695 }, { "epoch": 0.5661809929800005, "grad_norm": 0.3426609933376312, "learning_rate": 1.6317940353871037e-05, "loss": 0.4258, "step": 26696 }, { "epoch": 0.5662022014379334, "grad_norm": 0.3575364649295807, "learning_rate": 1.6317681846023138e-05, "loss": 0.5397, "step": 26697 }, { "epoch": 0.5662234098958665, "grad_norm": 0.3184337615966797, "learning_rate": 1.6317423331148764e-05, "loss": 0.4691, "step": 26698 }, { "epoch": 0.5662446183537995, "grad_norm": 0.3442334234714508, "learning_rate": 1.631716480924821e-05, "loss": 0.45, "step": 26699 }, { "epoch": 0.5662658268117325, "grad_norm": 0.37663692235946655, "learning_rate": 1.6316906280321762e-05, "loss": 0.5211, "step": 26700 }, { "epoch": 0.5662870352696655, "grad_norm": 0.42119842767715454, "learning_rate": 1.6316647744369702e-05, "loss": 0.4859, "step": 26701 }, { "epoch": 0.5663082437275986, "grad_norm": 0.37070316076278687, "learning_rate": 1.631638920139232e-05, "loss": 0.5704, "step": 26702 }, { "epoch": 0.5663294521855315, "grad_norm": 0.38719305396080017, "learning_rate": 1.631613065138991e-05, "loss": 0.5062, "step": 26703 }, { "epoch": 0.5663506606434646, "grad_norm": 0.3617963194847107, "learning_rate": 1.631587209436275e-05, "loss": 0.4563, "step": 26704 }, { "epoch": 0.5663718691013976, "grad_norm": 0.3942505121231079, "learning_rate": 1.6315613530311135e-05, "loss": 0.5226, "step": 26705 }, { "epoch": 0.5663930775593307, "grad_norm": 0.33093786239624023, "learning_rate": 1.6315354959235347e-05, "loss": 0.5108, "step": 26706 }, { "epoch": 0.5664142860172637, "grad_norm": 0.37610670924186707, "learning_rate": 1.631509638113568e-05, "loss": 0.4914, "step": 26707 }, { "epoch": 0.5664354944751967, "grad_norm": 0.37531590461730957, "learning_rate": 1.6314837796012412e-05, "loss": 0.5568, "step": 26708 }, { "epoch": 0.5664567029331298, "grad_norm": 0.3850986361503601, "learning_rate": 1.6314579203865842e-05, "loss": 0.5592, "step": 26709 }, { "epoch": 0.5664779113910627, "grad_norm": 0.35398852825164795, "learning_rate": 1.631432060469625e-05, "loss": 0.4505, "step": 26710 }, { "epoch": 0.5664991198489958, "grad_norm": 0.3693601191043854, "learning_rate": 1.6314061998503926e-05, "loss": 0.4995, "step": 26711 }, { "epoch": 0.5665203283069288, "grad_norm": 0.5091856718063354, "learning_rate": 1.6313803385289153e-05, "loss": 0.512, "step": 26712 }, { "epoch": 0.5665415367648619, "grad_norm": 0.3489363491535187, "learning_rate": 1.6313544765052226e-05, "loss": 0.4973, "step": 26713 }, { "epoch": 0.5665627452227948, "grad_norm": 0.3758735656738281, "learning_rate": 1.631328613779343e-05, "loss": 0.5418, "step": 26714 }, { "epoch": 0.5665839536807279, "grad_norm": 0.33644938468933105, "learning_rate": 1.6313027503513052e-05, "loss": 0.5013, "step": 26715 }, { "epoch": 0.5666051621386609, "grad_norm": 0.431090772151947, "learning_rate": 1.6312768862211378e-05, "loss": 0.5659, "step": 26716 }, { "epoch": 0.5666263705965939, "grad_norm": 0.4200083017349243, "learning_rate": 1.6312510213888698e-05, "loss": 0.5341, "step": 26717 }, { "epoch": 0.5666475790545269, "grad_norm": 0.3217204511165619, "learning_rate": 1.6312251558545298e-05, "loss": 0.5432, "step": 26718 }, { "epoch": 0.56666878751246, "grad_norm": 0.4170352518558502, "learning_rate": 1.631199289618147e-05, "loss": 0.5015, "step": 26719 }, { "epoch": 0.566689995970393, "grad_norm": 0.3245510756969452, "learning_rate": 1.6311734226797495e-05, "loss": 0.5654, "step": 26720 }, { "epoch": 0.566711204428326, "grad_norm": 0.35607531666755676, "learning_rate": 1.6311475550393664e-05, "loss": 0.5822, "step": 26721 }, { "epoch": 0.5667324128862591, "grad_norm": 0.33455535769462585, "learning_rate": 1.6311216866970264e-05, "loss": 0.514, "step": 26722 }, { "epoch": 0.566753621344192, "grad_norm": 0.34397268295288086, "learning_rate": 1.6310958176527584e-05, "loss": 0.5458, "step": 26723 }, { "epoch": 0.5667748298021251, "grad_norm": 0.3825961649417877, "learning_rate": 1.6310699479065914e-05, "loss": 0.5269, "step": 26724 }, { "epoch": 0.5667960382600581, "grad_norm": 0.41016143560409546, "learning_rate": 1.6310440774585537e-05, "loss": 0.5695, "step": 26725 }, { "epoch": 0.5668172467179912, "grad_norm": 0.3867901563644409, "learning_rate": 1.6310182063086742e-05, "loss": 0.5229, "step": 26726 }, { "epoch": 0.5668384551759241, "grad_norm": 0.34993836283683777, "learning_rate": 1.630992334456982e-05, "loss": 0.4915, "step": 26727 }, { "epoch": 0.5668596636338572, "grad_norm": 0.35130417346954346, "learning_rate": 1.630966461903505e-05, "loss": 0.4254, "step": 26728 }, { "epoch": 0.5668808720917902, "grad_norm": 0.3347402513027191, "learning_rate": 1.630940588648273e-05, "loss": 0.5145, "step": 26729 }, { "epoch": 0.5669020805497232, "grad_norm": 0.34154751896858215, "learning_rate": 1.6309147146913143e-05, "loss": 0.5289, "step": 26730 }, { "epoch": 0.5669232890076562, "grad_norm": 0.36515775322914124, "learning_rate": 1.6308888400326578e-05, "loss": 0.5253, "step": 26731 }, { "epoch": 0.5669444974655893, "grad_norm": 0.35797616839408875, "learning_rate": 1.6308629646723324e-05, "loss": 0.4775, "step": 26732 }, { "epoch": 0.5669657059235224, "grad_norm": 0.38867276906967163, "learning_rate": 1.6308370886103662e-05, "loss": 0.5761, "step": 26733 }, { "epoch": 0.5669869143814553, "grad_norm": 0.36447569727897644, "learning_rate": 1.630811211846789e-05, "loss": 0.4506, "step": 26734 }, { "epoch": 0.5670081228393884, "grad_norm": 0.3152642846107483, "learning_rate": 1.6307853343816286e-05, "loss": 0.5572, "step": 26735 }, { "epoch": 0.5670293312973214, "grad_norm": 0.3117631673812866, "learning_rate": 1.6307594562149146e-05, "loss": 0.5141, "step": 26736 }, { "epoch": 0.5670505397552544, "grad_norm": 1.6398961544036865, "learning_rate": 1.630733577346675e-05, "loss": 0.5265, "step": 26737 }, { "epoch": 0.5670717482131874, "grad_norm": 0.3451182246208191, "learning_rate": 1.6307076977769396e-05, "loss": 0.5228, "step": 26738 }, { "epoch": 0.5670929566711205, "grad_norm": 0.47622305154800415, "learning_rate": 1.630681817505736e-05, "loss": 0.5049, "step": 26739 }, { "epoch": 0.5671141651290534, "grad_norm": 0.3391436040401459, "learning_rate": 1.630655936533094e-05, "loss": 0.4754, "step": 26740 }, { "epoch": 0.5671353735869865, "grad_norm": 0.3582782447338104, "learning_rate": 1.6306300548590416e-05, "loss": 0.422, "step": 26741 }, { "epoch": 0.5671565820449195, "grad_norm": 0.39660507440567017, "learning_rate": 1.6306041724836083e-05, "loss": 0.4825, "step": 26742 }, { "epoch": 0.5671777905028526, "grad_norm": 0.36218008399009705, "learning_rate": 1.630578289406822e-05, "loss": 0.5207, "step": 26743 }, { "epoch": 0.5671989989607855, "grad_norm": 0.3572472929954529, "learning_rate": 1.6305524056287122e-05, "loss": 0.5897, "step": 26744 }, { "epoch": 0.5672202074187186, "grad_norm": 0.3681023418903351, "learning_rate": 1.6305265211493077e-05, "loss": 0.5388, "step": 26745 }, { "epoch": 0.5672414158766516, "grad_norm": 0.6393734812736511, "learning_rate": 1.630500635968637e-05, "loss": 0.5371, "step": 26746 }, { "epoch": 0.5672626243345846, "grad_norm": 0.38722312450408936, "learning_rate": 1.6304747500867287e-05, "loss": 0.5067, "step": 26747 }, { "epoch": 0.5672838327925177, "grad_norm": 0.34632807970046997, "learning_rate": 1.630448863503612e-05, "loss": 0.4881, "step": 26748 }, { "epoch": 0.5673050412504507, "grad_norm": 0.36866557598114014, "learning_rate": 1.630422976219316e-05, "loss": 0.5123, "step": 26749 }, { "epoch": 0.5673262497083837, "grad_norm": 0.5448717474937439, "learning_rate": 1.6303970882338686e-05, "loss": 0.4731, "step": 26750 }, { "epoch": 0.5673474581663167, "grad_norm": 0.40931436419487, "learning_rate": 1.630371199547299e-05, "loss": 0.5494, "step": 26751 }, { "epoch": 0.5673686666242498, "grad_norm": 0.4508828818798065, "learning_rate": 1.6303453101596363e-05, "loss": 0.486, "step": 26752 }, { "epoch": 0.5673898750821827, "grad_norm": 0.36583125591278076, "learning_rate": 1.630319420070909e-05, "loss": 0.5476, "step": 26753 }, { "epoch": 0.5674110835401158, "grad_norm": 0.32789644598960876, "learning_rate": 1.6302935292811455e-05, "loss": 0.461, "step": 26754 }, { "epoch": 0.5674322919980488, "grad_norm": 0.374319463968277, "learning_rate": 1.6302676377903753e-05, "loss": 0.549, "step": 26755 }, { "epoch": 0.5674535004559819, "grad_norm": 0.4377918541431427, "learning_rate": 1.6302417455986267e-05, "loss": 0.5924, "step": 26756 }, { "epoch": 0.5674747089139148, "grad_norm": 0.3676113784313202, "learning_rate": 1.6302158527059292e-05, "loss": 0.467, "step": 26757 }, { "epoch": 0.5674959173718479, "grad_norm": 0.35012558102607727, "learning_rate": 1.6301899591123106e-05, "loss": 0.5124, "step": 26758 }, { "epoch": 0.5675171258297809, "grad_norm": 0.6609777212142944, "learning_rate": 1.6301640648178003e-05, "loss": 0.4582, "step": 26759 }, { "epoch": 0.5675383342877139, "grad_norm": 0.3562527894973755, "learning_rate": 1.6301381698224272e-05, "loss": 0.498, "step": 26760 }, { "epoch": 0.567559542745647, "grad_norm": 0.49674850702285767, "learning_rate": 1.6301122741262197e-05, "loss": 0.4712, "step": 26761 }, { "epoch": 0.56758075120358, "grad_norm": 0.333330899477005, "learning_rate": 1.630086377729207e-05, "loss": 0.4639, "step": 26762 }, { "epoch": 0.5676019596615131, "grad_norm": 0.39186036586761475, "learning_rate": 1.6300604806314178e-05, "loss": 0.619, "step": 26763 }, { "epoch": 0.567623168119446, "grad_norm": 0.3244903087615967, "learning_rate": 1.6300345828328807e-05, "loss": 0.449, "step": 26764 }, { "epoch": 0.5676443765773791, "grad_norm": 0.35008737444877625, "learning_rate": 1.6300086843336243e-05, "loss": 0.5269, "step": 26765 }, { "epoch": 0.5676655850353121, "grad_norm": 0.3571877181529999, "learning_rate": 1.629982785133678e-05, "loss": 0.4981, "step": 26766 }, { "epoch": 0.5676867934932451, "grad_norm": 0.3747692406177521, "learning_rate": 1.6299568852330703e-05, "loss": 0.5142, "step": 26767 }, { "epoch": 0.5677080019511781, "grad_norm": 0.33853861689567566, "learning_rate": 1.62993098463183e-05, "loss": 0.4849, "step": 26768 }, { "epoch": 0.5677292104091112, "grad_norm": 0.409455806016922, "learning_rate": 1.629905083329986e-05, "loss": 0.5017, "step": 26769 }, { "epoch": 0.5677504188670441, "grad_norm": 0.37426069378852844, "learning_rate": 1.6298791813275668e-05, "loss": 0.5799, "step": 26770 }, { "epoch": 0.5677716273249772, "grad_norm": 0.34399643540382385, "learning_rate": 1.6298532786246018e-05, "loss": 0.5645, "step": 26771 }, { "epoch": 0.5677928357829102, "grad_norm": 0.34256601333618164, "learning_rate": 1.6298273752211192e-05, "loss": 0.4977, "step": 26772 }, { "epoch": 0.5678140442408433, "grad_norm": 0.6140531301498413, "learning_rate": 1.6298014711171482e-05, "loss": 0.4366, "step": 26773 }, { "epoch": 0.5678352526987763, "grad_norm": 0.4247627556324005, "learning_rate": 1.629775566312717e-05, "loss": 0.5324, "step": 26774 }, { "epoch": 0.5678564611567093, "grad_norm": 0.34990325570106506, "learning_rate": 1.6297496608078556e-05, "loss": 0.4659, "step": 26775 }, { "epoch": 0.5678776696146424, "grad_norm": 0.35683995485305786, "learning_rate": 1.629723754602592e-05, "loss": 0.5064, "step": 26776 }, { "epoch": 0.5678988780725753, "grad_norm": 0.3305215835571289, "learning_rate": 1.6296978476969548e-05, "loss": 0.4141, "step": 26777 }, { "epoch": 0.5679200865305084, "grad_norm": 0.7402079105377197, "learning_rate": 1.629671940090973e-05, "loss": 0.5421, "step": 26778 }, { "epoch": 0.5679412949884414, "grad_norm": 0.3594120144844055, "learning_rate": 1.6296460317846755e-05, "loss": 0.5245, "step": 26779 }, { "epoch": 0.5679625034463744, "grad_norm": 0.7663868069648743, "learning_rate": 1.6296201227780916e-05, "loss": 0.541, "step": 26780 }, { "epoch": 0.5679837119043074, "grad_norm": 0.36356085538864136, "learning_rate": 1.6295942130712497e-05, "loss": 0.5274, "step": 26781 }, { "epoch": 0.5680049203622405, "grad_norm": 0.36207327246665955, "learning_rate": 1.6295683026641782e-05, "loss": 0.5609, "step": 26782 }, { "epoch": 0.5680261288201734, "grad_norm": 0.3860679268836975, "learning_rate": 1.6295423915569066e-05, "loss": 0.5133, "step": 26783 }, { "epoch": 0.5680473372781065, "grad_norm": 0.4234564006328583, "learning_rate": 1.6295164797494633e-05, "loss": 0.4376, "step": 26784 }, { "epoch": 0.5680685457360395, "grad_norm": 0.36663398146629333, "learning_rate": 1.629490567241877e-05, "loss": 0.4532, "step": 26785 }, { "epoch": 0.5680897541939726, "grad_norm": 0.322884202003479, "learning_rate": 1.6294646540341772e-05, "loss": 0.4892, "step": 26786 }, { "epoch": 0.5681109626519055, "grad_norm": 0.3413535952568054, "learning_rate": 1.6294387401263917e-05, "loss": 0.4414, "step": 26787 }, { "epoch": 0.5681321711098386, "grad_norm": 0.4187220335006714, "learning_rate": 1.6294128255185505e-05, "loss": 0.5118, "step": 26788 }, { "epoch": 0.5681533795677717, "grad_norm": 0.3477083146572113, "learning_rate": 1.6293869102106815e-05, "loss": 0.5032, "step": 26789 }, { "epoch": 0.5681745880257046, "grad_norm": 0.3552725613117218, "learning_rate": 1.629360994202814e-05, "loss": 0.5114, "step": 26790 }, { "epoch": 0.5681957964836377, "grad_norm": 0.31688275933265686, "learning_rate": 1.6293350774949765e-05, "loss": 0.5069, "step": 26791 }, { "epoch": 0.5682170049415707, "grad_norm": 0.3571408987045288, "learning_rate": 1.6293091600871983e-05, "loss": 0.5293, "step": 26792 }, { "epoch": 0.5682382133995038, "grad_norm": 0.35788285732269287, "learning_rate": 1.6292832419795077e-05, "loss": 0.5246, "step": 26793 }, { "epoch": 0.5682594218574367, "grad_norm": 0.3668670356273651, "learning_rate": 1.6292573231719337e-05, "loss": 0.5561, "step": 26794 }, { "epoch": 0.5682806303153698, "grad_norm": 0.3602920472621918, "learning_rate": 1.6292314036645052e-05, "loss": 0.5635, "step": 26795 }, { "epoch": 0.5683018387733028, "grad_norm": 0.3637177646160126, "learning_rate": 1.629205483457251e-05, "loss": 0.6451, "step": 26796 }, { "epoch": 0.5683230472312358, "grad_norm": 0.3313917815685272, "learning_rate": 1.6291795625502002e-05, "loss": 0.4644, "step": 26797 }, { "epoch": 0.5683442556891688, "grad_norm": 0.6878552436828613, "learning_rate": 1.629153640943381e-05, "loss": 0.5133, "step": 26798 }, { "epoch": 0.5683654641471019, "grad_norm": 0.3975543975830078, "learning_rate": 1.6291277186368226e-05, "loss": 0.4045, "step": 26799 }, { "epoch": 0.5683866726050348, "grad_norm": 0.3541250228881836, "learning_rate": 1.629101795630554e-05, "loss": 0.5105, "step": 26800 }, { "epoch": 0.5684078810629679, "grad_norm": 0.38779503107070923, "learning_rate": 1.629075871924604e-05, "loss": 0.4614, "step": 26801 }, { "epoch": 0.568429089520901, "grad_norm": 0.4037732183933258, "learning_rate": 1.629049947519001e-05, "loss": 0.4966, "step": 26802 }, { "epoch": 0.568450297978834, "grad_norm": 0.39249753952026367, "learning_rate": 1.629024022413774e-05, "loss": 0.5029, "step": 26803 }, { "epoch": 0.568471506436767, "grad_norm": 0.35381874442100525, "learning_rate": 1.628998096608952e-05, "loss": 0.4388, "step": 26804 }, { "epoch": 0.5684927148947, "grad_norm": 0.37488478422164917, "learning_rate": 1.628972170104564e-05, "loss": 0.5458, "step": 26805 }, { "epoch": 0.5685139233526331, "grad_norm": 0.3544509708881378, "learning_rate": 1.6289462429006385e-05, "loss": 0.4936, "step": 26806 }, { "epoch": 0.568535131810566, "grad_norm": 0.396262526512146, "learning_rate": 1.6289203149972043e-05, "loss": 0.5717, "step": 26807 }, { "epoch": 0.5685563402684991, "grad_norm": 0.3523988723754883, "learning_rate": 1.6288943863942906e-05, "loss": 0.5244, "step": 26808 }, { "epoch": 0.5685775487264321, "grad_norm": 0.40732529759407043, "learning_rate": 1.6288684570919263e-05, "loss": 0.4475, "step": 26809 }, { "epoch": 0.5685987571843651, "grad_norm": 0.34379249811172485, "learning_rate": 1.6288425270901394e-05, "loss": 0.4385, "step": 26810 }, { "epoch": 0.5686199656422981, "grad_norm": 0.30572471022605896, "learning_rate": 1.6288165963889594e-05, "loss": 0.4285, "step": 26811 }, { "epoch": 0.5686411741002312, "grad_norm": 0.3942126929759979, "learning_rate": 1.6287906649884152e-05, "loss": 0.5214, "step": 26812 }, { "epoch": 0.5686623825581641, "grad_norm": 0.3260495364665985, "learning_rate": 1.6287647328885358e-05, "loss": 0.5148, "step": 26813 }, { "epoch": 0.5686835910160972, "grad_norm": 0.32026687264442444, "learning_rate": 1.6287388000893494e-05, "loss": 0.4659, "step": 26814 }, { "epoch": 0.5687047994740303, "grad_norm": 0.35794663429260254, "learning_rate": 1.628712866590885e-05, "loss": 0.5331, "step": 26815 }, { "epoch": 0.5687260079319633, "grad_norm": 0.3504117429256439, "learning_rate": 1.6286869323931718e-05, "loss": 0.5161, "step": 26816 }, { "epoch": 0.5687472163898963, "grad_norm": 0.34315231442451477, "learning_rate": 1.6286609974962385e-05, "loss": 0.4259, "step": 26817 }, { "epoch": 0.5687684248478293, "grad_norm": 0.7522879838943481, "learning_rate": 1.6286350619001138e-05, "loss": 0.4649, "step": 26818 }, { "epoch": 0.5687896333057624, "grad_norm": 0.3754326403141022, "learning_rate": 1.6286091256048267e-05, "loss": 0.4561, "step": 26819 }, { "epoch": 0.5688108417636953, "grad_norm": 0.3232984244823456, "learning_rate": 1.628583188610406e-05, "loss": 0.4519, "step": 26820 }, { "epoch": 0.5688320502216284, "grad_norm": 0.37250715494155884, "learning_rate": 1.6285572509168807e-05, "loss": 0.5529, "step": 26821 }, { "epoch": 0.5688532586795614, "grad_norm": 0.33476054668426514, "learning_rate": 1.6285313125242793e-05, "loss": 0.5536, "step": 26822 }, { "epoch": 0.5688744671374945, "grad_norm": 0.3263239860534668, "learning_rate": 1.6285053734326305e-05, "loss": 0.48, "step": 26823 }, { "epoch": 0.5688956755954274, "grad_norm": 0.3578617572784424, "learning_rate": 1.6284794336419637e-05, "loss": 0.4583, "step": 26824 }, { "epoch": 0.5689168840533605, "grad_norm": 0.3132275342941284, "learning_rate": 1.628453493152308e-05, "loss": 0.4683, "step": 26825 }, { "epoch": 0.5689380925112935, "grad_norm": 0.36020728945732117, "learning_rate": 1.6284275519636913e-05, "loss": 0.4815, "step": 26826 }, { "epoch": 0.5689593009692265, "grad_norm": 0.3293677270412445, "learning_rate": 1.628401610076143e-05, "loss": 0.496, "step": 26827 }, { "epoch": 0.5689805094271595, "grad_norm": 0.3330591022968292, "learning_rate": 1.628375667489692e-05, "loss": 0.505, "step": 26828 }, { "epoch": 0.5690017178850926, "grad_norm": 0.5173860192298889, "learning_rate": 1.628349724204367e-05, "loss": 0.4832, "step": 26829 }, { "epoch": 0.5690229263430256, "grad_norm": 0.44196245074272156, "learning_rate": 1.628323780220197e-05, "loss": 0.4551, "step": 26830 }, { "epoch": 0.5690441348009586, "grad_norm": 0.3655870258808136, "learning_rate": 1.6282978355372107e-05, "loss": 0.5234, "step": 26831 }, { "epoch": 0.5690653432588917, "grad_norm": 0.3523319959640503, "learning_rate": 1.628271890155437e-05, "loss": 0.5361, "step": 26832 }, { "epoch": 0.5690865517168247, "grad_norm": 0.3265765905380249, "learning_rate": 1.6282459440749044e-05, "loss": 0.4323, "step": 26833 }, { "epoch": 0.5691077601747577, "grad_norm": 0.43393605947494507, "learning_rate": 1.6282199972956425e-05, "loss": 0.4935, "step": 26834 }, { "epoch": 0.5691289686326907, "grad_norm": 0.3691483736038208, "learning_rate": 1.6281940498176797e-05, "loss": 0.5274, "step": 26835 }, { "epoch": 0.5691501770906238, "grad_norm": 0.43855711817741394, "learning_rate": 1.6281681016410445e-05, "loss": 0.5775, "step": 26836 }, { "epoch": 0.5691713855485567, "grad_norm": 0.35954535007476807, "learning_rate": 1.628142152765767e-05, "loss": 0.462, "step": 26837 }, { "epoch": 0.5691925940064898, "grad_norm": 0.7294866442680359, "learning_rate": 1.6281162031918748e-05, "loss": 0.4794, "step": 26838 }, { "epoch": 0.5692138024644228, "grad_norm": 0.372991681098938, "learning_rate": 1.6280902529193975e-05, "loss": 0.5433, "step": 26839 }, { "epoch": 0.5692350109223558, "grad_norm": 0.3955008089542389, "learning_rate": 1.6280643019483634e-05, "loss": 0.4463, "step": 26840 }, { "epoch": 0.5692562193802888, "grad_norm": 0.5173130631446838, "learning_rate": 1.6280383502788018e-05, "loss": 0.5784, "step": 26841 }, { "epoch": 0.5692774278382219, "grad_norm": 0.34984850883483887, "learning_rate": 1.6280123979107413e-05, "loss": 0.542, "step": 26842 }, { "epoch": 0.569298636296155, "grad_norm": 0.3162042498588562, "learning_rate": 1.627986444844211e-05, "loss": 0.4714, "step": 26843 }, { "epoch": 0.5693198447540879, "grad_norm": 0.3832699656486511, "learning_rate": 1.6279604910792394e-05, "loss": 0.616, "step": 26844 }, { "epoch": 0.569341053212021, "grad_norm": 0.3347413241863251, "learning_rate": 1.6279345366158555e-05, "loss": 0.4108, "step": 26845 }, { "epoch": 0.569362261669954, "grad_norm": 0.3887487053871155, "learning_rate": 1.6279085814540886e-05, "loss": 0.5427, "step": 26846 }, { "epoch": 0.569383470127887, "grad_norm": 0.3704145848751068, "learning_rate": 1.627882625593967e-05, "loss": 0.5615, "step": 26847 }, { "epoch": 0.56940467858582, "grad_norm": 0.3787311017513275, "learning_rate": 1.6278566690355204e-05, "loss": 0.494, "step": 26848 }, { "epoch": 0.5694258870437531, "grad_norm": 0.4439025819301605, "learning_rate": 1.627830711778776e-05, "loss": 0.4579, "step": 26849 }, { "epoch": 0.569447095501686, "grad_norm": 0.3539227843284607, "learning_rate": 1.6278047538237645e-05, "loss": 0.5922, "step": 26850 }, { "epoch": 0.5694683039596191, "grad_norm": 0.429995059967041, "learning_rate": 1.6277787951705138e-05, "loss": 0.5024, "step": 26851 }, { "epoch": 0.5694895124175521, "grad_norm": 0.33422574400901794, "learning_rate": 1.627752835819053e-05, "loss": 0.5117, "step": 26852 }, { "epoch": 0.5695107208754852, "grad_norm": 0.8656559586524963, "learning_rate": 1.627726875769411e-05, "loss": 0.5396, "step": 26853 }, { "epoch": 0.5695319293334181, "grad_norm": 0.35575634241104126, "learning_rate": 1.627700915021617e-05, "loss": 0.4523, "step": 26854 }, { "epoch": 0.5695531377913512, "grad_norm": 0.38127630949020386, "learning_rate": 1.627674953575699e-05, "loss": 0.5237, "step": 26855 }, { "epoch": 0.5695743462492843, "grad_norm": 0.34577587246894836, "learning_rate": 1.627648991431686e-05, "loss": 0.4286, "step": 26856 }, { "epoch": 0.5695955547072172, "grad_norm": 0.3688441812992096, "learning_rate": 1.6276230285896077e-05, "loss": 0.4585, "step": 26857 }, { "epoch": 0.5696167631651503, "grad_norm": 0.40155187249183655, "learning_rate": 1.6275970650494926e-05, "loss": 0.536, "step": 26858 }, { "epoch": 0.5696379716230833, "grad_norm": 1.7637903690338135, "learning_rate": 1.6275711008113692e-05, "loss": 0.5949, "step": 26859 }, { "epoch": 0.5696591800810163, "grad_norm": 0.36490872502326965, "learning_rate": 1.6275451358752673e-05, "loss": 0.4859, "step": 26860 }, { "epoch": 0.5696803885389493, "grad_norm": 0.35998350381851196, "learning_rate": 1.6275191702412146e-05, "loss": 0.5397, "step": 26861 }, { "epoch": 0.5697015969968824, "grad_norm": 0.32423239946365356, "learning_rate": 1.6274932039092403e-05, "loss": 0.4872, "step": 26862 }, { "epoch": 0.5697228054548154, "grad_norm": 0.35525596141815186, "learning_rate": 1.627467236879374e-05, "loss": 0.5133, "step": 26863 }, { "epoch": 0.5697440139127484, "grad_norm": 0.36755841970443726, "learning_rate": 1.627441269151644e-05, "loss": 0.5236, "step": 26864 }, { "epoch": 0.5697652223706814, "grad_norm": 0.5254856944084167, "learning_rate": 1.6274153007260792e-05, "loss": 0.4754, "step": 26865 }, { "epoch": 0.5697864308286145, "grad_norm": 0.38677212595939636, "learning_rate": 1.6273893316027086e-05, "loss": 0.536, "step": 26866 }, { "epoch": 0.5698076392865474, "grad_norm": 0.4444558024406433, "learning_rate": 1.627363361781561e-05, "loss": 0.5579, "step": 26867 }, { "epoch": 0.5698288477444805, "grad_norm": 0.4865567982196808, "learning_rate": 1.6273373912626652e-05, "loss": 0.4489, "step": 26868 }, { "epoch": 0.5698500562024135, "grad_norm": 0.3782790005207062, "learning_rate": 1.62731142004605e-05, "loss": 0.503, "step": 26869 }, { "epoch": 0.5698712646603465, "grad_norm": 0.378664493560791, "learning_rate": 1.6272854481317452e-05, "loss": 0.5129, "step": 26870 }, { "epoch": 0.5698924731182796, "grad_norm": 0.35099127888679504, "learning_rate": 1.6272594755197785e-05, "loss": 0.5402, "step": 26871 }, { "epoch": 0.5699136815762126, "grad_norm": 0.3788614869117737, "learning_rate": 1.6272335022101796e-05, "loss": 0.5246, "step": 26872 }, { "epoch": 0.5699348900341457, "grad_norm": 0.33614644408226013, "learning_rate": 1.6272075282029765e-05, "loss": 0.5094, "step": 26873 }, { "epoch": 0.5699560984920786, "grad_norm": 0.38334691524505615, "learning_rate": 1.627181553498199e-05, "loss": 0.5145, "step": 26874 }, { "epoch": 0.5699773069500117, "grad_norm": 0.3381271958351135, "learning_rate": 1.6271555780958755e-05, "loss": 0.4581, "step": 26875 }, { "epoch": 0.5699985154079447, "grad_norm": 0.3690093755722046, "learning_rate": 1.6271296019960352e-05, "loss": 0.4528, "step": 26876 }, { "epoch": 0.5700197238658777, "grad_norm": 0.27842381596565247, "learning_rate": 1.6271036251987066e-05, "loss": 0.428, "step": 26877 }, { "epoch": 0.5700409323238107, "grad_norm": 0.39523738622665405, "learning_rate": 1.627077647703919e-05, "loss": 0.5616, "step": 26878 }, { "epoch": 0.5700621407817438, "grad_norm": 0.3480061888694763, "learning_rate": 1.6270516695117012e-05, "loss": 0.4877, "step": 26879 }, { "epoch": 0.5700833492396767, "grad_norm": 0.43979519605636597, "learning_rate": 1.6270256906220816e-05, "loss": 0.5183, "step": 26880 }, { "epoch": 0.5701045576976098, "grad_norm": 0.3527536690235138, "learning_rate": 1.6269997110350894e-05, "loss": 0.5108, "step": 26881 }, { "epoch": 0.5701257661555428, "grad_norm": 0.368327796459198, "learning_rate": 1.6269737307507538e-05, "loss": 0.5254, "step": 26882 }, { "epoch": 0.5701469746134759, "grad_norm": 0.40854641795158386, "learning_rate": 1.6269477497691035e-05, "loss": 0.6, "step": 26883 }, { "epoch": 0.5701681830714089, "grad_norm": 0.4339603781700134, "learning_rate": 1.6269217680901675e-05, "loss": 0.4298, "step": 26884 }, { "epoch": 0.5701893915293419, "grad_norm": 0.32150059938430786, "learning_rate": 1.6268957857139743e-05, "loss": 0.4527, "step": 26885 }, { "epoch": 0.570210599987275, "grad_norm": 0.43576401472091675, "learning_rate": 1.626869802640553e-05, "loss": 0.5292, "step": 26886 }, { "epoch": 0.5702318084452079, "grad_norm": 0.4503067433834076, "learning_rate": 1.6268438188699326e-05, "loss": 0.4879, "step": 26887 }, { "epoch": 0.570253016903141, "grad_norm": 0.3180776536464691, "learning_rate": 1.6268178344021423e-05, "loss": 0.545, "step": 26888 }, { "epoch": 0.570274225361074, "grad_norm": 0.36888864636421204, "learning_rate": 1.62679184923721e-05, "loss": 0.5241, "step": 26889 }, { "epoch": 0.570295433819007, "grad_norm": 0.32068267464637756, "learning_rate": 1.6267658633751656e-05, "loss": 0.4928, "step": 26890 }, { "epoch": 0.57031664227694, "grad_norm": 0.3261622190475464, "learning_rate": 1.6267398768160377e-05, "loss": 0.4917, "step": 26891 }, { "epoch": 0.5703378507348731, "grad_norm": 0.3616860806941986, "learning_rate": 1.6267138895598553e-05, "loss": 0.4771, "step": 26892 }, { "epoch": 0.570359059192806, "grad_norm": 0.36882567405700684, "learning_rate": 1.626687901606647e-05, "loss": 0.5123, "step": 26893 }, { "epoch": 0.5703802676507391, "grad_norm": 0.3039599061012268, "learning_rate": 1.6266619129564416e-05, "loss": 0.4012, "step": 26894 }, { "epoch": 0.5704014761086721, "grad_norm": 0.3963313400745392, "learning_rate": 1.6266359236092685e-05, "loss": 0.576, "step": 26895 }, { "epoch": 0.5704226845666052, "grad_norm": 0.33841973543167114, "learning_rate": 1.6266099335651565e-05, "loss": 0.4353, "step": 26896 }, { "epoch": 0.5704438930245382, "grad_norm": 0.49040094017982483, "learning_rate": 1.626583942824134e-05, "loss": 0.661, "step": 26897 }, { "epoch": 0.5704651014824712, "grad_norm": 0.3166061341762543, "learning_rate": 1.6265579513862307e-05, "loss": 0.4393, "step": 26898 }, { "epoch": 0.5704863099404043, "grad_norm": 0.3961351811885834, "learning_rate": 1.626531959251475e-05, "loss": 0.4888, "step": 26899 }, { "epoch": 0.5705075183983372, "grad_norm": 0.34137704968452454, "learning_rate": 1.6265059664198957e-05, "loss": 0.453, "step": 26900 }, { "epoch": 0.5705287268562703, "grad_norm": 0.36380311846733093, "learning_rate": 1.626479972891522e-05, "loss": 0.5661, "step": 26901 }, { "epoch": 0.5705499353142033, "grad_norm": 0.3566688895225525, "learning_rate": 1.6264539786663828e-05, "loss": 0.46, "step": 26902 }, { "epoch": 0.5705711437721364, "grad_norm": 0.347382515668869, "learning_rate": 1.6264279837445068e-05, "loss": 0.4449, "step": 26903 }, { "epoch": 0.5705923522300693, "grad_norm": 0.38676366209983826, "learning_rate": 1.6264019881259235e-05, "loss": 0.4283, "step": 26904 }, { "epoch": 0.5706135606880024, "grad_norm": 0.3579443693161011, "learning_rate": 1.6263759918106607e-05, "loss": 0.4813, "step": 26905 }, { "epoch": 0.5706347691459354, "grad_norm": 0.36570125818252563, "learning_rate": 1.6263499947987482e-05, "loss": 0.5296, "step": 26906 }, { "epoch": 0.5706559776038684, "grad_norm": 0.3095517158508301, "learning_rate": 1.6263239970902148e-05, "loss": 0.4452, "step": 26907 }, { "epoch": 0.5706771860618014, "grad_norm": 0.33362096548080444, "learning_rate": 1.626297998685089e-05, "loss": 0.5247, "step": 26908 }, { "epoch": 0.5706983945197345, "grad_norm": 0.36385834217071533, "learning_rate": 1.6262719995834e-05, "loss": 0.4645, "step": 26909 }, { "epoch": 0.5707196029776674, "grad_norm": 0.35786619782447815, "learning_rate": 1.6262459997851775e-05, "loss": 0.6271, "step": 26910 }, { "epoch": 0.5707408114356005, "grad_norm": 0.3695070743560791, "learning_rate": 1.6262199992904488e-05, "loss": 0.5165, "step": 26911 }, { "epoch": 0.5707620198935336, "grad_norm": 0.3533078134059906, "learning_rate": 1.6261939980992442e-05, "loss": 0.5179, "step": 26912 }, { "epoch": 0.5707832283514666, "grad_norm": 0.33171334862709045, "learning_rate": 1.6261679962115912e-05, "loss": 0.4443, "step": 26913 }, { "epoch": 0.5708044368093996, "grad_norm": 0.7997984886169434, "learning_rate": 1.6261419936275205e-05, "loss": 0.5349, "step": 26914 }, { "epoch": 0.5708256452673326, "grad_norm": 0.3755313456058502, "learning_rate": 1.6261159903470597e-05, "loss": 0.5042, "step": 26915 }, { "epoch": 0.5708468537252657, "grad_norm": 0.3187408745288849, "learning_rate": 1.6260899863702382e-05, "loss": 0.3926, "step": 26916 }, { "epoch": 0.5708680621831986, "grad_norm": 0.3874093294143677, "learning_rate": 1.6260639816970848e-05, "loss": 0.5601, "step": 26917 }, { "epoch": 0.5708892706411317, "grad_norm": 0.5338522791862488, "learning_rate": 1.6260379763276287e-05, "loss": 0.5511, "step": 26918 }, { "epoch": 0.5709104790990647, "grad_norm": 0.33904820680618286, "learning_rate": 1.6260119702618985e-05, "loss": 0.4114, "step": 26919 }, { "epoch": 0.5709316875569977, "grad_norm": 0.3485325872898102, "learning_rate": 1.625985963499923e-05, "loss": 0.4155, "step": 26920 }, { "epoch": 0.5709528960149307, "grad_norm": 0.4019840955734253, "learning_rate": 1.6259599560417316e-05, "loss": 0.5052, "step": 26921 }, { "epoch": 0.5709741044728638, "grad_norm": 0.31442511081695557, "learning_rate": 1.625933947887353e-05, "loss": 0.4386, "step": 26922 }, { "epoch": 0.5709953129307968, "grad_norm": 0.3562109172344208, "learning_rate": 1.625907939036816e-05, "loss": 0.4722, "step": 26923 }, { "epoch": 0.5710165213887298, "grad_norm": 0.38666149973869324, "learning_rate": 1.6258819294901496e-05, "loss": 0.5361, "step": 26924 }, { "epoch": 0.5710377298466629, "grad_norm": 0.47880902886390686, "learning_rate": 1.625855919247383e-05, "loss": 0.4655, "step": 26925 }, { "epoch": 0.5710589383045959, "grad_norm": 0.3531521260738373, "learning_rate": 1.6258299083085445e-05, "loss": 0.4801, "step": 26926 }, { "epoch": 0.5710801467625289, "grad_norm": 1.014273762702942, "learning_rate": 1.6258038966736636e-05, "loss": 0.5211, "step": 26927 }, { "epoch": 0.5711013552204619, "grad_norm": 0.3974964916706085, "learning_rate": 1.6257778843427692e-05, "loss": 0.5337, "step": 26928 }, { "epoch": 0.571122563678395, "grad_norm": 0.35890403389930725, "learning_rate": 1.6257518713158896e-05, "loss": 0.4459, "step": 26929 }, { "epoch": 0.5711437721363279, "grad_norm": 0.3818136155605316, "learning_rate": 1.6257258575930545e-05, "loss": 0.4735, "step": 26930 }, { "epoch": 0.571164980594261, "grad_norm": 0.368450403213501, "learning_rate": 1.6256998431742927e-05, "loss": 0.5715, "step": 26931 }, { "epoch": 0.571186189052194, "grad_norm": 0.33170294761657715, "learning_rate": 1.625673828059633e-05, "loss": 0.5334, "step": 26932 }, { "epoch": 0.5712073975101271, "grad_norm": 0.37443020939826965, "learning_rate": 1.625647812249104e-05, "loss": 0.4738, "step": 26933 }, { "epoch": 0.57122860596806, "grad_norm": 0.3090493679046631, "learning_rate": 1.6256217957427353e-05, "loss": 0.4789, "step": 26934 }, { "epoch": 0.5712498144259931, "grad_norm": 0.3280799388885498, "learning_rate": 1.6255957785405548e-05, "loss": 0.4264, "step": 26935 }, { "epoch": 0.5712710228839261, "grad_norm": 0.3342209458351135, "learning_rate": 1.6255697606425926e-05, "loss": 0.3931, "step": 26936 }, { "epoch": 0.5712922313418591, "grad_norm": 0.43833833932876587, "learning_rate": 1.625543742048877e-05, "loss": 0.4188, "step": 26937 }, { "epoch": 0.5713134397997922, "grad_norm": 1.9592164754867554, "learning_rate": 1.6255177227594375e-05, "loss": 0.5232, "step": 26938 }, { "epoch": 0.5713346482577252, "grad_norm": 0.3472232222557068, "learning_rate": 1.625491702774302e-05, "loss": 0.5154, "step": 26939 }, { "epoch": 0.5713558567156583, "grad_norm": 0.367310494184494, "learning_rate": 1.6254656820935007e-05, "loss": 0.5026, "step": 26940 }, { "epoch": 0.5713770651735912, "grad_norm": 0.4132513105869293, "learning_rate": 1.6254396607170612e-05, "loss": 0.5089, "step": 26941 }, { "epoch": 0.5713982736315243, "grad_norm": 0.3988933265209198, "learning_rate": 1.6254136386450138e-05, "loss": 0.5496, "step": 26942 }, { "epoch": 0.5714194820894573, "grad_norm": 0.35378941893577576, "learning_rate": 1.6253876158773864e-05, "loss": 0.5448, "step": 26943 }, { "epoch": 0.5714406905473903, "grad_norm": 0.3848567306995392, "learning_rate": 1.6253615924142084e-05, "loss": 0.5141, "step": 26944 }, { "epoch": 0.5714618990053233, "grad_norm": 0.3263973295688629, "learning_rate": 1.6253355682555086e-05, "loss": 0.4493, "step": 26945 }, { "epoch": 0.5714831074632564, "grad_norm": 0.3194774091243744, "learning_rate": 1.625309543401316e-05, "loss": 0.4509, "step": 26946 }, { "epoch": 0.5715043159211893, "grad_norm": 0.3536650538444519, "learning_rate": 1.62528351785166e-05, "loss": 0.491, "step": 26947 }, { "epoch": 0.5715255243791224, "grad_norm": 0.3654482066631317, "learning_rate": 1.6252574916065685e-05, "loss": 0.4944, "step": 26948 }, { "epoch": 0.5715467328370554, "grad_norm": 0.34355807304382324, "learning_rate": 1.6252314646660716e-05, "loss": 0.5071, "step": 26949 }, { "epoch": 0.5715679412949884, "grad_norm": 0.34135496616363525, "learning_rate": 1.625205437030197e-05, "loss": 0.4839, "step": 26950 }, { "epoch": 0.5715891497529214, "grad_norm": 0.3519687354564667, "learning_rate": 1.625179408698975e-05, "loss": 0.5088, "step": 26951 }, { "epoch": 0.5716103582108545, "grad_norm": 0.36505162715911865, "learning_rate": 1.6251533796724336e-05, "loss": 0.5658, "step": 26952 }, { "epoch": 0.5716315666687876, "grad_norm": 0.4039246439933777, "learning_rate": 1.625127349950602e-05, "loss": 0.5651, "step": 26953 }, { "epoch": 0.5716527751267205, "grad_norm": 0.3496769368648529, "learning_rate": 1.6251013195335094e-05, "loss": 0.4736, "step": 26954 }, { "epoch": 0.5716739835846536, "grad_norm": 0.38634055852890015, "learning_rate": 1.6250752884211845e-05, "loss": 0.526, "step": 26955 }, { "epoch": 0.5716951920425866, "grad_norm": 0.3500113785266876, "learning_rate": 1.625049256613656e-05, "loss": 0.5745, "step": 26956 }, { "epoch": 0.5717164005005196, "grad_norm": 0.3376935124397278, "learning_rate": 1.6250232241109534e-05, "loss": 0.5091, "step": 26957 }, { "epoch": 0.5717376089584526, "grad_norm": 0.38857394456863403, "learning_rate": 1.6249971909131056e-05, "loss": 0.5193, "step": 26958 }, { "epoch": 0.5717588174163857, "grad_norm": 0.34059587121009827, "learning_rate": 1.624971157020141e-05, "loss": 0.4696, "step": 26959 }, { "epoch": 0.5717800258743186, "grad_norm": 0.3642485737800598, "learning_rate": 1.6249451224320888e-05, "loss": 0.427, "step": 26960 }, { "epoch": 0.5718012343322517, "grad_norm": 0.3485933840274811, "learning_rate": 1.6249190871489786e-05, "loss": 0.5106, "step": 26961 }, { "epoch": 0.5718224427901847, "grad_norm": 0.3452008068561554, "learning_rate": 1.6248930511708384e-05, "loss": 0.4728, "step": 26962 }, { "epoch": 0.5718436512481178, "grad_norm": 0.38460391759872437, "learning_rate": 1.6248670144976977e-05, "loss": 0.4543, "step": 26963 }, { "epoch": 0.5718648597060507, "grad_norm": 0.5844859480857849, "learning_rate": 1.6248409771295852e-05, "loss": 0.452, "step": 26964 }, { "epoch": 0.5718860681639838, "grad_norm": 0.33655333518981934, "learning_rate": 1.6248149390665302e-05, "loss": 0.4627, "step": 26965 }, { "epoch": 0.5719072766219169, "grad_norm": 0.31212317943573, "learning_rate": 1.6247889003085613e-05, "loss": 0.4882, "step": 26966 }, { "epoch": 0.5719284850798498, "grad_norm": 0.337850421667099, "learning_rate": 1.624762860855708e-05, "loss": 0.5211, "step": 26967 }, { "epoch": 0.5719496935377829, "grad_norm": 0.3674720525741577, "learning_rate": 1.624736820707998e-05, "loss": 0.434, "step": 26968 }, { "epoch": 0.5719709019957159, "grad_norm": 0.3422193229198456, "learning_rate": 1.624710779865462e-05, "loss": 0.4165, "step": 26969 }, { "epoch": 0.571992110453649, "grad_norm": 0.3837575614452362, "learning_rate": 1.6246847383281277e-05, "loss": 0.516, "step": 26970 }, { "epoch": 0.5720133189115819, "grad_norm": 0.35660532116889954, "learning_rate": 1.6246586960960244e-05, "loss": 0.484, "step": 26971 }, { "epoch": 0.572034527369515, "grad_norm": 0.34134721755981445, "learning_rate": 1.6246326531691815e-05, "loss": 0.4794, "step": 26972 }, { "epoch": 0.572055735827448, "grad_norm": 0.3056587278842926, "learning_rate": 1.624606609547627e-05, "loss": 0.4776, "step": 26973 }, { "epoch": 0.572076944285381, "grad_norm": 0.3508995771408081, "learning_rate": 1.624580565231391e-05, "loss": 0.5258, "step": 26974 }, { "epoch": 0.572098152743314, "grad_norm": 0.4418460428714752, "learning_rate": 1.624554520220502e-05, "loss": 0.4797, "step": 26975 }, { "epoch": 0.5721193612012471, "grad_norm": 0.3607683777809143, "learning_rate": 1.6245284745149884e-05, "loss": 0.5088, "step": 26976 }, { "epoch": 0.57214056965918, "grad_norm": 0.36426660418510437, "learning_rate": 1.62450242811488e-05, "loss": 0.5228, "step": 26977 }, { "epoch": 0.5721617781171131, "grad_norm": 0.34686991572380066, "learning_rate": 1.624476381020205e-05, "loss": 0.5368, "step": 26978 }, { "epoch": 0.5721829865750462, "grad_norm": 0.4233948886394501, "learning_rate": 1.6244503332309933e-05, "loss": 0.5684, "step": 26979 }, { "epoch": 0.5722041950329791, "grad_norm": 0.3735208511352539, "learning_rate": 1.624424284747273e-05, "loss": 0.5055, "step": 26980 }, { "epoch": 0.5722254034909122, "grad_norm": 0.36858171224594116, "learning_rate": 1.6243982355690737e-05, "loss": 0.5274, "step": 26981 }, { "epoch": 0.5722466119488452, "grad_norm": 0.39015862345695496, "learning_rate": 1.624372185696424e-05, "loss": 0.5676, "step": 26982 }, { "epoch": 0.5722678204067783, "grad_norm": 1.4522541761398315, "learning_rate": 1.624346135129353e-05, "loss": 0.5094, "step": 26983 }, { "epoch": 0.5722890288647112, "grad_norm": 0.3626229465007782, "learning_rate": 1.6243200838678896e-05, "loss": 0.4884, "step": 26984 }, { "epoch": 0.5723102373226443, "grad_norm": 0.42255154252052307, "learning_rate": 1.6242940319120628e-05, "loss": 0.5458, "step": 26985 }, { "epoch": 0.5723314457805773, "grad_norm": 0.3779623806476593, "learning_rate": 1.6242679792619018e-05, "loss": 0.4684, "step": 26986 }, { "epoch": 0.5723526542385103, "grad_norm": 0.3755479156970978, "learning_rate": 1.6242419259174353e-05, "loss": 0.5173, "step": 26987 }, { "epoch": 0.5723738626964433, "grad_norm": 0.37445297837257385, "learning_rate": 1.624215871878692e-05, "loss": 0.5103, "step": 26988 }, { "epoch": 0.5723950711543764, "grad_norm": 0.381255567073822, "learning_rate": 1.6241898171457018e-05, "loss": 0.4758, "step": 26989 }, { "epoch": 0.5724162796123093, "grad_norm": 0.4709490239620209, "learning_rate": 1.6241637617184928e-05, "loss": 0.5495, "step": 26990 }, { "epoch": 0.5724374880702424, "grad_norm": 0.32071712613105774, "learning_rate": 1.624137705597094e-05, "loss": 0.4357, "step": 26991 }, { "epoch": 0.5724586965281754, "grad_norm": 0.34128981828689575, "learning_rate": 1.624111648781535e-05, "loss": 0.5991, "step": 26992 }, { "epoch": 0.5724799049861085, "grad_norm": 0.4146798551082611, "learning_rate": 1.6240855912718444e-05, "loss": 0.5066, "step": 26993 }, { "epoch": 0.5725011134440415, "grad_norm": 0.3558943271636963, "learning_rate": 1.6240595330680515e-05, "loss": 0.5381, "step": 26994 }, { "epoch": 0.5725223219019745, "grad_norm": 0.34530720114707947, "learning_rate": 1.624033474170185e-05, "loss": 0.4776, "step": 26995 }, { "epoch": 0.5725435303599076, "grad_norm": 0.33492931723594666, "learning_rate": 1.6240074145782735e-05, "loss": 0.5307, "step": 26996 }, { "epoch": 0.5725647388178405, "grad_norm": 0.31995895504951477, "learning_rate": 1.6239813542923463e-05, "loss": 0.5087, "step": 26997 }, { "epoch": 0.5725859472757736, "grad_norm": 0.37240198254585266, "learning_rate": 1.6239552933124327e-05, "loss": 0.5641, "step": 26998 }, { "epoch": 0.5726071557337066, "grad_norm": 0.38074201345443726, "learning_rate": 1.6239292316385617e-05, "loss": 0.5997, "step": 26999 }, { "epoch": 0.5726283641916396, "grad_norm": 0.3425070643424988, "learning_rate": 1.6239031692707613e-05, "loss": 0.4745, "step": 27000 }, { "epoch": 0.5726495726495726, "grad_norm": 0.34344029426574707, "learning_rate": 1.623877106209062e-05, "loss": 0.4695, "step": 27001 }, { "epoch": 0.5726707811075057, "grad_norm": 0.37502938508987427, "learning_rate": 1.6238510424534917e-05, "loss": 0.5417, "step": 27002 }, { "epoch": 0.5726919895654387, "grad_norm": 0.35326117277145386, "learning_rate": 1.6238249780040797e-05, "loss": 0.5061, "step": 27003 }, { "epoch": 0.5727131980233717, "grad_norm": 0.33427751064300537, "learning_rate": 1.623798912860855e-05, "loss": 0.519, "step": 27004 }, { "epoch": 0.5727344064813047, "grad_norm": 0.36105671525001526, "learning_rate": 1.6237728470238464e-05, "loss": 0.4917, "step": 27005 }, { "epoch": 0.5727556149392378, "grad_norm": 0.343381404876709, "learning_rate": 1.623746780493083e-05, "loss": 0.529, "step": 27006 }, { "epoch": 0.5727768233971708, "grad_norm": 0.36679068207740784, "learning_rate": 1.6237207132685943e-05, "loss": 0.5587, "step": 27007 }, { "epoch": 0.5727980318551038, "grad_norm": 0.33690494298934937, "learning_rate": 1.6236946453504082e-05, "loss": 0.4787, "step": 27008 }, { "epoch": 0.5728192403130369, "grad_norm": 0.33026447892189026, "learning_rate": 1.6236685767385547e-05, "loss": 0.4171, "step": 27009 }, { "epoch": 0.5728404487709698, "grad_norm": 0.4153207540512085, "learning_rate": 1.6236425074330625e-05, "loss": 0.4604, "step": 27010 }, { "epoch": 0.5728616572289029, "grad_norm": 0.5424229502677917, "learning_rate": 1.6236164374339605e-05, "loss": 0.5749, "step": 27011 }, { "epoch": 0.5728828656868359, "grad_norm": 0.5338180661201477, "learning_rate": 1.6235903667412777e-05, "loss": 0.5059, "step": 27012 }, { "epoch": 0.572904074144769, "grad_norm": 0.3305949866771698, "learning_rate": 1.623564295355043e-05, "loss": 0.4572, "step": 27013 }, { "epoch": 0.5729252826027019, "grad_norm": 0.3627314269542694, "learning_rate": 1.6235382232752858e-05, "loss": 0.523, "step": 27014 }, { "epoch": 0.572946491060635, "grad_norm": 0.3260931968688965, "learning_rate": 1.6235121505020343e-05, "loss": 0.5038, "step": 27015 }, { "epoch": 0.572967699518568, "grad_norm": 0.33486881852149963, "learning_rate": 1.6234860770353182e-05, "loss": 0.4701, "step": 27016 }, { "epoch": 0.572988907976501, "grad_norm": 0.3410993218421936, "learning_rate": 1.6234600028751665e-05, "loss": 0.517, "step": 27017 }, { "epoch": 0.573010116434434, "grad_norm": 0.3971346616744995, "learning_rate": 1.6234339280216078e-05, "loss": 0.4275, "step": 27018 }, { "epoch": 0.5730313248923671, "grad_norm": 0.29023870825767517, "learning_rate": 1.6234078524746713e-05, "loss": 0.3833, "step": 27019 }, { "epoch": 0.5730525333503002, "grad_norm": 0.32696908712387085, "learning_rate": 1.6233817762343863e-05, "loss": 0.3674, "step": 27020 }, { "epoch": 0.5730737418082331, "grad_norm": 0.3661682605743408, "learning_rate": 1.6233556993007813e-05, "loss": 0.5117, "step": 27021 }, { "epoch": 0.5730949502661662, "grad_norm": 0.36533859372138977, "learning_rate": 1.6233296216738853e-05, "loss": 0.5052, "step": 27022 }, { "epoch": 0.5731161587240992, "grad_norm": 0.40030035376548767, "learning_rate": 1.6233035433537278e-05, "loss": 0.5118, "step": 27023 }, { "epoch": 0.5731373671820322, "grad_norm": 0.3654066026210785, "learning_rate": 1.6232774643403374e-05, "loss": 0.5496, "step": 27024 }, { "epoch": 0.5731585756399652, "grad_norm": 0.3479505777359009, "learning_rate": 1.6232513846337435e-05, "loss": 0.5389, "step": 27025 }, { "epoch": 0.5731797840978983, "grad_norm": 0.35387083888053894, "learning_rate": 1.6232253042339743e-05, "loss": 0.5505, "step": 27026 }, { "epoch": 0.5732009925558312, "grad_norm": 0.3573942482471466, "learning_rate": 1.6231992231410595e-05, "loss": 0.5348, "step": 27027 }, { "epoch": 0.5732222010137643, "grad_norm": 0.9175252318382263, "learning_rate": 1.623173141355028e-05, "loss": 0.5184, "step": 27028 }, { "epoch": 0.5732434094716973, "grad_norm": 0.39259210228919983, "learning_rate": 1.623147058875909e-05, "loss": 0.5283, "step": 27029 }, { "epoch": 0.5732646179296303, "grad_norm": 0.6172758340835571, "learning_rate": 1.623120975703731e-05, "loss": 0.4712, "step": 27030 }, { "epoch": 0.5732858263875633, "grad_norm": 0.36929795145988464, "learning_rate": 1.6230948918385233e-05, "loss": 0.5351, "step": 27031 }, { "epoch": 0.5733070348454964, "grad_norm": 0.35371261835098267, "learning_rate": 1.623068807280315e-05, "loss": 0.537, "step": 27032 }, { "epoch": 0.5733282433034294, "grad_norm": 0.3326999843120575, "learning_rate": 1.623042722029135e-05, "loss": 0.514, "step": 27033 }, { "epoch": 0.5733494517613624, "grad_norm": 0.33911752700805664, "learning_rate": 1.623016636085012e-05, "loss": 0.5195, "step": 27034 }, { "epoch": 0.5733706602192955, "grad_norm": 0.389409601688385, "learning_rate": 1.622990549447975e-05, "loss": 0.5345, "step": 27035 }, { "epoch": 0.5733918686772285, "grad_norm": 0.3459104597568512, "learning_rate": 1.622964462118054e-05, "loss": 0.5689, "step": 27036 }, { "epoch": 0.5734130771351615, "grad_norm": 0.34109199047088623, "learning_rate": 1.622938374095277e-05, "loss": 0.446, "step": 27037 }, { "epoch": 0.5734342855930945, "grad_norm": 0.36890044808387756, "learning_rate": 1.6229122853796736e-05, "loss": 0.5401, "step": 27038 }, { "epoch": 0.5734554940510276, "grad_norm": 0.3567301630973816, "learning_rate": 1.6228861959712724e-05, "loss": 0.5075, "step": 27039 }, { "epoch": 0.5734767025089605, "grad_norm": 0.35421276092529297, "learning_rate": 1.622860105870103e-05, "loss": 0.4911, "step": 27040 }, { "epoch": 0.5734979109668936, "grad_norm": 0.5509387254714966, "learning_rate": 1.6228340150761935e-05, "loss": 0.537, "step": 27041 }, { "epoch": 0.5735191194248266, "grad_norm": 0.40272751450538635, "learning_rate": 1.6228079235895732e-05, "loss": 0.4957, "step": 27042 }, { "epoch": 0.5735403278827597, "grad_norm": 0.39640378952026367, "learning_rate": 1.622781831410272e-05, "loss": 0.4497, "step": 27043 }, { "epoch": 0.5735615363406926, "grad_norm": 0.33819398283958435, "learning_rate": 1.622755738538318e-05, "loss": 0.4705, "step": 27044 }, { "epoch": 0.5735827447986257, "grad_norm": 0.6400256752967834, "learning_rate": 1.6227296449737404e-05, "loss": 0.4412, "step": 27045 }, { "epoch": 0.5736039532565587, "grad_norm": 0.41791728138923645, "learning_rate": 1.622703550716568e-05, "loss": 0.4713, "step": 27046 }, { "epoch": 0.5736251617144917, "grad_norm": 0.38446760177612305, "learning_rate": 1.6226774557668306e-05, "loss": 0.4652, "step": 27047 }, { "epoch": 0.5736463701724248, "grad_norm": 0.3486664891242981, "learning_rate": 1.6226513601245565e-05, "loss": 0.4903, "step": 27048 }, { "epoch": 0.5736675786303578, "grad_norm": 0.3463554382324219, "learning_rate": 1.622625263789775e-05, "loss": 0.4641, "step": 27049 }, { "epoch": 0.5736887870882909, "grad_norm": 0.40197423100471497, "learning_rate": 1.622599166762515e-05, "loss": 0.4968, "step": 27050 }, { "epoch": 0.5737099955462238, "grad_norm": 0.35084015130996704, "learning_rate": 1.622573069042806e-05, "loss": 0.5187, "step": 27051 }, { "epoch": 0.5737312040041569, "grad_norm": 0.3626212477684021, "learning_rate": 1.6225469706306766e-05, "loss": 0.5234, "step": 27052 }, { "epoch": 0.5737524124620899, "grad_norm": 0.31393861770629883, "learning_rate": 1.6225208715261557e-05, "loss": 0.3838, "step": 27053 }, { "epoch": 0.5737736209200229, "grad_norm": 0.34675532579421997, "learning_rate": 1.6224947717292724e-05, "loss": 0.42, "step": 27054 }, { "epoch": 0.5737948293779559, "grad_norm": 0.3712306618690491, "learning_rate": 1.6224686712400556e-05, "loss": 0.5276, "step": 27055 }, { "epoch": 0.573816037835889, "grad_norm": 0.44832146167755127, "learning_rate": 1.622442570058535e-05, "loss": 0.4662, "step": 27056 }, { "epoch": 0.5738372462938219, "grad_norm": 0.4085793197154999, "learning_rate": 1.622416468184739e-05, "loss": 0.4722, "step": 27057 }, { "epoch": 0.573858454751755, "grad_norm": 0.34125226736068726, "learning_rate": 1.622390365618697e-05, "loss": 0.4583, "step": 27058 }, { "epoch": 0.573879663209688, "grad_norm": 0.3588607609272003, "learning_rate": 1.6223642623604375e-05, "loss": 0.5176, "step": 27059 }, { "epoch": 0.573900871667621, "grad_norm": 0.40054821968078613, "learning_rate": 1.62233815840999e-05, "loss": 0.4775, "step": 27060 }, { "epoch": 0.5739220801255541, "grad_norm": 0.3577536642551422, "learning_rate": 1.6223120537673833e-05, "loss": 0.4421, "step": 27061 }, { "epoch": 0.5739432885834871, "grad_norm": 0.3460191488265991, "learning_rate": 1.6222859484326468e-05, "loss": 0.5298, "step": 27062 }, { "epoch": 0.5739644970414202, "grad_norm": 0.3989737033843994, "learning_rate": 1.6222598424058088e-05, "loss": 0.4929, "step": 27063 }, { "epoch": 0.5739857054993531, "grad_norm": 0.3944874107837677, "learning_rate": 1.6222337356868994e-05, "loss": 0.452, "step": 27064 }, { "epoch": 0.5740069139572862, "grad_norm": 0.34242939949035645, "learning_rate": 1.622207628275947e-05, "loss": 0.4375, "step": 27065 }, { "epoch": 0.5740281224152192, "grad_norm": 0.353714257478714, "learning_rate": 1.6221815201729806e-05, "loss": 0.4756, "step": 27066 }, { "epoch": 0.5740493308731522, "grad_norm": 0.4004068076610565, "learning_rate": 1.6221554113780287e-05, "loss": 0.5155, "step": 27067 }, { "epoch": 0.5740705393310852, "grad_norm": 0.35514068603515625, "learning_rate": 1.6221293018911218e-05, "loss": 0.4241, "step": 27068 }, { "epoch": 0.5740917477890183, "grad_norm": 0.37212854623794556, "learning_rate": 1.6221031917122875e-05, "loss": 0.5016, "step": 27069 }, { "epoch": 0.5741129562469512, "grad_norm": 0.30687105655670166, "learning_rate": 1.6220770808415557e-05, "loss": 0.4785, "step": 27070 }, { "epoch": 0.5741341647048843, "grad_norm": 0.4269905090332031, "learning_rate": 1.6220509692789555e-05, "loss": 0.4934, "step": 27071 }, { "epoch": 0.5741553731628173, "grad_norm": 0.3580463230609894, "learning_rate": 1.622024857024515e-05, "loss": 0.5099, "step": 27072 }, { "epoch": 0.5741765816207504, "grad_norm": 0.3682972192764282, "learning_rate": 1.6219987440782646e-05, "loss": 0.4675, "step": 27073 }, { "epoch": 0.5741977900786834, "grad_norm": 0.3441982865333557, "learning_rate": 1.6219726304402317e-05, "loss": 0.5002, "step": 27074 }, { "epoch": 0.5742189985366164, "grad_norm": 0.3577088415622711, "learning_rate": 1.6219465161104468e-05, "loss": 0.5656, "step": 27075 }, { "epoch": 0.5742402069945495, "grad_norm": 0.2994197905063629, "learning_rate": 1.621920401088938e-05, "loss": 0.3975, "step": 27076 }, { "epoch": 0.5742614154524824, "grad_norm": 0.44270917773246765, "learning_rate": 1.6218942853757352e-05, "loss": 0.5715, "step": 27077 }, { "epoch": 0.5742826239104155, "grad_norm": 0.38796159625053406, "learning_rate": 1.621868168970867e-05, "loss": 0.4845, "step": 27078 }, { "epoch": 0.5743038323683485, "grad_norm": 0.4063289165496826, "learning_rate": 1.6218420518743623e-05, "loss": 0.5355, "step": 27079 }, { "epoch": 0.5743250408262816, "grad_norm": 0.40439459681510925, "learning_rate": 1.62181593408625e-05, "loss": 0.5056, "step": 27080 }, { "epoch": 0.5743462492842145, "grad_norm": 0.3320305645465851, "learning_rate": 1.6217898156065594e-05, "loss": 0.4684, "step": 27081 }, { "epoch": 0.5743674577421476, "grad_norm": 0.4087488651275635, "learning_rate": 1.6217636964353198e-05, "loss": 0.41, "step": 27082 }, { "epoch": 0.5743886662000806, "grad_norm": 0.4305048882961273, "learning_rate": 1.62173757657256e-05, "loss": 0.4814, "step": 27083 }, { "epoch": 0.5744098746580136, "grad_norm": 0.37408381700515747, "learning_rate": 1.621711456018309e-05, "loss": 0.4289, "step": 27084 }, { "epoch": 0.5744310831159466, "grad_norm": 0.3332462012767792, "learning_rate": 1.6216853347725963e-05, "loss": 0.5443, "step": 27085 }, { "epoch": 0.5744522915738797, "grad_norm": 0.3385337293148041, "learning_rate": 1.62165921283545e-05, "loss": 0.4579, "step": 27086 }, { "epoch": 0.5744735000318126, "grad_norm": 0.329593688249588, "learning_rate": 1.6216330902069e-05, "loss": 0.3882, "step": 27087 }, { "epoch": 0.5744947084897457, "grad_norm": 0.3308012783527374, "learning_rate": 1.6216069668869752e-05, "loss": 0.5192, "step": 27088 }, { "epoch": 0.5745159169476788, "grad_norm": 0.3466686010360718, "learning_rate": 1.6215808428757043e-05, "loss": 0.5145, "step": 27089 }, { "epoch": 0.5745371254056117, "grad_norm": 0.3988005816936493, "learning_rate": 1.6215547181731166e-05, "loss": 0.5394, "step": 27090 }, { "epoch": 0.5745583338635448, "grad_norm": 0.42096662521362305, "learning_rate": 1.6215285927792412e-05, "loss": 0.4867, "step": 27091 }, { "epoch": 0.5745795423214778, "grad_norm": 0.33786723017692566, "learning_rate": 1.6215024666941073e-05, "loss": 0.4084, "step": 27092 }, { "epoch": 0.5746007507794109, "grad_norm": 0.3360700011253357, "learning_rate": 1.6214763399177434e-05, "loss": 0.5117, "step": 27093 }, { "epoch": 0.5746219592373438, "grad_norm": 0.3359532058238983, "learning_rate": 1.621450212450179e-05, "loss": 0.4207, "step": 27094 }, { "epoch": 0.5746431676952769, "grad_norm": 0.3224400281906128, "learning_rate": 1.6214240842914433e-05, "loss": 0.5044, "step": 27095 }, { "epoch": 0.5746643761532099, "grad_norm": 0.33269742131233215, "learning_rate": 1.621397955441565e-05, "loss": 0.5007, "step": 27096 }, { "epoch": 0.5746855846111429, "grad_norm": 0.36116158962249756, "learning_rate": 1.6213718259005734e-05, "loss": 0.5901, "step": 27097 }, { "epoch": 0.5747067930690759, "grad_norm": 0.3588195741176605, "learning_rate": 1.621345695668497e-05, "loss": 0.5135, "step": 27098 }, { "epoch": 0.574728001527009, "grad_norm": 0.41506513953208923, "learning_rate": 1.6213195647453657e-05, "loss": 0.4456, "step": 27099 }, { "epoch": 0.5747492099849419, "grad_norm": 0.34938547015190125, "learning_rate": 1.621293433131208e-05, "loss": 0.4883, "step": 27100 }, { "epoch": 0.574770418442875, "grad_norm": 0.36737126111984253, "learning_rate": 1.6212673008260534e-05, "loss": 0.5904, "step": 27101 }, { "epoch": 0.5747916269008081, "grad_norm": 0.3533090353012085, "learning_rate": 1.6212411678299306e-05, "loss": 0.5095, "step": 27102 }, { "epoch": 0.5748128353587411, "grad_norm": 0.34568798542022705, "learning_rate": 1.6212150341428688e-05, "loss": 0.444, "step": 27103 }, { "epoch": 0.5748340438166741, "grad_norm": 0.3729036748409271, "learning_rate": 1.621188899764897e-05, "loss": 0.5001, "step": 27104 }, { "epoch": 0.5748552522746071, "grad_norm": 0.3244538903236389, "learning_rate": 1.621162764696044e-05, "loss": 0.5129, "step": 27105 }, { "epoch": 0.5748764607325402, "grad_norm": 0.29344409704208374, "learning_rate": 1.6211366289363394e-05, "loss": 0.4443, "step": 27106 }, { "epoch": 0.5748976691904731, "grad_norm": 0.3817843794822693, "learning_rate": 1.621110492485812e-05, "loss": 0.5657, "step": 27107 }, { "epoch": 0.5749188776484062, "grad_norm": 0.35588645935058594, "learning_rate": 1.621084355344491e-05, "loss": 0.5539, "step": 27108 }, { "epoch": 0.5749400861063392, "grad_norm": 0.32765087485313416, "learning_rate": 1.6210582175124056e-05, "loss": 0.464, "step": 27109 }, { "epoch": 0.5749612945642723, "grad_norm": 1.1979044675827026, "learning_rate": 1.6210320789895843e-05, "loss": 0.4511, "step": 27110 }, { "epoch": 0.5749825030222052, "grad_norm": 0.34256210923194885, "learning_rate": 1.621005939776057e-05, "loss": 0.4838, "step": 27111 }, { "epoch": 0.5750037114801383, "grad_norm": 0.3497268259525299, "learning_rate": 1.6209797998718517e-05, "loss": 0.5403, "step": 27112 }, { "epoch": 0.5750249199380713, "grad_norm": 0.38666075468063354, "learning_rate": 1.620953659276998e-05, "loss": 0.4186, "step": 27113 }, { "epoch": 0.5750461283960043, "grad_norm": 0.32176530361175537, "learning_rate": 1.6209275179915255e-05, "loss": 0.538, "step": 27114 }, { "epoch": 0.5750673368539374, "grad_norm": 0.34072452783584595, "learning_rate": 1.6209013760154627e-05, "loss": 0.4928, "step": 27115 }, { "epoch": 0.5750885453118704, "grad_norm": 0.3324896991252899, "learning_rate": 1.6208752333488386e-05, "loss": 0.4526, "step": 27116 }, { "epoch": 0.5751097537698034, "grad_norm": 0.3898807764053345, "learning_rate": 1.6208490899916828e-05, "loss": 0.4979, "step": 27117 }, { "epoch": 0.5751309622277364, "grad_norm": 0.38753074407577515, "learning_rate": 1.6208229459440236e-05, "loss": 0.5292, "step": 27118 }, { "epoch": 0.5751521706856695, "grad_norm": 0.34636494517326355, "learning_rate": 1.6207968012058907e-05, "loss": 0.5471, "step": 27119 }, { "epoch": 0.5751733791436024, "grad_norm": 0.42880144715309143, "learning_rate": 1.6207706557773127e-05, "loss": 0.478, "step": 27120 }, { "epoch": 0.5751945876015355, "grad_norm": 0.5098500847816467, "learning_rate": 1.6207445096583196e-05, "loss": 0.5259, "step": 27121 }, { "epoch": 0.5752157960594685, "grad_norm": 0.32385319471359253, "learning_rate": 1.6207183628489394e-05, "loss": 0.4325, "step": 27122 }, { "epoch": 0.5752370045174016, "grad_norm": 0.42422300577163696, "learning_rate": 1.620692215349202e-05, "loss": 0.6076, "step": 27123 }, { "epoch": 0.5752582129753345, "grad_norm": 0.40140867233276367, "learning_rate": 1.6206660671591356e-05, "loss": 0.5603, "step": 27124 }, { "epoch": 0.5752794214332676, "grad_norm": 0.33361008763313293, "learning_rate": 1.62063991827877e-05, "loss": 0.5152, "step": 27125 }, { "epoch": 0.5753006298912006, "grad_norm": 0.3493604362010956, "learning_rate": 1.620613768708134e-05, "loss": 0.5002, "step": 27126 }, { "epoch": 0.5753218383491336, "grad_norm": 0.32417336106300354, "learning_rate": 1.6205876184472573e-05, "loss": 0.4529, "step": 27127 }, { "epoch": 0.5753430468070666, "grad_norm": 0.4029470682144165, "learning_rate": 1.6205614674961675e-05, "loss": 0.5306, "step": 27128 }, { "epoch": 0.5753642552649997, "grad_norm": 0.3643171191215515, "learning_rate": 1.6205353158548956e-05, "loss": 0.5251, "step": 27129 }, { "epoch": 0.5753854637229328, "grad_norm": 0.37637436389923096, "learning_rate": 1.620509163523469e-05, "loss": 0.5323, "step": 27130 }, { "epoch": 0.5754066721808657, "grad_norm": 0.3271796405315399, "learning_rate": 1.6204830105019176e-05, "loss": 0.5065, "step": 27131 }, { "epoch": 0.5754278806387988, "grad_norm": 0.3516424894332886, "learning_rate": 1.620456856790271e-05, "loss": 0.4708, "step": 27132 }, { "epoch": 0.5754490890967318, "grad_norm": 0.36536386609077454, "learning_rate": 1.620430702388557e-05, "loss": 0.5479, "step": 27133 }, { "epoch": 0.5754702975546648, "grad_norm": 0.3684649169445038, "learning_rate": 1.6204045472968055e-05, "loss": 0.5788, "step": 27134 }, { "epoch": 0.5754915060125978, "grad_norm": 0.339604914188385, "learning_rate": 1.6203783915150455e-05, "loss": 0.5288, "step": 27135 }, { "epoch": 0.5755127144705309, "grad_norm": 0.3535606265068054, "learning_rate": 1.6203522350433063e-05, "loss": 0.5882, "step": 27136 }, { "epoch": 0.5755339229284638, "grad_norm": 0.4318191409111023, "learning_rate": 1.6203260778816165e-05, "loss": 0.5218, "step": 27137 }, { "epoch": 0.5755551313863969, "grad_norm": 0.400968074798584, "learning_rate": 1.620299920030005e-05, "loss": 0.5549, "step": 27138 }, { "epoch": 0.5755763398443299, "grad_norm": 0.3474178910255432, "learning_rate": 1.620273761488502e-05, "loss": 0.5358, "step": 27139 }, { "epoch": 0.575597548302263, "grad_norm": 0.35568317770957947, "learning_rate": 1.6202476022571355e-05, "loss": 0.4911, "step": 27140 }, { "epoch": 0.5756187567601959, "grad_norm": 0.3359229862689972, "learning_rate": 1.6202214423359356e-05, "loss": 0.5269, "step": 27141 }, { "epoch": 0.575639965218129, "grad_norm": 0.3486461341381073, "learning_rate": 1.62019528172493e-05, "loss": 0.537, "step": 27142 }, { "epoch": 0.5756611736760621, "grad_norm": 0.32127687335014343, "learning_rate": 1.6201691204241493e-05, "loss": 0.4788, "step": 27143 }, { "epoch": 0.575682382133995, "grad_norm": 0.36483141779899597, "learning_rate": 1.6201429584336212e-05, "loss": 0.4611, "step": 27144 }, { "epoch": 0.5757035905919281, "grad_norm": 0.6920483708381653, "learning_rate": 1.620116795753376e-05, "loss": 0.5003, "step": 27145 }, { "epoch": 0.5757247990498611, "grad_norm": 0.44396036863327026, "learning_rate": 1.620090632383442e-05, "loss": 0.9827, "step": 27146 }, { "epoch": 0.5757460075077941, "grad_norm": 0.3394852578639984, "learning_rate": 1.620064468323849e-05, "loss": 0.4306, "step": 27147 }, { "epoch": 0.5757672159657271, "grad_norm": 0.310519278049469, "learning_rate": 1.6200383035746252e-05, "loss": 0.4747, "step": 27148 }, { "epoch": 0.5757884244236602, "grad_norm": 0.37947317957878113, "learning_rate": 1.6200121381358002e-05, "loss": 0.4317, "step": 27149 }, { "epoch": 0.5758096328815931, "grad_norm": 0.4148377478122711, "learning_rate": 1.6199859720074033e-05, "loss": 0.5804, "step": 27150 }, { "epoch": 0.5758308413395262, "grad_norm": 0.35345134139060974, "learning_rate": 1.6199598051894633e-05, "loss": 0.4805, "step": 27151 }, { "epoch": 0.5758520497974592, "grad_norm": 0.35219553112983704, "learning_rate": 1.6199336376820097e-05, "loss": 0.5004, "step": 27152 }, { "epoch": 0.5758732582553923, "grad_norm": 0.4237208366394043, "learning_rate": 1.619907469485071e-05, "loss": 0.4213, "step": 27153 }, { "epoch": 0.5758944667133252, "grad_norm": 0.37891146540641785, "learning_rate": 1.6198813005986765e-05, "loss": 0.5289, "step": 27154 }, { "epoch": 0.5759156751712583, "grad_norm": 0.4032444357872009, "learning_rate": 1.6198551310228555e-05, "loss": 0.5731, "step": 27155 }, { "epoch": 0.5759368836291914, "grad_norm": 0.5268189907073975, "learning_rate": 1.619828960757637e-05, "loss": 0.4717, "step": 27156 }, { "epoch": 0.5759580920871243, "grad_norm": 0.47655701637268066, "learning_rate": 1.61980278980305e-05, "loss": 0.5647, "step": 27157 }, { "epoch": 0.5759793005450574, "grad_norm": 0.37983134388923645, "learning_rate": 1.6197766181591243e-05, "loss": 0.5236, "step": 27158 }, { "epoch": 0.5760005090029904, "grad_norm": 0.3858718276023865, "learning_rate": 1.6197504458258878e-05, "loss": 0.4413, "step": 27159 }, { "epoch": 0.5760217174609235, "grad_norm": 0.5033395886421204, "learning_rate": 1.6197242728033705e-05, "loss": 0.5419, "step": 27160 }, { "epoch": 0.5760429259188564, "grad_norm": 0.38039740920066833, "learning_rate": 1.6196980990916014e-05, "loss": 0.5018, "step": 27161 }, { "epoch": 0.5760641343767895, "grad_norm": 0.35860928893089294, "learning_rate": 1.619671924690609e-05, "loss": 0.4641, "step": 27162 }, { "epoch": 0.5760853428347225, "grad_norm": 0.3722129464149475, "learning_rate": 1.619645749600423e-05, "loss": 0.5317, "step": 27163 }, { "epoch": 0.5761065512926555, "grad_norm": 1.7987693548202515, "learning_rate": 1.6196195738210727e-05, "loss": 0.5495, "step": 27164 }, { "epoch": 0.5761277597505885, "grad_norm": 0.3294881284236908, "learning_rate": 1.6195933973525865e-05, "loss": 0.5208, "step": 27165 }, { "epoch": 0.5761489682085216, "grad_norm": 0.3363792896270752, "learning_rate": 1.6195672201949942e-05, "loss": 0.3996, "step": 27166 }, { "epoch": 0.5761701766664545, "grad_norm": 0.3620012402534485, "learning_rate": 1.6195410423483248e-05, "loss": 0.5252, "step": 27167 }, { "epoch": 0.5761913851243876, "grad_norm": 0.3602891266345978, "learning_rate": 1.619514863812607e-05, "loss": 0.4161, "step": 27168 }, { "epoch": 0.5762125935823206, "grad_norm": 0.357516884803772, "learning_rate": 1.61948868458787e-05, "loss": 0.5094, "step": 27169 }, { "epoch": 0.5762338020402537, "grad_norm": 0.34818506240844727, "learning_rate": 1.619462504674143e-05, "loss": 0.5493, "step": 27170 }, { "epoch": 0.5762550104981867, "grad_norm": 0.38556361198425293, "learning_rate": 1.6194363240714554e-05, "loss": 0.4592, "step": 27171 }, { "epoch": 0.5762762189561197, "grad_norm": 0.3921844959259033, "learning_rate": 1.619410142779836e-05, "loss": 0.4504, "step": 27172 }, { "epoch": 0.5762974274140528, "grad_norm": 0.30616578459739685, "learning_rate": 1.6193839607993142e-05, "loss": 0.4467, "step": 27173 }, { "epoch": 0.5763186358719857, "grad_norm": 0.4081217348575592, "learning_rate": 1.6193577781299186e-05, "loss": 0.5691, "step": 27174 }, { "epoch": 0.5763398443299188, "grad_norm": 0.43073228001594543, "learning_rate": 1.619331594771679e-05, "loss": 0.5442, "step": 27175 }, { "epoch": 0.5763610527878518, "grad_norm": 0.366393119096756, "learning_rate": 1.619305410724624e-05, "loss": 0.485, "step": 27176 }, { "epoch": 0.5763822612457848, "grad_norm": 0.3623964488506317, "learning_rate": 1.619279225988783e-05, "loss": 0.4495, "step": 27177 }, { "epoch": 0.5764034697037178, "grad_norm": 0.34678301215171814, "learning_rate": 1.6192530405641852e-05, "loss": 0.466, "step": 27178 }, { "epoch": 0.5764246781616509, "grad_norm": 0.31504327058792114, "learning_rate": 1.619226854450859e-05, "loss": 0.568, "step": 27179 }, { "epoch": 0.5764458866195838, "grad_norm": 0.33828607201576233, "learning_rate": 1.6192006676488345e-05, "loss": 0.4228, "step": 27180 }, { "epoch": 0.5764670950775169, "grad_norm": 0.3638005554676056, "learning_rate": 1.6191744801581406e-05, "loss": 0.5089, "step": 27181 }, { "epoch": 0.5764883035354499, "grad_norm": 0.3821314871311188, "learning_rate": 1.6191482919788056e-05, "loss": 0.522, "step": 27182 }, { "epoch": 0.576509511993383, "grad_norm": 0.3449125289916992, "learning_rate": 1.6191221031108595e-05, "loss": 0.5042, "step": 27183 }, { "epoch": 0.576530720451316, "grad_norm": 0.33852776885032654, "learning_rate": 1.6190959135543313e-05, "loss": 0.5366, "step": 27184 }, { "epoch": 0.576551928909249, "grad_norm": 0.6746240258216858, "learning_rate": 1.6190697233092502e-05, "loss": 0.5127, "step": 27185 }, { "epoch": 0.5765731373671821, "grad_norm": 0.41831496357917786, "learning_rate": 1.6190435323756446e-05, "loss": 0.5923, "step": 27186 }, { "epoch": 0.576594345825115, "grad_norm": 0.42434629797935486, "learning_rate": 1.6190173407535444e-05, "loss": 0.5607, "step": 27187 }, { "epoch": 0.5766155542830481, "grad_norm": 0.3863242566585541, "learning_rate": 1.6189911484429787e-05, "loss": 0.4871, "step": 27188 }, { "epoch": 0.5766367627409811, "grad_norm": 0.6245601177215576, "learning_rate": 1.618964955443976e-05, "loss": 0.4261, "step": 27189 }, { "epoch": 0.5766579711989142, "grad_norm": 0.3192826807498932, "learning_rate": 1.6189387617565663e-05, "loss": 0.4475, "step": 27190 }, { "epoch": 0.5766791796568471, "grad_norm": 0.3857452869415283, "learning_rate": 1.6189125673807783e-05, "loss": 0.506, "step": 27191 }, { "epoch": 0.5767003881147802, "grad_norm": 0.44249311089515686, "learning_rate": 1.618886372316641e-05, "loss": 0.4473, "step": 27192 }, { "epoch": 0.5767215965727132, "grad_norm": 0.36909687519073486, "learning_rate": 1.6188601765641835e-05, "loss": 0.4402, "step": 27193 }, { "epoch": 0.5767428050306462, "grad_norm": 0.3466953933238983, "learning_rate": 1.618833980123435e-05, "loss": 0.4293, "step": 27194 }, { "epoch": 0.5767640134885792, "grad_norm": 0.3414202630519867, "learning_rate": 1.618807782994425e-05, "loss": 0.4719, "step": 27195 }, { "epoch": 0.5767852219465123, "grad_norm": 0.2978041172027588, "learning_rate": 1.618781585177182e-05, "loss": 0.3972, "step": 27196 }, { "epoch": 0.5768064304044453, "grad_norm": 0.3362840712070465, "learning_rate": 1.6187553866717358e-05, "loss": 0.5401, "step": 27197 }, { "epoch": 0.5768276388623783, "grad_norm": 0.36503008008003235, "learning_rate": 1.6187291874781152e-05, "loss": 0.5901, "step": 27198 }, { "epoch": 0.5768488473203114, "grad_norm": 0.31894081830978394, "learning_rate": 1.6187029875963495e-05, "loss": 0.4647, "step": 27199 }, { "epoch": 0.5768700557782444, "grad_norm": 0.39831504225730896, "learning_rate": 1.6186767870264674e-05, "loss": 0.5372, "step": 27200 }, { "epoch": 0.5768912642361774, "grad_norm": 0.35347452759742737, "learning_rate": 1.6186505857684985e-05, "loss": 0.4583, "step": 27201 }, { "epoch": 0.5769124726941104, "grad_norm": 0.3217483162879944, "learning_rate": 1.6186243838224718e-05, "loss": 0.4403, "step": 27202 }, { "epoch": 0.5769336811520435, "grad_norm": 0.3392775058746338, "learning_rate": 1.6185981811884165e-05, "loss": 0.5131, "step": 27203 }, { "epoch": 0.5769548896099764, "grad_norm": 0.36384710669517517, "learning_rate": 1.6185719778663614e-05, "loss": 0.5489, "step": 27204 }, { "epoch": 0.5769760980679095, "grad_norm": 0.37672606110572815, "learning_rate": 1.618545773856336e-05, "loss": 0.4919, "step": 27205 }, { "epoch": 0.5769973065258425, "grad_norm": 0.3639872968196869, "learning_rate": 1.61851956915837e-05, "loss": 0.4039, "step": 27206 }, { "epoch": 0.5770185149837755, "grad_norm": 0.3675149977207184, "learning_rate": 1.6184933637724914e-05, "loss": 0.489, "step": 27207 }, { "epoch": 0.5770397234417085, "grad_norm": 0.34529635310173035, "learning_rate": 1.6184671576987295e-05, "loss": 0.5094, "step": 27208 }, { "epoch": 0.5770609318996416, "grad_norm": 0.3629785180091858, "learning_rate": 1.6184409509371142e-05, "loss": 0.5208, "step": 27209 }, { "epoch": 0.5770821403575745, "grad_norm": 0.34699106216430664, "learning_rate": 1.6184147434876742e-05, "loss": 0.4907, "step": 27210 }, { "epoch": 0.5771033488155076, "grad_norm": 0.3608332872390747, "learning_rate": 1.6183885353504388e-05, "loss": 0.5073, "step": 27211 }, { "epoch": 0.5771245572734407, "grad_norm": 0.38567617535591125, "learning_rate": 1.618362326525437e-05, "loss": 0.47, "step": 27212 }, { "epoch": 0.5771457657313737, "grad_norm": 0.32393980026245117, "learning_rate": 1.618336117012698e-05, "loss": 0.4477, "step": 27213 }, { "epoch": 0.5771669741893067, "grad_norm": 0.3534506559371948, "learning_rate": 1.6183099068122508e-05, "loss": 0.4975, "step": 27214 }, { "epoch": 0.5771881826472397, "grad_norm": 0.33575958013534546, "learning_rate": 1.6182836959241247e-05, "loss": 0.4439, "step": 27215 }, { "epoch": 0.5772093911051728, "grad_norm": 0.34360378980636597, "learning_rate": 1.6182574843483488e-05, "loss": 0.4631, "step": 27216 }, { "epoch": 0.5772305995631057, "grad_norm": 0.37244755029678345, "learning_rate": 1.6182312720849526e-05, "loss": 0.4662, "step": 27217 }, { "epoch": 0.5772518080210388, "grad_norm": 0.3397839367389679, "learning_rate": 1.6182050591339645e-05, "loss": 0.4957, "step": 27218 }, { "epoch": 0.5772730164789718, "grad_norm": 0.3517977297306061, "learning_rate": 1.6181788454954148e-05, "loss": 0.5012, "step": 27219 }, { "epoch": 0.5772942249369049, "grad_norm": 0.36508336663246155, "learning_rate": 1.6181526311693314e-05, "loss": 0.5816, "step": 27220 }, { "epoch": 0.5773154333948378, "grad_norm": 0.3443961441516876, "learning_rate": 1.618126416155744e-05, "loss": 0.5284, "step": 27221 }, { "epoch": 0.5773366418527709, "grad_norm": 0.4336271584033966, "learning_rate": 1.6181002004546817e-05, "loss": 0.4416, "step": 27222 }, { "epoch": 0.5773578503107039, "grad_norm": 0.381782203912735, "learning_rate": 1.6180739840661737e-05, "loss": 0.4467, "step": 27223 }, { "epoch": 0.5773790587686369, "grad_norm": 0.36042603850364685, "learning_rate": 1.6180477669902497e-05, "loss": 0.5611, "step": 27224 }, { "epoch": 0.57740026722657, "grad_norm": 0.3792380690574646, "learning_rate": 1.618021549226938e-05, "loss": 0.4763, "step": 27225 }, { "epoch": 0.577421475684503, "grad_norm": 0.3571057915687561, "learning_rate": 1.6179953307762685e-05, "loss": 0.464, "step": 27226 }, { "epoch": 0.577442684142436, "grad_norm": 0.3159539997577667, "learning_rate": 1.6179691116382695e-05, "loss": 0.4435, "step": 27227 }, { "epoch": 0.577463892600369, "grad_norm": 0.37793564796447754, "learning_rate": 1.6179428918129704e-05, "loss": 0.5334, "step": 27228 }, { "epoch": 0.5774851010583021, "grad_norm": 0.35633793473243713, "learning_rate": 1.617916671300401e-05, "loss": 0.5431, "step": 27229 }, { "epoch": 0.577506309516235, "grad_norm": 0.34367215633392334, "learning_rate": 1.61789045010059e-05, "loss": 0.4906, "step": 27230 }, { "epoch": 0.5775275179741681, "grad_norm": 0.34282225370407104, "learning_rate": 1.617864228213567e-05, "loss": 0.4933, "step": 27231 }, { "epoch": 0.5775487264321011, "grad_norm": 0.3835925757884979, "learning_rate": 1.61783800563936e-05, "loss": 0.5407, "step": 27232 }, { "epoch": 0.5775699348900342, "grad_norm": 0.32216450572013855, "learning_rate": 1.6178117823779995e-05, "loss": 0.5076, "step": 27233 }, { "epoch": 0.5775911433479671, "grad_norm": 0.34297749400138855, "learning_rate": 1.6177855584295135e-05, "loss": 0.4159, "step": 27234 }, { "epoch": 0.5776123518059002, "grad_norm": 0.3490377366542816, "learning_rate": 1.6177593337939322e-05, "loss": 0.5543, "step": 27235 }, { "epoch": 0.5776335602638332, "grad_norm": 0.3607751131057739, "learning_rate": 1.6177331084712842e-05, "loss": 0.5084, "step": 27236 }, { "epoch": 0.5776547687217662, "grad_norm": 0.3409869074821472, "learning_rate": 1.617706882461599e-05, "loss": 0.4773, "step": 27237 }, { "epoch": 0.5776759771796993, "grad_norm": 0.3847016990184784, "learning_rate": 1.6176806557649057e-05, "loss": 0.4711, "step": 27238 }, { "epoch": 0.5776971856376323, "grad_norm": 0.3451760411262512, "learning_rate": 1.617654428381233e-05, "loss": 0.5431, "step": 27239 }, { "epoch": 0.5777183940955654, "grad_norm": 0.3820507228374481, "learning_rate": 1.6176282003106104e-05, "loss": 0.5688, "step": 27240 }, { "epoch": 0.5777396025534983, "grad_norm": 0.3733289837837219, "learning_rate": 1.6176019715530673e-05, "loss": 0.5075, "step": 27241 }, { "epoch": 0.5777608110114314, "grad_norm": 0.6423176527023315, "learning_rate": 1.6175757421086327e-05, "loss": 0.464, "step": 27242 }, { "epoch": 0.5777820194693644, "grad_norm": 0.3259766399860382, "learning_rate": 1.6175495119773353e-05, "loss": 0.4906, "step": 27243 }, { "epoch": 0.5778032279272974, "grad_norm": 0.36327725648880005, "learning_rate": 1.6175232811592053e-05, "loss": 0.5442, "step": 27244 }, { "epoch": 0.5778244363852304, "grad_norm": 0.35158872604370117, "learning_rate": 1.617497049654271e-05, "loss": 0.5319, "step": 27245 }, { "epoch": 0.5778456448431635, "grad_norm": 0.3544219732284546, "learning_rate": 1.6174708174625618e-05, "loss": 0.4697, "step": 27246 }, { "epoch": 0.5778668533010964, "grad_norm": 0.36196252703666687, "learning_rate": 1.6174445845841065e-05, "loss": 0.5089, "step": 27247 }, { "epoch": 0.5778880617590295, "grad_norm": 0.3721238672733307, "learning_rate": 1.6174183510189352e-05, "loss": 0.4672, "step": 27248 }, { "epoch": 0.5779092702169625, "grad_norm": 0.5887489914894104, "learning_rate": 1.6173921167670767e-05, "loss": 0.4769, "step": 27249 }, { "epoch": 0.5779304786748956, "grad_norm": 0.48891931772232056, "learning_rate": 1.6173658818285597e-05, "loss": 0.5331, "step": 27250 }, { "epoch": 0.5779516871328285, "grad_norm": 0.3832511007785797, "learning_rate": 1.6173396462034137e-05, "loss": 0.5103, "step": 27251 }, { "epoch": 0.5779728955907616, "grad_norm": 0.3356964588165283, "learning_rate": 1.6173134098916683e-05, "loss": 0.4787, "step": 27252 }, { "epoch": 0.5779941040486947, "grad_norm": 0.3768174648284912, "learning_rate": 1.6172871728933518e-05, "loss": 0.4886, "step": 27253 }, { "epoch": 0.5780153125066276, "grad_norm": 0.3908938467502594, "learning_rate": 1.617260935208494e-05, "loss": 0.5287, "step": 27254 }, { "epoch": 0.5780365209645607, "grad_norm": 0.31922054290771484, "learning_rate": 1.6172346968371244e-05, "loss": 0.4406, "step": 27255 }, { "epoch": 0.5780577294224937, "grad_norm": 0.378336638212204, "learning_rate": 1.6172084577792714e-05, "loss": 0.488, "step": 27256 }, { "epoch": 0.5780789378804267, "grad_norm": 0.3087892234325409, "learning_rate": 1.6171822180349645e-05, "loss": 0.496, "step": 27257 }, { "epoch": 0.5781001463383597, "grad_norm": 0.3574196696281433, "learning_rate": 1.617155977604233e-05, "loss": 0.4926, "step": 27258 }, { "epoch": 0.5781213547962928, "grad_norm": 0.44687604904174805, "learning_rate": 1.617129736487106e-05, "loss": 0.5313, "step": 27259 }, { "epoch": 0.5781425632542257, "grad_norm": 0.38165390491485596, "learning_rate": 1.6171034946836127e-05, "loss": 0.4533, "step": 27260 }, { "epoch": 0.5781637717121588, "grad_norm": 0.3698848485946655, "learning_rate": 1.6170772521937817e-05, "loss": 0.4883, "step": 27261 }, { "epoch": 0.5781849801700918, "grad_norm": 0.38030847907066345, "learning_rate": 1.6170510090176433e-05, "loss": 0.5229, "step": 27262 }, { "epoch": 0.5782061886280249, "grad_norm": 0.32914257049560547, "learning_rate": 1.6170247651552263e-05, "loss": 0.5124, "step": 27263 }, { "epoch": 0.5782273970859578, "grad_norm": 0.37948867678642273, "learning_rate": 1.616998520606559e-05, "loss": 0.4896, "step": 27264 }, { "epoch": 0.5782486055438909, "grad_norm": 0.3342263996601105, "learning_rate": 1.616972275371672e-05, "loss": 0.4586, "step": 27265 }, { "epoch": 0.578269814001824, "grad_norm": 0.36024945974349976, "learning_rate": 1.6169460294505935e-05, "loss": 0.505, "step": 27266 }, { "epoch": 0.5782910224597569, "grad_norm": 0.8067654967308044, "learning_rate": 1.616919782843353e-05, "loss": 0.436, "step": 27267 }, { "epoch": 0.57831223091769, "grad_norm": 0.4826241135597229, "learning_rate": 1.61689353554998e-05, "loss": 0.3885, "step": 27268 }, { "epoch": 0.578333439375623, "grad_norm": 0.42124319076538086, "learning_rate": 1.6168672875705026e-05, "loss": 0.5569, "step": 27269 }, { "epoch": 0.5783546478335561, "grad_norm": 0.4241694211959839, "learning_rate": 1.6168410389049514e-05, "loss": 0.4669, "step": 27270 }, { "epoch": 0.578375856291489, "grad_norm": 0.34273800253868103, "learning_rate": 1.6168147895533546e-05, "loss": 0.4794, "step": 27271 }, { "epoch": 0.5783970647494221, "grad_norm": 0.34538906812667847, "learning_rate": 1.6167885395157418e-05, "loss": 0.5076, "step": 27272 }, { "epoch": 0.5784182732073551, "grad_norm": 0.3790726959705353, "learning_rate": 1.6167622887921424e-05, "loss": 0.5019, "step": 27273 }, { "epoch": 0.5784394816652881, "grad_norm": 0.3753429651260376, "learning_rate": 1.6167360373825853e-05, "loss": 0.516, "step": 27274 }, { "epoch": 0.5784606901232211, "grad_norm": 0.34570273756980896, "learning_rate": 1.6167097852870998e-05, "loss": 0.3846, "step": 27275 }, { "epoch": 0.5784818985811542, "grad_norm": 0.34052759408950806, "learning_rate": 1.6166835325057145e-05, "loss": 0.5514, "step": 27276 }, { "epoch": 0.5785031070390871, "grad_norm": 0.38694050908088684, "learning_rate": 1.6166572790384595e-05, "loss": 0.5238, "step": 27277 }, { "epoch": 0.5785243154970202, "grad_norm": 0.38529738783836365, "learning_rate": 1.6166310248853637e-05, "loss": 0.4677, "step": 27278 }, { "epoch": 0.5785455239549533, "grad_norm": 0.38553646206855774, "learning_rate": 1.616604770046456e-05, "loss": 0.5537, "step": 27279 }, { "epoch": 0.5785667324128863, "grad_norm": 0.49772390723228455, "learning_rate": 1.6165785145217662e-05, "loss": 0.4673, "step": 27280 }, { "epoch": 0.5785879408708193, "grad_norm": 0.3568219244480133, "learning_rate": 1.616552258311323e-05, "loss": 0.5899, "step": 27281 }, { "epoch": 0.5786091493287523, "grad_norm": 0.3272700905799866, "learning_rate": 1.6165260014151555e-05, "loss": 0.4463, "step": 27282 }, { "epoch": 0.5786303577866854, "grad_norm": 0.3515302538871765, "learning_rate": 1.6164997438332934e-05, "loss": 0.5551, "step": 27283 }, { "epoch": 0.5786515662446183, "grad_norm": 0.7305037379264832, "learning_rate": 1.6164734855657655e-05, "loss": 0.4344, "step": 27284 }, { "epoch": 0.5786727747025514, "grad_norm": 0.3616562485694885, "learning_rate": 1.6164472266126013e-05, "loss": 0.5229, "step": 27285 }, { "epoch": 0.5786939831604844, "grad_norm": 0.34471797943115234, "learning_rate": 1.6164209669738297e-05, "loss": 0.5171, "step": 27286 }, { "epoch": 0.5787151916184174, "grad_norm": 0.3665022552013397, "learning_rate": 1.6163947066494802e-05, "loss": 0.5124, "step": 27287 }, { "epoch": 0.5787364000763504, "grad_norm": 0.3271990120410919, "learning_rate": 1.6163684456395815e-05, "loss": 0.4202, "step": 27288 }, { "epoch": 0.5787576085342835, "grad_norm": 0.3429848849773407, "learning_rate": 1.6163421839441634e-05, "loss": 0.506, "step": 27289 }, { "epoch": 0.5787788169922164, "grad_norm": 0.35624706745147705, "learning_rate": 1.6163159215632548e-05, "loss": 0.4997, "step": 27290 }, { "epoch": 0.5788000254501495, "grad_norm": 0.3709973990917206, "learning_rate": 1.616289658496885e-05, "loss": 0.5247, "step": 27291 }, { "epoch": 0.5788212339080825, "grad_norm": 0.36198845505714417, "learning_rate": 1.6162633947450835e-05, "loss": 0.5184, "step": 27292 }, { "epoch": 0.5788424423660156, "grad_norm": 0.4269043505191803, "learning_rate": 1.616237130307879e-05, "loss": 0.5631, "step": 27293 }, { "epoch": 0.5788636508239486, "grad_norm": 0.4423134922981262, "learning_rate": 1.616210865185301e-05, "loss": 0.546, "step": 27294 }, { "epoch": 0.5788848592818816, "grad_norm": 0.39384162425994873, "learning_rate": 1.6161845993773787e-05, "loss": 0.5314, "step": 27295 }, { "epoch": 0.5789060677398147, "grad_norm": 0.3723881244659424, "learning_rate": 1.6161583328841407e-05, "loss": 0.4352, "step": 27296 }, { "epoch": 0.5789272761977476, "grad_norm": 0.3586970567703247, "learning_rate": 1.616132065705617e-05, "loss": 0.4957, "step": 27297 }, { "epoch": 0.5789484846556807, "grad_norm": 0.3657732307910919, "learning_rate": 1.6161057978418368e-05, "loss": 0.4168, "step": 27298 }, { "epoch": 0.5789696931136137, "grad_norm": 0.3858638107776642, "learning_rate": 1.6160795292928292e-05, "loss": 0.5806, "step": 27299 }, { "epoch": 0.5789909015715468, "grad_norm": 0.319237619638443, "learning_rate": 1.616053260058623e-05, "loss": 0.4517, "step": 27300 }, { "epoch": 0.5790121100294797, "grad_norm": 0.3418307602405548, "learning_rate": 1.616026990139248e-05, "loss": 0.5452, "step": 27301 }, { "epoch": 0.5790333184874128, "grad_norm": 0.37839213013648987, "learning_rate": 1.6160007195347326e-05, "loss": 0.5367, "step": 27302 }, { "epoch": 0.5790545269453458, "grad_norm": 0.38799840211868286, "learning_rate": 1.615974448245107e-05, "loss": 0.5216, "step": 27303 }, { "epoch": 0.5790757354032788, "grad_norm": 0.3416171967983246, "learning_rate": 1.6159481762704e-05, "loss": 0.5139, "step": 27304 }, { "epoch": 0.5790969438612118, "grad_norm": 0.392738401889801, "learning_rate": 1.6159219036106404e-05, "loss": 0.5422, "step": 27305 }, { "epoch": 0.5791181523191449, "grad_norm": 0.3481738567352295, "learning_rate": 1.615895630265858e-05, "loss": 0.4879, "step": 27306 }, { "epoch": 0.579139360777078, "grad_norm": 0.5225796699523926, "learning_rate": 1.6158693562360823e-05, "loss": 0.483, "step": 27307 }, { "epoch": 0.5791605692350109, "grad_norm": 0.3736112117767334, "learning_rate": 1.6158430815213412e-05, "loss": 0.4039, "step": 27308 }, { "epoch": 0.579181777692944, "grad_norm": 0.3687954843044281, "learning_rate": 1.6158168061216653e-05, "loss": 0.4594, "step": 27309 }, { "epoch": 0.579202986150877, "grad_norm": 0.3536270558834076, "learning_rate": 1.6157905300370833e-05, "loss": 0.4379, "step": 27310 }, { "epoch": 0.57922419460881, "grad_norm": 0.3614933490753174, "learning_rate": 1.6157642532676243e-05, "loss": 0.4843, "step": 27311 }, { "epoch": 0.579245403066743, "grad_norm": 0.5102739930152893, "learning_rate": 1.6157379758133175e-05, "loss": 0.5199, "step": 27312 }, { "epoch": 0.5792666115246761, "grad_norm": 0.43253278732299805, "learning_rate": 1.6157116976741923e-05, "loss": 0.5179, "step": 27313 }, { "epoch": 0.579287819982609, "grad_norm": 0.3604075014591217, "learning_rate": 1.6156854188502783e-05, "loss": 0.4233, "step": 27314 }, { "epoch": 0.5793090284405421, "grad_norm": 0.46324393153190613, "learning_rate": 1.6156591393416042e-05, "loss": 0.4467, "step": 27315 }, { "epoch": 0.5793302368984751, "grad_norm": 0.3747723698616028, "learning_rate": 1.6156328591481992e-05, "loss": 0.5558, "step": 27316 }, { "epoch": 0.5793514453564081, "grad_norm": 0.39819830656051636, "learning_rate": 1.6156065782700924e-05, "loss": 0.4915, "step": 27317 }, { "epoch": 0.5793726538143411, "grad_norm": 0.4090100824832916, "learning_rate": 1.6155802967073136e-05, "loss": 0.6458, "step": 27318 }, { "epoch": 0.5793938622722742, "grad_norm": 0.3440916836261749, "learning_rate": 1.615554014459892e-05, "loss": 0.468, "step": 27319 }, { "epoch": 0.5794150707302073, "grad_norm": 0.337365984916687, "learning_rate": 1.6155277315278563e-05, "loss": 0.488, "step": 27320 }, { "epoch": 0.5794362791881402, "grad_norm": 0.38430875539779663, "learning_rate": 1.6155014479112357e-05, "loss": 0.5507, "step": 27321 }, { "epoch": 0.5794574876460733, "grad_norm": 0.31886667013168335, "learning_rate": 1.6154751636100604e-05, "loss": 0.476, "step": 27322 }, { "epoch": 0.5794786961040063, "grad_norm": 0.34271588921546936, "learning_rate": 1.6154488786243585e-05, "loss": 0.387, "step": 27323 }, { "epoch": 0.5794999045619393, "grad_norm": 0.39785757660865784, "learning_rate": 1.6154225929541597e-05, "loss": 0.5969, "step": 27324 }, { "epoch": 0.5795211130198723, "grad_norm": 0.372446209192276, "learning_rate": 1.6153963065994936e-05, "loss": 0.5525, "step": 27325 }, { "epoch": 0.5795423214778054, "grad_norm": 0.3913741409778595, "learning_rate": 1.615370019560389e-05, "loss": 0.4444, "step": 27326 }, { "epoch": 0.5795635299357383, "grad_norm": 0.5088322758674622, "learning_rate": 1.615343731836875e-05, "loss": 0.5686, "step": 27327 }, { "epoch": 0.5795847383936714, "grad_norm": 0.3334987759590149, "learning_rate": 1.615317443428981e-05, "loss": 0.5247, "step": 27328 }, { "epoch": 0.5796059468516044, "grad_norm": 0.38248366117477417, "learning_rate": 1.6152911543367366e-05, "loss": 0.4742, "step": 27329 }, { "epoch": 0.5796271553095375, "grad_norm": 0.33681830763816833, "learning_rate": 1.6152648645601702e-05, "loss": 0.4271, "step": 27330 }, { "epoch": 0.5796483637674704, "grad_norm": 0.4171087443828583, "learning_rate": 1.615238574099312e-05, "loss": 0.547, "step": 27331 }, { "epoch": 0.5796695722254035, "grad_norm": 0.37959787249565125, "learning_rate": 1.6152122829541908e-05, "loss": 0.4693, "step": 27332 }, { "epoch": 0.5796907806833365, "grad_norm": 0.421789288520813, "learning_rate": 1.6151859911248356e-05, "loss": 0.5009, "step": 27333 }, { "epoch": 0.5797119891412695, "grad_norm": 0.34255364537239075, "learning_rate": 1.615159698611276e-05, "loss": 0.4187, "step": 27334 }, { "epoch": 0.5797331975992026, "grad_norm": 0.369098424911499, "learning_rate": 1.6151334054135413e-05, "loss": 0.5042, "step": 27335 }, { "epoch": 0.5797544060571356, "grad_norm": 0.3541073501110077, "learning_rate": 1.6151071115316606e-05, "loss": 0.4739, "step": 27336 }, { "epoch": 0.5797756145150686, "grad_norm": 0.40219414234161377, "learning_rate": 1.6150808169656627e-05, "loss": 0.4688, "step": 27337 }, { "epoch": 0.5797968229730016, "grad_norm": 0.5657691359519958, "learning_rate": 1.6150545217155772e-05, "loss": 0.5396, "step": 27338 }, { "epoch": 0.5798180314309347, "grad_norm": 0.41474485397338867, "learning_rate": 1.615028225781434e-05, "loss": 0.4598, "step": 27339 }, { "epoch": 0.5798392398888677, "grad_norm": 0.36055976152420044, "learning_rate": 1.6150019291632615e-05, "loss": 0.5073, "step": 27340 }, { "epoch": 0.5798604483468007, "grad_norm": 0.3313153088092804, "learning_rate": 1.614975631861089e-05, "loss": 0.5144, "step": 27341 }, { "epoch": 0.5798816568047337, "grad_norm": 0.34302952885627747, "learning_rate": 1.614949333874946e-05, "loss": 0.4975, "step": 27342 }, { "epoch": 0.5799028652626668, "grad_norm": 0.37243449687957764, "learning_rate": 1.6149230352048617e-05, "loss": 0.4405, "step": 27343 }, { "epoch": 0.5799240737205997, "grad_norm": 0.3484342694282532, "learning_rate": 1.6148967358508657e-05, "loss": 0.4293, "step": 27344 }, { "epoch": 0.5799452821785328, "grad_norm": 0.3475634753704071, "learning_rate": 1.6148704358129865e-05, "loss": 0.4084, "step": 27345 }, { "epoch": 0.5799664906364658, "grad_norm": 0.3619990050792694, "learning_rate": 1.6148441350912538e-05, "loss": 0.4824, "step": 27346 }, { "epoch": 0.5799876990943988, "grad_norm": 0.5386241674423218, "learning_rate": 1.614817833685697e-05, "loss": 0.5265, "step": 27347 }, { "epoch": 0.5800089075523319, "grad_norm": 0.3697563707828522, "learning_rate": 1.6147915315963448e-05, "loss": 0.5327, "step": 27348 }, { "epoch": 0.5800301160102649, "grad_norm": 0.3314570188522339, "learning_rate": 1.614765228823227e-05, "loss": 0.5186, "step": 27349 }, { "epoch": 0.580051324468198, "grad_norm": 0.3510321080684662, "learning_rate": 1.6147389253663725e-05, "loss": 0.4907, "step": 27350 }, { "epoch": 0.5800725329261309, "grad_norm": 0.339106023311615, "learning_rate": 1.614712621225811e-05, "loss": 0.5359, "step": 27351 }, { "epoch": 0.580093741384064, "grad_norm": 0.3537323772907257, "learning_rate": 1.614686316401571e-05, "loss": 0.4841, "step": 27352 }, { "epoch": 0.580114949841997, "grad_norm": 0.37572023272514343, "learning_rate": 1.6146600108936828e-05, "loss": 0.437, "step": 27353 }, { "epoch": 0.58013615829993, "grad_norm": 0.3460825979709625, "learning_rate": 1.614633704702175e-05, "loss": 0.4768, "step": 27354 }, { "epoch": 0.580157366757863, "grad_norm": 0.39526015520095825, "learning_rate": 1.6146073978270766e-05, "loss": 0.4928, "step": 27355 }, { "epoch": 0.5801785752157961, "grad_norm": 0.4067058265209198, "learning_rate": 1.6145810902684172e-05, "loss": 0.5181, "step": 27356 }, { "epoch": 0.580199783673729, "grad_norm": 0.3362147808074951, "learning_rate": 1.6145547820262263e-05, "loss": 0.54, "step": 27357 }, { "epoch": 0.5802209921316621, "grad_norm": 0.39299482107162476, "learning_rate": 1.6145284731005326e-05, "loss": 0.4947, "step": 27358 }, { "epoch": 0.5802422005895951, "grad_norm": 0.32317090034484863, "learning_rate": 1.614502163491366e-05, "loss": 0.4983, "step": 27359 }, { "epoch": 0.5802634090475282, "grad_norm": 0.339942991733551, "learning_rate": 1.6144758531987554e-05, "loss": 0.3997, "step": 27360 }, { "epoch": 0.5802846175054612, "grad_norm": 0.4697127342224121, "learning_rate": 1.61444954222273e-05, "loss": 0.4764, "step": 27361 }, { "epoch": 0.5803058259633942, "grad_norm": 0.3320270776748657, "learning_rate": 1.614423230563319e-05, "loss": 0.5105, "step": 27362 }, { "epoch": 0.5803270344213273, "grad_norm": 0.361173152923584, "learning_rate": 1.614396918220552e-05, "loss": 0.5148, "step": 27363 }, { "epoch": 0.5803482428792602, "grad_norm": 0.34913086891174316, "learning_rate": 1.614370605194458e-05, "loss": 0.5463, "step": 27364 }, { "epoch": 0.5803694513371933, "grad_norm": 0.34915250539779663, "learning_rate": 1.6143442914850664e-05, "loss": 0.5434, "step": 27365 }, { "epoch": 0.5803906597951263, "grad_norm": 0.36584118008613586, "learning_rate": 1.6143179770924063e-05, "loss": 0.5306, "step": 27366 }, { "epoch": 0.5804118682530593, "grad_norm": 0.35038483142852783, "learning_rate": 1.6142916620165074e-05, "loss": 0.4867, "step": 27367 }, { "epoch": 0.5804330767109923, "grad_norm": 0.3035221993923187, "learning_rate": 1.6142653462573985e-05, "loss": 0.4582, "step": 27368 }, { "epoch": 0.5804542851689254, "grad_norm": 0.3515782654285431, "learning_rate": 1.6142390298151087e-05, "loss": 0.4975, "step": 27369 }, { "epoch": 0.5804754936268584, "grad_norm": 0.36482611298561096, "learning_rate": 1.6142127126896682e-05, "loss": 0.4799, "step": 27370 }, { "epoch": 0.5804967020847914, "grad_norm": 0.3483511507511139, "learning_rate": 1.6141863948811048e-05, "loss": 0.4831, "step": 27371 }, { "epoch": 0.5805179105427244, "grad_norm": 0.3629274368286133, "learning_rate": 1.6141600763894493e-05, "loss": 0.5007, "step": 27372 }, { "epoch": 0.5805391190006575, "grad_norm": 0.37743639945983887, "learning_rate": 1.6141337572147305e-05, "loss": 0.5131, "step": 27373 }, { "epoch": 0.5805603274585904, "grad_norm": 0.4040844440460205, "learning_rate": 1.614107437356977e-05, "loss": 0.4591, "step": 27374 }, { "epoch": 0.5805815359165235, "grad_norm": 0.31170469522476196, "learning_rate": 1.6140811168162183e-05, "loss": 0.5227, "step": 27375 }, { "epoch": 0.5806027443744566, "grad_norm": 0.3620034456253052, "learning_rate": 1.6140547955924844e-05, "loss": 0.5451, "step": 27376 }, { "epoch": 0.5806239528323895, "grad_norm": 0.36828577518463135, "learning_rate": 1.6140284736858042e-05, "loss": 0.4488, "step": 27377 }, { "epoch": 0.5806451612903226, "grad_norm": 0.4251807928085327, "learning_rate": 1.6140021510962063e-05, "loss": 0.5239, "step": 27378 }, { "epoch": 0.5806663697482556, "grad_norm": 0.39401936531066895, "learning_rate": 1.613975827823721e-05, "loss": 0.508, "step": 27379 }, { "epoch": 0.5806875782061887, "grad_norm": 0.38087329268455505, "learning_rate": 1.613949503868377e-05, "loss": 0.4514, "step": 27380 }, { "epoch": 0.5807087866641216, "grad_norm": 0.36752021312713623, "learning_rate": 1.6139231792302036e-05, "loss": 0.5069, "step": 27381 }, { "epoch": 0.5807299951220547, "grad_norm": 0.34716904163360596, "learning_rate": 1.61389685390923e-05, "loss": 0.6008, "step": 27382 }, { "epoch": 0.5807512035799877, "grad_norm": 0.44790133833885193, "learning_rate": 1.613870527905486e-05, "loss": 0.5362, "step": 27383 }, { "epoch": 0.5807724120379207, "grad_norm": 0.35093018412590027, "learning_rate": 1.6138442012190006e-05, "loss": 0.4848, "step": 27384 }, { "epoch": 0.5807936204958537, "grad_norm": 0.32828131318092346, "learning_rate": 1.613817873849803e-05, "loss": 0.4319, "step": 27385 }, { "epoch": 0.5808148289537868, "grad_norm": 0.41657906770706177, "learning_rate": 1.613791545797922e-05, "loss": 0.4904, "step": 27386 }, { "epoch": 0.5808360374117197, "grad_norm": 0.3858339488506317, "learning_rate": 1.613765217063388e-05, "loss": 0.4631, "step": 27387 }, { "epoch": 0.5808572458696528, "grad_norm": 0.33528977632522583, "learning_rate": 1.6137388876462293e-05, "loss": 0.4795, "step": 27388 }, { "epoch": 0.5808784543275859, "grad_norm": 0.31497451663017273, "learning_rate": 1.6137125575464755e-05, "loss": 0.4457, "step": 27389 }, { "epoch": 0.5808996627855189, "grad_norm": 0.3132936656475067, "learning_rate": 1.613686226764156e-05, "loss": 0.5012, "step": 27390 }, { "epoch": 0.5809208712434519, "grad_norm": 0.35055238008499146, "learning_rate": 1.6136598952993003e-05, "loss": 0.4475, "step": 27391 }, { "epoch": 0.5809420797013849, "grad_norm": 0.3382524251937866, "learning_rate": 1.613633563151937e-05, "loss": 0.4964, "step": 27392 }, { "epoch": 0.580963288159318, "grad_norm": 0.3591499924659729, "learning_rate": 1.613607230322096e-05, "loss": 0.4589, "step": 27393 }, { "epoch": 0.5809844966172509, "grad_norm": 0.31068697571754456, "learning_rate": 1.6135808968098063e-05, "loss": 0.4452, "step": 27394 }, { "epoch": 0.581005705075184, "grad_norm": 0.3469933271408081, "learning_rate": 1.613554562615097e-05, "loss": 0.522, "step": 27395 }, { "epoch": 0.581026913533117, "grad_norm": 0.5280201435089111, "learning_rate": 1.613528227737998e-05, "loss": 0.5373, "step": 27396 }, { "epoch": 0.58104812199105, "grad_norm": 0.3574022948741913, "learning_rate": 1.613501892178538e-05, "loss": 0.5445, "step": 27397 }, { "epoch": 0.581069330448983, "grad_norm": 0.3313269019126892, "learning_rate": 1.6134755559367467e-05, "loss": 0.5094, "step": 27398 }, { "epoch": 0.5810905389069161, "grad_norm": 0.33444324135780334, "learning_rate": 1.613449219012653e-05, "loss": 0.39, "step": 27399 }, { "epoch": 0.581111747364849, "grad_norm": 0.3525210916996002, "learning_rate": 1.613422881406287e-05, "loss": 0.5137, "step": 27400 }, { "epoch": 0.5811329558227821, "grad_norm": 0.3453482985496521, "learning_rate": 1.6133965431176768e-05, "loss": 0.547, "step": 27401 }, { "epoch": 0.5811541642807152, "grad_norm": 0.356182336807251, "learning_rate": 1.6133702041468524e-05, "loss": 0.5221, "step": 27402 }, { "epoch": 0.5811753727386482, "grad_norm": 0.3457324802875519, "learning_rate": 1.613343864493843e-05, "loss": 0.428, "step": 27403 }, { "epoch": 0.5811965811965812, "grad_norm": 0.31971505284309387, "learning_rate": 1.613317524158678e-05, "loss": 0.4509, "step": 27404 }, { "epoch": 0.5812177896545142, "grad_norm": 0.5139031410217285, "learning_rate": 1.6132911831413866e-05, "loss": 0.593, "step": 27405 }, { "epoch": 0.5812389981124473, "grad_norm": 0.47789302468299866, "learning_rate": 1.613264841441998e-05, "loss": 0.4884, "step": 27406 }, { "epoch": 0.5812602065703802, "grad_norm": 0.3530166447162628, "learning_rate": 1.6132384990605416e-05, "loss": 0.461, "step": 27407 }, { "epoch": 0.5812814150283133, "grad_norm": 0.37213125824928284, "learning_rate": 1.6132121559970464e-05, "loss": 0.5688, "step": 27408 }, { "epoch": 0.5813026234862463, "grad_norm": 0.42292794585227966, "learning_rate": 1.6131858122515424e-05, "loss": 0.517, "step": 27409 }, { "epoch": 0.5813238319441794, "grad_norm": 0.326577752828598, "learning_rate": 1.6131594678240582e-05, "loss": 0.549, "step": 27410 }, { "epoch": 0.5813450404021123, "grad_norm": 0.3479582965373993, "learning_rate": 1.6131331227146234e-05, "loss": 0.5054, "step": 27411 }, { "epoch": 0.5813662488600454, "grad_norm": 0.43343546986579895, "learning_rate": 1.6131067769232675e-05, "loss": 0.4963, "step": 27412 }, { "epoch": 0.5813874573179784, "grad_norm": 0.40205225348472595, "learning_rate": 1.6130804304500195e-05, "loss": 0.5032, "step": 27413 }, { "epoch": 0.5814086657759114, "grad_norm": 0.3248078525066376, "learning_rate": 1.6130540832949083e-05, "loss": 0.487, "step": 27414 }, { "epoch": 0.5814298742338445, "grad_norm": 0.45999467372894287, "learning_rate": 1.613027735457964e-05, "loss": 0.5328, "step": 27415 }, { "epoch": 0.5814510826917775, "grad_norm": 0.37321150302886963, "learning_rate": 1.6130013869392156e-05, "loss": 0.5477, "step": 27416 }, { "epoch": 0.5814722911497106, "grad_norm": 0.3512813448905945, "learning_rate": 1.6129750377386923e-05, "loss": 0.5555, "step": 27417 }, { "epoch": 0.5814934996076435, "grad_norm": 0.49268847703933716, "learning_rate": 1.6129486878564237e-05, "loss": 0.6295, "step": 27418 }, { "epoch": 0.5815147080655766, "grad_norm": 0.32423892617225647, "learning_rate": 1.6129223372924386e-05, "loss": 0.4798, "step": 27419 }, { "epoch": 0.5815359165235096, "grad_norm": 0.3351018726825714, "learning_rate": 1.6128959860467668e-05, "loss": 0.533, "step": 27420 }, { "epoch": 0.5815571249814426, "grad_norm": 0.40762975811958313, "learning_rate": 1.6128696341194374e-05, "loss": 0.5317, "step": 27421 }, { "epoch": 0.5815783334393756, "grad_norm": 0.3805203437805176, "learning_rate": 1.6128432815104796e-05, "loss": 0.4722, "step": 27422 }, { "epoch": 0.5815995418973087, "grad_norm": 0.37757226824760437, "learning_rate": 1.6128169282199232e-05, "loss": 0.4696, "step": 27423 }, { "epoch": 0.5816207503552416, "grad_norm": 0.3546440303325653, "learning_rate": 1.6127905742477967e-05, "loss": 0.456, "step": 27424 }, { "epoch": 0.5816419588131747, "grad_norm": 0.45492884516716003, "learning_rate": 1.61276421959413e-05, "loss": 0.4618, "step": 27425 }, { "epoch": 0.5816631672711077, "grad_norm": 0.36892056465148926, "learning_rate": 1.612737864258952e-05, "loss": 0.4869, "step": 27426 }, { "epoch": 0.5816843757290407, "grad_norm": 0.46614736318588257, "learning_rate": 1.6127115082422927e-05, "loss": 0.5351, "step": 27427 }, { "epoch": 0.5817055841869737, "grad_norm": 0.3669290840625763, "learning_rate": 1.6126851515441807e-05, "loss": 0.4868, "step": 27428 }, { "epoch": 0.5817267926449068, "grad_norm": 0.3511391878128052, "learning_rate": 1.612658794164646e-05, "loss": 0.5195, "step": 27429 }, { "epoch": 0.5817480011028399, "grad_norm": 0.3514500856399536, "learning_rate": 1.612632436103717e-05, "loss": 0.4785, "step": 27430 }, { "epoch": 0.5817692095607728, "grad_norm": 0.3349292278289795, "learning_rate": 1.6126060773614237e-05, "loss": 0.5653, "step": 27431 }, { "epoch": 0.5817904180187059, "grad_norm": 0.35221222043037415, "learning_rate": 1.6125797179377952e-05, "loss": 0.5672, "step": 27432 }, { "epoch": 0.5818116264766389, "grad_norm": 0.35189640522003174, "learning_rate": 1.6125533578328608e-05, "loss": 0.5297, "step": 27433 }, { "epoch": 0.5818328349345719, "grad_norm": 0.3393426239490509, "learning_rate": 1.6125269970466503e-05, "loss": 0.5079, "step": 27434 }, { "epoch": 0.5818540433925049, "grad_norm": 0.36630865931510925, "learning_rate": 1.6125006355791923e-05, "loss": 0.5131, "step": 27435 }, { "epoch": 0.581875251850438, "grad_norm": 0.3337786793708801, "learning_rate": 1.6124742734305164e-05, "loss": 0.506, "step": 27436 }, { "epoch": 0.5818964603083709, "grad_norm": 0.3523378074169159, "learning_rate": 1.612447910600652e-05, "loss": 0.5389, "step": 27437 }, { "epoch": 0.581917668766304, "grad_norm": 0.3894362449645996, "learning_rate": 1.6124215470896283e-05, "loss": 0.5171, "step": 27438 }, { "epoch": 0.581938877224237, "grad_norm": 0.416511207818985, "learning_rate": 1.6123951828974746e-05, "loss": 0.4739, "step": 27439 }, { "epoch": 0.5819600856821701, "grad_norm": 0.3386971354484558, "learning_rate": 1.6123688180242204e-05, "loss": 0.4489, "step": 27440 }, { "epoch": 0.581981294140103, "grad_norm": 0.3956243395805359, "learning_rate": 1.6123424524698948e-05, "loss": 0.5001, "step": 27441 }, { "epoch": 0.5820025025980361, "grad_norm": 0.3876984119415283, "learning_rate": 1.6123160862345276e-05, "loss": 0.4595, "step": 27442 }, { "epoch": 0.5820237110559692, "grad_norm": 0.3986627459526062, "learning_rate": 1.6122897193181476e-05, "loss": 0.4315, "step": 27443 }, { "epoch": 0.5820449195139021, "grad_norm": 0.34854617714881897, "learning_rate": 1.6122633517207842e-05, "loss": 0.4829, "step": 27444 }, { "epoch": 0.5820661279718352, "grad_norm": 0.3405173420906067, "learning_rate": 1.6122369834424667e-05, "loss": 0.4938, "step": 27445 }, { "epoch": 0.5820873364297682, "grad_norm": 0.35584843158721924, "learning_rate": 1.6122106144832245e-05, "loss": 0.5141, "step": 27446 }, { "epoch": 0.5821085448877013, "grad_norm": 0.3397946059703827, "learning_rate": 1.6121842448430874e-05, "loss": 0.4578, "step": 27447 }, { "epoch": 0.5821297533456342, "grad_norm": 0.4410704970359802, "learning_rate": 1.612157874522084e-05, "loss": 0.5044, "step": 27448 }, { "epoch": 0.5821509618035673, "grad_norm": 0.3575468957424164, "learning_rate": 1.612131503520244e-05, "loss": 0.4948, "step": 27449 }, { "epoch": 0.5821721702615003, "grad_norm": 0.3735875189304352, "learning_rate": 1.6121051318375968e-05, "loss": 0.509, "step": 27450 }, { "epoch": 0.5821933787194333, "grad_norm": 0.3539387285709381, "learning_rate": 1.612078759474171e-05, "loss": 0.5006, "step": 27451 }, { "epoch": 0.5822145871773663, "grad_norm": 0.34290626645088196, "learning_rate": 1.612052386429997e-05, "loss": 0.4806, "step": 27452 }, { "epoch": 0.5822357956352994, "grad_norm": 0.3936346173286438, "learning_rate": 1.6120260127051035e-05, "loss": 0.4594, "step": 27453 }, { "epoch": 0.5822570040932323, "grad_norm": 0.37940970063209534, "learning_rate": 1.61199963829952e-05, "loss": 0.571, "step": 27454 }, { "epoch": 0.5822782125511654, "grad_norm": 0.4130101203918457, "learning_rate": 1.6119732632132762e-05, "loss": 0.4615, "step": 27455 }, { "epoch": 0.5822994210090985, "grad_norm": 0.3311595916748047, "learning_rate": 1.6119468874464007e-05, "loss": 0.4616, "step": 27456 }, { "epoch": 0.5823206294670314, "grad_norm": 0.3393121659755707, "learning_rate": 1.6119205109989234e-05, "loss": 0.5138, "step": 27457 }, { "epoch": 0.5823418379249645, "grad_norm": 0.3188527524471283, "learning_rate": 1.611894133870873e-05, "loss": 0.4708, "step": 27458 }, { "epoch": 0.5823630463828975, "grad_norm": 0.3404874801635742, "learning_rate": 1.61186775606228e-05, "loss": 0.4996, "step": 27459 }, { "epoch": 0.5823842548408306, "grad_norm": 0.33009546995162964, "learning_rate": 1.611841377573172e-05, "loss": 0.4663, "step": 27460 }, { "epoch": 0.5824054632987635, "grad_norm": 0.3385879397392273, "learning_rate": 1.61181499840358e-05, "loss": 0.4669, "step": 27461 }, { "epoch": 0.5824266717566966, "grad_norm": 0.3459616005420685, "learning_rate": 1.6117886185535325e-05, "loss": 0.4955, "step": 27462 }, { "epoch": 0.5824478802146296, "grad_norm": 0.390953928232193, "learning_rate": 1.6117622380230593e-05, "loss": 0.4457, "step": 27463 }, { "epoch": 0.5824690886725626, "grad_norm": 0.37277746200561523, "learning_rate": 1.6117358568121892e-05, "loss": 0.5946, "step": 27464 }, { "epoch": 0.5824902971304956, "grad_norm": 0.330091655254364, "learning_rate": 1.6117094749209518e-05, "loss": 0.4702, "step": 27465 }, { "epoch": 0.5825115055884287, "grad_norm": 0.3148167133331299, "learning_rate": 1.611683092349376e-05, "loss": 0.4203, "step": 27466 }, { "epoch": 0.5825327140463616, "grad_norm": 0.557042121887207, "learning_rate": 1.6116567090974924e-05, "loss": 0.4506, "step": 27467 }, { "epoch": 0.5825539225042947, "grad_norm": 0.5126620531082153, "learning_rate": 1.611630325165329e-05, "loss": 0.4849, "step": 27468 }, { "epoch": 0.5825751309622277, "grad_norm": 0.34337523579597473, "learning_rate": 1.611603940552916e-05, "loss": 0.4797, "step": 27469 }, { "epoch": 0.5825963394201608, "grad_norm": 0.3617863059043884, "learning_rate": 1.6115775552602818e-05, "loss": 0.501, "step": 27470 }, { "epoch": 0.5826175478780938, "grad_norm": 0.36118659377098083, "learning_rate": 1.611551169287457e-05, "loss": 0.5412, "step": 27471 }, { "epoch": 0.5826387563360268, "grad_norm": 0.36134758591651917, "learning_rate": 1.61152478263447e-05, "loss": 0.5359, "step": 27472 }, { "epoch": 0.5826599647939599, "grad_norm": 0.3755868673324585, "learning_rate": 1.6114983953013504e-05, "loss": 0.614, "step": 27473 }, { "epoch": 0.5826811732518928, "grad_norm": 0.41309550404548645, "learning_rate": 1.6114720072881278e-05, "loss": 0.4653, "step": 27474 }, { "epoch": 0.5827023817098259, "grad_norm": 0.3201541602611542, "learning_rate": 1.6114456185948312e-05, "loss": 0.4532, "step": 27475 }, { "epoch": 0.5827235901677589, "grad_norm": 0.36269083619117737, "learning_rate": 1.6114192292214902e-05, "loss": 0.5491, "step": 27476 }, { "epoch": 0.582744798625692, "grad_norm": 0.528218686580658, "learning_rate": 1.611392839168134e-05, "loss": 0.5231, "step": 27477 }, { "epoch": 0.5827660070836249, "grad_norm": 0.35506725311279297, "learning_rate": 1.611366448434792e-05, "loss": 0.5383, "step": 27478 }, { "epoch": 0.582787215541558, "grad_norm": 0.3337475061416626, "learning_rate": 1.6113400570214937e-05, "loss": 0.439, "step": 27479 }, { "epoch": 0.582808423999491, "grad_norm": 0.4290772080421448, "learning_rate": 1.6113136649282682e-05, "loss": 0.4498, "step": 27480 }, { "epoch": 0.582829632457424, "grad_norm": 0.38060423731803894, "learning_rate": 1.6112872721551446e-05, "loss": 0.5202, "step": 27481 }, { "epoch": 0.582850840915357, "grad_norm": 0.3314967453479767, "learning_rate": 1.611260878702153e-05, "loss": 0.4369, "step": 27482 }, { "epoch": 0.5828720493732901, "grad_norm": 0.3416404724121094, "learning_rate": 1.611234484569322e-05, "loss": 0.5063, "step": 27483 }, { "epoch": 0.5828932578312231, "grad_norm": 0.3434464931488037, "learning_rate": 1.6112080897566817e-05, "loss": 0.4643, "step": 27484 }, { "epoch": 0.5829144662891561, "grad_norm": 0.3362754285335541, "learning_rate": 1.6111816942642612e-05, "loss": 0.5376, "step": 27485 }, { "epoch": 0.5829356747470892, "grad_norm": 0.3607109487056732, "learning_rate": 1.6111552980920896e-05, "loss": 0.4901, "step": 27486 }, { "epoch": 0.5829568832050221, "grad_norm": 0.3470945358276367, "learning_rate": 1.611128901240196e-05, "loss": 0.4761, "step": 27487 }, { "epoch": 0.5829780916629552, "grad_norm": 0.39844003319740295, "learning_rate": 1.6111025037086105e-05, "loss": 0.4753, "step": 27488 }, { "epoch": 0.5829993001208882, "grad_norm": 0.5024241805076599, "learning_rate": 1.611076105497362e-05, "loss": 0.4631, "step": 27489 }, { "epoch": 0.5830205085788213, "grad_norm": 0.40458914637565613, "learning_rate": 1.61104970660648e-05, "loss": 0.521, "step": 27490 }, { "epoch": 0.5830417170367542, "grad_norm": 1.0586659908294678, "learning_rate": 1.6110233070359934e-05, "loss": 0.4551, "step": 27491 }, { "epoch": 0.5830629254946873, "grad_norm": 0.5041073560714722, "learning_rate": 1.6109969067859325e-05, "loss": 0.5729, "step": 27492 }, { "epoch": 0.5830841339526203, "grad_norm": 0.3827250897884369, "learning_rate": 1.6109705058563265e-05, "loss": 0.4595, "step": 27493 }, { "epoch": 0.5831053424105533, "grad_norm": 0.3568129241466522, "learning_rate": 1.6109441042472035e-05, "loss": 0.4728, "step": 27494 }, { "epoch": 0.5831265508684863, "grad_norm": 0.49740466475486755, "learning_rate": 1.610917701958594e-05, "loss": 0.4652, "step": 27495 }, { "epoch": 0.5831477593264194, "grad_norm": 0.3280240595340729, "learning_rate": 1.6108912989905278e-05, "loss": 0.4516, "step": 27496 }, { "epoch": 0.5831689677843525, "grad_norm": 0.6502081155776978, "learning_rate": 1.610864895343033e-05, "loss": 0.5189, "step": 27497 }, { "epoch": 0.5831901762422854, "grad_norm": 0.3546232581138611, "learning_rate": 1.6108384910161397e-05, "loss": 0.4927, "step": 27498 }, { "epoch": 0.5832113847002185, "grad_norm": 0.3679298460483551, "learning_rate": 1.6108120860098773e-05, "loss": 0.5188, "step": 27499 }, { "epoch": 0.5832325931581515, "grad_norm": 0.41277801990509033, "learning_rate": 1.6107856803242748e-05, "loss": 0.5522, "step": 27500 }, { "epoch": 0.5832538016160845, "grad_norm": 0.7343049645423889, "learning_rate": 1.6107592739593617e-05, "loss": 0.5524, "step": 27501 }, { "epoch": 0.5832750100740175, "grad_norm": 0.3413934111595154, "learning_rate": 1.610732866915168e-05, "loss": 0.5355, "step": 27502 }, { "epoch": 0.5832962185319506, "grad_norm": 0.40389955043792725, "learning_rate": 1.6107064591917217e-05, "loss": 0.5485, "step": 27503 }, { "epoch": 0.5833174269898835, "grad_norm": 0.3739722669124603, "learning_rate": 1.6106800507890533e-05, "loss": 0.5194, "step": 27504 }, { "epoch": 0.5833386354478166, "grad_norm": 0.3576752543449402, "learning_rate": 1.610653641707192e-05, "loss": 0.4781, "step": 27505 }, { "epoch": 0.5833598439057496, "grad_norm": 0.35380181670188904, "learning_rate": 1.610627231946167e-05, "loss": 0.4743, "step": 27506 }, { "epoch": 0.5833810523636827, "grad_norm": 0.40138474106788635, "learning_rate": 1.610600821506008e-05, "loss": 0.5515, "step": 27507 }, { "epoch": 0.5834022608216156, "grad_norm": 0.3397049903869629, "learning_rate": 1.6105744103867437e-05, "loss": 0.4242, "step": 27508 }, { "epoch": 0.5834234692795487, "grad_norm": 0.35317566990852356, "learning_rate": 1.6105479985884036e-05, "loss": 0.5183, "step": 27509 }, { "epoch": 0.5834446777374817, "grad_norm": 0.3095444142818451, "learning_rate": 1.6105215861110175e-05, "loss": 0.4417, "step": 27510 }, { "epoch": 0.5834658861954147, "grad_norm": 0.34343427419662476, "learning_rate": 1.610495172954615e-05, "loss": 0.4981, "step": 27511 }, { "epoch": 0.5834870946533478, "grad_norm": 0.44092321395874023, "learning_rate": 1.6104687591192246e-05, "loss": 0.5166, "step": 27512 }, { "epoch": 0.5835083031112808, "grad_norm": 0.3566458225250244, "learning_rate": 1.6104423446048764e-05, "loss": 0.5116, "step": 27513 }, { "epoch": 0.5835295115692138, "grad_norm": 0.37214940786361694, "learning_rate": 1.6104159294115994e-05, "loss": 0.6077, "step": 27514 }, { "epoch": 0.5835507200271468, "grad_norm": 0.3593709170818329, "learning_rate": 1.6103895135394232e-05, "loss": 0.4636, "step": 27515 }, { "epoch": 0.5835719284850799, "grad_norm": 0.35259148478507996, "learning_rate": 1.6103630969883768e-05, "loss": 0.4771, "step": 27516 }, { "epoch": 0.5835931369430128, "grad_norm": 0.377271831035614, "learning_rate": 1.61033667975849e-05, "loss": 0.4237, "step": 27517 }, { "epoch": 0.5836143454009459, "grad_norm": 0.3859461843967438, "learning_rate": 1.6103102618497922e-05, "loss": 0.4756, "step": 27518 }, { "epoch": 0.5836355538588789, "grad_norm": 0.38761329650878906, "learning_rate": 1.6102838432623123e-05, "loss": 0.5074, "step": 27519 }, { "epoch": 0.583656762316812, "grad_norm": 0.36501774191856384, "learning_rate": 1.6102574239960805e-05, "loss": 0.4885, "step": 27520 }, { "epoch": 0.5836779707747449, "grad_norm": 0.344719797372818, "learning_rate": 1.6102310040511256e-05, "loss": 0.4408, "step": 27521 }, { "epoch": 0.583699179232678, "grad_norm": 0.33717888593673706, "learning_rate": 1.610204583427477e-05, "loss": 0.4845, "step": 27522 }, { "epoch": 0.583720387690611, "grad_norm": 0.3074275851249695, "learning_rate": 1.610178162125164e-05, "loss": 0.4735, "step": 27523 }, { "epoch": 0.583741596148544, "grad_norm": 0.40289047360420227, "learning_rate": 1.6101517401442163e-05, "loss": 0.4176, "step": 27524 }, { "epoch": 0.5837628046064771, "grad_norm": 0.3250584900379181, "learning_rate": 1.610125317484663e-05, "loss": 0.4214, "step": 27525 }, { "epoch": 0.5837840130644101, "grad_norm": 0.4624856412410736, "learning_rate": 1.6100988941465338e-05, "loss": 0.5027, "step": 27526 }, { "epoch": 0.5838052215223432, "grad_norm": 0.3740153908729553, "learning_rate": 1.610072470129858e-05, "loss": 0.4709, "step": 27527 }, { "epoch": 0.5838264299802761, "grad_norm": 0.5728452205657959, "learning_rate": 1.6100460454346647e-05, "loss": 0.4862, "step": 27528 }, { "epoch": 0.5838476384382092, "grad_norm": 0.34361502528190613, "learning_rate": 1.610019620060983e-05, "loss": 0.5042, "step": 27529 }, { "epoch": 0.5838688468961422, "grad_norm": 0.42598915100097656, "learning_rate": 1.6099931940088437e-05, "loss": 0.54, "step": 27530 }, { "epoch": 0.5838900553540752, "grad_norm": 0.3756542205810547, "learning_rate": 1.609966767278275e-05, "loss": 0.4842, "step": 27531 }, { "epoch": 0.5839112638120082, "grad_norm": 0.31662195920944214, "learning_rate": 1.609940339869306e-05, "loss": 0.4935, "step": 27532 }, { "epoch": 0.5839324722699413, "grad_norm": 0.3770727515220642, "learning_rate": 1.6099139117819674e-05, "loss": 0.5648, "step": 27533 }, { "epoch": 0.5839536807278742, "grad_norm": 0.35468754172325134, "learning_rate": 1.6098874830162875e-05, "loss": 0.5257, "step": 27534 }, { "epoch": 0.5839748891858073, "grad_norm": 0.3597412109375, "learning_rate": 1.6098610535722958e-05, "loss": 0.5511, "step": 27535 }, { "epoch": 0.5839960976437403, "grad_norm": 0.34780606627464294, "learning_rate": 1.609834623450022e-05, "loss": 0.4848, "step": 27536 }, { "epoch": 0.5840173061016734, "grad_norm": 0.36126425862312317, "learning_rate": 1.6098081926494957e-05, "loss": 0.5222, "step": 27537 }, { "epoch": 0.5840385145596064, "grad_norm": 0.34347307682037354, "learning_rate": 1.6097817611707462e-05, "loss": 0.4923, "step": 27538 }, { "epoch": 0.5840597230175394, "grad_norm": 0.34662747383117676, "learning_rate": 1.6097553290138025e-05, "loss": 0.4855, "step": 27539 }, { "epoch": 0.5840809314754725, "grad_norm": 0.3646421432495117, "learning_rate": 1.609728896178694e-05, "loss": 0.4735, "step": 27540 }, { "epoch": 0.5841021399334054, "grad_norm": 0.3799313008785248, "learning_rate": 1.6097024626654508e-05, "loss": 0.4981, "step": 27541 }, { "epoch": 0.5841233483913385, "grad_norm": 0.47972503304481506, "learning_rate": 1.6096760284741013e-05, "loss": 0.4687, "step": 27542 }, { "epoch": 0.5841445568492715, "grad_norm": 0.32172539830207825, "learning_rate": 1.6096495936046756e-05, "loss": 0.5128, "step": 27543 }, { "epoch": 0.5841657653072045, "grad_norm": 0.353533536195755, "learning_rate": 1.609623158057203e-05, "loss": 0.541, "step": 27544 }, { "epoch": 0.5841869737651375, "grad_norm": 0.39552590250968933, "learning_rate": 1.609596721831713e-05, "loss": 0.4972, "step": 27545 }, { "epoch": 0.5842081822230706, "grad_norm": 0.33189475536346436, "learning_rate": 1.6095702849282346e-05, "loss": 0.4612, "step": 27546 }, { "epoch": 0.5842293906810035, "grad_norm": 0.3853816092014313, "learning_rate": 1.6095438473467974e-05, "loss": 0.5191, "step": 27547 }, { "epoch": 0.5842505991389366, "grad_norm": 0.3465403616428375, "learning_rate": 1.6095174090874306e-05, "loss": 0.4549, "step": 27548 }, { "epoch": 0.5842718075968696, "grad_norm": 0.37710580229759216, "learning_rate": 1.6094909701501644e-05, "loss": 0.5656, "step": 27549 }, { "epoch": 0.5842930160548027, "grad_norm": 0.36621513962745667, "learning_rate": 1.609464530535027e-05, "loss": 0.5081, "step": 27550 }, { "epoch": 0.5843142245127356, "grad_norm": 0.44001415371894836, "learning_rate": 1.609438090242049e-05, "loss": 0.5068, "step": 27551 }, { "epoch": 0.5843354329706687, "grad_norm": 0.36241385340690613, "learning_rate": 1.609411649271259e-05, "loss": 0.5137, "step": 27552 }, { "epoch": 0.5843566414286018, "grad_norm": 0.3329770565032959, "learning_rate": 1.6093852076226867e-05, "loss": 0.4068, "step": 27553 }, { "epoch": 0.5843778498865347, "grad_norm": 0.48732027411460876, "learning_rate": 1.6093587652963614e-05, "loss": 0.5902, "step": 27554 }, { "epoch": 0.5843990583444678, "grad_norm": 0.40279850363731384, "learning_rate": 1.6093323222923124e-05, "loss": 0.5044, "step": 27555 }, { "epoch": 0.5844202668024008, "grad_norm": 0.3939559757709503, "learning_rate": 1.6093058786105696e-05, "loss": 0.4727, "step": 27556 }, { "epoch": 0.5844414752603339, "grad_norm": 0.3148360252380371, "learning_rate": 1.6092794342511623e-05, "loss": 0.4728, "step": 27557 }, { "epoch": 0.5844626837182668, "grad_norm": 0.33108770847320557, "learning_rate": 1.609252989214119e-05, "loss": 0.5159, "step": 27558 }, { "epoch": 0.5844838921761999, "grad_norm": 0.37824976444244385, "learning_rate": 1.6092265434994705e-05, "loss": 0.5677, "step": 27559 }, { "epoch": 0.5845051006341329, "grad_norm": 0.33870548009872437, "learning_rate": 1.6092000971072453e-05, "loss": 0.4636, "step": 27560 }, { "epoch": 0.5845263090920659, "grad_norm": 0.31491246819496155, "learning_rate": 1.6091736500374728e-05, "loss": 0.4495, "step": 27561 }, { "epoch": 0.5845475175499989, "grad_norm": 0.35771599411964417, "learning_rate": 1.609147202290183e-05, "loss": 0.4598, "step": 27562 }, { "epoch": 0.584568726007932, "grad_norm": 0.35842210054397583, "learning_rate": 1.6091207538654047e-05, "loss": 0.4951, "step": 27563 }, { "epoch": 0.5845899344658649, "grad_norm": 0.45332610607147217, "learning_rate": 1.6090943047631678e-05, "loss": 0.4979, "step": 27564 }, { "epoch": 0.584611142923798, "grad_norm": 0.35925018787384033, "learning_rate": 1.6090678549835014e-05, "loss": 0.5227, "step": 27565 }, { "epoch": 0.5846323513817311, "grad_norm": 0.34735599160194397, "learning_rate": 1.6090414045264352e-05, "loss": 0.5584, "step": 27566 }, { "epoch": 0.584653559839664, "grad_norm": 0.37596648931503296, "learning_rate": 1.6090149533919982e-05, "loss": 0.5351, "step": 27567 }, { "epoch": 0.5846747682975971, "grad_norm": 0.6180748343467712, "learning_rate": 1.60898850158022e-05, "loss": 0.4718, "step": 27568 }, { "epoch": 0.5846959767555301, "grad_norm": 0.36594897508621216, "learning_rate": 1.60896204909113e-05, "loss": 0.5273, "step": 27569 }, { "epoch": 0.5847171852134632, "grad_norm": 0.3504714071750641, "learning_rate": 1.608935595924758e-05, "loss": 0.5147, "step": 27570 }, { "epoch": 0.5847383936713961, "grad_norm": 0.3597590923309326, "learning_rate": 1.6089091420811332e-05, "loss": 0.4983, "step": 27571 }, { "epoch": 0.5847596021293292, "grad_norm": 0.3915712833404541, "learning_rate": 1.608882687560285e-05, "loss": 0.5323, "step": 27572 }, { "epoch": 0.5847808105872622, "grad_norm": 0.3637220859527588, "learning_rate": 1.6088562323622422e-05, "loss": 0.4602, "step": 27573 }, { "epoch": 0.5848020190451952, "grad_norm": 0.461922824382782, "learning_rate": 1.6088297764870353e-05, "loss": 0.5284, "step": 27574 }, { "epoch": 0.5848232275031282, "grad_norm": 0.3796065151691437, "learning_rate": 1.608803319934693e-05, "loss": 0.4638, "step": 27575 }, { "epoch": 0.5848444359610613, "grad_norm": 0.5171182751655579, "learning_rate": 1.6087768627052448e-05, "loss": 0.458, "step": 27576 }, { "epoch": 0.5848656444189942, "grad_norm": 0.38047555088996887, "learning_rate": 1.6087504047987207e-05, "loss": 0.5174, "step": 27577 }, { "epoch": 0.5848868528769273, "grad_norm": 0.3572366237640381, "learning_rate": 1.6087239462151494e-05, "loss": 0.5474, "step": 27578 }, { "epoch": 0.5849080613348604, "grad_norm": 0.3305574357509613, "learning_rate": 1.6086974869545605e-05, "loss": 0.4505, "step": 27579 }, { "epoch": 0.5849292697927934, "grad_norm": 0.34604954719543457, "learning_rate": 1.6086710270169836e-05, "loss": 0.4575, "step": 27580 }, { "epoch": 0.5849504782507264, "grad_norm": 0.3310672342777252, "learning_rate": 1.608644566402448e-05, "loss": 0.4493, "step": 27581 }, { "epoch": 0.5849716867086594, "grad_norm": 0.3126499056816101, "learning_rate": 1.6086181051109836e-05, "loss": 0.4527, "step": 27582 }, { "epoch": 0.5849928951665925, "grad_norm": 0.3463958501815796, "learning_rate": 1.6085916431426194e-05, "loss": 0.5746, "step": 27583 }, { "epoch": 0.5850141036245254, "grad_norm": 0.42667222023010254, "learning_rate": 1.6085651804973845e-05, "loss": 0.5118, "step": 27584 }, { "epoch": 0.5850353120824585, "grad_norm": 0.6010944247245789, "learning_rate": 1.608538717175309e-05, "loss": 0.5153, "step": 27585 }, { "epoch": 0.5850565205403915, "grad_norm": 0.3453666567802429, "learning_rate": 1.6085122531764218e-05, "loss": 0.5462, "step": 27586 }, { "epoch": 0.5850777289983246, "grad_norm": 0.3681199550628662, "learning_rate": 1.6084857885007526e-05, "loss": 0.5795, "step": 27587 }, { "epoch": 0.5850989374562575, "grad_norm": 0.5639059543609619, "learning_rate": 1.608459323148331e-05, "loss": 0.5564, "step": 27588 }, { "epoch": 0.5851201459141906, "grad_norm": 0.35398972034454346, "learning_rate": 1.608432857119186e-05, "loss": 0.4636, "step": 27589 }, { "epoch": 0.5851413543721236, "grad_norm": 0.3771201968193054, "learning_rate": 1.608406390413347e-05, "loss": 0.5836, "step": 27590 }, { "epoch": 0.5851625628300566, "grad_norm": 0.4170858860015869, "learning_rate": 1.6083799230308442e-05, "loss": 0.6085, "step": 27591 }, { "epoch": 0.5851837712879896, "grad_norm": 0.3536789119243622, "learning_rate": 1.6083534549717063e-05, "loss": 0.443, "step": 27592 }, { "epoch": 0.5852049797459227, "grad_norm": 0.3715115487575531, "learning_rate": 1.608326986235963e-05, "loss": 0.459, "step": 27593 }, { "epoch": 0.5852261882038557, "grad_norm": 0.3200371563434601, "learning_rate": 1.608300516823644e-05, "loss": 0.4609, "step": 27594 }, { "epoch": 0.5852473966617887, "grad_norm": 0.4511019289493561, "learning_rate": 1.608274046734778e-05, "loss": 0.4586, "step": 27595 }, { "epoch": 0.5852686051197218, "grad_norm": 0.3798612654209137, "learning_rate": 1.608247575969395e-05, "loss": 0.4791, "step": 27596 }, { "epoch": 0.5852898135776547, "grad_norm": 0.33461058139801025, "learning_rate": 1.6082211045275246e-05, "loss": 0.5057, "step": 27597 }, { "epoch": 0.5853110220355878, "grad_norm": 0.3555087447166443, "learning_rate": 1.6081946324091955e-05, "loss": 0.5457, "step": 27598 }, { "epoch": 0.5853322304935208, "grad_norm": 0.3219895660877228, "learning_rate": 1.608168159614438e-05, "loss": 0.4486, "step": 27599 }, { "epoch": 0.5853534389514539, "grad_norm": 0.3649526536464691, "learning_rate": 1.6081416861432808e-05, "loss": 0.5283, "step": 27600 }, { "epoch": 0.5853746474093868, "grad_norm": 0.3474538326263428, "learning_rate": 1.608115211995754e-05, "loss": 0.4756, "step": 27601 }, { "epoch": 0.5853958558673199, "grad_norm": 0.38151469826698303, "learning_rate": 1.608088737171887e-05, "loss": 0.5525, "step": 27602 }, { "epoch": 0.5854170643252529, "grad_norm": 0.33203455805778503, "learning_rate": 1.6080622616717086e-05, "loss": 0.4328, "step": 27603 }, { "epoch": 0.5854382727831859, "grad_norm": 0.4128730595111847, "learning_rate": 1.6080357854952487e-05, "loss": 0.524, "step": 27604 }, { "epoch": 0.5854594812411189, "grad_norm": 0.3614962697029114, "learning_rate": 1.6080093086425367e-05, "loss": 0.4938, "step": 27605 }, { "epoch": 0.585480689699052, "grad_norm": 0.3549515902996063, "learning_rate": 1.607982831113602e-05, "loss": 0.4173, "step": 27606 }, { "epoch": 0.5855018981569851, "grad_norm": 0.3923172056674957, "learning_rate": 1.6079563529084738e-05, "loss": 0.4879, "step": 27607 }, { "epoch": 0.585523106614918, "grad_norm": 0.36933866143226624, "learning_rate": 1.6079298740271822e-05, "loss": 0.5426, "step": 27608 }, { "epoch": 0.5855443150728511, "grad_norm": 0.36863693594932556, "learning_rate": 1.6079033944697564e-05, "loss": 0.51, "step": 27609 }, { "epoch": 0.5855655235307841, "grad_norm": 0.35415899753570557, "learning_rate": 1.6078769142362255e-05, "loss": 0.4324, "step": 27610 }, { "epoch": 0.5855867319887171, "grad_norm": 0.39045944809913635, "learning_rate": 1.607850433326619e-05, "loss": 0.4809, "step": 27611 }, { "epoch": 0.5856079404466501, "grad_norm": 0.35236892104148865, "learning_rate": 1.6078239517409667e-05, "loss": 0.4815, "step": 27612 }, { "epoch": 0.5856291489045832, "grad_norm": 0.35871610045433044, "learning_rate": 1.6077974694792978e-05, "loss": 0.525, "step": 27613 }, { "epoch": 0.5856503573625161, "grad_norm": 0.3765637278556824, "learning_rate": 1.6077709865416423e-05, "loss": 0.4784, "step": 27614 }, { "epoch": 0.5856715658204492, "grad_norm": 0.3413546085357666, "learning_rate": 1.6077445029280285e-05, "loss": 0.4541, "step": 27615 }, { "epoch": 0.5856927742783822, "grad_norm": 0.41236674785614014, "learning_rate": 1.607718018638487e-05, "loss": 0.4925, "step": 27616 }, { "epoch": 0.5857139827363153, "grad_norm": 0.3625016510486603, "learning_rate": 1.6076915336730467e-05, "loss": 0.4474, "step": 27617 }, { "epoch": 0.5857351911942482, "grad_norm": 0.515849769115448, "learning_rate": 1.6076650480317373e-05, "loss": 0.4621, "step": 27618 }, { "epoch": 0.5857563996521813, "grad_norm": 0.38960525393486023, "learning_rate": 1.6076385617145874e-05, "loss": 0.4929, "step": 27619 }, { "epoch": 0.5857776081101144, "grad_norm": 0.33952364325523376, "learning_rate": 1.6076120747216282e-05, "loss": 0.4934, "step": 27620 }, { "epoch": 0.5857988165680473, "grad_norm": 0.3288598656654358, "learning_rate": 1.6075855870528875e-05, "loss": 0.4708, "step": 27621 }, { "epoch": 0.5858200250259804, "grad_norm": 0.35208848118782043, "learning_rate": 1.6075590987083955e-05, "loss": 0.4982, "step": 27622 }, { "epoch": 0.5858412334839134, "grad_norm": 0.37530216574668884, "learning_rate": 1.6075326096881815e-05, "loss": 0.4503, "step": 27623 }, { "epoch": 0.5858624419418464, "grad_norm": 0.3641345202922821, "learning_rate": 1.6075061199922752e-05, "loss": 0.5432, "step": 27624 }, { "epoch": 0.5858836503997794, "grad_norm": 0.335675448179245, "learning_rate": 1.6074796296207055e-05, "loss": 0.4888, "step": 27625 }, { "epoch": 0.5859048588577125, "grad_norm": 0.3671417832374573, "learning_rate": 1.6074531385735025e-05, "loss": 0.4975, "step": 27626 }, { "epoch": 0.5859260673156454, "grad_norm": 0.4025143086910248, "learning_rate": 1.6074266468506957e-05, "loss": 0.5221, "step": 27627 }, { "epoch": 0.5859472757735785, "grad_norm": 0.3842259347438812, "learning_rate": 1.6074001544523138e-05, "loss": 0.5749, "step": 27628 }, { "epoch": 0.5859684842315115, "grad_norm": 0.7716999053955078, "learning_rate": 1.607373661378387e-05, "loss": 0.5567, "step": 27629 }, { "epoch": 0.5859896926894446, "grad_norm": 0.369335412979126, "learning_rate": 1.6073471676289444e-05, "loss": 0.4637, "step": 27630 }, { "epoch": 0.5860109011473775, "grad_norm": 0.3887942433357239, "learning_rate": 1.6073206732040154e-05, "loss": 0.5576, "step": 27631 }, { "epoch": 0.5860321096053106, "grad_norm": 0.37241145968437195, "learning_rate": 1.6072941781036296e-05, "loss": 0.4739, "step": 27632 }, { "epoch": 0.5860533180632436, "grad_norm": 0.33405545353889465, "learning_rate": 1.6072676823278166e-05, "loss": 0.5313, "step": 27633 }, { "epoch": 0.5860745265211766, "grad_norm": 0.5528078079223633, "learning_rate": 1.607241185876606e-05, "loss": 0.583, "step": 27634 }, { "epoch": 0.5860957349791097, "grad_norm": 0.6026507616043091, "learning_rate": 1.607214688750027e-05, "loss": 0.4934, "step": 27635 }, { "epoch": 0.5861169434370427, "grad_norm": 0.4274381101131439, "learning_rate": 1.6071881909481087e-05, "loss": 0.4811, "step": 27636 }, { "epoch": 0.5861381518949758, "grad_norm": 0.35042649507522583, "learning_rate": 1.6071616924708814e-05, "loss": 0.4858, "step": 27637 }, { "epoch": 0.5861593603529087, "grad_norm": 0.35240551829338074, "learning_rate": 1.6071351933183736e-05, "loss": 0.4309, "step": 27638 }, { "epoch": 0.5861805688108418, "grad_norm": 0.33780476450920105, "learning_rate": 1.6071086934906155e-05, "loss": 0.4354, "step": 27639 }, { "epoch": 0.5862017772687748, "grad_norm": 0.5347410440444946, "learning_rate": 1.6070821929876367e-05, "loss": 0.5328, "step": 27640 }, { "epoch": 0.5862229857267078, "grad_norm": 0.2947074770927429, "learning_rate": 1.6070556918094662e-05, "loss": 0.4782, "step": 27641 }, { "epoch": 0.5862441941846408, "grad_norm": 0.6948851943016052, "learning_rate": 1.6070291899561336e-05, "loss": 0.4433, "step": 27642 }, { "epoch": 0.5862654026425739, "grad_norm": 2.946110963821411, "learning_rate": 1.6070026874276683e-05, "loss": 0.6138, "step": 27643 }, { "epoch": 0.5862866111005068, "grad_norm": 0.32816964387893677, "learning_rate": 1.6069761842241e-05, "loss": 0.4966, "step": 27644 }, { "epoch": 0.5863078195584399, "grad_norm": 0.3462403118610382, "learning_rate": 1.606949680345458e-05, "loss": 0.5644, "step": 27645 }, { "epoch": 0.5863290280163729, "grad_norm": 0.42170730233192444, "learning_rate": 1.606923175791772e-05, "loss": 0.5992, "step": 27646 }, { "epoch": 0.586350236474306, "grad_norm": 0.3768234848976135, "learning_rate": 1.6068966705630712e-05, "loss": 0.5308, "step": 27647 }, { "epoch": 0.586371444932239, "grad_norm": 0.3677058517932892, "learning_rate": 1.6068701646593853e-05, "loss": 0.469, "step": 27648 }, { "epoch": 0.586392653390172, "grad_norm": 0.3642232120037079, "learning_rate": 1.6068436580807434e-05, "loss": 0.5412, "step": 27649 }, { "epoch": 0.5864138618481051, "grad_norm": 0.3551274836063385, "learning_rate": 1.6068171508271756e-05, "loss": 0.4557, "step": 27650 }, { "epoch": 0.586435070306038, "grad_norm": 0.3380841016769409, "learning_rate": 1.606790642898711e-05, "loss": 0.5169, "step": 27651 }, { "epoch": 0.5864562787639711, "grad_norm": 0.3733348250389099, "learning_rate": 1.6067641342953788e-05, "loss": 0.5068, "step": 27652 }, { "epoch": 0.5864774872219041, "grad_norm": 0.34729620814323425, "learning_rate": 1.606737625017209e-05, "loss": 0.4635, "step": 27653 }, { "epoch": 0.5864986956798371, "grad_norm": 0.36897626519203186, "learning_rate": 1.606711115064231e-05, "loss": 0.503, "step": 27654 }, { "epoch": 0.5865199041377701, "grad_norm": 0.32765451073646545, "learning_rate": 1.606684604436474e-05, "loss": 0.4987, "step": 27655 }, { "epoch": 0.5865411125957032, "grad_norm": 0.374164342880249, "learning_rate": 1.6066580931339676e-05, "loss": 0.4386, "step": 27656 }, { "epoch": 0.5865623210536361, "grad_norm": 0.3515854477882385, "learning_rate": 1.6066315811567413e-05, "loss": 0.5314, "step": 27657 }, { "epoch": 0.5865835295115692, "grad_norm": 0.40032148361206055, "learning_rate": 1.6066050685048252e-05, "loss": 0.4714, "step": 27658 }, { "epoch": 0.5866047379695022, "grad_norm": 0.38717418909072876, "learning_rate": 1.6065785551782477e-05, "loss": 0.5087, "step": 27659 }, { "epoch": 0.5866259464274353, "grad_norm": 0.41316652297973633, "learning_rate": 1.606552041177039e-05, "loss": 0.4678, "step": 27660 }, { "epoch": 0.5866471548853683, "grad_norm": 0.3769185245037079, "learning_rate": 1.606525526501228e-05, "loss": 0.4992, "step": 27661 }, { "epoch": 0.5866683633433013, "grad_norm": 0.35257530212402344, "learning_rate": 1.606499011150845e-05, "loss": 0.5477, "step": 27662 }, { "epoch": 0.5866895718012344, "grad_norm": 0.3543109595775604, "learning_rate": 1.6064724951259187e-05, "loss": 0.471, "step": 27663 }, { "epoch": 0.5867107802591673, "grad_norm": 0.3423389196395874, "learning_rate": 1.6064459784264793e-05, "loss": 0.4271, "step": 27664 }, { "epoch": 0.5867319887171004, "grad_norm": 0.4133704602718353, "learning_rate": 1.6064194610525558e-05, "loss": 0.5194, "step": 27665 }, { "epoch": 0.5867531971750334, "grad_norm": 0.39031925797462463, "learning_rate": 1.6063929430041778e-05, "loss": 0.5284, "step": 27666 }, { "epoch": 0.5867744056329665, "grad_norm": 0.3836537301540375, "learning_rate": 1.606366424281375e-05, "loss": 0.432, "step": 27667 }, { "epoch": 0.5867956140908994, "grad_norm": 0.3754890561103821, "learning_rate": 1.6063399048841766e-05, "loss": 0.5285, "step": 27668 }, { "epoch": 0.5868168225488325, "grad_norm": 0.351796954870224, "learning_rate": 1.6063133848126124e-05, "loss": 0.5869, "step": 27669 }, { "epoch": 0.5868380310067655, "grad_norm": 0.31774404644966125, "learning_rate": 1.6062868640667113e-05, "loss": 0.5008, "step": 27670 }, { "epoch": 0.5868592394646985, "grad_norm": 0.4063325524330139, "learning_rate": 1.6062603426465035e-05, "loss": 0.5781, "step": 27671 }, { "epoch": 0.5868804479226315, "grad_norm": 0.3426198661327362, "learning_rate": 1.6062338205520186e-05, "loss": 0.4871, "step": 27672 }, { "epoch": 0.5869016563805646, "grad_norm": 0.3478820323944092, "learning_rate": 1.6062072977832852e-05, "loss": 0.4669, "step": 27673 }, { "epoch": 0.5869228648384975, "grad_norm": 0.35177740454673767, "learning_rate": 1.6061807743403333e-05, "loss": 0.4744, "step": 27674 }, { "epoch": 0.5869440732964306, "grad_norm": 0.38204512000083923, "learning_rate": 1.6061542502231927e-05, "loss": 0.4876, "step": 27675 }, { "epoch": 0.5869652817543637, "grad_norm": 0.33671024441719055, "learning_rate": 1.6061277254318924e-05, "loss": 0.5327, "step": 27676 }, { "epoch": 0.5869864902122967, "grad_norm": 0.36460962891578674, "learning_rate": 1.606101199966462e-05, "loss": 0.4023, "step": 27677 }, { "epoch": 0.5870076986702297, "grad_norm": 0.33980968594551086, "learning_rate": 1.6060746738269312e-05, "loss": 0.5591, "step": 27678 }, { "epoch": 0.5870289071281627, "grad_norm": 0.343648761510849, "learning_rate": 1.6060481470133296e-05, "loss": 0.429, "step": 27679 }, { "epoch": 0.5870501155860958, "grad_norm": 0.36650505661964417, "learning_rate": 1.6060216195256866e-05, "loss": 0.506, "step": 27680 }, { "epoch": 0.5870713240440287, "grad_norm": 0.30696600675582886, "learning_rate": 1.6059950913640313e-05, "loss": 0.4665, "step": 27681 }, { "epoch": 0.5870925325019618, "grad_norm": 0.3858224153518677, "learning_rate": 1.6059685625283937e-05, "loss": 0.6153, "step": 27682 }, { "epoch": 0.5871137409598948, "grad_norm": 0.3468274176120758, "learning_rate": 1.6059420330188028e-05, "loss": 0.5574, "step": 27683 }, { "epoch": 0.5871349494178278, "grad_norm": 0.3726828098297119, "learning_rate": 1.6059155028352887e-05, "loss": 0.5024, "step": 27684 }, { "epoch": 0.5871561578757608, "grad_norm": 0.3380274772644043, "learning_rate": 1.6058889719778806e-05, "loss": 0.5284, "step": 27685 }, { "epoch": 0.5871773663336939, "grad_norm": 0.35539567470550537, "learning_rate": 1.6058624404466084e-05, "loss": 0.5258, "step": 27686 }, { "epoch": 0.5871985747916268, "grad_norm": 0.4266120493412018, "learning_rate": 1.6058359082415008e-05, "loss": 0.5073, "step": 27687 }, { "epoch": 0.5872197832495599, "grad_norm": 0.6447359919548035, "learning_rate": 1.605809375362588e-05, "loss": 0.5349, "step": 27688 }, { "epoch": 0.587240991707493, "grad_norm": 0.5145888328552246, "learning_rate": 1.605782841809899e-05, "loss": 0.5818, "step": 27689 }, { "epoch": 0.587262200165426, "grad_norm": 0.3533736765384674, "learning_rate": 1.605756307583464e-05, "loss": 0.5474, "step": 27690 }, { "epoch": 0.587283408623359, "grad_norm": 0.3548114001750946, "learning_rate": 1.605729772683312e-05, "loss": 0.4832, "step": 27691 }, { "epoch": 0.587304617081292, "grad_norm": 0.36998993158340454, "learning_rate": 1.6057032371094722e-05, "loss": 0.5208, "step": 27692 }, { "epoch": 0.5873258255392251, "grad_norm": 0.35468512773513794, "learning_rate": 1.6056767008619748e-05, "loss": 0.4837, "step": 27693 }, { "epoch": 0.587347033997158, "grad_norm": 0.38614749908447266, "learning_rate": 1.6056501639408493e-05, "loss": 0.5325, "step": 27694 }, { "epoch": 0.5873682424550911, "grad_norm": 0.39116963744163513, "learning_rate": 1.6056236263461245e-05, "loss": 0.4702, "step": 27695 }, { "epoch": 0.5873894509130241, "grad_norm": 0.33806297183036804, "learning_rate": 1.6055970880778304e-05, "loss": 0.5149, "step": 27696 }, { "epoch": 0.5874106593709572, "grad_norm": 0.3975939452648163, "learning_rate": 1.6055705491359967e-05, "loss": 0.4829, "step": 27697 }, { "epoch": 0.5874318678288901, "grad_norm": 0.3475690186023712, "learning_rate": 1.605544009520653e-05, "loss": 0.5235, "step": 27698 }, { "epoch": 0.5874530762868232, "grad_norm": 0.35728901624679565, "learning_rate": 1.605517469231828e-05, "loss": 0.5085, "step": 27699 }, { "epoch": 0.5874742847447562, "grad_norm": 0.3697972893714905, "learning_rate": 1.6054909282695517e-05, "loss": 0.5407, "step": 27700 }, { "epoch": 0.5874954932026892, "grad_norm": 0.36811143159866333, "learning_rate": 1.605464386633854e-05, "loss": 0.4727, "step": 27701 }, { "epoch": 0.5875167016606223, "grad_norm": 0.3850114047527313, "learning_rate": 1.605437844324764e-05, "loss": 0.48, "step": 27702 }, { "epoch": 0.5875379101185553, "grad_norm": 0.3569246530532837, "learning_rate": 1.605411301342311e-05, "loss": 0.4613, "step": 27703 }, { "epoch": 0.5875591185764883, "grad_norm": 0.3723498582839966, "learning_rate": 1.605384757686525e-05, "loss": 0.5166, "step": 27704 }, { "epoch": 0.5875803270344213, "grad_norm": 0.41141584515571594, "learning_rate": 1.6053582133574354e-05, "loss": 0.4625, "step": 27705 }, { "epoch": 0.5876015354923544, "grad_norm": 0.33866798877716064, "learning_rate": 1.6053316683550714e-05, "loss": 0.5145, "step": 27706 }, { "epoch": 0.5876227439502874, "grad_norm": 0.3537108600139618, "learning_rate": 1.605305122679463e-05, "loss": 0.5155, "step": 27707 }, { "epoch": 0.5876439524082204, "grad_norm": 0.33691486716270447, "learning_rate": 1.6052785763306396e-05, "loss": 0.5223, "step": 27708 }, { "epoch": 0.5876651608661534, "grad_norm": 0.3662170469760895, "learning_rate": 1.6052520293086306e-05, "loss": 0.4604, "step": 27709 }, { "epoch": 0.5876863693240865, "grad_norm": 0.36714038252830505, "learning_rate": 1.6052254816134652e-05, "loss": 0.4927, "step": 27710 }, { "epoch": 0.5877075777820194, "grad_norm": 0.3344610333442688, "learning_rate": 1.6051989332451732e-05, "loss": 0.4886, "step": 27711 }, { "epoch": 0.5877287862399525, "grad_norm": 0.349927693605423, "learning_rate": 1.605172384203785e-05, "loss": 0.4534, "step": 27712 }, { "epoch": 0.5877499946978855, "grad_norm": 0.33518534898757935, "learning_rate": 1.605145834489329e-05, "loss": 0.5242, "step": 27713 }, { "epoch": 0.5877712031558185, "grad_norm": 0.371925950050354, "learning_rate": 1.6051192841018347e-05, "loss": 0.5369, "step": 27714 }, { "epoch": 0.5877924116137515, "grad_norm": 0.34099122881889343, "learning_rate": 1.605092733041332e-05, "loss": 0.4794, "step": 27715 }, { "epoch": 0.5878136200716846, "grad_norm": 0.33908843994140625, "learning_rate": 1.6050661813078505e-05, "loss": 0.4803, "step": 27716 }, { "epoch": 0.5878348285296177, "grad_norm": 0.34896859526634216, "learning_rate": 1.6050396289014198e-05, "loss": 0.5052, "step": 27717 }, { "epoch": 0.5878560369875506, "grad_norm": 0.3370022475719452, "learning_rate": 1.6050130758220692e-05, "loss": 0.4695, "step": 27718 }, { "epoch": 0.5878772454454837, "grad_norm": 0.338215172290802, "learning_rate": 1.6049865220698285e-05, "loss": 0.3788, "step": 27719 }, { "epoch": 0.5878984539034167, "grad_norm": 0.3947709798812866, "learning_rate": 1.6049599676447268e-05, "loss": 0.5612, "step": 27720 }, { "epoch": 0.5879196623613497, "grad_norm": 0.4020858407020569, "learning_rate": 1.604933412546794e-05, "loss": 0.5943, "step": 27721 }, { "epoch": 0.5879408708192827, "grad_norm": 0.3307294249534607, "learning_rate": 1.6049068567760595e-05, "loss": 0.5047, "step": 27722 }, { "epoch": 0.5879620792772158, "grad_norm": 0.3730355501174927, "learning_rate": 1.604880300332553e-05, "loss": 0.4352, "step": 27723 }, { "epoch": 0.5879832877351487, "grad_norm": 0.3189704716205597, "learning_rate": 1.6048537432163035e-05, "loss": 0.4904, "step": 27724 }, { "epoch": 0.5880044961930818, "grad_norm": 0.3545401096343994, "learning_rate": 1.6048271854273413e-05, "loss": 0.4505, "step": 27725 }, { "epoch": 0.5880257046510148, "grad_norm": 0.3329429626464844, "learning_rate": 1.6048006269656956e-05, "loss": 0.4883, "step": 27726 }, { "epoch": 0.5880469131089479, "grad_norm": 0.3597036600112915, "learning_rate": 1.6047740678313957e-05, "loss": 0.5053, "step": 27727 }, { "epoch": 0.5880681215668808, "grad_norm": 0.402498334646225, "learning_rate": 1.604747508024471e-05, "loss": 0.5681, "step": 27728 }, { "epoch": 0.5880893300248139, "grad_norm": 0.4161544442176819, "learning_rate": 1.604720947544952e-05, "loss": 0.4615, "step": 27729 }, { "epoch": 0.588110538482747, "grad_norm": 0.34291771054267883, "learning_rate": 1.604694386392867e-05, "loss": 0.4791, "step": 27730 }, { "epoch": 0.5881317469406799, "grad_norm": 0.36562615633010864, "learning_rate": 1.604667824568247e-05, "loss": 0.5082, "step": 27731 }, { "epoch": 0.588152955398613, "grad_norm": 0.3102472126483917, "learning_rate": 1.6046412620711197e-05, "loss": 0.4954, "step": 27732 }, { "epoch": 0.588174163856546, "grad_norm": 0.37970519065856934, "learning_rate": 1.6046146989015162e-05, "loss": 0.4596, "step": 27733 }, { "epoch": 0.588195372314479, "grad_norm": 0.3390553891658783, "learning_rate": 1.6045881350594655e-05, "loss": 0.5038, "step": 27734 }, { "epoch": 0.588216580772412, "grad_norm": 0.3643184006214142, "learning_rate": 1.604561570544997e-05, "loss": 0.5057, "step": 27735 }, { "epoch": 0.5882377892303451, "grad_norm": 0.3361237943172455, "learning_rate": 1.6045350053581404e-05, "loss": 0.5159, "step": 27736 }, { "epoch": 0.588258997688278, "grad_norm": 0.35365110635757446, "learning_rate": 1.604508439498925e-05, "loss": 0.5023, "step": 27737 }, { "epoch": 0.5882802061462111, "grad_norm": 0.4181225895881653, "learning_rate": 1.604481872967381e-05, "loss": 0.4266, "step": 27738 }, { "epoch": 0.5883014146041441, "grad_norm": 0.3518834412097931, "learning_rate": 1.6044553057635375e-05, "loss": 0.5397, "step": 27739 }, { "epoch": 0.5883226230620772, "grad_norm": 0.44156837463378906, "learning_rate": 1.604428737887424e-05, "loss": 0.4946, "step": 27740 }, { "epoch": 0.5883438315200101, "grad_norm": 0.39521118998527527, "learning_rate": 1.6044021693390693e-05, "loss": 0.4337, "step": 27741 }, { "epoch": 0.5883650399779432, "grad_norm": 0.3459957540035248, "learning_rate": 1.6043756001185047e-05, "loss": 0.4786, "step": 27742 }, { "epoch": 0.5883862484358763, "grad_norm": 0.33620989322662354, "learning_rate": 1.6043490302257584e-05, "loss": 0.4821, "step": 27743 }, { "epoch": 0.5884074568938092, "grad_norm": 0.3474283516407013, "learning_rate": 1.604322459660861e-05, "loss": 0.4362, "step": 27744 }, { "epoch": 0.5884286653517423, "grad_norm": 0.3512042760848999, "learning_rate": 1.6042958884238406e-05, "loss": 0.5433, "step": 27745 }, { "epoch": 0.5884498738096753, "grad_norm": 0.3353448212146759, "learning_rate": 1.604269316514728e-05, "loss": 0.4744, "step": 27746 }, { "epoch": 0.5884710822676084, "grad_norm": 0.3548332750797272, "learning_rate": 1.604242743933552e-05, "loss": 0.579, "step": 27747 }, { "epoch": 0.5884922907255413, "grad_norm": 0.41785940527915955, "learning_rate": 1.6042161706803424e-05, "loss": 0.5112, "step": 27748 }, { "epoch": 0.5885134991834744, "grad_norm": 0.49197524785995483, "learning_rate": 1.6041895967551295e-05, "loss": 0.5115, "step": 27749 }, { "epoch": 0.5885347076414074, "grad_norm": 0.372824102640152, "learning_rate": 1.6041630221579416e-05, "loss": 0.5162, "step": 27750 }, { "epoch": 0.5885559160993404, "grad_norm": 0.4194640815258026, "learning_rate": 1.6041364468888092e-05, "loss": 0.5583, "step": 27751 }, { "epoch": 0.5885771245572734, "grad_norm": 0.3782215714454651, "learning_rate": 1.604109870947761e-05, "loss": 0.4689, "step": 27752 }, { "epoch": 0.5885983330152065, "grad_norm": 0.38803988695144653, "learning_rate": 1.6040832943348276e-05, "loss": 0.5011, "step": 27753 }, { "epoch": 0.5886195414731394, "grad_norm": 0.3331664502620697, "learning_rate": 1.6040567170500377e-05, "loss": 0.4834, "step": 27754 }, { "epoch": 0.5886407499310725, "grad_norm": 0.49004197120666504, "learning_rate": 1.6040301390934212e-05, "loss": 0.4705, "step": 27755 }, { "epoch": 0.5886619583890056, "grad_norm": 0.33102887868881226, "learning_rate": 1.6040035604650076e-05, "loss": 0.4726, "step": 27756 }, { "epoch": 0.5886831668469386, "grad_norm": 0.37719449400901794, "learning_rate": 1.6039769811648265e-05, "loss": 0.4597, "step": 27757 }, { "epoch": 0.5887043753048716, "grad_norm": 0.3632343113422394, "learning_rate": 1.6039504011929075e-05, "loss": 0.5419, "step": 27758 }, { "epoch": 0.5887255837628046, "grad_norm": 0.39138880372047424, "learning_rate": 1.6039238205492803e-05, "loss": 0.5023, "step": 27759 }, { "epoch": 0.5887467922207377, "grad_norm": 0.32112279534339905, "learning_rate": 1.603897239233974e-05, "loss": 0.5499, "step": 27760 }, { "epoch": 0.5887680006786706, "grad_norm": 0.35976263880729675, "learning_rate": 1.6038706572470185e-05, "loss": 0.395, "step": 27761 }, { "epoch": 0.5887892091366037, "grad_norm": 0.3536219894886017, "learning_rate": 1.603844074588443e-05, "loss": 0.5389, "step": 27762 }, { "epoch": 0.5888104175945367, "grad_norm": 0.3734339475631714, "learning_rate": 1.6038174912582774e-05, "loss": 0.5256, "step": 27763 }, { "epoch": 0.5888316260524697, "grad_norm": 0.35988113284111023, "learning_rate": 1.6037909072565518e-05, "loss": 0.54, "step": 27764 }, { "epoch": 0.5888528345104027, "grad_norm": 0.33686837553977966, "learning_rate": 1.6037643225832948e-05, "loss": 0.4809, "step": 27765 }, { "epoch": 0.5888740429683358, "grad_norm": 0.3418870270252228, "learning_rate": 1.6037377372385364e-05, "loss": 0.4396, "step": 27766 }, { "epoch": 0.5888952514262688, "grad_norm": 0.3509811758995056, "learning_rate": 1.6037111512223066e-05, "loss": 0.505, "step": 27767 }, { "epoch": 0.5889164598842018, "grad_norm": 0.3909209370613098, "learning_rate": 1.6036845645346337e-05, "loss": 0.4915, "step": 27768 }, { "epoch": 0.5889376683421348, "grad_norm": 0.4040369689464569, "learning_rate": 1.6036579771755487e-05, "loss": 0.4462, "step": 27769 }, { "epoch": 0.5889588768000679, "grad_norm": 0.359878271818161, "learning_rate": 1.6036313891450803e-05, "loss": 0.4683, "step": 27770 }, { "epoch": 0.5889800852580009, "grad_norm": 0.426972895860672, "learning_rate": 1.6036048004432583e-05, "loss": 0.4096, "step": 27771 }, { "epoch": 0.5890012937159339, "grad_norm": 0.34721407294273376, "learning_rate": 1.6035782110701125e-05, "loss": 0.4797, "step": 27772 }, { "epoch": 0.589022502173867, "grad_norm": 0.3607008755207062, "learning_rate": 1.6035516210256715e-05, "loss": 0.545, "step": 27773 }, { "epoch": 0.5890437106317999, "grad_norm": 0.37339910864830017, "learning_rate": 1.6035250303099666e-05, "loss": 0.5166, "step": 27774 }, { "epoch": 0.589064919089733, "grad_norm": 0.5703606009483337, "learning_rate": 1.6034984389230258e-05, "loss": 0.54, "step": 27775 }, { "epoch": 0.589086127547666, "grad_norm": 0.3718121349811554, "learning_rate": 1.6034718468648795e-05, "loss": 0.5648, "step": 27776 }, { "epoch": 0.5891073360055991, "grad_norm": 0.38939470052719116, "learning_rate": 1.603445254135557e-05, "loss": 0.4637, "step": 27777 }, { "epoch": 0.589128544463532, "grad_norm": 0.3959733247756958, "learning_rate": 1.603418660735088e-05, "loss": 0.4636, "step": 27778 }, { "epoch": 0.5891497529214651, "grad_norm": 0.39007672667503357, "learning_rate": 1.6033920666635018e-05, "loss": 0.6335, "step": 27779 }, { "epoch": 0.5891709613793981, "grad_norm": 0.5445965528488159, "learning_rate": 1.6033654719208285e-05, "loss": 0.5539, "step": 27780 }, { "epoch": 0.5891921698373311, "grad_norm": 0.3614044785499573, "learning_rate": 1.603338876507097e-05, "loss": 0.5204, "step": 27781 }, { "epoch": 0.5892133782952641, "grad_norm": 0.3643363416194916, "learning_rate": 1.6033122804223378e-05, "loss": 0.4465, "step": 27782 }, { "epoch": 0.5892345867531972, "grad_norm": 0.41620007157325745, "learning_rate": 1.6032856836665796e-05, "loss": 0.5301, "step": 27783 }, { "epoch": 0.5892557952111303, "grad_norm": 0.32296523451805115, "learning_rate": 1.6032590862398524e-05, "loss": 0.5447, "step": 27784 }, { "epoch": 0.5892770036690632, "grad_norm": 0.37796610593795776, "learning_rate": 1.6032324881421853e-05, "loss": 0.5449, "step": 27785 }, { "epoch": 0.5892982121269963, "grad_norm": 0.6924338340759277, "learning_rate": 1.6032058893736084e-05, "loss": 0.5155, "step": 27786 }, { "epoch": 0.5893194205849293, "grad_norm": 0.35578665137290955, "learning_rate": 1.6031792899341514e-05, "loss": 0.5784, "step": 27787 }, { "epoch": 0.5893406290428623, "grad_norm": 0.4151741564273834, "learning_rate": 1.6031526898238435e-05, "loss": 0.4587, "step": 27788 }, { "epoch": 0.5893618375007953, "grad_norm": 0.34483471512794495, "learning_rate": 1.6031260890427144e-05, "loss": 0.5248, "step": 27789 }, { "epoch": 0.5893830459587284, "grad_norm": 0.3662181794643402, "learning_rate": 1.6030994875907938e-05, "loss": 0.5089, "step": 27790 }, { "epoch": 0.5894042544166613, "grad_norm": 0.3088931441307068, "learning_rate": 1.603072885468111e-05, "loss": 0.471, "step": 27791 }, { "epoch": 0.5894254628745944, "grad_norm": 0.4017143249511719, "learning_rate": 1.603046282674696e-05, "loss": 0.5781, "step": 27792 }, { "epoch": 0.5894466713325274, "grad_norm": 0.3716675937175751, "learning_rate": 1.6030196792105778e-05, "loss": 0.4824, "step": 27793 }, { "epoch": 0.5894678797904604, "grad_norm": 0.446919322013855, "learning_rate": 1.6029930750757867e-05, "loss": 0.555, "step": 27794 }, { "epoch": 0.5894890882483934, "grad_norm": 0.4270080327987671, "learning_rate": 1.602966470270352e-05, "loss": 0.495, "step": 27795 }, { "epoch": 0.5895102967063265, "grad_norm": 0.3566458225250244, "learning_rate": 1.602939864794303e-05, "loss": 0.5218, "step": 27796 }, { "epoch": 0.5895315051642596, "grad_norm": 0.4261372983455658, "learning_rate": 1.6029132586476698e-05, "loss": 0.5631, "step": 27797 }, { "epoch": 0.5895527136221925, "grad_norm": 0.41075336933135986, "learning_rate": 1.6028866518304816e-05, "loss": 0.5068, "step": 27798 }, { "epoch": 0.5895739220801256, "grad_norm": 0.335334450006485, "learning_rate": 1.6028600443427678e-05, "loss": 0.4006, "step": 27799 }, { "epoch": 0.5895951305380586, "grad_norm": 0.374914288520813, "learning_rate": 1.6028334361845587e-05, "loss": 0.5132, "step": 27800 }, { "epoch": 0.5896163389959916, "grad_norm": 0.35359904170036316, "learning_rate": 1.6028068273558834e-05, "loss": 0.5426, "step": 27801 }, { "epoch": 0.5896375474539246, "grad_norm": 0.36171677708625793, "learning_rate": 1.6027802178567713e-05, "loss": 0.4891, "step": 27802 }, { "epoch": 0.5896587559118577, "grad_norm": 0.34979552030563354, "learning_rate": 1.6027536076872526e-05, "loss": 0.4911, "step": 27803 }, { "epoch": 0.5896799643697906, "grad_norm": 0.3558416962623596, "learning_rate": 1.6027269968473565e-05, "loss": 0.4633, "step": 27804 }, { "epoch": 0.5897011728277237, "grad_norm": 0.35306769609451294, "learning_rate": 1.6027003853371124e-05, "loss": 0.5229, "step": 27805 }, { "epoch": 0.5897223812856567, "grad_norm": 0.35913029313087463, "learning_rate": 1.6026737731565502e-05, "loss": 0.4947, "step": 27806 }, { "epoch": 0.5897435897435898, "grad_norm": 0.3523709774017334, "learning_rate": 1.6026471603056996e-05, "loss": 0.5292, "step": 27807 }, { "epoch": 0.5897647982015227, "grad_norm": 0.35336917638778687, "learning_rate": 1.6026205467845902e-05, "loss": 0.5378, "step": 27808 }, { "epoch": 0.5897860066594558, "grad_norm": 0.35644087195396423, "learning_rate": 1.6025939325932513e-05, "loss": 0.4436, "step": 27809 }, { "epoch": 0.5898072151173888, "grad_norm": 0.4036785066127777, "learning_rate": 1.602567317731713e-05, "loss": 0.5099, "step": 27810 }, { "epoch": 0.5898284235753218, "grad_norm": 0.4233858287334442, "learning_rate": 1.602540702200004e-05, "loss": 0.5707, "step": 27811 }, { "epoch": 0.5898496320332549, "grad_norm": 0.3658055067062378, "learning_rate": 1.602514085998155e-05, "loss": 0.5015, "step": 27812 }, { "epoch": 0.5898708404911879, "grad_norm": 0.7111803889274597, "learning_rate": 1.602487469126195e-05, "loss": 0.5419, "step": 27813 }, { "epoch": 0.589892048949121, "grad_norm": 0.40018394589424133, "learning_rate": 1.6024608515841534e-05, "loss": 0.5257, "step": 27814 }, { "epoch": 0.5899132574070539, "grad_norm": 0.38730308413505554, "learning_rate": 1.6024342333720603e-05, "loss": 0.5404, "step": 27815 }, { "epoch": 0.589934465864987, "grad_norm": 0.3763054609298706, "learning_rate": 1.602407614489945e-05, "loss": 0.4708, "step": 27816 }, { "epoch": 0.58995567432292, "grad_norm": 0.3237363398075104, "learning_rate": 1.6023809949378368e-05, "loss": 0.4824, "step": 27817 }, { "epoch": 0.589976882780853, "grad_norm": 0.33543235063552856, "learning_rate": 1.602354374715766e-05, "loss": 0.4622, "step": 27818 }, { "epoch": 0.589998091238786, "grad_norm": 0.3149167001247406, "learning_rate": 1.602327753823762e-05, "loss": 0.4169, "step": 27819 }, { "epoch": 0.5900192996967191, "grad_norm": 0.3638822138309479, "learning_rate": 1.602301132261854e-05, "loss": 0.587, "step": 27820 }, { "epoch": 0.590040508154652, "grad_norm": 0.3520623445510864, "learning_rate": 1.6022745100300723e-05, "loss": 0.5053, "step": 27821 }, { "epoch": 0.5900617166125851, "grad_norm": 0.34998825192451477, "learning_rate": 1.602247887128446e-05, "loss": 0.5301, "step": 27822 }, { "epoch": 0.5900829250705181, "grad_norm": 0.3441276550292969, "learning_rate": 1.602221263557005e-05, "loss": 0.5168, "step": 27823 }, { "epoch": 0.5901041335284511, "grad_norm": 0.3457344174385071, "learning_rate": 1.6021946393157782e-05, "loss": 0.5109, "step": 27824 }, { "epoch": 0.5901253419863842, "grad_norm": 0.4234525263309479, "learning_rate": 1.6021680144047963e-05, "loss": 0.5586, "step": 27825 }, { "epoch": 0.5901465504443172, "grad_norm": 0.36271703243255615, "learning_rate": 1.602141388824088e-05, "loss": 0.4717, "step": 27826 }, { "epoch": 0.5901677589022503, "grad_norm": 0.37153321504592896, "learning_rate": 1.6021147625736835e-05, "loss": 0.4869, "step": 27827 }, { "epoch": 0.5901889673601832, "grad_norm": 0.333846777677536, "learning_rate": 1.6020881356536123e-05, "loss": 0.5113, "step": 27828 }, { "epoch": 0.5902101758181163, "grad_norm": 0.34363406896591187, "learning_rate": 1.6020615080639036e-05, "loss": 0.4799, "step": 27829 }, { "epoch": 0.5902313842760493, "grad_norm": 0.33745837211608887, "learning_rate": 1.6020348798045872e-05, "loss": 0.4592, "step": 27830 }, { "epoch": 0.5902525927339823, "grad_norm": 0.3513262867927551, "learning_rate": 1.6020082508756933e-05, "loss": 0.5125, "step": 27831 }, { "epoch": 0.5902738011919153, "grad_norm": 0.36285099387168884, "learning_rate": 1.6019816212772507e-05, "loss": 0.5019, "step": 27832 }, { "epoch": 0.5902950096498484, "grad_norm": 0.37015634775161743, "learning_rate": 1.6019549910092895e-05, "loss": 0.474, "step": 27833 }, { "epoch": 0.5903162181077813, "grad_norm": 0.39880499243736267, "learning_rate": 1.6019283600718393e-05, "loss": 0.4644, "step": 27834 }, { "epoch": 0.5903374265657144, "grad_norm": 0.4102707505226135, "learning_rate": 1.6019017284649298e-05, "loss": 0.5816, "step": 27835 }, { "epoch": 0.5903586350236474, "grad_norm": 0.32020407915115356, "learning_rate": 1.60187509618859e-05, "loss": 0.4493, "step": 27836 }, { "epoch": 0.5903798434815805, "grad_norm": 0.37015220522880554, "learning_rate": 1.60184846324285e-05, "loss": 0.4549, "step": 27837 }, { "epoch": 0.5904010519395135, "grad_norm": 0.33092185854911804, "learning_rate": 1.601821829627739e-05, "loss": 0.5105, "step": 27838 }, { "epoch": 0.5904222603974465, "grad_norm": 0.3313685953617096, "learning_rate": 1.601795195343288e-05, "loss": 0.5894, "step": 27839 }, { "epoch": 0.5904434688553796, "grad_norm": 0.36605769395828247, "learning_rate": 1.601768560389525e-05, "loss": 0.5126, "step": 27840 }, { "epoch": 0.5904646773133125, "grad_norm": 0.37272894382476807, "learning_rate": 1.6017419247664802e-05, "loss": 0.5069, "step": 27841 }, { "epoch": 0.5904858857712456, "grad_norm": 0.42479056119918823, "learning_rate": 1.6017152884741833e-05, "loss": 0.5083, "step": 27842 }, { "epoch": 0.5905070942291786, "grad_norm": 0.32377755641937256, "learning_rate": 1.601688651512664e-05, "loss": 0.4275, "step": 27843 }, { "epoch": 0.5905283026871117, "grad_norm": 0.4224981963634491, "learning_rate": 1.6016620138819518e-05, "loss": 0.5013, "step": 27844 }, { "epoch": 0.5905495111450446, "grad_norm": 0.3874465525150299, "learning_rate": 1.6016353755820763e-05, "loss": 0.4877, "step": 27845 }, { "epoch": 0.5905707196029777, "grad_norm": 0.37847021222114563, "learning_rate": 1.6016087366130668e-05, "loss": 0.5079, "step": 27846 }, { "epoch": 0.5905919280609107, "grad_norm": 0.37377455830574036, "learning_rate": 1.601582096974954e-05, "loss": 0.4448, "step": 27847 }, { "epoch": 0.5906131365188437, "grad_norm": 0.379077285528183, "learning_rate": 1.6015554566677663e-05, "loss": 0.4789, "step": 27848 }, { "epoch": 0.5906343449767767, "grad_norm": 0.3470788598060608, "learning_rate": 1.601528815691534e-05, "loss": 0.4889, "step": 27849 }, { "epoch": 0.5906555534347098, "grad_norm": 0.38640037178993225, "learning_rate": 1.6015021740462862e-05, "loss": 0.5285, "step": 27850 }, { "epoch": 0.5906767618926427, "grad_norm": 0.39324715733528137, "learning_rate": 1.6014755317320532e-05, "loss": 0.4672, "step": 27851 }, { "epoch": 0.5906979703505758, "grad_norm": 0.3566764295101166, "learning_rate": 1.6014488887488648e-05, "loss": 0.5509, "step": 27852 }, { "epoch": 0.5907191788085089, "grad_norm": 0.3614944517612457, "learning_rate": 1.6014222450967497e-05, "loss": 0.5064, "step": 27853 }, { "epoch": 0.5907403872664418, "grad_norm": 0.36378300189971924, "learning_rate": 1.6013956007757384e-05, "loss": 0.4878, "step": 27854 }, { "epoch": 0.5907615957243749, "grad_norm": 0.3353674113750458, "learning_rate": 1.6013689557858595e-05, "loss": 0.434, "step": 27855 }, { "epoch": 0.5907828041823079, "grad_norm": 0.33228057622909546, "learning_rate": 1.6013423101271433e-05, "loss": 0.4839, "step": 27856 }, { "epoch": 0.590804012640241, "grad_norm": 0.3611438572406769, "learning_rate": 1.6013156637996198e-05, "loss": 0.4218, "step": 27857 }, { "epoch": 0.5908252210981739, "grad_norm": 0.33268487453460693, "learning_rate": 1.6012890168033183e-05, "loss": 0.4499, "step": 27858 }, { "epoch": 0.590846429556107, "grad_norm": 0.3502671718597412, "learning_rate": 1.6012623691382684e-05, "loss": 0.4721, "step": 27859 }, { "epoch": 0.59086763801404, "grad_norm": 0.3353406488895416, "learning_rate": 1.6012357208044992e-05, "loss": 0.5506, "step": 27860 }, { "epoch": 0.590888846471973, "grad_norm": 0.3357187509536743, "learning_rate": 1.6012090718020415e-05, "loss": 0.5021, "step": 27861 }, { "epoch": 0.590910054929906, "grad_norm": 0.35329750180244446, "learning_rate": 1.601182422130924e-05, "loss": 0.4654, "step": 27862 }, { "epoch": 0.5909312633878391, "grad_norm": 0.3465728461742401, "learning_rate": 1.6011557717911763e-05, "loss": 0.4443, "step": 27863 }, { "epoch": 0.590952471845772, "grad_norm": 0.3665030896663666, "learning_rate": 1.601129120782829e-05, "loss": 0.487, "step": 27864 }, { "epoch": 0.5909736803037051, "grad_norm": 0.36255791783332825, "learning_rate": 1.6011024691059106e-05, "loss": 0.4707, "step": 27865 }, { "epoch": 0.5909948887616382, "grad_norm": 0.3561830222606659, "learning_rate": 1.6010758167604518e-05, "loss": 0.4954, "step": 27866 }, { "epoch": 0.5910160972195712, "grad_norm": 0.3583027720451355, "learning_rate": 1.601049163746481e-05, "loss": 0.5065, "step": 27867 }, { "epoch": 0.5910373056775042, "grad_norm": 0.3447134792804718, "learning_rate": 1.601022510064029e-05, "loss": 0.5604, "step": 27868 }, { "epoch": 0.5910585141354372, "grad_norm": 0.3127230703830719, "learning_rate": 1.6009958557131248e-05, "loss": 0.441, "step": 27869 }, { "epoch": 0.5910797225933703, "grad_norm": 0.3338382840156555, "learning_rate": 1.600969200693798e-05, "loss": 0.4992, "step": 27870 }, { "epoch": 0.5911009310513032, "grad_norm": 0.36887630820274353, "learning_rate": 1.6009425450060793e-05, "loss": 0.4973, "step": 27871 }, { "epoch": 0.5911221395092363, "grad_norm": 0.43825188279151917, "learning_rate": 1.600915888649997e-05, "loss": 0.468, "step": 27872 }, { "epoch": 0.5911433479671693, "grad_norm": 0.34643325209617615, "learning_rate": 1.600889231625581e-05, "loss": 0.4649, "step": 27873 }, { "epoch": 0.5911645564251023, "grad_norm": 0.3909362554550171, "learning_rate": 1.6008625739328615e-05, "loss": 0.5448, "step": 27874 }, { "epoch": 0.5911857648830353, "grad_norm": 0.42577874660491943, "learning_rate": 1.600835915571868e-05, "loss": 0.6145, "step": 27875 }, { "epoch": 0.5912069733409684, "grad_norm": 0.35468268394470215, "learning_rate": 1.6008092565426298e-05, "loss": 0.4843, "step": 27876 }, { "epoch": 0.5912281817989014, "grad_norm": 0.31666117906570435, "learning_rate": 1.600782596845177e-05, "loss": 0.4153, "step": 27877 }, { "epoch": 0.5912493902568344, "grad_norm": 0.34238678216934204, "learning_rate": 1.600755936479539e-05, "loss": 0.4966, "step": 27878 }, { "epoch": 0.5912705987147675, "grad_norm": 0.39433860778808594, "learning_rate": 1.6007292754457453e-05, "loss": 0.5703, "step": 27879 }, { "epoch": 0.5912918071727005, "grad_norm": 0.3864765763282776, "learning_rate": 1.6007026137438257e-05, "loss": 0.5243, "step": 27880 }, { "epoch": 0.5913130156306335, "grad_norm": 0.3412263095378876, "learning_rate": 1.60067595137381e-05, "loss": 0.3816, "step": 27881 }, { "epoch": 0.5913342240885665, "grad_norm": 0.40489596128463745, "learning_rate": 1.6006492883357272e-05, "loss": 0.4856, "step": 27882 }, { "epoch": 0.5913554325464996, "grad_norm": 0.3760471045970917, "learning_rate": 1.600622624629608e-05, "loss": 0.4687, "step": 27883 }, { "epoch": 0.5913766410044325, "grad_norm": 0.41057953238487244, "learning_rate": 1.6005959602554814e-05, "loss": 0.4718, "step": 27884 }, { "epoch": 0.5913978494623656, "grad_norm": 0.3781242370605469, "learning_rate": 1.6005692952133774e-05, "loss": 0.5009, "step": 27885 }, { "epoch": 0.5914190579202986, "grad_norm": 0.3578261733055115, "learning_rate": 1.6005426295033257e-05, "loss": 0.4907, "step": 27886 }, { "epoch": 0.5914402663782317, "grad_norm": 0.34206926822662354, "learning_rate": 1.600515963125355e-05, "loss": 0.4621, "step": 27887 }, { "epoch": 0.5914614748361646, "grad_norm": 0.30758902430534363, "learning_rate": 1.600489296079496e-05, "loss": 0.4673, "step": 27888 }, { "epoch": 0.5914826832940977, "grad_norm": 0.41596344113349915, "learning_rate": 1.600462628365778e-05, "loss": 0.5744, "step": 27889 }, { "epoch": 0.5915038917520307, "grad_norm": 0.36432796716690063, "learning_rate": 1.6004359599842306e-05, "loss": 0.5003, "step": 27890 }, { "epoch": 0.5915251002099637, "grad_norm": 0.3855915069580078, "learning_rate": 1.6004092909348835e-05, "loss": 0.5566, "step": 27891 }, { "epoch": 0.5915463086678967, "grad_norm": 0.3158933222293854, "learning_rate": 1.6003826212177666e-05, "loss": 0.5148, "step": 27892 }, { "epoch": 0.5915675171258298, "grad_norm": 0.3767562806606293, "learning_rate": 1.600355950832909e-05, "loss": 0.454, "step": 27893 }, { "epoch": 0.5915887255837629, "grad_norm": 0.36042723059654236, "learning_rate": 1.600329279780341e-05, "loss": 0.5117, "step": 27894 }, { "epoch": 0.5916099340416958, "grad_norm": 0.33837196230888367, "learning_rate": 1.600302608060092e-05, "loss": 0.5202, "step": 27895 }, { "epoch": 0.5916311424996289, "grad_norm": 0.3708939254283905, "learning_rate": 1.600275935672192e-05, "loss": 0.488, "step": 27896 }, { "epoch": 0.5916523509575619, "grad_norm": 0.35855382680892944, "learning_rate": 1.6002492626166697e-05, "loss": 0.47, "step": 27897 }, { "epoch": 0.5916735594154949, "grad_norm": 0.3586133122444153, "learning_rate": 1.6002225888935558e-05, "loss": 0.4382, "step": 27898 }, { "epoch": 0.5916947678734279, "grad_norm": 0.4138050079345703, "learning_rate": 1.6001959145028793e-05, "loss": 0.4049, "step": 27899 }, { "epoch": 0.591715976331361, "grad_norm": 0.3048408031463623, "learning_rate": 1.60016923944467e-05, "loss": 0.5135, "step": 27900 }, { "epoch": 0.5917371847892939, "grad_norm": 1.4466207027435303, "learning_rate": 1.600142563718958e-05, "loss": 0.5196, "step": 27901 }, { "epoch": 0.591758393247227, "grad_norm": 0.3261694014072418, "learning_rate": 1.6001158873257725e-05, "loss": 0.4577, "step": 27902 }, { "epoch": 0.59177960170516, "grad_norm": 0.3540970981121063, "learning_rate": 1.6000892102651434e-05, "loss": 0.5049, "step": 27903 }, { "epoch": 0.591800810163093, "grad_norm": 0.37225985527038574, "learning_rate": 1.6000625325371002e-05, "loss": 0.5181, "step": 27904 }, { "epoch": 0.591822018621026, "grad_norm": 0.3866095542907715, "learning_rate": 1.6000358541416726e-05, "loss": 0.4636, "step": 27905 }, { "epoch": 0.5918432270789591, "grad_norm": 0.35321879386901855, "learning_rate": 1.6000091750788905e-05, "loss": 0.5121, "step": 27906 }, { "epoch": 0.5918644355368922, "grad_norm": 0.36269301176071167, "learning_rate": 1.5999824953487836e-05, "loss": 0.5457, "step": 27907 }, { "epoch": 0.5918856439948251, "grad_norm": 0.3649248778820038, "learning_rate": 1.599955814951381e-05, "loss": 0.5572, "step": 27908 }, { "epoch": 0.5919068524527582, "grad_norm": 0.3196624219417572, "learning_rate": 1.5999291338867128e-05, "loss": 0.4024, "step": 27909 }, { "epoch": 0.5919280609106912, "grad_norm": 0.34882423281669617, "learning_rate": 1.599902452154809e-05, "loss": 0.5268, "step": 27910 }, { "epoch": 0.5919492693686242, "grad_norm": 0.4463310241699219, "learning_rate": 1.5998757697556985e-05, "loss": 0.48, "step": 27911 }, { "epoch": 0.5919704778265572, "grad_norm": 0.40073901414871216, "learning_rate": 1.5998490866894117e-05, "loss": 0.4476, "step": 27912 }, { "epoch": 0.5919916862844903, "grad_norm": 0.3279511332511902, "learning_rate": 1.5998224029559777e-05, "loss": 0.4486, "step": 27913 }, { "epoch": 0.5920128947424232, "grad_norm": 0.36487582325935364, "learning_rate": 1.5997957185554264e-05, "loss": 0.4813, "step": 27914 }, { "epoch": 0.5920341032003563, "grad_norm": 0.3645111620426178, "learning_rate": 1.5997690334877874e-05, "loss": 0.4424, "step": 27915 }, { "epoch": 0.5920553116582893, "grad_norm": 0.35203325748443604, "learning_rate": 1.5997423477530907e-05, "loss": 0.4696, "step": 27916 }, { "epoch": 0.5920765201162224, "grad_norm": 0.39701592922210693, "learning_rate": 1.599715661351366e-05, "loss": 0.5182, "step": 27917 }, { "epoch": 0.5920977285741553, "grad_norm": 0.3223141133785248, "learning_rate": 1.5996889742826427e-05, "loss": 0.5007, "step": 27918 }, { "epoch": 0.5921189370320884, "grad_norm": 0.4028913378715515, "learning_rate": 1.5996622865469502e-05, "loss": 0.5224, "step": 27919 }, { "epoch": 0.5921401454900215, "grad_norm": 0.3284616768360138, "learning_rate": 1.5996355981443186e-05, "loss": 0.3845, "step": 27920 }, { "epoch": 0.5921613539479544, "grad_norm": 0.528440535068512, "learning_rate": 1.5996089090747777e-05, "loss": 0.4661, "step": 27921 }, { "epoch": 0.5921825624058875, "grad_norm": 0.33370521664619446, "learning_rate": 1.599582219338357e-05, "loss": 0.5323, "step": 27922 }, { "epoch": 0.5922037708638205, "grad_norm": 0.3574221134185791, "learning_rate": 1.599555528935086e-05, "loss": 0.4347, "step": 27923 }, { "epoch": 0.5922249793217536, "grad_norm": 0.3682478666305542, "learning_rate": 1.5995288378649948e-05, "loss": 0.6466, "step": 27924 }, { "epoch": 0.5922461877796865, "grad_norm": 0.37478306889533997, "learning_rate": 1.5995021461281125e-05, "loss": 0.496, "step": 27925 }, { "epoch": 0.5922673962376196, "grad_norm": 0.3582492768764496, "learning_rate": 1.5994754537244693e-05, "loss": 0.5259, "step": 27926 }, { "epoch": 0.5922886046955526, "grad_norm": 0.40789374709129333, "learning_rate": 1.5994487606540942e-05, "loss": 0.5586, "step": 27927 }, { "epoch": 0.5923098131534856, "grad_norm": 0.3246845602989197, "learning_rate": 1.599422066917018e-05, "loss": 0.4874, "step": 27928 }, { "epoch": 0.5923310216114186, "grad_norm": 0.44982457160949707, "learning_rate": 1.5993953725132696e-05, "loss": 0.5373, "step": 27929 }, { "epoch": 0.5923522300693517, "grad_norm": 0.3018895983695984, "learning_rate": 1.599368677442879e-05, "loss": 0.4536, "step": 27930 }, { "epoch": 0.5923734385272846, "grad_norm": 0.32956716418266296, "learning_rate": 1.599341981705876e-05, "loss": 0.4202, "step": 27931 }, { "epoch": 0.5923946469852177, "grad_norm": 0.3563518822193146, "learning_rate": 1.5993152853022893e-05, "loss": 0.5116, "step": 27932 }, { "epoch": 0.5924158554431507, "grad_norm": 0.35163652896881104, "learning_rate": 1.5992885882321498e-05, "loss": 0.4547, "step": 27933 }, { "epoch": 0.5924370639010837, "grad_norm": 0.39868301153182983, "learning_rate": 1.5992618904954865e-05, "loss": 0.4531, "step": 27934 }, { "epoch": 0.5924582723590168, "grad_norm": 0.3415434658527374, "learning_rate": 1.5992351920923296e-05, "loss": 0.4965, "step": 27935 }, { "epoch": 0.5924794808169498, "grad_norm": 0.3831188976764679, "learning_rate": 1.5992084930227085e-05, "loss": 0.4331, "step": 27936 }, { "epoch": 0.5925006892748829, "grad_norm": 0.3513660430908203, "learning_rate": 1.5991817932866528e-05, "loss": 0.5009, "step": 27937 }, { "epoch": 0.5925218977328158, "grad_norm": 0.35483667254447937, "learning_rate": 1.5991550928841925e-05, "loss": 0.4823, "step": 27938 }, { "epoch": 0.5925431061907489, "grad_norm": 0.3911248445510864, "learning_rate": 1.599128391815357e-05, "loss": 0.5501, "step": 27939 }, { "epoch": 0.5925643146486819, "grad_norm": 0.3342723250389099, "learning_rate": 1.599101690080176e-05, "loss": 0.5206, "step": 27940 }, { "epoch": 0.5925855231066149, "grad_norm": 0.41326043009757996, "learning_rate": 1.5990749876786792e-05, "loss": 0.556, "step": 27941 }, { "epoch": 0.5926067315645479, "grad_norm": 0.3267214298248291, "learning_rate": 1.599048284610897e-05, "loss": 0.433, "step": 27942 }, { "epoch": 0.592627940022481, "grad_norm": 0.34809306263923645, "learning_rate": 1.599021580876858e-05, "loss": 0.5189, "step": 27943 }, { "epoch": 0.592649148480414, "grad_norm": 0.327042818069458, "learning_rate": 1.5989948764765925e-05, "loss": 0.4634, "step": 27944 }, { "epoch": 0.592670356938347, "grad_norm": 0.30380311608314514, "learning_rate": 1.5989681714101303e-05, "loss": 0.4642, "step": 27945 }, { "epoch": 0.59269156539628, "grad_norm": 0.38097748160362244, "learning_rate": 1.5989414656775007e-05, "loss": 0.5473, "step": 27946 }, { "epoch": 0.5927127738542131, "grad_norm": 0.33851107954978943, "learning_rate": 1.5989147592787336e-05, "loss": 0.4376, "step": 27947 }, { "epoch": 0.5927339823121461, "grad_norm": 0.3800186514854431, "learning_rate": 1.5988880522138587e-05, "loss": 0.5201, "step": 27948 }, { "epoch": 0.5927551907700791, "grad_norm": 0.38009124994277954, "learning_rate": 1.5988613444829056e-05, "loss": 0.6462, "step": 27949 }, { "epoch": 0.5927763992280122, "grad_norm": 0.3641844689846039, "learning_rate": 1.598834636085904e-05, "loss": 0.4746, "step": 27950 }, { "epoch": 0.5927976076859451, "grad_norm": 0.38708505034446716, "learning_rate": 1.5988079270228843e-05, "loss": 0.5452, "step": 27951 }, { "epoch": 0.5928188161438782, "grad_norm": 0.3635849356651306, "learning_rate": 1.5987812172938753e-05, "loss": 0.487, "step": 27952 }, { "epoch": 0.5928400246018112, "grad_norm": 0.32766008377075195, "learning_rate": 1.598754506898907e-05, "loss": 0.4478, "step": 27953 }, { "epoch": 0.5928612330597443, "grad_norm": 0.33703240752220154, "learning_rate": 1.5987277958380093e-05, "loss": 0.5092, "step": 27954 }, { "epoch": 0.5928824415176772, "grad_norm": 0.38513222336769104, "learning_rate": 1.5987010841112114e-05, "loss": 0.4915, "step": 27955 }, { "epoch": 0.5929036499756103, "grad_norm": 0.3354324996471405, "learning_rate": 1.5986743717185434e-05, "loss": 0.4983, "step": 27956 }, { "epoch": 0.5929248584335433, "grad_norm": 0.37341123819351196, "learning_rate": 1.5986476586600353e-05, "loss": 0.4393, "step": 27957 }, { "epoch": 0.5929460668914763, "grad_norm": 0.34698858857154846, "learning_rate": 1.5986209449357163e-05, "loss": 0.5035, "step": 27958 }, { "epoch": 0.5929672753494093, "grad_norm": 0.32435187697410583, "learning_rate": 1.5985942305456165e-05, "loss": 0.5042, "step": 27959 }, { "epoch": 0.5929884838073424, "grad_norm": 0.3752773106098175, "learning_rate": 1.598567515489765e-05, "loss": 0.5264, "step": 27960 }, { "epoch": 0.5930096922652754, "grad_norm": 0.3498825430870056, "learning_rate": 1.598540799768192e-05, "loss": 0.4399, "step": 27961 }, { "epoch": 0.5930309007232084, "grad_norm": 0.37509456276893616, "learning_rate": 1.5985140833809275e-05, "loss": 0.4805, "step": 27962 }, { "epoch": 0.5930521091811415, "grad_norm": 0.3706853985786438, "learning_rate": 1.5984873663280004e-05, "loss": 0.5198, "step": 27963 }, { "epoch": 0.5930733176390744, "grad_norm": 0.3862967789173126, "learning_rate": 1.5984606486094413e-05, "loss": 0.4958, "step": 27964 }, { "epoch": 0.5930945260970075, "grad_norm": 0.3864031136035919, "learning_rate": 1.5984339302252786e-05, "loss": 0.5089, "step": 27965 }, { "epoch": 0.5931157345549405, "grad_norm": 0.34687918424606323, "learning_rate": 1.598407211175544e-05, "loss": 0.5026, "step": 27966 }, { "epoch": 0.5931369430128736, "grad_norm": 0.4390254616737366, "learning_rate": 1.598380491460265e-05, "loss": 0.4764, "step": 27967 }, { "epoch": 0.5931581514708065, "grad_norm": 0.3131549656391144, "learning_rate": 1.5983537710794733e-05, "loss": 0.4496, "step": 27968 }, { "epoch": 0.5931793599287396, "grad_norm": 0.32991528511047363, "learning_rate": 1.5983270500331975e-05, "loss": 0.4584, "step": 27969 }, { "epoch": 0.5932005683866726, "grad_norm": 0.4092349112033844, "learning_rate": 1.5983003283214676e-05, "loss": 0.5395, "step": 27970 }, { "epoch": 0.5932217768446056, "grad_norm": 0.7470805048942566, "learning_rate": 1.598273605944313e-05, "loss": 0.5346, "step": 27971 }, { "epoch": 0.5932429853025386, "grad_norm": 0.3389720618724823, "learning_rate": 1.5982468829017638e-05, "loss": 0.4653, "step": 27972 }, { "epoch": 0.5932641937604717, "grad_norm": 0.35552075505256653, "learning_rate": 1.5982201591938496e-05, "loss": 0.5753, "step": 27973 }, { "epoch": 0.5932854022184046, "grad_norm": 0.3383796215057373, "learning_rate": 1.5981934348206002e-05, "loss": 0.583, "step": 27974 }, { "epoch": 0.5933066106763377, "grad_norm": 0.35781219601631165, "learning_rate": 1.5981667097820453e-05, "loss": 0.557, "step": 27975 }, { "epoch": 0.5933278191342708, "grad_norm": 0.36858442425727844, "learning_rate": 1.5981399840782142e-05, "loss": 0.432, "step": 27976 }, { "epoch": 0.5933490275922038, "grad_norm": 0.3350084722042084, "learning_rate": 1.5981132577091373e-05, "loss": 0.5033, "step": 27977 }, { "epoch": 0.5933702360501368, "grad_norm": 0.32020625472068787, "learning_rate": 1.598086530674844e-05, "loss": 0.4622, "step": 27978 }, { "epoch": 0.5933914445080698, "grad_norm": 0.38228803873062134, "learning_rate": 1.5980598029753644e-05, "loss": 0.51, "step": 27979 }, { "epoch": 0.5934126529660029, "grad_norm": 0.354830801486969, "learning_rate": 1.5980330746107276e-05, "loss": 0.4563, "step": 27980 }, { "epoch": 0.5934338614239358, "grad_norm": 0.3165772557258606, "learning_rate": 1.5980063455809637e-05, "loss": 0.4764, "step": 27981 }, { "epoch": 0.5934550698818689, "grad_norm": 0.40233564376831055, "learning_rate": 1.597979615886102e-05, "loss": 0.5743, "step": 27982 }, { "epoch": 0.5934762783398019, "grad_norm": 0.36869052052497864, "learning_rate": 1.597952885526173e-05, "loss": 0.456, "step": 27983 }, { "epoch": 0.593497486797735, "grad_norm": 0.3271926939487457, "learning_rate": 1.5979261545012055e-05, "loss": 0.4743, "step": 27984 }, { "epoch": 0.5935186952556679, "grad_norm": 0.3903515040874481, "learning_rate": 1.5978994228112303e-05, "loss": 0.3932, "step": 27985 }, { "epoch": 0.593539903713601, "grad_norm": 0.3321523368358612, "learning_rate": 1.597872690456276e-05, "loss": 0.479, "step": 27986 }, { "epoch": 0.593561112171534, "grad_norm": 0.38145098090171814, "learning_rate": 1.5978459574363735e-05, "loss": 0.5084, "step": 27987 }, { "epoch": 0.593582320629467, "grad_norm": 0.3789438307285309, "learning_rate": 1.5978192237515516e-05, "loss": 0.5558, "step": 27988 }, { "epoch": 0.5936035290874001, "grad_norm": 0.36302605271339417, "learning_rate": 1.59779248940184e-05, "loss": 0.5048, "step": 27989 }, { "epoch": 0.5936247375453331, "grad_norm": 0.36150723695755005, "learning_rate": 1.5977657543872693e-05, "loss": 0.4694, "step": 27990 }, { "epoch": 0.5936459460032661, "grad_norm": 0.3895137310028076, "learning_rate": 1.5977390187078685e-05, "loss": 0.5025, "step": 27991 }, { "epoch": 0.5936671544611991, "grad_norm": 0.4013427793979645, "learning_rate": 1.5977122823636678e-05, "loss": 0.5744, "step": 27992 }, { "epoch": 0.5936883629191322, "grad_norm": 0.39954692125320435, "learning_rate": 1.5976855453546966e-05, "loss": 0.4572, "step": 27993 }, { "epoch": 0.5937095713770651, "grad_norm": 0.37638556957244873, "learning_rate": 1.5976588076809845e-05, "loss": 0.5497, "step": 27994 }, { "epoch": 0.5937307798349982, "grad_norm": 0.3832591474056244, "learning_rate": 1.5976320693425616e-05, "loss": 0.5268, "step": 27995 }, { "epoch": 0.5937519882929312, "grad_norm": 0.3480004072189331, "learning_rate": 1.5976053303394573e-05, "loss": 0.4405, "step": 27996 }, { "epoch": 0.5937731967508643, "grad_norm": 0.39211827516555786, "learning_rate": 1.597578590671702e-05, "loss": 0.5232, "step": 27997 }, { "epoch": 0.5937944052087972, "grad_norm": 0.3485618531703949, "learning_rate": 1.5975518503393245e-05, "loss": 0.4601, "step": 27998 }, { "epoch": 0.5938156136667303, "grad_norm": 0.3316695988178253, "learning_rate": 1.5975251093423552e-05, "loss": 0.4078, "step": 27999 }, { "epoch": 0.5938368221246633, "grad_norm": 0.3665398061275482, "learning_rate": 1.5974983676808235e-05, "loss": 0.5146, "step": 28000 }, { "epoch": 0.5938580305825963, "grad_norm": 0.37350520491600037, "learning_rate": 1.59747162535476e-05, "loss": 0.5034, "step": 28001 }, { "epoch": 0.5938792390405294, "grad_norm": 0.4093380868434906, "learning_rate": 1.597444882364193e-05, "loss": 0.4996, "step": 28002 }, { "epoch": 0.5939004474984624, "grad_norm": 0.3544136881828308, "learning_rate": 1.5974181387091534e-05, "loss": 0.4987, "step": 28003 }, { "epoch": 0.5939216559563955, "grad_norm": 0.3967844843864441, "learning_rate": 1.5973913943896703e-05, "loss": 0.5059, "step": 28004 }, { "epoch": 0.5939428644143284, "grad_norm": 0.34194815158843994, "learning_rate": 1.5973646494057735e-05, "loss": 0.4529, "step": 28005 }, { "epoch": 0.5939640728722615, "grad_norm": 0.3135766386985779, "learning_rate": 1.5973379037574935e-05, "loss": 0.464, "step": 28006 }, { "epoch": 0.5939852813301945, "grad_norm": 0.3642655909061432, "learning_rate": 1.597311157444859e-05, "loss": 0.5083, "step": 28007 }, { "epoch": 0.5940064897881275, "grad_norm": 0.37152954936027527, "learning_rate": 1.5972844104679002e-05, "loss": 0.5147, "step": 28008 }, { "epoch": 0.5940276982460605, "grad_norm": 0.34833234548568726, "learning_rate": 1.5972576628266473e-05, "loss": 0.5264, "step": 28009 }, { "epoch": 0.5940489067039936, "grad_norm": 0.3378804326057434, "learning_rate": 1.5972309145211294e-05, "loss": 0.4589, "step": 28010 }, { "epoch": 0.5940701151619265, "grad_norm": 0.3494698107242584, "learning_rate": 1.5972041655513764e-05, "loss": 0.5184, "step": 28011 }, { "epoch": 0.5940913236198596, "grad_norm": 0.3434560298919678, "learning_rate": 1.597177415917418e-05, "loss": 0.4216, "step": 28012 }, { "epoch": 0.5941125320777926, "grad_norm": 0.36514630913734436, "learning_rate": 1.5971506656192844e-05, "loss": 0.5383, "step": 28013 }, { "epoch": 0.5941337405357257, "grad_norm": 0.38849931955337524, "learning_rate": 1.5971239146570046e-05, "loss": 0.4208, "step": 28014 }, { "epoch": 0.5941549489936586, "grad_norm": 0.35974571108818054, "learning_rate": 1.597097163030609e-05, "loss": 0.4501, "step": 28015 }, { "epoch": 0.5941761574515917, "grad_norm": 0.3905099332332611, "learning_rate": 1.5970704107401274e-05, "loss": 0.5538, "step": 28016 }, { "epoch": 0.5941973659095248, "grad_norm": 0.41426676511764526, "learning_rate": 1.597043657785589e-05, "loss": 0.4713, "step": 28017 }, { "epoch": 0.5942185743674577, "grad_norm": 0.3398522734642029, "learning_rate": 1.5970169041670237e-05, "loss": 0.4724, "step": 28018 }, { "epoch": 0.5942397828253908, "grad_norm": 0.38685527443885803, "learning_rate": 1.5969901498844615e-05, "loss": 0.5419, "step": 28019 }, { "epoch": 0.5942609912833238, "grad_norm": 0.3527786433696747, "learning_rate": 1.596963394937932e-05, "loss": 0.4782, "step": 28020 }, { "epoch": 0.5942821997412568, "grad_norm": 0.337410032749176, "learning_rate": 1.5969366393274653e-05, "loss": 0.423, "step": 28021 }, { "epoch": 0.5943034081991898, "grad_norm": 0.36871784925460815, "learning_rate": 1.5969098830530906e-05, "loss": 0.5055, "step": 28022 }, { "epoch": 0.5943246166571229, "grad_norm": 0.32973140478134155, "learning_rate": 1.5968831261148376e-05, "loss": 0.5015, "step": 28023 }, { "epoch": 0.5943458251150558, "grad_norm": 0.33755338191986084, "learning_rate": 1.5968563685127367e-05, "loss": 0.538, "step": 28024 }, { "epoch": 0.5943670335729889, "grad_norm": 0.5104449391365051, "learning_rate": 1.5968296102468174e-05, "loss": 0.5008, "step": 28025 }, { "epoch": 0.5943882420309219, "grad_norm": 0.3308470547199249, "learning_rate": 1.5968028513171093e-05, "loss": 0.5027, "step": 28026 }, { "epoch": 0.594409450488855, "grad_norm": 0.33184221386909485, "learning_rate": 1.5967760917236424e-05, "loss": 0.491, "step": 28027 }, { "epoch": 0.5944306589467879, "grad_norm": 0.4095349907875061, "learning_rate": 1.5967493314664463e-05, "loss": 0.4711, "step": 28028 }, { "epoch": 0.594451867404721, "grad_norm": 0.331220418214798, "learning_rate": 1.5967225705455504e-05, "loss": 0.4718, "step": 28029 }, { "epoch": 0.5944730758626541, "grad_norm": 0.35435038805007935, "learning_rate": 1.596695808960985e-05, "loss": 0.4605, "step": 28030 }, { "epoch": 0.594494284320587, "grad_norm": 0.3510437607765198, "learning_rate": 1.5966690467127798e-05, "loss": 0.5308, "step": 28031 }, { "epoch": 0.5945154927785201, "grad_norm": 0.3336206376552582, "learning_rate": 1.5966422838009648e-05, "loss": 0.4939, "step": 28032 }, { "epoch": 0.5945367012364531, "grad_norm": 0.4203898012638092, "learning_rate": 1.5966155202255693e-05, "loss": 0.5151, "step": 28033 }, { "epoch": 0.5945579096943862, "grad_norm": 0.33882877230644226, "learning_rate": 1.596588755986623e-05, "loss": 0.4838, "step": 28034 }, { "epoch": 0.5945791181523191, "grad_norm": 0.3801799416542053, "learning_rate": 1.5965619910841556e-05, "loss": 0.5892, "step": 28035 }, { "epoch": 0.5946003266102522, "grad_norm": 0.3570895493030548, "learning_rate": 1.5965352255181974e-05, "loss": 0.4744, "step": 28036 }, { "epoch": 0.5946215350681852, "grad_norm": 0.40646007657051086, "learning_rate": 1.596508459288778e-05, "loss": 0.6054, "step": 28037 }, { "epoch": 0.5946427435261182, "grad_norm": 0.34969326853752136, "learning_rate": 1.596481692395927e-05, "loss": 0.5774, "step": 28038 }, { "epoch": 0.5946639519840512, "grad_norm": 0.3288220465183258, "learning_rate": 1.5964549248396742e-05, "loss": 0.5301, "step": 28039 }, { "epoch": 0.5946851604419843, "grad_norm": 0.33177700638771057, "learning_rate": 1.5964281566200495e-05, "loss": 0.5245, "step": 28040 }, { "epoch": 0.5947063688999172, "grad_norm": 0.3670952320098877, "learning_rate": 1.5964013877370825e-05, "loss": 0.5061, "step": 28041 }, { "epoch": 0.5947275773578503, "grad_norm": 0.3853660523891449, "learning_rate": 1.596374618190803e-05, "loss": 0.5746, "step": 28042 }, { "epoch": 0.5947487858157834, "grad_norm": 0.36590471863746643, "learning_rate": 1.596347847981241e-05, "loss": 0.5179, "step": 28043 }, { "epoch": 0.5947699942737164, "grad_norm": 0.36172640323638916, "learning_rate": 1.596321077108426e-05, "loss": 0.5311, "step": 28044 }, { "epoch": 0.5947912027316494, "grad_norm": 0.358470618724823, "learning_rate": 1.596294305572388e-05, "loss": 0.4998, "step": 28045 }, { "epoch": 0.5948124111895824, "grad_norm": 0.33564087748527527, "learning_rate": 1.5962675333731564e-05, "loss": 0.5716, "step": 28046 }, { "epoch": 0.5948336196475155, "grad_norm": 0.36482498049736023, "learning_rate": 1.5962407605107616e-05, "loss": 0.5057, "step": 28047 }, { "epoch": 0.5948548281054484, "grad_norm": 0.4055672287940979, "learning_rate": 1.5962139869852327e-05, "loss": 0.4571, "step": 28048 }, { "epoch": 0.5948760365633815, "grad_norm": 0.3714081943035126, "learning_rate": 1.5961872127966e-05, "loss": 0.4821, "step": 28049 }, { "epoch": 0.5948972450213145, "grad_norm": 0.3511103093624115, "learning_rate": 1.5961604379448928e-05, "loss": 0.5118, "step": 28050 }, { "epoch": 0.5949184534792475, "grad_norm": 0.46364012360572815, "learning_rate": 1.5961336624301413e-05, "loss": 0.5698, "step": 28051 }, { "epoch": 0.5949396619371805, "grad_norm": 0.3612309396266937, "learning_rate": 1.596106886252375e-05, "loss": 0.5462, "step": 28052 }, { "epoch": 0.5949608703951136, "grad_norm": 0.4067639708518982, "learning_rate": 1.596080109411624e-05, "loss": 0.4797, "step": 28053 }, { "epoch": 0.5949820788530465, "grad_norm": 0.35574328899383545, "learning_rate": 1.596053331907918e-05, "loss": 0.5974, "step": 28054 }, { "epoch": 0.5950032873109796, "grad_norm": 0.35899338126182556, "learning_rate": 1.5960265537412862e-05, "loss": 0.4765, "step": 28055 }, { "epoch": 0.5950244957689126, "grad_norm": 0.32317855954170227, "learning_rate": 1.5959997749117593e-05, "loss": 0.441, "step": 28056 }, { "epoch": 0.5950457042268457, "grad_norm": 0.7868201732635498, "learning_rate": 1.5959729954193662e-05, "loss": 0.6008, "step": 28057 }, { "epoch": 0.5950669126847787, "grad_norm": 0.32799068093299866, "learning_rate": 1.5959462152641373e-05, "loss": 0.5289, "step": 28058 }, { "epoch": 0.5950881211427117, "grad_norm": 0.3259603977203369, "learning_rate": 1.5959194344461023e-05, "loss": 0.4624, "step": 28059 }, { "epoch": 0.5951093296006448, "grad_norm": 0.36080390214920044, "learning_rate": 1.595892652965291e-05, "loss": 0.5512, "step": 28060 }, { "epoch": 0.5951305380585777, "grad_norm": 0.3818785846233368, "learning_rate": 1.5958658708217324e-05, "loss": 0.4478, "step": 28061 }, { "epoch": 0.5951517465165108, "grad_norm": 0.3757493197917938, "learning_rate": 1.5958390880154577e-05, "loss": 0.584, "step": 28062 }, { "epoch": 0.5951729549744438, "grad_norm": 0.37986674904823303, "learning_rate": 1.5958123045464956e-05, "loss": 0.504, "step": 28063 }, { "epoch": 0.5951941634323769, "grad_norm": 0.3882538080215454, "learning_rate": 1.5957855204148766e-05, "loss": 0.5324, "step": 28064 }, { "epoch": 0.5952153718903098, "grad_norm": 0.39592665433883667, "learning_rate": 1.5957587356206298e-05, "loss": 0.4982, "step": 28065 }, { "epoch": 0.5952365803482429, "grad_norm": 0.417299747467041, "learning_rate": 1.5957319501637854e-05, "loss": 0.5481, "step": 28066 }, { "epoch": 0.5952577888061759, "grad_norm": 0.4319797158241272, "learning_rate": 1.5957051640443732e-05, "loss": 0.528, "step": 28067 }, { "epoch": 0.5952789972641089, "grad_norm": 0.33291518688201904, "learning_rate": 1.5956783772624223e-05, "loss": 0.5011, "step": 28068 }, { "epoch": 0.5953002057220419, "grad_norm": 0.8914650678634644, "learning_rate": 1.5956515898179636e-05, "loss": 0.4974, "step": 28069 }, { "epoch": 0.595321414179975, "grad_norm": 0.5497329235076904, "learning_rate": 1.595624801711026e-05, "loss": 0.4366, "step": 28070 }, { "epoch": 0.595342622637908, "grad_norm": 0.3455263674259186, "learning_rate": 1.5955980129416402e-05, "loss": 0.4682, "step": 28071 }, { "epoch": 0.595363831095841, "grad_norm": 0.3668966591358185, "learning_rate": 1.595571223509835e-05, "loss": 0.5537, "step": 28072 }, { "epoch": 0.5953850395537741, "grad_norm": 0.32778027653694153, "learning_rate": 1.5955444334156412e-05, "loss": 0.5098, "step": 28073 }, { "epoch": 0.595406248011707, "grad_norm": 0.3165695071220398, "learning_rate": 1.5955176426590875e-05, "loss": 0.4899, "step": 28074 }, { "epoch": 0.5954274564696401, "grad_norm": 0.29326331615448, "learning_rate": 1.5954908512402045e-05, "loss": 0.3407, "step": 28075 }, { "epoch": 0.5954486649275731, "grad_norm": 0.45079100131988525, "learning_rate": 1.5954640591590217e-05, "loss": 0.4848, "step": 28076 }, { "epoch": 0.5954698733855062, "grad_norm": 10.027347564697266, "learning_rate": 1.5954372664155692e-05, "loss": 0.6727, "step": 28077 }, { "epoch": 0.5954910818434391, "grad_norm": 0.39671459794044495, "learning_rate": 1.5954104730098763e-05, "loss": 0.5314, "step": 28078 }, { "epoch": 0.5955122903013722, "grad_norm": 0.3219514489173889, "learning_rate": 1.595383678941973e-05, "loss": 0.4588, "step": 28079 }, { "epoch": 0.5955334987593052, "grad_norm": 0.3807174861431122, "learning_rate": 1.5953568842118894e-05, "loss": 0.5458, "step": 28080 }, { "epoch": 0.5955547072172382, "grad_norm": 0.49683618545532227, "learning_rate": 1.5953300888196545e-05, "loss": 0.6024, "step": 28081 }, { "epoch": 0.5955759156751712, "grad_norm": 0.3202250599861145, "learning_rate": 1.5953032927652993e-05, "loss": 0.4411, "step": 28082 }, { "epoch": 0.5955971241331043, "grad_norm": 0.36813071370124817, "learning_rate": 1.5952764960488524e-05, "loss": 0.4519, "step": 28083 }, { "epoch": 0.5956183325910374, "grad_norm": 0.3767075538635254, "learning_rate": 1.5952496986703448e-05, "loss": 0.5005, "step": 28084 }, { "epoch": 0.5956395410489703, "grad_norm": 0.3666975796222687, "learning_rate": 1.595222900629805e-05, "loss": 0.4498, "step": 28085 }, { "epoch": 0.5956607495069034, "grad_norm": 0.36714136600494385, "learning_rate": 1.5951961019272638e-05, "loss": 0.5207, "step": 28086 }, { "epoch": 0.5956819579648364, "grad_norm": 0.41669222712516785, "learning_rate": 1.5951693025627503e-05, "loss": 0.5426, "step": 28087 }, { "epoch": 0.5957031664227694, "grad_norm": 0.3655093312263489, "learning_rate": 1.595142502536295e-05, "loss": 0.5453, "step": 28088 }, { "epoch": 0.5957243748807024, "grad_norm": 0.4961884319782257, "learning_rate": 1.5951157018479274e-05, "loss": 0.5528, "step": 28089 }, { "epoch": 0.5957455833386355, "grad_norm": 0.37372761964797974, "learning_rate": 1.5950889004976773e-05, "loss": 0.5222, "step": 28090 }, { "epoch": 0.5957667917965684, "grad_norm": 0.39631426334381104, "learning_rate": 1.5950620984855746e-05, "loss": 0.5666, "step": 28091 }, { "epoch": 0.5957880002545015, "grad_norm": 0.3015232980251312, "learning_rate": 1.5950352958116488e-05, "loss": 0.4539, "step": 28092 }, { "epoch": 0.5958092087124345, "grad_norm": 0.36667245626449585, "learning_rate": 1.59500849247593e-05, "loss": 0.4671, "step": 28093 }, { "epoch": 0.5958304171703676, "grad_norm": 0.4523359537124634, "learning_rate": 1.5949816884784475e-05, "loss": 0.4489, "step": 28094 }, { "epoch": 0.5958516256283005, "grad_norm": 0.3578093349933624, "learning_rate": 1.594954883819232e-05, "loss": 0.5896, "step": 28095 }, { "epoch": 0.5958728340862336, "grad_norm": 0.31171929836273193, "learning_rate": 1.5949280784983127e-05, "loss": 0.4593, "step": 28096 }, { "epoch": 0.5958940425441667, "grad_norm": 0.32508599758148193, "learning_rate": 1.5949012725157197e-05, "loss": 0.4857, "step": 28097 }, { "epoch": 0.5959152510020996, "grad_norm": 0.4872419238090515, "learning_rate": 1.594874465871483e-05, "loss": 0.5027, "step": 28098 }, { "epoch": 0.5959364594600327, "grad_norm": 0.35086122155189514, "learning_rate": 1.5948476585656312e-05, "loss": 0.4901, "step": 28099 }, { "epoch": 0.5959576679179657, "grad_norm": 0.3797893226146698, "learning_rate": 1.5948208505981954e-05, "loss": 0.4432, "step": 28100 }, { "epoch": 0.5959788763758987, "grad_norm": 0.37452232837677, "learning_rate": 1.5947940419692052e-05, "loss": 0.4506, "step": 28101 }, { "epoch": 0.5960000848338317, "grad_norm": 0.34595876932144165, "learning_rate": 1.59476723267869e-05, "loss": 0.4401, "step": 28102 }, { "epoch": 0.5960212932917648, "grad_norm": 0.3445851504802704, "learning_rate": 1.59474042272668e-05, "loss": 0.4251, "step": 28103 }, { "epoch": 0.5960425017496978, "grad_norm": 0.38569188117980957, "learning_rate": 1.5947136121132048e-05, "loss": 0.5363, "step": 28104 }, { "epoch": 0.5960637102076308, "grad_norm": 0.3476608395576477, "learning_rate": 1.5946868008382947e-05, "loss": 0.4832, "step": 28105 }, { "epoch": 0.5960849186655638, "grad_norm": 0.37992510199546814, "learning_rate": 1.5946599889019782e-05, "loss": 0.5538, "step": 28106 }, { "epoch": 0.5961061271234969, "grad_norm": 0.4736419916152954, "learning_rate": 1.594633176304287e-05, "loss": 0.5021, "step": 28107 }, { "epoch": 0.5961273355814298, "grad_norm": 0.4482797086238861, "learning_rate": 1.5946063630452493e-05, "loss": 0.5152, "step": 28108 }, { "epoch": 0.5961485440393629, "grad_norm": 0.4331573247909546, "learning_rate": 1.5945795491248957e-05, "loss": 0.5424, "step": 28109 }, { "epoch": 0.5961697524972959, "grad_norm": 0.3491480052471161, "learning_rate": 1.5945527345432557e-05, "loss": 0.4556, "step": 28110 }, { "epoch": 0.5961909609552289, "grad_norm": 0.37052395939826965, "learning_rate": 1.59452591930036e-05, "loss": 0.5415, "step": 28111 }, { "epoch": 0.596212169413162, "grad_norm": 0.34483802318573, "learning_rate": 1.5944991033962375e-05, "loss": 0.4846, "step": 28112 }, { "epoch": 0.596233377871095, "grad_norm": 0.3678751289844513, "learning_rate": 1.594472286830918e-05, "loss": 0.4654, "step": 28113 }, { "epoch": 0.5962545863290281, "grad_norm": 0.32599133253097534, "learning_rate": 1.5944454696044314e-05, "loss": 0.4413, "step": 28114 }, { "epoch": 0.596275794786961, "grad_norm": 0.34727710485458374, "learning_rate": 1.594418651716808e-05, "loss": 0.4924, "step": 28115 }, { "epoch": 0.5962970032448941, "grad_norm": 0.3727398216724396, "learning_rate": 1.5943918331680773e-05, "loss": 0.5188, "step": 28116 }, { "epoch": 0.5963182117028271, "grad_norm": 0.3267380893230438, "learning_rate": 1.5943650139582693e-05, "loss": 0.5085, "step": 28117 }, { "epoch": 0.5963394201607601, "grad_norm": 0.3617793917655945, "learning_rate": 1.5943381940874134e-05, "loss": 0.5544, "step": 28118 }, { "epoch": 0.5963606286186931, "grad_norm": 0.3396175503730774, "learning_rate": 1.5943113735555397e-05, "loss": 0.4604, "step": 28119 }, { "epoch": 0.5963818370766262, "grad_norm": 0.33994337916374207, "learning_rate": 1.5942845523626785e-05, "loss": 0.5192, "step": 28120 }, { "epoch": 0.5964030455345591, "grad_norm": 0.3648567199707031, "learning_rate": 1.594257730508859e-05, "loss": 0.4374, "step": 28121 }, { "epoch": 0.5964242539924922, "grad_norm": 0.3320021331310272, "learning_rate": 1.5942309079941112e-05, "loss": 0.411, "step": 28122 }, { "epoch": 0.5964454624504252, "grad_norm": 0.3662775158882141, "learning_rate": 1.5942040848184648e-05, "loss": 0.4612, "step": 28123 }, { "epoch": 0.5964666709083583, "grad_norm": 0.33336541056632996, "learning_rate": 1.5941772609819497e-05, "loss": 0.5123, "step": 28124 }, { "epoch": 0.5964878793662913, "grad_norm": 0.3518159091472626, "learning_rate": 1.594150436484596e-05, "loss": 0.5052, "step": 28125 }, { "epoch": 0.5965090878242243, "grad_norm": 0.46498537063598633, "learning_rate": 1.5941236113264332e-05, "loss": 0.6044, "step": 28126 }, { "epoch": 0.5965302962821574, "grad_norm": 0.47215214371681213, "learning_rate": 1.5940967855074917e-05, "loss": 0.5148, "step": 28127 }, { "epoch": 0.5965515047400903, "grad_norm": 0.33090171217918396, "learning_rate": 1.5940699590278005e-05, "loss": 0.4403, "step": 28128 }, { "epoch": 0.5965727131980234, "grad_norm": 0.3447459638118744, "learning_rate": 1.59404313188739e-05, "loss": 0.4856, "step": 28129 }, { "epoch": 0.5965939216559564, "grad_norm": 0.3881334066390991, "learning_rate": 1.5940163040862898e-05, "loss": 0.5647, "step": 28130 }, { "epoch": 0.5966151301138894, "grad_norm": 0.3654283881187439, "learning_rate": 1.59398947562453e-05, "loss": 0.5189, "step": 28131 }, { "epoch": 0.5966363385718224, "grad_norm": 0.36603254079818726, "learning_rate": 1.59396264650214e-05, "loss": 0.457, "step": 28132 }, { "epoch": 0.5966575470297555, "grad_norm": 0.36686450242996216, "learning_rate": 1.59393581671915e-05, "loss": 0.5489, "step": 28133 }, { "epoch": 0.5966787554876885, "grad_norm": 0.3509931266307831, "learning_rate": 1.59390898627559e-05, "loss": 0.544, "step": 28134 }, { "epoch": 0.5966999639456215, "grad_norm": 0.32276204228401184, "learning_rate": 1.5938821551714895e-05, "loss": 0.513, "step": 28135 }, { "epoch": 0.5967211724035545, "grad_norm": 0.3287002146244049, "learning_rate": 1.5938553234068785e-05, "loss": 0.4626, "step": 28136 }, { "epoch": 0.5967423808614876, "grad_norm": 0.3775707185268402, "learning_rate": 1.5938284909817864e-05, "loss": 0.5193, "step": 28137 }, { "epoch": 0.5967635893194206, "grad_norm": 0.4118417203426361, "learning_rate": 1.5938016578962435e-05, "loss": 0.4939, "step": 28138 }, { "epoch": 0.5967847977773536, "grad_norm": 0.33618268370628357, "learning_rate": 1.5937748241502794e-05, "loss": 0.4776, "step": 28139 }, { "epoch": 0.5968060062352867, "grad_norm": 0.3535297214984894, "learning_rate": 1.5937479897439245e-05, "loss": 0.4884, "step": 28140 }, { "epoch": 0.5968272146932196, "grad_norm": 0.4002801477909088, "learning_rate": 1.593721154677208e-05, "loss": 0.4195, "step": 28141 }, { "epoch": 0.5968484231511527, "grad_norm": 0.3659912943840027, "learning_rate": 1.5936943189501603e-05, "loss": 0.6137, "step": 28142 }, { "epoch": 0.5968696316090857, "grad_norm": 0.31561413407325745, "learning_rate": 1.5936674825628108e-05, "loss": 0.477, "step": 28143 }, { "epoch": 0.5968908400670188, "grad_norm": 0.32612499594688416, "learning_rate": 1.5936406455151892e-05, "loss": 0.5302, "step": 28144 }, { "epoch": 0.5969120485249517, "grad_norm": 0.3928024172782898, "learning_rate": 1.593613807807326e-05, "loss": 0.5039, "step": 28145 }, { "epoch": 0.5969332569828848, "grad_norm": 0.3450058102607727, "learning_rate": 1.5935869694392504e-05, "loss": 0.5076, "step": 28146 }, { "epoch": 0.5969544654408178, "grad_norm": 0.3688640594482422, "learning_rate": 1.5935601304109924e-05, "loss": 0.5314, "step": 28147 }, { "epoch": 0.5969756738987508, "grad_norm": 0.3239705264568329, "learning_rate": 1.5935332907225823e-05, "loss": 0.4853, "step": 28148 }, { "epoch": 0.5969968823566838, "grad_norm": 0.32151561975479126, "learning_rate": 1.59350645037405e-05, "loss": 0.5569, "step": 28149 }, { "epoch": 0.5970180908146169, "grad_norm": 0.31797167658805847, "learning_rate": 1.5934796093654242e-05, "loss": 0.4722, "step": 28150 }, { "epoch": 0.5970392992725498, "grad_norm": 0.33665719628334045, "learning_rate": 1.5934527676967358e-05, "loss": 0.4818, "step": 28151 }, { "epoch": 0.5970605077304829, "grad_norm": 0.3951030671596527, "learning_rate": 1.5934259253680146e-05, "loss": 0.4742, "step": 28152 }, { "epoch": 0.597081716188416, "grad_norm": 0.45360323786735535, "learning_rate": 1.59339908237929e-05, "loss": 0.5493, "step": 28153 }, { "epoch": 0.597102924646349, "grad_norm": 0.32967808842658997, "learning_rate": 1.5933722387305923e-05, "loss": 0.4693, "step": 28154 }, { "epoch": 0.597124133104282, "grad_norm": 0.3544503450393677, "learning_rate": 1.593345394421951e-05, "loss": 0.5301, "step": 28155 }, { "epoch": 0.597145341562215, "grad_norm": 0.38649266958236694, "learning_rate": 1.5933185494533963e-05, "loss": 0.4145, "step": 28156 }, { "epoch": 0.5971665500201481, "grad_norm": 0.5070125460624695, "learning_rate": 1.5932917038249576e-05, "loss": 0.5264, "step": 28157 }, { "epoch": 0.597187758478081, "grad_norm": 0.3432774543762207, "learning_rate": 1.593264857536665e-05, "loss": 0.4584, "step": 28158 }, { "epoch": 0.5972089669360141, "grad_norm": 0.35455524921417236, "learning_rate": 1.5932380105885487e-05, "loss": 0.5316, "step": 28159 }, { "epoch": 0.5972301753939471, "grad_norm": 0.2906014323234558, "learning_rate": 1.593211162980638e-05, "loss": 0.4782, "step": 28160 }, { "epoch": 0.5972513838518801, "grad_norm": 0.36349543929100037, "learning_rate": 1.593184314712963e-05, "loss": 0.4888, "step": 28161 }, { "epoch": 0.5972725923098131, "grad_norm": 0.44551292061805725, "learning_rate": 1.5931574657855535e-05, "loss": 0.5764, "step": 28162 }, { "epoch": 0.5972938007677462, "grad_norm": 0.35052844882011414, "learning_rate": 1.5931306161984396e-05, "loss": 0.553, "step": 28163 }, { "epoch": 0.5973150092256791, "grad_norm": 0.34595170617103577, "learning_rate": 1.5931037659516507e-05, "loss": 0.4248, "step": 28164 }, { "epoch": 0.5973362176836122, "grad_norm": 0.3687556982040405, "learning_rate": 1.5930769150452173e-05, "loss": 0.5625, "step": 28165 }, { "epoch": 0.5973574261415453, "grad_norm": 0.59174644947052, "learning_rate": 1.5930500634791688e-05, "loss": 0.5462, "step": 28166 }, { "epoch": 0.5973786345994783, "grad_norm": 0.3798069655895233, "learning_rate": 1.593023211253535e-05, "loss": 0.4902, "step": 28167 }, { "epoch": 0.5973998430574113, "grad_norm": 0.36301878094673157, "learning_rate": 1.5929963583683462e-05, "loss": 0.5552, "step": 28168 }, { "epoch": 0.5974210515153443, "grad_norm": 0.40495458245277405, "learning_rate": 1.592969504823632e-05, "loss": 0.4702, "step": 28169 }, { "epoch": 0.5974422599732774, "grad_norm": 0.373562753200531, "learning_rate": 1.592942650619422e-05, "loss": 0.4976, "step": 28170 }, { "epoch": 0.5974634684312103, "grad_norm": 0.3714735805988312, "learning_rate": 1.5929157957557464e-05, "loss": 0.547, "step": 28171 }, { "epoch": 0.5974846768891434, "grad_norm": 0.34515324234962463, "learning_rate": 1.592888940232635e-05, "loss": 0.5397, "step": 28172 }, { "epoch": 0.5975058853470764, "grad_norm": 0.3541092872619629, "learning_rate": 1.5928620840501178e-05, "loss": 0.5419, "step": 28173 }, { "epoch": 0.5975270938050095, "grad_norm": 0.37328630685806274, "learning_rate": 1.5928352272082246e-05, "loss": 0.503, "step": 28174 }, { "epoch": 0.5975483022629424, "grad_norm": 0.35647183656692505, "learning_rate": 1.5928083697069848e-05, "loss": 0.5136, "step": 28175 }, { "epoch": 0.5975695107208755, "grad_norm": 0.341871976852417, "learning_rate": 1.5927815115464294e-05, "loss": 0.4449, "step": 28176 }, { "epoch": 0.5975907191788085, "grad_norm": 0.35308972001075745, "learning_rate": 1.592754652726587e-05, "loss": 0.4559, "step": 28177 }, { "epoch": 0.5976119276367415, "grad_norm": 0.39439794421195984, "learning_rate": 1.592727793247488e-05, "loss": 0.4772, "step": 28178 }, { "epoch": 0.5976331360946746, "grad_norm": 0.3592132329940796, "learning_rate": 1.5927009331091626e-05, "loss": 0.4587, "step": 28179 }, { "epoch": 0.5976543445526076, "grad_norm": 0.3212973475456238, "learning_rate": 1.5926740723116402e-05, "loss": 0.4322, "step": 28180 }, { "epoch": 0.5976755530105406, "grad_norm": 0.3918362855911255, "learning_rate": 1.592647210854951e-05, "loss": 0.4908, "step": 28181 }, { "epoch": 0.5976967614684736, "grad_norm": 0.3479367792606354, "learning_rate": 1.5926203487391247e-05, "loss": 0.5383, "step": 28182 }, { "epoch": 0.5977179699264067, "grad_norm": 0.3318924307823181, "learning_rate": 1.5925934859641906e-05, "loss": 0.4995, "step": 28183 }, { "epoch": 0.5977391783843397, "grad_norm": 0.36237862706184387, "learning_rate": 1.5925666225301798e-05, "loss": 0.5867, "step": 28184 }, { "epoch": 0.5977603868422727, "grad_norm": 0.3284294605255127, "learning_rate": 1.5925397584371215e-05, "loss": 0.4745, "step": 28185 }, { "epoch": 0.5977815953002057, "grad_norm": 0.4684944450855255, "learning_rate": 1.5925128936850455e-05, "loss": 0.5012, "step": 28186 }, { "epoch": 0.5978028037581388, "grad_norm": 0.328872948884964, "learning_rate": 1.592486028273982e-05, "loss": 0.5034, "step": 28187 }, { "epoch": 0.5978240122160717, "grad_norm": 0.3404182493686676, "learning_rate": 1.5924591622039606e-05, "loss": 0.5259, "step": 28188 }, { "epoch": 0.5978452206740048, "grad_norm": 0.35798901319503784, "learning_rate": 1.592432295475011e-05, "loss": 0.5376, "step": 28189 }, { "epoch": 0.5978664291319378, "grad_norm": 0.36580777168273926, "learning_rate": 1.5924054280871636e-05, "loss": 0.4813, "step": 28190 }, { "epoch": 0.5978876375898708, "grad_norm": 0.382428914308548, "learning_rate": 1.592378560040448e-05, "loss": 0.5104, "step": 28191 }, { "epoch": 0.5979088460478038, "grad_norm": 0.3386644721031189, "learning_rate": 1.5923516913348937e-05, "loss": 0.4837, "step": 28192 }, { "epoch": 0.5979300545057369, "grad_norm": 0.3635932505130768, "learning_rate": 1.5923248219705315e-05, "loss": 0.5131, "step": 28193 }, { "epoch": 0.59795126296367, "grad_norm": 0.37678563594818115, "learning_rate": 1.5922979519473908e-05, "loss": 0.4961, "step": 28194 }, { "epoch": 0.5979724714216029, "grad_norm": 0.36280369758605957, "learning_rate": 1.5922710812655014e-05, "loss": 0.5439, "step": 28195 }, { "epoch": 0.597993679879536, "grad_norm": 0.37784314155578613, "learning_rate": 1.592244209924893e-05, "loss": 0.4608, "step": 28196 }, { "epoch": 0.598014888337469, "grad_norm": 0.36579430103302, "learning_rate": 1.5922173379255957e-05, "loss": 0.4694, "step": 28197 }, { "epoch": 0.598036096795402, "grad_norm": 0.3616783618927002, "learning_rate": 1.5921904652676398e-05, "loss": 0.4814, "step": 28198 }, { "epoch": 0.598057305253335, "grad_norm": 0.31769758462905884, "learning_rate": 1.5921635919510547e-05, "loss": 0.4552, "step": 28199 }, { "epoch": 0.5980785137112681, "grad_norm": 0.33642783761024475, "learning_rate": 1.5921367179758704e-05, "loss": 0.4901, "step": 28200 }, { "epoch": 0.598099722169201, "grad_norm": 0.33527353405952454, "learning_rate": 1.5921098433421166e-05, "loss": 0.4605, "step": 28201 }, { "epoch": 0.5981209306271341, "grad_norm": 0.34133902192115784, "learning_rate": 1.5920829680498234e-05, "loss": 0.4549, "step": 28202 }, { "epoch": 0.5981421390850671, "grad_norm": 0.3201843798160553, "learning_rate": 1.5920560920990208e-05, "loss": 0.5217, "step": 28203 }, { "epoch": 0.5981633475430002, "grad_norm": 0.34096741676330566, "learning_rate": 1.5920292154897385e-05, "loss": 0.502, "step": 28204 }, { "epoch": 0.5981845560009331, "grad_norm": 0.40191754698753357, "learning_rate": 1.5920023382220063e-05, "loss": 0.5294, "step": 28205 }, { "epoch": 0.5982057644588662, "grad_norm": 0.38824939727783203, "learning_rate": 1.5919754602958547e-05, "loss": 0.4831, "step": 28206 }, { "epoch": 0.5982269729167993, "grad_norm": 0.38999083638191223, "learning_rate": 1.591948581711313e-05, "loss": 0.5323, "step": 28207 }, { "epoch": 0.5982481813747322, "grad_norm": 0.3815634846687317, "learning_rate": 1.5919217024684107e-05, "loss": 0.4898, "step": 28208 }, { "epoch": 0.5982693898326653, "grad_norm": 0.37950170040130615, "learning_rate": 1.5918948225671785e-05, "loss": 0.5302, "step": 28209 }, { "epoch": 0.5982905982905983, "grad_norm": 0.40061306953430176, "learning_rate": 1.5918679420076463e-05, "loss": 0.5349, "step": 28210 }, { "epoch": 0.5983118067485313, "grad_norm": 0.3728388845920563, "learning_rate": 1.5918410607898437e-05, "loss": 0.6023, "step": 28211 }, { "epoch": 0.5983330152064643, "grad_norm": 0.35536518692970276, "learning_rate": 1.5918141789138003e-05, "loss": 0.6103, "step": 28212 }, { "epoch": 0.5983542236643974, "grad_norm": 0.3781934976577759, "learning_rate": 1.5917872963795466e-05, "loss": 0.4396, "step": 28213 }, { "epoch": 0.5983754321223304, "grad_norm": 0.3088796138763428, "learning_rate": 1.591760413187112e-05, "loss": 0.4154, "step": 28214 }, { "epoch": 0.5983966405802634, "grad_norm": 0.43441662192344666, "learning_rate": 1.5917335293365267e-05, "loss": 0.5534, "step": 28215 }, { "epoch": 0.5984178490381964, "grad_norm": 0.4477209448814392, "learning_rate": 1.5917066448278205e-05, "loss": 0.6216, "step": 28216 }, { "epoch": 0.5984390574961295, "grad_norm": 0.35126784443855286, "learning_rate": 1.5916797596610233e-05, "loss": 0.4712, "step": 28217 }, { "epoch": 0.5984602659540624, "grad_norm": 0.34201934933662415, "learning_rate": 1.5916528738361648e-05, "loss": 0.4281, "step": 28218 }, { "epoch": 0.5984814744119955, "grad_norm": 0.33661413192749023, "learning_rate": 1.5916259873532752e-05, "loss": 0.5039, "step": 28219 }, { "epoch": 0.5985026828699286, "grad_norm": 0.3602829575538635, "learning_rate": 1.5915991002123847e-05, "loss": 0.5059, "step": 28220 }, { "epoch": 0.5985238913278615, "grad_norm": 0.33721449971199036, "learning_rate": 1.5915722124135227e-05, "loss": 0.458, "step": 28221 }, { "epoch": 0.5985450997857946, "grad_norm": 0.3484352231025696, "learning_rate": 1.5915453239567187e-05, "loss": 0.4953, "step": 28222 }, { "epoch": 0.5985663082437276, "grad_norm": 0.39288023114204407, "learning_rate": 1.5915184348420036e-05, "loss": 0.462, "step": 28223 }, { "epoch": 0.5985875167016607, "grad_norm": 0.4026052951812744, "learning_rate": 1.591491545069407e-05, "loss": 0.5354, "step": 28224 }, { "epoch": 0.5986087251595936, "grad_norm": 0.3776179254055023, "learning_rate": 1.591464654638958e-05, "loss": 0.4897, "step": 28225 }, { "epoch": 0.5986299336175267, "grad_norm": 0.3518778085708618, "learning_rate": 1.5914377635506877e-05, "loss": 0.562, "step": 28226 }, { "epoch": 0.5986511420754597, "grad_norm": 0.383569598197937, "learning_rate": 1.5914108718046254e-05, "loss": 0.5558, "step": 28227 }, { "epoch": 0.5986723505333927, "grad_norm": 0.33849310874938965, "learning_rate": 1.591383979400801e-05, "loss": 0.468, "step": 28228 }, { "epoch": 0.5986935589913257, "grad_norm": 0.3491300642490387, "learning_rate": 1.591357086339244e-05, "loss": 0.526, "step": 28229 }, { "epoch": 0.5987147674492588, "grad_norm": 0.3306080102920532, "learning_rate": 1.5913301926199857e-05, "loss": 0.5524, "step": 28230 }, { "epoch": 0.5987359759071917, "grad_norm": 0.3303320109844208, "learning_rate": 1.5913032982430546e-05, "loss": 0.4414, "step": 28231 }, { "epoch": 0.5987571843651248, "grad_norm": 0.33956247568130493, "learning_rate": 1.591276403208481e-05, "loss": 0.5515, "step": 28232 }, { "epoch": 0.5987783928230578, "grad_norm": 0.36645829677581787, "learning_rate": 1.5912495075162953e-05, "loss": 0.5563, "step": 28233 }, { "epoch": 0.5987996012809909, "grad_norm": 0.4109514057636261, "learning_rate": 1.5912226111665266e-05, "loss": 0.4365, "step": 28234 }, { "epoch": 0.5988208097389239, "grad_norm": 0.3235630393028259, "learning_rate": 1.5911957141592055e-05, "loss": 0.4748, "step": 28235 }, { "epoch": 0.5988420181968569, "grad_norm": 0.3357916474342346, "learning_rate": 1.5911688164943616e-05, "loss": 0.4471, "step": 28236 }, { "epoch": 0.59886322665479, "grad_norm": 0.3529699444770813, "learning_rate": 1.5911419181720246e-05, "loss": 0.4742, "step": 28237 }, { "epoch": 0.5988844351127229, "grad_norm": 0.4066004455089569, "learning_rate": 1.591115019192225e-05, "loss": 0.5341, "step": 28238 }, { "epoch": 0.598905643570656, "grad_norm": 0.37760502099990845, "learning_rate": 1.5910881195549924e-05, "loss": 0.5235, "step": 28239 }, { "epoch": 0.598926852028589, "grad_norm": 0.38660338521003723, "learning_rate": 1.591061219260357e-05, "loss": 0.5211, "step": 28240 }, { "epoch": 0.598948060486522, "grad_norm": 0.33596473932266235, "learning_rate": 1.5910343183083478e-05, "loss": 0.4786, "step": 28241 }, { "epoch": 0.598969268944455, "grad_norm": 0.37473565340042114, "learning_rate": 1.5910074166989958e-05, "loss": 0.507, "step": 28242 }, { "epoch": 0.5989904774023881, "grad_norm": 0.5651763081550598, "learning_rate": 1.5909805144323306e-05, "loss": 0.4388, "step": 28243 }, { "epoch": 0.599011685860321, "grad_norm": 0.35586440563201904, "learning_rate": 1.5909536115083818e-05, "loss": 0.4259, "step": 28244 }, { "epoch": 0.5990328943182541, "grad_norm": 0.47125330567359924, "learning_rate": 1.5909267079271794e-05, "loss": 0.4941, "step": 28245 }, { "epoch": 0.5990541027761871, "grad_norm": 0.39886540174484253, "learning_rate": 1.5908998036887537e-05, "loss": 0.5184, "step": 28246 }, { "epoch": 0.5990753112341202, "grad_norm": 0.3455311059951782, "learning_rate": 1.5908728987931337e-05, "loss": 0.4713, "step": 28247 }, { "epoch": 0.5990965196920532, "grad_norm": 0.35012343525886536, "learning_rate": 1.5908459932403507e-05, "loss": 0.4725, "step": 28248 }, { "epoch": 0.5991177281499862, "grad_norm": 0.36848872900009155, "learning_rate": 1.5908190870304338e-05, "loss": 0.3964, "step": 28249 }, { "epoch": 0.5991389366079193, "grad_norm": 0.3633292317390442, "learning_rate": 1.590792180163413e-05, "loss": 0.4395, "step": 28250 }, { "epoch": 0.5991601450658522, "grad_norm": 0.35618487000465393, "learning_rate": 1.5907652726393184e-05, "loss": 0.4912, "step": 28251 }, { "epoch": 0.5991813535237853, "grad_norm": 0.32216212153434753, "learning_rate": 1.5907383644581796e-05, "loss": 0.4016, "step": 28252 }, { "epoch": 0.5992025619817183, "grad_norm": 0.3298405706882477, "learning_rate": 1.590711455620027e-05, "loss": 0.4429, "step": 28253 }, { "epoch": 0.5992237704396514, "grad_norm": 0.370897114276886, "learning_rate": 1.59068454612489e-05, "loss": 0.5327, "step": 28254 }, { "epoch": 0.5992449788975843, "grad_norm": 0.3429828882217407, "learning_rate": 1.5906576359727986e-05, "loss": 0.5588, "step": 28255 }, { "epoch": 0.5992661873555174, "grad_norm": 0.3659321367740631, "learning_rate": 1.5906307251637835e-05, "loss": 0.4844, "step": 28256 }, { "epoch": 0.5992873958134504, "grad_norm": 0.4279347062110901, "learning_rate": 1.5906038136978733e-05, "loss": 0.559, "step": 28257 }, { "epoch": 0.5993086042713834, "grad_norm": 0.42042675614356995, "learning_rate": 1.5905769015750992e-05, "loss": 0.5746, "step": 28258 }, { "epoch": 0.5993298127293164, "grad_norm": 0.3492578864097595, "learning_rate": 1.5905499887954907e-05, "loss": 0.498, "step": 28259 }, { "epoch": 0.5993510211872495, "grad_norm": 0.5616193413734436, "learning_rate": 1.590523075359077e-05, "loss": 0.4377, "step": 28260 }, { "epoch": 0.5993722296451826, "grad_norm": 0.4158271253108978, "learning_rate": 1.590496161265889e-05, "loss": 0.5218, "step": 28261 }, { "epoch": 0.5993934381031155, "grad_norm": 0.3586195111274719, "learning_rate": 1.590469246515957e-05, "loss": 0.4358, "step": 28262 }, { "epoch": 0.5994146465610486, "grad_norm": 0.36789509654045105, "learning_rate": 1.5904423311093093e-05, "loss": 0.4642, "step": 28263 }, { "epoch": 0.5994358550189816, "grad_norm": 0.3375263810157776, "learning_rate": 1.590415415045977e-05, "loss": 0.5178, "step": 28264 }, { "epoch": 0.5994570634769146, "grad_norm": 0.3221713900566101, "learning_rate": 1.59038849832599e-05, "loss": 0.4356, "step": 28265 }, { "epoch": 0.5994782719348476, "grad_norm": 0.3783475458621979, "learning_rate": 1.5903615809493778e-05, "loss": 0.4703, "step": 28266 }, { "epoch": 0.5994994803927807, "grad_norm": 0.378824919462204, "learning_rate": 1.5903346629161704e-05, "loss": 0.473, "step": 28267 }, { "epoch": 0.5995206888507136, "grad_norm": 0.46818071603775024, "learning_rate": 1.590307744226398e-05, "loss": 0.5237, "step": 28268 }, { "epoch": 0.5995418973086467, "grad_norm": 0.3353840112686157, "learning_rate": 1.590280824880091e-05, "loss": 0.4952, "step": 28269 }, { "epoch": 0.5995631057665797, "grad_norm": 0.36121416091918945, "learning_rate": 1.5902539048772783e-05, "loss": 0.5239, "step": 28270 }, { "epoch": 0.5995843142245127, "grad_norm": 0.31467127799987793, "learning_rate": 1.5902269842179906e-05, "loss": 0.4387, "step": 28271 }, { "epoch": 0.5996055226824457, "grad_norm": 0.37714216113090515, "learning_rate": 1.5902000629022575e-05, "loss": 0.4797, "step": 28272 }, { "epoch": 0.5996267311403788, "grad_norm": 0.3517466187477112, "learning_rate": 1.5901731409301088e-05, "loss": 0.532, "step": 28273 }, { "epoch": 0.5996479395983118, "grad_norm": 0.4157122075557709, "learning_rate": 1.590146218301575e-05, "loss": 0.5095, "step": 28274 }, { "epoch": 0.5996691480562448, "grad_norm": 0.39406025409698486, "learning_rate": 1.5901192950166857e-05, "loss": 0.5318, "step": 28275 }, { "epoch": 0.5996903565141779, "grad_norm": 0.37503835558891296, "learning_rate": 1.5900923710754702e-05, "loss": 0.597, "step": 28276 }, { "epoch": 0.5997115649721109, "grad_norm": 0.39474043250083923, "learning_rate": 1.5900654464779598e-05, "loss": 0.452, "step": 28277 }, { "epoch": 0.5997327734300439, "grad_norm": 0.3676651120185852, "learning_rate": 1.5900385212241835e-05, "loss": 0.4583, "step": 28278 }, { "epoch": 0.5997539818879769, "grad_norm": 0.5482077598571777, "learning_rate": 1.5900115953141715e-05, "loss": 0.4791, "step": 28279 }, { "epoch": 0.59977519034591, "grad_norm": 0.3890932500362396, "learning_rate": 1.5899846687479535e-05, "loss": 0.5096, "step": 28280 }, { "epoch": 0.599796398803843, "grad_norm": 0.3586229681968689, "learning_rate": 1.5899577415255598e-05, "loss": 0.5074, "step": 28281 }, { "epoch": 0.599817607261776, "grad_norm": 0.35562747716903687, "learning_rate": 1.5899308136470205e-05, "loss": 0.5441, "step": 28282 }, { "epoch": 0.599838815719709, "grad_norm": 0.398404598236084, "learning_rate": 1.589903885112365e-05, "loss": 0.436, "step": 28283 }, { "epoch": 0.5998600241776421, "grad_norm": 0.32254958152770996, "learning_rate": 1.5898769559216237e-05, "loss": 0.4811, "step": 28284 }, { "epoch": 0.599881232635575, "grad_norm": 0.46779361367225647, "learning_rate": 1.589850026074826e-05, "loss": 0.5242, "step": 28285 }, { "epoch": 0.5999024410935081, "grad_norm": 0.34140270948410034, "learning_rate": 1.589823095572003e-05, "loss": 0.5215, "step": 28286 }, { "epoch": 0.5999236495514411, "grad_norm": 0.4529072046279907, "learning_rate": 1.5897961644131828e-05, "loss": 0.4971, "step": 28287 }, { "epoch": 0.5999448580093741, "grad_norm": 0.45548877120018005, "learning_rate": 1.5897692325983974e-05, "loss": 0.5275, "step": 28288 }, { "epoch": 0.5999660664673072, "grad_norm": 0.361032098531723, "learning_rate": 1.5897423001276754e-05, "loss": 0.5242, "step": 28289 }, { "epoch": 0.5999872749252402, "grad_norm": 0.3821198344230652, "learning_rate": 1.589715367001047e-05, "loss": 0.5092, "step": 28290 }, { "epoch": 0.6000084833831733, "grad_norm": 0.3494918942451477, "learning_rate": 1.5896884332185427e-05, "loss": 0.5246, "step": 28291 }, { "epoch": 0.6000296918411062, "grad_norm": 0.39556455612182617, "learning_rate": 1.5896614987801916e-05, "loss": 0.5647, "step": 28292 }, { "epoch": 0.6000509002990393, "grad_norm": 0.3513489067554474, "learning_rate": 1.5896345636860243e-05, "loss": 0.4611, "step": 28293 }, { "epoch": 0.6000721087569723, "grad_norm": 0.5043058395385742, "learning_rate": 1.5896076279360705e-05, "loss": 0.646, "step": 28294 }, { "epoch": 0.6000933172149053, "grad_norm": 0.364622563123703, "learning_rate": 1.58958069153036e-05, "loss": 0.5252, "step": 28295 }, { "epoch": 0.6001145256728383, "grad_norm": 0.37562716007232666, "learning_rate": 1.5895537544689234e-05, "loss": 0.5889, "step": 28296 }, { "epoch": 0.6001357341307714, "grad_norm": 0.3475149869918823, "learning_rate": 1.5895268167517902e-05, "loss": 0.4374, "step": 28297 }, { "epoch": 0.6001569425887043, "grad_norm": 0.40749499201774597, "learning_rate": 1.58949987837899e-05, "loss": 0.5549, "step": 28298 }, { "epoch": 0.6001781510466374, "grad_norm": 0.3709664046764374, "learning_rate": 1.5894729393505535e-05, "loss": 0.4765, "step": 28299 }, { "epoch": 0.6001993595045704, "grad_norm": 0.3593684434890747, "learning_rate": 1.5894459996665105e-05, "loss": 0.5531, "step": 28300 }, { "epoch": 0.6002205679625034, "grad_norm": 0.40719765424728394, "learning_rate": 1.5894190593268904e-05, "loss": 0.4942, "step": 28301 }, { "epoch": 0.6002417764204365, "grad_norm": 0.5300114750862122, "learning_rate": 1.5893921183317237e-05, "loss": 0.5933, "step": 28302 }, { "epoch": 0.6002629848783695, "grad_norm": 0.35991114377975464, "learning_rate": 1.5893651766810402e-05, "loss": 0.4525, "step": 28303 }, { "epoch": 0.6002841933363026, "grad_norm": 0.3800285756587982, "learning_rate": 1.5893382343748698e-05, "loss": 0.5927, "step": 28304 }, { "epoch": 0.6003054017942355, "grad_norm": 0.37097203731536865, "learning_rate": 1.5893112914132426e-05, "loss": 0.5925, "step": 28305 }, { "epoch": 0.6003266102521686, "grad_norm": 0.3466252386569977, "learning_rate": 1.5892843477961884e-05, "loss": 0.5251, "step": 28306 }, { "epoch": 0.6003478187101016, "grad_norm": 0.3475759029388428, "learning_rate": 1.589257403523737e-05, "loss": 0.5031, "step": 28307 }, { "epoch": 0.6003690271680346, "grad_norm": 0.36593618988990784, "learning_rate": 1.5892304585959193e-05, "loss": 0.4969, "step": 28308 }, { "epoch": 0.6003902356259676, "grad_norm": 0.37690073251724243, "learning_rate": 1.5892035130127644e-05, "loss": 0.537, "step": 28309 }, { "epoch": 0.6004114440839007, "grad_norm": 0.3842712938785553, "learning_rate": 1.589176566774302e-05, "loss": 0.5301, "step": 28310 }, { "epoch": 0.6004326525418336, "grad_norm": 0.3672480583190918, "learning_rate": 1.589149619880563e-05, "loss": 0.5808, "step": 28311 }, { "epoch": 0.6004538609997667, "grad_norm": 0.5454379916191101, "learning_rate": 1.5891226723315768e-05, "loss": 0.4305, "step": 28312 }, { "epoch": 0.6004750694576997, "grad_norm": 0.3675972521305084, "learning_rate": 1.5890957241273737e-05, "loss": 0.4653, "step": 28313 }, { "epoch": 0.6004962779156328, "grad_norm": 0.32379040122032166, "learning_rate": 1.589068775267983e-05, "loss": 0.4712, "step": 28314 }, { "epoch": 0.6005174863735657, "grad_norm": 0.3538479208946228, "learning_rate": 1.5890418257534356e-05, "loss": 0.5554, "step": 28315 }, { "epoch": 0.6005386948314988, "grad_norm": 0.33705103397369385, "learning_rate": 1.5890148755837606e-05, "loss": 0.4853, "step": 28316 }, { "epoch": 0.6005599032894319, "grad_norm": 0.397330105304718, "learning_rate": 1.5889879247589886e-05, "loss": 0.5524, "step": 28317 }, { "epoch": 0.6005811117473648, "grad_norm": 0.3355109989643097, "learning_rate": 1.5889609732791492e-05, "loss": 0.4442, "step": 28318 }, { "epoch": 0.6006023202052979, "grad_norm": 0.338711142539978, "learning_rate": 1.5889340211442724e-05, "loss": 0.5056, "step": 28319 }, { "epoch": 0.6006235286632309, "grad_norm": 0.3407413363456726, "learning_rate": 1.5889070683543887e-05, "loss": 0.6477, "step": 28320 }, { "epoch": 0.600644737121164, "grad_norm": 0.361360102891922, "learning_rate": 1.5888801149095273e-05, "loss": 0.5618, "step": 28321 }, { "epoch": 0.6006659455790969, "grad_norm": 0.35932108759880066, "learning_rate": 1.5888531608097188e-05, "loss": 0.6023, "step": 28322 }, { "epoch": 0.60068715403703, "grad_norm": 0.35974234342575073, "learning_rate": 1.5888262060549928e-05, "loss": 0.5934, "step": 28323 }, { "epoch": 0.600708362494963, "grad_norm": 0.33580732345581055, "learning_rate": 1.5887992506453795e-05, "loss": 0.5314, "step": 28324 }, { "epoch": 0.600729570952896, "grad_norm": 0.3552122116088867, "learning_rate": 1.588772294580909e-05, "loss": 0.4935, "step": 28325 }, { "epoch": 0.600750779410829, "grad_norm": 0.4154440760612488, "learning_rate": 1.5887453378616106e-05, "loss": 0.5306, "step": 28326 }, { "epoch": 0.6007719878687621, "grad_norm": 0.3253573477268219, "learning_rate": 1.588718380487515e-05, "loss": 0.4997, "step": 28327 }, { "epoch": 0.600793196326695, "grad_norm": 0.3672424256801605, "learning_rate": 1.588691422458652e-05, "loss": 0.4497, "step": 28328 }, { "epoch": 0.6008144047846281, "grad_norm": 0.32854947447776794, "learning_rate": 1.5886644637750515e-05, "loss": 0.4977, "step": 28329 }, { "epoch": 0.6008356132425612, "grad_norm": 0.3633081912994385, "learning_rate": 1.5886375044367432e-05, "loss": 0.5417, "step": 28330 }, { "epoch": 0.6008568217004941, "grad_norm": 0.39213523268699646, "learning_rate": 1.5886105444437575e-05, "loss": 0.4761, "step": 28331 }, { "epoch": 0.6008780301584272, "grad_norm": 0.5657333135604858, "learning_rate": 1.5885835837961243e-05, "loss": 0.5135, "step": 28332 }, { "epoch": 0.6008992386163602, "grad_norm": 0.4095478355884552, "learning_rate": 1.5885566224938736e-05, "loss": 0.512, "step": 28333 }, { "epoch": 0.6009204470742933, "grad_norm": 0.3408052921295166, "learning_rate": 1.5885296605370355e-05, "loss": 0.4638, "step": 28334 }, { "epoch": 0.6009416555322262, "grad_norm": 0.43074989318847656, "learning_rate": 1.58850269792564e-05, "loss": 0.5033, "step": 28335 }, { "epoch": 0.6009628639901593, "grad_norm": 0.5290780067443848, "learning_rate": 1.588475734659716e-05, "loss": 0.5203, "step": 28336 }, { "epoch": 0.6009840724480923, "grad_norm": 0.3264453411102295, "learning_rate": 1.5884487707392954e-05, "loss": 0.4923, "step": 28337 }, { "epoch": 0.6010052809060253, "grad_norm": 0.3410972058773041, "learning_rate": 1.5884218061644065e-05, "loss": 0.4689, "step": 28338 }, { "epoch": 0.6010264893639583, "grad_norm": 0.37896960973739624, "learning_rate": 1.5883948409350803e-05, "loss": 0.548, "step": 28339 }, { "epoch": 0.6010476978218914, "grad_norm": 0.3141428828239441, "learning_rate": 1.5883678750513463e-05, "loss": 0.4363, "step": 28340 }, { "epoch": 0.6010689062798243, "grad_norm": 0.3970956802368164, "learning_rate": 1.588340908513235e-05, "loss": 0.512, "step": 28341 }, { "epoch": 0.6010901147377574, "grad_norm": 0.3732426166534424, "learning_rate": 1.588313941320776e-05, "loss": 0.5146, "step": 28342 }, { "epoch": 0.6011113231956905, "grad_norm": 0.3408198952674866, "learning_rate": 1.588286973473999e-05, "loss": 0.5113, "step": 28343 }, { "epoch": 0.6011325316536235, "grad_norm": 0.3542202115058899, "learning_rate": 1.5882600049729344e-05, "loss": 0.5434, "step": 28344 }, { "epoch": 0.6011537401115565, "grad_norm": 0.31494438648223877, "learning_rate": 1.5882330358176122e-05, "loss": 0.4813, "step": 28345 }, { "epoch": 0.6011749485694895, "grad_norm": 0.39622339606285095, "learning_rate": 1.5882060660080625e-05, "loss": 0.5643, "step": 28346 }, { "epoch": 0.6011961570274226, "grad_norm": 0.4459351897239685, "learning_rate": 1.588179095544315e-05, "loss": 0.516, "step": 28347 }, { "epoch": 0.6012173654853555, "grad_norm": 0.3697505593299866, "learning_rate": 1.5881521244264e-05, "loss": 0.5187, "step": 28348 }, { "epoch": 0.6012385739432886, "grad_norm": 0.3602934181690216, "learning_rate": 1.588125152654347e-05, "loss": 0.5273, "step": 28349 }, { "epoch": 0.6012597824012216, "grad_norm": 0.3778429925441742, "learning_rate": 1.5880981802281863e-05, "loss": 0.4433, "step": 28350 }, { "epoch": 0.6012809908591547, "grad_norm": 0.3158893883228302, "learning_rate": 1.588071207147948e-05, "loss": 0.4411, "step": 28351 }, { "epoch": 0.6013021993170876, "grad_norm": 0.5011118054389954, "learning_rate": 1.5880442334136625e-05, "loss": 0.4836, "step": 28352 }, { "epoch": 0.6013234077750207, "grad_norm": 0.3521072566509247, "learning_rate": 1.5880172590253585e-05, "loss": 0.4181, "step": 28353 }, { "epoch": 0.6013446162329537, "grad_norm": 0.3729068338871002, "learning_rate": 1.5879902839830676e-05, "loss": 0.5536, "step": 28354 }, { "epoch": 0.6013658246908867, "grad_norm": 0.35884958505630493, "learning_rate": 1.587963308286818e-05, "loss": 0.4373, "step": 28355 }, { "epoch": 0.6013870331488197, "grad_norm": 1.6080355644226074, "learning_rate": 1.5879363319366415e-05, "loss": 0.4761, "step": 28356 }, { "epoch": 0.6014082416067528, "grad_norm": 0.378849059343338, "learning_rate": 1.5879093549325674e-05, "loss": 0.4958, "step": 28357 }, { "epoch": 0.6014294500646858, "grad_norm": 0.3755452334880829, "learning_rate": 1.587882377274625e-05, "loss": 0.5424, "step": 28358 }, { "epoch": 0.6014506585226188, "grad_norm": 0.3905968964099884, "learning_rate": 1.5878553989628456e-05, "loss": 0.5509, "step": 28359 }, { "epoch": 0.6014718669805519, "grad_norm": 0.40722009539604187, "learning_rate": 1.587828419997258e-05, "loss": 0.5216, "step": 28360 }, { "epoch": 0.6014930754384848, "grad_norm": 0.3359719216823578, "learning_rate": 1.5878014403778933e-05, "loss": 0.5156, "step": 28361 }, { "epoch": 0.6015142838964179, "grad_norm": 0.32790157198905945, "learning_rate": 1.5877744601047806e-05, "loss": 0.4925, "step": 28362 }, { "epoch": 0.6015354923543509, "grad_norm": 0.5364903211593628, "learning_rate": 1.58774747917795e-05, "loss": 0.5216, "step": 28363 }, { "epoch": 0.601556700812284, "grad_norm": 0.3606014847755432, "learning_rate": 1.5877204975974322e-05, "loss": 0.4702, "step": 28364 }, { "epoch": 0.6015779092702169, "grad_norm": 0.35847222805023193, "learning_rate": 1.5876935153632565e-05, "loss": 0.5348, "step": 28365 }, { "epoch": 0.60159911772815, "grad_norm": 0.4426126778125763, "learning_rate": 1.5876665324754534e-05, "loss": 0.5409, "step": 28366 }, { "epoch": 0.601620326186083, "grad_norm": 0.37250322103500366, "learning_rate": 1.5876395489340524e-05, "loss": 0.5332, "step": 28367 }, { "epoch": 0.601641534644016, "grad_norm": 0.3390820324420929, "learning_rate": 1.587612564739084e-05, "loss": 0.5263, "step": 28368 }, { "epoch": 0.601662743101949, "grad_norm": 0.3681863248348236, "learning_rate": 1.5875855798905777e-05, "loss": 0.5281, "step": 28369 }, { "epoch": 0.6016839515598821, "grad_norm": 0.340795636177063, "learning_rate": 1.5875585943885642e-05, "loss": 0.5427, "step": 28370 }, { "epoch": 0.6017051600178152, "grad_norm": 0.37417325377464294, "learning_rate": 1.5875316082330732e-05, "loss": 0.5268, "step": 28371 }, { "epoch": 0.6017263684757481, "grad_norm": 0.35203251242637634, "learning_rate": 1.5875046214241347e-05, "loss": 0.4684, "step": 28372 }, { "epoch": 0.6017475769336812, "grad_norm": 0.3569452464580536, "learning_rate": 1.5874776339617782e-05, "loss": 0.5451, "step": 28373 }, { "epoch": 0.6017687853916142, "grad_norm": 0.3259051740169525, "learning_rate": 1.5874506458460345e-05, "loss": 0.4309, "step": 28374 }, { "epoch": 0.6017899938495472, "grad_norm": 0.4359584450721741, "learning_rate": 1.5874236570769333e-05, "loss": 0.5011, "step": 28375 }, { "epoch": 0.6018112023074802, "grad_norm": 0.35088062286376953, "learning_rate": 1.5873966676545043e-05, "loss": 0.595, "step": 28376 }, { "epoch": 0.6018324107654133, "grad_norm": 0.36047857999801636, "learning_rate": 1.587369677578778e-05, "loss": 0.45, "step": 28377 }, { "epoch": 0.6018536192233462, "grad_norm": 0.6431945562362671, "learning_rate": 1.5873426868497844e-05, "loss": 0.572, "step": 28378 }, { "epoch": 0.6018748276812793, "grad_norm": 0.35572633147239685, "learning_rate": 1.5873156954675533e-05, "loss": 0.4669, "step": 28379 }, { "epoch": 0.6018960361392123, "grad_norm": 0.501108705997467, "learning_rate": 1.5872887034321148e-05, "loss": 0.5721, "step": 28380 }, { "epoch": 0.6019172445971454, "grad_norm": 0.3778773248195648, "learning_rate": 1.5872617107434987e-05, "loss": 0.5362, "step": 28381 }, { "epoch": 0.6019384530550783, "grad_norm": 0.37763309478759766, "learning_rate": 1.587234717401735e-05, "loss": 0.4884, "step": 28382 }, { "epoch": 0.6019596615130114, "grad_norm": 0.3257872462272644, "learning_rate": 1.5872077234068546e-05, "loss": 0.5436, "step": 28383 }, { "epoch": 0.6019808699709445, "grad_norm": 0.32537761330604553, "learning_rate": 1.5871807287588867e-05, "loss": 0.4539, "step": 28384 }, { "epoch": 0.6020020784288774, "grad_norm": 0.36648234724998474, "learning_rate": 1.5871537334578614e-05, "loss": 0.5232, "step": 28385 }, { "epoch": 0.6020232868868105, "grad_norm": 0.3636138439178467, "learning_rate": 1.587126737503809e-05, "loss": 0.3909, "step": 28386 }, { "epoch": 0.6020444953447435, "grad_norm": 0.3986629545688629, "learning_rate": 1.587099740896759e-05, "loss": 0.4712, "step": 28387 }, { "epoch": 0.6020657038026765, "grad_norm": 0.4483901560306549, "learning_rate": 1.5870727436367418e-05, "loss": 0.4961, "step": 28388 }, { "epoch": 0.6020869122606095, "grad_norm": 0.34473079442977905, "learning_rate": 1.5870457457237874e-05, "loss": 0.4619, "step": 28389 }, { "epoch": 0.6021081207185426, "grad_norm": 0.3453851044178009, "learning_rate": 1.587018747157926e-05, "loss": 0.4956, "step": 28390 }, { "epoch": 0.6021293291764755, "grad_norm": 0.39263901114463806, "learning_rate": 1.5869917479391875e-05, "loss": 0.5306, "step": 28391 }, { "epoch": 0.6021505376344086, "grad_norm": 0.30478593707084656, "learning_rate": 1.586964748067602e-05, "loss": 0.4664, "step": 28392 }, { "epoch": 0.6021717460923416, "grad_norm": 0.372690886259079, "learning_rate": 1.586937747543199e-05, "loss": 0.5532, "step": 28393 }, { "epoch": 0.6021929545502747, "grad_norm": 0.43775999546051025, "learning_rate": 1.5869107463660092e-05, "loss": 0.4829, "step": 28394 }, { "epoch": 0.6022141630082076, "grad_norm": 0.3340784013271332, "learning_rate": 1.586883744536062e-05, "loss": 0.5098, "step": 28395 }, { "epoch": 0.6022353714661407, "grad_norm": 0.34856081008911133, "learning_rate": 1.5868567420533883e-05, "loss": 0.478, "step": 28396 }, { "epoch": 0.6022565799240738, "grad_norm": 0.45310017466545105, "learning_rate": 1.5868297389180174e-05, "loss": 0.533, "step": 28397 }, { "epoch": 0.6022777883820067, "grad_norm": 0.3324471712112427, "learning_rate": 1.5868027351299795e-05, "loss": 0.4927, "step": 28398 }, { "epoch": 0.6022989968399398, "grad_norm": 0.3719002306461334, "learning_rate": 1.5867757306893047e-05, "loss": 0.5649, "step": 28399 }, { "epoch": 0.6023202052978728, "grad_norm": 0.3662759065628052, "learning_rate": 1.5867487255960232e-05, "loss": 0.4807, "step": 28400 }, { "epoch": 0.6023414137558059, "grad_norm": 0.3531191349029541, "learning_rate": 1.5867217198501645e-05, "loss": 0.48, "step": 28401 }, { "epoch": 0.6023626222137388, "grad_norm": 0.34100770950317383, "learning_rate": 1.586694713451759e-05, "loss": 0.4656, "step": 28402 }, { "epoch": 0.6023838306716719, "grad_norm": 3.247074604034424, "learning_rate": 1.5866677064008374e-05, "loss": 0.6298, "step": 28403 }, { "epoch": 0.6024050391296049, "grad_norm": 0.3449726104736328, "learning_rate": 1.5866406986974287e-05, "loss": 0.485, "step": 28404 }, { "epoch": 0.6024262475875379, "grad_norm": 0.48486387729644775, "learning_rate": 1.5866136903415633e-05, "loss": 0.4744, "step": 28405 }, { "epoch": 0.6024474560454709, "grad_norm": 0.3378617763519287, "learning_rate": 1.5865866813332712e-05, "loss": 0.5103, "step": 28406 }, { "epoch": 0.602468664503404, "grad_norm": 0.39924177527427673, "learning_rate": 1.5865596716725827e-05, "loss": 0.5355, "step": 28407 }, { "epoch": 0.6024898729613369, "grad_norm": 0.3734852075576782, "learning_rate": 1.586532661359527e-05, "loss": 0.4746, "step": 28408 }, { "epoch": 0.60251108141927, "grad_norm": 0.4075023829936981, "learning_rate": 1.5865056503941355e-05, "loss": 0.5227, "step": 28409 }, { "epoch": 0.602532289877203, "grad_norm": 0.3396620452404022, "learning_rate": 1.586478638776437e-05, "loss": 0.4651, "step": 28410 }, { "epoch": 0.602553498335136, "grad_norm": 0.377221018075943, "learning_rate": 1.5864516265064623e-05, "loss": 0.4997, "step": 28411 }, { "epoch": 0.6025747067930691, "grad_norm": 0.370838463306427, "learning_rate": 1.5864246135842413e-05, "loss": 0.4294, "step": 28412 }, { "epoch": 0.6025959152510021, "grad_norm": 0.35680970549583435, "learning_rate": 1.586397600009804e-05, "loss": 0.5089, "step": 28413 }, { "epoch": 0.6026171237089352, "grad_norm": 0.3528456389904022, "learning_rate": 1.58637058578318e-05, "loss": 0.5209, "step": 28414 }, { "epoch": 0.6026383321668681, "grad_norm": 0.7393240928649902, "learning_rate": 1.5863435709043996e-05, "loss": 0.4639, "step": 28415 }, { "epoch": 0.6026595406248012, "grad_norm": 0.3589988648891449, "learning_rate": 1.5863165553734936e-05, "loss": 0.4861, "step": 28416 }, { "epoch": 0.6026807490827342, "grad_norm": 0.3583018183708191, "learning_rate": 1.5862895391904913e-05, "loss": 0.5189, "step": 28417 }, { "epoch": 0.6027019575406672, "grad_norm": 0.3300805389881134, "learning_rate": 1.5862625223554226e-05, "loss": 0.545, "step": 28418 }, { "epoch": 0.6027231659986002, "grad_norm": 0.3447268307209015, "learning_rate": 1.5862355048683177e-05, "loss": 0.4809, "step": 28419 }, { "epoch": 0.6027443744565333, "grad_norm": 0.38029640913009644, "learning_rate": 1.586208486729207e-05, "loss": 0.6071, "step": 28420 }, { "epoch": 0.6027655829144662, "grad_norm": 0.35263392329216003, "learning_rate": 1.5861814679381203e-05, "loss": 0.5571, "step": 28421 }, { "epoch": 0.6027867913723993, "grad_norm": 0.361077219247818, "learning_rate": 1.5861544484950876e-05, "loss": 0.517, "step": 28422 }, { "epoch": 0.6028079998303323, "grad_norm": 0.37612029910087585, "learning_rate": 1.586127428400139e-05, "loss": 0.5603, "step": 28423 }, { "epoch": 0.6028292082882654, "grad_norm": 0.4210160970687866, "learning_rate": 1.5861004076533046e-05, "loss": 0.5061, "step": 28424 }, { "epoch": 0.6028504167461984, "grad_norm": 0.37416696548461914, "learning_rate": 1.5860733862546145e-05, "loss": 0.4615, "step": 28425 }, { "epoch": 0.6028716252041314, "grad_norm": 0.37223973870277405, "learning_rate": 1.5860463642040986e-05, "loss": 0.6197, "step": 28426 }, { "epoch": 0.6028928336620645, "grad_norm": 0.39952415227890015, "learning_rate": 1.5860193415017872e-05, "loss": 0.4796, "step": 28427 }, { "epoch": 0.6029140421199974, "grad_norm": 0.3323288857936859, "learning_rate": 1.58599231814771e-05, "loss": 0.5046, "step": 28428 }, { "epoch": 0.6029352505779305, "grad_norm": 0.3632332682609558, "learning_rate": 1.585965294141897e-05, "loss": 0.4651, "step": 28429 }, { "epoch": 0.6029564590358635, "grad_norm": 0.4482518136501312, "learning_rate": 1.585938269484379e-05, "loss": 0.5398, "step": 28430 }, { "epoch": 0.6029776674937966, "grad_norm": 0.34950682520866394, "learning_rate": 1.5859112441751854e-05, "loss": 0.4792, "step": 28431 }, { "epoch": 0.6029988759517295, "grad_norm": 0.327619343996048, "learning_rate": 1.585884218214346e-05, "loss": 0.5365, "step": 28432 }, { "epoch": 0.6030200844096626, "grad_norm": 0.3982923924922943, "learning_rate": 1.5858571916018917e-05, "loss": 0.509, "step": 28433 }, { "epoch": 0.6030412928675956, "grad_norm": 0.3922085165977478, "learning_rate": 1.5858301643378518e-05, "loss": 0.521, "step": 28434 }, { "epoch": 0.6030625013255286, "grad_norm": 0.4125787317752838, "learning_rate": 1.585803136422257e-05, "loss": 0.4558, "step": 28435 }, { "epoch": 0.6030837097834616, "grad_norm": 0.41156551241874695, "learning_rate": 1.5857761078551368e-05, "loss": 0.4991, "step": 28436 }, { "epoch": 0.6031049182413947, "grad_norm": 0.399738609790802, "learning_rate": 1.5857490786365217e-05, "loss": 0.5347, "step": 28437 }, { "epoch": 0.6031261266993277, "grad_norm": 0.4958704113960266, "learning_rate": 1.5857220487664413e-05, "loss": 0.5093, "step": 28438 }, { "epoch": 0.6031473351572607, "grad_norm": 0.361842542886734, "learning_rate": 1.585695018244926e-05, "loss": 0.5033, "step": 28439 }, { "epoch": 0.6031685436151938, "grad_norm": 0.3291238248348236, "learning_rate": 1.585667987072006e-05, "loss": 0.5439, "step": 28440 }, { "epoch": 0.6031897520731268, "grad_norm": 0.33614999055862427, "learning_rate": 1.585640955247711e-05, "loss": 0.4972, "step": 28441 }, { "epoch": 0.6032109605310598, "grad_norm": 0.3971332907676697, "learning_rate": 1.5856139227720714e-05, "loss": 0.5518, "step": 28442 }, { "epoch": 0.6032321689889928, "grad_norm": 0.36506387591362, "learning_rate": 1.585586889645117e-05, "loss": 0.5391, "step": 28443 }, { "epoch": 0.6032533774469259, "grad_norm": 0.4233555793762207, "learning_rate": 1.5855598558668775e-05, "loss": 0.512, "step": 28444 }, { "epoch": 0.6032745859048588, "grad_norm": 0.2988923490047455, "learning_rate": 1.585532821437384e-05, "loss": 0.3999, "step": 28445 }, { "epoch": 0.6032957943627919, "grad_norm": 0.37119531631469727, "learning_rate": 1.5855057863566657e-05, "loss": 0.4459, "step": 28446 }, { "epoch": 0.6033170028207249, "grad_norm": 0.40931692719459534, "learning_rate": 1.585478750624753e-05, "loss": 0.4997, "step": 28447 }, { "epoch": 0.6033382112786579, "grad_norm": 0.3247581124305725, "learning_rate": 1.585451714241676e-05, "loss": 0.4846, "step": 28448 }, { "epoch": 0.6033594197365909, "grad_norm": 0.3535074293613434, "learning_rate": 1.5854246772074645e-05, "loss": 0.4615, "step": 28449 }, { "epoch": 0.603380628194524, "grad_norm": 0.2976268529891968, "learning_rate": 1.5853976395221493e-05, "loss": 0.4818, "step": 28450 }, { "epoch": 0.603401836652457, "grad_norm": 0.4337935149669647, "learning_rate": 1.5853706011857594e-05, "loss": 0.4279, "step": 28451 }, { "epoch": 0.60342304511039, "grad_norm": 0.30535078048706055, "learning_rate": 1.5853435621983255e-05, "loss": 0.4332, "step": 28452 }, { "epoch": 0.6034442535683231, "grad_norm": 0.354116290807724, "learning_rate": 1.5853165225598776e-05, "loss": 0.5098, "step": 28453 }, { "epoch": 0.6034654620262561, "grad_norm": 0.4422476887702942, "learning_rate": 1.5852894822704456e-05, "loss": 0.4976, "step": 28454 }, { "epoch": 0.6034866704841891, "grad_norm": 0.3205682039260864, "learning_rate": 1.58526244133006e-05, "loss": 0.5304, "step": 28455 }, { "epoch": 0.6035078789421221, "grad_norm": 0.3649166524410248, "learning_rate": 1.5852353997387505e-05, "loss": 0.5693, "step": 28456 }, { "epoch": 0.6035290874000552, "grad_norm": 0.36802512407302856, "learning_rate": 1.585208357496547e-05, "loss": 0.5734, "step": 28457 }, { "epoch": 0.6035502958579881, "grad_norm": 0.36909762024879456, "learning_rate": 1.58518131460348e-05, "loss": 0.4775, "step": 28458 }, { "epoch": 0.6035715043159212, "grad_norm": 0.3197237253189087, "learning_rate": 1.5851542710595794e-05, "loss": 0.4686, "step": 28459 }, { "epoch": 0.6035927127738542, "grad_norm": 0.3771435618400574, "learning_rate": 1.5851272268648756e-05, "loss": 0.5338, "step": 28460 }, { "epoch": 0.6036139212317873, "grad_norm": 0.40339958667755127, "learning_rate": 1.5851001820193984e-05, "loss": 0.4499, "step": 28461 }, { "epoch": 0.6036351296897202, "grad_norm": 0.3680380582809448, "learning_rate": 1.5850731365231776e-05, "loss": 0.4574, "step": 28462 }, { "epoch": 0.6036563381476533, "grad_norm": 0.3285451829433441, "learning_rate": 1.5850460903762437e-05, "loss": 0.5233, "step": 28463 }, { "epoch": 0.6036775466055863, "grad_norm": 0.5559892654418945, "learning_rate": 1.5850190435786263e-05, "loss": 0.492, "step": 28464 }, { "epoch": 0.6036987550635193, "grad_norm": 0.4120064079761505, "learning_rate": 1.584991996130356e-05, "loss": 0.565, "step": 28465 }, { "epoch": 0.6037199635214524, "grad_norm": 0.3654478192329407, "learning_rate": 1.5849649480314626e-05, "loss": 0.5153, "step": 28466 }, { "epoch": 0.6037411719793854, "grad_norm": 0.5825126767158508, "learning_rate": 1.5849378992819766e-05, "loss": 0.4891, "step": 28467 }, { "epoch": 0.6037623804373184, "grad_norm": 0.4531995356082916, "learning_rate": 1.5849108498819274e-05, "loss": 0.5587, "step": 28468 }, { "epoch": 0.6037835888952514, "grad_norm": 0.31561917066574097, "learning_rate": 1.5848837998313456e-05, "loss": 0.4267, "step": 28469 }, { "epoch": 0.6038047973531845, "grad_norm": 0.3188697397708893, "learning_rate": 1.5848567491302612e-05, "loss": 0.4018, "step": 28470 }, { "epoch": 0.6038260058111174, "grad_norm": 0.3560221493244171, "learning_rate": 1.584829697778704e-05, "loss": 0.4864, "step": 28471 }, { "epoch": 0.6038472142690505, "grad_norm": 0.38942912220954895, "learning_rate": 1.5848026457767043e-05, "loss": 0.4714, "step": 28472 }, { "epoch": 0.6038684227269835, "grad_norm": 0.31825852394104004, "learning_rate": 1.584775593124292e-05, "loss": 0.4909, "step": 28473 }, { "epoch": 0.6038896311849166, "grad_norm": 0.3870214819908142, "learning_rate": 1.5847485398214975e-05, "loss": 0.498, "step": 28474 }, { "epoch": 0.6039108396428495, "grad_norm": 0.3523777425289154, "learning_rate": 1.5847214858683514e-05, "loss": 0.5369, "step": 28475 }, { "epoch": 0.6039320481007826, "grad_norm": 0.3352400064468384, "learning_rate": 1.5846944312648825e-05, "loss": 0.5398, "step": 28476 }, { "epoch": 0.6039532565587156, "grad_norm": 0.3952939212322235, "learning_rate": 1.5846673760111214e-05, "loss": 0.5029, "step": 28477 }, { "epoch": 0.6039744650166486, "grad_norm": 0.37992221117019653, "learning_rate": 1.5846403201070984e-05, "loss": 0.5, "step": 28478 }, { "epoch": 0.6039956734745817, "grad_norm": 0.3269822597503662, "learning_rate": 1.584613263552844e-05, "loss": 0.5031, "step": 28479 }, { "epoch": 0.6040168819325147, "grad_norm": 0.3691634237766266, "learning_rate": 1.5845862063483876e-05, "loss": 0.531, "step": 28480 }, { "epoch": 0.6040380903904478, "grad_norm": 0.3555227518081665, "learning_rate": 1.5845591484937592e-05, "loss": 0.498, "step": 28481 }, { "epoch": 0.6040592988483807, "grad_norm": 0.3910728096961975, "learning_rate": 1.5845320899889894e-05, "loss": 0.5948, "step": 28482 }, { "epoch": 0.6040805073063138, "grad_norm": 0.3579954206943512, "learning_rate": 1.5845050308341082e-05, "loss": 0.4604, "step": 28483 }, { "epoch": 0.6041017157642468, "grad_norm": 0.39757847785949707, "learning_rate": 1.5844779710291452e-05, "loss": 0.5265, "step": 28484 }, { "epoch": 0.6041229242221798, "grad_norm": 0.404606431722641, "learning_rate": 1.584450910574131e-05, "loss": 0.5582, "step": 28485 }, { "epoch": 0.6041441326801128, "grad_norm": 0.3552382290363312, "learning_rate": 1.584423849469096e-05, "loss": 0.5434, "step": 28486 }, { "epoch": 0.6041653411380459, "grad_norm": 0.3582577407360077, "learning_rate": 1.5843967877140695e-05, "loss": 0.5276, "step": 28487 }, { "epoch": 0.6041865495959788, "grad_norm": 0.3689395487308502, "learning_rate": 1.584369725309082e-05, "loss": 0.4307, "step": 28488 }, { "epoch": 0.6042077580539119, "grad_norm": 0.3432175815105438, "learning_rate": 1.5843426622541636e-05, "loss": 0.5812, "step": 28489 }, { "epoch": 0.6042289665118449, "grad_norm": 0.3745236098766327, "learning_rate": 1.5843155985493447e-05, "loss": 0.4695, "step": 28490 }, { "epoch": 0.604250174969778, "grad_norm": 0.3244037628173828, "learning_rate": 1.5842885341946543e-05, "loss": 0.5085, "step": 28491 }, { "epoch": 0.6042713834277109, "grad_norm": 0.3871556222438812, "learning_rate": 1.584261469190124e-05, "loss": 0.5329, "step": 28492 }, { "epoch": 0.604292591885644, "grad_norm": 0.3151954412460327, "learning_rate": 1.5842344035357825e-05, "loss": 0.4468, "step": 28493 }, { "epoch": 0.6043138003435771, "grad_norm": 0.3582278788089752, "learning_rate": 1.584207337231661e-05, "loss": 0.4898, "step": 28494 }, { "epoch": 0.60433500880151, "grad_norm": 0.379970908164978, "learning_rate": 1.5841802702777893e-05, "loss": 0.4468, "step": 28495 }, { "epoch": 0.6043562172594431, "grad_norm": 0.3423343300819397, "learning_rate": 1.5841532026741974e-05, "loss": 0.5577, "step": 28496 }, { "epoch": 0.6043774257173761, "grad_norm": 0.3565870523452759, "learning_rate": 1.584126134420915e-05, "loss": 0.571, "step": 28497 }, { "epoch": 0.6043986341753091, "grad_norm": 0.3729574978351593, "learning_rate": 1.5840990655179726e-05, "loss": 0.5735, "step": 28498 }, { "epoch": 0.6044198426332421, "grad_norm": 0.5406878590583801, "learning_rate": 1.584071995965401e-05, "loss": 0.4497, "step": 28499 }, { "epoch": 0.6044410510911752, "grad_norm": 0.38002315163612366, "learning_rate": 1.584044925763229e-05, "loss": 0.4953, "step": 28500 }, { "epoch": 0.6044622595491081, "grad_norm": 0.37738004326820374, "learning_rate": 1.584017854911487e-05, "loss": 0.5151, "step": 28501 }, { "epoch": 0.6044834680070412, "grad_norm": 0.3647327125072479, "learning_rate": 1.583990783410206e-05, "loss": 0.4794, "step": 28502 }, { "epoch": 0.6045046764649742, "grad_norm": 0.3348459303379059, "learning_rate": 1.5839637112594153e-05, "loss": 0.4688, "step": 28503 }, { "epoch": 0.6045258849229073, "grad_norm": 0.3737832307815552, "learning_rate": 1.5839366384591454e-05, "loss": 0.5143, "step": 28504 }, { "epoch": 0.6045470933808402, "grad_norm": 0.3882001042366028, "learning_rate": 1.583909565009426e-05, "loss": 0.4885, "step": 28505 }, { "epoch": 0.6045683018387733, "grad_norm": 0.3122579753398895, "learning_rate": 1.5838824909102875e-05, "loss": 0.4732, "step": 28506 }, { "epoch": 0.6045895102967064, "grad_norm": 0.38817349076271057, "learning_rate": 1.5838554161617602e-05, "loss": 0.5558, "step": 28507 }, { "epoch": 0.6046107187546393, "grad_norm": 0.3400367498397827, "learning_rate": 1.5838283407638733e-05, "loss": 0.4463, "step": 28508 }, { "epoch": 0.6046319272125724, "grad_norm": 0.4013824760913849, "learning_rate": 1.583801264716658e-05, "loss": 0.4664, "step": 28509 }, { "epoch": 0.6046531356705054, "grad_norm": 0.32452070713043213, "learning_rate": 1.5837741880201444e-05, "loss": 0.508, "step": 28510 }, { "epoch": 0.6046743441284385, "grad_norm": 0.3883149325847626, "learning_rate": 1.5837471106743617e-05, "loss": 0.4877, "step": 28511 }, { "epoch": 0.6046955525863714, "grad_norm": 1.7535552978515625, "learning_rate": 1.5837200326793407e-05, "loss": 0.5281, "step": 28512 }, { "epoch": 0.6047167610443045, "grad_norm": 0.3791084587574005, "learning_rate": 1.5836929540351116e-05, "loss": 0.5022, "step": 28513 }, { "epoch": 0.6047379695022375, "grad_norm": 0.4854670464992523, "learning_rate": 1.583665874741704e-05, "loss": 0.5064, "step": 28514 }, { "epoch": 0.6047591779601705, "grad_norm": 0.4713881313800812, "learning_rate": 1.5836387947991486e-05, "loss": 0.5132, "step": 28515 }, { "epoch": 0.6047803864181035, "grad_norm": 0.36617979407310486, "learning_rate": 1.5836117142074746e-05, "loss": 0.523, "step": 28516 }, { "epoch": 0.6048015948760366, "grad_norm": 0.3675335645675659, "learning_rate": 1.583584632966713e-05, "loss": 0.4778, "step": 28517 }, { "epoch": 0.6048228033339695, "grad_norm": 0.39389851689338684, "learning_rate": 1.583557551076894e-05, "loss": 0.5724, "step": 28518 }, { "epoch": 0.6048440117919026, "grad_norm": 0.31326812505722046, "learning_rate": 1.583530468538047e-05, "loss": 0.4155, "step": 28519 }, { "epoch": 0.6048652202498357, "grad_norm": 0.38268595933914185, "learning_rate": 1.5835033853502023e-05, "loss": 0.572, "step": 28520 }, { "epoch": 0.6048864287077687, "grad_norm": 0.38788285851478577, "learning_rate": 1.5834763015133906e-05, "loss": 0.4541, "step": 28521 }, { "epoch": 0.6049076371657017, "grad_norm": 0.345284640789032, "learning_rate": 1.5834492170276418e-05, "loss": 0.4992, "step": 28522 }, { "epoch": 0.6049288456236347, "grad_norm": 0.3557119369506836, "learning_rate": 1.5834221318929852e-05, "loss": 0.541, "step": 28523 }, { "epoch": 0.6049500540815678, "grad_norm": 0.3423614203929901, "learning_rate": 1.583395046109452e-05, "loss": 0.5166, "step": 28524 }, { "epoch": 0.6049712625395007, "grad_norm": 0.3463742733001709, "learning_rate": 1.5833679596770717e-05, "loss": 0.484, "step": 28525 }, { "epoch": 0.6049924709974338, "grad_norm": 0.39712756872177124, "learning_rate": 1.5833408725958747e-05, "loss": 0.533, "step": 28526 }, { "epoch": 0.6050136794553668, "grad_norm": 0.33480381965637207, "learning_rate": 1.583313784865891e-05, "loss": 0.4439, "step": 28527 }, { "epoch": 0.6050348879132998, "grad_norm": 0.3988783061504364, "learning_rate": 1.583286696487151e-05, "loss": 0.5015, "step": 28528 }, { "epoch": 0.6050560963712328, "grad_norm": 0.4546692371368408, "learning_rate": 1.5832596074596844e-05, "loss": 0.5579, "step": 28529 }, { "epoch": 0.6050773048291659, "grad_norm": 0.37755638360977173, "learning_rate": 1.5832325177835217e-05, "loss": 0.4626, "step": 28530 }, { "epoch": 0.6050985132870988, "grad_norm": 0.351895809173584, "learning_rate": 1.5832054274586924e-05, "loss": 0.4253, "step": 28531 }, { "epoch": 0.6051197217450319, "grad_norm": 0.33455541729927063, "learning_rate": 1.5831783364852276e-05, "loss": 0.4317, "step": 28532 }, { "epoch": 0.6051409302029649, "grad_norm": 0.33939728140830994, "learning_rate": 1.583151244863157e-05, "loss": 0.4407, "step": 28533 }, { "epoch": 0.605162138660898, "grad_norm": 0.3927800953388214, "learning_rate": 1.5831241525925102e-05, "loss": 0.5458, "step": 28534 }, { "epoch": 0.605183347118831, "grad_norm": 0.5712064504623413, "learning_rate": 1.583097059673318e-05, "loss": 0.4957, "step": 28535 }, { "epoch": 0.605204555576764, "grad_norm": 0.3570583462715149, "learning_rate": 1.5830699661056104e-05, "loss": 0.4947, "step": 28536 }, { "epoch": 0.6052257640346971, "grad_norm": 0.35021382570266724, "learning_rate": 1.5830428718894172e-05, "loss": 0.4402, "step": 28537 }, { "epoch": 0.60524697249263, "grad_norm": 0.3528594672679901, "learning_rate": 1.5830157770247688e-05, "loss": 0.4736, "step": 28538 }, { "epoch": 0.6052681809505631, "grad_norm": 1.200063705444336, "learning_rate": 1.5829886815116957e-05, "loss": 0.3657, "step": 28539 }, { "epoch": 0.6052893894084961, "grad_norm": 0.3829233944416046, "learning_rate": 1.5829615853502273e-05, "loss": 0.5569, "step": 28540 }, { "epoch": 0.6053105978664292, "grad_norm": 0.30057352781295776, "learning_rate": 1.5829344885403944e-05, "loss": 0.5059, "step": 28541 }, { "epoch": 0.6053318063243621, "grad_norm": 0.3567725419998169, "learning_rate": 1.5829073910822263e-05, "loss": 0.5134, "step": 28542 }, { "epoch": 0.6053530147822952, "grad_norm": 0.3513420820236206, "learning_rate": 1.582880292975754e-05, "loss": 0.4969, "step": 28543 }, { "epoch": 0.6053742232402282, "grad_norm": 0.36498594284057617, "learning_rate": 1.5828531942210076e-05, "loss": 0.476, "step": 28544 }, { "epoch": 0.6053954316981612, "grad_norm": 0.352706640958786, "learning_rate": 1.5828260948180166e-05, "loss": 0.4357, "step": 28545 }, { "epoch": 0.6054166401560942, "grad_norm": 0.40045255422592163, "learning_rate": 1.5827989947668113e-05, "loss": 0.5608, "step": 28546 }, { "epoch": 0.6054378486140273, "grad_norm": 0.3709854483604431, "learning_rate": 1.5827718940674222e-05, "loss": 0.515, "step": 28547 }, { "epoch": 0.6054590570719603, "grad_norm": 0.36093488335609436, "learning_rate": 1.5827447927198797e-05, "loss": 0.4572, "step": 28548 }, { "epoch": 0.6054802655298933, "grad_norm": 0.3543805181980133, "learning_rate": 1.5827176907242127e-05, "loss": 0.4727, "step": 28549 }, { "epoch": 0.6055014739878264, "grad_norm": 0.36774495244026184, "learning_rate": 1.5826905880804526e-05, "loss": 0.5529, "step": 28550 }, { "epoch": 0.6055226824457594, "grad_norm": 0.44637632369995117, "learning_rate": 1.582663484788629e-05, "loss": 0.5689, "step": 28551 }, { "epoch": 0.6055438909036924, "grad_norm": 0.3277881443500519, "learning_rate": 1.582636380848772e-05, "loss": 0.4211, "step": 28552 }, { "epoch": 0.6055650993616254, "grad_norm": 0.38689497113227844, "learning_rate": 1.582609276260912e-05, "loss": 0.479, "step": 28553 }, { "epoch": 0.6055863078195585, "grad_norm": 0.40659719705581665, "learning_rate": 1.582582171025079e-05, "loss": 0.5099, "step": 28554 }, { "epoch": 0.6056075162774914, "grad_norm": 0.3637917935848236, "learning_rate": 1.5825550651413032e-05, "loss": 0.4902, "step": 28555 }, { "epoch": 0.6056287247354245, "grad_norm": 1.442677617073059, "learning_rate": 1.582527958609615e-05, "loss": 0.4954, "step": 28556 }, { "epoch": 0.6056499331933575, "grad_norm": 0.44387972354888916, "learning_rate": 1.582500851430044e-05, "loss": 0.5198, "step": 28557 }, { "epoch": 0.6056711416512905, "grad_norm": 0.35530880093574524, "learning_rate": 1.5824737436026207e-05, "loss": 0.4407, "step": 28558 }, { "epoch": 0.6056923501092235, "grad_norm": 0.33595186471939087, "learning_rate": 1.5824466351273753e-05, "loss": 0.5427, "step": 28559 }, { "epoch": 0.6057135585671566, "grad_norm": 0.49193382263183594, "learning_rate": 1.5824195260043373e-05, "loss": 0.5261, "step": 28560 }, { "epoch": 0.6057347670250897, "grad_norm": 0.3758440315723419, "learning_rate": 1.5823924162335377e-05, "loss": 0.5793, "step": 28561 }, { "epoch": 0.6057559754830226, "grad_norm": 0.37602779269218445, "learning_rate": 1.5823653058150062e-05, "loss": 0.4942, "step": 28562 }, { "epoch": 0.6057771839409557, "grad_norm": 0.3387509286403656, "learning_rate": 1.5823381947487734e-05, "loss": 0.5172, "step": 28563 }, { "epoch": 0.6057983923988887, "grad_norm": 0.322156697511673, "learning_rate": 1.5823110830348687e-05, "loss": 0.4832, "step": 28564 }, { "epoch": 0.6058196008568217, "grad_norm": 0.3388137221336365, "learning_rate": 1.582283970673323e-05, "loss": 0.4843, "step": 28565 }, { "epoch": 0.6058408093147547, "grad_norm": 0.32955116033554077, "learning_rate": 1.582256857664166e-05, "loss": 0.3611, "step": 28566 }, { "epoch": 0.6058620177726878, "grad_norm": 0.33374983072280884, "learning_rate": 1.5822297440074278e-05, "loss": 0.4581, "step": 28567 }, { "epoch": 0.6058832262306207, "grad_norm": 0.33640867471694946, "learning_rate": 1.5822026297031388e-05, "loss": 0.5044, "step": 28568 }, { "epoch": 0.6059044346885538, "grad_norm": 0.3015190362930298, "learning_rate": 1.582175514751329e-05, "loss": 0.457, "step": 28569 }, { "epoch": 0.6059256431464868, "grad_norm": 0.35977500677108765, "learning_rate": 1.582148399152029e-05, "loss": 0.4836, "step": 28570 }, { "epoch": 0.6059468516044199, "grad_norm": 0.35162705183029175, "learning_rate": 1.5821212829052687e-05, "loss": 0.525, "step": 28571 }, { "epoch": 0.6059680600623528, "grad_norm": 0.34092050790786743, "learning_rate": 1.582094166011078e-05, "loss": 0.4855, "step": 28572 }, { "epoch": 0.6059892685202859, "grad_norm": 0.3412666618824005, "learning_rate": 1.582067048469487e-05, "loss": 0.4983, "step": 28573 }, { "epoch": 0.6060104769782189, "grad_norm": 0.34950733184814453, "learning_rate": 1.5820399302805262e-05, "loss": 0.5648, "step": 28574 }, { "epoch": 0.6060316854361519, "grad_norm": 0.3710569143295288, "learning_rate": 1.5820128114442253e-05, "loss": 0.4854, "step": 28575 }, { "epoch": 0.606052893894085, "grad_norm": 0.39035654067993164, "learning_rate": 1.5819856919606155e-05, "loss": 0.4631, "step": 28576 }, { "epoch": 0.606074102352018, "grad_norm": 0.35380277037620544, "learning_rate": 1.5819585718297257e-05, "loss": 0.4597, "step": 28577 }, { "epoch": 0.606095310809951, "grad_norm": 0.4564036428928375, "learning_rate": 1.581931451051587e-05, "loss": 0.4333, "step": 28578 }, { "epoch": 0.606116519267884, "grad_norm": 0.38344043493270874, "learning_rate": 1.5819043296262292e-05, "loss": 0.5022, "step": 28579 }, { "epoch": 0.6061377277258171, "grad_norm": 0.39626801013946533, "learning_rate": 1.581877207553682e-05, "loss": 0.4081, "step": 28580 }, { "epoch": 0.60615893618375, "grad_norm": 0.3839409351348877, "learning_rate": 1.5818500848339764e-05, "loss": 0.4936, "step": 28581 }, { "epoch": 0.6061801446416831, "grad_norm": 0.3515993356704712, "learning_rate": 1.5818229614671423e-05, "loss": 0.5376, "step": 28582 }, { "epoch": 0.6062013530996161, "grad_norm": 0.3218128979206085, "learning_rate": 1.5817958374532093e-05, "loss": 0.4637, "step": 28583 }, { "epoch": 0.6062225615575492, "grad_norm": 0.4284101724624634, "learning_rate": 1.5817687127922084e-05, "loss": 0.5676, "step": 28584 }, { "epoch": 0.6062437700154821, "grad_norm": 0.3318345844745636, "learning_rate": 1.5817415874841696e-05, "loss": 0.4463, "step": 28585 }, { "epoch": 0.6062649784734152, "grad_norm": 0.3386804461479187, "learning_rate": 1.5817144615291223e-05, "loss": 0.5153, "step": 28586 }, { "epoch": 0.6062861869313482, "grad_norm": 0.324552059173584, "learning_rate": 1.5816873349270973e-05, "loss": 0.4455, "step": 28587 }, { "epoch": 0.6063073953892812, "grad_norm": 0.3452083170413971, "learning_rate": 1.5816602076781252e-05, "loss": 0.4743, "step": 28588 }, { "epoch": 0.6063286038472143, "grad_norm": 0.34217286109924316, "learning_rate": 1.581633079782235e-05, "loss": 0.509, "step": 28589 }, { "epoch": 0.6063498123051473, "grad_norm": 0.4050271511077881, "learning_rate": 1.581605951239458e-05, "loss": 0.484, "step": 28590 }, { "epoch": 0.6063710207630804, "grad_norm": 0.3772827684879303, "learning_rate": 1.581578822049824e-05, "loss": 0.4615, "step": 28591 }, { "epoch": 0.6063922292210133, "grad_norm": 0.3633457124233246, "learning_rate": 1.5815516922133628e-05, "loss": 0.4536, "step": 28592 }, { "epoch": 0.6064134376789464, "grad_norm": 0.49422135949134827, "learning_rate": 1.581524561730105e-05, "loss": 0.5182, "step": 28593 }, { "epoch": 0.6064346461368794, "grad_norm": 0.40587228536605835, "learning_rate": 1.5814974306000805e-05, "loss": 0.5116, "step": 28594 }, { "epoch": 0.6064558545948124, "grad_norm": 0.4190681576728821, "learning_rate": 1.58147029882332e-05, "loss": 0.4452, "step": 28595 }, { "epoch": 0.6064770630527454, "grad_norm": 0.3287356197834015, "learning_rate": 1.581443166399853e-05, "loss": 0.4142, "step": 28596 }, { "epoch": 0.6064982715106785, "grad_norm": 0.3600972890853882, "learning_rate": 1.5814160333297096e-05, "loss": 0.4912, "step": 28597 }, { "epoch": 0.6065194799686114, "grad_norm": 0.35262250900268555, "learning_rate": 1.581388899612921e-05, "loss": 0.4761, "step": 28598 }, { "epoch": 0.6065406884265445, "grad_norm": 0.4109085202217102, "learning_rate": 1.5813617652495165e-05, "loss": 0.5662, "step": 28599 }, { "epoch": 0.6065618968844775, "grad_norm": 0.37076497077941895, "learning_rate": 1.581334630239526e-05, "loss": 0.5868, "step": 28600 }, { "epoch": 0.6065831053424106, "grad_norm": 0.33160704374313354, "learning_rate": 1.5813074945829808e-05, "loss": 0.5025, "step": 28601 }, { "epoch": 0.6066043138003436, "grad_norm": 0.42941778898239136, "learning_rate": 1.5812803582799102e-05, "loss": 0.5958, "step": 28602 }, { "epoch": 0.6066255222582766, "grad_norm": 0.3174569606781006, "learning_rate": 1.5812532213303448e-05, "loss": 0.5239, "step": 28603 }, { "epoch": 0.6066467307162097, "grad_norm": 0.34328794479370117, "learning_rate": 1.5812260837343143e-05, "loss": 0.5492, "step": 28604 }, { "epoch": 0.6066679391741426, "grad_norm": 0.41184085607528687, "learning_rate": 1.5811989454918496e-05, "loss": 0.5126, "step": 28605 }, { "epoch": 0.6066891476320757, "grad_norm": 0.34424275159835815, "learning_rate": 1.58117180660298e-05, "loss": 0.389, "step": 28606 }, { "epoch": 0.6067103560900087, "grad_norm": 0.3255350589752197, "learning_rate": 1.5811446670677366e-05, "loss": 0.4621, "step": 28607 }, { "epoch": 0.6067315645479417, "grad_norm": 0.37570279836654663, "learning_rate": 1.581117526886149e-05, "loss": 0.5947, "step": 28608 }, { "epoch": 0.6067527730058747, "grad_norm": 0.3417178988456726, "learning_rate": 1.5810903860582475e-05, "loss": 0.5965, "step": 28609 }, { "epoch": 0.6067739814638078, "grad_norm": 0.3445069491863251, "learning_rate": 1.5810632445840625e-05, "loss": 0.4827, "step": 28610 }, { "epoch": 0.6067951899217408, "grad_norm": 0.3687593638896942, "learning_rate": 1.5810361024636235e-05, "loss": 0.5245, "step": 28611 }, { "epoch": 0.6068163983796738, "grad_norm": 0.3840867877006531, "learning_rate": 1.5810089596969617e-05, "loss": 0.5685, "step": 28612 }, { "epoch": 0.6068376068376068, "grad_norm": 0.3759922981262207, "learning_rate": 1.5809818162841066e-05, "loss": 0.5189, "step": 28613 }, { "epoch": 0.6068588152955399, "grad_norm": 0.4003976285457611, "learning_rate": 1.5809546722250883e-05, "loss": 0.5263, "step": 28614 }, { "epoch": 0.6068800237534728, "grad_norm": 0.3434968590736389, "learning_rate": 1.5809275275199378e-05, "loss": 0.5631, "step": 28615 }, { "epoch": 0.6069012322114059, "grad_norm": 0.37087488174438477, "learning_rate": 1.5809003821686847e-05, "loss": 0.567, "step": 28616 }, { "epoch": 0.606922440669339, "grad_norm": 0.37911123037338257, "learning_rate": 1.580873236171359e-05, "loss": 0.4872, "step": 28617 }, { "epoch": 0.606943649127272, "grad_norm": 0.3404906392097473, "learning_rate": 1.580846089527991e-05, "loss": 0.4869, "step": 28618 }, { "epoch": 0.606964857585205, "grad_norm": 0.34662893414497375, "learning_rate": 1.580818942238611e-05, "loss": 0.4546, "step": 28619 }, { "epoch": 0.606986066043138, "grad_norm": 0.3184551000595093, "learning_rate": 1.5807917943032495e-05, "loss": 0.4592, "step": 28620 }, { "epoch": 0.6070072745010711, "grad_norm": 0.3515136241912842, "learning_rate": 1.5807646457219362e-05, "loss": 0.5225, "step": 28621 }, { "epoch": 0.607028482959004, "grad_norm": 0.35477983951568604, "learning_rate": 1.580737496494702e-05, "loss": 0.4661, "step": 28622 }, { "epoch": 0.6070496914169371, "grad_norm": 0.34938690066337585, "learning_rate": 1.580710346621576e-05, "loss": 0.4733, "step": 28623 }, { "epoch": 0.6070708998748701, "grad_norm": 0.3860034942626953, "learning_rate": 1.5806831961025893e-05, "loss": 0.5906, "step": 28624 }, { "epoch": 0.6070921083328031, "grad_norm": 0.38115406036376953, "learning_rate": 1.5806560449377717e-05, "loss": 0.4903, "step": 28625 }, { "epoch": 0.6071133167907361, "grad_norm": 0.38660889863967896, "learning_rate": 1.5806288931271535e-05, "loss": 0.5445, "step": 28626 }, { "epoch": 0.6071345252486692, "grad_norm": 0.35045498609542847, "learning_rate": 1.580601740670765e-05, "loss": 0.5111, "step": 28627 }, { "epoch": 0.6071557337066021, "grad_norm": 0.42759039998054504, "learning_rate": 1.580574587568636e-05, "loss": 0.4558, "step": 28628 }, { "epoch": 0.6071769421645352, "grad_norm": 0.3806748390197754, "learning_rate": 1.5805474338207972e-05, "loss": 0.4834, "step": 28629 }, { "epoch": 0.6071981506224683, "grad_norm": 0.8740400075912476, "learning_rate": 1.5805202794272787e-05, "loss": 0.5094, "step": 28630 }, { "epoch": 0.6072193590804013, "grad_norm": 0.33477166295051575, "learning_rate": 1.5804931243881107e-05, "loss": 0.5329, "step": 28631 }, { "epoch": 0.6072405675383343, "grad_norm": 0.3730510175228119, "learning_rate": 1.5804659687033227e-05, "loss": 0.5142, "step": 28632 }, { "epoch": 0.6072617759962673, "grad_norm": 0.34068527817726135, "learning_rate": 1.5804388123729458e-05, "loss": 0.5216, "step": 28633 }, { "epoch": 0.6072829844542004, "grad_norm": 0.4198368787765503, "learning_rate": 1.5804116553970102e-05, "loss": 0.5328, "step": 28634 }, { "epoch": 0.6073041929121333, "grad_norm": 0.33344998955726624, "learning_rate": 1.5803844977755454e-05, "loss": 0.4899, "step": 28635 }, { "epoch": 0.6073254013700664, "grad_norm": 0.3440605700016022, "learning_rate": 1.580357339508582e-05, "loss": 0.4846, "step": 28636 }, { "epoch": 0.6073466098279994, "grad_norm": 0.3786483108997345, "learning_rate": 1.5803301805961507e-05, "loss": 0.4699, "step": 28637 }, { "epoch": 0.6073678182859324, "grad_norm": 0.43311864137649536, "learning_rate": 1.5803030210382807e-05, "loss": 0.4642, "step": 28638 }, { "epoch": 0.6073890267438654, "grad_norm": 0.3425169587135315, "learning_rate": 1.580275860835003e-05, "loss": 0.4724, "step": 28639 }, { "epoch": 0.6074102352017985, "grad_norm": 0.33549588918685913, "learning_rate": 1.5802486999863473e-05, "loss": 0.4872, "step": 28640 }, { "epoch": 0.6074314436597315, "grad_norm": 0.3475551903247833, "learning_rate": 1.580221538492344e-05, "loss": 0.4905, "step": 28641 }, { "epoch": 0.6074526521176645, "grad_norm": 0.3692038953304291, "learning_rate": 1.5801943763530237e-05, "loss": 0.4786, "step": 28642 }, { "epoch": 0.6074738605755976, "grad_norm": 0.4208456873893738, "learning_rate": 1.580167213568416e-05, "loss": 0.5098, "step": 28643 }, { "epoch": 0.6074950690335306, "grad_norm": 0.3388596475124359, "learning_rate": 1.5801400501385514e-05, "loss": 0.4921, "step": 28644 }, { "epoch": 0.6075162774914636, "grad_norm": 0.3177911043167114, "learning_rate": 1.58011288606346e-05, "loss": 0.4153, "step": 28645 }, { "epoch": 0.6075374859493966, "grad_norm": 0.40573418140411377, "learning_rate": 1.5800857213431725e-05, "loss": 0.5829, "step": 28646 }, { "epoch": 0.6075586944073297, "grad_norm": 0.3491775691509247, "learning_rate": 1.580058555977718e-05, "loss": 0.4499, "step": 28647 }, { "epoch": 0.6075799028652626, "grad_norm": 0.40219181776046753, "learning_rate": 1.580031389967128e-05, "loss": 0.5223, "step": 28648 }, { "epoch": 0.6076011113231957, "grad_norm": 0.7084525227546692, "learning_rate": 1.580004223311432e-05, "loss": 0.5237, "step": 28649 }, { "epoch": 0.6076223197811287, "grad_norm": 0.3694015443325043, "learning_rate": 1.5799770560106603e-05, "loss": 0.5295, "step": 28650 }, { "epoch": 0.6076435282390618, "grad_norm": 0.4294664263725281, "learning_rate": 1.5799498880648427e-05, "loss": 0.3933, "step": 28651 }, { "epoch": 0.6076647366969947, "grad_norm": 0.38853392004966736, "learning_rate": 1.5799227194740105e-05, "loss": 0.4794, "step": 28652 }, { "epoch": 0.6076859451549278, "grad_norm": 0.3815259635448456, "learning_rate": 1.579895550238193e-05, "loss": 0.4516, "step": 28653 }, { "epoch": 0.6077071536128608, "grad_norm": 0.349595308303833, "learning_rate": 1.579868380357421e-05, "loss": 0.4723, "step": 28654 }, { "epoch": 0.6077283620707938, "grad_norm": 0.4402362108230591, "learning_rate": 1.5798412098317242e-05, "loss": 0.5434, "step": 28655 }, { "epoch": 0.6077495705287268, "grad_norm": 0.37095627188682556, "learning_rate": 1.579814038661133e-05, "loss": 0.4916, "step": 28656 }, { "epoch": 0.6077707789866599, "grad_norm": 0.33168625831604004, "learning_rate": 1.5797868668456775e-05, "loss": 0.4434, "step": 28657 }, { "epoch": 0.607791987444593, "grad_norm": 0.3594064712524414, "learning_rate": 1.5797596943853884e-05, "loss": 0.5027, "step": 28658 }, { "epoch": 0.6078131959025259, "grad_norm": 0.41284191608428955, "learning_rate": 1.5797325212802954e-05, "loss": 0.5775, "step": 28659 }, { "epoch": 0.607834404360459, "grad_norm": 0.3613356947898865, "learning_rate": 1.579705347530429e-05, "loss": 0.551, "step": 28660 }, { "epoch": 0.607855612818392, "grad_norm": 0.36346516013145447, "learning_rate": 1.5796781731358197e-05, "loss": 0.5825, "step": 28661 }, { "epoch": 0.607876821276325, "grad_norm": 0.3450378179550171, "learning_rate": 1.579650998096497e-05, "loss": 0.5232, "step": 28662 }, { "epoch": 0.607898029734258, "grad_norm": 0.37466591596603394, "learning_rate": 1.5796238224124915e-05, "loss": 0.4529, "step": 28663 }, { "epoch": 0.6079192381921911, "grad_norm": 0.3905564546585083, "learning_rate": 1.5795966460838334e-05, "loss": 0.4819, "step": 28664 }, { "epoch": 0.607940446650124, "grad_norm": 0.6628118753433228, "learning_rate": 1.579569469110553e-05, "loss": 0.5486, "step": 28665 }, { "epoch": 0.6079616551080571, "grad_norm": 0.49079811573028564, "learning_rate": 1.5795422914926803e-05, "loss": 0.4368, "step": 28666 }, { "epoch": 0.6079828635659901, "grad_norm": 0.34077921509742737, "learning_rate": 1.579515113230246e-05, "loss": 0.5403, "step": 28667 }, { "epoch": 0.6080040720239231, "grad_norm": 0.33548080921173096, "learning_rate": 1.57948793432328e-05, "loss": 0.5061, "step": 28668 }, { "epoch": 0.6080252804818561, "grad_norm": 0.34599775075912476, "learning_rate": 1.5794607547718123e-05, "loss": 0.4899, "step": 28669 }, { "epoch": 0.6080464889397892, "grad_norm": 0.35609251260757446, "learning_rate": 1.5794335745758735e-05, "loss": 0.4472, "step": 28670 }, { "epoch": 0.6080676973977223, "grad_norm": 0.33093535900115967, "learning_rate": 1.5794063937354937e-05, "loss": 0.4546, "step": 28671 }, { "epoch": 0.6080889058556552, "grad_norm": 0.37049397826194763, "learning_rate": 1.579379212250703e-05, "loss": 0.5419, "step": 28672 }, { "epoch": 0.6081101143135883, "grad_norm": 0.31508949398994446, "learning_rate": 1.5793520301215323e-05, "loss": 0.4707, "step": 28673 }, { "epoch": 0.6081313227715213, "grad_norm": 0.3182671070098877, "learning_rate": 1.5793248473480112e-05, "loss": 0.5214, "step": 28674 }, { "epoch": 0.6081525312294543, "grad_norm": 0.3331970274448395, "learning_rate": 1.5792976639301695e-05, "loss": 0.4454, "step": 28675 }, { "epoch": 0.6081737396873873, "grad_norm": 0.3535922169685364, "learning_rate": 1.5792704798680386e-05, "loss": 0.5681, "step": 28676 }, { "epoch": 0.6081949481453204, "grad_norm": 0.3708062767982483, "learning_rate": 1.5792432951616477e-05, "loss": 0.4587, "step": 28677 }, { "epoch": 0.6082161566032533, "grad_norm": 0.32062414288520813, "learning_rate": 1.5792161098110272e-05, "loss": 0.4022, "step": 28678 }, { "epoch": 0.6082373650611864, "grad_norm": 0.3312121331691742, "learning_rate": 1.579188923816208e-05, "loss": 0.4203, "step": 28679 }, { "epoch": 0.6082585735191194, "grad_norm": 0.3791730999946594, "learning_rate": 1.57916173717722e-05, "loss": 0.525, "step": 28680 }, { "epoch": 0.6082797819770525, "grad_norm": 0.3978734314441681, "learning_rate": 1.579134549894093e-05, "loss": 0.496, "step": 28681 }, { "epoch": 0.6083009904349854, "grad_norm": 0.3793165683746338, "learning_rate": 1.579107361966858e-05, "loss": 0.4855, "step": 28682 }, { "epoch": 0.6083221988929185, "grad_norm": 0.39153435826301575, "learning_rate": 1.5790801733955442e-05, "loss": 0.4961, "step": 28683 }, { "epoch": 0.6083434073508516, "grad_norm": 0.3396790623664856, "learning_rate": 1.579052984180183e-05, "loss": 0.5432, "step": 28684 }, { "epoch": 0.6083646158087845, "grad_norm": 0.38206031918525696, "learning_rate": 1.579025794320804e-05, "loss": 0.5702, "step": 28685 }, { "epoch": 0.6083858242667176, "grad_norm": 0.3188730776309967, "learning_rate": 1.5789986038174374e-05, "loss": 0.4275, "step": 28686 }, { "epoch": 0.6084070327246506, "grad_norm": 0.3192594051361084, "learning_rate": 1.5789714126701134e-05, "loss": 0.4829, "step": 28687 }, { "epoch": 0.6084282411825837, "grad_norm": 0.354608952999115, "learning_rate": 1.5789442208788627e-05, "loss": 0.4477, "step": 28688 }, { "epoch": 0.6084494496405166, "grad_norm": 0.3498991131782532, "learning_rate": 1.5789170284437156e-05, "loss": 0.5387, "step": 28689 }, { "epoch": 0.6084706580984497, "grad_norm": 0.3485819399356842, "learning_rate": 1.5788898353647015e-05, "loss": 0.5369, "step": 28690 }, { "epoch": 0.6084918665563827, "grad_norm": 0.3509558439254761, "learning_rate": 1.5788626416418515e-05, "loss": 0.4945, "step": 28691 }, { "epoch": 0.6085130750143157, "grad_norm": 0.3481849133968353, "learning_rate": 1.578835447275195e-05, "loss": 0.4751, "step": 28692 }, { "epoch": 0.6085342834722487, "grad_norm": 0.343106210231781, "learning_rate": 1.578808252264763e-05, "loss": 0.4516, "step": 28693 }, { "epoch": 0.6085554919301818, "grad_norm": 0.3445824384689331, "learning_rate": 1.5787810566105857e-05, "loss": 0.4817, "step": 28694 }, { "epoch": 0.6085767003881147, "grad_norm": 0.5369943380355835, "learning_rate": 1.578753860312693e-05, "loss": 0.4923, "step": 28695 }, { "epoch": 0.6085979088460478, "grad_norm": 0.35821533203125, "learning_rate": 1.5787266633711153e-05, "loss": 0.526, "step": 28696 }, { "epoch": 0.6086191173039808, "grad_norm": 0.34399837255477905, "learning_rate": 1.5786994657858825e-05, "loss": 0.485, "step": 28697 }, { "epoch": 0.6086403257619138, "grad_norm": 0.3887925148010254, "learning_rate": 1.5786722675570256e-05, "loss": 0.5829, "step": 28698 }, { "epoch": 0.6086615342198469, "grad_norm": 0.39033573865890503, "learning_rate": 1.5786450686845744e-05, "loss": 0.4801, "step": 28699 }, { "epoch": 0.6086827426777799, "grad_norm": 0.34337958693504333, "learning_rate": 1.5786178691685588e-05, "loss": 0.4559, "step": 28700 }, { "epoch": 0.608703951135713, "grad_norm": 0.37233859300613403, "learning_rate": 1.57859066900901e-05, "loss": 0.53, "step": 28701 }, { "epoch": 0.6087251595936459, "grad_norm": 0.34601566195487976, "learning_rate": 1.5785634682059576e-05, "loss": 0.4575, "step": 28702 }, { "epoch": 0.608746368051579, "grad_norm": 0.36064010858535767, "learning_rate": 1.5785362667594312e-05, "loss": 0.4956, "step": 28703 }, { "epoch": 0.608767576509512, "grad_norm": 0.4256313443183899, "learning_rate": 1.5785090646694624e-05, "loss": 0.5302, "step": 28704 }, { "epoch": 0.608788784967445, "grad_norm": 0.39203348755836487, "learning_rate": 1.578481861936081e-05, "loss": 0.5724, "step": 28705 }, { "epoch": 0.608809993425378, "grad_norm": 0.3804548382759094, "learning_rate": 1.578454658559317e-05, "loss": 0.5153, "step": 28706 }, { "epoch": 0.6088312018833111, "grad_norm": 0.330040842294693, "learning_rate": 1.5784274545392005e-05, "loss": 0.4242, "step": 28707 }, { "epoch": 0.608852410341244, "grad_norm": 0.46019163727760315, "learning_rate": 1.5784002498757623e-05, "loss": 0.5592, "step": 28708 }, { "epoch": 0.6088736187991771, "grad_norm": 0.4065020680427551, "learning_rate": 1.5783730445690317e-05, "loss": 0.5164, "step": 28709 }, { "epoch": 0.6088948272571101, "grad_norm": 0.41474220156669617, "learning_rate": 1.5783458386190402e-05, "loss": 0.5546, "step": 28710 }, { "epoch": 0.6089160357150432, "grad_norm": 0.41065311431884766, "learning_rate": 1.5783186320258173e-05, "loss": 0.4876, "step": 28711 }, { "epoch": 0.6089372441729762, "grad_norm": 0.3260936439037323, "learning_rate": 1.578291424789394e-05, "loss": 0.4326, "step": 28712 }, { "epoch": 0.6089584526309092, "grad_norm": 0.38890933990478516, "learning_rate": 1.5782642169097994e-05, "loss": 0.498, "step": 28713 }, { "epoch": 0.6089796610888423, "grad_norm": 0.3338744342327118, "learning_rate": 1.5782370083870646e-05, "loss": 0.4733, "step": 28714 }, { "epoch": 0.6090008695467752, "grad_norm": 0.33535778522491455, "learning_rate": 1.5782097992212193e-05, "loss": 0.4121, "step": 28715 }, { "epoch": 0.6090220780047083, "grad_norm": 0.3728090524673462, "learning_rate": 1.5781825894122945e-05, "loss": 0.4752, "step": 28716 }, { "epoch": 0.6090432864626413, "grad_norm": 0.38538381457328796, "learning_rate": 1.5781553789603198e-05, "loss": 0.4959, "step": 28717 }, { "epoch": 0.6090644949205744, "grad_norm": 0.3610970377922058, "learning_rate": 1.578128167865326e-05, "loss": 0.4339, "step": 28718 }, { "epoch": 0.6090857033785073, "grad_norm": 0.37684857845306396, "learning_rate": 1.5781009561273428e-05, "loss": 0.4742, "step": 28719 }, { "epoch": 0.6091069118364404, "grad_norm": 0.3897014856338501, "learning_rate": 1.5780737437464007e-05, "loss": 0.491, "step": 28720 }, { "epoch": 0.6091281202943734, "grad_norm": 0.33416101336479187, "learning_rate": 1.57804653072253e-05, "loss": 0.4512, "step": 28721 }, { "epoch": 0.6091493287523064, "grad_norm": 0.3240082561969757, "learning_rate": 1.578019317055761e-05, "loss": 0.4423, "step": 28722 }, { "epoch": 0.6091705372102394, "grad_norm": 0.33897364139556885, "learning_rate": 1.577992102746124e-05, "loss": 0.4049, "step": 28723 }, { "epoch": 0.6091917456681725, "grad_norm": 0.3566156327724457, "learning_rate": 1.5779648877936492e-05, "loss": 0.5207, "step": 28724 }, { "epoch": 0.6092129541261055, "grad_norm": 0.4012867212295532, "learning_rate": 1.577937672198367e-05, "loss": 0.4831, "step": 28725 }, { "epoch": 0.6092341625840385, "grad_norm": 0.3438222110271454, "learning_rate": 1.5779104559603074e-05, "loss": 0.4511, "step": 28726 }, { "epoch": 0.6092553710419716, "grad_norm": 0.33774709701538086, "learning_rate": 1.5778832390795008e-05, "loss": 0.4854, "step": 28727 }, { "epoch": 0.6092765794999045, "grad_norm": 0.3263615071773529, "learning_rate": 1.5778560215559774e-05, "loss": 0.4995, "step": 28728 }, { "epoch": 0.6092977879578376, "grad_norm": 0.36451661586761475, "learning_rate": 1.5778288033897677e-05, "loss": 0.5117, "step": 28729 }, { "epoch": 0.6093189964157706, "grad_norm": 0.3921639025211334, "learning_rate": 1.5778015845809017e-05, "loss": 0.6115, "step": 28730 }, { "epoch": 0.6093402048737037, "grad_norm": 0.3499630391597748, "learning_rate": 1.57777436512941e-05, "loss": 0.3936, "step": 28731 }, { "epoch": 0.6093614133316366, "grad_norm": 0.36348676681518555, "learning_rate": 1.5777471450353225e-05, "loss": 0.5714, "step": 28732 }, { "epoch": 0.6093826217895697, "grad_norm": 0.41379591822624207, "learning_rate": 1.5777199242986698e-05, "loss": 0.5508, "step": 28733 }, { "epoch": 0.6094038302475027, "grad_norm": 0.40670138597488403, "learning_rate": 1.577692702919482e-05, "loss": 0.4787, "step": 28734 }, { "epoch": 0.6094250387054357, "grad_norm": 0.34137725830078125, "learning_rate": 1.577665480897789e-05, "loss": 0.4958, "step": 28735 }, { "epoch": 0.6094462471633687, "grad_norm": 0.33722659945487976, "learning_rate": 1.577638258233622e-05, "loss": 0.4551, "step": 28736 }, { "epoch": 0.6094674556213018, "grad_norm": 0.3642670810222626, "learning_rate": 1.5776110349270102e-05, "loss": 0.5111, "step": 28737 }, { "epoch": 0.6094886640792349, "grad_norm": 0.3785962760448456, "learning_rate": 1.577583810977985e-05, "loss": 0.529, "step": 28738 }, { "epoch": 0.6095098725371678, "grad_norm": 0.4675072431564331, "learning_rate": 1.5775565863865758e-05, "loss": 0.5983, "step": 28739 }, { "epoch": 0.6095310809951009, "grad_norm": 0.35434162616729736, "learning_rate": 1.5775293611528134e-05, "loss": 0.5836, "step": 28740 }, { "epoch": 0.6095522894530339, "grad_norm": 0.3465454876422882, "learning_rate": 1.5775021352767276e-05, "loss": 0.5043, "step": 28741 }, { "epoch": 0.6095734979109669, "grad_norm": 0.3966311812400818, "learning_rate": 1.5774749087583492e-05, "loss": 0.5299, "step": 28742 }, { "epoch": 0.6095947063688999, "grad_norm": 0.37529176473617554, "learning_rate": 1.577447681597708e-05, "loss": 0.5059, "step": 28743 }, { "epoch": 0.609615914826833, "grad_norm": 0.35775697231292725, "learning_rate": 1.5774204537948344e-05, "loss": 0.5401, "step": 28744 }, { "epoch": 0.6096371232847659, "grad_norm": 0.3524918854236603, "learning_rate": 1.577393225349759e-05, "loss": 0.533, "step": 28745 }, { "epoch": 0.609658331742699, "grad_norm": 0.33887216448783875, "learning_rate": 1.577365996262512e-05, "loss": 0.5617, "step": 28746 }, { "epoch": 0.609679540200632, "grad_norm": 0.35296952724456787, "learning_rate": 1.5773387665331236e-05, "loss": 0.4507, "step": 28747 }, { "epoch": 0.609700748658565, "grad_norm": 1.252191185951233, "learning_rate": 1.5773115361616235e-05, "loss": 0.5659, "step": 28748 }, { "epoch": 0.609721957116498, "grad_norm": 0.35763537883758545, "learning_rate": 1.5772843051480426e-05, "loss": 0.5166, "step": 28749 }, { "epoch": 0.6097431655744311, "grad_norm": 0.3466617465019226, "learning_rate": 1.5772570734924115e-05, "loss": 0.4889, "step": 28750 }, { "epoch": 0.609764374032364, "grad_norm": 0.349054217338562, "learning_rate": 1.57722984119476e-05, "loss": 0.5177, "step": 28751 }, { "epoch": 0.6097855824902971, "grad_norm": 0.4486210346221924, "learning_rate": 1.5772026082551182e-05, "loss": 0.4328, "step": 28752 }, { "epoch": 0.6098067909482302, "grad_norm": 0.3377346694469452, "learning_rate": 1.577175374673517e-05, "loss": 0.5438, "step": 28753 }, { "epoch": 0.6098279994061632, "grad_norm": 0.32857266068458557, "learning_rate": 1.5771481404499863e-05, "loss": 0.4792, "step": 28754 }, { "epoch": 0.6098492078640962, "grad_norm": 0.3848399221897125, "learning_rate": 1.5771209055845564e-05, "loss": 0.4623, "step": 28755 }, { "epoch": 0.6098704163220292, "grad_norm": 0.3446149230003357, "learning_rate": 1.577093670077258e-05, "loss": 0.5554, "step": 28756 }, { "epoch": 0.6098916247799623, "grad_norm": 0.4597555100917816, "learning_rate": 1.5770664339281205e-05, "loss": 0.5255, "step": 28757 }, { "epoch": 0.6099128332378952, "grad_norm": 0.3178344964981079, "learning_rate": 1.577039197137175e-05, "loss": 0.4841, "step": 28758 }, { "epoch": 0.6099340416958283, "grad_norm": 0.3684765100479126, "learning_rate": 1.5770119597044514e-05, "loss": 0.4856, "step": 28759 }, { "epoch": 0.6099552501537613, "grad_norm": 0.3920991122722626, "learning_rate": 1.5769847216299802e-05, "loss": 0.5, "step": 28760 }, { "epoch": 0.6099764586116944, "grad_norm": 0.3518149256706238, "learning_rate": 1.5769574829137914e-05, "loss": 0.48, "step": 28761 }, { "epoch": 0.6099976670696273, "grad_norm": 0.349886029958725, "learning_rate": 1.5769302435559158e-05, "loss": 0.4741, "step": 28762 }, { "epoch": 0.6100188755275604, "grad_norm": 0.3731265068054199, "learning_rate": 1.5769030035563833e-05, "loss": 0.475, "step": 28763 }, { "epoch": 0.6100400839854934, "grad_norm": 0.3297783136367798, "learning_rate": 1.5768757629152244e-05, "loss": 0.4415, "step": 28764 }, { "epoch": 0.6100612924434264, "grad_norm": 0.5691766738891602, "learning_rate": 1.576848521632469e-05, "loss": 0.5151, "step": 28765 }, { "epoch": 0.6100825009013595, "grad_norm": 0.41402655839920044, "learning_rate": 1.576821279708148e-05, "loss": 0.5727, "step": 28766 }, { "epoch": 0.6101037093592925, "grad_norm": 0.3584708273410797, "learning_rate": 1.576794037142291e-05, "loss": 0.5274, "step": 28767 }, { "epoch": 0.6101249178172256, "grad_norm": 0.3608933091163635, "learning_rate": 1.5767667939349287e-05, "loss": 0.5367, "step": 28768 }, { "epoch": 0.6101461262751585, "grad_norm": 0.3926270306110382, "learning_rate": 1.5767395500860916e-05, "loss": 0.5138, "step": 28769 }, { "epoch": 0.6101673347330916, "grad_norm": 0.4684220254421234, "learning_rate": 1.5767123055958096e-05, "loss": 0.5165, "step": 28770 }, { "epoch": 0.6101885431910246, "grad_norm": 0.35782021284103394, "learning_rate": 1.5766850604641137e-05, "loss": 0.537, "step": 28771 }, { "epoch": 0.6102097516489576, "grad_norm": 0.36664286255836487, "learning_rate": 1.5766578146910332e-05, "loss": 0.4766, "step": 28772 }, { "epoch": 0.6102309601068906, "grad_norm": 0.3813309371471405, "learning_rate": 1.576630568276599e-05, "loss": 0.5089, "step": 28773 }, { "epoch": 0.6102521685648237, "grad_norm": 0.31068485975265503, "learning_rate": 1.5766033212208413e-05, "loss": 0.4973, "step": 28774 }, { "epoch": 0.6102733770227566, "grad_norm": 0.3403579890727997, "learning_rate": 1.57657607352379e-05, "loss": 0.5381, "step": 28775 }, { "epoch": 0.6102945854806897, "grad_norm": 0.43051090836524963, "learning_rate": 1.5765488251854765e-05, "loss": 0.5042, "step": 28776 }, { "epoch": 0.6103157939386227, "grad_norm": 0.3324217200279236, "learning_rate": 1.5765215762059304e-05, "loss": 0.3935, "step": 28777 }, { "epoch": 0.6103370023965558, "grad_norm": 0.3420220613479614, "learning_rate": 1.5764943265851814e-05, "loss": 0.4448, "step": 28778 }, { "epoch": 0.6103582108544888, "grad_norm": 0.43153101205825806, "learning_rate": 1.576467076323261e-05, "loss": 0.4112, "step": 28779 }, { "epoch": 0.6103794193124218, "grad_norm": 0.3332788050174713, "learning_rate": 1.5764398254201983e-05, "loss": 0.4363, "step": 28780 }, { "epoch": 0.6104006277703549, "grad_norm": 0.40240219235420227, "learning_rate": 1.5764125738760245e-05, "loss": 0.4887, "step": 28781 }, { "epoch": 0.6104218362282878, "grad_norm": 0.3425149917602539, "learning_rate": 1.5763853216907697e-05, "loss": 0.518, "step": 28782 }, { "epoch": 0.6104430446862209, "grad_norm": 0.38216182589530945, "learning_rate": 1.5763580688644645e-05, "loss": 0.5021, "step": 28783 }, { "epoch": 0.6104642531441539, "grad_norm": 0.33172351121902466, "learning_rate": 1.5763308153971383e-05, "loss": 0.5297, "step": 28784 }, { "epoch": 0.6104854616020869, "grad_norm": 0.33534297347068787, "learning_rate": 1.5763035612888223e-05, "loss": 0.4499, "step": 28785 }, { "epoch": 0.6105066700600199, "grad_norm": 0.3813084363937378, "learning_rate": 1.5762763065395463e-05, "loss": 0.5361, "step": 28786 }, { "epoch": 0.610527878517953, "grad_norm": 0.34039872884750366, "learning_rate": 1.576249051149341e-05, "loss": 0.4618, "step": 28787 }, { "epoch": 0.610549086975886, "grad_norm": 0.36779093742370605, "learning_rate": 1.5762217951182366e-05, "loss": 0.431, "step": 28788 }, { "epoch": 0.610570295433819, "grad_norm": 0.3783094584941864, "learning_rate": 1.576194538446263e-05, "loss": 0.5855, "step": 28789 }, { "epoch": 0.610591503891752, "grad_norm": 0.34073832631111145, "learning_rate": 1.576167281133451e-05, "loss": 0.4641, "step": 28790 }, { "epoch": 0.6106127123496851, "grad_norm": 0.3455255627632141, "learning_rate": 1.5761400231798306e-05, "loss": 0.4605, "step": 28791 }, { "epoch": 0.610633920807618, "grad_norm": 0.3355591595172882, "learning_rate": 1.5761127645854322e-05, "loss": 0.5582, "step": 28792 }, { "epoch": 0.6106551292655511, "grad_norm": 0.4520726203918457, "learning_rate": 1.5760855053502867e-05, "loss": 0.5089, "step": 28793 }, { "epoch": 0.6106763377234842, "grad_norm": 0.3347991108894348, "learning_rate": 1.5760582454744237e-05, "loss": 0.5107, "step": 28794 }, { "epoch": 0.6106975461814171, "grad_norm": 0.38071322441101074, "learning_rate": 1.5760309849578735e-05, "loss": 0.4804, "step": 28795 }, { "epoch": 0.6107187546393502, "grad_norm": 0.3982143998146057, "learning_rate": 1.5760037238006666e-05, "loss": 0.5622, "step": 28796 }, { "epoch": 0.6107399630972832, "grad_norm": 0.37863925099372864, "learning_rate": 1.5759764620028337e-05, "loss": 0.4525, "step": 28797 }, { "epoch": 0.6107611715552163, "grad_norm": 0.36713919043540955, "learning_rate": 1.5759491995644047e-05, "loss": 0.5074, "step": 28798 }, { "epoch": 0.6107823800131492, "grad_norm": 0.3497590124607086, "learning_rate": 1.57592193648541e-05, "loss": 0.4819, "step": 28799 }, { "epoch": 0.6108035884710823, "grad_norm": 0.3428484797477722, "learning_rate": 1.5758946727658793e-05, "loss": 0.4883, "step": 28800 }, { "epoch": 0.6108247969290153, "grad_norm": 0.3307541310787201, "learning_rate": 1.5758674084058446e-05, "loss": 0.5081, "step": 28801 }, { "epoch": 0.6108460053869483, "grad_norm": 0.3514063358306885, "learning_rate": 1.5758401434053344e-05, "loss": 0.4601, "step": 28802 }, { "epoch": 0.6108672138448813, "grad_norm": 0.3390699028968811, "learning_rate": 1.5758128777643802e-05, "loss": 0.4569, "step": 28803 }, { "epoch": 0.6108884223028144, "grad_norm": 0.34361928701400757, "learning_rate": 1.5757856114830115e-05, "loss": 0.4569, "step": 28804 }, { "epoch": 0.6109096307607473, "grad_norm": 0.32399773597717285, "learning_rate": 1.5757583445612595e-05, "loss": 0.472, "step": 28805 }, { "epoch": 0.6109308392186804, "grad_norm": 0.34235242009162903, "learning_rate": 1.575731076999154e-05, "loss": 0.4798, "step": 28806 }, { "epoch": 0.6109520476766135, "grad_norm": 0.36207181215286255, "learning_rate": 1.575703808796725e-05, "loss": 0.5454, "step": 28807 }, { "epoch": 0.6109732561345464, "grad_norm": 0.4017564058303833, "learning_rate": 1.5756765399540036e-05, "loss": 0.4926, "step": 28808 }, { "epoch": 0.6109944645924795, "grad_norm": 0.3642624020576477, "learning_rate": 1.5756492704710198e-05, "loss": 0.5617, "step": 28809 }, { "epoch": 0.6110156730504125, "grad_norm": 0.33595937490463257, "learning_rate": 1.5756220003478037e-05, "loss": 0.4779, "step": 28810 }, { "epoch": 0.6110368815083456, "grad_norm": 0.3298455476760864, "learning_rate": 1.575594729584386e-05, "loss": 0.4723, "step": 28811 }, { "epoch": 0.6110580899662785, "grad_norm": 0.34114035964012146, "learning_rate": 1.5755674581807966e-05, "loss": 0.5516, "step": 28812 }, { "epoch": 0.6110792984242116, "grad_norm": 0.3500005900859833, "learning_rate": 1.575540186137066e-05, "loss": 0.4264, "step": 28813 }, { "epoch": 0.6111005068821446, "grad_norm": 0.30040690302848816, "learning_rate": 1.575512913453225e-05, "loss": 0.4407, "step": 28814 }, { "epoch": 0.6111217153400776, "grad_norm": 0.37157294154167175, "learning_rate": 1.5754856401293033e-05, "loss": 0.5118, "step": 28815 }, { "epoch": 0.6111429237980106, "grad_norm": 0.4332437515258789, "learning_rate": 1.5754583661653312e-05, "loss": 0.614, "step": 28816 }, { "epoch": 0.6111641322559437, "grad_norm": 0.3849963843822479, "learning_rate": 1.57543109156134e-05, "loss": 0.4518, "step": 28817 }, { "epoch": 0.6111853407138766, "grad_norm": 0.48483359813690186, "learning_rate": 1.5754038163173588e-05, "loss": 0.4573, "step": 28818 }, { "epoch": 0.6112065491718097, "grad_norm": 0.362633615732193, "learning_rate": 1.5753765404334185e-05, "loss": 0.5517, "step": 28819 }, { "epoch": 0.6112277576297428, "grad_norm": 0.3468568027019501, "learning_rate": 1.5753492639095495e-05, "loss": 0.5144, "step": 28820 }, { "epoch": 0.6112489660876758, "grad_norm": 0.3460683226585388, "learning_rate": 1.575321986745782e-05, "loss": 0.5345, "step": 28821 }, { "epoch": 0.6112701745456088, "grad_norm": 0.31927481293678284, "learning_rate": 1.5752947089421463e-05, "loss": 0.4872, "step": 28822 }, { "epoch": 0.6112913830035418, "grad_norm": 0.3205627202987671, "learning_rate": 1.5752674304986733e-05, "loss": 0.4027, "step": 28823 }, { "epoch": 0.6113125914614749, "grad_norm": 0.35106393694877625, "learning_rate": 1.5752401514153923e-05, "loss": 0.5544, "step": 28824 }, { "epoch": 0.6113337999194078, "grad_norm": 0.3633821904659271, "learning_rate": 1.5752128716923345e-05, "loss": 0.5102, "step": 28825 }, { "epoch": 0.6113550083773409, "grad_norm": 0.38061901926994324, "learning_rate": 1.57518559132953e-05, "loss": 0.4638, "step": 28826 }, { "epoch": 0.6113762168352739, "grad_norm": 0.3615661561489105, "learning_rate": 1.5751583103270088e-05, "loss": 0.4837, "step": 28827 }, { "epoch": 0.611397425293207, "grad_norm": 0.390547513961792, "learning_rate": 1.5751310286848016e-05, "loss": 0.5509, "step": 28828 }, { "epoch": 0.6114186337511399, "grad_norm": 0.36797821521759033, "learning_rate": 1.5751037464029392e-05, "loss": 0.4842, "step": 28829 }, { "epoch": 0.611439842209073, "grad_norm": 0.37441202998161316, "learning_rate": 1.5750764634814507e-05, "loss": 0.5714, "step": 28830 }, { "epoch": 0.611461050667006, "grad_norm": 0.352772980928421, "learning_rate": 1.5750491799203673e-05, "loss": 0.4192, "step": 28831 }, { "epoch": 0.611482259124939, "grad_norm": 0.33880555629730225, "learning_rate": 1.5750218957197195e-05, "loss": 0.4805, "step": 28832 }, { "epoch": 0.611503467582872, "grad_norm": 0.3545519709587097, "learning_rate": 1.5749946108795367e-05, "loss": 0.4832, "step": 28833 }, { "epoch": 0.6115246760408051, "grad_norm": 0.35156017541885376, "learning_rate": 1.5749673253998505e-05, "loss": 0.4396, "step": 28834 }, { "epoch": 0.6115458844987381, "grad_norm": 0.33677029609680176, "learning_rate": 1.5749400392806907e-05, "loss": 0.506, "step": 28835 }, { "epoch": 0.6115670929566711, "grad_norm": 0.3496101200580597, "learning_rate": 1.5749127525220873e-05, "loss": 0.4368, "step": 28836 }, { "epoch": 0.6115883014146042, "grad_norm": 0.3454727828502655, "learning_rate": 1.5748854651240707e-05, "loss": 0.4168, "step": 28837 }, { "epoch": 0.6116095098725371, "grad_norm": 0.46673983335494995, "learning_rate": 1.574858177086672e-05, "loss": 0.5294, "step": 28838 }, { "epoch": 0.6116307183304702, "grad_norm": 0.35249266028404236, "learning_rate": 1.574830888409921e-05, "loss": 0.4686, "step": 28839 }, { "epoch": 0.6116519267884032, "grad_norm": 0.36225947737693787, "learning_rate": 1.5748035990938476e-05, "loss": 0.4936, "step": 28840 }, { "epoch": 0.6116731352463363, "grad_norm": 0.5975335836410522, "learning_rate": 1.574776309138483e-05, "loss": 0.5356, "step": 28841 }, { "epoch": 0.6116943437042692, "grad_norm": 0.36836034059524536, "learning_rate": 1.574749018543857e-05, "loss": 0.4832, "step": 28842 }, { "epoch": 0.6117155521622023, "grad_norm": 0.36373844742774963, "learning_rate": 1.57472172731e-05, "loss": 0.4971, "step": 28843 }, { "epoch": 0.6117367606201353, "grad_norm": 0.4226566255092621, "learning_rate": 1.5746944354369427e-05, "loss": 0.4931, "step": 28844 }, { "epoch": 0.6117579690780683, "grad_norm": 0.36290180683135986, "learning_rate": 1.5746671429247157e-05, "loss": 0.4719, "step": 28845 }, { "epoch": 0.6117791775360013, "grad_norm": 0.40296661853790283, "learning_rate": 1.574639849773348e-05, "loss": 0.6065, "step": 28846 }, { "epoch": 0.6118003859939344, "grad_norm": 0.4868584871292114, "learning_rate": 1.5746125559828717e-05, "loss": 0.6033, "step": 28847 }, { "epoch": 0.6118215944518675, "grad_norm": 0.3318956792354584, "learning_rate": 1.5745852615533154e-05, "loss": 0.4575, "step": 28848 }, { "epoch": 0.6118428029098004, "grad_norm": 0.3620340824127197, "learning_rate": 1.574557966484711e-05, "loss": 0.4898, "step": 28849 }, { "epoch": 0.6118640113677335, "grad_norm": 0.3727528750896454, "learning_rate": 1.574530670777088e-05, "loss": 0.5075, "step": 28850 }, { "epoch": 0.6118852198256665, "grad_norm": 0.3233022689819336, "learning_rate": 1.574503374430477e-05, "loss": 0.5038, "step": 28851 }, { "epoch": 0.6119064282835995, "grad_norm": 0.33939477801322937, "learning_rate": 1.5744760774449087e-05, "loss": 0.4508, "step": 28852 }, { "epoch": 0.6119276367415325, "grad_norm": 0.37869012355804443, "learning_rate": 1.5744487798204126e-05, "loss": 0.507, "step": 28853 }, { "epoch": 0.6119488451994656, "grad_norm": 0.35298842191696167, "learning_rate": 1.57442148155702e-05, "loss": 0.5041, "step": 28854 }, { "epoch": 0.6119700536573985, "grad_norm": 0.4254539906978607, "learning_rate": 1.5743941826547603e-05, "loss": 0.5344, "step": 28855 }, { "epoch": 0.6119912621153316, "grad_norm": 0.37043195962905884, "learning_rate": 1.5743668831136648e-05, "loss": 0.4549, "step": 28856 }, { "epoch": 0.6120124705732646, "grad_norm": 0.37646228075027466, "learning_rate": 1.5743395829337635e-05, "loss": 0.552, "step": 28857 }, { "epoch": 0.6120336790311977, "grad_norm": 0.33065468072891235, "learning_rate": 1.574312282115086e-05, "loss": 0.4197, "step": 28858 }, { "epoch": 0.6120548874891306, "grad_norm": 0.31820353865623474, "learning_rate": 1.574284980657664e-05, "loss": 0.5132, "step": 28859 }, { "epoch": 0.6120760959470637, "grad_norm": 0.3588016927242279, "learning_rate": 1.5742576785615274e-05, "loss": 0.3958, "step": 28860 }, { "epoch": 0.6120973044049968, "grad_norm": 0.39098912477493286, "learning_rate": 1.574230375826706e-05, "loss": 0.4883, "step": 28861 }, { "epoch": 0.6121185128629297, "grad_norm": 0.318074107170105, "learning_rate": 1.5742030724532306e-05, "loss": 0.4359, "step": 28862 }, { "epoch": 0.6121397213208628, "grad_norm": 0.33506080508232117, "learning_rate": 1.5741757684411317e-05, "loss": 0.501, "step": 28863 }, { "epoch": 0.6121609297787958, "grad_norm": 0.35542765259742737, "learning_rate": 1.5741484637904393e-05, "loss": 0.463, "step": 28864 }, { "epoch": 0.6121821382367288, "grad_norm": 0.5501086115837097, "learning_rate": 1.574121158501184e-05, "loss": 0.4653, "step": 28865 }, { "epoch": 0.6122033466946618, "grad_norm": 0.39194217324256897, "learning_rate": 1.5740938525733964e-05, "loss": 0.5185, "step": 28866 }, { "epoch": 0.6122245551525949, "grad_norm": 0.3827365040779114, "learning_rate": 1.5740665460071062e-05, "loss": 0.4918, "step": 28867 }, { "epoch": 0.6122457636105278, "grad_norm": 0.3465360403060913, "learning_rate": 1.5740392388023446e-05, "loss": 0.4114, "step": 28868 }, { "epoch": 0.6122669720684609, "grad_norm": 0.41564762592315674, "learning_rate": 1.574011930959141e-05, "loss": 0.5435, "step": 28869 }, { "epoch": 0.6122881805263939, "grad_norm": 0.34093376994132996, "learning_rate": 1.5739846224775267e-05, "loss": 0.4819, "step": 28870 }, { "epoch": 0.612309388984327, "grad_norm": 0.3378220498561859, "learning_rate": 1.5739573133575317e-05, "loss": 0.4928, "step": 28871 }, { "epoch": 0.6123305974422599, "grad_norm": 0.3337673246860504, "learning_rate": 1.5739300035991863e-05, "loss": 0.4364, "step": 28872 }, { "epoch": 0.612351805900193, "grad_norm": 0.3531213700771332, "learning_rate": 1.573902693202521e-05, "loss": 0.5735, "step": 28873 }, { "epoch": 0.612373014358126, "grad_norm": 0.33509567379951477, "learning_rate": 1.5738753821675658e-05, "loss": 0.4739, "step": 28874 }, { "epoch": 0.612394222816059, "grad_norm": 0.33762744069099426, "learning_rate": 1.5738480704943516e-05, "loss": 0.458, "step": 28875 }, { "epoch": 0.6124154312739921, "grad_norm": 0.4477444887161255, "learning_rate": 1.5738207581829087e-05, "loss": 0.4793, "step": 28876 }, { "epoch": 0.6124366397319251, "grad_norm": 0.37967249751091003, "learning_rate": 1.573793445233267e-05, "loss": 0.4992, "step": 28877 }, { "epoch": 0.6124578481898582, "grad_norm": 0.3862478733062744, "learning_rate": 1.573766131645458e-05, "loss": 0.5079, "step": 28878 }, { "epoch": 0.6124790566477911, "grad_norm": 0.350629597902298, "learning_rate": 1.5737388174195104e-05, "loss": 0.5407, "step": 28879 }, { "epoch": 0.6125002651057242, "grad_norm": 0.5804603695869446, "learning_rate": 1.573711502555456e-05, "loss": 0.4629, "step": 28880 }, { "epoch": 0.6125214735636572, "grad_norm": 0.3950224816799164, "learning_rate": 1.5736841870533243e-05, "loss": 0.4455, "step": 28881 }, { "epoch": 0.6125426820215902, "grad_norm": 0.40511637926101685, "learning_rate": 1.573656870913146e-05, "loss": 0.5127, "step": 28882 }, { "epoch": 0.6125638904795232, "grad_norm": 0.33776170015335083, "learning_rate": 1.5736295541349517e-05, "loss": 0.4901, "step": 28883 }, { "epoch": 0.6125850989374563, "grad_norm": 0.33584320545196533, "learning_rate": 1.5736022367187715e-05, "loss": 0.5003, "step": 28884 }, { "epoch": 0.6126063073953892, "grad_norm": 0.34508469700813293, "learning_rate": 1.573574918664636e-05, "loss": 0.5052, "step": 28885 }, { "epoch": 0.6126275158533223, "grad_norm": 0.38050398230552673, "learning_rate": 1.5735475999725754e-05, "loss": 0.5053, "step": 28886 }, { "epoch": 0.6126487243112553, "grad_norm": 0.46244314312934875, "learning_rate": 1.57352028064262e-05, "loss": 0.4289, "step": 28887 }, { "epoch": 0.6126699327691884, "grad_norm": 0.3296111822128296, "learning_rate": 1.573492960674801e-05, "loss": 0.4221, "step": 28888 }, { "epoch": 0.6126911412271214, "grad_norm": 0.3644402325153351, "learning_rate": 1.5734656400691473e-05, "loss": 0.505, "step": 28889 }, { "epoch": 0.6127123496850544, "grad_norm": 0.3893831670284271, "learning_rate": 1.5734383188256904e-05, "loss": 0.5088, "step": 28890 }, { "epoch": 0.6127335581429875, "grad_norm": 0.34106671810150146, "learning_rate": 1.5734109969444604e-05, "loss": 0.4899, "step": 28891 }, { "epoch": 0.6127547666009204, "grad_norm": 0.365410715341568, "learning_rate": 1.5733836744254878e-05, "loss": 0.4709, "step": 28892 }, { "epoch": 0.6127759750588535, "grad_norm": 0.3448059856891632, "learning_rate": 1.5733563512688026e-05, "loss": 0.5473, "step": 28893 }, { "epoch": 0.6127971835167865, "grad_norm": 0.3972858190536499, "learning_rate": 1.5733290274744355e-05, "loss": 0.5049, "step": 28894 }, { "epoch": 0.6128183919747195, "grad_norm": 0.3709932863712311, "learning_rate": 1.5733017030424166e-05, "loss": 0.5006, "step": 28895 }, { "epoch": 0.6128396004326525, "grad_norm": 0.35123178362846375, "learning_rate": 1.573274377972777e-05, "loss": 0.453, "step": 28896 }, { "epoch": 0.6128608088905856, "grad_norm": 0.31598493456840515, "learning_rate": 1.5732470522655466e-05, "loss": 0.4048, "step": 28897 }, { "epoch": 0.6128820173485185, "grad_norm": 0.3699429929256439, "learning_rate": 1.5732197259207555e-05, "loss": 0.4797, "step": 28898 }, { "epoch": 0.6129032258064516, "grad_norm": 0.34360185265541077, "learning_rate": 1.5731923989384346e-05, "loss": 0.4956, "step": 28899 }, { "epoch": 0.6129244342643846, "grad_norm": 0.3611701726913452, "learning_rate": 1.5731650713186143e-05, "loss": 0.4575, "step": 28900 }, { "epoch": 0.6129456427223177, "grad_norm": 0.35898756980895996, "learning_rate": 1.573137743061324e-05, "loss": 0.5087, "step": 28901 }, { "epoch": 0.6129668511802507, "grad_norm": 0.37689974904060364, "learning_rate": 1.5731104141665956e-05, "loss": 0.5169, "step": 28902 }, { "epoch": 0.6129880596381837, "grad_norm": 0.42124730348587036, "learning_rate": 1.5730830846344588e-05, "loss": 0.4912, "step": 28903 }, { "epoch": 0.6130092680961168, "grad_norm": 0.3537840247154236, "learning_rate": 1.5730557544649436e-05, "loss": 0.4589, "step": 28904 }, { "epoch": 0.6130304765540497, "grad_norm": 0.349386990070343, "learning_rate": 1.573028423658081e-05, "loss": 0.5408, "step": 28905 }, { "epoch": 0.6130516850119828, "grad_norm": 0.4403747022151947, "learning_rate": 1.573001092213901e-05, "loss": 0.6491, "step": 28906 }, { "epoch": 0.6130728934699158, "grad_norm": 0.36424291133880615, "learning_rate": 1.5729737601324343e-05, "loss": 0.4851, "step": 28907 }, { "epoch": 0.6130941019278489, "grad_norm": 0.351523756980896, "learning_rate": 1.5729464274137108e-05, "loss": 0.4696, "step": 28908 }, { "epoch": 0.6131153103857818, "grad_norm": 0.34547409415245056, "learning_rate": 1.5729190940577617e-05, "loss": 0.4763, "step": 28909 }, { "epoch": 0.6131365188437149, "grad_norm": 0.3690202832221985, "learning_rate": 1.5728917600646168e-05, "loss": 0.4415, "step": 28910 }, { "epoch": 0.6131577273016479, "grad_norm": 0.407446026802063, "learning_rate": 1.572864425434307e-05, "loss": 0.4937, "step": 28911 }, { "epoch": 0.6131789357595809, "grad_norm": 0.37698894739151, "learning_rate": 1.572837090166862e-05, "loss": 0.534, "step": 28912 }, { "epoch": 0.6132001442175139, "grad_norm": 0.3290236294269562, "learning_rate": 1.5728097542623124e-05, "loss": 0.4891, "step": 28913 }, { "epoch": 0.613221352675447, "grad_norm": 0.2980497479438782, "learning_rate": 1.572782417720689e-05, "loss": 0.4868, "step": 28914 }, { "epoch": 0.6132425611333799, "grad_norm": 0.38401153683662415, "learning_rate": 1.572755080542022e-05, "loss": 0.3967, "step": 28915 }, { "epoch": 0.613263769591313, "grad_norm": 0.30242568254470825, "learning_rate": 1.5727277427263417e-05, "loss": 0.4888, "step": 28916 }, { "epoch": 0.6132849780492461, "grad_norm": 0.3653479814529419, "learning_rate": 1.5727004042736784e-05, "loss": 0.5041, "step": 28917 }, { "epoch": 0.613306186507179, "grad_norm": 0.35329633951187134, "learning_rate": 1.572673065184063e-05, "loss": 0.4791, "step": 28918 }, { "epoch": 0.6133273949651121, "grad_norm": 0.32379260659217834, "learning_rate": 1.5726457254575256e-05, "loss": 0.5105, "step": 28919 }, { "epoch": 0.6133486034230451, "grad_norm": 0.36696740984916687, "learning_rate": 1.5726183850940966e-05, "loss": 0.5114, "step": 28920 }, { "epoch": 0.6133698118809782, "grad_norm": 0.34216055274009705, "learning_rate": 1.5725910440938063e-05, "loss": 0.4842, "step": 28921 }, { "epoch": 0.6133910203389111, "grad_norm": 0.362079381942749, "learning_rate": 1.572563702456685e-05, "loss": 0.4805, "step": 28922 }, { "epoch": 0.6134122287968442, "grad_norm": 0.35320037603378296, "learning_rate": 1.5725363601827635e-05, "loss": 0.5332, "step": 28923 }, { "epoch": 0.6134334372547772, "grad_norm": 0.370243638753891, "learning_rate": 1.572509017272072e-05, "loss": 0.5374, "step": 28924 }, { "epoch": 0.6134546457127102, "grad_norm": 0.3761554956436157, "learning_rate": 1.572481673724641e-05, "loss": 0.4936, "step": 28925 }, { "epoch": 0.6134758541706432, "grad_norm": 0.3576388657093048, "learning_rate": 1.572454329540501e-05, "loss": 0.5363, "step": 28926 }, { "epoch": 0.6134970626285763, "grad_norm": 0.40140876173973083, "learning_rate": 1.572426984719682e-05, "loss": 0.4922, "step": 28927 }, { "epoch": 0.6135182710865092, "grad_norm": 0.3167346119880676, "learning_rate": 1.5723996392622146e-05, "loss": 0.5132, "step": 28928 }, { "epoch": 0.6135394795444423, "grad_norm": 0.36338916420936584, "learning_rate": 1.5723722931681297e-05, "loss": 0.5577, "step": 28929 }, { "epoch": 0.6135606880023754, "grad_norm": 0.44003966450691223, "learning_rate": 1.572344946437457e-05, "loss": 0.6055, "step": 28930 }, { "epoch": 0.6135818964603084, "grad_norm": 0.41037601232528687, "learning_rate": 1.5723175990702275e-05, "loss": 0.5005, "step": 28931 }, { "epoch": 0.6136031049182414, "grad_norm": 0.33944937586784363, "learning_rate": 1.5722902510664712e-05, "loss": 0.5409, "step": 28932 }, { "epoch": 0.6136243133761744, "grad_norm": 0.42288336157798767, "learning_rate": 1.5722629024262185e-05, "loss": 0.5536, "step": 28933 }, { "epoch": 0.6136455218341075, "grad_norm": 0.39244014024734497, "learning_rate": 1.5722355531495e-05, "loss": 0.5041, "step": 28934 }, { "epoch": 0.6136667302920404, "grad_norm": 0.364999383687973, "learning_rate": 1.5722082032363463e-05, "loss": 0.5559, "step": 28935 }, { "epoch": 0.6136879387499735, "grad_norm": 0.3924497067928314, "learning_rate": 1.5721808526867874e-05, "loss": 0.5398, "step": 28936 }, { "epoch": 0.6137091472079065, "grad_norm": 0.3556845784187317, "learning_rate": 1.5721535015008544e-05, "loss": 0.4871, "step": 28937 }, { "epoch": 0.6137303556658396, "grad_norm": 0.3616538643836975, "learning_rate": 1.5721261496785767e-05, "loss": 0.4541, "step": 28938 }, { "epoch": 0.6137515641237725, "grad_norm": 0.4556594491004944, "learning_rate": 1.5720987972199855e-05, "loss": 0.5189, "step": 28939 }, { "epoch": 0.6137727725817056, "grad_norm": 0.3849092721939087, "learning_rate": 1.572071444125111e-05, "loss": 0.4616, "step": 28940 }, { "epoch": 0.6137939810396386, "grad_norm": 0.3058135211467743, "learning_rate": 1.572044090393983e-05, "loss": 0.4945, "step": 28941 }, { "epoch": 0.6138151894975716, "grad_norm": 0.36211779713630676, "learning_rate": 1.5720167360266333e-05, "loss": 0.4508, "step": 28942 }, { "epoch": 0.6138363979555047, "grad_norm": 0.31891825795173645, "learning_rate": 1.5719893810230913e-05, "loss": 0.4, "step": 28943 }, { "epoch": 0.6138576064134377, "grad_norm": 0.3276421129703522, "learning_rate": 1.571962025383388e-05, "loss": 0.5224, "step": 28944 }, { "epoch": 0.6138788148713707, "grad_norm": 0.3796129524707794, "learning_rate": 1.571934669107553e-05, "loss": 0.5099, "step": 28945 }, { "epoch": 0.6139000233293037, "grad_norm": 0.3339241147041321, "learning_rate": 1.5719073121956176e-05, "loss": 0.3933, "step": 28946 }, { "epoch": 0.6139212317872368, "grad_norm": 0.3803528845310211, "learning_rate": 1.5718799546476113e-05, "loss": 0.529, "step": 28947 }, { "epoch": 0.6139424402451698, "grad_norm": 0.38648492097854614, "learning_rate": 1.5718525964635656e-05, "loss": 0.4895, "step": 28948 }, { "epoch": 0.6139636487031028, "grad_norm": 0.3615071773529053, "learning_rate": 1.5718252376435103e-05, "loss": 0.559, "step": 28949 }, { "epoch": 0.6139848571610358, "grad_norm": 0.3789767622947693, "learning_rate": 1.571797878187476e-05, "loss": 0.5543, "step": 28950 }, { "epoch": 0.6140060656189689, "grad_norm": 0.37523964047431946, "learning_rate": 1.571770518095493e-05, "loss": 0.5584, "step": 28951 }, { "epoch": 0.6140272740769018, "grad_norm": 0.3194863200187683, "learning_rate": 1.5717431573675918e-05, "loss": 0.4167, "step": 28952 }, { "epoch": 0.6140484825348349, "grad_norm": 0.33441710472106934, "learning_rate": 1.5717157960038027e-05, "loss": 0.4771, "step": 28953 }, { "epoch": 0.6140696909927679, "grad_norm": 0.43928736448287964, "learning_rate": 1.571688434004156e-05, "loss": 0.5059, "step": 28954 }, { "epoch": 0.6140908994507009, "grad_norm": 0.3592768609523773, "learning_rate": 1.571661071368683e-05, "loss": 0.4899, "step": 28955 }, { "epoch": 0.6141121079086339, "grad_norm": 0.3613681197166443, "learning_rate": 1.5716337080974136e-05, "loss": 0.397, "step": 28956 }, { "epoch": 0.614133316366567, "grad_norm": 0.40827423334121704, "learning_rate": 1.5716063441903776e-05, "loss": 0.5513, "step": 28957 }, { "epoch": 0.6141545248245001, "grad_norm": 0.34045442938804626, "learning_rate": 1.5715789796476064e-05, "loss": 0.4408, "step": 28958 }, { "epoch": 0.614175733282433, "grad_norm": 0.3729704022407532, "learning_rate": 1.57155161446913e-05, "loss": 0.525, "step": 28959 }, { "epoch": 0.6141969417403661, "grad_norm": 0.36705482006073, "learning_rate": 1.5715242486549785e-05, "loss": 0.4606, "step": 28960 }, { "epoch": 0.6142181501982991, "grad_norm": 0.3635420799255371, "learning_rate": 1.571496882205183e-05, "loss": 0.467, "step": 28961 }, { "epoch": 0.6142393586562321, "grad_norm": 0.3811218738555908, "learning_rate": 1.5714695151197733e-05, "loss": 0.5062, "step": 28962 }, { "epoch": 0.6142605671141651, "grad_norm": 0.35659530758857727, "learning_rate": 1.5714421473987807e-05, "loss": 0.4877, "step": 28963 }, { "epoch": 0.6142817755720982, "grad_norm": 0.3996789753437042, "learning_rate": 1.571414779042235e-05, "loss": 0.5373, "step": 28964 }, { "epoch": 0.6143029840300311, "grad_norm": 0.4165891110897064, "learning_rate": 1.571387410050166e-05, "loss": 0.4765, "step": 28965 }, { "epoch": 0.6143241924879642, "grad_norm": 0.3896796703338623, "learning_rate": 1.571360040422606e-05, "loss": 0.5092, "step": 28966 }, { "epoch": 0.6143454009458972, "grad_norm": 0.3816003203392029, "learning_rate": 1.5713326701595834e-05, "loss": 0.5256, "step": 28967 }, { "epoch": 0.6143666094038303, "grad_norm": 0.382874459028244, "learning_rate": 1.57130529926113e-05, "loss": 0.5342, "step": 28968 }, { "epoch": 0.6143878178617632, "grad_norm": 0.3220999538898468, "learning_rate": 1.5712779277272755e-05, "loss": 0.5078, "step": 28969 }, { "epoch": 0.6144090263196963, "grad_norm": 0.3838721513748169, "learning_rate": 1.571250555558051e-05, "loss": 0.5054, "step": 28970 }, { "epoch": 0.6144302347776294, "grad_norm": 0.36589622497558594, "learning_rate": 1.5712231827534865e-05, "loss": 0.4819, "step": 28971 }, { "epoch": 0.6144514432355623, "grad_norm": 0.38349613547325134, "learning_rate": 1.5711958093136128e-05, "loss": 0.468, "step": 28972 }, { "epoch": 0.6144726516934954, "grad_norm": 0.3763431906700134, "learning_rate": 1.5711684352384595e-05, "loss": 0.4743, "step": 28973 }, { "epoch": 0.6144938601514284, "grad_norm": 0.3472488820552826, "learning_rate": 1.571141060528058e-05, "loss": 0.4873, "step": 28974 }, { "epoch": 0.6145150686093614, "grad_norm": 0.368044376373291, "learning_rate": 1.5711136851824383e-05, "loss": 0.5076, "step": 28975 }, { "epoch": 0.6145362770672944, "grad_norm": 0.3233676254749298, "learning_rate": 1.5710863092016307e-05, "loss": 0.4022, "step": 28976 }, { "epoch": 0.6145574855252275, "grad_norm": 0.350887656211853, "learning_rate": 1.5710589325856663e-05, "loss": 0.4308, "step": 28977 }, { "epoch": 0.6145786939831605, "grad_norm": 0.3680380582809448, "learning_rate": 1.571031555334575e-05, "loss": 0.5441, "step": 28978 }, { "epoch": 0.6145999024410935, "grad_norm": 0.37000158429145813, "learning_rate": 1.5710041774483873e-05, "loss": 0.5115, "step": 28979 }, { "epoch": 0.6146211108990265, "grad_norm": 0.3242502510547638, "learning_rate": 1.5709767989271337e-05, "loss": 0.5532, "step": 28980 }, { "epoch": 0.6146423193569596, "grad_norm": 0.43805766105651855, "learning_rate": 1.5709494197708447e-05, "loss": 0.5229, "step": 28981 }, { "epoch": 0.6146635278148925, "grad_norm": 0.3665716052055359, "learning_rate": 1.5709220399795506e-05, "loss": 0.4655, "step": 28982 }, { "epoch": 0.6146847362728256, "grad_norm": 0.3877858519554138, "learning_rate": 1.570894659553282e-05, "loss": 0.4739, "step": 28983 }, { "epoch": 0.6147059447307587, "grad_norm": 0.4030119776725769, "learning_rate": 1.5708672784920696e-05, "loss": 0.462, "step": 28984 }, { "epoch": 0.6147271531886916, "grad_norm": 0.35257238149642944, "learning_rate": 1.570839896795943e-05, "loss": 0.4851, "step": 28985 }, { "epoch": 0.6147483616466247, "grad_norm": 0.4368170201778412, "learning_rate": 1.570812514464934e-05, "loss": 0.5558, "step": 28986 }, { "epoch": 0.6147695701045577, "grad_norm": 0.3598046898841858, "learning_rate": 1.5707851314990718e-05, "loss": 0.4712, "step": 28987 }, { "epoch": 0.6147907785624908, "grad_norm": 0.35284462571144104, "learning_rate": 1.5707577478983873e-05, "loss": 0.5005, "step": 28988 }, { "epoch": 0.6148119870204237, "grad_norm": 0.37169864773750305, "learning_rate": 1.570730363662911e-05, "loss": 0.5864, "step": 28989 }, { "epoch": 0.6148331954783568, "grad_norm": 0.360830157995224, "learning_rate": 1.5707029787926734e-05, "loss": 0.4802, "step": 28990 }, { "epoch": 0.6148544039362898, "grad_norm": 0.34282049536705017, "learning_rate": 1.570675593287705e-05, "loss": 0.4597, "step": 28991 }, { "epoch": 0.6148756123942228, "grad_norm": 0.33745232224464417, "learning_rate": 1.570648207148036e-05, "loss": 0.4359, "step": 28992 }, { "epoch": 0.6148968208521558, "grad_norm": 0.34743058681488037, "learning_rate": 1.570620820373697e-05, "loss": 0.4772, "step": 28993 }, { "epoch": 0.6149180293100889, "grad_norm": 0.3243425190448761, "learning_rate": 1.5705934329647186e-05, "loss": 0.4539, "step": 28994 }, { "epoch": 0.6149392377680218, "grad_norm": 0.3559298813343048, "learning_rate": 1.5705660449211315e-05, "loss": 0.6584, "step": 28995 }, { "epoch": 0.6149604462259549, "grad_norm": 0.38027289509773254, "learning_rate": 1.570538656242965e-05, "loss": 0.5505, "step": 28996 }, { "epoch": 0.6149816546838879, "grad_norm": 0.3214404582977295, "learning_rate": 1.570511266930251e-05, "loss": 0.4183, "step": 28997 }, { "epoch": 0.615002863141821, "grad_norm": 0.40582430362701416, "learning_rate": 1.570483876983019e-05, "loss": 0.4707, "step": 28998 }, { "epoch": 0.615024071599754, "grad_norm": 0.29924044013023376, "learning_rate": 1.5704564864013e-05, "loss": 0.4215, "step": 28999 }, { "epoch": 0.615045280057687, "grad_norm": 0.3903052806854248, "learning_rate": 1.5704290951851242e-05, "loss": 0.5362, "step": 29000 }, { "epoch": 0.6150664885156201, "grad_norm": 0.47194862365722656, "learning_rate": 1.5704017033345218e-05, "loss": 0.4707, "step": 29001 }, { "epoch": 0.615087696973553, "grad_norm": 0.8418944478034973, "learning_rate": 1.570374310849524e-05, "loss": 0.5772, "step": 29002 }, { "epoch": 0.6151089054314861, "grad_norm": 0.3587038218975067, "learning_rate": 1.5703469177301606e-05, "loss": 0.4893, "step": 29003 }, { "epoch": 0.6151301138894191, "grad_norm": 0.33395108580589294, "learning_rate": 1.5703195239764624e-05, "loss": 0.436, "step": 29004 }, { "epoch": 0.6151513223473521, "grad_norm": 0.34685489535331726, "learning_rate": 1.57029212958846e-05, "loss": 0.4476, "step": 29005 }, { "epoch": 0.6151725308052851, "grad_norm": 0.346965491771698, "learning_rate": 1.5702647345661833e-05, "loss": 0.4124, "step": 29006 }, { "epoch": 0.6151937392632182, "grad_norm": 0.34152543544769287, "learning_rate": 1.5702373389096633e-05, "loss": 0.575, "step": 29007 }, { "epoch": 0.6152149477211512, "grad_norm": 0.3458573520183563, "learning_rate": 1.5702099426189304e-05, "loss": 0.4852, "step": 29008 }, { "epoch": 0.6152361561790842, "grad_norm": 0.3542083501815796, "learning_rate": 1.5701825456940144e-05, "loss": 0.5416, "step": 29009 }, { "epoch": 0.6152573646370172, "grad_norm": 0.3666113317012787, "learning_rate": 1.570155148134947e-05, "loss": 0.4841, "step": 29010 }, { "epoch": 0.6152785730949503, "grad_norm": 0.4018048346042633, "learning_rate": 1.5701277499417577e-05, "loss": 0.4868, "step": 29011 }, { "epoch": 0.6152997815528833, "grad_norm": 0.36896631121635437, "learning_rate": 1.5701003511144772e-05, "loss": 0.5273, "step": 29012 }, { "epoch": 0.6153209900108163, "grad_norm": 0.3377099633216858, "learning_rate": 1.570072951653136e-05, "loss": 0.5424, "step": 29013 }, { "epoch": 0.6153421984687494, "grad_norm": 0.3399973213672638, "learning_rate": 1.570045551557765e-05, "loss": 0.4825, "step": 29014 }, { "epoch": 0.6153634069266823, "grad_norm": 0.45788466930389404, "learning_rate": 1.570018150828394e-05, "loss": 0.5432, "step": 29015 }, { "epoch": 0.6153846153846154, "grad_norm": 0.5334922075271606, "learning_rate": 1.569990749465054e-05, "loss": 0.4908, "step": 29016 }, { "epoch": 0.6154058238425484, "grad_norm": 0.387746661901474, "learning_rate": 1.5699633474677746e-05, "loss": 0.4937, "step": 29017 }, { "epoch": 0.6154270323004815, "grad_norm": 0.34005671739578247, "learning_rate": 1.5699359448365874e-05, "loss": 0.5161, "step": 29018 }, { "epoch": 0.6154482407584144, "grad_norm": 0.39115089178085327, "learning_rate": 1.569908541571522e-05, "loss": 0.5083, "step": 29019 }, { "epoch": 0.6154694492163475, "grad_norm": 0.39853981137275696, "learning_rate": 1.56988113767261e-05, "loss": 0.5351, "step": 29020 }, { "epoch": 0.6154906576742805, "grad_norm": 0.33381837606430054, "learning_rate": 1.5698537331398808e-05, "loss": 0.5058, "step": 29021 }, { "epoch": 0.6155118661322135, "grad_norm": 0.6551583409309387, "learning_rate": 1.5698263279733655e-05, "loss": 0.5002, "step": 29022 }, { "epoch": 0.6155330745901465, "grad_norm": 0.5255182981491089, "learning_rate": 1.5697989221730936e-05, "loss": 0.5022, "step": 29023 }, { "epoch": 0.6155542830480796, "grad_norm": 0.33340391516685486, "learning_rate": 1.5697715157390968e-05, "loss": 0.4616, "step": 29024 }, { "epoch": 0.6155754915060127, "grad_norm": 0.48987653851509094, "learning_rate": 1.5697441086714048e-05, "loss": 0.4269, "step": 29025 }, { "epoch": 0.6155966999639456, "grad_norm": 0.40040794014930725, "learning_rate": 1.5697167009700488e-05, "loss": 0.4771, "step": 29026 }, { "epoch": 0.6156179084218787, "grad_norm": 0.3583178222179413, "learning_rate": 1.5696892926350583e-05, "loss": 0.4569, "step": 29027 }, { "epoch": 0.6156391168798117, "grad_norm": 0.3201388716697693, "learning_rate": 1.5696618836664648e-05, "loss": 0.4337, "step": 29028 }, { "epoch": 0.6156603253377447, "grad_norm": 0.41993317008018494, "learning_rate": 1.5696344740642978e-05, "loss": 0.5579, "step": 29029 }, { "epoch": 0.6156815337956777, "grad_norm": 0.3272402882575989, "learning_rate": 1.5696070638285887e-05, "loss": 0.4645, "step": 29030 }, { "epoch": 0.6157027422536108, "grad_norm": 0.34168165922164917, "learning_rate": 1.5695796529593675e-05, "loss": 0.5982, "step": 29031 }, { "epoch": 0.6157239507115437, "grad_norm": 0.3856218457221985, "learning_rate": 1.5695522414566645e-05, "loss": 0.5626, "step": 29032 }, { "epoch": 0.6157451591694768, "grad_norm": 0.37456217408180237, "learning_rate": 1.5695248293205106e-05, "loss": 0.5096, "step": 29033 }, { "epoch": 0.6157663676274098, "grad_norm": 0.3219073712825775, "learning_rate": 1.5694974165509364e-05, "loss": 0.5115, "step": 29034 }, { "epoch": 0.6157875760853428, "grad_norm": 0.3694823980331421, "learning_rate": 1.569470003147972e-05, "loss": 0.5245, "step": 29035 }, { "epoch": 0.6158087845432758, "grad_norm": 0.43474143743515015, "learning_rate": 1.5694425891116477e-05, "loss": 0.5482, "step": 29036 }, { "epoch": 0.6158299930012089, "grad_norm": 0.43600091338157654, "learning_rate": 1.5694151744419946e-05, "loss": 0.4689, "step": 29037 }, { "epoch": 0.6158512014591419, "grad_norm": 0.398744136095047, "learning_rate": 1.5693877591390427e-05, "loss": 0.6015, "step": 29038 }, { "epoch": 0.6158724099170749, "grad_norm": 0.4093575179576874, "learning_rate": 1.569360343202823e-05, "loss": 0.52, "step": 29039 }, { "epoch": 0.615893618375008, "grad_norm": 0.33333712816238403, "learning_rate": 1.569332926633365e-05, "loss": 0.4976, "step": 29040 }, { "epoch": 0.615914826832941, "grad_norm": 0.3193158805370331, "learning_rate": 1.5693055094307003e-05, "loss": 0.515, "step": 29041 }, { "epoch": 0.615936035290874, "grad_norm": 0.360059529542923, "learning_rate": 1.569278091594859e-05, "loss": 0.5915, "step": 29042 }, { "epoch": 0.615957243748807, "grad_norm": 0.35715553164482117, "learning_rate": 1.5692506731258714e-05, "loss": 0.4224, "step": 29043 }, { "epoch": 0.6159784522067401, "grad_norm": 0.3491255044937134, "learning_rate": 1.5692232540237684e-05, "loss": 0.5111, "step": 29044 }, { "epoch": 0.615999660664673, "grad_norm": 0.32715708017349243, "learning_rate": 1.56919583428858e-05, "loss": 0.4815, "step": 29045 }, { "epoch": 0.6160208691226061, "grad_norm": 0.3694986402988434, "learning_rate": 1.569168413920337e-05, "loss": 0.4816, "step": 29046 }, { "epoch": 0.6160420775805391, "grad_norm": 0.3147013485431671, "learning_rate": 1.5691409929190703e-05, "loss": 0.4351, "step": 29047 }, { "epoch": 0.6160632860384722, "grad_norm": 0.3322782814502716, "learning_rate": 1.5691135712848094e-05, "loss": 0.5585, "step": 29048 }, { "epoch": 0.6160844944964051, "grad_norm": 0.38524970412254333, "learning_rate": 1.569086149017585e-05, "loss": 0.466, "step": 29049 }, { "epoch": 0.6161057029543382, "grad_norm": 0.343386709690094, "learning_rate": 1.5690587261174288e-05, "loss": 0.5032, "step": 29050 }, { "epoch": 0.6161269114122712, "grad_norm": 0.3582208752632141, "learning_rate": 1.56903130258437e-05, "loss": 0.49, "step": 29051 }, { "epoch": 0.6161481198702042, "grad_norm": 0.36630287766456604, "learning_rate": 1.5690038784184395e-05, "loss": 0.4878, "step": 29052 }, { "epoch": 0.6161693283281373, "grad_norm": 0.3335592448711395, "learning_rate": 1.568976453619668e-05, "loss": 0.4903, "step": 29053 }, { "epoch": 0.6161905367860703, "grad_norm": 0.41966307163238525, "learning_rate": 1.5689490281880854e-05, "loss": 0.433, "step": 29054 }, { "epoch": 0.6162117452440034, "grad_norm": 0.34189972281455994, "learning_rate": 1.568921602123723e-05, "loss": 0.5398, "step": 29055 }, { "epoch": 0.6162329537019363, "grad_norm": 0.37843525409698486, "learning_rate": 1.5688941754266106e-05, "loss": 0.5009, "step": 29056 }, { "epoch": 0.6162541621598694, "grad_norm": 0.3306613266468048, "learning_rate": 1.5688667480967795e-05, "loss": 0.4904, "step": 29057 }, { "epoch": 0.6162753706178024, "grad_norm": 0.33721354603767395, "learning_rate": 1.5688393201342592e-05, "loss": 0.5014, "step": 29058 }, { "epoch": 0.6162965790757354, "grad_norm": 0.33621594309806824, "learning_rate": 1.5688118915390816e-05, "loss": 0.4635, "step": 29059 }, { "epoch": 0.6163177875336684, "grad_norm": 0.36118441820144653, "learning_rate": 1.5687844623112757e-05, "loss": 0.509, "step": 29060 }, { "epoch": 0.6163389959916015, "grad_norm": 0.35394152998924255, "learning_rate": 1.5687570324508727e-05, "loss": 0.477, "step": 29061 }, { "epoch": 0.6163602044495344, "grad_norm": 0.3392432928085327, "learning_rate": 1.568729601957903e-05, "loss": 0.5144, "step": 29062 }, { "epoch": 0.6163814129074675, "grad_norm": 0.37647414207458496, "learning_rate": 1.5687021708323975e-05, "loss": 0.517, "step": 29063 }, { "epoch": 0.6164026213654005, "grad_norm": 0.33251476287841797, "learning_rate": 1.5686747390743863e-05, "loss": 0.5092, "step": 29064 }, { "epoch": 0.6164238298233335, "grad_norm": 0.3674817383289337, "learning_rate": 1.5686473066839e-05, "loss": 0.5082, "step": 29065 }, { "epoch": 0.6164450382812666, "grad_norm": 0.3697107434272766, "learning_rate": 1.568619873660969e-05, "loss": 0.5401, "step": 29066 }, { "epoch": 0.6164662467391996, "grad_norm": 0.3132193088531494, "learning_rate": 1.5685924400056237e-05, "loss": 0.4825, "step": 29067 }, { "epoch": 0.6164874551971327, "grad_norm": 0.34077292680740356, "learning_rate": 1.568565005717895e-05, "loss": 0.5188, "step": 29068 }, { "epoch": 0.6165086636550656, "grad_norm": 0.3935457766056061, "learning_rate": 1.5685375707978136e-05, "loss": 0.4718, "step": 29069 }, { "epoch": 0.6165298721129987, "grad_norm": 0.42820101976394653, "learning_rate": 1.568510135245409e-05, "loss": 0.5485, "step": 29070 }, { "epoch": 0.6165510805709317, "grad_norm": 0.35859760642051697, "learning_rate": 1.568482699060713e-05, "loss": 0.5437, "step": 29071 }, { "epoch": 0.6165722890288647, "grad_norm": 0.4310033619403839, "learning_rate": 1.568455262243755e-05, "loss": 0.538, "step": 29072 }, { "epoch": 0.6165934974867977, "grad_norm": 0.4688837230205536, "learning_rate": 1.5684278247945664e-05, "loss": 0.4757, "step": 29073 }, { "epoch": 0.6166147059447308, "grad_norm": 0.326987087726593, "learning_rate": 1.568400386713177e-05, "loss": 0.4988, "step": 29074 }, { "epoch": 0.6166359144026637, "grad_norm": 0.38270896673202515, "learning_rate": 1.568372947999618e-05, "loss": 0.4474, "step": 29075 }, { "epoch": 0.6166571228605968, "grad_norm": 0.3192771077156067, "learning_rate": 1.568345508653919e-05, "loss": 0.4711, "step": 29076 }, { "epoch": 0.6166783313185298, "grad_norm": 0.44677308201789856, "learning_rate": 1.5683180686761118e-05, "loss": 0.5649, "step": 29077 }, { "epoch": 0.6166995397764629, "grad_norm": 0.4374704360961914, "learning_rate": 1.5682906280662255e-05, "loss": 0.499, "step": 29078 }, { "epoch": 0.6167207482343959, "grad_norm": 0.3305366039276123, "learning_rate": 1.5682631868242916e-05, "loss": 0.4934, "step": 29079 }, { "epoch": 0.6167419566923289, "grad_norm": 0.35647130012512207, "learning_rate": 1.5682357449503404e-05, "loss": 0.5376, "step": 29080 }, { "epoch": 0.616763165150262, "grad_norm": 0.37008577585220337, "learning_rate": 1.568208302444402e-05, "loss": 0.389, "step": 29081 }, { "epoch": 0.6167843736081949, "grad_norm": 0.47193852066993713, "learning_rate": 1.5681808593065075e-05, "loss": 0.5316, "step": 29082 }, { "epoch": 0.616805582066128, "grad_norm": 0.32534918189048767, "learning_rate": 1.568153415536687e-05, "loss": 0.3902, "step": 29083 }, { "epoch": 0.616826790524061, "grad_norm": 0.328725665807724, "learning_rate": 1.5681259711349714e-05, "loss": 0.4644, "step": 29084 }, { "epoch": 0.616847998981994, "grad_norm": 0.38945382833480835, "learning_rate": 1.568098526101391e-05, "loss": 0.5246, "step": 29085 }, { "epoch": 0.616869207439927, "grad_norm": 0.32557353377342224, "learning_rate": 1.5680710804359764e-05, "loss": 0.4156, "step": 29086 }, { "epoch": 0.6168904158978601, "grad_norm": 0.31875723600387573, "learning_rate": 1.5680436341387578e-05, "loss": 0.4471, "step": 29087 }, { "epoch": 0.616911624355793, "grad_norm": 0.35931655764579773, "learning_rate": 1.5680161872097665e-05, "loss": 0.5239, "step": 29088 }, { "epoch": 0.6169328328137261, "grad_norm": 0.3214619755744934, "learning_rate": 1.5679887396490322e-05, "loss": 0.4487, "step": 29089 }, { "epoch": 0.6169540412716591, "grad_norm": 0.33031365275382996, "learning_rate": 1.567961291456586e-05, "loss": 0.4707, "step": 29090 }, { "epoch": 0.6169752497295922, "grad_norm": 0.3467174172401428, "learning_rate": 1.567933842632458e-05, "loss": 0.512, "step": 29091 }, { "epoch": 0.6169964581875251, "grad_norm": 0.3842534124851227, "learning_rate": 1.567906393176679e-05, "loss": 0.5761, "step": 29092 }, { "epoch": 0.6170176666454582, "grad_norm": 0.32311370968818665, "learning_rate": 1.5678789430892792e-05, "loss": 0.5062, "step": 29093 }, { "epoch": 0.6170388751033913, "grad_norm": 0.3499475121498108, "learning_rate": 1.567851492370289e-05, "loss": 0.4231, "step": 29094 }, { "epoch": 0.6170600835613242, "grad_norm": 0.34061431884765625, "learning_rate": 1.56782404101974e-05, "loss": 0.4463, "step": 29095 }, { "epoch": 0.6170812920192573, "grad_norm": 0.3256705701351166, "learning_rate": 1.567796589037662e-05, "loss": 0.486, "step": 29096 }, { "epoch": 0.6171025004771903, "grad_norm": 0.5084142684936523, "learning_rate": 1.5677691364240853e-05, "loss": 0.5652, "step": 29097 }, { "epoch": 0.6171237089351234, "grad_norm": 0.37244951725006104, "learning_rate": 1.5677416831790407e-05, "loss": 0.502, "step": 29098 }, { "epoch": 0.6171449173930563, "grad_norm": 0.36353105306625366, "learning_rate": 1.567714229302559e-05, "loss": 0.4766, "step": 29099 }, { "epoch": 0.6171661258509894, "grad_norm": 0.37572938203811646, "learning_rate": 1.5676867747946703e-05, "loss": 0.5057, "step": 29100 }, { "epoch": 0.6171873343089224, "grad_norm": 0.3740431070327759, "learning_rate": 1.5676593196554052e-05, "loss": 0.4821, "step": 29101 }, { "epoch": 0.6172085427668554, "grad_norm": 0.34318816661834717, "learning_rate": 1.5676318638847946e-05, "loss": 0.4573, "step": 29102 }, { "epoch": 0.6172297512247884, "grad_norm": 0.4138425588607788, "learning_rate": 1.5676044074828687e-05, "loss": 0.5545, "step": 29103 }, { "epoch": 0.6172509596827215, "grad_norm": 0.32904911041259766, "learning_rate": 1.567576950449658e-05, "loss": 0.5098, "step": 29104 }, { "epoch": 0.6172721681406544, "grad_norm": 0.35634303092956543, "learning_rate": 1.5675494927851935e-05, "loss": 0.4497, "step": 29105 }, { "epoch": 0.6172933765985875, "grad_norm": 0.371463805437088, "learning_rate": 1.567522034489505e-05, "loss": 0.5767, "step": 29106 }, { "epoch": 0.6173145850565206, "grad_norm": 0.3521546721458435, "learning_rate": 1.5674945755626235e-05, "loss": 0.5605, "step": 29107 }, { "epoch": 0.6173357935144536, "grad_norm": 0.5737690925598145, "learning_rate": 1.5674671160045793e-05, "loss": 0.4943, "step": 29108 }, { "epoch": 0.6173570019723866, "grad_norm": 0.3618849515914917, "learning_rate": 1.5674396558154034e-05, "loss": 0.4921, "step": 29109 }, { "epoch": 0.6173782104303196, "grad_norm": 0.38845768570899963, "learning_rate": 1.567412194995126e-05, "loss": 0.4432, "step": 29110 }, { "epoch": 0.6173994188882527, "grad_norm": 0.3334450423717499, "learning_rate": 1.5673847335437774e-05, "loss": 0.4135, "step": 29111 }, { "epoch": 0.6174206273461856, "grad_norm": 0.41197431087493896, "learning_rate": 1.5673572714613886e-05, "loss": 0.4738, "step": 29112 }, { "epoch": 0.6174418358041187, "grad_norm": 0.864361047744751, "learning_rate": 1.56732980874799e-05, "loss": 0.5809, "step": 29113 }, { "epoch": 0.6174630442620517, "grad_norm": 0.38358423113822937, "learning_rate": 1.567302345403612e-05, "loss": 0.467, "step": 29114 }, { "epoch": 0.6174842527199847, "grad_norm": 0.360866516828537, "learning_rate": 1.5672748814282853e-05, "loss": 0.5267, "step": 29115 }, { "epoch": 0.6175054611779177, "grad_norm": 0.37952131032943726, "learning_rate": 1.5672474168220406e-05, "loss": 0.4772, "step": 29116 }, { "epoch": 0.6175266696358508, "grad_norm": 0.3717937767505646, "learning_rate": 1.567219951584908e-05, "loss": 0.427, "step": 29117 }, { "epoch": 0.6175478780937838, "grad_norm": 0.35626742243766785, "learning_rate": 1.5671924857169188e-05, "loss": 0.5257, "step": 29118 }, { "epoch": 0.6175690865517168, "grad_norm": 0.39218637347221375, "learning_rate": 1.5671650192181026e-05, "loss": 0.4994, "step": 29119 }, { "epoch": 0.6175902950096499, "grad_norm": 0.33762094378471375, "learning_rate": 1.5671375520884905e-05, "loss": 0.4852, "step": 29120 }, { "epoch": 0.6176115034675829, "grad_norm": 0.3439764976501465, "learning_rate": 1.5671100843281126e-05, "loss": 0.5314, "step": 29121 }, { "epoch": 0.6176327119255159, "grad_norm": 0.37350544333457947, "learning_rate": 1.567082615937e-05, "loss": 0.5309, "step": 29122 }, { "epoch": 0.6176539203834489, "grad_norm": 0.37566938996315, "learning_rate": 1.5670551469151832e-05, "loss": 0.4828, "step": 29123 }, { "epoch": 0.617675128841382, "grad_norm": 0.37900346517562866, "learning_rate": 1.5670276772626926e-05, "loss": 0.5231, "step": 29124 }, { "epoch": 0.617696337299315, "grad_norm": 0.3622244596481323, "learning_rate": 1.5670002069795586e-05, "loss": 0.5808, "step": 29125 }, { "epoch": 0.617717545757248, "grad_norm": 0.32760247588157654, "learning_rate": 1.566972736065812e-05, "loss": 0.4924, "step": 29126 }, { "epoch": 0.617738754215181, "grad_norm": 0.3097456991672516, "learning_rate": 1.5669452645214834e-05, "loss": 0.4547, "step": 29127 }, { "epoch": 0.6177599626731141, "grad_norm": 0.34593161940574646, "learning_rate": 1.566917792346603e-05, "loss": 0.5428, "step": 29128 }, { "epoch": 0.617781171131047, "grad_norm": 0.3472137153148651, "learning_rate": 1.5668903195412015e-05, "loss": 0.4895, "step": 29129 }, { "epoch": 0.6178023795889801, "grad_norm": 0.37279486656188965, "learning_rate": 1.5668628461053095e-05, "loss": 0.5336, "step": 29130 }, { "epoch": 0.6178235880469131, "grad_norm": 0.3625218868255615, "learning_rate": 1.5668353720389577e-05, "loss": 0.4926, "step": 29131 }, { "epoch": 0.6178447965048461, "grad_norm": 0.44986292719841003, "learning_rate": 1.5668078973421762e-05, "loss": 0.4295, "step": 29132 }, { "epoch": 0.6178660049627791, "grad_norm": 0.4207858145236969, "learning_rate": 1.5667804220149962e-05, "loss": 0.5446, "step": 29133 }, { "epoch": 0.6178872134207122, "grad_norm": 0.3175007402896881, "learning_rate": 1.566752946057448e-05, "loss": 0.422, "step": 29134 }, { "epoch": 0.6179084218786453, "grad_norm": 0.3454550504684448, "learning_rate": 1.566725469469562e-05, "loss": 0.3852, "step": 29135 }, { "epoch": 0.6179296303365782, "grad_norm": 0.31317898631095886, "learning_rate": 1.5666979922513686e-05, "loss": 0.3953, "step": 29136 }, { "epoch": 0.6179508387945113, "grad_norm": 0.3903021812438965, "learning_rate": 1.5666705144028992e-05, "loss": 0.5537, "step": 29137 }, { "epoch": 0.6179720472524443, "grad_norm": 0.3754444122314453, "learning_rate": 1.5666430359241837e-05, "loss": 0.4743, "step": 29138 }, { "epoch": 0.6179932557103773, "grad_norm": 0.4336187541484833, "learning_rate": 1.5666155568152522e-05, "loss": 0.4631, "step": 29139 }, { "epoch": 0.6180144641683103, "grad_norm": 0.3496752381324768, "learning_rate": 1.5665880770761358e-05, "loss": 0.4872, "step": 29140 }, { "epoch": 0.6180356726262434, "grad_norm": 0.3240756094455719, "learning_rate": 1.5665605967068654e-05, "loss": 0.4967, "step": 29141 }, { "epoch": 0.6180568810841763, "grad_norm": 0.4748387634754181, "learning_rate": 1.5665331157074713e-05, "loss": 0.4691, "step": 29142 }, { "epoch": 0.6180780895421094, "grad_norm": 0.3559872508049011, "learning_rate": 1.5665056340779837e-05, "loss": 0.4266, "step": 29143 }, { "epoch": 0.6180992980000424, "grad_norm": 0.33422890305519104, "learning_rate": 1.566478151818434e-05, "loss": 0.4786, "step": 29144 }, { "epoch": 0.6181205064579754, "grad_norm": 0.35540348291397095, "learning_rate": 1.5664506689288516e-05, "loss": 0.5471, "step": 29145 }, { "epoch": 0.6181417149159084, "grad_norm": 0.3556927442550659, "learning_rate": 1.566423185409268e-05, "loss": 0.527, "step": 29146 }, { "epoch": 0.6181629233738415, "grad_norm": 0.37832751870155334, "learning_rate": 1.5663957012597132e-05, "loss": 0.5418, "step": 29147 }, { "epoch": 0.6181841318317746, "grad_norm": 0.33024510741233826, "learning_rate": 1.5663682164802184e-05, "loss": 0.4505, "step": 29148 }, { "epoch": 0.6182053402897075, "grad_norm": 0.4524649381637573, "learning_rate": 1.5663407310708133e-05, "loss": 0.4666, "step": 29149 }, { "epoch": 0.6182265487476406, "grad_norm": 0.35457509756088257, "learning_rate": 1.5663132450315295e-05, "loss": 0.5219, "step": 29150 }, { "epoch": 0.6182477572055736, "grad_norm": 0.34106001257896423, "learning_rate": 1.5662857583623967e-05, "loss": 0.455, "step": 29151 }, { "epoch": 0.6182689656635066, "grad_norm": 0.371805340051651, "learning_rate": 1.5662582710634458e-05, "loss": 0.5605, "step": 29152 }, { "epoch": 0.6182901741214396, "grad_norm": 0.34136486053466797, "learning_rate": 1.5662307831347076e-05, "loss": 0.4298, "step": 29153 }, { "epoch": 0.6183113825793727, "grad_norm": 0.42988064885139465, "learning_rate": 1.566203294576212e-05, "loss": 0.4612, "step": 29154 }, { "epoch": 0.6183325910373056, "grad_norm": 0.35262331366539, "learning_rate": 1.5661758053879906e-05, "loss": 0.4947, "step": 29155 }, { "epoch": 0.6183537994952387, "grad_norm": 0.3745866119861603, "learning_rate": 1.5661483155700728e-05, "loss": 0.5835, "step": 29156 }, { "epoch": 0.6183750079531717, "grad_norm": 0.3409418761730194, "learning_rate": 1.56612082512249e-05, "loss": 0.52, "step": 29157 }, { "epoch": 0.6183962164111048, "grad_norm": 0.3517523407936096, "learning_rate": 1.5660933340452728e-05, "loss": 0.5261, "step": 29158 }, { "epoch": 0.6184174248690377, "grad_norm": 0.35325753688812256, "learning_rate": 1.5660658423384512e-05, "loss": 0.5072, "step": 29159 }, { "epoch": 0.6184386333269708, "grad_norm": 0.34311172366142273, "learning_rate": 1.566038350002056e-05, "loss": 0.3833, "step": 29160 }, { "epoch": 0.6184598417849039, "grad_norm": 0.36333999037742615, "learning_rate": 1.566010857036118e-05, "loss": 0.5271, "step": 29161 }, { "epoch": 0.6184810502428368, "grad_norm": 0.4414032995700836, "learning_rate": 1.565983363440668e-05, "loss": 0.4157, "step": 29162 }, { "epoch": 0.6185022587007699, "grad_norm": 0.34101995825767517, "learning_rate": 1.5659558692157357e-05, "loss": 0.4796, "step": 29163 }, { "epoch": 0.6185234671587029, "grad_norm": 0.3057592213153839, "learning_rate": 1.5659283743613522e-05, "loss": 0.4644, "step": 29164 }, { "epoch": 0.618544675616636, "grad_norm": 0.3435594439506531, "learning_rate": 1.5659008788775484e-05, "loss": 0.464, "step": 29165 }, { "epoch": 0.6185658840745689, "grad_norm": 0.36360564827919006, "learning_rate": 1.5658733827643543e-05, "loss": 0.4955, "step": 29166 }, { "epoch": 0.618587092532502, "grad_norm": 0.4454430639743805, "learning_rate": 1.5658458860218012e-05, "loss": 0.5248, "step": 29167 }, { "epoch": 0.618608300990435, "grad_norm": 0.35320815443992615, "learning_rate": 1.565818388649919e-05, "loss": 0.4644, "step": 29168 }, { "epoch": 0.618629509448368, "grad_norm": 0.3651653528213501, "learning_rate": 1.5657908906487382e-05, "loss": 0.5481, "step": 29169 }, { "epoch": 0.618650717906301, "grad_norm": 0.3464938998222351, "learning_rate": 1.56576339201829e-05, "loss": 0.5306, "step": 29170 }, { "epoch": 0.6186719263642341, "grad_norm": 0.3345598876476288, "learning_rate": 1.5657358927586043e-05, "loss": 0.463, "step": 29171 }, { "epoch": 0.618693134822167, "grad_norm": 0.37010008096694946, "learning_rate": 1.5657083928697125e-05, "loss": 0.499, "step": 29172 }, { "epoch": 0.6187143432801001, "grad_norm": 0.3662635385990143, "learning_rate": 1.5656808923516445e-05, "loss": 0.5395, "step": 29173 }, { "epoch": 0.6187355517380331, "grad_norm": 0.3630083203315735, "learning_rate": 1.565653391204431e-05, "loss": 0.5344, "step": 29174 }, { "epoch": 0.6187567601959661, "grad_norm": 0.3739418089389801, "learning_rate": 1.565625889428103e-05, "loss": 0.4255, "step": 29175 }, { "epoch": 0.6187779686538992, "grad_norm": 0.390863299369812, "learning_rate": 1.565598387022691e-05, "loss": 0.6395, "step": 29176 }, { "epoch": 0.6187991771118322, "grad_norm": 0.41561058163642883, "learning_rate": 1.5655708839882246e-05, "loss": 0.4801, "step": 29177 }, { "epoch": 0.6188203855697653, "grad_norm": 0.36168158054351807, "learning_rate": 1.5655433803247357e-05, "loss": 0.5187, "step": 29178 }, { "epoch": 0.6188415940276982, "grad_norm": 0.3212919235229492, "learning_rate": 1.5655158760322544e-05, "loss": 0.5394, "step": 29179 }, { "epoch": 0.6188628024856313, "grad_norm": 0.36197736859321594, "learning_rate": 1.565488371110811e-05, "loss": 0.4924, "step": 29180 }, { "epoch": 0.6188840109435643, "grad_norm": 0.4508042335510254, "learning_rate": 1.5654608655604368e-05, "loss": 0.5849, "step": 29181 }, { "epoch": 0.6189052194014973, "grad_norm": 0.3722486197948456, "learning_rate": 1.5654333593811618e-05, "loss": 0.5545, "step": 29182 }, { "epoch": 0.6189264278594303, "grad_norm": 0.3059813976287842, "learning_rate": 1.5654058525730164e-05, "loss": 0.4442, "step": 29183 }, { "epoch": 0.6189476363173634, "grad_norm": 0.325018048286438, "learning_rate": 1.5653783451360316e-05, "loss": 0.5133, "step": 29184 }, { "epoch": 0.6189688447752963, "grad_norm": 0.3496428430080414, "learning_rate": 1.565350837070238e-05, "loss": 0.4504, "step": 29185 }, { "epoch": 0.6189900532332294, "grad_norm": 0.35279908776283264, "learning_rate": 1.5653233283756662e-05, "loss": 0.4716, "step": 29186 }, { "epoch": 0.6190112616911624, "grad_norm": 0.3981126844882965, "learning_rate": 1.565295819052347e-05, "loss": 0.5283, "step": 29187 }, { "epoch": 0.6190324701490955, "grad_norm": 0.40259504318237305, "learning_rate": 1.56526830910031e-05, "loss": 0.5469, "step": 29188 }, { "epoch": 0.6190536786070285, "grad_norm": 0.5512002110481262, "learning_rate": 1.5652407985195868e-05, "loss": 0.4822, "step": 29189 }, { "epoch": 0.6190748870649615, "grad_norm": 0.3698880672454834, "learning_rate": 1.5652132873102077e-05, "loss": 0.4937, "step": 29190 }, { "epoch": 0.6190960955228946, "grad_norm": 0.3888966143131256, "learning_rate": 1.5651857754722034e-05, "loss": 0.4967, "step": 29191 }, { "epoch": 0.6191173039808275, "grad_norm": 0.33260345458984375, "learning_rate": 1.5651582630056044e-05, "loss": 0.5248, "step": 29192 }, { "epoch": 0.6191385124387606, "grad_norm": 0.3748040199279785, "learning_rate": 1.5651307499104413e-05, "loss": 0.5048, "step": 29193 }, { "epoch": 0.6191597208966936, "grad_norm": 0.3632429540157318, "learning_rate": 1.5651032361867448e-05, "loss": 0.4888, "step": 29194 }, { "epoch": 0.6191809293546267, "grad_norm": 0.32996976375579834, "learning_rate": 1.5650757218345448e-05, "loss": 0.5066, "step": 29195 }, { "epoch": 0.6192021378125596, "grad_norm": 0.3759549558162689, "learning_rate": 1.5650482068538727e-05, "loss": 0.5215, "step": 29196 }, { "epoch": 0.6192233462704927, "grad_norm": 0.37717658281326294, "learning_rate": 1.5650206912447592e-05, "loss": 0.5417, "step": 29197 }, { "epoch": 0.6192445547284257, "grad_norm": 0.42227113246917725, "learning_rate": 1.5649931750072346e-05, "loss": 0.4431, "step": 29198 }, { "epoch": 0.6192657631863587, "grad_norm": 0.3469465970993042, "learning_rate": 1.564965658141329e-05, "loss": 0.4536, "step": 29199 }, { "epoch": 0.6192869716442917, "grad_norm": 0.30845433473587036, "learning_rate": 1.5649381406470743e-05, "loss": 0.437, "step": 29200 }, { "epoch": 0.6193081801022248, "grad_norm": 0.34318000078201294, "learning_rate": 1.5649106225245e-05, "loss": 0.4451, "step": 29201 }, { "epoch": 0.6193293885601578, "grad_norm": 0.3530959188938141, "learning_rate": 1.5648831037736365e-05, "loss": 0.4224, "step": 29202 }, { "epoch": 0.6193505970180908, "grad_norm": 0.39601972699165344, "learning_rate": 1.564855584394515e-05, "loss": 0.5583, "step": 29203 }, { "epoch": 0.6193718054760239, "grad_norm": 0.4160493016242981, "learning_rate": 1.5648280643871667e-05, "loss": 0.5304, "step": 29204 }, { "epoch": 0.6193930139339568, "grad_norm": 0.38539257645606995, "learning_rate": 1.564800543751621e-05, "loss": 0.5345, "step": 29205 }, { "epoch": 0.6194142223918899, "grad_norm": 0.3874301612377167, "learning_rate": 1.5647730224879094e-05, "loss": 0.5617, "step": 29206 }, { "epoch": 0.6194354308498229, "grad_norm": 0.36822858452796936, "learning_rate": 1.5647455005960617e-05, "loss": 0.5001, "step": 29207 }, { "epoch": 0.619456639307756, "grad_norm": 0.3325313925743103, "learning_rate": 1.5647179780761094e-05, "loss": 0.4866, "step": 29208 }, { "epoch": 0.6194778477656889, "grad_norm": 0.3475706875324249, "learning_rate": 1.5646904549280827e-05, "loss": 0.548, "step": 29209 }, { "epoch": 0.619499056223622, "grad_norm": 0.41488155722618103, "learning_rate": 1.564662931152012e-05, "loss": 0.473, "step": 29210 }, { "epoch": 0.619520264681555, "grad_norm": 0.365590900182724, "learning_rate": 1.564635406747928e-05, "loss": 0.493, "step": 29211 }, { "epoch": 0.619541473139488, "grad_norm": 0.38574469089508057, "learning_rate": 1.5646078817158613e-05, "loss": 0.4877, "step": 29212 }, { "epoch": 0.619562681597421, "grad_norm": 0.4044012427330017, "learning_rate": 1.564580356055843e-05, "loss": 0.5726, "step": 29213 }, { "epoch": 0.6195838900553541, "grad_norm": 0.466450035572052, "learning_rate": 1.5645528297679027e-05, "loss": 0.5369, "step": 29214 }, { "epoch": 0.619605098513287, "grad_norm": 0.35905733704566956, "learning_rate": 1.5645253028520723e-05, "loss": 0.5268, "step": 29215 }, { "epoch": 0.6196263069712201, "grad_norm": 0.3294004797935486, "learning_rate": 1.5644977753083814e-05, "loss": 0.4491, "step": 29216 }, { "epoch": 0.6196475154291532, "grad_norm": 0.34799909591674805, "learning_rate": 1.5644702471368612e-05, "loss": 0.5237, "step": 29217 }, { "epoch": 0.6196687238870862, "grad_norm": 0.33000293374061584, "learning_rate": 1.5644427183375422e-05, "loss": 0.4818, "step": 29218 }, { "epoch": 0.6196899323450192, "grad_norm": 0.3468925952911377, "learning_rate": 1.5644151889104543e-05, "loss": 0.4946, "step": 29219 }, { "epoch": 0.6197111408029522, "grad_norm": 0.3964599668979645, "learning_rate": 1.5643876588556295e-05, "loss": 0.5627, "step": 29220 }, { "epoch": 0.6197323492608853, "grad_norm": 0.3971889913082123, "learning_rate": 1.5643601281730975e-05, "loss": 0.5781, "step": 29221 }, { "epoch": 0.6197535577188182, "grad_norm": 0.3354794681072235, "learning_rate": 1.5643325968628886e-05, "loss": 0.4652, "step": 29222 }, { "epoch": 0.6197747661767513, "grad_norm": 0.391803115606308, "learning_rate": 1.564305064925034e-05, "loss": 0.4826, "step": 29223 }, { "epoch": 0.6197959746346843, "grad_norm": 0.35341498255729675, "learning_rate": 1.5642775323595646e-05, "loss": 0.557, "step": 29224 }, { "epoch": 0.6198171830926174, "grad_norm": 0.38817137479782104, "learning_rate": 1.5642499991665102e-05, "loss": 0.5665, "step": 29225 }, { "epoch": 0.6198383915505503, "grad_norm": 0.33548304438591003, "learning_rate": 1.564222465345902e-05, "loss": 0.4907, "step": 29226 }, { "epoch": 0.6198596000084834, "grad_norm": 0.32919102907180786, "learning_rate": 1.5641949308977708e-05, "loss": 0.4566, "step": 29227 }, { "epoch": 0.6198808084664164, "grad_norm": 0.45060521364212036, "learning_rate": 1.5641673958221467e-05, "loss": 0.4055, "step": 29228 }, { "epoch": 0.6199020169243494, "grad_norm": 0.37417882680892944, "learning_rate": 1.56413986011906e-05, "loss": 0.5175, "step": 29229 }, { "epoch": 0.6199232253822825, "grad_norm": 0.33279359340667725, "learning_rate": 1.5641123237885423e-05, "loss": 0.4901, "step": 29230 }, { "epoch": 0.6199444338402155, "grad_norm": 0.3208737075328827, "learning_rate": 1.5640847868306237e-05, "loss": 0.4869, "step": 29231 }, { "epoch": 0.6199656422981485, "grad_norm": 0.3406480848789215, "learning_rate": 1.5640572492453352e-05, "loss": 0.5404, "step": 29232 }, { "epoch": 0.6199868507560815, "grad_norm": 0.41968169808387756, "learning_rate": 1.5640297110327066e-05, "loss": 0.4965, "step": 29233 }, { "epoch": 0.6200080592140146, "grad_norm": 0.3351435959339142, "learning_rate": 1.5640021721927696e-05, "loss": 0.5287, "step": 29234 }, { "epoch": 0.6200292676719475, "grad_norm": 0.37664666771888733, "learning_rate": 1.5639746327255538e-05, "loss": 0.5155, "step": 29235 }, { "epoch": 0.6200504761298806, "grad_norm": 0.461777001619339, "learning_rate": 1.5639470926310907e-05, "loss": 0.4945, "step": 29236 }, { "epoch": 0.6200716845878136, "grad_norm": 0.3546636998653412, "learning_rate": 1.5639195519094103e-05, "loss": 0.4397, "step": 29237 }, { "epoch": 0.6200928930457467, "grad_norm": 0.34305956959724426, "learning_rate": 1.5638920105605437e-05, "loss": 0.5368, "step": 29238 }, { "epoch": 0.6201141015036796, "grad_norm": 0.320295125246048, "learning_rate": 1.5638644685845213e-05, "loss": 0.4404, "step": 29239 }, { "epoch": 0.6201353099616127, "grad_norm": 0.42653098702430725, "learning_rate": 1.5638369259813734e-05, "loss": 0.5151, "step": 29240 }, { "epoch": 0.6201565184195457, "grad_norm": 0.3935104310512543, "learning_rate": 1.563809382751131e-05, "loss": 0.5357, "step": 29241 }, { "epoch": 0.6201777268774787, "grad_norm": 0.3482096791267395, "learning_rate": 1.5637818388938248e-05, "loss": 0.5619, "step": 29242 }, { "epoch": 0.6201989353354118, "grad_norm": 0.4056479036808014, "learning_rate": 1.5637542944094858e-05, "loss": 0.6217, "step": 29243 }, { "epoch": 0.6202201437933448, "grad_norm": 0.3353404104709625, "learning_rate": 1.5637267492981435e-05, "loss": 0.4206, "step": 29244 }, { "epoch": 0.6202413522512779, "grad_norm": 0.320556640625, "learning_rate": 1.56369920355983e-05, "loss": 0.4429, "step": 29245 }, { "epoch": 0.6202625607092108, "grad_norm": 0.3625905513763428, "learning_rate": 1.5636716571945743e-05, "loss": 0.4924, "step": 29246 }, { "epoch": 0.6202837691671439, "grad_norm": 0.3669208288192749, "learning_rate": 1.5636441102024082e-05, "loss": 0.5365, "step": 29247 }, { "epoch": 0.6203049776250769, "grad_norm": 0.37004947662353516, "learning_rate": 1.563616562583362e-05, "loss": 0.5252, "step": 29248 }, { "epoch": 0.6203261860830099, "grad_norm": 0.36560913920402527, "learning_rate": 1.5635890143374665e-05, "loss": 0.4686, "step": 29249 }, { "epoch": 0.6203473945409429, "grad_norm": 0.3421747088432312, "learning_rate": 1.5635614654647518e-05, "loss": 0.4418, "step": 29250 }, { "epoch": 0.620368602998876, "grad_norm": 0.37467971444129944, "learning_rate": 1.5635339159652493e-05, "loss": 0.4745, "step": 29251 }, { "epoch": 0.6203898114568089, "grad_norm": 0.3699420392513275, "learning_rate": 1.563506365838989e-05, "loss": 0.4487, "step": 29252 }, { "epoch": 0.620411019914742, "grad_norm": 0.3367118239402771, "learning_rate": 1.5634788150860022e-05, "loss": 0.4793, "step": 29253 }, { "epoch": 0.620432228372675, "grad_norm": 0.35464775562286377, "learning_rate": 1.5634512637063187e-05, "loss": 0.4715, "step": 29254 }, { "epoch": 0.620453436830608, "grad_norm": 0.37422630190849304, "learning_rate": 1.56342371169997e-05, "loss": 0.5377, "step": 29255 }, { "epoch": 0.620474645288541, "grad_norm": 0.3493042588233948, "learning_rate": 1.563396159066986e-05, "loss": 0.5127, "step": 29256 }, { "epoch": 0.6204958537464741, "grad_norm": 0.34028908610343933, "learning_rate": 1.563368605807398e-05, "loss": 0.4414, "step": 29257 }, { "epoch": 0.6205170622044072, "grad_norm": 0.3818564713001251, "learning_rate": 1.563341051921236e-05, "loss": 0.4199, "step": 29258 }, { "epoch": 0.6205382706623401, "grad_norm": 0.3682464063167572, "learning_rate": 1.5633134974085315e-05, "loss": 0.5955, "step": 29259 }, { "epoch": 0.6205594791202732, "grad_norm": 0.3347243070602417, "learning_rate": 1.5632859422693143e-05, "loss": 0.49, "step": 29260 }, { "epoch": 0.6205806875782062, "grad_norm": 0.41962307691574097, "learning_rate": 1.563258386503615e-05, "loss": 0.5331, "step": 29261 }, { "epoch": 0.6206018960361392, "grad_norm": 0.3766498863697052, "learning_rate": 1.5632308301114654e-05, "loss": 0.4922, "step": 29262 }, { "epoch": 0.6206231044940722, "grad_norm": 0.3412463068962097, "learning_rate": 1.5632032730928948e-05, "loss": 0.51, "step": 29263 }, { "epoch": 0.6206443129520053, "grad_norm": 0.37822815775871277, "learning_rate": 1.5631757154479345e-05, "loss": 0.4914, "step": 29264 }, { "epoch": 0.6206655214099382, "grad_norm": 0.36878353357315063, "learning_rate": 1.5631481571766154e-05, "loss": 0.4867, "step": 29265 }, { "epoch": 0.6206867298678713, "grad_norm": 0.3752383291721344, "learning_rate": 1.5631205982789673e-05, "loss": 0.4658, "step": 29266 }, { "epoch": 0.6207079383258043, "grad_norm": 0.3415330648422241, "learning_rate": 1.563093038755022e-05, "loss": 0.4713, "step": 29267 }, { "epoch": 0.6207291467837374, "grad_norm": 0.34621086716651917, "learning_rate": 1.5630654786048086e-05, "loss": 0.5264, "step": 29268 }, { "epoch": 0.6207503552416703, "grad_norm": 0.33632415533065796, "learning_rate": 1.563037917828359e-05, "loss": 0.468, "step": 29269 }, { "epoch": 0.6207715636996034, "grad_norm": 0.334774374961853, "learning_rate": 1.563010356425704e-05, "loss": 0.5229, "step": 29270 }, { "epoch": 0.6207927721575365, "grad_norm": 0.37357062101364136, "learning_rate": 1.5629827943968737e-05, "loss": 0.4833, "step": 29271 }, { "epoch": 0.6208139806154694, "grad_norm": 0.3090669512748718, "learning_rate": 1.5629552317418986e-05, "loss": 0.408, "step": 29272 }, { "epoch": 0.6208351890734025, "grad_norm": 0.3376292884349823, "learning_rate": 1.5629276684608092e-05, "loss": 0.4669, "step": 29273 }, { "epoch": 0.6208563975313355, "grad_norm": 0.4348197281360626, "learning_rate": 1.562900104553637e-05, "loss": 0.4235, "step": 29274 }, { "epoch": 0.6208776059892686, "grad_norm": 0.33568304777145386, "learning_rate": 1.562872540020412e-05, "loss": 0.5401, "step": 29275 }, { "epoch": 0.6208988144472015, "grad_norm": 0.33703505992889404, "learning_rate": 1.5628449748611654e-05, "loss": 0.4418, "step": 29276 }, { "epoch": 0.6209200229051346, "grad_norm": 0.3819454610347748, "learning_rate": 1.5628174090759275e-05, "loss": 0.5613, "step": 29277 }, { "epoch": 0.6209412313630676, "grad_norm": 0.3515857458114624, "learning_rate": 1.5627898426647285e-05, "loss": 0.4494, "step": 29278 }, { "epoch": 0.6209624398210006, "grad_norm": 0.34388604760169983, "learning_rate": 1.5627622756276e-05, "loss": 0.5104, "step": 29279 }, { "epoch": 0.6209836482789336, "grad_norm": 0.3155791461467743, "learning_rate": 1.5627347079645716e-05, "loss": 0.4854, "step": 29280 }, { "epoch": 0.6210048567368667, "grad_norm": 0.36608967185020447, "learning_rate": 1.562707139675675e-05, "loss": 0.5125, "step": 29281 }, { "epoch": 0.6210260651947996, "grad_norm": 0.37237173318862915, "learning_rate": 1.5626795707609404e-05, "loss": 0.4956, "step": 29282 }, { "epoch": 0.6210472736527327, "grad_norm": 1.2480145692825317, "learning_rate": 1.562652001220398e-05, "loss": 0.4629, "step": 29283 }, { "epoch": 0.6210684821106658, "grad_norm": 0.37150630354881287, "learning_rate": 1.5626244310540798e-05, "loss": 0.487, "step": 29284 }, { "epoch": 0.6210896905685988, "grad_norm": 0.35650375485420227, "learning_rate": 1.562596860262015e-05, "loss": 0.5537, "step": 29285 }, { "epoch": 0.6211108990265318, "grad_norm": 0.3661295175552368, "learning_rate": 1.562569288844235e-05, "loss": 0.4311, "step": 29286 }, { "epoch": 0.6211321074844648, "grad_norm": 0.3491200804710388, "learning_rate": 1.56254171680077e-05, "loss": 0.4778, "step": 29287 }, { "epoch": 0.6211533159423979, "grad_norm": 0.33790838718414307, "learning_rate": 1.5625141441316515e-05, "loss": 0.5173, "step": 29288 }, { "epoch": 0.6211745244003308, "grad_norm": 0.37329772114753723, "learning_rate": 1.562486570836909e-05, "loss": 0.491, "step": 29289 }, { "epoch": 0.6211957328582639, "grad_norm": 0.32524263858795166, "learning_rate": 1.5624589969165746e-05, "loss": 0.4871, "step": 29290 }, { "epoch": 0.6212169413161969, "grad_norm": 0.39794325828552246, "learning_rate": 1.562431422370678e-05, "loss": 0.4827, "step": 29291 }, { "epoch": 0.6212381497741299, "grad_norm": 0.41771814227104187, "learning_rate": 1.5624038471992497e-05, "loss": 0.509, "step": 29292 }, { "epoch": 0.6212593582320629, "grad_norm": 0.3562554717063904, "learning_rate": 1.5623762714023208e-05, "loss": 0.4764, "step": 29293 }, { "epoch": 0.621280566689996, "grad_norm": 0.356794148683548, "learning_rate": 1.5623486949799222e-05, "loss": 0.4906, "step": 29294 }, { "epoch": 0.621301775147929, "grad_norm": 0.4879009425640106, "learning_rate": 1.5623211179320842e-05, "loss": 0.5179, "step": 29295 }, { "epoch": 0.621322983605862, "grad_norm": 0.3397703766822815, "learning_rate": 1.562293540258837e-05, "loss": 0.451, "step": 29296 }, { "epoch": 0.621344192063795, "grad_norm": 0.34666523337364197, "learning_rate": 1.5622659619602124e-05, "loss": 0.4856, "step": 29297 }, { "epoch": 0.6213654005217281, "grad_norm": 0.3349180221557617, "learning_rate": 1.5622383830362405e-05, "loss": 0.4191, "step": 29298 }, { "epoch": 0.6213866089796611, "grad_norm": 0.41310322284698486, "learning_rate": 1.5622108034869517e-05, "loss": 0.4885, "step": 29299 }, { "epoch": 0.6214078174375941, "grad_norm": 0.41871994733810425, "learning_rate": 1.562183223312377e-05, "loss": 0.5254, "step": 29300 }, { "epoch": 0.6214290258955272, "grad_norm": 0.32851341366767883, "learning_rate": 1.562155642512547e-05, "loss": 0.4803, "step": 29301 }, { "epoch": 0.6214502343534601, "grad_norm": 0.36840128898620605, "learning_rate": 1.5621280610874924e-05, "loss": 0.5482, "step": 29302 }, { "epoch": 0.6214714428113932, "grad_norm": 0.3367007076740265, "learning_rate": 1.562100479037244e-05, "loss": 0.405, "step": 29303 }, { "epoch": 0.6214926512693262, "grad_norm": 0.3522683084011078, "learning_rate": 1.5620728963618323e-05, "loss": 0.4158, "step": 29304 }, { "epoch": 0.6215138597272593, "grad_norm": 0.3319854140281677, "learning_rate": 1.5620453130612876e-05, "loss": 0.434, "step": 29305 }, { "epoch": 0.6215350681851922, "grad_norm": 0.33274224400520325, "learning_rate": 1.5620177291356414e-05, "loss": 0.5037, "step": 29306 }, { "epoch": 0.6215562766431253, "grad_norm": 0.35393571853637695, "learning_rate": 1.5619901445849235e-05, "loss": 0.4823, "step": 29307 }, { "epoch": 0.6215774851010583, "grad_norm": 0.3960553705692291, "learning_rate": 1.5619625594091654e-05, "loss": 0.4697, "step": 29308 }, { "epoch": 0.6215986935589913, "grad_norm": 0.35299620032310486, "learning_rate": 1.5619349736083976e-05, "loss": 0.5664, "step": 29309 }, { "epoch": 0.6216199020169243, "grad_norm": 0.33588656783103943, "learning_rate": 1.5619073871826503e-05, "loss": 0.4515, "step": 29310 }, { "epoch": 0.6216411104748574, "grad_norm": 0.3758566975593567, "learning_rate": 1.5618798001319547e-05, "loss": 0.5109, "step": 29311 }, { "epoch": 0.6216623189327904, "grad_norm": 0.3378663957118988, "learning_rate": 1.561852212456341e-05, "loss": 0.4406, "step": 29312 }, { "epoch": 0.6216835273907234, "grad_norm": 0.35364484786987305, "learning_rate": 1.5618246241558402e-05, "loss": 0.4714, "step": 29313 }, { "epoch": 0.6217047358486565, "grad_norm": 0.35443180799484253, "learning_rate": 1.561797035230483e-05, "loss": 0.4817, "step": 29314 }, { "epoch": 0.6217259443065895, "grad_norm": 0.31795424222946167, "learning_rate": 1.5617694456803e-05, "loss": 0.4177, "step": 29315 }, { "epoch": 0.6217471527645225, "grad_norm": 0.36385130882263184, "learning_rate": 1.5617418555053223e-05, "loss": 0.5251, "step": 29316 }, { "epoch": 0.6217683612224555, "grad_norm": 0.3296447992324829, "learning_rate": 1.5617142647055795e-05, "loss": 0.504, "step": 29317 }, { "epoch": 0.6217895696803886, "grad_norm": 0.323323130607605, "learning_rate": 1.5616866732811032e-05, "loss": 0.4694, "step": 29318 }, { "epoch": 0.6218107781383215, "grad_norm": 0.35820403695106506, "learning_rate": 1.561659081231924e-05, "loss": 0.501, "step": 29319 }, { "epoch": 0.6218319865962546, "grad_norm": 0.32829681038856506, "learning_rate": 1.5616314885580725e-05, "loss": 0.5041, "step": 29320 }, { "epoch": 0.6218531950541876, "grad_norm": 0.5261794328689575, "learning_rate": 1.5616038952595792e-05, "loss": 0.4535, "step": 29321 }, { "epoch": 0.6218744035121206, "grad_norm": 0.3507220149040222, "learning_rate": 1.561576301336475e-05, "loss": 0.5315, "step": 29322 }, { "epoch": 0.6218956119700536, "grad_norm": 0.3396601974964142, "learning_rate": 1.561548706788791e-05, "loss": 0.4327, "step": 29323 }, { "epoch": 0.6219168204279867, "grad_norm": 0.36196234822273254, "learning_rate": 1.5615211116165564e-05, "loss": 0.4351, "step": 29324 }, { "epoch": 0.6219380288859198, "grad_norm": 0.35148030519485474, "learning_rate": 1.5614935158198034e-05, "loss": 0.549, "step": 29325 }, { "epoch": 0.6219592373438527, "grad_norm": 0.3144315481185913, "learning_rate": 1.5614659193985625e-05, "loss": 0.4502, "step": 29326 }, { "epoch": 0.6219804458017858, "grad_norm": 0.34893161058425903, "learning_rate": 1.5614383223528635e-05, "loss": 0.5194, "step": 29327 }, { "epoch": 0.6220016542597188, "grad_norm": 0.3341737389564514, "learning_rate": 1.5614107246827382e-05, "loss": 0.5161, "step": 29328 }, { "epoch": 0.6220228627176518, "grad_norm": 0.3654034435749054, "learning_rate": 1.5613831263882165e-05, "loss": 0.4488, "step": 29329 }, { "epoch": 0.6220440711755848, "grad_norm": 0.37512004375457764, "learning_rate": 1.5613555274693296e-05, "loss": 0.5476, "step": 29330 }, { "epoch": 0.6220652796335179, "grad_norm": 0.3565618693828583, "learning_rate": 1.561327927926107e-05, "loss": 0.5132, "step": 29331 }, { "epoch": 0.6220864880914508, "grad_norm": 0.30295318365097046, "learning_rate": 1.5613003277585815e-05, "loss": 0.3809, "step": 29332 }, { "epoch": 0.6221076965493839, "grad_norm": 0.381667822599411, "learning_rate": 1.5612727269667824e-05, "loss": 0.477, "step": 29333 }, { "epoch": 0.6221289050073169, "grad_norm": 0.3455165922641754, "learning_rate": 1.5612451255507403e-05, "loss": 0.4906, "step": 29334 }, { "epoch": 0.62215011346525, "grad_norm": 0.33921605348587036, "learning_rate": 1.5612175235104865e-05, "loss": 0.5236, "step": 29335 }, { "epoch": 0.6221713219231829, "grad_norm": 0.3605993688106537, "learning_rate": 1.5611899208460513e-05, "loss": 0.5733, "step": 29336 }, { "epoch": 0.622192530381116, "grad_norm": 0.3368329405784607, "learning_rate": 1.5611623175574657e-05, "loss": 0.4541, "step": 29337 }, { "epoch": 0.622213738839049, "grad_norm": 0.3402075469493866, "learning_rate": 1.56113471364476e-05, "loss": 0.4555, "step": 29338 }, { "epoch": 0.622234947296982, "grad_norm": 0.38807010650634766, "learning_rate": 1.5611071091079655e-05, "loss": 0.4998, "step": 29339 }, { "epoch": 0.6222561557549151, "grad_norm": 0.39664483070373535, "learning_rate": 1.5610795039471125e-05, "loss": 0.54, "step": 29340 }, { "epoch": 0.6222773642128481, "grad_norm": 0.42098721861839294, "learning_rate": 1.5610518981622317e-05, "loss": 0.4717, "step": 29341 }, { "epoch": 0.6222985726707811, "grad_norm": 0.4403928220272064, "learning_rate": 1.561024291753354e-05, "loss": 0.4901, "step": 29342 }, { "epoch": 0.6223197811287141, "grad_norm": 0.33586394786834717, "learning_rate": 1.5609966847205093e-05, "loss": 0.4435, "step": 29343 }, { "epoch": 0.6223409895866472, "grad_norm": 0.342650443315506, "learning_rate": 1.5609690770637294e-05, "loss": 0.4844, "step": 29344 }, { "epoch": 0.6223621980445802, "grad_norm": 0.3276359736919403, "learning_rate": 1.5609414687830447e-05, "loss": 0.4637, "step": 29345 }, { "epoch": 0.6223834065025132, "grad_norm": 0.3521435856819153, "learning_rate": 1.5609138598784853e-05, "loss": 0.4508, "step": 29346 }, { "epoch": 0.6224046149604462, "grad_norm": 0.37373989820480347, "learning_rate": 1.560886250350083e-05, "loss": 0.4923, "step": 29347 }, { "epoch": 0.6224258234183793, "grad_norm": 0.3204745054244995, "learning_rate": 1.5608586401978675e-05, "loss": 0.4761, "step": 29348 }, { "epoch": 0.6224470318763122, "grad_norm": 0.34743571281433105, "learning_rate": 1.5608310294218702e-05, "loss": 0.4734, "step": 29349 }, { "epoch": 0.6224682403342453, "grad_norm": 0.35732051730155945, "learning_rate": 1.560803418022121e-05, "loss": 0.6091, "step": 29350 }, { "epoch": 0.6224894487921783, "grad_norm": 0.47398582100868225, "learning_rate": 1.5607758059986516e-05, "loss": 0.4257, "step": 29351 }, { "epoch": 0.6225106572501113, "grad_norm": 0.3221716284751892, "learning_rate": 1.5607481933514917e-05, "loss": 0.4189, "step": 29352 }, { "epoch": 0.6225318657080444, "grad_norm": 0.351779967546463, "learning_rate": 1.560720580080673e-05, "loss": 0.4722, "step": 29353 }, { "epoch": 0.6225530741659774, "grad_norm": 0.39845457673072815, "learning_rate": 1.560692966186226e-05, "loss": 0.4338, "step": 29354 }, { "epoch": 0.6225742826239105, "grad_norm": 0.37646740674972534, "learning_rate": 1.5606653516681806e-05, "loss": 0.5043, "step": 29355 }, { "epoch": 0.6225954910818434, "grad_norm": 0.3551304042339325, "learning_rate": 1.5606377365265683e-05, "loss": 0.4441, "step": 29356 }, { "epoch": 0.6226166995397765, "grad_norm": 0.4622849225997925, "learning_rate": 1.560610120761419e-05, "loss": 0.5185, "step": 29357 }, { "epoch": 0.6226379079977095, "grad_norm": 0.4962320923805237, "learning_rate": 1.5605825043727646e-05, "loss": 0.4883, "step": 29358 }, { "epoch": 0.6226591164556425, "grad_norm": 0.4311942160129547, "learning_rate": 1.5605548873606352e-05, "loss": 0.4283, "step": 29359 }, { "epoch": 0.6226803249135755, "grad_norm": 0.3292163908481598, "learning_rate": 1.560527269725061e-05, "loss": 0.4226, "step": 29360 }, { "epoch": 0.6227015333715086, "grad_norm": 0.42225590348243713, "learning_rate": 1.5604996514660742e-05, "loss": 0.5019, "step": 29361 }, { "epoch": 0.6227227418294415, "grad_norm": 0.3079950213432312, "learning_rate": 1.560472032583704e-05, "loss": 0.4776, "step": 29362 }, { "epoch": 0.6227439502873746, "grad_norm": 0.32337918877601624, "learning_rate": 1.5604444130779816e-05, "loss": 0.4009, "step": 29363 }, { "epoch": 0.6227651587453076, "grad_norm": 0.32515785098075867, "learning_rate": 1.5604167929489377e-05, "loss": 0.4383, "step": 29364 }, { "epoch": 0.6227863672032407, "grad_norm": 0.37322503328323364, "learning_rate": 1.5603891721966032e-05, "loss": 0.617, "step": 29365 }, { "epoch": 0.6228075756611737, "grad_norm": 0.44606634974479675, "learning_rate": 1.560361550821009e-05, "loss": 0.5337, "step": 29366 }, { "epoch": 0.6228287841191067, "grad_norm": 0.3678632080554962, "learning_rate": 1.5603339288221856e-05, "loss": 0.5958, "step": 29367 }, { "epoch": 0.6228499925770398, "grad_norm": 0.3625684678554535, "learning_rate": 1.5603063062001635e-05, "loss": 0.5113, "step": 29368 }, { "epoch": 0.6228712010349727, "grad_norm": 0.33032724261283875, "learning_rate": 1.5602786829549734e-05, "loss": 0.4307, "step": 29369 }, { "epoch": 0.6228924094929058, "grad_norm": 0.38169223070144653, "learning_rate": 1.5602510590866463e-05, "loss": 0.4371, "step": 29370 }, { "epoch": 0.6229136179508388, "grad_norm": 0.33795031905174255, "learning_rate": 1.560223434595213e-05, "loss": 0.4557, "step": 29371 }, { "epoch": 0.6229348264087718, "grad_norm": 0.34482061862945557, "learning_rate": 1.5601958094807038e-05, "loss": 0.4642, "step": 29372 }, { "epoch": 0.6229560348667048, "grad_norm": 0.3302845060825348, "learning_rate": 1.5601681837431498e-05, "loss": 0.4729, "step": 29373 }, { "epoch": 0.6229772433246379, "grad_norm": 0.3902665674686432, "learning_rate": 1.5601405573825817e-05, "loss": 0.5339, "step": 29374 }, { "epoch": 0.6229984517825709, "grad_norm": 0.3355821669101715, "learning_rate": 1.56011293039903e-05, "loss": 0.4621, "step": 29375 }, { "epoch": 0.6230196602405039, "grad_norm": 0.36279597878456116, "learning_rate": 1.5600853027925256e-05, "loss": 0.4409, "step": 29376 }, { "epoch": 0.6230408686984369, "grad_norm": 0.3733132779598236, "learning_rate": 1.5600576745630993e-05, "loss": 0.4253, "step": 29377 }, { "epoch": 0.62306207715637, "grad_norm": 0.3233073055744171, "learning_rate": 1.560030045710782e-05, "loss": 0.4579, "step": 29378 }, { "epoch": 0.6230832856143029, "grad_norm": 0.3872632086277008, "learning_rate": 1.5600024162356035e-05, "loss": 0.5289, "step": 29379 }, { "epoch": 0.623104494072236, "grad_norm": 0.9400379061698914, "learning_rate": 1.5599747861375957e-05, "loss": 0.5171, "step": 29380 }, { "epoch": 0.6231257025301691, "grad_norm": 0.3098165690898895, "learning_rate": 1.5599471554167883e-05, "loss": 0.4306, "step": 29381 }, { "epoch": 0.623146910988102, "grad_norm": 0.7747795581817627, "learning_rate": 1.5599195240732128e-05, "loss": 0.5258, "step": 29382 }, { "epoch": 0.6231681194460351, "grad_norm": 0.3417492210865021, "learning_rate": 1.5598918921069e-05, "loss": 0.5232, "step": 29383 }, { "epoch": 0.6231893279039681, "grad_norm": 0.34338319301605225, "learning_rate": 1.5598642595178798e-05, "loss": 0.4479, "step": 29384 }, { "epoch": 0.6232105363619012, "grad_norm": 0.3355773091316223, "learning_rate": 1.5598366263061837e-05, "loss": 0.4579, "step": 29385 }, { "epoch": 0.6232317448198341, "grad_norm": 0.379251629114151, "learning_rate": 1.559808992471842e-05, "loss": 0.5232, "step": 29386 }, { "epoch": 0.6232529532777672, "grad_norm": 0.3341095447540283, "learning_rate": 1.5597813580148858e-05, "loss": 0.4833, "step": 29387 }, { "epoch": 0.6232741617357002, "grad_norm": 0.35079777240753174, "learning_rate": 1.5597537229353458e-05, "loss": 0.5524, "step": 29388 }, { "epoch": 0.6232953701936332, "grad_norm": 0.3662983775138855, "learning_rate": 1.559726087233252e-05, "loss": 0.5094, "step": 29389 }, { "epoch": 0.6233165786515662, "grad_norm": 0.3373202681541443, "learning_rate": 1.559698450908636e-05, "loss": 0.5111, "step": 29390 }, { "epoch": 0.6233377871094993, "grad_norm": 0.35692352056503296, "learning_rate": 1.5596708139615287e-05, "loss": 0.5527, "step": 29391 }, { "epoch": 0.6233589955674322, "grad_norm": 0.352563738822937, "learning_rate": 1.5596431763919598e-05, "loss": 0.5225, "step": 29392 }, { "epoch": 0.6233802040253653, "grad_norm": 0.32647207379341125, "learning_rate": 1.5596155381999606e-05, "loss": 0.4474, "step": 29393 }, { "epoch": 0.6234014124832984, "grad_norm": 0.3276169002056122, "learning_rate": 1.5595878993855623e-05, "loss": 0.5098, "step": 29394 }, { "epoch": 0.6234226209412314, "grad_norm": 0.4014423191547394, "learning_rate": 1.5595602599487947e-05, "loss": 0.4619, "step": 29395 }, { "epoch": 0.6234438293991644, "grad_norm": 0.4055139422416687, "learning_rate": 1.5595326198896893e-05, "loss": 0.5583, "step": 29396 }, { "epoch": 0.6234650378570974, "grad_norm": 0.34117352962493896, "learning_rate": 1.5595049792082765e-05, "loss": 0.493, "step": 29397 }, { "epoch": 0.6234862463150305, "grad_norm": 0.40078750252723694, "learning_rate": 1.5594773379045873e-05, "loss": 0.5197, "step": 29398 }, { "epoch": 0.6235074547729634, "grad_norm": 0.37430715560913086, "learning_rate": 1.559449695978652e-05, "loss": 0.4999, "step": 29399 }, { "epoch": 0.6235286632308965, "grad_norm": 0.3712144196033478, "learning_rate": 1.559422053430502e-05, "loss": 0.5389, "step": 29400 }, { "epoch": 0.6235498716888295, "grad_norm": 0.3632029891014099, "learning_rate": 1.5593944102601674e-05, "loss": 0.5247, "step": 29401 }, { "epoch": 0.6235710801467625, "grad_norm": 0.42290765047073364, "learning_rate": 1.5593667664676792e-05, "loss": 0.5537, "step": 29402 }, { "epoch": 0.6235922886046955, "grad_norm": 0.3445245027542114, "learning_rate": 1.559339122053068e-05, "loss": 0.4933, "step": 29403 }, { "epoch": 0.6236134970626286, "grad_norm": 0.5623576641082764, "learning_rate": 1.559311477016365e-05, "loss": 0.6044, "step": 29404 }, { "epoch": 0.6236347055205615, "grad_norm": 0.43034929037094116, "learning_rate": 1.5592838313576003e-05, "loss": 0.4548, "step": 29405 }, { "epoch": 0.6236559139784946, "grad_norm": 0.3956218659877777, "learning_rate": 1.559256185076805e-05, "loss": 0.5671, "step": 29406 }, { "epoch": 0.6236771224364277, "grad_norm": 0.3548813760280609, "learning_rate": 1.5592285381740103e-05, "loss": 0.4778, "step": 29407 }, { "epoch": 0.6236983308943607, "grad_norm": 0.37841683626174927, "learning_rate": 1.559200890649246e-05, "loss": 0.4806, "step": 29408 }, { "epoch": 0.6237195393522937, "grad_norm": 0.3168076276779175, "learning_rate": 1.5591732425025435e-05, "loss": 0.4732, "step": 29409 }, { "epoch": 0.6237407478102267, "grad_norm": 0.3477446138858795, "learning_rate": 1.5591455937339333e-05, "loss": 0.5585, "step": 29410 }, { "epoch": 0.6237619562681598, "grad_norm": 0.34581780433654785, "learning_rate": 1.5591179443434464e-05, "loss": 0.4316, "step": 29411 }, { "epoch": 0.6237831647260927, "grad_norm": 0.4440540075302124, "learning_rate": 1.5590902943311132e-05, "loss": 0.5553, "step": 29412 }, { "epoch": 0.6238043731840258, "grad_norm": 0.3937595784664154, "learning_rate": 1.5590626436969648e-05, "loss": 0.49, "step": 29413 }, { "epoch": 0.6238255816419588, "grad_norm": 0.32522186636924744, "learning_rate": 1.5590349924410318e-05, "loss": 0.4111, "step": 29414 }, { "epoch": 0.6238467900998919, "grad_norm": 0.3388870060443878, "learning_rate": 1.5590073405633444e-05, "loss": 0.4963, "step": 29415 }, { "epoch": 0.6238679985578248, "grad_norm": 0.3359598219394684, "learning_rate": 1.5589796880639345e-05, "loss": 0.4351, "step": 29416 }, { "epoch": 0.6238892070157579, "grad_norm": 0.3582019507884979, "learning_rate": 1.558952034942832e-05, "loss": 0.4422, "step": 29417 }, { "epoch": 0.6239104154736909, "grad_norm": 0.4356926679611206, "learning_rate": 1.5589243812000682e-05, "loss": 0.4606, "step": 29418 }, { "epoch": 0.6239316239316239, "grad_norm": 0.38285282254219055, "learning_rate": 1.5588967268356733e-05, "loss": 0.4848, "step": 29419 }, { "epoch": 0.623952832389557, "grad_norm": 0.4710129499435425, "learning_rate": 1.5588690718496783e-05, "loss": 0.4937, "step": 29420 }, { "epoch": 0.62397404084749, "grad_norm": 0.3499530851840973, "learning_rate": 1.558841416242114e-05, "loss": 0.5004, "step": 29421 }, { "epoch": 0.623995249305423, "grad_norm": 1.1701325178146362, "learning_rate": 1.5588137600130113e-05, "loss": 0.5767, "step": 29422 }, { "epoch": 0.624016457763356, "grad_norm": 0.35226279497146606, "learning_rate": 1.5587861031624006e-05, "loss": 0.4702, "step": 29423 }, { "epoch": 0.6240376662212891, "grad_norm": 0.4171736240386963, "learning_rate": 1.5587584456903127e-05, "loss": 0.5252, "step": 29424 }, { "epoch": 0.624058874679222, "grad_norm": 0.37662601470947266, "learning_rate": 1.5587307875967787e-05, "loss": 0.5629, "step": 29425 }, { "epoch": 0.6240800831371551, "grad_norm": 0.3285941183567047, "learning_rate": 1.5587031288818292e-05, "loss": 0.4864, "step": 29426 }, { "epoch": 0.6241012915950881, "grad_norm": 0.3049430251121521, "learning_rate": 1.5586754695454947e-05, "loss": 0.4014, "step": 29427 }, { "epoch": 0.6241225000530212, "grad_norm": 0.35826215147972107, "learning_rate": 1.5586478095878065e-05, "loss": 0.4645, "step": 29428 }, { "epoch": 0.6241437085109541, "grad_norm": 0.35157644748687744, "learning_rate": 1.558620149008795e-05, "loss": 0.4108, "step": 29429 }, { "epoch": 0.6241649169688872, "grad_norm": 0.36206990480422974, "learning_rate": 1.558592487808491e-05, "loss": 0.5252, "step": 29430 }, { "epoch": 0.6241861254268202, "grad_norm": 0.43699049949645996, "learning_rate": 1.5585648259869254e-05, "loss": 0.5134, "step": 29431 }, { "epoch": 0.6242073338847532, "grad_norm": 0.3577177822589874, "learning_rate": 1.5585371635441283e-05, "loss": 0.5643, "step": 29432 }, { "epoch": 0.6242285423426862, "grad_norm": 0.3977234661579132, "learning_rate": 1.5585095004801315e-05, "loss": 0.5141, "step": 29433 }, { "epoch": 0.6242497508006193, "grad_norm": 0.3736102879047394, "learning_rate": 1.5584818367949655e-05, "loss": 0.4786, "step": 29434 }, { "epoch": 0.6242709592585524, "grad_norm": 0.42688852548599243, "learning_rate": 1.5584541724886604e-05, "loss": 0.5828, "step": 29435 }, { "epoch": 0.6242921677164853, "grad_norm": 0.33089330792427063, "learning_rate": 1.5584265075612476e-05, "loss": 0.4452, "step": 29436 }, { "epoch": 0.6243133761744184, "grad_norm": 0.3269215524196625, "learning_rate": 1.558398842012758e-05, "loss": 0.4823, "step": 29437 }, { "epoch": 0.6243345846323514, "grad_norm": 0.3545127809047699, "learning_rate": 1.5583711758432215e-05, "loss": 0.5225, "step": 29438 }, { "epoch": 0.6243557930902844, "grad_norm": 0.5083995461463928, "learning_rate": 1.5583435090526697e-05, "loss": 0.4588, "step": 29439 }, { "epoch": 0.6243770015482174, "grad_norm": 0.4265904426574707, "learning_rate": 1.558315841641133e-05, "loss": 0.4732, "step": 29440 }, { "epoch": 0.6243982100061505, "grad_norm": 0.378872275352478, "learning_rate": 1.5582881736086425e-05, "loss": 0.4751, "step": 29441 }, { "epoch": 0.6244194184640834, "grad_norm": 0.3716426491737366, "learning_rate": 1.5582605049552284e-05, "loss": 0.5307, "step": 29442 }, { "epoch": 0.6244406269220165, "grad_norm": 0.3636919856071472, "learning_rate": 1.5582328356809222e-05, "loss": 0.5106, "step": 29443 }, { "epoch": 0.6244618353799495, "grad_norm": 0.6669830083847046, "learning_rate": 1.558205165785754e-05, "loss": 0.5517, "step": 29444 }, { "epoch": 0.6244830438378826, "grad_norm": 0.319864422082901, "learning_rate": 1.558177495269755e-05, "loss": 0.4809, "step": 29445 }, { "epoch": 0.6245042522958155, "grad_norm": 0.3307889699935913, "learning_rate": 1.5581498241329557e-05, "loss": 0.5025, "step": 29446 }, { "epoch": 0.6245254607537486, "grad_norm": 0.3274296224117279, "learning_rate": 1.558122152375387e-05, "loss": 0.4439, "step": 29447 }, { "epoch": 0.6245466692116817, "grad_norm": 0.3637140691280365, "learning_rate": 1.55809447999708e-05, "loss": 0.5109, "step": 29448 }, { "epoch": 0.6245678776696146, "grad_norm": 0.41749751567840576, "learning_rate": 1.5580668069980646e-05, "loss": 0.4627, "step": 29449 }, { "epoch": 0.6245890861275477, "grad_norm": 0.3396316468715668, "learning_rate": 1.5580391333783727e-05, "loss": 0.5503, "step": 29450 }, { "epoch": 0.6246102945854807, "grad_norm": 0.34469470381736755, "learning_rate": 1.5580114591380343e-05, "loss": 0.5053, "step": 29451 }, { "epoch": 0.6246315030434137, "grad_norm": 0.3987466096878052, "learning_rate": 1.5579837842770804e-05, "loss": 0.4852, "step": 29452 }, { "epoch": 0.6246527115013467, "grad_norm": 0.3723873198032379, "learning_rate": 1.557956108795542e-05, "loss": 0.5747, "step": 29453 }, { "epoch": 0.6246739199592798, "grad_norm": 0.3458428978919983, "learning_rate": 1.5579284326934495e-05, "loss": 0.513, "step": 29454 }, { "epoch": 0.6246951284172128, "grad_norm": 0.41932183504104614, "learning_rate": 1.5579007559708337e-05, "loss": 0.5752, "step": 29455 }, { "epoch": 0.6247163368751458, "grad_norm": 0.3353016972541809, "learning_rate": 1.5578730786277255e-05, "loss": 0.5074, "step": 29456 }, { "epoch": 0.6247375453330788, "grad_norm": 0.346246600151062, "learning_rate": 1.5578454006641562e-05, "loss": 0.5247, "step": 29457 }, { "epoch": 0.6247587537910119, "grad_norm": 0.3381119668483734, "learning_rate": 1.5578177220801557e-05, "loss": 0.5328, "step": 29458 }, { "epoch": 0.6247799622489448, "grad_norm": 0.3849005699157715, "learning_rate": 1.557790042875755e-05, "loss": 0.4557, "step": 29459 }, { "epoch": 0.6248011707068779, "grad_norm": 0.3525627553462982, "learning_rate": 1.5577623630509853e-05, "loss": 0.4584, "step": 29460 }, { "epoch": 0.624822379164811, "grad_norm": 0.36363449692726135, "learning_rate": 1.557734682605877e-05, "loss": 0.4714, "step": 29461 }, { "epoch": 0.624843587622744, "grad_norm": 0.3719494342803955, "learning_rate": 1.5577070015404614e-05, "loss": 0.4604, "step": 29462 }, { "epoch": 0.624864796080677, "grad_norm": 0.351800799369812, "learning_rate": 1.5576793198547684e-05, "loss": 0.5282, "step": 29463 }, { "epoch": 0.62488600453861, "grad_norm": 0.33433058857917786, "learning_rate": 1.55765163754883e-05, "loss": 0.495, "step": 29464 }, { "epoch": 0.6249072129965431, "grad_norm": 0.3579857051372528, "learning_rate": 1.557623954622676e-05, "loss": 0.5409, "step": 29465 }, { "epoch": 0.624928421454476, "grad_norm": 0.3918668031692505, "learning_rate": 1.557596271076337e-05, "loss": 0.5532, "step": 29466 }, { "epoch": 0.6249496299124091, "grad_norm": 0.3532007336616516, "learning_rate": 1.5575685869098447e-05, "loss": 0.4951, "step": 29467 }, { "epoch": 0.6249708383703421, "grad_norm": 0.36229124665260315, "learning_rate": 1.5575409021232295e-05, "loss": 0.5897, "step": 29468 }, { "epoch": 0.6249920468282751, "grad_norm": 0.34191980957984924, "learning_rate": 1.557513216716522e-05, "loss": 0.4909, "step": 29469 }, { "epoch": 0.6250132552862081, "grad_norm": 0.3679988980293274, "learning_rate": 1.5574855306897532e-05, "loss": 0.4559, "step": 29470 }, { "epoch": 0.6250344637441412, "grad_norm": 0.311839759349823, "learning_rate": 1.5574578440429538e-05, "loss": 0.4231, "step": 29471 }, { "epoch": 0.6250556722020741, "grad_norm": 0.33864426612854004, "learning_rate": 1.5574301567761547e-05, "loss": 0.4866, "step": 29472 }, { "epoch": 0.6250768806600072, "grad_norm": 0.384857177734375, "learning_rate": 1.5574024688893864e-05, "loss": 0.5159, "step": 29473 }, { "epoch": 0.6250980891179402, "grad_norm": 0.31169775128364563, "learning_rate": 1.55737478038268e-05, "loss": 0.5097, "step": 29474 }, { "epoch": 0.6251192975758733, "grad_norm": 0.3499322533607483, "learning_rate": 1.5573470912560663e-05, "loss": 0.5773, "step": 29475 }, { "epoch": 0.6251405060338063, "grad_norm": 0.467256635427475, "learning_rate": 1.557319401509576e-05, "loss": 0.4442, "step": 29476 }, { "epoch": 0.6251617144917393, "grad_norm": 0.37693923711776733, "learning_rate": 1.5572917111432403e-05, "loss": 0.4222, "step": 29477 }, { "epoch": 0.6251829229496724, "grad_norm": 0.374040812253952, "learning_rate": 1.557264020157089e-05, "loss": 0.5177, "step": 29478 }, { "epoch": 0.6252041314076053, "grad_norm": 0.3877054750919342, "learning_rate": 1.5572363285511536e-05, "loss": 0.5375, "step": 29479 }, { "epoch": 0.6252253398655384, "grad_norm": 0.4152657985687256, "learning_rate": 1.557208636325465e-05, "loss": 0.4428, "step": 29480 }, { "epoch": 0.6252465483234714, "grad_norm": 0.37009406089782715, "learning_rate": 1.557180943480054e-05, "loss": 0.5049, "step": 29481 }, { "epoch": 0.6252677567814044, "grad_norm": 0.3606697916984558, "learning_rate": 1.5571532500149505e-05, "loss": 0.5079, "step": 29482 }, { "epoch": 0.6252889652393374, "grad_norm": 0.3503090441226959, "learning_rate": 1.5571255559301867e-05, "loss": 0.4316, "step": 29483 }, { "epoch": 0.6253101736972705, "grad_norm": 0.45787909626960754, "learning_rate": 1.5570978612257922e-05, "loss": 0.6098, "step": 29484 }, { "epoch": 0.6253313821552035, "grad_norm": 0.36253201961517334, "learning_rate": 1.5570701659017985e-05, "loss": 0.5033, "step": 29485 }, { "epoch": 0.6253525906131365, "grad_norm": 0.31535574793815613, "learning_rate": 1.557042469958236e-05, "loss": 0.4092, "step": 29486 }, { "epoch": 0.6253737990710695, "grad_norm": 0.36599940061569214, "learning_rate": 1.557014773395136e-05, "loss": 0.4381, "step": 29487 }, { "epoch": 0.6253950075290026, "grad_norm": 0.3797207176685333, "learning_rate": 1.556987076212529e-05, "loss": 0.5434, "step": 29488 }, { "epoch": 0.6254162159869356, "grad_norm": 0.3492601811885834, "learning_rate": 1.556959378410446e-05, "loss": 0.5079, "step": 29489 }, { "epoch": 0.6254374244448686, "grad_norm": 0.34902894496917725, "learning_rate": 1.5569316799889168e-05, "loss": 0.5459, "step": 29490 }, { "epoch": 0.6254586329028017, "grad_norm": 0.3391786217689514, "learning_rate": 1.5569039809479738e-05, "loss": 0.46, "step": 29491 }, { "epoch": 0.6254798413607346, "grad_norm": 0.4954507052898407, "learning_rate": 1.5568762812876464e-05, "loss": 0.4476, "step": 29492 }, { "epoch": 0.6255010498186677, "grad_norm": 0.3711034953594208, "learning_rate": 1.556848581007967e-05, "loss": 0.478, "step": 29493 }, { "epoch": 0.6255222582766007, "grad_norm": 0.7834066152572632, "learning_rate": 1.5568208801089646e-05, "loss": 0.4228, "step": 29494 }, { "epoch": 0.6255434667345338, "grad_norm": 0.3951965272426605, "learning_rate": 1.556793178590671e-05, "loss": 0.5185, "step": 29495 }, { "epoch": 0.6255646751924667, "grad_norm": 0.3053453862667084, "learning_rate": 1.556765476453117e-05, "loss": 0.4772, "step": 29496 }, { "epoch": 0.6255858836503998, "grad_norm": 0.5408200621604919, "learning_rate": 1.5567377736963333e-05, "loss": 0.5477, "step": 29497 }, { "epoch": 0.6256070921083328, "grad_norm": 0.37089136242866516, "learning_rate": 1.556710070320351e-05, "loss": 0.4574, "step": 29498 }, { "epoch": 0.6256283005662658, "grad_norm": 0.3313943147659302, "learning_rate": 1.5566823663252e-05, "loss": 0.3835, "step": 29499 }, { "epoch": 0.6256495090241988, "grad_norm": 0.45082682371139526, "learning_rate": 1.556654661710912e-05, "loss": 0.5383, "step": 29500 }, { "epoch": 0.6256707174821319, "grad_norm": 0.3642542064189911, "learning_rate": 1.5566269564775175e-05, "loss": 0.4689, "step": 29501 }, { "epoch": 0.625691925940065, "grad_norm": 0.32563096284866333, "learning_rate": 1.5565992506250472e-05, "loss": 0.4639, "step": 29502 }, { "epoch": 0.6257131343979979, "grad_norm": 0.3667164444923401, "learning_rate": 1.556571544153532e-05, "loss": 0.5097, "step": 29503 }, { "epoch": 0.625734342855931, "grad_norm": 0.3463905155658722, "learning_rate": 1.556543837063003e-05, "loss": 0.4993, "step": 29504 }, { "epoch": 0.625755551313864, "grad_norm": 0.3558993339538574, "learning_rate": 1.556516129353491e-05, "loss": 0.4245, "step": 29505 }, { "epoch": 0.625776759771797, "grad_norm": 0.32117587327957153, "learning_rate": 1.556488421025026e-05, "loss": 0.4699, "step": 29506 }, { "epoch": 0.62579796822973, "grad_norm": 0.37705856561660767, "learning_rate": 1.5564607120776398e-05, "loss": 0.5104, "step": 29507 }, { "epoch": 0.6258191766876631, "grad_norm": 0.4010554254055023, "learning_rate": 1.5564330025113624e-05, "loss": 0.4042, "step": 29508 }, { "epoch": 0.625840385145596, "grad_norm": 0.33415359258651733, "learning_rate": 1.5564052923262258e-05, "loss": 0.5352, "step": 29509 }, { "epoch": 0.6258615936035291, "grad_norm": 0.4231915771961212, "learning_rate": 1.5563775815222598e-05, "loss": 0.5257, "step": 29510 }, { "epoch": 0.6258828020614621, "grad_norm": 0.35997259616851807, "learning_rate": 1.556349870099495e-05, "loss": 0.5672, "step": 29511 }, { "epoch": 0.6259040105193951, "grad_norm": 0.3146129250526428, "learning_rate": 1.556322158057963e-05, "loss": 0.4682, "step": 29512 }, { "epoch": 0.6259252189773281, "grad_norm": 0.4007110297679901, "learning_rate": 1.5562944453976944e-05, "loss": 0.5142, "step": 29513 }, { "epoch": 0.6259464274352612, "grad_norm": 0.313997745513916, "learning_rate": 1.5562667321187198e-05, "loss": 0.3986, "step": 29514 }, { "epoch": 0.6259676358931942, "grad_norm": 0.39820051193237305, "learning_rate": 1.5562390182210703e-05, "loss": 0.4843, "step": 29515 }, { "epoch": 0.6259888443511272, "grad_norm": 0.3479909300804138, "learning_rate": 1.5562113037047768e-05, "loss": 0.5132, "step": 29516 }, { "epoch": 0.6260100528090603, "grad_norm": 0.36363381147384644, "learning_rate": 1.5561835885698697e-05, "loss": 0.5397, "step": 29517 }, { "epoch": 0.6260312612669933, "grad_norm": 0.32865801453590393, "learning_rate": 1.55615587281638e-05, "loss": 0.4469, "step": 29518 }, { "epoch": 0.6260524697249263, "grad_norm": 0.35705599188804626, "learning_rate": 1.5561281564443387e-05, "loss": 0.5694, "step": 29519 }, { "epoch": 0.6260736781828593, "grad_norm": 0.40566208958625793, "learning_rate": 1.5561004394537765e-05, "loss": 0.469, "step": 29520 }, { "epoch": 0.6260948866407924, "grad_norm": 0.3678368628025055, "learning_rate": 1.556072721844724e-05, "loss": 0.475, "step": 29521 }, { "epoch": 0.6261160950987253, "grad_norm": 0.36464670300483704, "learning_rate": 1.5560450036172123e-05, "loss": 0.4664, "step": 29522 }, { "epoch": 0.6261373035566584, "grad_norm": 0.36079147458076477, "learning_rate": 1.5560172847712726e-05, "loss": 0.473, "step": 29523 }, { "epoch": 0.6261585120145914, "grad_norm": 0.3562866747379303, "learning_rate": 1.5559895653069344e-05, "loss": 0.4692, "step": 29524 }, { "epoch": 0.6261797204725245, "grad_norm": 0.6053922772407532, "learning_rate": 1.5559618452242304e-05, "loss": 0.4296, "step": 29525 }, { "epoch": 0.6262009289304574, "grad_norm": 0.35735827684402466, "learning_rate": 1.55593412452319e-05, "loss": 0.5045, "step": 29526 }, { "epoch": 0.6262221373883905, "grad_norm": 0.432746946811676, "learning_rate": 1.5559064032038445e-05, "loss": 0.4763, "step": 29527 }, { "epoch": 0.6262433458463235, "grad_norm": 0.3271327316761017, "learning_rate": 1.5558786812662247e-05, "loss": 0.5052, "step": 29528 }, { "epoch": 0.6262645543042565, "grad_norm": 0.3438235819339752, "learning_rate": 1.555850958710362e-05, "loss": 0.5116, "step": 29529 }, { "epoch": 0.6262857627621896, "grad_norm": 0.3617461323738098, "learning_rate": 1.5558232355362858e-05, "loss": 0.4474, "step": 29530 }, { "epoch": 0.6263069712201226, "grad_norm": 0.35108473896980286, "learning_rate": 1.5557955117440284e-05, "loss": 0.5039, "step": 29531 }, { "epoch": 0.6263281796780557, "grad_norm": 0.3470461964607239, "learning_rate": 1.5557677873336202e-05, "loss": 0.4764, "step": 29532 }, { "epoch": 0.6263493881359886, "grad_norm": 0.33136633038520813, "learning_rate": 1.5557400623050915e-05, "loss": 0.459, "step": 29533 }, { "epoch": 0.6263705965939217, "grad_norm": 0.38052213191986084, "learning_rate": 1.5557123366584733e-05, "loss": 0.5229, "step": 29534 }, { "epoch": 0.6263918050518547, "grad_norm": 0.355064332485199, "learning_rate": 1.5556846103937972e-05, "loss": 0.5058, "step": 29535 }, { "epoch": 0.6264130135097877, "grad_norm": 0.5854945778846741, "learning_rate": 1.5556568835110933e-05, "loss": 0.4443, "step": 29536 }, { "epoch": 0.6264342219677207, "grad_norm": 0.4090965688228607, "learning_rate": 1.555629156010393e-05, "loss": 0.4857, "step": 29537 }, { "epoch": 0.6264554304256538, "grad_norm": 0.3559007942676544, "learning_rate": 1.5556014278917263e-05, "loss": 0.505, "step": 29538 }, { "epoch": 0.6264766388835867, "grad_norm": 0.3313710689544678, "learning_rate": 1.5555736991551248e-05, "loss": 0.4293, "step": 29539 }, { "epoch": 0.6264978473415198, "grad_norm": 0.3197566866874695, "learning_rate": 1.555545969800619e-05, "loss": 0.4667, "step": 29540 }, { "epoch": 0.6265190557994528, "grad_norm": 0.4025580883026123, "learning_rate": 1.5555182398282394e-05, "loss": 0.4861, "step": 29541 }, { "epoch": 0.6265402642573858, "grad_norm": 0.33621323108673096, "learning_rate": 1.5554905092380177e-05, "loss": 0.449, "step": 29542 }, { "epoch": 0.6265614727153189, "grad_norm": 0.36570116877555847, "learning_rate": 1.5554627780299842e-05, "loss": 0.4786, "step": 29543 }, { "epoch": 0.6265826811732519, "grad_norm": 0.28403306007385254, "learning_rate": 1.5554350462041695e-05, "loss": 0.4296, "step": 29544 }, { "epoch": 0.626603889631185, "grad_norm": 0.3781110942363739, "learning_rate": 1.5554073137606052e-05, "loss": 0.4633, "step": 29545 }, { "epoch": 0.6266250980891179, "grad_norm": 0.3365641236305237, "learning_rate": 1.5553795806993215e-05, "loss": 0.3833, "step": 29546 }, { "epoch": 0.626646306547051, "grad_norm": 0.4384433925151825, "learning_rate": 1.5553518470203495e-05, "loss": 0.4914, "step": 29547 }, { "epoch": 0.626667515004984, "grad_norm": 0.38309648633003235, "learning_rate": 1.55532411272372e-05, "loss": 0.5451, "step": 29548 }, { "epoch": 0.626688723462917, "grad_norm": 0.3697849214076996, "learning_rate": 1.5552963778094637e-05, "loss": 0.5121, "step": 29549 }, { "epoch": 0.62670993192085, "grad_norm": 0.33897972106933594, "learning_rate": 1.555268642277612e-05, "loss": 0.4902, "step": 29550 }, { "epoch": 0.6267311403787831, "grad_norm": 0.35088324546813965, "learning_rate": 1.555240906128195e-05, "loss": 0.4436, "step": 29551 }, { "epoch": 0.626752348836716, "grad_norm": 0.3025719225406647, "learning_rate": 1.555213169361244e-05, "loss": 0.4185, "step": 29552 }, { "epoch": 0.6267735572946491, "grad_norm": 0.35863617062568665, "learning_rate": 1.5551854319767897e-05, "loss": 0.4739, "step": 29553 }, { "epoch": 0.6267947657525821, "grad_norm": 0.5203545689582825, "learning_rate": 1.5551576939748628e-05, "loss": 0.5463, "step": 29554 }, { "epoch": 0.6268159742105152, "grad_norm": 0.36593741178512573, "learning_rate": 1.555129955355495e-05, "loss": 0.5144, "step": 29555 }, { "epoch": 0.6268371826684481, "grad_norm": 0.3029893636703491, "learning_rate": 1.5551022161187155e-05, "loss": 0.5059, "step": 29556 }, { "epoch": 0.6268583911263812, "grad_norm": 0.35510173439979553, "learning_rate": 1.555074476264557e-05, "loss": 0.49, "step": 29557 }, { "epoch": 0.6268795995843143, "grad_norm": 0.34408873319625854, "learning_rate": 1.5550467357930488e-05, "loss": 0.4725, "step": 29558 }, { "epoch": 0.6269008080422472, "grad_norm": 0.3747311532497406, "learning_rate": 1.555018994704223e-05, "loss": 0.477, "step": 29559 }, { "epoch": 0.6269220165001803, "grad_norm": 0.37939345836639404, "learning_rate": 1.5549912529981096e-05, "loss": 0.511, "step": 29560 }, { "epoch": 0.6269432249581133, "grad_norm": 0.3352031111717224, "learning_rate": 1.55496351067474e-05, "loss": 0.4996, "step": 29561 }, { "epoch": 0.6269644334160464, "grad_norm": 0.3461079001426697, "learning_rate": 1.5549357677341443e-05, "loss": 0.453, "step": 29562 }, { "epoch": 0.6269856418739793, "grad_norm": 0.3998733460903168, "learning_rate": 1.5549080241763544e-05, "loss": 0.4931, "step": 29563 }, { "epoch": 0.6270068503319124, "grad_norm": 0.3836215138435364, "learning_rate": 1.5548802800014004e-05, "loss": 0.495, "step": 29564 }, { "epoch": 0.6270280587898454, "grad_norm": 0.3738687336444855, "learning_rate": 1.5548525352093135e-05, "loss": 0.4138, "step": 29565 }, { "epoch": 0.6270492672477784, "grad_norm": 0.3627094030380249, "learning_rate": 1.554824789800124e-05, "loss": 0.4918, "step": 29566 }, { "epoch": 0.6270704757057114, "grad_norm": 0.3413718044757843, "learning_rate": 1.5547970437738637e-05, "loss": 0.5537, "step": 29567 }, { "epoch": 0.6270916841636445, "grad_norm": 0.321809321641922, "learning_rate": 1.554769297130563e-05, "loss": 0.4928, "step": 29568 }, { "epoch": 0.6271128926215774, "grad_norm": 0.5090464353561401, "learning_rate": 1.554741549870252e-05, "loss": 0.5198, "step": 29569 }, { "epoch": 0.6271341010795105, "grad_norm": 0.34798628091812134, "learning_rate": 1.5547138019929628e-05, "loss": 0.4414, "step": 29570 }, { "epoch": 0.6271553095374436, "grad_norm": 0.3787274956703186, "learning_rate": 1.554686053498726e-05, "loss": 0.5788, "step": 29571 }, { "epoch": 0.6271765179953765, "grad_norm": 0.37350931763648987, "learning_rate": 1.5546583043875715e-05, "loss": 0.4839, "step": 29572 }, { "epoch": 0.6271977264533096, "grad_norm": 0.41838234663009644, "learning_rate": 1.5546305546595312e-05, "loss": 0.5224, "step": 29573 }, { "epoch": 0.6272189349112426, "grad_norm": 0.3448856770992279, "learning_rate": 1.554602804314636e-05, "loss": 0.4694, "step": 29574 }, { "epoch": 0.6272401433691757, "grad_norm": 0.3788534104824066, "learning_rate": 1.5545750533529156e-05, "loss": 0.5191, "step": 29575 }, { "epoch": 0.6272613518271086, "grad_norm": 0.3556595742702484, "learning_rate": 1.554547301774402e-05, "loss": 0.511, "step": 29576 }, { "epoch": 0.6272825602850417, "grad_norm": 0.326183557510376, "learning_rate": 1.554519549579126e-05, "loss": 0.5059, "step": 29577 }, { "epoch": 0.6273037687429747, "grad_norm": 0.3827469050884247, "learning_rate": 1.554491796767118e-05, "loss": 0.446, "step": 29578 }, { "epoch": 0.6273249772009077, "grad_norm": 0.3222355842590332, "learning_rate": 1.554464043338409e-05, "loss": 0.4107, "step": 29579 }, { "epoch": 0.6273461856588407, "grad_norm": 0.35586118698120117, "learning_rate": 1.5544362892930297e-05, "loss": 0.5512, "step": 29580 }, { "epoch": 0.6273673941167738, "grad_norm": 0.3606683015823364, "learning_rate": 1.5544085346310113e-05, "loss": 0.4919, "step": 29581 }, { "epoch": 0.6273886025747067, "grad_norm": 0.34944987297058105, "learning_rate": 1.5543807793523844e-05, "loss": 0.5276, "step": 29582 }, { "epoch": 0.6274098110326398, "grad_norm": 0.34893831610679626, "learning_rate": 1.5543530234571802e-05, "loss": 0.4488, "step": 29583 }, { "epoch": 0.6274310194905729, "grad_norm": 0.39224618673324585, "learning_rate": 1.5543252669454295e-05, "loss": 0.5308, "step": 29584 }, { "epoch": 0.6274522279485059, "grad_norm": 0.39144232869148254, "learning_rate": 1.554297509817163e-05, "loss": 0.588, "step": 29585 }, { "epoch": 0.6274734364064389, "grad_norm": 0.3641055226325989, "learning_rate": 1.5542697520724114e-05, "loss": 0.5169, "step": 29586 }, { "epoch": 0.6274946448643719, "grad_norm": 0.3783322870731354, "learning_rate": 1.554241993711206e-05, "loss": 0.4924, "step": 29587 }, { "epoch": 0.627515853322305, "grad_norm": 0.35791370272636414, "learning_rate": 1.5542142347335773e-05, "loss": 0.525, "step": 29588 }, { "epoch": 0.6275370617802379, "grad_norm": 0.4587796628475189, "learning_rate": 1.5541864751395564e-05, "loss": 0.4849, "step": 29589 }, { "epoch": 0.627558270238171, "grad_norm": 0.3068530857563019, "learning_rate": 1.5541587149291744e-05, "loss": 0.3711, "step": 29590 }, { "epoch": 0.627579478696104, "grad_norm": 0.45684337615966797, "learning_rate": 1.5541309541024617e-05, "loss": 0.4491, "step": 29591 }, { "epoch": 0.627600687154037, "grad_norm": 0.31193387508392334, "learning_rate": 1.5541031926594495e-05, "loss": 0.4225, "step": 29592 }, { "epoch": 0.62762189561197, "grad_norm": 0.38207343220710754, "learning_rate": 1.5540754306001683e-05, "loss": 0.5226, "step": 29593 }, { "epoch": 0.6276431040699031, "grad_norm": 0.37851861119270325, "learning_rate": 1.5540476679246494e-05, "loss": 0.4974, "step": 29594 }, { "epoch": 0.627664312527836, "grad_norm": 0.3979412019252777, "learning_rate": 1.554019904632923e-05, "loss": 0.5774, "step": 29595 }, { "epoch": 0.6276855209857691, "grad_norm": 1.1387883424758911, "learning_rate": 1.5539921407250213e-05, "loss": 0.4547, "step": 29596 }, { "epoch": 0.6277067294437021, "grad_norm": 0.3996841013431549, "learning_rate": 1.553964376200974e-05, "loss": 0.5133, "step": 29597 }, { "epoch": 0.6277279379016352, "grad_norm": 0.3769654929637909, "learning_rate": 1.553936611060812e-05, "loss": 0.5118, "step": 29598 }, { "epoch": 0.6277491463595682, "grad_norm": 0.37451887130737305, "learning_rate": 1.5539088453045667e-05, "loss": 0.4594, "step": 29599 }, { "epoch": 0.6277703548175012, "grad_norm": 0.341235488653183, "learning_rate": 1.553881078932269e-05, "loss": 0.5068, "step": 29600 }, { "epoch": 0.6277915632754343, "grad_norm": 0.37356674671173096, "learning_rate": 1.5538533119439493e-05, "loss": 0.6129, "step": 29601 }, { "epoch": 0.6278127717333672, "grad_norm": 0.36514410376548767, "learning_rate": 1.5538255443396388e-05, "loss": 0.5078, "step": 29602 }, { "epoch": 0.6278339801913003, "grad_norm": 0.35975033044815063, "learning_rate": 1.5537977761193686e-05, "loss": 0.4582, "step": 29603 }, { "epoch": 0.6278551886492333, "grad_norm": 0.36713555455207825, "learning_rate": 1.553770007283169e-05, "loss": 0.5233, "step": 29604 }, { "epoch": 0.6278763971071664, "grad_norm": 0.39794036746025085, "learning_rate": 1.5537422378310716e-05, "loss": 0.5673, "step": 29605 }, { "epoch": 0.6278976055650993, "grad_norm": 0.42465072870254517, "learning_rate": 1.5537144677631065e-05, "loss": 0.488, "step": 29606 }, { "epoch": 0.6279188140230324, "grad_norm": 0.3730938136577606, "learning_rate": 1.553686697079305e-05, "loss": 0.4515, "step": 29607 }, { "epoch": 0.6279400224809654, "grad_norm": 0.36840930581092834, "learning_rate": 1.5536589257796984e-05, "loss": 0.5334, "step": 29608 }, { "epoch": 0.6279612309388984, "grad_norm": 0.3996783196926117, "learning_rate": 1.5536311538643168e-05, "loss": 0.489, "step": 29609 }, { "epoch": 0.6279824393968314, "grad_norm": 0.3509500026702881, "learning_rate": 1.5536033813331914e-05, "loss": 0.5795, "step": 29610 }, { "epoch": 0.6280036478547645, "grad_norm": 0.3213937282562256, "learning_rate": 1.5535756081863533e-05, "loss": 0.4618, "step": 29611 }, { "epoch": 0.6280248563126976, "grad_norm": 0.3025398850440979, "learning_rate": 1.5535478344238334e-05, "loss": 0.4601, "step": 29612 }, { "epoch": 0.6280460647706305, "grad_norm": 0.336940735578537, "learning_rate": 1.5535200600456618e-05, "loss": 0.5104, "step": 29613 }, { "epoch": 0.6280672732285636, "grad_norm": 0.35027921199798584, "learning_rate": 1.5534922850518704e-05, "loss": 0.4925, "step": 29614 }, { "epoch": 0.6280884816864966, "grad_norm": 0.32572242617607117, "learning_rate": 1.5534645094424897e-05, "loss": 0.501, "step": 29615 }, { "epoch": 0.6281096901444296, "grad_norm": 0.3761063516139984, "learning_rate": 1.5534367332175505e-05, "loss": 0.5408, "step": 29616 }, { "epoch": 0.6281308986023626, "grad_norm": 0.40337225794792175, "learning_rate": 1.553408956377084e-05, "loss": 0.5012, "step": 29617 }, { "epoch": 0.6281521070602957, "grad_norm": 0.39462408423423767, "learning_rate": 1.5533811789211206e-05, "loss": 0.5399, "step": 29618 }, { "epoch": 0.6281733155182286, "grad_norm": 0.5015866756439209, "learning_rate": 1.5533534008496912e-05, "loss": 0.4955, "step": 29619 }, { "epoch": 0.6281945239761617, "grad_norm": 0.39597049355506897, "learning_rate": 1.5533256221628273e-05, "loss": 0.4744, "step": 29620 }, { "epoch": 0.6282157324340947, "grad_norm": 0.32940539717674255, "learning_rate": 1.5532978428605595e-05, "loss": 0.469, "step": 29621 }, { "epoch": 0.6282369408920278, "grad_norm": 0.3718729317188263, "learning_rate": 1.5532700629429188e-05, "loss": 0.4715, "step": 29622 }, { "epoch": 0.6282581493499607, "grad_norm": 0.3771970570087433, "learning_rate": 1.5532422824099355e-05, "loss": 0.5824, "step": 29623 }, { "epoch": 0.6282793578078938, "grad_norm": 0.3658186197280884, "learning_rate": 1.5532145012616413e-05, "loss": 0.4648, "step": 29624 }, { "epoch": 0.6283005662658269, "grad_norm": 0.37915217876434326, "learning_rate": 1.5531867194980665e-05, "loss": 0.4269, "step": 29625 }, { "epoch": 0.6283217747237598, "grad_norm": 0.41732490062713623, "learning_rate": 1.553158937119242e-05, "loss": 0.5135, "step": 29626 }, { "epoch": 0.6283429831816929, "grad_norm": 0.35518237948417664, "learning_rate": 1.5531311541251995e-05, "loss": 0.4722, "step": 29627 }, { "epoch": 0.6283641916396259, "grad_norm": 0.3624209463596344, "learning_rate": 1.5531033705159693e-05, "loss": 0.5071, "step": 29628 }, { "epoch": 0.6283854000975589, "grad_norm": 0.4100864827632904, "learning_rate": 1.5530755862915822e-05, "loss": 0.4804, "step": 29629 }, { "epoch": 0.6284066085554919, "grad_norm": 0.35296687483787537, "learning_rate": 1.5530478014520692e-05, "loss": 0.435, "step": 29630 }, { "epoch": 0.628427817013425, "grad_norm": 0.31481653451919556, "learning_rate": 1.553020015997461e-05, "loss": 0.4817, "step": 29631 }, { "epoch": 0.628449025471358, "grad_norm": 0.3356287479400635, "learning_rate": 1.552992229927789e-05, "loss": 0.4312, "step": 29632 }, { "epoch": 0.628470233929291, "grad_norm": 0.3859284520149231, "learning_rate": 1.5529644432430836e-05, "loss": 0.5531, "step": 29633 }, { "epoch": 0.628491442387224, "grad_norm": 0.4135442078113556, "learning_rate": 1.5529366559433763e-05, "loss": 0.5078, "step": 29634 }, { "epoch": 0.6285126508451571, "grad_norm": 0.33052918314933777, "learning_rate": 1.5529088680286974e-05, "loss": 0.4607, "step": 29635 }, { "epoch": 0.62853385930309, "grad_norm": 0.34476983547210693, "learning_rate": 1.5528810794990782e-05, "loss": 0.4573, "step": 29636 }, { "epoch": 0.6285550677610231, "grad_norm": 0.33688002824783325, "learning_rate": 1.5528532903545498e-05, "loss": 0.4806, "step": 29637 }, { "epoch": 0.6285762762189561, "grad_norm": 0.3948099911212921, "learning_rate": 1.552825500595142e-05, "loss": 0.4669, "step": 29638 }, { "epoch": 0.6285974846768891, "grad_norm": 0.3561570346355438, "learning_rate": 1.5527977102208872e-05, "loss": 0.4801, "step": 29639 }, { "epoch": 0.6286186931348222, "grad_norm": 0.35050562024116516, "learning_rate": 1.552769919231815e-05, "loss": 0.5392, "step": 29640 }, { "epoch": 0.6286399015927552, "grad_norm": 0.34399905800819397, "learning_rate": 1.552742127627957e-05, "loss": 0.4498, "step": 29641 }, { "epoch": 0.6286611100506883, "grad_norm": 0.4012329578399658, "learning_rate": 1.5527143354093443e-05, "loss": 0.4981, "step": 29642 }, { "epoch": 0.6286823185086212, "grad_norm": 0.4471735954284668, "learning_rate": 1.5526865425760077e-05, "loss": 0.4779, "step": 29643 }, { "epoch": 0.6287035269665543, "grad_norm": 0.32905542850494385, "learning_rate": 1.5526587491279776e-05, "loss": 0.4572, "step": 29644 }, { "epoch": 0.6287247354244873, "grad_norm": 0.34057846665382385, "learning_rate": 1.5526309550652848e-05, "loss": 0.4798, "step": 29645 }, { "epoch": 0.6287459438824203, "grad_norm": 0.35836005210876465, "learning_rate": 1.5526031603879615e-05, "loss": 0.4256, "step": 29646 }, { "epoch": 0.6287671523403533, "grad_norm": 0.3512546420097351, "learning_rate": 1.5525753650960373e-05, "loss": 0.4429, "step": 29647 }, { "epoch": 0.6287883607982864, "grad_norm": 0.3230300843715668, "learning_rate": 1.5525475691895438e-05, "loss": 0.5461, "step": 29648 }, { "epoch": 0.6288095692562193, "grad_norm": 0.34889474511146545, "learning_rate": 1.5525197726685114e-05, "loss": 0.5318, "step": 29649 }, { "epoch": 0.6288307777141524, "grad_norm": 0.3983982801437378, "learning_rate": 1.5524919755329718e-05, "loss": 0.4969, "step": 29650 }, { "epoch": 0.6288519861720854, "grad_norm": 0.358629435300827, "learning_rate": 1.5524641777829548e-05, "loss": 0.5636, "step": 29651 }, { "epoch": 0.6288731946300185, "grad_norm": 0.3162368834018707, "learning_rate": 1.5524363794184924e-05, "loss": 0.4431, "step": 29652 }, { "epoch": 0.6288944030879515, "grad_norm": 0.3528486490249634, "learning_rate": 1.5524085804396146e-05, "loss": 0.5616, "step": 29653 }, { "epoch": 0.6289156115458845, "grad_norm": 0.328916072845459, "learning_rate": 1.552380780846353e-05, "loss": 0.5191, "step": 29654 }, { "epoch": 0.6289368200038176, "grad_norm": 0.45289042592048645, "learning_rate": 1.5523529806387384e-05, "loss": 0.5183, "step": 29655 }, { "epoch": 0.6289580284617505, "grad_norm": 0.3234899640083313, "learning_rate": 1.5523251798168017e-05, "loss": 0.4978, "step": 29656 }, { "epoch": 0.6289792369196836, "grad_norm": 0.37470170855522156, "learning_rate": 1.5522973783805736e-05, "loss": 0.5178, "step": 29657 }, { "epoch": 0.6290004453776166, "grad_norm": 0.4735385477542877, "learning_rate": 1.552269576330085e-05, "loss": 0.4639, "step": 29658 }, { "epoch": 0.6290216538355496, "grad_norm": 0.3411805331707001, "learning_rate": 1.5522417736653674e-05, "loss": 0.4769, "step": 29659 }, { "epoch": 0.6290428622934826, "grad_norm": 0.3373594582080841, "learning_rate": 1.552213970386451e-05, "loss": 0.562, "step": 29660 }, { "epoch": 0.6290640707514157, "grad_norm": 0.3651842772960663, "learning_rate": 1.5521861664933672e-05, "loss": 0.4487, "step": 29661 }, { "epoch": 0.6290852792093486, "grad_norm": 0.38100728392601013, "learning_rate": 1.5521583619861466e-05, "loss": 0.5097, "step": 29662 }, { "epoch": 0.6291064876672817, "grad_norm": 0.4746304750442505, "learning_rate": 1.55213055686482e-05, "loss": 0.4897, "step": 29663 }, { "epoch": 0.6291276961252147, "grad_norm": 0.35103926062583923, "learning_rate": 1.552102751129419e-05, "loss": 0.4079, "step": 29664 }, { "epoch": 0.6291489045831478, "grad_norm": 0.34184134006500244, "learning_rate": 1.5520749447799735e-05, "loss": 0.5526, "step": 29665 }, { "epoch": 0.6291701130410808, "grad_norm": 0.38157182931900024, "learning_rate": 1.552047137816516e-05, "loss": 0.4796, "step": 29666 }, { "epoch": 0.6291913214990138, "grad_norm": 0.33599942922592163, "learning_rate": 1.5520193302390757e-05, "loss": 0.5141, "step": 29667 }, { "epoch": 0.6292125299569469, "grad_norm": 0.4060487747192383, "learning_rate": 1.5519915220476847e-05, "loss": 0.4199, "step": 29668 }, { "epoch": 0.6292337384148798, "grad_norm": 0.38731077313423157, "learning_rate": 1.5519637132423736e-05, "loss": 0.5349, "step": 29669 }, { "epoch": 0.6292549468728129, "grad_norm": 0.33408698439598083, "learning_rate": 1.551935903823173e-05, "loss": 0.5193, "step": 29670 }, { "epoch": 0.6292761553307459, "grad_norm": 0.4028518497943878, "learning_rate": 1.5519080937901138e-05, "loss": 0.5671, "step": 29671 }, { "epoch": 0.629297363788679, "grad_norm": 0.3610081076622009, "learning_rate": 1.5518802831432278e-05, "loss": 0.5539, "step": 29672 }, { "epoch": 0.6293185722466119, "grad_norm": 0.3663453459739685, "learning_rate": 1.551852471882545e-05, "loss": 0.4878, "step": 29673 }, { "epoch": 0.629339780704545, "grad_norm": 0.40227046608924866, "learning_rate": 1.5518246600080965e-05, "loss": 0.4985, "step": 29674 }, { "epoch": 0.629360989162478, "grad_norm": 0.3785035312175751, "learning_rate": 1.551796847519914e-05, "loss": 0.5512, "step": 29675 }, { "epoch": 0.629382197620411, "grad_norm": 0.35917040705680847, "learning_rate": 1.5517690344180273e-05, "loss": 0.5391, "step": 29676 }, { "epoch": 0.629403406078344, "grad_norm": 0.36204615235328674, "learning_rate": 1.5517412207024682e-05, "loss": 0.5002, "step": 29677 }, { "epoch": 0.6294246145362771, "grad_norm": 0.37236982583999634, "learning_rate": 1.551713406373267e-05, "loss": 0.4865, "step": 29678 }, { "epoch": 0.62944582299421, "grad_norm": 0.30663350224494934, "learning_rate": 1.5516855914304555e-05, "loss": 0.4664, "step": 29679 }, { "epoch": 0.6294670314521431, "grad_norm": 0.37091708183288574, "learning_rate": 1.5516577758740635e-05, "loss": 0.5473, "step": 29680 }, { "epoch": 0.6294882399100762, "grad_norm": 0.4690380096435547, "learning_rate": 1.5516299597041225e-05, "loss": 0.5375, "step": 29681 }, { "epoch": 0.6295094483680092, "grad_norm": 0.32936105132102966, "learning_rate": 1.551602142920664e-05, "loss": 0.4812, "step": 29682 }, { "epoch": 0.6295306568259422, "grad_norm": 0.3941541016101837, "learning_rate": 1.551574325523718e-05, "loss": 0.4416, "step": 29683 }, { "epoch": 0.6295518652838752, "grad_norm": 0.4392549693584442, "learning_rate": 1.551546507513316e-05, "loss": 0.434, "step": 29684 }, { "epoch": 0.6295730737418083, "grad_norm": 0.350446879863739, "learning_rate": 1.5515186888894883e-05, "loss": 0.4522, "step": 29685 }, { "epoch": 0.6295942821997412, "grad_norm": 0.3769301474094391, "learning_rate": 1.5514908696522668e-05, "loss": 0.51, "step": 29686 }, { "epoch": 0.6296154906576743, "grad_norm": 0.34849607944488525, "learning_rate": 1.551463049801682e-05, "loss": 0.4757, "step": 29687 }, { "epoch": 0.6296366991156073, "grad_norm": 0.3732675313949585, "learning_rate": 1.5514352293377644e-05, "loss": 0.468, "step": 29688 }, { "epoch": 0.6296579075735403, "grad_norm": 0.3601844906806946, "learning_rate": 1.551407408260546e-05, "loss": 0.4956, "step": 29689 }, { "epoch": 0.6296791160314733, "grad_norm": 0.3133958876132965, "learning_rate": 1.551379586570056e-05, "loss": 0.4895, "step": 29690 }, { "epoch": 0.6297003244894064, "grad_norm": 0.37695181369781494, "learning_rate": 1.551351764266327e-05, "loss": 0.5954, "step": 29691 }, { "epoch": 0.6297215329473393, "grad_norm": 0.3885459005832672, "learning_rate": 1.5513239413493898e-05, "loss": 0.5463, "step": 29692 }, { "epoch": 0.6297427414052724, "grad_norm": 0.36813247203826904, "learning_rate": 1.5512961178192742e-05, "loss": 0.4567, "step": 29693 }, { "epoch": 0.6297639498632055, "grad_norm": 0.4151778817176819, "learning_rate": 1.5512682936760122e-05, "loss": 0.5252, "step": 29694 }, { "epoch": 0.6297851583211385, "grad_norm": 0.3512434959411621, "learning_rate": 1.551240468919634e-05, "loss": 0.5325, "step": 29695 }, { "epoch": 0.6298063667790715, "grad_norm": 0.3425869047641754, "learning_rate": 1.5512126435501717e-05, "loss": 0.4606, "step": 29696 }, { "epoch": 0.6298275752370045, "grad_norm": 0.3888632357120514, "learning_rate": 1.551184817567655e-05, "loss": 0.5263, "step": 29697 }, { "epoch": 0.6298487836949376, "grad_norm": 0.3568747937679291, "learning_rate": 1.5511569909721152e-05, "loss": 0.5216, "step": 29698 }, { "epoch": 0.6298699921528705, "grad_norm": 0.37201568484306335, "learning_rate": 1.5511291637635836e-05, "loss": 0.4767, "step": 29699 }, { "epoch": 0.6298912006108036, "grad_norm": 0.3409922420978546, "learning_rate": 1.5511013359420908e-05, "loss": 0.5005, "step": 29700 }, { "epoch": 0.6299124090687366, "grad_norm": 0.38184332847595215, "learning_rate": 1.551073507507668e-05, "loss": 0.4846, "step": 29701 }, { "epoch": 0.6299336175266697, "grad_norm": 0.35440775752067566, "learning_rate": 1.551045678460346e-05, "loss": 0.4717, "step": 29702 }, { "epoch": 0.6299548259846026, "grad_norm": 0.5683448314666748, "learning_rate": 1.551017848800156e-05, "loss": 0.4527, "step": 29703 }, { "epoch": 0.6299760344425357, "grad_norm": 0.34300801157951355, "learning_rate": 1.550990018527128e-05, "loss": 0.4519, "step": 29704 }, { "epoch": 0.6299972429004687, "grad_norm": 0.3342759907245636, "learning_rate": 1.5509621876412945e-05, "loss": 0.5039, "step": 29705 }, { "epoch": 0.6300184513584017, "grad_norm": 0.34588953852653503, "learning_rate": 1.5509343561426853e-05, "loss": 0.4976, "step": 29706 }, { "epoch": 0.6300396598163348, "grad_norm": 0.34568122029304504, "learning_rate": 1.5509065240313318e-05, "loss": 0.5301, "step": 29707 }, { "epoch": 0.6300608682742678, "grad_norm": 0.3891536593437195, "learning_rate": 1.5508786913072648e-05, "loss": 0.5365, "step": 29708 }, { "epoch": 0.6300820767322008, "grad_norm": 0.38250312209129333, "learning_rate": 1.5508508579705152e-05, "loss": 0.4691, "step": 29709 }, { "epoch": 0.6301032851901338, "grad_norm": 0.3519604504108429, "learning_rate": 1.550823024021114e-05, "loss": 0.5171, "step": 29710 }, { "epoch": 0.6301244936480669, "grad_norm": 0.35453853011131287, "learning_rate": 1.5507951894590924e-05, "loss": 0.478, "step": 29711 }, { "epoch": 0.6301457021059998, "grad_norm": 0.3209210932254791, "learning_rate": 1.5507673542844812e-05, "loss": 0.3908, "step": 29712 }, { "epoch": 0.6301669105639329, "grad_norm": 0.34857723116874695, "learning_rate": 1.5507395184973113e-05, "loss": 0.4665, "step": 29713 }, { "epoch": 0.6301881190218659, "grad_norm": 0.3540094196796417, "learning_rate": 1.5507116820976137e-05, "loss": 0.5299, "step": 29714 }, { "epoch": 0.630209327479799, "grad_norm": 0.36242610216140747, "learning_rate": 1.5506838450854194e-05, "loss": 0.5012, "step": 29715 }, { "epoch": 0.6302305359377319, "grad_norm": 0.34924882650375366, "learning_rate": 1.550656007460759e-05, "loss": 0.4131, "step": 29716 }, { "epoch": 0.630251744395665, "grad_norm": 0.3956364095211029, "learning_rate": 1.550628169223664e-05, "loss": 0.476, "step": 29717 }, { "epoch": 0.630272952853598, "grad_norm": 0.33906611800193787, "learning_rate": 1.5506003303741653e-05, "loss": 0.5205, "step": 29718 }, { "epoch": 0.630294161311531, "grad_norm": 0.3351990580558777, "learning_rate": 1.5505724909122934e-05, "loss": 0.5191, "step": 29719 }, { "epoch": 0.6303153697694641, "grad_norm": 0.37815889716148376, "learning_rate": 1.5505446508380798e-05, "loss": 0.4818, "step": 29720 }, { "epoch": 0.6303365782273971, "grad_norm": 0.33209672570228577, "learning_rate": 1.5505168101515555e-05, "loss": 0.4812, "step": 29721 }, { "epoch": 0.6303577866853302, "grad_norm": 0.35996732115745544, "learning_rate": 1.5504889688527504e-05, "loss": 0.5229, "step": 29722 }, { "epoch": 0.6303789951432631, "grad_norm": 0.38118624687194824, "learning_rate": 1.5504611269416966e-05, "loss": 0.5334, "step": 29723 }, { "epoch": 0.6304002036011962, "grad_norm": 0.3904325067996979, "learning_rate": 1.5504332844184248e-05, "loss": 0.5668, "step": 29724 }, { "epoch": 0.6304214120591292, "grad_norm": 0.34744083881378174, "learning_rate": 1.550405441282966e-05, "loss": 0.5029, "step": 29725 }, { "epoch": 0.6304426205170622, "grad_norm": 0.35577133297920227, "learning_rate": 1.5503775975353506e-05, "loss": 0.4537, "step": 29726 }, { "epoch": 0.6304638289749952, "grad_norm": 0.34135276079177856, "learning_rate": 1.5503497531756107e-05, "loss": 0.4451, "step": 29727 }, { "epoch": 0.6304850374329283, "grad_norm": 0.37782806158065796, "learning_rate": 1.5503219082037763e-05, "loss": 0.5562, "step": 29728 }, { "epoch": 0.6305062458908612, "grad_norm": 0.34498992562294006, "learning_rate": 1.5502940626198783e-05, "loss": 0.4802, "step": 29729 }, { "epoch": 0.6305274543487943, "grad_norm": 0.3688008189201355, "learning_rate": 1.550266216423948e-05, "loss": 0.4907, "step": 29730 }, { "epoch": 0.6305486628067273, "grad_norm": 0.3479360044002533, "learning_rate": 1.5502383696160166e-05, "loss": 0.4983, "step": 29731 }, { "epoch": 0.6305698712646604, "grad_norm": 0.31056514382362366, "learning_rate": 1.550210522196115e-05, "loss": 0.4713, "step": 29732 }, { "epoch": 0.6305910797225933, "grad_norm": 0.33780431747436523, "learning_rate": 1.5501826741642742e-05, "loss": 0.5258, "step": 29733 }, { "epoch": 0.6306122881805264, "grad_norm": 0.3159905970096588, "learning_rate": 1.550154825520525e-05, "loss": 0.4095, "step": 29734 }, { "epoch": 0.6306334966384595, "grad_norm": 0.3296017646789551, "learning_rate": 1.550126976264898e-05, "loss": 0.4857, "step": 29735 }, { "epoch": 0.6306547050963924, "grad_norm": 0.3494075536727905, "learning_rate": 1.5500991263974245e-05, "loss": 0.5567, "step": 29736 }, { "epoch": 0.6306759135543255, "grad_norm": 0.3448579013347626, "learning_rate": 1.550071275918136e-05, "loss": 0.4387, "step": 29737 }, { "epoch": 0.6306971220122585, "grad_norm": 1.3316328525543213, "learning_rate": 1.5500434248270627e-05, "loss": 0.5643, "step": 29738 }, { "epoch": 0.6307183304701915, "grad_norm": 0.3971903324127197, "learning_rate": 1.550015573124236e-05, "loss": 0.503, "step": 29739 }, { "epoch": 0.6307395389281245, "grad_norm": 0.3329769968986511, "learning_rate": 1.5499877208096867e-05, "loss": 0.4343, "step": 29740 }, { "epoch": 0.6307607473860576, "grad_norm": 0.30447566509246826, "learning_rate": 1.549959867883446e-05, "loss": 0.4291, "step": 29741 }, { "epoch": 0.6307819558439905, "grad_norm": 0.3308720588684082, "learning_rate": 1.5499320143455442e-05, "loss": 0.3709, "step": 29742 }, { "epoch": 0.6308031643019236, "grad_norm": 0.31056177616119385, "learning_rate": 1.5499041601960133e-05, "loss": 0.4719, "step": 29743 }, { "epoch": 0.6308243727598566, "grad_norm": 0.30662137269973755, "learning_rate": 1.5498763054348836e-05, "loss": 0.4189, "step": 29744 }, { "epoch": 0.6308455812177897, "grad_norm": 0.3149458169937134, "learning_rate": 1.5498484500621863e-05, "loss": 0.4928, "step": 29745 }, { "epoch": 0.6308667896757226, "grad_norm": 0.37252748012542725, "learning_rate": 1.5498205940779522e-05, "loss": 0.5279, "step": 29746 }, { "epoch": 0.6308879981336557, "grad_norm": 0.33603668212890625, "learning_rate": 1.5497927374822125e-05, "loss": 0.4844, "step": 29747 }, { "epoch": 0.6309092065915888, "grad_norm": 0.3893855810165405, "learning_rate": 1.549764880274998e-05, "loss": 0.612, "step": 29748 }, { "epoch": 0.6309304150495217, "grad_norm": 0.35703909397125244, "learning_rate": 1.54973702245634e-05, "loss": 0.5572, "step": 29749 }, { "epoch": 0.6309516235074548, "grad_norm": 0.3487100899219513, "learning_rate": 1.549709164026269e-05, "loss": 0.5019, "step": 29750 }, { "epoch": 0.6309728319653878, "grad_norm": 0.35599079728126526, "learning_rate": 1.5496813049848166e-05, "loss": 0.5168, "step": 29751 }, { "epoch": 0.6309940404233209, "grad_norm": 0.3527721166610718, "learning_rate": 1.549653445332013e-05, "loss": 0.5137, "step": 29752 }, { "epoch": 0.6310152488812538, "grad_norm": 0.38168662786483765, "learning_rate": 1.5496255850678902e-05, "loss": 0.4797, "step": 29753 }, { "epoch": 0.6310364573391869, "grad_norm": 0.39851242303848267, "learning_rate": 1.549597724192478e-05, "loss": 0.4879, "step": 29754 }, { "epoch": 0.6310576657971199, "grad_norm": 0.3286161720752716, "learning_rate": 1.5495698627058084e-05, "loss": 0.5018, "step": 29755 }, { "epoch": 0.6310788742550529, "grad_norm": 0.40485191345214844, "learning_rate": 1.5495420006079114e-05, "loss": 0.4891, "step": 29756 }, { "epoch": 0.6311000827129859, "grad_norm": 0.36322271823883057, "learning_rate": 1.549514137898819e-05, "loss": 0.5099, "step": 29757 }, { "epoch": 0.631121291170919, "grad_norm": 0.39094114303588867, "learning_rate": 1.5494862745785617e-05, "loss": 0.4923, "step": 29758 }, { "epoch": 0.6311424996288519, "grad_norm": 0.3637421727180481, "learning_rate": 1.5494584106471705e-05, "loss": 0.4669, "step": 29759 }, { "epoch": 0.631163708086785, "grad_norm": 0.3367217779159546, "learning_rate": 1.5494305461046767e-05, "loss": 0.5665, "step": 29760 }, { "epoch": 0.6311849165447181, "grad_norm": 0.4420609176158905, "learning_rate": 1.5494026809511104e-05, "loss": 0.421, "step": 29761 }, { "epoch": 0.631206125002651, "grad_norm": 0.3723449110984802, "learning_rate": 1.5493748151865037e-05, "loss": 0.4908, "step": 29762 }, { "epoch": 0.6312273334605841, "grad_norm": 0.34090033173561096, "learning_rate": 1.549346948810887e-05, "loss": 0.5599, "step": 29763 }, { "epoch": 0.6312485419185171, "grad_norm": 0.339211642742157, "learning_rate": 1.5493190818242913e-05, "loss": 0.3995, "step": 29764 }, { "epoch": 0.6312697503764502, "grad_norm": 0.3394785523414612, "learning_rate": 1.549291214226748e-05, "loss": 0.4642, "step": 29765 }, { "epoch": 0.6312909588343831, "grad_norm": 0.4166219234466553, "learning_rate": 1.5492633460182875e-05, "loss": 0.5666, "step": 29766 }, { "epoch": 0.6313121672923162, "grad_norm": 0.7367808222770691, "learning_rate": 1.5492354771989408e-05, "loss": 0.5755, "step": 29767 }, { "epoch": 0.6313333757502492, "grad_norm": 0.31405559182167053, "learning_rate": 1.5492076077687393e-05, "loss": 0.4483, "step": 29768 }, { "epoch": 0.6313545842081822, "grad_norm": 0.3597918450832367, "learning_rate": 1.5491797377277143e-05, "loss": 0.5189, "step": 29769 }, { "epoch": 0.6313757926661152, "grad_norm": 0.36911314725875854, "learning_rate": 1.5491518670758962e-05, "loss": 0.5282, "step": 29770 }, { "epoch": 0.6313970011240483, "grad_norm": 0.36070534586906433, "learning_rate": 1.549123995813316e-05, "loss": 0.4997, "step": 29771 }, { "epoch": 0.6314182095819812, "grad_norm": 0.35871797800064087, "learning_rate": 1.549096123940005e-05, "loss": 0.4984, "step": 29772 }, { "epoch": 0.6314394180399143, "grad_norm": 0.41449031233787537, "learning_rate": 1.5490682514559943e-05, "loss": 0.481, "step": 29773 }, { "epoch": 0.6314606264978473, "grad_norm": 0.3442358076572418, "learning_rate": 1.5490403783613142e-05, "loss": 0.4841, "step": 29774 }, { "epoch": 0.6314818349557804, "grad_norm": 0.3327707052230835, "learning_rate": 1.5490125046559962e-05, "loss": 0.506, "step": 29775 }, { "epoch": 0.6315030434137134, "grad_norm": 0.4075166583061218, "learning_rate": 1.5489846303400718e-05, "loss": 0.5076, "step": 29776 }, { "epoch": 0.6315242518716464, "grad_norm": 0.5446774959564209, "learning_rate": 1.5489567554135708e-05, "loss": 0.4765, "step": 29777 }, { "epoch": 0.6315454603295795, "grad_norm": 0.39460569620132446, "learning_rate": 1.548928879876525e-05, "loss": 0.5346, "step": 29778 }, { "epoch": 0.6315666687875124, "grad_norm": 0.3688207268714905, "learning_rate": 1.5489010037289654e-05, "loss": 0.4915, "step": 29779 }, { "epoch": 0.6315878772454455, "grad_norm": 0.32887962460517883, "learning_rate": 1.548873126970923e-05, "loss": 0.4645, "step": 29780 }, { "epoch": 0.6316090857033785, "grad_norm": 0.4274810552597046, "learning_rate": 1.5488452496024287e-05, "loss": 0.4784, "step": 29781 }, { "epoch": 0.6316302941613116, "grad_norm": 0.37767094373703003, "learning_rate": 1.548817371623513e-05, "loss": 0.5424, "step": 29782 }, { "epoch": 0.6316515026192445, "grad_norm": 0.3710896670818329, "learning_rate": 1.548789493034208e-05, "loss": 0.5237, "step": 29783 }, { "epoch": 0.6316727110771776, "grad_norm": 0.3529893159866333, "learning_rate": 1.5487616138345438e-05, "loss": 0.503, "step": 29784 }, { "epoch": 0.6316939195351106, "grad_norm": 0.38135093450546265, "learning_rate": 1.548733734024552e-05, "loss": 0.5131, "step": 29785 }, { "epoch": 0.6317151279930436, "grad_norm": 0.38057318329811096, "learning_rate": 1.5487058536042627e-05, "loss": 0.5083, "step": 29786 }, { "epoch": 0.6317363364509766, "grad_norm": 0.3565557897090912, "learning_rate": 1.548677972573708e-05, "loss": 0.5111, "step": 29787 }, { "epoch": 0.6317575449089097, "grad_norm": 0.4349072277545929, "learning_rate": 1.548650090932918e-05, "loss": 0.4841, "step": 29788 }, { "epoch": 0.6317787533668427, "grad_norm": 0.3781701624393463, "learning_rate": 1.5486222086819245e-05, "loss": 0.5178, "step": 29789 }, { "epoch": 0.6317999618247757, "grad_norm": 0.3485415577888489, "learning_rate": 1.548594325820758e-05, "loss": 0.4875, "step": 29790 }, { "epoch": 0.6318211702827088, "grad_norm": 0.347474604845047, "learning_rate": 1.5485664423494496e-05, "loss": 0.5117, "step": 29791 }, { "epoch": 0.6318423787406418, "grad_norm": 0.36994877457618713, "learning_rate": 1.548538558268031e-05, "loss": 0.476, "step": 29792 }, { "epoch": 0.6318635871985748, "grad_norm": 0.34426915645599365, "learning_rate": 1.5485106735765315e-05, "loss": 0.5895, "step": 29793 }, { "epoch": 0.6318847956565078, "grad_norm": 0.3314562737941742, "learning_rate": 1.548482788274984e-05, "loss": 0.473, "step": 29794 }, { "epoch": 0.6319060041144409, "grad_norm": 0.36087924242019653, "learning_rate": 1.5484549023634182e-05, "loss": 0.554, "step": 29795 }, { "epoch": 0.6319272125723738, "grad_norm": 0.388319194316864, "learning_rate": 1.548427015841866e-05, "loss": 0.5305, "step": 29796 }, { "epoch": 0.6319484210303069, "grad_norm": 0.35559824109077454, "learning_rate": 1.5483991287103577e-05, "loss": 0.4865, "step": 29797 }, { "epoch": 0.6319696294882399, "grad_norm": 0.33938366174697876, "learning_rate": 1.5483712409689252e-05, "loss": 0.427, "step": 29798 }, { "epoch": 0.631990837946173, "grad_norm": 0.38909009099006653, "learning_rate": 1.5483433526175984e-05, "loss": 0.574, "step": 29799 }, { "epoch": 0.6320120464041059, "grad_norm": 0.5556790828704834, "learning_rate": 1.5483154636564093e-05, "loss": 0.5116, "step": 29800 }, { "epoch": 0.632033254862039, "grad_norm": 0.4619143605232239, "learning_rate": 1.5482875740853883e-05, "loss": 0.4641, "step": 29801 }, { "epoch": 0.6320544633199721, "grad_norm": 0.35654348134994507, "learning_rate": 1.5482596839045666e-05, "loss": 0.4529, "step": 29802 }, { "epoch": 0.632075671777905, "grad_norm": 0.36999234557151794, "learning_rate": 1.5482317931139754e-05, "loss": 0.5199, "step": 29803 }, { "epoch": 0.6320968802358381, "grad_norm": 0.45045387744903564, "learning_rate": 1.5482039017136454e-05, "loss": 0.4244, "step": 29804 }, { "epoch": 0.6321180886937711, "grad_norm": 0.31257402896881104, "learning_rate": 1.548176009703608e-05, "loss": 0.4747, "step": 29805 }, { "epoch": 0.6321392971517041, "grad_norm": 0.36440858244895935, "learning_rate": 1.5481481170838938e-05, "loss": 0.5335, "step": 29806 }, { "epoch": 0.6321605056096371, "grad_norm": 0.34371140599250793, "learning_rate": 1.5481202238545338e-05, "loss": 0.4852, "step": 29807 }, { "epoch": 0.6321817140675702, "grad_norm": 0.3369051516056061, "learning_rate": 1.5480923300155594e-05, "loss": 0.4958, "step": 29808 }, { "epoch": 0.6322029225255031, "grad_norm": 0.4774603247642517, "learning_rate": 1.5480644355670018e-05, "loss": 0.5023, "step": 29809 }, { "epoch": 0.6322241309834362, "grad_norm": 0.3289978802204132, "learning_rate": 1.5480365405088915e-05, "loss": 0.4622, "step": 29810 }, { "epoch": 0.6322453394413692, "grad_norm": 0.33641600608825684, "learning_rate": 1.5480086448412598e-05, "loss": 0.4584, "step": 29811 }, { "epoch": 0.6322665478993023, "grad_norm": 0.4935067594051361, "learning_rate": 1.5479807485641373e-05, "loss": 0.5587, "step": 29812 }, { "epoch": 0.6322877563572352, "grad_norm": 0.36468735337257385, "learning_rate": 1.5479528516775556e-05, "loss": 0.4809, "step": 29813 }, { "epoch": 0.6323089648151683, "grad_norm": 0.3782273828983307, "learning_rate": 1.5479249541815456e-05, "loss": 0.441, "step": 29814 }, { "epoch": 0.6323301732731013, "grad_norm": 0.4104648530483246, "learning_rate": 1.547897056076138e-05, "loss": 0.56, "step": 29815 }, { "epoch": 0.6323513817310343, "grad_norm": 0.4011014401912689, "learning_rate": 1.5478691573613646e-05, "loss": 0.4647, "step": 29816 }, { "epoch": 0.6323725901889674, "grad_norm": 0.35223647952079773, "learning_rate": 1.5478412580372552e-05, "loss": 0.4745, "step": 29817 }, { "epoch": 0.6323937986469004, "grad_norm": 0.32517579197883606, "learning_rate": 1.547813358103842e-05, "loss": 0.4026, "step": 29818 }, { "epoch": 0.6324150071048334, "grad_norm": 0.39488106966018677, "learning_rate": 1.547785457561155e-05, "loss": 0.4902, "step": 29819 }, { "epoch": 0.6324362155627664, "grad_norm": 0.9817833304405212, "learning_rate": 1.547757556409226e-05, "loss": 0.5085, "step": 29820 }, { "epoch": 0.6324574240206995, "grad_norm": 0.46774953603744507, "learning_rate": 1.547729654648086e-05, "loss": 0.4867, "step": 29821 }, { "epoch": 0.6324786324786325, "grad_norm": 0.3303472697734833, "learning_rate": 1.547701752277766e-05, "loss": 0.4993, "step": 29822 }, { "epoch": 0.6324998409365655, "grad_norm": 0.5228961706161499, "learning_rate": 1.5476738492982965e-05, "loss": 0.5368, "step": 29823 }, { "epoch": 0.6325210493944985, "grad_norm": 0.3967747390270233, "learning_rate": 1.547645945709709e-05, "loss": 0.5261, "step": 29824 }, { "epoch": 0.6325422578524316, "grad_norm": 0.3627679646015167, "learning_rate": 1.5476180415120342e-05, "loss": 0.4661, "step": 29825 }, { "epoch": 0.6325634663103645, "grad_norm": 0.36458876729011536, "learning_rate": 1.547590136705304e-05, "loss": 0.4513, "step": 29826 }, { "epoch": 0.6325846747682976, "grad_norm": 0.34188878536224365, "learning_rate": 1.5475622312895483e-05, "loss": 0.4486, "step": 29827 }, { "epoch": 0.6326058832262306, "grad_norm": 0.36493462324142456, "learning_rate": 1.547534325264799e-05, "loss": 0.4766, "step": 29828 }, { "epoch": 0.6326270916841636, "grad_norm": 0.49414190649986267, "learning_rate": 1.5475064186310865e-05, "loss": 0.5226, "step": 29829 }, { "epoch": 0.6326483001420967, "grad_norm": 0.3496341109275818, "learning_rate": 1.5474785113884422e-05, "loss": 0.5023, "step": 29830 }, { "epoch": 0.6326695086000297, "grad_norm": 0.33916810154914856, "learning_rate": 1.547450603536897e-05, "loss": 0.4712, "step": 29831 }, { "epoch": 0.6326907170579628, "grad_norm": 0.34047648310661316, "learning_rate": 1.547422695076482e-05, "loss": 0.4753, "step": 29832 }, { "epoch": 0.6327119255158957, "grad_norm": 0.3531823754310608, "learning_rate": 1.547394786007228e-05, "loss": 0.4912, "step": 29833 }, { "epoch": 0.6327331339738288, "grad_norm": 0.350198358297348, "learning_rate": 1.5473668763291668e-05, "loss": 0.4206, "step": 29834 }, { "epoch": 0.6327543424317618, "grad_norm": 0.36961960792541504, "learning_rate": 1.5473389660423285e-05, "loss": 0.4723, "step": 29835 }, { "epoch": 0.6327755508896948, "grad_norm": 0.4299262762069702, "learning_rate": 1.5473110551467448e-05, "loss": 0.4803, "step": 29836 }, { "epoch": 0.6327967593476278, "grad_norm": 0.34754976630210876, "learning_rate": 1.5472831436424463e-05, "loss": 0.4545, "step": 29837 }, { "epoch": 0.6328179678055609, "grad_norm": 0.43873947858810425, "learning_rate": 1.5472552315294645e-05, "loss": 0.5402, "step": 29838 }, { "epoch": 0.6328391762634938, "grad_norm": 0.4056979715824127, "learning_rate": 1.54722731880783e-05, "loss": 0.5108, "step": 29839 }, { "epoch": 0.6328603847214269, "grad_norm": 0.38792645931243896, "learning_rate": 1.5471994054775737e-05, "loss": 0.5231, "step": 29840 }, { "epoch": 0.6328815931793599, "grad_norm": 0.3641490340232849, "learning_rate": 1.5471714915387277e-05, "loss": 0.4337, "step": 29841 }, { "epoch": 0.632902801637293, "grad_norm": 0.3684295415878296, "learning_rate": 1.547143576991322e-05, "loss": 0.5287, "step": 29842 }, { "epoch": 0.632924010095226, "grad_norm": 0.3394129276275635, "learning_rate": 1.5471156618353877e-05, "loss": 0.4784, "step": 29843 }, { "epoch": 0.632945218553159, "grad_norm": 0.30214059352874756, "learning_rate": 1.5470877460709567e-05, "loss": 0.4744, "step": 29844 }, { "epoch": 0.6329664270110921, "grad_norm": 0.393390029668808, "learning_rate": 1.547059829698059e-05, "loss": 0.5813, "step": 29845 }, { "epoch": 0.632987635469025, "grad_norm": 0.3423965871334076, "learning_rate": 1.547031912716726e-05, "loss": 0.4789, "step": 29846 }, { "epoch": 0.6330088439269581, "grad_norm": 0.354500412940979, "learning_rate": 1.547003995126989e-05, "loss": 0.475, "step": 29847 }, { "epoch": 0.6330300523848911, "grad_norm": 0.30849164724349976, "learning_rate": 1.5469760769288793e-05, "loss": 0.4239, "step": 29848 }, { "epoch": 0.6330512608428241, "grad_norm": 0.3887811005115509, "learning_rate": 1.5469481581224274e-05, "loss": 0.432, "step": 29849 }, { "epoch": 0.6330724693007571, "grad_norm": 0.36266234517097473, "learning_rate": 1.546920238707664e-05, "loss": 0.4931, "step": 29850 }, { "epoch": 0.6330936777586902, "grad_norm": 0.3610302209854126, "learning_rate": 1.5468923186846208e-05, "loss": 0.367, "step": 29851 }, { "epoch": 0.6331148862166232, "grad_norm": 0.3707588016986847, "learning_rate": 1.5468643980533292e-05, "loss": 0.4376, "step": 29852 }, { "epoch": 0.6331360946745562, "grad_norm": 0.35638970136642456, "learning_rate": 1.5468364768138192e-05, "loss": 0.4888, "step": 29853 }, { "epoch": 0.6331573031324892, "grad_norm": 0.35031020641326904, "learning_rate": 1.546808554966123e-05, "loss": 0.5072, "step": 29854 }, { "epoch": 0.6331785115904223, "grad_norm": 0.34922122955322266, "learning_rate": 1.5467806325102708e-05, "loss": 0.5293, "step": 29855 }, { "epoch": 0.6331997200483552, "grad_norm": 0.3472261428833008, "learning_rate": 1.5467527094462938e-05, "loss": 0.4257, "step": 29856 }, { "epoch": 0.6332209285062883, "grad_norm": 0.3288883864879608, "learning_rate": 1.546724785774223e-05, "loss": 0.5341, "step": 29857 }, { "epoch": 0.6332421369642214, "grad_norm": 0.44600969552993774, "learning_rate": 1.5466968614940904e-05, "loss": 0.5608, "step": 29858 }, { "epoch": 0.6332633454221543, "grad_norm": 0.40632301568984985, "learning_rate": 1.5466689366059256e-05, "loss": 0.4476, "step": 29859 }, { "epoch": 0.6332845538800874, "grad_norm": 0.3445829451084137, "learning_rate": 1.5466410111097606e-05, "loss": 0.39, "step": 29860 }, { "epoch": 0.6333057623380204, "grad_norm": 0.38088029623031616, "learning_rate": 1.546613085005626e-05, "loss": 0.4794, "step": 29861 }, { "epoch": 0.6333269707959535, "grad_norm": 0.3621399402618408, "learning_rate": 1.5465851582935535e-05, "loss": 0.4931, "step": 29862 }, { "epoch": 0.6333481792538864, "grad_norm": 0.35253480076789856, "learning_rate": 1.5465572309735737e-05, "loss": 0.4484, "step": 29863 }, { "epoch": 0.6333693877118195, "grad_norm": 0.38007232546806335, "learning_rate": 1.5465293030457174e-05, "loss": 0.4883, "step": 29864 }, { "epoch": 0.6333905961697525, "grad_norm": 0.35876235365867615, "learning_rate": 1.5465013745100163e-05, "loss": 0.4906, "step": 29865 }, { "epoch": 0.6334118046276855, "grad_norm": 0.34800636768341064, "learning_rate": 1.5464734453665006e-05, "loss": 0.4755, "step": 29866 }, { "epoch": 0.6334330130856185, "grad_norm": 0.32986292243003845, "learning_rate": 1.5464455156152022e-05, "loss": 0.4744, "step": 29867 }, { "epoch": 0.6334542215435516, "grad_norm": 0.3307974934577942, "learning_rate": 1.546417585256152e-05, "loss": 0.5146, "step": 29868 }, { "epoch": 0.6334754300014845, "grad_norm": 0.3648962080478668, "learning_rate": 1.5463896542893806e-05, "loss": 0.5249, "step": 29869 }, { "epoch": 0.6334966384594176, "grad_norm": 0.3596799969673157, "learning_rate": 1.5463617227149193e-05, "loss": 0.5262, "step": 29870 }, { "epoch": 0.6335178469173507, "grad_norm": 0.6669461131095886, "learning_rate": 1.5463337905327994e-05, "loss": 0.5188, "step": 29871 }, { "epoch": 0.6335390553752837, "grad_norm": 0.3114263117313385, "learning_rate": 1.546305857743052e-05, "loss": 0.5078, "step": 29872 }, { "epoch": 0.6335602638332167, "grad_norm": 0.3811225891113281, "learning_rate": 1.546277924345708e-05, "loss": 0.4203, "step": 29873 }, { "epoch": 0.6335814722911497, "grad_norm": 0.3537672758102417, "learning_rate": 1.546249990340798e-05, "loss": 0.4675, "step": 29874 }, { "epoch": 0.6336026807490828, "grad_norm": 0.9135202765464783, "learning_rate": 1.546222055728354e-05, "loss": 0.5066, "step": 29875 }, { "epoch": 0.6336238892070157, "grad_norm": 0.34475240111351013, "learning_rate": 1.5461941205084065e-05, "loss": 0.5333, "step": 29876 }, { "epoch": 0.6336450976649488, "grad_norm": 0.33646202087402344, "learning_rate": 1.546166184680986e-05, "loss": 0.4909, "step": 29877 }, { "epoch": 0.6336663061228818, "grad_norm": 0.3605005443096161, "learning_rate": 1.546138248246125e-05, "loss": 0.468, "step": 29878 }, { "epoch": 0.6336875145808148, "grad_norm": 0.33026692271232605, "learning_rate": 1.5461103112038537e-05, "loss": 0.4505, "step": 29879 }, { "epoch": 0.6337087230387478, "grad_norm": 0.35957616567611694, "learning_rate": 1.5460823735542028e-05, "loss": 0.5257, "step": 29880 }, { "epoch": 0.6337299314966809, "grad_norm": 0.3858977258205414, "learning_rate": 1.546054435297204e-05, "loss": 0.4748, "step": 29881 }, { "epoch": 0.6337511399546139, "grad_norm": 0.41107550263404846, "learning_rate": 1.546026496432888e-05, "loss": 0.4775, "step": 29882 }, { "epoch": 0.6337723484125469, "grad_norm": 0.42446911334991455, "learning_rate": 1.5459985569612865e-05, "loss": 0.5161, "step": 29883 }, { "epoch": 0.63379355687048, "grad_norm": 0.33229127526283264, "learning_rate": 1.54597061688243e-05, "loss": 0.5287, "step": 29884 }, { "epoch": 0.633814765328413, "grad_norm": 0.33783942461013794, "learning_rate": 1.5459426761963498e-05, "loss": 0.4199, "step": 29885 }, { "epoch": 0.633835973786346, "grad_norm": 0.33141595125198364, "learning_rate": 1.545914734903077e-05, "loss": 0.4987, "step": 29886 }, { "epoch": 0.633857182244279, "grad_norm": 0.4003187417984009, "learning_rate": 1.5458867930026423e-05, "loss": 0.481, "step": 29887 }, { "epoch": 0.6338783907022121, "grad_norm": 0.49755287170410156, "learning_rate": 1.5458588504950772e-05, "loss": 0.4714, "step": 29888 }, { "epoch": 0.633899599160145, "grad_norm": 0.3527681529521942, "learning_rate": 1.5458309073804124e-05, "loss": 0.5489, "step": 29889 }, { "epoch": 0.6339208076180781, "grad_norm": 0.35077258944511414, "learning_rate": 1.5458029636586794e-05, "loss": 0.5027, "step": 29890 }, { "epoch": 0.6339420160760111, "grad_norm": 0.3221038281917572, "learning_rate": 1.5457750193299088e-05, "loss": 0.4338, "step": 29891 }, { "epoch": 0.6339632245339442, "grad_norm": 0.35245954990386963, "learning_rate": 1.5457470743941323e-05, "loss": 0.4381, "step": 29892 }, { "epoch": 0.6339844329918771, "grad_norm": 0.3814294636249542, "learning_rate": 1.5457191288513808e-05, "loss": 0.513, "step": 29893 }, { "epoch": 0.6340056414498102, "grad_norm": 0.3999216854572296, "learning_rate": 1.545691182701685e-05, "loss": 0.5208, "step": 29894 }, { "epoch": 0.6340268499077432, "grad_norm": 0.35794422030448914, "learning_rate": 1.5456632359450762e-05, "loss": 0.4634, "step": 29895 }, { "epoch": 0.6340480583656762, "grad_norm": 0.46077239513397217, "learning_rate": 1.5456352885815854e-05, "loss": 0.5854, "step": 29896 }, { "epoch": 0.6340692668236092, "grad_norm": 0.3415322005748749, "learning_rate": 1.545607340611244e-05, "loss": 0.498, "step": 29897 }, { "epoch": 0.6340904752815423, "grad_norm": 0.37841731309890747, "learning_rate": 1.5455793920340826e-05, "loss": 0.5292, "step": 29898 }, { "epoch": 0.6341116837394754, "grad_norm": 0.36024120450019836, "learning_rate": 1.5455514428501323e-05, "loss": 0.4686, "step": 29899 }, { "epoch": 0.6341328921974083, "grad_norm": 1.849853277206421, "learning_rate": 1.5455234930594252e-05, "loss": 0.5323, "step": 29900 }, { "epoch": 0.6341541006553414, "grad_norm": 0.3176082968711853, "learning_rate": 1.545495542661991e-05, "loss": 0.4389, "step": 29901 }, { "epoch": 0.6341753091132744, "grad_norm": 0.33918383717536926, "learning_rate": 1.5454675916578616e-05, "loss": 0.437, "step": 29902 }, { "epoch": 0.6341965175712074, "grad_norm": 0.4029412865638733, "learning_rate": 1.545439640047068e-05, "loss": 0.519, "step": 29903 }, { "epoch": 0.6342177260291404, "grad_norm": 0.38821515440940857, "learning_rate": 1.545411687829641e-05, "loss": 0.5258, "step": 29904 }, { "epoch": 0.6342389344870735, "grad_norm": 0.3197943866252899, "learning_rate": 1.5453837350056117e-05, "loss": 0.4131, "step": 29905 }, { "epoch": 0.6342601429450064, "grad_norm": 0.38740625977516174, "learning_rate": 1.5453557815750116e-05, "loss": 0.5448, "step": 29906 }, { "epoch": 0.6342813514029395, "grad_norm": 0.40412014722824097, "learning_rate": 1.5453278275378715e-05, "loss": 0.4887, "step": 29907 }, { "epoch": 0.6343025598608725, "grad_norm": 0.332290381193161, "learning_rate": 1.5452998728942223e-05, "loss": 0.4491, "step": 29908 }, { "epoch": 0.6343237683188055, "grad_norm": 0.34164541959762573, "learning_rate": 1.5452719176440955e-05, "loss": 0.5414, "step": 29909 }, { "epoch": 0.6343449767767385, "grad_norm": 0.34354549646377563, "learning_rate": 1.5452439617875222e-05, "loss": 0.4801, "step": 29910 }, { "epoch": 0.6343661852346716, "grad_norm": 0.4043237566947937, "learning_rate": 1.5452160053245333e-05, "loss": 0.5858, "step": 29911 }, { "epoch": 0.6343873936926047, "grad_norm": 0.3765885829925537, "learning_rate": 1.5451880482551596e-05, "loss": 0.5771, "step": 29912 }, { "epoch": 0.6344086021505376, "grad_norm": 0.3839225172996521, "learning_rate": 1.5451600905794326e-05, "loss": 0.5351, "step": 29913 }, { "epoch": 0.6344298106084707, "grad_norm": 0.34279727935791016, "learning_rate": 1.5451321322973835e-05, "loss": 0.5292, "step": 29914 }, { "epoch": 0.6344510190664037, "grad_norm": 0.3459846079349518, "learning_rate": 1.5451041734090425e-05, "loss": 0.4716, "step": 29915 }, { "epoch": 0.6344722275243367, "grad_norm": 0.687432050704956, "learning_rate": 1.545076213914442e-05, "loss": 0.5606, "step": 29916 }, { "epoch": 0.6344934359822697, "grad_norm": 0.3477379083633423, "learning_rate": 1.5450482538136123e-05, "loss": 0.4902, "step": 29917 }, { "epoch": 0.6345146444402028, "grad_norm": 0.32727909088134766, "learning_rate": 1.545020293106585e-05, "loss": 0.4662, "step": 29918 }, { "epoch": 0.6345358528981357, "grad_norm": 0.35295799374580383, "learning_rate": 1.5449923317933904e-05, "loss": 0.5393, "step": 29919 }, { "epoch": 0.6345570613560688, "grad_norm": 0.3612465560436249, "learning_rate": 1.5449643698740602e-05, "loss": 0.5303, "step": 29920 }, { "epoch": 0.6345782698140018, "grad_norm": 0.38848230242729187, "learning_rate": 1.5449364073486254e-05, "loss": 0.4921, "step": 29921 }, { "epoch": 0.6345994782719349, "grad_norm": 0.30496451258659363, "learning_rate": 1.544908444217117e-05, "loss": 0.3813, "step": 29922 }, { "epoch": 0.6346206867298678, "grad_norm": 0.36118292808532715, "learning_rate": 1.5448804804795664e-05, "loss": 0.4477, "step": 29923 }, { "epoch": 0.6346418951878009, "grad_norm": 0.3600045144557953, "learning_rate": 1.5448525161360042e-05, "loss": 0.45, "step": 29924 }, { "epoch": 0.634663103645734, "grad_norm": 0.3440183997154236, "learning_rate": 1.5448245511864618e-05, "loss": 0.4263, "step": 29925 }, { "epoch": 0.6346843121036669, "grad_norm": 0.3569013774394989, "learning_rate": 1.5447965856309702e-05, "loss": 0.4313, "step": 29926 }, { "epoch": 0.6347055205616, "grad_norm": 0.4125346839427948, "learning_rate": 1.5447686194695608e-05, "loss": 0.4691, "step": 29927 }, { "epoch": 0.634726729019533, "grad_norm": 0.4116152226924896, "learning_rate": 1.5447406527022643e-05, "loss": 0.5487, "step": 29928 }, { "epoch": 0.634747937477466, "grad_norm": 0.4318832755088806, "learning_rate": 1.544712685329112e-05, "loss": 0.4968, "step": 29929 }, { "epoch": 0.634769145935399, "grad_norm": 0.34785380959510803, "learning_rate": 1.544684717350135e-05, "loss": 0.4699, "step": 29930 }, { "epoch": 0.6347903543933321, "grad_norm": 0.35485365986824036, "learning_rate": 1.5446567487653644e-05, "loss": 0.5067, "step": 29931 }, { "epoch": 0.634811562851265, "grad_norm": 0.3782860338687897, "learning_rate": 1.5446287795748313e-05, "loss": 0.4665, "step": 29932 }, { "epoch": 0.6348327713091981, "grad_norm": 0.37249937653541565, "learning_rate": 1.544600809778567e-05, "loss": 0.4705, "step": 29933 }, { "epoch": 0.6348539797671311, "grad_norm": 0.3576500713825226, "learning_rate": 1.544572839376602e-05, "loss": 0.4514, "step": 29934 }, { "epoch": 0.6348751882250642, "grad_norm": 0.3990689814090729, "learning_rate": 1.544544868368968e-05, "loss": 0.5488, "step": 29935 }, { "epoch": 0.6348963966829971, "grad_norm": 0.30684757232666016, "learning_rate": 1.544516896755696e-05, "loss": 0.4798, "step": 29936 }, { "epoch": 0.6349176051409302, "grad_norm": 0.3642931580543518, "learning_rate": 1.5444889245368167e-05, "loss": 0.5174, "step": 29937 }, { "epoch": 0.6349388135988632, "grad_norm": 0.3548964560031891, "learning_rate": 1.5444609517123622e-05, "loss": 0.5341, "step": 29938 }, { "epoch": 0.6349600220567962, "grad_norm": 0.37806564569473267, "learning_rate": 1.5444329782823625e-05, "loss": 0.4741, "step": 29939 }, { "epoch": 0.6349812305147293, "grad_norm": 0.3627803921699524, "learning_rate": 1.5444050042468494e-05, "loss": 0.5118, "step": 29940 }, { "epoch": 0.6350024389726623, "grad_norm": 0.3591887056827545, "learning_rate": 1.5443770296058534e-05, "loss": 0.4818, "step": 29941 }, { "epoch": 0.6350236474305954, "grad_norm": 0.41664332151412964, "learning_rate": 1.5443490543594065e-05, "loss": 0.5197, "step": 29942 }, { "epoch": 0.6350448558885283, "grad_norm": 0.35709092020988464, "learning_rate": 1.544321078507539e-05, "loss": 0.4561, "step": 29943 }, { "epoch": 0.6350660643464614, "grad_norm": 0.3545292913913727, "learning_rate": 1.544293102050282e-05, "loss": 0.4794, "step": 29944 }, { "epoch": 0.6350872728043944, "grad_norm": 0.36609557271003723, "learning_rate": 1.5442651249876677e-05, "loss": 0.5796, "step": 29945 }, { "epoch": 0.6351084812623274, "grad_norm": 0.34053200483322144, "learning_rate": 1.544237147319726e-05, "loss": 0.506, "step": 29946 }, { "epoch": 0.6351296897202604, "grad_norm": 0.37393128871917725, "learning_rate": 1.5442091690464885e-05, "loss": 0.5827, "step": 29947 }, { "epoch": 0.6351508981781935, "grad_norm": 0.3901643455028534, "learning_rate": 1.5441811901679863e-05, "loss": 0.5334, "step": 29948 }, { "epoch": 0.6351721066361264, "grad_norm": 0.34514734148979187, "learning_rate": 1.544153210684251e-05, "loss": 0.5212, "step": 29949 }, { "epoch": 0.6351933150940595, "grad_norm": 0.36820197105407715, "learning_rate": 1.5441252305953125e-05, "loss": 0.4788, "step": 29950 }, { "epoch": 0.6352145235519925, "grad_norm": 0.3663051128387451, "learning_rate": 1.544097249901203e-05, "loss": 0.5766, "step": 29951 }, { "epoch": 0.6352357320099256, "grad_norm": 0.37813079357147217, "learning_rate": 1.544069268601953e-05, "loss": 0.5901, "step": 29952 }, { "epoch": 0.6352569404678586, "grad_norm": 0.35118377208709717, "learning_rate": 1.544041286697594e-05, "loss": 0.4874, "step": 29953 }, { "epoch": 0.6352781489257916, "grad_norm": 0.3669160306453705, "learning_rate": 1.544013304188157e-05, "loss": 0.5394, "step": 29954 }, { "epoch": 0.6352993573837247, "grad_norm": 0.33631062507629395, "learning_rate": 1.543985321073673e-05, "loss": 0.4813, "step": 29955 }, { "epoch": 0.6353205658416576, "grad_norm": 0.33632293343544006, "learning_rate": 1.543957337354174e-05, "loss": 0.3992, "step": 29956 }, { "epoch": 0.6353417742995907, "grad_norm": 0.31760773062705994, "learning_rate": 1.5439293530296893e-05, "loss": 0.5009, "step": 29957 }, { "epoch": 0.6353629827575237, "grad_norm": 0.33537593483924866, "learning_rate": 1.543901368100252e-05, "loss": 0.4764, "step": 29958 }, { "epoch": 0.6353841912154568, "grad_norm": 0.3210445046424866, "learning_rate": 1.543873382565892e-05, "loss": 0.5101, "step": 29959 }, { "epoch": 0.6354053996733897, "grad_norm": 0.3566906750202179, "learning_rate": 1.5438453964266406e-05, "loss": 0.5555, "step": 29960 }, { "epoch": 0.6354266081313228, "grad_norm": 0.35342684388160706, "learning_rate": 1.543817409682529e-05, "loss": 0.413, "step": 29961 }, { "epoch": 0.6354478165892558, "grad_norm": 0.3785105049610138, "learning_rate": 1.543789422333589e-05, "loss": 0.5325, "step": 29962 }, { "epoch": 0.6354690250471888, "grad_norm": 0.38080963492393494, "learning_rate": 1.5437614343798504e-05, "loss": 0.4845, "step": 29963 }, { "epoch": 0.6354902335051218, "grad_norm": 0.3522903025150299, "learning_rate": 1.5437334458213452e-05, "loss": 0.4457, "step": 29964 }, { "epoch": 0.6355114419630549, "grad_norm": 0.37907546758651733, "learning_rate": 1.543705456658105e-05, "loss": 0.5348, "step": 29965 }, { "epoch": 0.6355326504209879, "grad_norm": 0.3391222655773163, "learning_rate": 1.5436774668901596e-05, "loss": 0.5558, "step": 29966 }, { "epoch": 0.6355538588789209, "grad_norm": 0.3977939486503601, "learning_rate": 1.5436494765175413e-05, "loss": 0.5205, "step": 29967 }, { "epoch": 0.635575067336854, "grad_norm": 0.4032421410083771, "learning_rate": 1.5436214855402803e-05, "loss": 0.4561, "step": 29968 }, { "epoch": 0.635596275794787, "grad_norm": 0.3673989176750183, "learning_rate": 1.5435934939584086e-05, "loss": 0.5741, "step": 29969 }, { "epoch": 0.63561748425272, "grad_norm": 0.3831569254398346, "learning_rate": 1.5435655017719572e-05, "loss": 0.4989, "step": 29970 }, { "epoch": 0.635638692710653, "grad_norm": 0.39561009407043457, "learning_rate": 1.5435375089809565e-05, "loss": 0.4739, "step": 29971 }, { "epoch": 0.6356599011685861, "grad_norm": 0.3521902561187744, "learning_rate": 1.543509515585438e-05, "loss": 0.5788, "step": 29972 }, { "epoch": 0.635681109626519, "grad_norm": 0.35238152742385864, "learning_rate": 1.543481521585433e-05, "loss": 0.4427, "step": 29973 }, { "epoch": 0.6357023180844521, "grad_norm": 0.3284851014614105, "learning_rate": 1.5434535269809726e-05, "loss": 0.5199, "step": 29974 }, { "epoch": 0.6357235265423851, "grad_norm": 0.3439686596393585, "learning_rate": 1.5434255317720882e-05, "loss": 0.525, "step": 29975 }, { "epoch": 0.6357447350003181, "grad_norm": 0.3233364224433899, "learning_rate": 1.5433975359588105e-05, "loss": 0.4185, "step": 29976 }, { "epoch": 0.6357659434582511, "grad_norm": 0.3243420720100403, "learning_rate": 1.5433695395411708e-05, "loss": 0.4334, "step": 29977 }, { "epoch": 0.6357871519161842, "grad_norm": 0.46965572237968445, "learning_rate": 1.5433415425192e-05, "loss": 0.454, "step": 29978 }, { "epoch": 0.6358083603741171, "grad_norm": 0.32680854201316833, "learning_rate": 1.5433135448929297e-05, "loss": 0.4501, "step": 29979 }, { "epoch": 0.6358295688320502, "grad_norm": 0.35817191004753113, "learning_rate": 1.5432855466623908e-05, "loss": 0.5102, "step": 29980 }, { "epoch": 0.6358507772899833, "grad_norm": 0.5010682940483093, "learning_rate": 1.543257547827614e-05, "loss": 0.5491, "step": 29981 }, { "epoch": 0.6358719857479163, "grad_norm": 0.4216531217098236, "learning_rate": 1.543229548388632e-05, "loss": 0.4048, "step": 29982 }, { "epoch": 0.6358931942058493, "grad_norm": 0.37539979815483093, "learning_rate": 1.5432015483454736e-05, "loss": 0.4986, "step": 29983 }, { "epoch": 0.6359144026637823, "grad_norm": 0.387585312128067, "learning_rate": 1.5431735476981717e-05, "loss": 0.5495, "step": 29984 }, { "epoch": 0.6359356111217154, "grad_norm": 0.3720493018627167, "learning_rate": 1.5431455464467565e-05, "loss": 0.4932, "step": 29985 }, { "epoch": 0.6359568195796483, "grad_norm": 0.4719933569431305, "learning_rate": 1.5431175445912598e-05, "loss": 0.5176, "step": 29986 }, { "epoch": 0.6359780280375814, "grad_norm": 0.32307490706443787, "learning_rate": 1.5430895421317127e-05, "loss": 0.4666, "step": 29987 }, { "epoch": 0.6359992364955144, "grad_norm": 0.3698284924030304, "learning_rate": 1.543061539068146e-05, "loss": 0.4953, "step": 29988 }, { "epoch": 0.6360204449534475, "grad_norm": 0.3531540632247925, "learning_rate": 1.543033535400591e-05, "loss": 0.5188, "step": 29989 }, { "epoch": 0.6360416534113804, "grad_norm": 0.3162917196750641, "learning_rate": 1.5430055311290788e-05, "loss": 0.5057, "step": 29990 }, { "epoch": 0.6360628618693135, "grad_norm": 0.3992103934288025, "learning_rate": 1.5429775262536403e-05, "loss": 0.4784, "step": 29991 }, { "epoch": 0.6360840703272465, "grad_norm": 0.35433435440063477, "learning_rate": 1.5429495207743072e-05, "loss": 0.6118, "step": 29992 }, { "epoch": 0.6361052787851795, "grad_norm": 0.349690318107605, "learning_rate": 1.5429215146911104e-05, "loss": 0.4442, "step": 29993 }, { "epoch": 0.6361264872431126, "grad_norm": 0.36832401156425476, "learning_rate": 1.542893508004081e-05, "loss": 0.4854, "step": 29994 }, { "epoch": 0.6361476957010456, "grad_norm": 0.36039724946022034, "learning_rate": 1.54286550071325e-05, "loss": 0.4257, "step": 29995 }, { "epoch": 0.6361689041589786, "grad_norm": 0.34957846999168396, "learning_rate": 1.5428374928186488e-05, "loss": 0.4942, "step": 29996 }, { "epoch": 0.6361901126169116, "grad_norm": 0.37708306312561035, "learning_rate": 1.5428094843203084e-05, "loss": 0.4425, "step": 29997 }, { "epoch": 0.6362113210748447, "grad_norm": 0.3160131275653839, "learning_rate": 1.5427814752182597e-05, "loss": 0.448, "step": 29998 }, { "epoch": 0.6362325295327776, "grad_norm": 0.3512434661388397, "learning_rate": 1.5427534655125348e-05, "loss": 0.5292, "step": 29999 }, { "epoch": 0.6362537379907107, "grad_norm": 0.9899955987930298, "learning_rate": 1.5427254552031637e-05, "loss": 0.5214, "step": 30000 }, { "epoch": 0.6362749464486437, "grad_norm": 0.38638657331466675, "learning_rate": 1.542697444290178e-05, "loss": 0.4577, "step": 30001 }, { "epoch": 0.6362961549065768, "grad_norm": 0.3416985869407654, "learning_rate": 1.5426694327736095e-05, "loss": 0.4844, "step": 30002 }, { "epoch": 0.6363173633645097, "grad_norm": 0.34605464339256287, "learning_rate": 1.5426414206534883e-05, "loss": 0.4532, "step": 30003 }, { "epoch": 0.6363385718224428, "grad_norm": 0.3546384572982788, "learning_rate": 1.5426134079298463e-05, "loss": 0.5752, "step": 30004 }, { "epoch": 0.6363597802803758, "grad_norm": 0.45195671916007996, "learning_rate": 1.542585394602714e-05, "loss": 0.4949, "step": 30005 }, { "epoch": 0.6363809887383088, "grad_norm": 0.3108881413936615, "learning_rate": 1.5425573806721233e-05, "loss": 0.4218, "step": 30006 }, { "epoch": 0.6364021971962419, "grad_norm": 0.3283202052116394, "learning_rate": 1.542529366138105e-05, "loss": 0.4894, "step": 30007 }, { "epoch": 0.6364234056541749, "grad_norm": 0.3757684826850891, "learning_rate": 1.54250135100069e-05, "loss": 0.4899, "step": 30008 }, { "epoch": 0.636444614112108, "grad_norm": 0.328780859708786, "learning_rate": 1.54247333525991e-05, "loss": 0.4768, "step": 30009 }, { "epoch": 0.6364658225700409, "grad_norm": 0.3454274535179138, "learning_rate": 1.5424453189157954e-05, "loss": 0.4724, "step": 30010 }, { "epoch": 0.636487031027974, "grad_norm": 0.3492807447910309, "learning_rate": 1.5424173019683782e-05, "loss": 0.5293, "step": 30011 }, { "epoch": 0.636508239485907, "grad_norm": 0.37261858582496643, "learning_rate": 1.5423892844176892e-05, "loss": 0.5061, "step": 30012 }, { "epoch": 0.63652944794384, "grad_norm": 0.33664166927337646, "learning_rate": 1.5423612662637593e-05, "loss": 0.4874, "step": 30013 }, { "epoch": 0.636550656401773, "grad_norm": 0.3359396755695343, "learning_rate": 1.5423332475066204e-05, "loss": 0.4992, "step": 30014 }, { "epoch": 0.6365718648597061, "grad_norm": 0.34728118777275085, "learning_rate": 1.5423052281463028e-05, "loss": 0.5007, "step": 30015 }, { "epoch": 0.636593073317639, "grad_norm": 0.3583265244960785, "learning_rate": 1.542277208182838e-05, "loss": 0.5056, "step": 30016 }, { "epoch": 0.6366142817755721, "grad_norm": 0.3259716331958771, "learning_rate": 1.5422491876162572e-05, "loss": 0.4582, "step": 30017 }, { "epoch": 0.6366354902335051, "grad_norm": 0.38582509756088257, "learning_rate": 1.542221166446592e-05, "loss": 0.4207, "step": 30018 }, { "epoch": 0.6366566986914381, "grad_norm": 0.35992947220802307, "learning_rate": 1.5421931446738726e-05, "loss": 0.4829, "step": 30019 }, { "epoch": 0.6366779071493711, "grad_norm": 0.35108378529548645, "learning_rate": 1.542165122298131e-05, "loss": 0.4973, "step": 30020 }, { "epoch": 0.6366991156073042, "grad_norm": 0.4391292929649353, "learning_rate": 1.5421370993193977e-05, "loss": 0.4916, "step": 30021 }, { "epoch": 0.6367203240652373, "grad_norm": 0.37008509039878845, "learning_rate": 1.5421090757377047e-05, "loss": 0.4945, "step": 30022 }, { "epoch": 0.6367415325231702, "grad_norm": 0.39617180824279785, "learning_rate": 1.5420810515530825e-05, "loss": 0.5115, "step": 30023 }, { "epoch": 0.6367627409811033, "grad_norm": 0.3534453511238098, "learning_rate": 1.5420530267655623e-05, "loss": 0.4514, "step": 30024 }, { "epoch": 0.6367839494390363, "grad_norm": 0.3449721932411194, "learning_rate": 1.5420250013751755e-05, "loss": 0.4351, "step": 30025 }, { "epoch": 0.6368051578969693, "grad_norm": 0.33810165524482727, "learning_rate": 1.5419969753819533e-05, "loss": 0.4509, "step": 30026 }, { "epoch": 0.6368263663549023, "grad_norm": 0.4379417598247528, "learning_rate": 1.541968948785927e-05, "loss": 0.5912, "step": 30027 }, { "epoch": 0.6368475748128354, "grad_norm": 0.3436138331890106, "learning_rate": 1.541940921587127e-05, "loss": 0.4834, "step": 30028 }, { "epoch": 0.6368687832707683, "grad_norm": 0.37413400411605835, "learning_rate": 1.5419128937855855e-05, "loss": 0.4813, "step": 30029 }, { "epoch": 0.6368899917287014, "grad_norm": 0.3604184091091156, "learning_rate": 1.5418848653813328e-05, "loss": 0.4889, "step": 30030 }, { "epoch": 0.6369112001866344, "grad_norm": 0.3331655263900757, "learning_rate": 1.5418568363744006e-05, "loss": 0.3947, "step": 30031 }, { "epoch": 0.6369324086445675, "grad_norm": 0.3709167242050171, "learning_rate": 1.5418288067648203e-05, "loss": 0.4329, "step": 30032 }, { "epoch": 0.6369536171025004, "grad_norm": 0.3827874958515167, "learning_rate": 1.5418007765526222e-05, "loss": 0.5182, "step": 30033 }, { "epoch": 0.6369748255604335, "grad_norm": 0.4502170979976654, "learning_rate": 1.541772745737838e-05, "loss": 0.5275, "step": 30034 }, { "epoch": 0.6369960340183666, "grad_norm": 0.3681301176548004, "learning_rate": 1.5417447143204992e-05, "loss": 0.5315, "step": 30035 }, { "epoch": 0.6370172424762995, "grad_norm": 0.31930094957351685, "learning_rate": 1.5417166823006365e-05, "loss": 0.4791, "step": 30036 }, { "epoch": 0.6370384509342326, "grad_norm": 0.3500770032405853, "learning_rate": 1.541688649678281e-05, "loss": 0.4479, "step": 30037 }, { "epoch": 0.6370596593921656, "grad_norm": 0.3321811854839325, "learning_rate": 1.5416606164534643e-05, "loss": 0.4778, "step": 30038 }, { "epoch": 0.6370808678500987, "grad_norm": 0.34913885593414307, "learning_rate": 1.5416325826262173e-05, "loss": 0.4711, "step": 30039 }, { "epoch": 0.6371020763080316, "grad_norm": 0.34865716099739075, "learning_rate": 1.5416045481965714e-05, "loss": 0.4554, "step": 30040 }, { "epoch": 0.6371232847659647, "grad_norm": 0.3307972252368927, "learning_rate": 1.5415765131645576e-05, "loss": 0.5046, "step": 30041 }, { "epoch": 0.6371444932238977, "grad_norm": 0.35395246744155884, "learning_rate": 1.541548477530207e-05, "loss": 0.5133, "step": 30042 }, { "epoch": 0.6371657016818307, "grad_norm": 0.3216368556022644, "learning_rate": 1.5415204412935506e-05, "loss": 0.472, "step": 30043 }, { "epoch": 0.6371869101397637, "grad_norm": 0.37262946367263794, "learning_rate": 1.5414924044546204e-05, "loss": 0.5464, "step": 30044 }, { "epoch": 0.6372081185976968, "grad_norm": 0.3302795886993408, "learning_rate": 1.5414643670134467e-05, "loss": 0.4899, "step": 30045 }, { "epoch": 0.6372293270556297, "grad_norm": 0.3657741844654083, "learning_rate": 1.541436328970061e-05, "loss": 0.4954, "step": 30046 }, { "epoch": 0.6372505355135628, "grad_norm": 0.3704734146595001, "learning_rate": 1.541408290324495e-05, "loss": 0.4313, "step": 30047 }, { "epoch": 0.6372717439714959, "grad_norm": 0.4014095664024353, "learning_rate": 1.541380251076779e-05, "loss": 0.5381, "step": 30048 }, { "epoch": 0.6372929524294288, "grad_norm": 0.3549226224422455, "learning_rate": 1.5413522112269448e-05, "loss": 0.4941, "step": 30049 }, { "epoch": 0.6373141608873619, "grad_norm": 0.39334338903427124, "learning_rate": 1.541324170775023e-05, "loss": 0.5426, "step": 30050 }, { "epoch": 0.6373353693452949, "grad_norm": 0.3279513418674469, "learning_rate": 1.5412961297210457e-05, "loss": 0.3661, "step": 30051 }, { "epoch": 0.637356577803228, "grad_norm": 0.34839800000190735, "learning_rate": 1.541268088065043e-05, "loss": 0.5, "step": 30052 }, { "epoch": 0.6373777862611609, "grad_norm": 0.3492339551448822, "learning_rate": 1.541240045807047e-05, "loss": 0.46, "step": 30053 }, { "epoch": 0.637398994719094, "grad_norm": 0.3303379714488983, "learning_rate": 1.5412120029470884e-05, "loss": 0.458, "step": 30054 }, { "epoch": 0.637420203177027, "grad_norm": 0.5327787399291992, "learning_rate": 1.5411839594851985e-05, "loss": 0.5537, "step": 30055 }, { "epoch": 0.63744141163496, "grad_norm": 0.361704021692276, "learning_rate": 1.5411559154214086e-05, "loss": 0.4815, "step": 30056 }, { "epoch": 0.637462620092893, "grad_norm": 0.3471219539642334, "learning_rate": 1.5411278707557493e-05, "loss": 0.4656, "step": 30057 }, { "epoch": 0.6374838285508261, "grad_norm": 0.9268012642860413, "learning_rate": 1.541099825488253e-05, "loss": 0.4783, "step": 30058 }, { "epoch": 0.637505037008759, "grad_norm": 0.5531669855117798, "learning_rate": 1.5410717796189498e-05, "loss": 0.5351, "step": 30059 }, { "epoch": 0.6375262454666921, "grad_norm": 0.36188292503356934, "learning_rate": 1.5410437331478712e-05, "loss": 0.4672, "step": 30060 }, { "epoch": 0.6375474539246252, "grad_norm": 0.3519047200679779, "learning_rate": 1.5410156860750487e-05, "loss": 0.4472, "step": 30061 }, { "epoch": 0.6375686623825582, "grad_norm": 0.33479559421539307, "learning_rate": 1.5409876384005128e-05, "loss": 0.4978, "step": 30062 }, { "epoch": 0.6375898708404912, "grad_norm": 0.3676915466785431, "learning_rate": 1.5409595901242956e-05, "loss": 0.4698, "step": 30063 }, { "epoch": 0.6376110792984242, "grad_norm": 0.35855552554130554, "learning_rate": 1.540931541246428e-05, "loss": 0.4102, "step": 30064 }, { "epoch": 0.6376322877563573, "grad_norm": 0.3909560739994049, "learning_rate": 1.5409034917669407e-05, "loss": 0.5159, "step": 30065 }, { "epoch": 0.6376534962142902, "grad_norm": 0.8863144516944885, "learning_rate": 1.5408754416858654e-05, "loss": 0.4861, "step": 30066 }, { "epoch": 0.6376747046722233, "grad_norm": 0.3256566524505615, "learning_rate": 1.540847391003233e-05, "loss": 0.4608, "step": 30067 }, { "epoch": 0.6376959131301563, "grad_norm": 0.3388254940509796, "learning_rate": 1.5408193397190747e-05, "loss": 0.4406, "step": 30068 }, { "epoch": 0.6377171215880894, "grad_norm": 0.3653852641582489, "learning_rate": 1.540791287833422e-05, "loss": 0.5122, "step": 30069 }, { "epoch": 0.6377383300460223, "grad_norm": 0.3375045359134674, "learning_rate": 1.540763235346306e-05, "loss": 0.4712, "step": 30070 }, { "epoch": 0.6377595385039554, "grad_norm": 0.3861796259880066, "learning_rate": 1.5407351822577578e-05, "loss": 0.5653, "step": 30071 }, { "epoch": 0.6377807469618884, "grad_norm": 0.3385787606239319, "learning_rate": 1.5407071285678085e-05, "loss": 0.469, "step": 30072 }, { "epoch": 0.6378019554198214, "grad_norm": 0.3864501714706421, "learning_rate": 1.5406790742764898e-05, "loss": 0.5343, "step": 30073 }, { "epoch": 0.6378231638777544, "grad_norm": 0.38302892446517944, "learning_rate": 1.540651019383832e-05, "loss": 0.5126, "step": 30074 }, { "epoch": 0.6378443723356875, "grad_norm": 0.3530474603176117, "learning_rate": 1.540622963889867e-05, "loss": 0.5504, "step": 30075 }, { "epoch": 0.6378655807936205, "grad_norm": 0.36352741718292236, "learning_rate": 1.5405949077946257e-05, "loss": 0.511, "step": 30076 }, { "epoch": 0.6378867892515535, "grad_norm": 0.3561533987522125, "learning_rate": 1.54056685109814e-05, "loss": 0.5183, "step": 30077 }, { "epoch": 0.6379079977094866, "grad_norm": 0.367888867855072, "learning_rate": 1.5405387938004402e-05, "loss": 0.549, "step": 30078 }, { "epoch": 0.6379292061674195, "grad_norm": 0.3784790635108948, "learning_rate": 1.5405107359015577e-05, "loss": 0.5607, "step": 30079 }, { "epoch": 0.6379504146253526, "grad_norm": 0.3330872058868408, "learning_rate": 1.540482677401524e-05, "loss": 0.4741, "step": 30080 }, { "epoch": 0.6379716230832856, "grad_norm": 0.42571142315864563, "learning_rate": 1.5404546183003705e-05, "loss": 0.4706, "step": 30081 }, { "epoch": 0.6379928315412187, "grad_norm": 0.3335109055042267, "learning_rate": 1.5404265585981274e-05, "loss": 0.4583, "step": 30082 }, { "epoch": 0.6380140399991516, "grad_norm": 0.33151522278785706, "learning_rate": 1.540398498294827e-05, "loss": 0.4635, "step": 30083 }, { "epoch": 0.6380352484570847, "grad_norm": 0.3682330250740051, "learning_rate": 1.5403704373905e-05, "loss": 0.4794, "step": 30084 }, { "epoch": 0.6380564569150177, "grad_norm": 0.37759315967559814, "learning_rate": 1.5403423758851778e-05, "loss": 0.5221, "step": 30085 }, { "epoch": 0.6380776653729507, "grad_norm": 0.3609161376953125, "learning_rate": 1.5403143137788916e-05, "loss": 0.4698, "step": 30086 }, { "epoch": 0.6380988738308837, "grad_norm": 0.363022118806839, "learning_rate": 1.540286251071672e-05, "loss": 0.5044, "step": 30087 }, { "epoch": 0.6381200822888168, "grad_norm": 0.3603834807872772, "learning_rate": 1.5402581877635508e-05, "loss": 0.5288, "step": 30088 }, { "epoch": 0.6381412907467499, "grad_norm": 0.4743577837944031, "learning_rate": 1.5402301238545595e-05, "loss": 0.4479, "step": 30089 }, { "epoch": 0.6381624992046828, "grad_norm": 0.6495819091796875, "learning_rate": 1.5402020593447292e-05, "loss": 0.4465, "step": 30090 }, { "epoch": 0.6381837076626159, "grad_norm": 0.5100231170654297, "learning_rate": 1.54017399423409e-05, "loss": 0.4659, "step": 30091 }, { "epoch": 0.6382049161205489, "grad_norm": 0.3926476240158081, "learning_rate": 1.5401459285226747e-05, "loss": 0.563, "step": 30092 }, { "epoch": 0.6382261245784819, "grad_norm": 0.46458375453948975, "learning_rate": 1.5401178622105134e-05, "loss": 0.5761, "step": 30093 }, { "epoch": 0.6382473330364149, "grad_norm": 0.3320659399032593, "learning_rate": 1.5400897952976378e-05, "loss": 0.3754, "step": 30094 }, { "epoch": 0.638268541494348, "grad_norm": 0.3625071048736572, "learning_rate": 1.540061727784079e-05, "loss": 0.4462, "step": 30095 }, { "epoch": 0.6382897499522809, "grad_norm": 0.3239063620567322, "learning_rate": 1.5400336596698683e-05, "loss": 0.4259, "step": 30096 }, { "epoch": 0.638310958410214, "grad_norm": 0.42148083448410034, "learning_rate": 1.5400055909550367e-05, "loss": 0.4687, "step": 30097 }, { "epoch": 0.638332166868147, "grad_norm": 0.37500664591789246, "learning_rate": 1.5399775216396162e-05, "loss": 0.6239, "step": 30098 }, { "epoch": 0.63835337532608, "grad_norm": 0.3608459234237671, "learning_rate": 1.5399494517236367e-05, "loss": 0.5122, "step": 30099 }, { "epoch": 0.638374583784013, "grad_norm": 0.34820690751075745, "learning_rate": 1.5399213812071305e-05, "loss": 0.5022, "step": 30100 }, { "epoch": 0.6383957922419461, "grad_norm": 0.4528045654296875, "learning_rate": 1.539893310090128e-05, "loss": 0.5504, "step": 30101 }, { "epoch": 0.6384170006998792, "grad_norm": 0.3478160798549652, "learning_rate": 1.539865238372661e-05, "loss": 0.4609, "step": 30102 }, { "epoch": 0.6384382091578121, "grad_norm": 0.3383050262928009, "learning_rate": 1.539837166054761e-05, "loss": 0.5252, "step": 30103 }, { "epoch": 0.6384594176157452, "grad_norm": 0.35595622658729553, "learning_rate": 1.5398090931364588e-05, "loss": 0.4788, "step": 30104 }, { "epoch": 0.6384806260736782, "grad_norm": 0.36993223428726196, "learning_rate": 1.539781019617785e-05, "loss": 0.5188, "step": 30105 }, { "epoch": 0.6385018345316112, "grad_norm": 0.3735238015651703, "learning_rate": 1.5397529454987717e-05, "loss": 0.4935, "step": 30106 }, { "epoch": 0.6385230429895442, "grad_norm": 0.3894374966621399, "learning_rate": 1.53972487077945e-05, "loss": 0.5217, "step": 30107 }, { "epoch": 0.6385442514474773, "grad_norm": 0.3233204483985901, "learning_rate": 1.5396967954598506e-05, "loss": 0.5004, "step": 30108 }, { "epoch": 0.6385654599054102, "grad_norm": 0.49057477712631226, "learning_rate": 1.5396687195400058e-05, "loss": 0.4617, "step": 30109 }, { "epoch": 0.6385866683633433, "grad_norm": 0.3951014578342438, "learning_rate": 1.5396406430199455e-05, "loss": 0.5101, "step": 30110 }, { "epoch": 0.6386078768212763, "grad_norm": 0.38558605313301086, "learning_rate": 1.539612565899702e-05, "loss": 0.484, "step": 30111 }, { "epoch": 0.6386290852792094, "grad_norm": 0.37387949228286743, "learning_rate": 1.5395844881793058e-05, "loss": 0.4865, "step": 30112 }, { "epoch": 0.6386502937371423, "grad_norm": 0.35898900032043457, "learning_rate": 1.5395564098587886e-05, "loss": 0.4953, "step": 30113 }, { "epoch": 0.6386715021950754, "grad_norm": 0.34911662340164185, "learning_rate": 1.539528330938181e-05, "loss": 0.5244, "step": 30114 }, { "epoch": 0.6386927106530084, "grad_norm": 0.34601521492004395, "learning_rate": 1.539500251417515e-05, "loss": 0.5292, "step": 30115 }, { "epoch": 0.6387139191109414, "grad_norm": 0.3353452682495117, "learning_rate": 1.5394721712968216e-05, "loss": 0.4345, "step": 30116 }, { "epoch": 0.6387351275688745, "grad_norm": 0.4863220155239105, "learning_rate": 1.539444090576132e-05, "loss": 0.5043, "step": 30117 }, { "epoch": 0.6387563360268075, "grad_norm": 0.37988993525505066, "learning_rate": 1.5394160092554775e-05, "loss": 0.483, "step": 30118 }, { "epoch": 0.6387775444847406, "grad_norm": 0.3532981276512146, "learning_rate": 1.539387927334889e-05, "loss": 0.473, "step": 30119 }, { "epoch": 0.6387987529426735, "grad_norm": 0.340282678604126, "learning_rate": 1.539359844814398e-05, "loss": 0.461, "step": 30120 }, { "epoch": 0.6388199614006066, "grad_norm": 0.3299345374107361, "learning_rate": 1.5393317616940354e-05, "loss": 0.5618, "step": 30121 }, { "epoch": 0.6388411698585396, "grad_norm": 0.4115265905857086, "learning_rate": 1.5393036779738326e-05, "loss": 0.5464, "step": 30122 }, { "epoch": 0.6388623783164726, "grad_norm": 0.37819215655326843, "learning_rate": 1.5392755936538214e-05, "loss": 0.5227, "step": 30123 }, { "epoch": 0.6388835867744056, "grad_norm": 0.3375284969806671, "learning_rate": 1.5392475087340322e-05, "loss": 0.479, "step": 30124 }, { "epoch": 0.6389047952323387, "grad_norm": 0.4268958568572998, "learning_rate": 1.539219423214497e-05, "loss": 0.44, "step": 30125 }, { "epoch": 0.6389260036902716, "grad_norm": 0.4423670172691345, "learning_rate": 1.539191337095246e-05, "loss": 0.5008, "step": 30126 }, { "epoch": 0.6389472121482047, "grad_norm": 0.34167584776878357, "learning_rate": 1.5391632503763116e-05, "loss": 0.4714, "step": 30127 }, { "epoch": 0.6389684206061377, "grad_norm": 0.32449963688850403, "learning_rate": 1.5391351630577244e-05, "loss": 0.5215, "step": 30128 }, { "epoch": 0.6389896290640708, "grad_norm": 0.38378965854644775, "learning_rate": 1.5391070751395162e-05, "loss": 0.5295, "step": 30129 }, { "epoch": 0.6390108375220038, "grad_norm": 0.3888320326805115, "learning_rate": 1.5390789866217168e-05, "loss": 0.5029, "step": 30130 }, { "epoch": 0.6390320459799368, "grad_norm": 0.37704741954803467, "learning_rate": 1.5390508975043593e-05, "loss": 0.4907, "step": 30131 }, { "epoch": 0.6390532544378699, "grad_norm": 0.3579510450363159, "learning_rate": 1.539022807787474e-05, "loss": 0.6022, "step": 30132 }, { "epoch": 0.6390744628958028, "grad_norm": 0.3327752351760864, "learning_rate": 1.5389947174710915e-05, "loss": 0.4829, "step": 30133 }, { "epoch": 0.6390956713537359, "grad_norm": 0.41661733388900757, "learning_rate": 1.5389666265552442e-05, "loss": 0.4874, "step": 30134 }, { "epoch": 0.6391168798116689, "grad_norm": 0.3613179326057434, "learning_rate": 1.5389385350399632e-05, "loss": 0.5741, "step": 30135 }, { "epoch": 0.639138088269602, "grad_norm": 0.3930049240589142, "learning_rate": 1.538910442925279e-05, "loss": 0.4795, "step": 30136 }, { "epoch": 0.6391592967275349, "grad_norm": 0.3532765805721283, "learning_rate": 1.5388823502112234e-05, "loss": 0.5532, "step": 30137 }, { "epoch": 0.639180505185468, "grad_norm": 0.4085267186164856, "learning_rate": 1.5388542568978277e-05, "loss": 0.473, "step": 30138 }, { "epoch": 0.639201713643401, "grad_norm": 0.4284564256668091, "learning_rate": 1.5388261629851226e-05, "loss": 0.4981, "step": 30139 }, { "epoch": 0.639222922101334, "grad_norm": 0.35630398988723755, "learning_rate": 1.5387980684731403e-05, "loss": 0.4789, "step": 30140 }, { "epoch": 0.639244130559267, "grad_norm": 0.3189251720905304, "learning_rate": 1.5387699733619114e-05, "loss": 0.465, "step": 30141 }, { "epoch": 0.6392653390172001, "grad_norm": 0.37276124954223633, "learning_rate": 1.5387418776514666e-05, "loss": 0.4711, "step": 30142 }, { "epoch": 0.6392865474751331, "grad_norm": 0.32339709997177124, "learning_rate": 1.5387137813418385e-05, "loss": 0.4616, "step": 30143 }, { "epoch": 0.6393077559330661, "grad_norm": 0.33464866876602173, "learning_rate": 1.538685684433057e-05, "loss": 0.5767, "step": 30144 }, { "epoch": 0.6393289643909992, "grad_norm": 0.3602922260761261, "learning_rate": 1.5386575869251544e-05, "loss": 0.4714, "step": 30145 }, { "epoch": 0.6393501728489321, "grad_norm": 0.3436138331890106, "learning_rate": 1.538629488818161e-05, "loss": 0.4696, "step": 30146 }, { "epoch": 0.6393713813068652, "grad_norm": 0.33021900057792664, "learning_rate": 1.5386013901121087e-05, "loss": 0.4809, "step": 30147 }, { "epoch": 0.6393925897647982, "grad_norm": 0.36592361330986023, "learning_rate": 1.5385732908070292e-05, "loss": 0.4987, "step": 30148 }, { "epoch": 0.6394137982227313, "grad_norm": 0.49149513244628906, "learning_rate": 1.5385451909029528e-05, "loss": 0.5198, "step": 30149 }, { "epoch": 0.6394350066806642, "grad_norm": 0.40571022033691406, "learning_rate": 1.538517090399911e-05, "loss": 0.4788, "step": 30150 }, { "epoch": 0.6394562151385973, "grad_norm": 0.5147548913955688, "learning_rate": 1.5384889892979354e-05, "loss": 0.5949, "step": 30151 }, { "epoch": 0.6394774235965303, "grad_norm": 0.3265466094017029, "learning_rate": 1.5384608875970564e-05, "loss": 0.4179, "step": 30152 }, { "epoch": 0.6394986320544633, "grad_norm": 0.3279552161693573, "learning_rate": 1.5384327852973066e-05, "loss": 0.4695, "step": 30153 }, { "epoch": 0.6395198405123963, "grad_norm": 0.34445425868034363, "learning_rate": 1.5384046823987162e-05, "loss": 0.4278, "step": 30154 }, { "epoch": 0.6395410489703294, "grad_norm": 0.34938791394233704, "learning_rate": 1.5383765789013173e-05, "loss": 0.5261, "step": 30155 }, { "epoch": 0.6395622574282623, "grad_norm": 0.3410826027393341, "learning_rate": 1.5383484748051397e-05, "loss": 0.36, "step": 30156 }, { "epoch": 0.6395834658861954, "grad_norm": 0.3558737635612488, "learning_rate": 1.5383203701102164e-05, "loss": 0.5024, "step": 30157 }, { "epoch": 0.6396046743441285, "grad_norm": 0.3476355969905853, "learning_rate": 1.5382922648165774e-05, "loss": 0.6202, "step": 30158 }, { "epoch": 0.6396258828020615, "grad_norm": 0.4154210388660431, "learning_rate": 1.5382641589242545e-05, "loss": 0.4574, "step": 30159 }, { "epoch": 0.6396470912599945, "grad_norm": 0.3436194062232971, "learning_rate": 1.5382360524332792e-05, "loss": 0.4778, "step": 30160 }, { "epoch": 0.6396682997179275, "grad_norm": 0.34342214465141296, "learning_rate": 1.5382079453436824e-05, "loss": 0.4804, "step": 30161 }, { "epoch": 0.6396895081758606, "grad_norm": 0.3952885866165161, "learning_rate": 1.538179837655495e-05, "loss": 0.4459, "step": 30162 }, { "epoch": 0.6397107166337935, "grad_norm": 0.4088839888572693, "learning_rate": 1.538151729368749e-05, "loss": 0.4107, "step": 30163 }, { "epoch": 0.6397319250917266, "grad_norm": 0.4002157151699066, "learning_rate": 1.538123620483475e-05, "loss": 0.5408, "step": 30164 }, { "epoch": 0.6397531335496596, "grad_norm": 0.9680073261260986, "learning_rate": 1.5380955109997048e-05, "loss": 0.4332, "step": 30165 }, { "epoch": 0.6397743420075926, "grad_norm": 0.5891346335411072, "learning_rate": 1.5380674009174697e-05, "loss": 0.5419, "step": 30166 }, { "epoch": 0.6397955504655256, "grad_norm": 0.3368898034095764, "learning_rate": 1.5380392902368e-05, "loss": 0.4766, "step": 30167 }, { "epoch": 0.6398167589234587, "grad_norm": 0.33536556363105774, "learning_rate": 1.5380111789577285e-05, "loss": 0.4807, "step": 30168 }, { "epoch": 0.6398379673813916, "grad_norm": 0.3201028108596802, "learning_rate": 1.5379830670802854e-05, "loss": 0.5526, "step": 30169 }, { "epoch": 0.6398591758393247, "grad_norm": 0.32919198274612427, "learning_rate": 1.537954954604502e-05, "loss": 0.4255, "step": 30170 }, { "epoch": 0.6398803842972578, "grad_norm": 0.4198007583618164, "learning_rate": 1.5379268415304096e-05, "loss": 0.5486, "step": 30171 }, { "epoch": 0.6399015927551908, "grad_norm": 0.38443660736083984, "learning_rate": 1.53789872785804e-05, "loss": 0.4938, "step": 30172 }, { "epoch": 0.6399228012131238, "grad_norm": 0.45543432235717773, "learning_rate": 1.537870613587424e-05, "loss": 0.4919, "step": 30173 }, { "epoch": 0.6399440096710568, "grad_norm": 0.31660136580467224, "learning_rate": 1.537842498718593e-05, "loss": 0.4608, "step": 30174 }, { "epoch": 0.6399652181289899, "grad_norm": 0.3562110960483551, "learning_rate": 1.537814383251578e-05, "loss": 0.5402, "step": 30175 }, { "epoch": 0.6399864265869228, "grad_norm": 0.31263113021850586, "learning_rate": 1.537786267186411e-05, "loss": 0.4517, "step": 30176 }, { "epoch": 0.6400076350448559, "grad_norm": 0.3495016098022461, "learning_rate": 1.5377581505231224e-05, "loss": 0.4965, "step": 30177 }, { "epoch": 0.6400288435027889, "grad_norm": 0.3723013699054718, "learning_rate": 1.5377300332617437e-05, "loss": 0.5174, "step": 30178 }, { "epoch": 0.640050051960722, "grad_norm": 0.4319016635417938, "learning_rate": 1.5377019154023066e-05, "loss": 0.5756, "step": 30179 }, { "epoch": 0.6400712604186549, "grad_norm": 0.36708956956863403, "learning_rate": 1.5376737969448425e-05, "loss": 0.5209, "step": 30180 }, { "epoch": 0.640092468876588, "grad_norm": 0.362719863653183, "learning_rate": 1.5376456778893816e-05, "loss": 0.5118, "step": 30181 }, { "epoch": 0.640113677334521, "grad_norm": 0.37837761640548706, "learning_rate": 1.537617558235956e-05, "loss": 0.4281, "step": 30182 }, { "epoch": 0.640134885792454, "grad_norm": 0.36370137333869934, "learning_rate": 1.5375894379845972e-05, "loss": 0.5299, "step": 30183 }, { "epoch": 0.6401560942503871, "grad_norm": 0.35376647114753723, "learning_rate": 1.5375613171353357e-05, "loss": 0.4165, "step": 30184 }, { "epoch": 0.6401773027083201, "grad_norm": 0.3607597053050995, "learning_rate": 1.5375331956882034e-05, "loss": 0.4269, "step": 30185 }, { "epoch": 0.6401985111662531, "grad_norm": 0.4082360863685608, "learning_rate": 1.5375050736432312e-05, "loss": 0.5701, "step": 30186 }, { "epoch": 0.6402197196241861, "grad_norm": 0.3645084798336029, "learning_rate": 1.5374769510004507e-05, "loss": 0.6017, "step": 30187 }, { "epoch": 0.6402409280821192, "grad_norm": 0.3598864674568176, "learning_rate": 1.5374488277598928e-05, "loss": 0.5749, "step": 30188 }, { "epoch": 0.6402621365400522, "grad_norm": 0.4884180426597595, "learning_rate": 1.5374207039215893e-05, "loss": 0.5022, "step": 30189 }, { "epoch": 0.6402833449979852, "grad_norm": 0.3411295711994171, "learning_rate": 1.5373925794855708e-05, "loss": 0.4655, "step": 30190 }, { "epoch": 0.6403045534559182, "grad_norm": 0.3387018144130707, "learning_rate": 1.537364454451869e-05, "loss": 0.4734, "step": 30191 }, { "epoch": 0.6403257619138513, "grad_norm": 0.36088672280311584, "learning_rate": 1.5373363288205152e-05, "loss": 0.5002, "step": 30192 }, { "epoch": 0.6403469703717842, "grad_norm": 0.34696078300476074, "learning_rate": 1.5373082025915404e-05, "loss": 0.443, "step": 30193 }, { "epoch": 0.6403681788297173, "grad_norm": 0.34535789489746094, "learning_rate": 1.5372800757649765e-05, "loss": 0.5277, "step": 30194 }, { "epoch": 0.6403893872876503, "grad_norm": 0.3803849220275879, "learning_rate": 1.537251948340854e-05, "loss": 0.545, "step": 30195 }, { "epoch": 0.6404105957455833, "grad_norm": 0.3790034055709839, "learning_rate": 1.537223820319205e-05, "loss": 0.5375, "step": 30196 }, { "epoch": 0.6404318042035163, "grad_norm": 0.39686059951782227, "learning_rate": 1.53719569170006e-05, "loss": 0.5284, "step": 30197 }, { "epoch": 0.6404530126614494, "grad_norm": 0.36637189984321594, "learning_rate": 1.5371675624834503e-05, "loss": 0.4975, "step": 30198 }, { "epoch": 0.6404742211193825, "grad_norm": 0.33781349658966064, "learning_rate": 1.537139432669408e-05, "loss": 0.4312, "step": 30199 }, { "epoch": 0.6404954295773154, "grad_norm": 0.36534079909324646, "learning_rate": 1.537111302257964e-05, "loss": 0.407, "step": 30200 }, { "epoch": 0.6405166380352485, "grad_norm": 0.3639145493507385, "learning_rate": 1.537083171249149e-05, "loss": 0.4428, "step": 30201 }, { "epoch": 0.6405378464931815, "grad_norm": 0.4757048189640045, "learning_rate": 1.537055039642995e-05, "loss": 0.5354, "step": 30202 }, { "epoch": 0.6405590549511145, "grad_norm": 0.35448476672172546, "learning_rate": 1.537026907439533e-05, "loss": 0.4269, "step": 30203 }, { "epoch": 0.6405802634090475, "grad_norm": 0.2963097393512726, "learning_rate": 1.536998774638794e-05, "loss": 0.3899, "step": 30204 }, { "epoch": 0.6406014718669806, "grad_norm": 0.35897573828697205, "learning_rate": 1.5369706412408107e-05, "loss": 0.4918, "step": 30205 }, { "epoch": 0.6406226803249135, "grad_norm": 0.3346315920352936, "learning_rate": 1.536942507245612e-05, "loss": 0.4619, "step": 30206 }, { "epoch": 0.6406438887828466, "grad_norm": 0.41452017426490784, "learning_rate": 1.5369143726532315e-05, "loss": 0.5374, "step": 30207 }, { "epoch": 0.6406650972407796, "grad_norm": 0.33871859312057495, "learning_rate": 1.536886237463699e-05, "loss": 0.4133, "step": 30208 }, { "epoch": 0.6406863056987127, "grad_norm": 0.3057531416416168, "learning_rate": 1.5368581016770465e-05, "loss": 0.4474, "step": 30209 }, { "epoch": 0.6407075141566456, "grad_norm": 0.3834439516067505, "learning_rate": 1.536829965293305e-05, "loss": 0.5228, "step": 30210 }, { "epoch": 0.6407287226145787, "grad_norm": 0.3494974970817566, "learning_rate": 1.536801828312506e-05, "loss": 0.4794, "step": 30211 }, { "epoch": 0.6407499310725118, "grad_norm": 0.3546343743801117, "learning_rate": 1.5367736907346803e-05, "loss": 0.4742, "step": 30212 }, { "epoch": 0.6407711395304447, "grad_norm": 0.3688301146030426, "learning_rate": 1.53674555255986e-05, "loss": 0.5482, "step": 30213 }, { "epoch": 0.6407923479883778, "grad_norm": 0.3475281894207001, "learning_rate": 1.5367174137880758e-05, "loss": 0.4414, "step": 30214 }, { "epoch": 0.6408135564463108, "grad_norm": 0.329226016998291, "learning_rate": 1.5366892744193588e-05, "loss": 0.4972, "step": 30215 }, { "epoch": 0.6408347649042438, "grad_norm": 0.341349720954895, "learning_rate": 1.5366611344537414e-05, "loss": 0.448, "step": 30216 }, { "epoch": 0.6408559733621768, "grad_norm": 0.35199305415153503, "learning_rate": 1.5366329938912534e-05, "loss": 0.4926, "step": 30217 }, { "epoch": 0.6408771818201099, "grad_norm": 0.363189160823822, "learning_rate": 1.536604852731927e-05, "loss": 0.4924, "step": 30218 }, { "epoch": 0.6408983902780429, "grad_norm": 0.36230143904685974, "learning_rate": 1.5365767109757937e-05, "loss": 0.4559, "step": 30219 }, { "epoch": 0.6409195987359759, "grad_norm": 0.3585268557071686, "learning_rate": 1.5365485686228844e-05, "loss": 0.4375, "step": 30220 }, { "epoch": 0.6409408071939089, "grad_norm": 0.2972160875797272, "learning_rate": 1.5365204256732302e-05, "loss": 0.4768, "step": 30221 }, { "epoch": 0.640962015651842, "grad_norm": 0.39089810848236084, "learning_rate": 1.5364922821268627e-05, "loss": 0.5128, "step": 30222 }, { "epoch": 0.6409832241097749, "grad_norm": 0.6187390685081482, "learning_rate": 1.5364641379838133e-05, "loss": 0.4928, "step": 30223 }, { "epoch": 0.641004432567708, "grad_norm": 0.3624478578567505, "learning_rate": 1.536435993244113e-05, "loss": 0.5752, "step": 30224 }, { "epoch": 0.6410256410256411, "grad_norm": 0.3707658648490906, "learning_rate": 1.5364078479077932e-05, "loss": 0.5091, "step": 30225 }, { "epoch": 0.641046849483574, "grad_norm": 0.39036881923675537, "learning_rate": 1.5363797019748853e-05, "loss": 0.5798, "step": 30226 }, { "epoch": 0.6410680579415071, "grad_norm": 0.34934455156326294, "learning_rate": 1.536351555445421e-05, "loss": 0.4823, "step": 30227 }, { "epoch": 0.6410892663994401, "grad_norm": 0.4020722508430481, "learning_rate": 1.5363234083194307e-05, "loss": 0.5351, "step": 30228 }, { "epoch": 0.6411104748573732, "grad_norm": 0.5761466026306152, "learning_rate": 1.5362952605969458e-05, "loss": 0.4797, "step": 30229 }, { "epoch": 0.6411316833153061, "grad_norm": 0.39892420172691345, "learning_rate": 1.5362671122779986e-05, "loss": 0.4692, "step": 30230 }, { "epoch": 0.6411528917732392, "grad_norm": 0.3290407955646515, "learning_rate": 1.5362389633626195e-05, "loss": 0.4098, "step": 30231 }, { "epoch": 0.6411741002311722, "grad_norm": 0.3848109245300293, "learning_rate": 1.53621081385084e-05, "loss": 0.62, "step": 30232 }, { "epoch": 0.6411953086891052, "grad_norm": 0.35323211550712585, "learning_rate": 1.5361826637426916e-05, "loss": 0.5158, "step": 30233 }, { "epoch": 0.6412165171470382, "grad_norm": 0.3561610281467438, "learning_rate": 1.5361545130382055e-05, "loss": 0.4737, "step": 30234 }, { "epoch": 0.6412377256049713, "grad_norm": 0.37135249376296997, "learning_rate": 1.5361263617374133e-05, "loss": 0.4816, "step": 30235 }, { "epoch": 0.6412589340629042, "grad_norm": 0.41189953684806824, "learning_rate": 1.5360982098403458e-05, "loss": 0.4258, "step": 30236 }, { "epoch": 0.6412801425208373, "grad_norm": 0.33154693245887756, "learning_rate": 1.5360700573470345e-05, "loss": 0.4227, "step": 30237 }, { "epoch": 0.6413013509787703, "grad_norm": 0.32578176259994507, "learning_rate": 1.5360419042575106e-05, "loss": 0.4657, "step": 30238 }, { "epoch": 0.6413225594367034, "grad_norm": 0.3464178144931793, "learning_rate": 1.5360137505718057e-05, "loss": 0.502, "step": 30239 }, { "epoch": 0.6413437678946364, "grad_norm": 0.3597646951675415, "learning_rate": 1.535985596289951e-05, "loss": 0.5479, "step": 30240 }, { "epoch": 0.6413649763525694, "grad_norm": 0.5185778737068176, "learning_rate": 1.5359574414119775e-05, "loss": 0.4723, "step": 30241 }, { "epoch": 0.6413861848105025, "grad_norm": 0.31347084045410156, "learning_rate": 1.5359292859379173e-05, "loss": 0.5285, "step": 30242 }, { "epoch": 0.6414073932684354, "grad_norm": 0.3475097417831421, "learning_rate": 1.5359011298678007e-05, "loss": 0.4668, "step": 30243 }, { "epoch": 0.6414286017263685, "grad_norm": 0.32833221554756165, "learning_rate": 1.53587297320166e-05, "loss": 0.5334, "step": 30244 }, { "epoch": 0.6414498101843015, "grad_norm": 0.38011878728866577, "learning_rate": 1.5358448159395256e-05, "loss": 0.4482, "step": 30245 }, { "epoch": 0.6414710186422345, "grad_norm": 0.34046033024787903, "learning_rate": 1.5358166580814292e-05, "loss": 0.454, "step": 30246 }, { "epoch": 0.6414922271001675, "grad_norm": 0.37016862630844116, "learning_rate": 1.5357884996274027e-05, "loss": 0.5138, "step": 30247 }, { "epoch": 0.6415134355581006, "grad_norm": 0.31935393810272217, "learning_rate": 1.5357603405774764e-05, "loss": 0.5734, "step": 30248 }, { "epoch": 0.6415346440160336, "grad_norm": 0.36728376150131226, "learning_rate": 1.535732180931682e-05, "loss": 0.4846, "step": 30249 }, { "epoch": 0.6415558524739666, "grad_norm": 0.39120423793792725, "learning_rate": 1.5357040206900515e-05, "loss": 0.5388, "step": 30250 }, { "epoch": 0.6415770609318996, "grad_norm": 0.38673365116119385, "learning_rate": 1.5356758598526152e-05, "loss": 0.4729, "step": 30251 }, { "epoch": 0.6415982693898327, "grad_norm": 0.5178698897361755, "learning_rate": 1.535647698419405e-05, "loss": 0.5094, "step": 30252 }, { "epoch": 0.6416194778477657, "grad_norm": 0.29856202006340027, "learning_rate": 1.535619536390452e-05, "loss": 0.4112, "step": 30253 }, { "epoch": 0.6416406863056987, "grad_norm": 0.348660945892334, "learning_rate": 1.535591373765788e-05, "loss": 0.504, "step": 30254 }, { "epoch": 0.6416618947636318, "grad_norm": 0.31371957063674927, "learning_rate": 1.5355632105454433e-05, "loss": 0.4589, "step": 30255 }, { "epoch": 0.6416831032215647, "grad_norm": 0.3278060555458069, "learning_rate": 1.53553504672945e-05, "loss": 0.4429, "step": 30256 }, { "epoch": 0.6417043116794978, "grad_norm": 0.3662467300891876, "learning_rate": 1.5355068823178396e-05, "loss": 0.5242, "step": 30257 }, { "epoch": 0.6417255201374308, "grad_norm": 0.3644019663333893, "learning_rate": 1.5354787173106428e-05, "loss": 0.4269, "step": 30258 }, { "epoch": 0.6417467285953639, "grad_norm": 0.37851035594940186, "learning_rate": 1.5354505517078917e-05, "loss": 0.5563, "step": 30259 }, { "epoch": 0.6417679370532968, "grad_norm": 0.3620351254940033, "learning_rate": 1.5354223855096168e-05, "loss": 0.5272, "step": 30260 }, { "epoch": 0.6417891455112299, "grad_norm": 0.34354159235954285, "learning_rate": 1.5353942187158496e-05, "loss": 0.4668, "step": 30261 }, { "epoch": 0.6418103539691629, "grad_norm": 0.4207257330417633, "learning_rate": 1.535366051326622e-05, "loss": 0.5374, "step": 30262 }, { "epoch": 0.6418315624270959, "grad_norm": 0.34689536690711975, "learning_rate": 1.5353378833419645e-05, "loss": 0.5087, "step": 30263 }, { "epoch": 0.6418527708850289, "grad_norm": 0.33529868721961975, "learning_rate": 1.535309714761909e-05, "loss": 0.4902, "step": 30264 }, { "epoch": 0.641873979342962, "grad_norm": 0.3628993630409241, "learning_rate": 1.535281545586487e-05, "loss": 0.5043, "step": 30265 }, { "epoch": 0.641895187800895, "grad_norm": 0.3602323532104492, "learning_rate": 1.535253375815729e-05, "loss": 0.4895, "step": 30266 }, { "epoch": 0.641916396258828, "grad_norm": 0.33749860525131226, "learning_rate": 1.535225205449667e-05, "loss": 0.4779, "step": 30267 }, { "epoch": 0.6419376047167611, "grad_norm": 0.3778665065765381, "learning_rate": 1.5351970344883325e-05, "loss": 0.4916, "step": 30268 }, { "epoch": 0.641958813174694, "grad_norm": 0.35318100452423096, "learning_rate": 1.535168862931756e-05, "loss": 0.4323, "step": 30269 }, { "epoch": 0.6419800216326271, "grad_norm": 0.33153390884399414, "learning_rate": 1.53514069077997e-05, "loss": 0.4593, "step": 30270 }, { "epoch": 0.6420012300905601, "grad_norm": 0.4028245806694031, "learning_rate": 1.5351125180330043e-05, "loss": 0.5189, "step": 30271 }, { "epoch": 0.6420224385484932, "grad_norm": 0.36948323249816895, "learning_rate": 1.5350843446908918e-05, "loss": 0.5791, "step": 30272 }, { "epoch": 0.6420436470064261, "grad_norm": 0.4147200584411621, "learning_rate": 1.5350561707536634e-05, "loss": 0.4856, "step": 30273 }, { "epoch": 0.6420648554643592, "grad_norm": 0.3590599000453949, "learning_rate": 1.5350279962213494e-05, "loss": 0.5448, "step": 30274 }, { "epoch": 0.6420860639222922, "grad_norm": 0.4288710057735443, "learning_rate": 1.534999821093982e-05, "loss": 0.4974, "step": 30275 }, { "epoch": 0.6421072723802252, "grad_norm": 0.6631233096122742, "learning_rate": 1.534971645371593e-05, "loss": 0.5638, "step": 30276 }, { "epoch": 0.6421284808381582, "grad_norm": 0.33459463715553284, "learning_rate": 1.5349434690542125e-05, "loss": 0.4702, "step": 30277 }, { "epoch": 0.6421496892960913, "grad_norm": 0.3878840506076813, "learning_rate": 1.5349152921418734e-05, "loss": 0.5014, "step": 30278 }, { "epoch": 0.6421708977540243, "grad_norm": 0.33037319779396057, "learning_rate": 1.5348871146346055e-05, "loss": 0.4398, "step": 30279 }, { "epoch": 0.6421921062119573, "grad_norm": 0.3615313768386841, "learning_rate": 1.5348589365324406e-05, "loss": 0.5231, "step": 30280 }, { "epoch": 0.6422133146698904, "grad_norm": 0.4211663603782654, "learning_rate": 1.534830757835411e-05, "loss": 0.4851, "step": 30281 }, { "epoch": 0.6422345231278234, "grad_norm": 0.7524260878562927, "learning_rate": 1.534802578543547e-05, "loss": 0.5064, "step": 30282 }, { "epoch": 0.6422557315857564, "grad_norm": 0.3770862817764282, "learning_rate": 1.5347743986568798e-05, "loss": 0.4509, "step": 30283 }, { "epoch": 0.6422769400436894, "grad_norm": 0.4102141857147217, "learning_rate": 1.5347462181754417e-05, "loss": 0.4843, "step": 30284 }, { "epoch": 0.6422981485016225, "grad_norm": 0.5646629929542542, "learning_rate": 1.5347180370992628e-05, "loss": 0.4573, "step": 30285 }, { "epoch": 0.6423193569595554, "grad_norm": 0.6016848683357239, "learning_rate": 1.534689855428376e-05, "loss": 0.5209, "step": 30286 }, { "epoch": 0.6423405654174885, "grad_norm": 0.3573594391345978, "learning_rate": 1.534661673162811e-05, "loss": 0.4699, "step": 30287 }, { "epoch": 0.6423617738754215, "grad_norm": 0.37949028611183167, "learning_rate": 1.5346334903026005e-05, "loss": 0.4984, "step": 30288 }, { "epoch": 0.6423829823333546, "grad_norm": 0.3602997958660126, "learning_rate": 1.534605306847775e-05, "loss": 0.4759, "step": 30289 }, { "epoch": 0.6424041907912875, "grad_norm": 0.37243562936782837, "learning_rate": 1.5345771227983663e-05, "loss": 0.5095, "step": 30290 }, { "epoch": 0.6424253992492206, "grad_norm": 0.3837091028690338, "learning_rate": 1.5345489381544054e-05, "loss": 0.5394, "step": 30291 }, { "epoch": 0.6424466077071536, "grad_norm": 0.30769819021224976, "learning_rate": 1.534520752915924e-05, "loss": 0.4185, "step": 30292 }, { "epoch": 0.6424678161650866, "grad_norm": 0.3295309245586395, "learning_rate": 1.534492567082953e-05, "loss": 0.4329, "step": 30293 }, { "epoch": 0.6424890246230197, "grad_norm": 0.37776151299476624, "learning_rate": 1.5344643806555238e-05, "loss": 0.569, "step": 30294 }, { "epoch": 0.6425102330809527, "grad_norm": 0.34332743287086487, "learning_rate": 1.5344361936336686e-05, "loss": 0.5386, "step": 30295 }, { "epoch": 0.6425314415388858, "grad_norm": 0.37039533257484436, "learning_rate": 1.534408006017418e-05, "loss": 0.4957, "step": 30296 }, { "epoch": 0.6425526499968187, "grad_norm": 0.37722933292388916, "learning_rate": 1.534379817806803e-05, "loss": 0.4084, "step": 30297 }, { "epoch": 0.6425738584547518, "grad_norm": 0.46977540850639343, "learning_rate": 1.534351629001856e-05, "loss": 0.5433, "step": 30298 }, { "epoch": 0.6425950669126848, "grad_norm": 0.3887590765953064, "learning_rate": 1.534323439602607e-05, "loss": 0.4647, "step": 30299 }, { "epoch": 0.6426162753706178, "grad_norm": 0.445889413356781, "learning_rate": 1.5342952496090887e-05, "loss": 0.5195, "step": 30300 }, { "epoch": 0.6426374838285508, "grad_norm": 0.29528167843818665, "learning_rate": 1.534267059021332e-05, "loss": 0.3928, "step": 30301 }, { "epoch": 0.6426586922864839, "grad_norm": 0.36015579104423523, "learning_rate": 1.5342388678393673e-05, "loss": 0.5168, "step": 30302 }, { "epoch": 0.6426799007444168, "grad_norm": 0.3541562855243683, "learning_rate": 1.5342106760632276e-05, "loss": 0.4486, "step": 30303 }, { "epoch": 0.6427011092023499, "grad_norm": 0.38824567198753357, "learning_rate": 1.534182483692943e-05, "loss": 0.5501, "step": 30304 }, { "epoch": 0.6427223176602829, "grad_norm": 0.3614521026611328, "learning_rate": 1.5341542907285454e-05, "loss": 0.4981, "step": 30305 }, { "epoch": 0.642743526118216, "grad_norm": 0.30960971117019653, "learning_rate": 1.534126097170066e-05, "loss": 0.5418, "step": 30306 }, { "epoch": 0.642764734576149, "grad_norm": 0.3226318657398224, "learning_rate": 1.5340979030175366e-05, "loss": 0.4224, "step": 30307 }, { "epoch": 0.642785943034082, "grad_norm": 0.4325827360153198, "learning_rate": 1.5340697082709876e-05, "loss": 0.5678, "step": 30308 }, { "epoch": 0.6428071514920151, "grad_norm": 0.35971421003341675, "learning_rate": 1.5340415129304513e-05, "loss": 0.5355, "step": 30309 }, { "epoch": 0.642828359949948, "grad_norm": 0.35024523735046387, "learning_rate": 1.5340133169959583e-05, "loss": 0.5087, "step": 30310 }, { "epoch": 0.6428495684078811, "grad_norm": 0.36276775598526, "learning_rate": 1.5339851204675403e-05, "loss": 0.4689, "step": 30311 }, { "epoch": 0.6428707768658141, "grad_norm": 0.4535880982875824, "learning_rate": 1.5339569233452292e-05, "loss": 0.4688, "step": 30312 }, { "epoch": 0.6428919853237471, "grad_norm": 0.38695573806762695, "learning_rate": 1.5339287256290555e-05, "loss": 0.518, "step": 30313 }, { "epoch": 0.6429131937816801, "grad_norm": 0.34212350845336914, "learning_rate": 1.5339005273190508e-05, "loss": 0.4601, "step": 30314 }, { "epoch": 0.6429344022396132, "grad_norm": 0.4662347137928009, "learning_rate": 1.5338723284152468e-05, "loss": 0.5273, "step": 30315 }, { "epoch": 0.6429556106975461, "grad_norm": 0.3373240828514099, "learning_rate": 1.5338441289176748e-05, "loss": 0.5103, "step": 30316 }, { "epoch": 0.6429768191554792, "grad_norm": 0.39050355553627014, "learning_rate": 1.5338159288263658e-05, "loss": 0.5639, "step": 30317 }, { "epoch": 0.6429980276134122, "grad_norm": 0.33453261852264404, "learning_rate": 1.533787728141351e-05, "loss": 0.4902, "step": 30318 }, { "epoch": 0.6430192360713453, "grad_norm": 0.3436358869075775, "learning_rate": 1.5337595268626623e-05, "loss": 0.5212, "step": 30319 }, { "epoch": 0.6430404445292782, "grad_norm": 0.3641357123851776, "learning_rate": 1.533731324990331e-05, "loss": 0.5479, "step": 30320 }, { "epoch": 0.6430616529872113, "grad_norm": 0.512009859085083, "learning_rate": 1.5337031225243886e-05, "loss": 0.5669, "step": 30321 }, { "epoch": 0.6430828614451444, "grad_norm": 0.33146005868911743, "learning_rate": 1.533674919464866e-05, "loss": 0.4482, "step": 30322 }, { "epoch": 0.6431040699030773, "grad_norm": 0.3473154902458191, "learning_rate": 1.533646715811795e-05, "loss": 0.4855, "step": 30323 }, { "epoch": 0.6431252783610104, "grad_norm": 0.5159755945205688, "learning_rate": 1.5336185115652066e-05, "loss": 0.4691, "step": 30324 }, { "epoch": 0.6431464868189434, "grad_norm": 0.3480687439441681, "learning_rate": 1.533590306725132e-05, "loss": 0.5047, "step": 30325 }, { "epoch": 0.6431676952768764, "grad_norm": 0.35293322801589966, "learning_rate": 1.5335621012916033e-05, "loss": 0.523, "step": 30326 }, { "epoch": 0.6431889037348094, "grad_norm": 0.33761247992515564, "learning_rate": 1.5335338952646513e-05, "loss": 0.5271, "step": 30327 }, { "epoch": 0.6432101121927425, "grad_norm": 0.35293278098106384, "learning_rate": 1.5335056886443077e-05, "loss": 0.5673, "step": 30328 }, { "epoch": 0.6432313206506755, "grad_norm": 0.38764408230781555, "learning_rate": 1.5334774814306037e-05, "loss": 0.5097, "step": 30329 }, { "epoch": 0.6432525291086085, "grad_norm": 0.34089210629463196, "learning_rate": 1.5334492736235703e-05, "loss": 0.4427, "step": 30330 }, { "epoch": 0.6432737375665415, "grad_norm": 0.32946982979774475, "learning_rate": 1.53342106522324e-05, "loss": 0.5305, "step": 30331 }, { "epoch": 0.6432949460244746, "grad_norm": 0.43551045656204224, "learning_rate": 1.5333928562296424e-05, "loss": 0.455, "step": 30332 }, { "epoch": 0.6433161544824075, "grad_norm": 0.3447730839252472, "learning_rate": 1.5333646466428106e-05, "loss": 0.5654, "step": 30333 }, { "epoch": 0.6433373629403406, "grad_norm": 0.33964887261390686, "learning_rate": 1.533336436462775e-05, "loss": 0.4215, "step": 30334 }, { "epoch": 0.6433585713982737, "grad_norm": 0.3307217061519623, "learning_rate": 1.533308225689568e-05, "loss": 0.4818, "step": 30335 }, { "epoch": 0.6433797798562066, "grad_norm": 0.37558630108833313, "learning_rate": 1.533280014323219e-05, "loss": 0.5475, "step": 30336 }, { "epoch": 0.6434009883141397, "grad_norm": 0.36838722229003906, "learning_rate": 1.5332518023637615e-05, "loss": 0.5607, "step": 30337 }, { "epoch": 0.6434221967720727, "grad_norm": 0.3566799759864807, "learning_rate": 1.5332235898112257e-05, "loss": 0.4995, "step": 30338 }, { "epoch": 0.6434434052300058, "grad_norm": 0.3332654535770416, "learning_rate": 1.5331953766656432e-05, "loss": 0.461, "step": 30339 }, { "epoch": 0.6434646136879387, "grad_norm": 0.3467015027999878, "learning_rate": 1.5331671629270455e-05, "loss": 0.501, "step": 30340 }, { "epoch": 0.6434858221458718, "grad_norm": 0.41637253761291504, "learning_rate": 1.5331389485954637e-05, "loss": 0.5516, "step": 30341 }, { "epoch": 0.6435070306038048, "grad_norm": 0.37357670068740845, "learning_rate": 1.5331107336709297e-05, "loss": 0.476, "step": 30342 }, { "epoch": 0.6435282390617378, "grad_norm": 0.36704057455062866, "learning_rate": 1.5330825181534747e-05, "loss": 0.5641, "step": 30343 }, { "epoch": 0.6435494475196708, "grad_norm": 0.5010713338851929, "learning_rate": 1.5330543020431295e-05, "loss": 0.4691, "step": 30344 }, { "epoch": 0.6435706559776039, "grad_norm": 0.37372592091560364, "learning_rate": 1.533026085339926e-05, "loss": 0.4951, "step": 30345 }, { "epoch": 0.6435918644355368, "grad_norm": 0.32391345500946045, "learning_rate": 1.5329978680438958e-05, "loss": 0.4996, "step": 30346 }, { "epoch": 0.6436130728934699, "grad_norm": 0.34898605942726135, "learning_rate": 1.5329696501550697e-05, "loss": 0.5143, "step": 30347 }, { "epoch": 0.643634281351403, "grad_norm": 0.34443992376327515, "learning_rate": 1.5329414316734795e-05, "loss": 0.5022, "step": 30348 }, { "epoch": 0.643655489809336, "grad_norm": 0.35961416363716125, "learning_rate": 1.532913212599157e-05, "loss": 0.5799, "step": 30349 }, { "epoch": 0.643676698267269, "grad_norm": 0.3701961636543274, "learning_rate": 1.5328849929321323e-05, "loss": 0.4928, "step": 30350 }, { "epoch": 0.643697906725202, "grad_norm": 0.3555505573749542, "learning_rate": 1.5328567726724376e-05, "loss": 0.5096, "step": 30351 }, { "epoch": 0.6437191151831351, "grad_norm": 0.39854422211647034, "learning_rate": 1.5328285518201048e-05, "loss": 0.5066, "step": 30352 }, { "epoch": 0.643740323641068, "grad_norm": 0.34831860661506653, "learning_rate": 1.532800330375164e-05, "loss": 0.4633, "step": 30353 }, { "epoch": 0.6437615320990011, "grad_norm": 0.36146071553230286, "learning_rate": 1.532772108337648e-05, "loss": 0.5046, "step": 30354 }, { "epoch": 0.6437827405569341, "grad_norm": 0.414913535118103, "learning_rate": 1.532743885707587e-05, "loss": 0.5345, "step": 30355 }, { "epoch": 0.6438039490148671, "grad_norm": 0.35339564085006714, "learning_rate": 1.5327156624850128e-05, "loss": 0.5642, "step": 30356 }, { "epoch": 0.6438251574728001, "grad_norm": 0.4153251051902771, "learning_rate": 1.532687438669957e-05, "loss": 0.5182, "step": 30357 }, { "epoch": 0.6438463659307332, "grad_norm": 0.3538496196269989, "learning_rate": 1.5326592142624514e-05, "loss": 0.5052, "step": 30358 }, { "epoch": 0.6438675743886662, "grad_norm": 0.323670357465744, "learning_rate": 1.532630989262526e-05, "loss": 0.5236, "step": 30359 }, { "epoch": 0.6438887828465992, "grad_norm": 0.4460761845111847, "learning_rate": 1.5326027636702136e-05, "loss": 0.4295, "step": 30360 }, { "epoch": 0.6439099913045322, "grad_norm": 0.3475967049598694, "learning_rate": 1.532574537485545e-05, "loss": 0.5992, "step": 30361 }, { "epoch": 0.6439311997624653, "grad_norm": 0.36838507652282715, "learning_rate": 1.5325463107085513e-05, "loss": 0.4942, "step": 30362 }, { "epoch": 0.6439524082203983, "grad_norm": 0.33658480644226074, "learning_rate": 1.5325180833392646e-05, "loss": 0.44, "step": 30363 }, { "epoch": 0.6439736166783313, "grad_norm": 0.34972935914993286, "learning_rate": 1.5324898553777155e-05, "loss": 0.52, "step": 30364 }, { "epoch": 0.6439948251362644, "grad_norm": 0.38568323850631714, "learning_rate": 1.5324616268239362e-05, "loss": 0.5844, "step": 30365 }, { "epoch": 0.6440160335941973, "grad_norm": 0.36404576897621155, "learning_rate": 1.5324333976779576e-05, "loss": 0.4743, "step": 30366 }, { "epoch": 0.6440372420521304, "grad_norm": 0.35164719820022583, "learning_rate": 1.5324051679398107e-05, "loss": 0.5846, "step": 30367 }, { "epoch": 0.6440584505100634, "grad_norm": 0.37040141224861145, "learning_rate": 1.5323769376095282e-05, "loss": 0.5094, "step": 30368 }, { "epoch": 0.6440796589679965, "grad_norm": 0.33251118659973145, "learning_rate": 1.5323487066871402e-05, "loss": 0.4648, "step": 30369 }, { "epoch": 0.6441008674259294, "grad_norm": 0.3801991641521454, "learning_rate": 1.5323204751726786e-05, "loss": 0.5434, "step": 30370 }, { "epoch": 0.6441220758838625, "grad_norm": 0.3383733332157135, "learning_rate": 1.5322922430661748e-05, "loss": 0.522, "step": 30371 }, { "epoch": 0.6441432843417955, "grad_norm": 0.4115862548351288, "learning_rate": 1.5322640103676606e-05, "loss": 0.4596, "step": 30372 }, { "epoch": 0.6441644927997285, "grad_norm": 0.41308051347732544, "learning_rate": 1.5322357770771667e-05, "loss": 0.5734, "step": 30373 }, { "epoch": 0.6441857012576615, "grad_norm": 0.32251521944999695, "learning_rate": 1.532207543194725e-05, "loss": 0.4868, "step": 30374 }, { "epoch": 0.6442069097155946, "grad_norm": 0.3303808271884918, "learning_rate": 1.5321793087203662e-05, "loss": 0.5202, "step": 30375 }, { "epoch": 0.6442281181735277, "grad_norm": 0.45616066455841064, "learning_rate": 1.5321510736541226e-05, "loss": 0.5153, "step": 30376 }, { "epoch": 0.6442493266314606, "grad_norm": 0.49073269963264465, "learning_rate": 1.532122837996025e-05, "loss": 0.4901, "step": 30377 }, { "epoch": 0.6442705350893937, "grad_norm": 0.38249868154525757, "learning_rate": 1.5320946017461052e-05, "loss": 0.4819, "step": 30378 }, { "epoch": 0.6442917435473267, "grad_norm": 0.4946385324001312, "learning_rate": 1.5320663649043942e-05, "loss": 0.3853, "step": 30379 }, { "epoch": 0.6443129520052597, "grad_norm": 0.3812773823738098, "learning_rate": 1.532038127470924e-05, "loss": 0.4204, "step": 30380 }, { "epoch": 0.6443341604631927, "grad_norm": 0.35547298192977905, "learning_rate": 1.532009889445725e-05, "loss": 0.4942, "step": 30381 }, { "epoch": 0.6443553689211258, "grad_norm": 0.32569050788879395, "learning_rate": 1.5319816508288296e-05, "loss": 0.4611, "step": 30382 }, { "epoch": 0.6443765773790587, "grad_norm": 0.3596442937850952, "learning_rate": 1.531953411620269e-05, "loss": 0.4796, "step": 30383 }, { "epoch": 0.6443977858369918, "grad_norm": 0.4876432418823242, "learning_rate": 1.5319251718200737e-05, "loss": 0.5153, "step": 30384 }, { "epoch": 0.6444189942949248, "grad_norm": 0.4254705309867859, "learning_rate": 1.5318969314282765e-05, "loss": 0.4481, "step": 30385 }, { "epoch": 0.6444402027528578, "grad_norm": 0.38486742973327637, "learning_rate": 1.531868690444908e-05, "loss": 0.4729, "step": 30386 }, { "epoch": 0.6444614112107908, "grad_norm": 0.368645578622818, "learning_rate": 1.53184044887e-05, "loss": 0.5157, "step": 30387 }, { "epoch": 0.6444826196687239, "grad_norm": 0.3256501257419586, "learning_rate": 1.5318122067035832e-05, "loss": 0.4849, "step": 30388 }, { "epoch": 0.644503828126657, "grad_norm": 0.3807411789894104, "learning_rate": 1.5317839639456896e-05, "loss": 0.4311, "step": 30389 }, { "epoch": 0.6445250365845899, "grad_norm": 0.3678291440010071, "learning_rate": 1.5317557205963508e-05, "loss": 0.5008, "step": 30390 }, { "epoch": 0.644546245042523, "grad_norm": 0.3919326961040497, "learning_rate": 1.5317274766555976e-05, "loss": 0.4503, "step": 30391 }, { "epoch": 0.644567453500456, "grad_norm": 0.38877391815185547, "learning_rate": 1.531699232123462e-05, "loss": 0.5103, "step": 30392 }, { "epoch": 0.644588661958389, "grad_norm": 0.3440861701965332, "learning_rate": 1.5316709869999747e-05, "loss": 0.4457, "step": 30393 }, { "epoch": 0.644609870416322, "grad_norm": 0.33723968267440796, "learning_rate": 1.5316427412851684e-05, "loss": 0.4407, "step": 30394 }, { "epoch": 0.6446310788742551, "grad_norm": 0.3526858687400818, "learning_rate": 1.5316144949790727e-05, "loss": 0.4919, "step": 30395 }, { "epoch": 0.644652287332188, "grad_norm": 0.3422958552837372, "learning_rate": 1.5315862480817202e-05, "loss": 0.4594, "step": 30396 }, { "epoch": 0.6446734957901211, "grad_norm": 0.3411049544811249, "learning_rate": 1.5315580005931424e-05, "loss": 0.5105, "step": 30397 }, { "epoch": 0.6446947042480541, "grad_norm": 0.5800548195838928, "learning_rate": 1.5315297525133702e-05, "loss": 0.4864, "step": 30398 }, { "epoch": 0.6447159127059872, "grad_norm": 0.3334285616874695, "learning_rate": 1.5315015038424355e-05, "loss": 0.4878, "step": 30399 }, { "epoch": 0.6447371211639201, "grad_norm": 0.4069460928440094, "learning_rate": 1.5314732545803693e-05, "loss": 0.525, "step": 30400 }, { "epoch": 0.6447583296218532, "grad_norm": 0.4778851568698883, "learning_rate": 1.5314450047272028e-05, "loss": 0.4164, "step": 30401 }, { "epoch": 0.6447795380797863, "grad_norm": 0.37672001123428345, "learning_rate": 1.531416754282968e-05, "loss": 0.5048, "step": 30402 }, { "epoch": 0.6448007465377192, "grad_norm": 0.35413965582847595, "learning_rate": 1.5313885032476963e-05, "loss": 0.4435, "step": 30403 }, { "epoch": 0.6448219549956523, "grad_norm": 0.3121100664138794, "learning_rate": 1.5313602516214188e-05, "loss": 0.4449, "step": 30404 }, { "epoch": 0.6448431634535853, "grad_norm": 0.34171879291534424, "learning_rate": 1.531331999404167e-05, "loss": 0.5279, "step": 30405 }, { "epoch": 0.6448643719115184, "grad_norm": 0.3705638647079468, "learning_rate": 1.5313037465959724e-05, "loss": 0.4923, "step": 30406 }, { "epoch": 0.6448855803694513, "grad_norm": 0.3768240809440613, "learning_rate": 1.5312754931968665e-05, "loss": 0.4715, "step": 30407 }, { "epoch": 0.6449067888273844, "grad_norm": 0.3522987961769104, "learning_rate": 1.5312472392068806e-05, "loss": 0.3823, "step": 30408 }, { "epoch": 0.6449279972853174, "grad_norm": 0.3747108578681946, "learning_rate": 1.5312189846260462e-05, "loss": 0.4917, "step": 30409 }, { "epoch": 0.6449492057432504, "grad_norm": 0.3391623795032501, "learning_rate": 1.531190729454394e-05, "loss": 0.4653, "step": 30410 }, { "epoch": 0.6449704142011834, "grad_norm": 0.3492542803287506, "learning_rate": 1.531162473691957e-05, "loss": 0.5208, "step": 30411 }, { "epoch": 0.6449916226591165, "grad_norm": 0.4588703513145447, "learning_rate": 1.531134217338765e-05, "loss": 0.5186, "step": 30412 }, { "epoch": 0.6450128311170494, "grad_norm": 0.32686281204223633, "learning_rate": 1.5311059603948506e-05, "loss": 0.5053, "step": 30413 }, { "epoch": 0.6450340395749825, "grad_norm": 0.364322304725647, "learning_rate": 1.531077702860245e-05, "loss": 0.4363, "step": 30414 }, { "epoch": 0.6450552480329155, "grad_norm": 0.3485833704471588, "learning_rate": 1.5310494447349787e-05, "loss": 0.5026, "step": 30415 }, { "epoch": 0.6450764564908485, "grad_norm": 0.3702712059020996, "learning_rate": 1.531021186019084e-05, "loss": 0.5787, "step": 30416 }, { "epoch": 0.6450976649487816, "grad_norm": 0.3307729661464691, "learning_rate": 1.5309929267125924e-05, "loss": 0.5312, "step": 30417 }, { "epoch": 0.6451188734067146, "grad_norm": 0.3941405117511749, "learning_rate": 1.530964666815535e-05, "loss": 0.4373, "step": 30418 }, { "epoch": 0.6451400818646477, "grad_norm": 0.32860636711120605, "learning_rate": 1.530936406327943e-05, "loss": 0.4796, "step": 30419 }, { "epoch": 0.6451612903225806, "grad_norm": 0.36998265981674194, "learning_rate": 1.5309081452498485e-05, "loss": 0.5439, "step": 30420 }, { "epoch": 0.6451824987805137, "grad_norm": 0.3877725303173065, "learning_rate": 1.5308798835812823e-05, "loss": 0.4943, "step": 30421 }, { "epoch": 0.6452037072384467, "grad_norm": 0.4335614740848541, "learning_rate": 1.5308516213222765e-05, "loss": 0.5162, "step": 30422 }, { "epoch": 0.6452249156963797, "grad_norm": 0.38903728127479553, "learning_rate": 1.530823358472862e-05, "loss": 0.4688, "step": 30423 }, { "epoch": 0.6452461241543127, "grad_norm": 0.36076104640960693, "learning_rate": 1.5307950950330702e-05, "loss": 0.5066, "step": 30424 }, { "epoch": 0.6452673326122458, "grad_norm": 0.3435552418231964, "learning_rate": 1.5307668310029327e-05, "loss": 0.449, "step": 30425 }, { "epoch": 0.6452885410701787, "grad_norm": 0.41000550985336304, "learning_rate": 1.530738566382481e-05, "loss": 0.4652, "step": 30426 }, { "epoch": 0.6453097495281118, "grad_norm": 0.4617190957069397, "learning_rate": 1.5307103011717467e-05, "loss": 0.5292, "step": 30427 }, { "epoch": 0.6453309579860448, "grad_norm": 0.3518429696559906, "learning_rate": 1.5306820353707608e-05, "loss": 0.4776, "step": 30428 }, { "epoch": 0.6453521664439779, "grad_norm": 0.329947292804718, "learning_rate": 1.530653768979555e-05, "loss": 0.5168, "step": 30429 }, { "epoch": 0.6453733749019109, "grad_norm": 1.383368968963623, "learning_rate": 1.5306255019981605e-05, "loss": 0.4953, "step": 30430 }, { "epoch": 0.6453945833598439, "grad_norm": 0.3169163763523102, "learning_rate": 1.5305972344266095e-05, "loss": 0.5321, "step": 30431 }, { "epoch": 0.645415791817777, "grad_norm": 0.3690740764141083, "learning_rate": 1.5305689662649323e-05, "loss": 0.4815, "step": 30432 }, { "epoch": 0.6454370002757099, "grad_norm": 0.34015560150146484, "learning_rate": 1.530540697513161e-05, "loss": 0.432, "step": 30433 }, { "epoch": 0.645458208733643, "grad_norm": 0.4105713963508606, "learning_rate": 1.5305124281713267e-05, "loss": 0.6138, "step": 30434 }, { "epoch": 0.645479417191576, "grad_norm": 0.390597939491272, "learning_rate": 1.5304841582394615e-05, "loss": 0.4987, "step": 30435 }, { "epoch": 0.645500625649509, "grad_norm": 0.3632814288139343, "learning_rate": 1.5304558877175964e-05, "loss": 0.4555, "step": 30436 }, { "epoch": 0.645521834107442, "grad_norm": 0.3475256860256195, "learning_rate": 1.530427616605763e-05, "loss": 0.486, "step": 30437 }, { "epoch": 0.6455430425653751, "grad_norm": 0.3887900114059448, "learning_rate": 1.5303993449039923e-05, "loss": 0.477, "step": 30438 }, { "epoch": 0.6455642510233081, "grad_norm": 0.3846421241760254, "learning_rate": 1.5303710726123163e-05, "loss": 0.5241, "step": 30439 }, { "epoch": 0.6455854594812411, "grad_norm": 0.3856406807899475, "learning_rate": 1.5303427997307658e-05, "loss": 0.5056, "step": 30440 }, { "epoch": 0.6456066679391741, "grad_norm": 0.33372005820274353, "learning_rate": 1.5303145262593728e-05, "loss": 0.4458, "step": 30441 }, { "epoch": 0.6456278763971072, "grad_norm": 0.3667372763156891, "learning_rate": 1.530286252198169e-05, "loss": 0.544, "step": 30442 }, { "epoch": 0.6456490848550402, "grad_norm": 0.36454176902770996, "learning_rate": 1.5302579775471847e-05, "loss": 0.4654, "step": 30443 }, { "epoch": 0.6456702933129732, "grad_norm": 0.32324326038360596, "learning_rate": 1.530229702306453e-05, "loss": 0.3733, "step": 30444 }, { "epoch": 0.6456915017709063, "grad_norm": 0.32922667264938354, "learning_rate": 1.5302014264760038e-05, "loss": 0.4047, "step": 30445 }, { "epoch": 0.6457127102288392, "grad_norm": 0.378246545791626, "learning_rate": 1.5301731500558692e-05, "loss": 0.5322, "step": 30446 }, { "epoch": 0.6457339186867723, "grad_norm": 0.3991612195968628, "learning_rate": 1.5301448730460807e-05, "loss": 0.554, "step": 30447 }, { "epoch": 0.6457551271447053, "grad_norm": 0.37363389134407043, "learning_rate": 1.53011659544667e-05, "loss": 0.5277, "step": 30448 }, { "epoch": 0.6457763356026384, "grad_norm": 0.31607210636138916, "learning_rate": 1.530088317257668e-05, "loss": 0.3806, "step": 30449 }, { "epoch": 0.6457975440605713, "grad_norm": 0.36273735761642456, "learning_rate": 1.5300600384791063e-05, "loss": 0.5596, "step": 30450 }, { "epoch": 0.6458187525185044, "grad_norm": 0.31734132766723633, "learning_rate": 1.5300317591110165e-05, "loss": 0.4673, "step": 30451 }, { "epoch": 0.6458399609764374, "grad_norm": 0.33408430218696594, "learning_rate": 1.5300034791534297e-05, "loss": 0.486, "step": 30452 }, { "epoch": 0.6458611694343704, "grad_norm": 0.34478217363357544, "learning_rate": 1.529975198606378e-05, "loss": 0.5385, "step": 30453 }, { "epoch": 0.6458823778923034, "grad_norm": 0.35614269971847534, "learning_rate": 1.5299469174698926e-05, "loss": 0.5038, "step": 30454 }, { "epoch": 0.6459035863502365, "grad_norm": 0.3666234612464905, "learning_rate": 1.5299186357440042e-05, "loss": 0.517, "step": 30455 }, { "epoch": 0.6459247948081694, "grad_norm": 0.4438879191875458, "learning_rate": 1.5298903534287456e-05, "loss": 0.4086, "step": 30456 }, { "epoch": 0.6459460032661025, "grad_norm": 0.36934372782707214, "learning_rate": 1.5298620705241473e-05, "loss": 0.497, "step": 30457 }, { "epoch": 0.6459672117240356, "grad_norm": 0.923417866230011, "learning_rate": 1.529833787030241e-05, "loss": 0.4225, "step": 30458 }, { "epoch": 0.6459884201819686, "grad_norm": 0.3605002164840698, "learning_rate": 1.5298055029470582e-05, "loss": 0.4564, "step": 30459 }, { "epoch": 0.6460096286399016, "grad_norm": 0.37629419565200806, "learning_rate": 1.5297772182746303e-05, "loss": 0.4831, "step": 30460 }, { "epoch": 0.6460308370978346, "grad_norm": 0.3634871542453766, "learning_rate": 1.5297489330129885e-05, "loss": 0.6111, "step": 30461 }, { "epoch": 0.6460520455557677, "grad_norm": 0.7142305970191956, "learning_rate": 1.529720647162165e-05, "loss": 0.4662, "step": 30462 }, { "epoch": 0.6460732540137006, "grad_norm": 0.37618178129196167, "learning_rate": 1.5296923607221903e-05, "loss": 0.4764, "step": 30463 }, { "epoch": 0.6460944624716337, "grad_norm": 0.33797189593315125, "learning_rate": 1.5296640736930968e-05, "loss": 0.4612, "step": 30464 }, { "epoch": 0.6461156709295667, "grad_norm": 0.3772192597389221, "learning_rate": 1.5296357860749152e-05, "loss": 0.3974, "step": 30465 }, { "epoch": 0.6461368793874998, "grad_norm": 0.35631102323532104, "learning_rate": 1.5296074978676775e-05, "loss": 0.5343, "step": 30466 }, { "epoch": 0.6461580878454327, "grad_norm": 0.7108535766601562, "learning_rate": 1.529579209071415e-05, "loss": 0.5133, "step": 30467 }, { "epoch": 0.6461792963033658, "grad_norm": 0.3430543839931488, "learning_rate": 1.5295509196861588e-05, "loss": 0.49, "step": 30468 }, { "epoch": 0.6462005047612988, "grad_norm": 0.3420533239841461, "learning_rate": 1.529522629711941e-05, "loss": 0.4696, "step": 30469 }, { "epoch": 0.6462217132192318, "grad_norm": 0.34440290927886963, "learning_rate": 1.5294943391487924e-05, "loss": 0.4769, "step": 30470 }, { "epoch": 0.6462429216771649, "grad_norm": 0.3316340148448944, "learning_rate": 1.529466047996745e-05, "loss": 0.4727, "step": 30471 }, { "epoch": 0.6462641301350979, "grad_norm": 0.3624080419540405, "learning_rate": 1.52943775625583e-05, "loss": 0.4689, "step": 30472 }, { "epoch": 0.646285338593031, "grad_norm": 0.6679341197013855, "learning_rate": 1.529409463926079e-05, "loss": 0.5227, "step": 30473 }, { "epoch": 0.6463065470509639, "grad_norm": 0.35773003101348877, "learning_rate": 1.5293811710075234e-05, "loss": 0.4384, "step": 30474 }, { "epoch": 0.646327755508897, "grad_norm": 0.3740183711051941, "learning_rate": 1.5293528775001948e-05, "loss": 0.4696, "step": 30475 }, { "epoch": 0.64634896396683, "grad_norm": 0.3566906452178955, "learning_rate": 1.529324583404124e-05, "loss": 0.5048, "step": 30476 }, { "epoch": 0.646370172424763, "grad_norm": 0.35689258575439453, "learning_rate": 1.5292962887193436e-05, "loss": 0.4582, "step": 30477 }, { "epoch": 0.646391380882696, "grad_norm": 0.38415074348449707, "learning_rate": 1.5292679934458842e-05, "loss": 0.5462, "step": 30478 }, { "epoch": 0.6464125893406291, "grad_norm": 0.35891881585121155, "learning_rate": 1.5292396975837777e-05, "loss": 0.5156, "step": 30479 }, { "epoch": 0.646433797798562, "grad_norm": 0.35620415210723877, "learning_rate": 1.529211401133055e-05, "loss": 0.4382, "step": 30480 }, { "epoch": 0.6464550062564951, "grad_norm": 0.3454108238220215, "learning_rate": 1.529183104093748e-05, "loss": 0.4182, "step": 30481 }, { "epoch": 0.6464762147144281, "grad_norm": 0.3781185746192932, "learning_rate": 1.5291548064658886e-05, "loss": 0.4586, "step": 30482 }, { "epoch": 0.6464974231723611, "grad_norm": 0.4219115674495697, "learning_rate": 1.5291265082495078e-05, "loss": 0.4493, "step": 30483 }, { "epoch": 0.6465186316302942, "grad_norm": 0.3315456807613373, "learning_rate": 1.5290982094446366e-05, "loss": 0.5328, "step": 30484 }, { "epoch": 0.6465398400882272, "grad_norm": 0.3487948477268219, "learning_rate": 1.5290699100513073e-05, "loss": 0.4914, "step": 30485 }, { "epoch": 0.6465610485461603, "grad_norm": 0.6036776304244995, "learning_rate": 1.529041610069551e-05, "loss": 0.484, "step": 30486 }, { "epoch": 0.6465822570040932, "grad_norm": 0.38368144631385803, "learning_rate": 1.5290133094993994e-05, "loss": 0.59, "step": 30487 }, { "epoch": 0.6466034654620263, "grad_norm": 0.34923431277275085, "learning_rate": 1.5289850083408836e-05, "loss": 0.4757, "step": 30488 }, { "epoch": 0.6466246739199593, "grad_norm": 0.35545000433921814, "learning_rate": 1.5289567065940353e-05, "loss": 0.4276, "step": 30489 }, { "epoch": 0.6466458823778923, "grad_norm": 0.35034310817718506, "learning_rate": 1.528928404258886e-05, "loss": 0.4864, "step": 30490 }, { "epoch": 0.6466670908358253, "grad_norm": 0.3510904610157013, "learning_rate": 1.528900101335467e-05, "loss": 0.4283, "step": 30491 }, { "epoch": 0.6466882992937584, "grad_norm": 0.29832199215888977, "learning_rate": 1.5288717978238098e-05, "loss": 0.5179, "step": 30492 }, { "epoch": 0.6467095077516913, "grad_norm": 0.3509616553783417, "learning_rate": 1.528843493723946e-05, "loss": 0.5028, "step": 30493 }, { "epoch": 0.6467307162096244, "grad_norm": 0.3867117762565613, "learning_rate": 1.5288151890359072e-05, "loss": 0.4808, "step": 30494 }, { "epoch": 0.6467519246675574, "grad_norm": 0.3831724226474762, "learning_rate": 1.528786883759725e-05, "loss": 0.5318, "step": 30495 }, { "epoch": 0.6467731331254905, "grad_norm": 0.38766399025917053, "learning_rate": 1.5287585778954302e-05, "loss": 0.4242, "step": 30496 }, { "epoch": 0.6467943415834234, "grad_norm": 0.3444266617298126, "learning_rate": 1.5287302714430546e-05, "loss": 0.5699, "step": 30497 }, { "epoch": 0.6468155500413565, "grad_norm": 0.33488258719444275, "learning_rate": 1.5287019644026302e-05, "loss": 0.4849, "step": 30498 }, { "epoch": 0.6468367584992896, "grad_norm": 0.39245572686195374, "learning_rate": 1.528673656774188e-05, "loss": 0.5086, "step": 30499 }, { "epoch": 0.6468579669572225, "grad_norm": 0.3556677997112274, "learning_rate": 1.5286453485577593e-05, "loss": 0.521, "step": 30500 }, { "epoch": 0.6468791754151556, "grad_norm": 0.41965416073799133, "learning_rate": 1.5286170397533762e-05, "loss": 0.5178, "step": 30501 }, { "epoch": 0.6469003838730886, "grad_norm": 0.36428767442703247, "learning_rate": 1.5285887303610696e-05, "loss": 0.4552, "step": 30502 }, { "epoch": 0.6469215923310216, "grad_norm": 0.5300382375717163, "learning_rate": 1.5285604203808713e-05, "loss": 0.4477, "step": 30503 }, { "epoch": 0.6469428007889546, "grad_norm": 0.37324169278144836, "learning_rate": 1.5285321098128125e-05, "loss": 0.5177, "step": 30504 }, { "epoch": 0.6469640092468877, "grad_norm": 0.3390149772167206, "learning_rate": 1.528503798656925e-05, "loss": 0.4933, "step": 30505 }, { "epoch": 0.6469852177048206, "grad_norm": 0.3256618082523346, "learning_rate": 1.52847548691324e-05, "loss": 0.524, "step": 30506 }, { "epoch": 0.6470064261627537, "grad_norm": 0.33848321437835693, "learning_rate": 1.5284471745817894e-05, "loss": 0.5057, "step": 30507 }, { "epoch": 0.6470276346206867, "grad_norm": 0.3213457763195038, "learning_rate": 1.5284188616626042e-05, "loss": 0.5014, "step": 30508 }, { "epoch": 0.6470488430786198, "grad_norm": 0.3719155192375183, "learning_rate": 1.5283905481557165e-05, "loss": 0.4795, "step": 30509 }, { "epoch": 0.6470700515365527, "grad_norm": 0.34415122866630554, "learning_rate": 1.5283622340611573e-05, "loss": 0.5315, "step": 30510 }, { "epoch": 0.6470912599944858, "grad_norm": 0.3633289635181427, "learning_rate": 1.5283339193789578e-05, "loss": 0.4537, "step": 30511 }, { "epoch": 0.6471124684524189, "grad_norm": 0.3396470844745636, "learning_rate": 1.5283056041091505e-05, "loss": 0.4667, "step": 30512 }, { "epoch": 0.6471336769103518, "grad_norm": 0.3586151897907257, "learning_rate": 1.528277288251766e-05, "loss": 0.489, "step": 30513 }, { "epoch": 0.6471548853682849, "grad_norm": 0.34954389929771423, "learning_rate": 1.528248971806836e-05, "loss": 0.4937, "step": 30514 }, { "epoch": 0.6471760938262179, "grad_norm": 0.35581308603286743, "learning_rate": 1.5282206547743922e-05, "loss": 0.4766, "step": 30515 }, { "epoch": 0.647197302284151, "grad_norm": 0.3528248369693756, "learning_rate": 1.5281923371544663e-05, "loss": 0.4392, "step": 30516 }, { "epoch": 0.6472185107420839, "grad_norm": 0.51265549659729, "learning_rate": 1.528164018947089e-05, "loss": 0.5106, "step": 30517 }, { "epoch": 0.647239719200017, "grad_norm": 0.36106717586517334, "learning_rate": 1.5281357001522923e-05, "loss": 0.5015, "step": 30518 }, { "epoch": 0.64726092765795, "grad_norm": 0.3396718204021454, "learning_rate": 1.528107380770108e-05, "loss": 0.4594, "step": 30519 }, { "epoch": 0.647282136115883, "grad_norm": 0.4007106423377991, "learning_rate": 1.5280790608005668e-05, "loss": 0.4448, "step": 30520 }, { "epoch": 0.647303344573816, "grad_norm": 0.34120097756385803, "learning_rate": 1.528050740243701e-05, "loss": 0.6093, "step": 30521 }, { "epoch": 0.6473245530317491, "grad_norm": 0.330107182264328, "learning_rate": 1.5280224190995415e-05, "loss": 0.4402, "step": 30522 }, { "epoch": 0.647345761489682, "grad_norm": 0.3528383672237396, "learning_rate": 1.5279940973681204e-05, "loss": 0.5389, "step": 30523 }, { "epoch": 0.6473669699476151, "grad_norm": 0.3354295492172241, "learning_rate": 1.5279657750494682e-05, "loss": 0.6037, "step": 30524 }, { "epoch": 0.6473881784055482, "grad_norm": 0.34353190660476685, "learning_rate": 1.5279374521436176e-05, "loss": 0.4882, "step": 30525 }, { "epoch": 0.6474093868634812, "grad_norm": 0.38060104846954346, "learning_rate": 1.5279091286505997e-05, "loss": 0.4766, "step": 30526 }, { "epoch": 0.6474305953214142, "grad_norm": 0.3979262411594391, "learning_rate": 1.5278808045704457e-05, "loss": 0.4821, "step": 30527 }, { "epoch": 0.6474518037793472, "grad_norm": 0.4183858633041382, "learning_rate": 1.527852479903187e-05, "loss": 0.5188, "step": 30528 }, { "epoch": 0.6474730122372803, "grad_norm": 0.3821223974227905, "learning_rate": 1.5278241546488555e-05, "loss": 0.5401, "step": 30529 }, { "epoch": 0.6474942206952132, "grad_norm": 0.3629101812839508, "learning_rate": 1.5277958288074828e-05, "loss": 0.5453, "step": 30530 }, { "epoch": 0.6475154291531463, "grad_norm": 0.4711591601371765, "learning_rate": 1.5277675023790997e-05, "loss": 0.5487, "step": 30531 }, { "epoch": 0.6475366376110793, "grad_norm": 0.37321344017982483, "learning_rate": 1.5277391753637383e-05, "loss": 0.5039, "step": 30532 }, { "epoch": 0.6475578460690123, "grad_norm": 0.37805861234664917, "learning_rate": 1.5277108477614305e-05, "loss": 0.4611, "step": 30533 }, { "epoch": 0.6475790545269453, "grad_norm": 0.37848788499832153, "learning_rate": 1.5276825195722068e-05, "loss": 0.4455, "step": 30534 }, { "epoch": 0.6476002629848784, "grad_norm": 0.38834184408187866, "learning_rate": 1.527654190796099e-05, "loss": 0.5098, "step": 30535 }, { "epoch": 0.6476214714428113, "grad_norm": 0.3591485619544983, "learning_rate": 1.527625861433139e-05, "loss": 0.4497, "step": 30536 }, { "epoch": 0.6476426799007444, "grad_norm": 0.35453087091445923, "learning_rate": 1.527597531483358e-05, "loss": 0.4225, "step": 30537 }, { "epoch": 0.6476638883586774, "grad_norm": 0.3403114676475525, "learning_rate": 1.527569200946788e-05, "loss": 0.5525, "step": 30538 }, { "epoch": 0.6476850968166105, "grad_norm": 0.38109534978866577, "learning_rate": 1.5275408698234596e-05, "loss": 0.4931, "step": 30539 }, { "epoch": 0.6477063052745435, "grad_norm": 0.33015164732933044, "learning_rate": 1.5275125381134052e-05, "loss": 0.5305, "step": 30540 }, { "epoch": 0.6477275137324765, "grad_norm": 0.3064609467983246, "learning_rate": 1.5274842058166562e-05, "loss": 0.4386, "step": 30541 }, { "epoch": 0.6477487221904096, "grad_norm": 0.34531185030937195, "learning_rate": 1.5274558729332433e-05, "loss": 0.5554, "step": 30542 }, { "epoch": 0.6477699306483425, "grad_norm": 0.34592750668525696, "learning_rate": 1.5274275394631985e-05, "loss": 0.4746, "step": 30543 }, { "epoch": 0.6477911391062756, "grad_norm": 0.322689950466156, "learning_rate": 1.5273992054065535e-05, "loss": 0.4212, "step": 30544 }, { "epoch": 0.6478123475642086, "grad_norm": 0.36569005250930786, "learning_rate": 1.5273708707633402e-05, "loss": 0.4863, "step": 30545 }, { "epoch": 0.6478335560221417, "grad_norm": 0.31703081727027893, "learning_rate": 1.527342535533589e-05, "loss": 0.445, "step": 30546 }, { "epoch": 0.6478547644800746, "grad_norm": 0.307547926902771, "learning_rate": 1.5273141997173325e-05, "loss": 0.5675, "step": 30547 }, { "epoch": 0.6478759729380077, "grad_norm": 0.32228484749794006, "learning_rate": 1.5272858633146014e-05, "loss": 0.4411, "step": 30548 }, { "epoch": 0.6478971813959407, "grad_norm": 0.34770968556404114, "learning_rate": 1.5272575263254274e-05, "loss": 0.4359, "step": 30549 }, { "epoch": 0.6479183898538737, "grad_norm": 0.40361347794532776, "learning_rate": 1.5272291887498423e-05, "loss": 0.478, "step": 30550 }, { "epoch": 0.6479395983118067, "grad_norm": 0.7245537042617798, "learning_rate": 1.5272008505878773e-05, "loss": 0.5398, "step": 30551 }, { "epoch": 0.6479608067697398, "grad_norm": 0.3784838616847992, "learning_rate": 1.5271725118395646e-05, "loss": 0.4794, "step": 30552 }, { "epoch": 0.6479820152276728, "grad_norm": 0.4129578471183777, "learning_rate": 1.5271441725049347e-05, "loss": 0.476, "step": 30553 }, { "epoch": 0.6480032236856058, "grad_norm": 0.36297446489334106, "learning_rate": 1.52711583258402e-05, "loss": 0.4367, "step": 30554 }, { "epoch": 0.6480244321435389, "grad_norm": 0.34738507866859436, "learning_rate": 1.5270874920768516e-05, "loss": 0.4217, "step": 30555 }, { "epoch": 0.6480456406014719, "grad_norm": 0.36149415373802185, "learning_rate": 1.527059150983461e-05, "loss": 0.4979, "step": 30556 }, { "epoch": 0.6480668490594049, "grad_norm": 0.3525175154209137, "learning_rate": 1.5270308093038794e-05, "loss": 0.4585, "step": 30557 }, { "epoch": 0.6480880575173379, "grad_norm": 0.3404465615749359, "learning_rate": 1.5270024670381394e-05, "loss": 0.5066, "step": 30558 }, { "epoch": 0.648109265975271, "grad_norm": 0.40511849522590637, "learning_rate": 1.5269741241862715e-05, "loss": 0.5524, "step": 30559 }, { "epoch": 0.6481304744332039, "grad_norm": 0.36358150839805603, "learning_rate": 1.5269457807483074e-05, "loss": 0.5197, "step": 30560 }, { "epoch": 0.648151682891137, "grad_norm": 0.3924750089645386, "learning_rate": 1.526917436724279e-05, "loss": 0.5918, "step": 30561 }, { "epoch": 0.64817289134907, "grad_norm": 0.3522965908050537, "learning_rate": 1.5268890921142175e-05, "loss": 0.5044, "step": 30562 }, { "epoch": 0.648194099807003, "grad_norm": 0.3573046624660492, "learning_rate": 1.5268607469181545e-05, "loss": 0.4777, "step": 30563 }, { "epoch": 0.648215308264936, "grad_norm": 0.3355099558830261, "learning_rate": 1.526832401136122e-05, "loss": 0.4714, "step": 30564 }, { "epoch": 0.6482365167228691, "grad_norm": 0.3828033208847046, "learning_rate": 1.5268040547681505e-05, "loss": 0.5158, "step": 30565 }, { "epoch": 0.6482577251808022, "grad_norm": 0.32255813479423523, "learning_rate": 1.5267757078142725e-05, "loss": 0.4765, "step": 30566 }, { "epoch": 0.6482789336387351, "grad_norm": 0.6183890104293823, "learning_rate": 1.5267473602745188e-05, "loss": 0.5756, "step": 30567 }, { "epoch": 0.6483001420966682, "grad_norm": 0.3535240590572357, "learning_rate": 1.5267190121489215e-05, "loss": 0.5205, "step": 30568 }, { "epoch": 0.6483213505546012, "grad_norm": 0.35180386900901794, "learning_rate": 1.5266906634375115e-05, "loss": 0.5286, "step": 30569 }, { "epoch": 0.6483425590125342, "grad_norm": 0.614600658416748, "learning_rate": 1.5266623141403213e-05, "loss": 0.5274, "step": 30570 }, { "epoch": 0.6483637674704672, "grad_norm": 0.28547778725624084, "learning_rate": 1.5266339642573817e-05, "loss": 0.3769, "step": 30571 }, { "epoch": 0.6483849759284003, "grad_norm": 0.4427204430103302, "learning_rate": 1.5266056137887242e-05, "loss": 0.5287, "step": 30572 }, { "epoch": 0.6484061843863332, "grad_norm": 0.3452891409397125, "learning_rate": 1.5265772627343804e-05, "loss": 0.5192, "step": 30573 }, { "epoch": 0.6484273928442663, "grad_norm": 0.4162282943725586, "learning_rate": 1.5265489110943825e-05, "loss": 0.4726, "step": 30574 }, { "epoch": 0.6484486013021993, "grad_norm": 0.3483034670352936, "learning_rate": 1.5265205588687608e-05, "loss": 0.5176, "step": 30575 }, { "epoch": 0.6484698097601324, "grad_norm": 0.4010419547557831, "learning_rate": 1.5264922060575477e-05, "loss": 0.4564, "step": 30576 }, { "epoch": 0.6484910182180653, "grad_norm": 0.34592771530151367, "learning_rate": 1.5264638526607746e-05, "loss": 0.4574, "step": 30577 }, { "epoch": 0.6485122266759984, "grad_norm": 0.3704516887664795, "learning_rate": 1.526435498678473e-05, "loss": 0.482, "step": 30578 }, { "epoch": 0.6485334351339314, "grad_norm": 0.35604405403137207, "learning_rate": 1.5264071441106745e-05, "loss": 0.4764, "step": 30579 }, { "epoch": 0.6485546435918644, "grad_norm": 0.3680553436279297, "learning_rate": 1.5263787889574105e-05, "loss": 0.5279, "step": 30580 }, { "epoch": 0.6485758520497975, "grad_norm": 0.3507956266403198, "learning_rate": 1.5263504332187126e-05, "loss": 0.5195, "step": 30581 }, { "epoch": 0.6485970605077305, "grad_norm": 0.3102627992630005, "learning_rate": 1.526322076894612e-05, "loss": 0.4039, "step": 30582 }, { "epoch": 0.6486182689656635, "grad_norm": 0.3218100965023041, "learning_rate": 1.526293719985141e-05, "loss": 0.4559, "step": 30583 }, { "epoch": 0.6486394774235965, "grad_norm": 0.35487496852874756, "learning_rate": 1.5262653624903307e-05, "loss": 0.4793, "step": 30584 }, { "epoch": 0.6486606858815296, "grad_norm": 0.37294307351112366, "learning_rate": 1.5262370044102125e-05, "loss": 0.5157, "step": 30585 }, { "epoch": 0.6486818943394626, "grad_norm": 0.3466302454471588, "learning_rate": 1.5262086457448182e-05, "loss": 0.4654, "step": 30586 }, { "epoch": 0.6487031027973956, "grad_norm": 0.34739983081817627, "learning_rate": 1.5261802864941788e-05, "loss": 0.4614, "step": 30587 }, { "epoch": 0.6487243112553286, "grad_norm": 0.3411410450935364, "learning_rate": 1.5261519266583266e-05, "loss": 0.5139, "step": 30588 }, { "epoch": 0.6487455197132617, "grad_norm": 0.326895534992218, "learning_rate": 1.5261235662372925e-05, "loss": 0.5201, "step": 30589 }, { "epoch": 0.6487667281711946, "grad_norm": 0.378018319606781, "learning_rate": 1.5260952052311085e-05, "loss": 0.3377, "step": 30590 }, { "epoch": 0.6487879366291277, "grad_norm": 0.3762407600879669, "learning_rate": 1.5260668436398063e-05, "loss": 0.4472, "step": 30591 }, { "epoch": 0.6488091450870607, "grad_norm": 0.36618033051490784, "learning_rate": 1.526038481463417e-05, "loss": 0.4939, "step": 30592 }, { "epoch": 0.6488303535449937, "grad_norm": 0.3427754342556, "learning_rate": 1.5260101187019717e-05, "loss": 0.4531, "step": 30593 }, { "epoch": 0.6488515620029268, "grad_norm": 0.3266606032848358, "learning_rate": 1.5259817553555034e-05, "loss": 0.525, "step": 30594 }, { "epoch": 0.6488727704608598, "grad_norm": 0.34210920333862305, "learning_rate": 1.525953391424042e-05, "loss": 0.4305, "step": 30595 }, { "epoch": 0.6488939789187929, "grad_norm": 0.3617594540119171, "learning_rate": 1.5259250269076202e-05, "loss": 0.5513, "step": 30596 }, { "epoch": 0.6489151873767258, "grad_norm": 0.3174150288105011, "learning_rate": 1.525896661806269e-05, "loss": 0.4876, "step": 30597 }, { "epoch": 0.6489363958346589, "grad_norm": 0.3807935118675232, "learning_rate": 1.5258682961200203e-05, "loss": 0.4884, "step": 30598 }, { "epoch": 0.6489576042925919, "grad_norm": 0.39162030816078186, "learning_rate": 1.5258399298489052e-05, "loss": 0.3619, "step": 30599 }, { "epoch": 0.6489788127505249, "grad_norm": 0.35123223066329956, "learning_rate": 1.5258115629929558e-05, "loss": 0.5225, "step": 30600 }, { "epoch": 0.6490000212084579, "grad_norm": 0.36620694398880005, "learning_rate": 1.5257831955522031e-05, "loss": 0.4954, "step": 30601 }, { "epoch": 0.649021229666391, "grad_norm": 0.3458101153373718, "learning_rate": 1.5257548275266789e-05, "loss": 0.4199, "step": 30602 }, { "epoch": 0.6490424381243239, "grad_norm": 0.3629996180534363, "learning_rate": 1.525726458916415e-05, "loss": 0.4939, "step": 30603 }, { "epoch": 0.649063646582257, "grad_norm": 0.3498082160949707, "learning_rate": 1.5256980897214422e-05, "loss": 0.4969, "step": 30604 }, { "epoch": 0.64908485504019, "grad_norm": 0.4074161648750305, "learning_rate": 1.5256697199417932e-05, "loss": 0.473, "step": 30605 }, { "epoch": 0.649106063498123, "grad_norm": 0.3446296453475952, "learning_rate": 1.5256413495774985e-05, "loss": 0.5227, "step": 30606 }, { "epoch": 0.6491272719560561, "grad_norm": 0.37743228673934937, "learning_rate": 1.5256129786285898e-05, "loss": 0.4383, "step": 30607 }, { "epoch": 0.6491484804139891, "grad_norm": 0.3223477602005005, "learning_rate": 1.5255846070950995e-05, "loss": 0.4651, "step": 30608 }, { "epoch": 0.6491696888719222, "grad_norm": 0.33501845598220825, "learning_rate": 1.5255562349770583e-05, "loss": 0.4082, "step": 30609 }, { "epoch": 0.6491908973298551, "grad_norm": 0.3693121373653412, "learning_rate": 1.525527862274498e-05, "loss": 0.4707, "step": 30610 }, { "epoch": 0.6492121057877882, "grad_norm": 0.48205652832984924, "learning_rate": 1.5254994889874502e-05, "loss": 0.4629, "step": 30611 }, { "epoch": 0.6492333142457212, "grad_norm": 0.3710079789161682, "learning_rate": 1.5254711151159465e-05, "loss": 0.5154, "step": 30612 }, { "epoch": 0.6492545227036542, "grad_norm": 0.3371101915836334, "learning_rate": 1.5254427406600184e-05, "loss": 0.4978, "step": 30613 }, { "epoch": 0.6492757311615872, "grad_norm": 0.36172738671302795, "learning_rate": 1.5254143656196971e-05, "loss": 0.4943, "step": 30614 }, { "epoch": 0.6492969396195203, "grad_norm": 0.4048338830471039, "learning_rate": 1.5253859899950149e-05, "loss": 0.4321, "step": 30615 }, { "epoch": 0.6493181480774532, "grad_norm": 0.3462294340133667, "learning_rate": 1.5253576137860027e-05, "loss": 0.4837, "step": 30616 }, { "epoch": 0.6493393565353863, "grad_norm": 0.34119367599487305, "learning_rate": 1.5253292369926927e-05, "loss": 0.4592, "step": 30617 }, { "epoch": 0.6493605649933193, "grad_norm": 0.36985740065574646, "learning_rate": 1.5253008596151158e-05, "loss": 0.5104, "step": 30618 }, { "epoch": 0.6493817734512524, "grad_norm": 0.3303241729736328, "learning_rate": 1.5252724816533037e-05, "loss": 0.4628, "step": 30619 }, { "epoch": 0.6494029819091853, "grad_norm": 0.35477977991104126, "learning_rate": 1.5252441031072884e-05, "loss": 0.4508, "step": 30620 }, { "epoch": 0.6494241903671184, "grad_norm": 0.3554994761943817, "learning_rate": 1.5252157239771009e-05, "loss": 0.5089, "step": 30621 }, { "epoch": 0.6494453988250515, "grad_norm": 0.3470236361026764, "learning_rate": 1.5251873442627734e-05, "loss": 0.4687, "step": 30622 }, { "epoch": 0.6494666072829844, "grad_norm": 0.3948678970336914, "learning_rate": 1.5251589639643369e-05, "loss": 0.5706, "step": 30623 }, { "epoch": 0.6494878157409175, "grad_norm": 0.36024078726768494, "learning_rate": 1.5251305830818232e-05, "loss": 0.4905, "step": 30624 }, { "epoch": 0.6495090241988505, "grad_norm": 0.3501138687133789, "learning_rate": 1.5251022016152639e-05, "loss": 0.4653, "step": 30625 }, { "epoch": 0.6495302326567836, "grad_norm": 0.41010746359825134, "learning_rate": 1.5250738195646903e-05, "loss": 0.4121, "step": 30626 }, { "epoch": 0.6495514411147165, "grad_norm": 0.3384447693824768, "learning_rate": 1.5250454369301342e-05, "loss": 0.5239, "step": 30627 }, { "epoch": 0.6495726495726496, "grad_norm": 0.38352397084236145, "learning_rate": 1.5250170537116273e-05, "loss": 0.5766, "step": 30628 }, { "epoch": 0.6495938580305826, "grad_norm": 0.34821170568466187, "learning_rate": 1.5249886699092009e-05, "loss": 0.4603, "step": 30629 }, { "epoch": 0.6496150664885156, "grad_norm": 0.3370799124240875, "learning_rate": 1.5249602855228864e-05, "loss": 0.4195, "step": 30630 }, { "epoch": 0.6496362749464486, "grad_norm": 0.3573971390724182, "learning_rate": 1.5249319005527161e-05, "loss": 0.5156, "step": 30631 }, { "epoch": 0.6496574834043817, "grad_norm": 0.3749576508998871, "learning_rate": 1.5249035149987209e-05, "loss": 0.5815, "step": 30632 }, { "epoch": 0.6496786918623146, "grad_norm": 0.37949779629707336, "learning_rate": 1.5248751288609324e-05, "loss": 0.4979, "step": 30633 }, { "epoch": 0.6496999003202477, "grad_norm": 0.35314539074897766, "learning_rate": 1.5248467421393827e-05, "loss": 0.5467, "step": 30634 }, { "epoch": 0.6497211087781808, "grad_norm": 0.5208718776702881, "learning_rate": 1.5248183548341025e-05, "loss": 0.4704, "step": 30635 }, { "epoch": 0.6497423172361138, "grad_norm": 0.3559347987174988, "learning_rate": 1.5247899669451242e-05, "loss": 0.4644, "step": 30636 }, { "epoch": 0.6497635256940468, "grad_norm": 0.4448666274547577, "learning_rate": 1.5247615784724793e-05, "loss": 0.4892, "step": 30637 }, { "epoch": 0.6497847341519798, "grad_norm": 0.3336679935455322, "learning_rate": 1.5247331894161988e-05, "loss": 0.5193, "step": 30638 }, { "epoch": 0.6498059426099129, "grad_norm": 0.34473326802253723, "learning_rate": 1.5247047997763146e-05, "loss": 0.4983, "step": 30639 }, { "epoch": 0.6498271510678458, "grad_norm": 0.3458389341831207, "learning_rate": 1.5246764095528583e-05, "loss": 0.5221, "step": 30640 }, { "epoch": 0.6498483595257789, "grad_norm": 0.3488640785217285, "learning_rate": 1.5246480187458614e-05, "loss": 0.4247, "step": 30641 }, { "epoch": 0.6498695679837119, "grad_norm": 0.426457017660141, "learning_rate": 1.5246196273553557e-05, "loss": 0.5244, "step": 30642 }, { "epoch": 0.649890776441645, "grad_norm": 0.32391342520713806, "learning_rate": 1.5245912353813728e-05, "loss": 0.4523, "step": 30643 }, { "epoch": 0.6499119848995779, "grad_norm": 0.3661835789680481, "learning_rate": 1.5245628428239436e-05, "loss": 0.5256, "step": 30644 }, { "epoch": 0.649933193357511, "grad_norm": 0.3486015200614929, "learning_rate": 1.5245344496831005e-05, "loss": 0.5155, "step": 30645 }, { "epoch": 0.649954401815444, "grad_norm": 0.3761134147644043, "learning_rate": 1.5245060559588747e-05, "loss": 0.4809, "step": 30646 }, { "epoch": 0.649975610273377, "grad_norm": 0.3744071125984192, "learning_rate": 1.5244776616512976e-05, "loss": 0.5196, "step": 30647 }, { "epoch": 0.6499968187313101, "grad_norm": 0.32847702503204346, "learning_rate": 1.5244492667604013e-05, "loss": 0.4671, "step": 30648 }, { "epoch": 0.6500180271892431, "grad_norm": 0.3514683246612549, "learning_rate": 1.5244208712862167e-05, "loss": 0.5473, "step": 30649 }, { "epoch": 0.6500392356471761, "grad_norm": 0.365451455116272, "learning_rate": 1.5243924752287762e-05, "loss": 0.453, "step": 30650 }, { "epoch": 0.6500604441051091, "grad_norm": 0.34902095794677734, "learning_rate": 1.5243640785881106e-05, "loss": 0.4522, "step": 30651 }, { "epoch": 0.6500816525630422, "grad_norm": 0.3595733940601349, "learning_rate": 1.524335681364252e-05, "loss": 0.5377, "step": 30652 }, { "epoch": 0.6501028610209751, "grad_norm": 0.31355297565460205, "learning_rate": 1.5243072835572319e-05, "loss": 0.5, "step": 30653 }, { "epoch": 0.6501240694789082, "grad_norm": 0.3163287937641144, "learning_rate": 1.5242788851670818e-05, "loss": 0.4602, "step": 30654 }, { "epoch": 0.6501452779368412, "grad_norm": 0.34762123227119446, "learning_rate": 1.5242504861938328e-05, "loss": 0.501, "step": 30655 }, { "epoch": 0.6501664863947743, "grad_norm": 0.5593845248222351, "learning_rate": 1.5242220866375178e-05, "loss": 0.5413, "step": 30656 }, { "epoch": 0.6501876948527072, "grad_norm": 0.36053016781806946, "learning_rate": 1.5241936864981668e-05, "loss": 0.3924, "step": 30657 }, { "epoch": 0.6502089033106403, "grad_norm": 0.35265302658081055, "learning_rate": 1.5241652857758125e-05, "loss": 0.4474, "step": 30658 }, { "epoch": 0.6502301117685733, "grad_norm": 0.4224221110343933, "learning_rate": 1.5241368844704859e-05, "loss": 0.5333, "step": 30659 }, { "epoch": 0.6502513202265063, "grad_norm": 0.439664751291275, "learning_rate": 1.524108482582219e-05, "loss": 0.5277, "step": 30660 }, { "epoch": 0.6502725286844393, "grad_norm": 0.38247302174568176, "learning_rate": 1.5240800801110429e-05, "loss": 0.5008, "step": 30661 }, { "epoch": 0.6502937371423724, "grad_norm": 0.3862874209880829, "learning_rate": 1.52405167705699e-05, "loss": 0.5538, "step": 30662 }, { "epoch": 0.6503149456003054, "grad_norm": 0.35461410880088806, "learning_rate": 1.5240232734200907e-05, "loss": 0.5119, "step": 30663 }, { "epoch": 0.6503361540582384, "grad_norm": 0.40085864067077637, "learning_rate": 1.523994869200378e-05, "loss": 0.4508, "step": 30664 }, { "epoch": 0.6503573625161715, "grad_norm": 0.3575748801231384, "learning_rate": 1.5239664643978822e-05, "loss": 0.4623, "step": 30665 }, { "epoch": 0.6503785709741045, "grad_norm": 0.35777053236961365, "learning_rate": 1.523938059012636e-05, "loss": 0.4881, "step": 30666 }, { "epoch": 0.6503997794320375, "grad_norm": 0.3278290033340454, "learning_rate": 1.52390965304467e-05, "loss": 0.5174, "step": 30667 }, { "epoch": 0.6504209878899705, "grad_norm": 0.37353000044822693, "learning_rate": 1.5238812464940163e-05, "loss": 0.5007, "step": 30668 }, { "epoch": 0.6504421963479036, "grad_norm": 0.3165193498134613, "learning_rate": 1.5238528393607066e-05, "loss": 0.4291, "step": 30669 }, { "epoch": 0.6504634048058365, "grad_norm": 0.35073453187942505, "learning_rate": 1.5238244316447724e-05, "loss": 0.4564, "step": 30670 }, { "epoch": 0.6504846132637696, "grad_norm": 0.36167946457862854, "learning_rate": 1.523796023346245e-05, "loss": 0.46, "step": 30671 }, { "epoch": 0.6505058217217026, "grad_norm": 0.34259793162345886, "learning_rate": 1.523767614465156e-05, "loss": 0.497, "step": 30672 }, { "epoch": 0.6505270301796356, "grad_norm": 0.3914050757884979, "learning_rate": 1.5237392050015373e-05, "loss": 0.513, "step": 30673 }, { "epoch": 0.6505482386375686, "grad_norm": 0.3952610194683075, "learning_rate": 1.5237107949554206e-05, "loss": 0.6052, "step": 30674 }, { "epoch": 0.6505694470955017, "grad_norm": 0.3719346523284912, "learning_rate": 1.5236823843268373e-05, "loss": 0.5615, "step": 30675 }, { "epoch": 0.6505906555534348, "grad_norm": 0.3628709018230438, "learning_rate": 1.523653973115819e-05, "loss": 0.5318, "step": 30676 }, { "epoch": 0.6506118640113677, "grad_norm": 0.3338926434516907, "learning_rate": 1.5236255613223972e-05, "loss": 0.5016, "step": 30677 }, { "epoch": 0.6506330724693008, "grad_norm": 0.38398537039756775, "learning_rate": 1.5235971489466037e-05, "loss": 0.5322, "step": 30678 }, { "epoch": 0.6506542809272338, "grad_norm": 0.4527321755886078, "learning_rate": 1.5235687359884699e-05, "loss": 0.5406, "step": 30679 }, { "epoch": 0.6506754893851668, "grad_norm": 0.34821221232414246, "learning_rate": 1.5235403224480277e-05, "loss": 0.4653, "step": 30680 }, { "epoch": 0.6506966978430998, "grad_norm": 0.32434308528900146, "learning_rate": 1.5235119083253082e-05, "loss": 0.5159, "step": 30681 }, { "epoch": 0.6507179063010329, "grad_norm": 0.41300880908966064, "learning_rate": 1.5234834936203436e-05, "loss": 0.4084, "step": 30682 }, { "epoch": 0.6507391147589658, "grad_norm": 0.34878671169281006, "learning_rate": 1.523455078333165e-05, "loss": 0.5331, "step": 30683 }, { "epoch": 0.6507603232168989, "grad_norm": 0.8027560710906982, "learning_rate": 1.5234266624638041e-05, "loss": 0.4873, "step": 30684 }, { "epoch": 0.6507815316748319, "grad_norm": 0.34056758880615234, "learning_rate": 1.5233982460122931e-05, "loss": 0.4938, "step": 30685 }, { "epoch": 0.650802740132765, "grad_norm": 0.3163597285747528, "learning_rate": 1.5233698289786625e-05, "loss": 0.4389, "step": 30686 }, { "epoch": 0.6508239485906979, "grad_norm": 0.3403521478176117, "learning_rate": 1.523341411362945e-05, "loss": 0.4891, "step": 30687 }, { "epoch": 0.650845157048631, "grad_norm": 0.37057435512542725, "learning_rate": 1.5233129931651717e-05, "loss": 0.4297, "step": 30688 }, { "epoch": 0.6508663655065641, "grad_norm": 0.3439003527164459, "learning_rate": 1.5232845743853739e-05, "loss": 0.4683, "step": 30689 }, { "epoch": 0.650887573964497, "grad_norm": 0.35436540842056274, "learning_rate": 1.5232561550235839e-05, "loss": 0.4451, "step": 30690 }, { "epoch": 0.6509087824224301, "grad_norm": 0.38097232580184937, "learning_rate": 1.5232277350798326e-05, "loss": 0.4666, "step": 30691 }, { "epoch": 0.6509299908803631, "grad_norm": 0.33855462074279785, "learning_rate": 1.5231993145541521e-05, "loss": 0.4512, "step": 30692 }, { "epoch": 0.6509511993382961, "grad_norm": 0.3671189844608307, "learning_rate": 1.5231708934465742e-05, "loss": 0.5701, "step": 30693 }, { "epoch": 0.6509724077962291, "grad_norm": 0.34445756673812866, "learning_rate": 1.5231424717571298e-05, "loss": 0.4605, "step": 30694 }, { "epoch": 0.6509936162541622, "grad_norm": 0.3908562660217285, "learning_rate": 1.523114049485851e-05, "loss": 0.4994, "step": 30695 }, { "epoch": 0.6510148247120952, "grad_norm": 0.3504197895526886, "learning_rate": 1.5230856266327694e-05, "loss": 0.4595, "step": 30696 }, { "epoch": 0.6510360331700282, "grad_norm": 0.4215811789035797, "learning_rate": 1.5230572031979161e-05, "loss": 0.5425, "step": 30697 }, { "epoch": 0.6510572416279612, "grad_norm": 0.34274163842201233, "learning_rate": 1.5230287791813235e-05, "loss": 0.4678, "step": 30698 }, { "epoch": 0.6510784500858943, "grad_norm": 0.3663794994354248, "learning_rate": 1.5230003545830228e-05, "loss": 0.5153, "step": 30699 }, { "epoch": 0.6510996585438272, "grad_norm": 0.3319456875324249, "learning_rate": 1.5229719294030453e-05, "loss": 0.5153, "step": 30700 }, { "epoch": 0.6511208670017603, "grad_norm": 0.4119096100330353, "learning_rate": 1.5229435036414235e-05, "loss": 0.5034, "step": 30701 }, { "epoch": 0.6511420754596933, "grad_norm": 0.3587196469306946, "learning_rate": 1.5229150772981883e-05, "loss": 0.5014, "step": 30702 }, { "epoch": 0.6511632839176263, "grad_norm": 0.3753317594528198, "learning_rate": 1.5228866503733715e-05, "loss": 0.4753, "step": 30703 }, { "epoch": 0.6511844923755594, "grad_norm": 0.37099185585975647, "learning_rate": 1.5228582228670043e-05, "loss": 0.4638, "step": 30704 }, { "epoch": 0.6512057008334924, "grad_norm": 0.34745320677757263, "learning_rate": 1.5228297947791194e-05, "loss": 0.5237, "step": 30705 }, { "epoch": 0.6512269092914255, "grad_norm": 0.36793026328086853, "learning_rate": 1.5228013661097472e-05, "loss": 0.5547, "step": 30706 }, { "epoch": 0.6512481177493584, "grad_norm": 0.35500451922416687, "learning_rate": 1.5227729368589202e-05, "loss": 0.445, "step": 30707 }, { "epoch": 0.6512693262072915, "grad_norm": 0.35282671451568604, "learning_rate": 1.5227445070266695e-05, "loss": 0.5354, "step": 30708 }, { "epoch": 0.6512905346652245, "grad_norm": 0.3699248731136322, "learning_rate": 1.5227160766130268e-05, "loss": 0.4955, "step": 30709 }, { "epoch": 0.6513117431231575, "grad_norm": 0.4009005129337311, "learning_rate": 1.522687645618024e-05, "loss": 0.4817, "step": 30710 }, { "epoch": 0.6513329515810905, "grad_norm": 0.3594834506511688, "learning_rate": 1.5226592140416925e-05, "loss": 0.4679, "step": 30711 }, { "epoch": 0.6513541600390236, "grad_norm": 0.3568708300590515, "learning_rate": 1.5226307818840639e-05, "loss": 0.5213, "step": 30712 }, { "epoch": 0.6513753684969565, "grad_norm": 0.36839696764945984, "learning_rate": 1.5226023491451702e-05, "loss": 0.4259, "step": 30713 }, { "epoch": 0.6513965769548896, "grad_norm": 0.34526437520980835, "learning_rate": 1.5225739158250422e-05, "loss": 0.4947, "step": 30714 }, { "epoch": 0.6514177854128226, "grad_norm": 1.5012956857681274, "learning_rate": 1.5225454819237127e-05, "loss": 0.5373, "step": 30715 }, { "epoch": 0.6514389938707557, "grad_norm": 0.3777015209197998, "learning_rate": 1.5225170474412121e-05, "loss": 0.5234, "step": 30716 }, { "epoch": 0.6514602023286887, "grad_norm": 0.34095054864883423, "learning_rate": 1.5224886123775724e-05, "loss": 0.5037, "step": 30717 }, { "epoch": 0.6514814107866217, "grad_norm": 0.37136146426200867, "learning_rate": 1.522460176732826e-05, "loss": 0.5012, "step": 30718 }, { "epoch": 0.6515026192445548, "grad_norm": 0.3424988090991974, "learning_rate": 1.5224317405070037e-05, "loss": 0.4549, "step": 30719 }, { "epoch": 0.6515238277024877, "grad_norm": 0.4070919454097748, "learning_rate": 1.5224033037001373e-05, "loss": 0.4729, "step": 30720 }, { "epoch": 0.6515450361604208, "grad_norm": 0.365063339471817, "learning_rate": 1.5223748663122586e-05, "loss": 0.5299, "step": 30721 }, { "epoch": 0.6515662446183538, "grad_norm": 0.4877137839794159, "learning_rate": 1.5223464283433991e-05, "loss": 0.4718, "step": 30722 }, { "epoch": 0.6515874530762868, "grad_norm": 0.5965343713760376, "learning_rate": 1.52231798979359e-05, "loss": 0.4963, "step": 30723 }, { "epoch": 0.6516086615342198, "grad_norm": 0.35311275720596313, "learning_rate": 1.5222895506628638e-05, "loss": 0.5569, "step": 30724 }, { "epoch": 0.6516298699921529, "grad_norm": 0.3748280107975006, "learning_rate": 1.5222611109512518e-05, "loss": 0.4968, "step": 30725 }, { "epoch": 0.6516510784500859, "grad_norm": 0.3806057870388031, "learning_rate": 1.5222326706587854e-05, "loss": 0.5051, "step": 30726 }, { "epoch": 0.6516722869080189, "grad_norm": 0.36758318543434143, "learning_rate": 1.5222042297854963e-05, "loss": 0.4897, "step": 30727 }, { "epoch": 0.6516934953659519, "grad_norm": 0.34099382162094116, "learning_rate": 1.5221757883314161e-05, "loss": 0.4835, "step": 30728 }, { "epoch": 0.651714703823885, "grad_norm": 0.349681556224823, "learning_rate": 1.5221473462965764e-05, "loss": 0.4469, "step": 30729 }, { "epoch": 0.651735912281818, "grad_norm": 0.33369049429893494, "learning_rate": 1.5221189036810093e-05, "loss": 0.4856, "step": 30730 }, { "epoch": 0.651757120739751, "grad_norm": 0.35233306884765625, "learning_rate": 1.522090460484746e-05, "loss": 0.5165, "step": 30731 }, { "epoch": 0.6517783291976841, "grad_norm": 0.33706963062286377, "learning_rate": 1.5220620167078183e-05, "loss": 0.4813, "step": 30732 }, { "epoch": 0.651799537655617, "grad_norm": 0.3912077248096466, "learning_rate": 1.5220335723502577e-05, "loss": 0.5277, "step": 30733 }, { "epoch": 0.6518207461135501, "grad_norm": 0.3344786763191223, "learning_rate": 1.5220051274120957e-05, "loss": 0.4875, "step": 30734 }, { "epoch": 0.6518419545714831, "grad_norm": 0.3565249443054199, "learning_rate": 1.5219766818933646e-05, "loss": 0.5528, "step": 30735 }, { "epoch": 0.6518631630294162, "grad_norm": 0.41859543323516846, "learning_rate": 1.5219482357940951e-05, "loss": 0.556, "step": 30736 }, { "epoch": 0.6518843714873491, "grad_norm": 0.47446516156196594, "learning_rate": 1.5219197891143194e-05, "loss": 0.544, "step": 30737 }, { "epoch": 0.6519055799452822, "grad_norm": 1.9713553190231323, "learning_rate": 1.5218913418540694e-05, "loss": 0.5324, "step": 30738 }, { "epoch": 0.6519267884032152, "grad_norm": 0.4522373676300049, "learning_rate": 1.5218628940133759e-05, "loss": 0.4668, "step": 30739 }, { "epoch": 0.6519479968611482, "grad_norm": 0.3528869152069092, "learning_rate": 1.521834445592271e-05, "loss": 0.5581, "step": 30740 }, { "epoch": 0.6519692053190812, "grad_norm": 0.3825690448284149, "learning_rate": 1.521805996590787e-05, "loss": 0.5518, "step": 30741 }, { "epoch": 0.6519904137770143, "grad_norm": 0.37715381383895874, "learning_rate": 1.5217775470089542e-05, "loss": 0.4454, "step": 30742 }, { "epoch": 0.6520116222349474, "grad_norm": 0.3520372807979584, "learning_rate": 1.5217490968468052e-05, "loss": 0.4234, "step": 30743 }, { "epoch": 0.6520328306928803, "grad_norm": 0.3630472719669342, "learning_rate": 1.5217206461043716e-05, "loss": 0.4384, "step": 30744 }, { "epoch": 0.6520540391508134, "grad_norm": 0.41338852047920227, "learning_rate": 1.5216921947816845e-05, "loss": 0.6106, "step": 30745 }, { "epoch": 0.6520752476087464, "grad_norm": 0.3746434450149536, "learning_rate": 1.5216637428787762e-05, "loss": 0.525, "step": 30746 }, { "epoch": 0.6520964560666794, "grad_norm": 0.37040746212005615, "learning_rate": 1.5216352903956778e-05, "loss": 0.4438, "step": 30747 }, { "epoch": 0.6521176645246124, "grad_norm": 0.3733346462249756, "learning_rate": 1.521606837332421e-05, "loss": 0.5366, "step": 30748 }, { "epoch": 0.6521388729825455, "grad_norm": 0.3718603849411011, "learning_rate": 1.521578383689038e-05, "loss": 0.5171, "step": 30749 }, { "epoch": 0.6521600814404784, "grad_norm": 0.34724292159080505, "learning_rate": 1.5215499294655597e-05, "loss": 0.5766, "step": 30750 }, { "epoch": 0.6521812898984115, "grad_norm": 0.3574382960796356, "learning_rate": 1.5215214746620181e-05, "loss": 0.5724, "step": 30751 }, { "epoch": 0.6522024983563445, "grad_norm": 0.32311099767684937, "learning_rate": 1.5214930192784453e-05, "loss": 0.4309, "step": 30752 }, { "epoch": 0.6522237068142775, "grad_norm": 0.36543285846710205, "learning_rate": 1.5214645633148722e-05, "loss": 0.573, "step": 30753 }, { "epoch": 0.6522449152722105, "grad_norm": 0.3434351682662964, "learning_rate": 1.5214361067713306e-05, "loss": 0.4876, "step": 30754 }, { "epoch": 0.6522661237301436, "grad_norm": 0.3656995892524719, "learning_rate": 1.5214076496478525e-05, "loss": 0.5693, "step": 30755 }, { "epoch": 0.6522873321880766, "grad_norm": 0.35623082518577576, "learning_rate": 1.5213791919444693e-05, "loss": 0.5223, "step": 30756 }, { "epoch": 0.6523085406460096, "grad_norm": 0.3512044847011566, "learning_rate": 1.5213507336612125e-05, "loss": 0.5082, "step": 30757 }, { "epoch": 0.6523297491039427, "grad_norm": 0.35303419828414917, "learning_rate": 1.5213222747981142e-05, "loss": 0.4138, "step": 30758 }, { "epoch": 0.6523509575618757, "grad_norm": 0.3782438337802887, "learning_rate": 1.5212938153552057e-05, "loss": 0.53, "step": 30759 }, { "epoch": 0.6523721660198087, "grad_norm": 0.3702796399593353, "learning_rate": 1.5212653553325187e-05, "loss": 0.5763, "step": 30760 }, { "epoch": 0.6523933744777417, "grad_norm": 0.36177879571914673, "learning_rate": 1.5212368947300848e-05, "loss": 0.5229, "step": 30761 }, { "epoch": 0.6524145829356748, "grad_norm": 0.31688591837882996, "learning_rate": 1.521208433547936e-05, "loss": 0.4767, "step": 30762 }, { "epoch": 0.6524357913936077, "grad_norm": 0.3530714511871338, "learning_rate": 1.5211799717861037e-05, "loss": 0.5289, "step": 30763 }, { "epoch": 0.6524569998515408, "grad_norm": 0.5920414924621582, "learning_rate": 1.5211515094446196e-05, "loss": 0.4911, "step": 30764 }, { "epoch": 0.6524782083094738, "grad_norm": 0.3714970052242279, "learning_rate": 1.5211230465235152e-05, "loss": 0.5079, "step": 30765 }, { "epoch": 0.6524994167674069, "grad_norm": 0.4256734549999237, "learning_rate": 1.5210945830228224e-05, "loss": 0.5202, "step": 30766 }, { "epoch": 0.6525206252253398, "grad_norm": 0.44972875714302063, "learning_rate": 1.5210661189425727e-05, "loss": 0.4272, "step": 30767 }, { "epoch": 0.6525418336832729, "grad_norm": 0.379222571849823, "learning_rate": 1.5210376542827971e-05, "loss": 0.4799, "step": 30768 }, { "epoch": 0.6525630421412059, "grad_norm": 0.32346421480178833, "learning_rate": 1.5210091890435287e-05, "loss": 0.4816, "step": 30769 }, { "epoch": 0.6525842505991389, "grad_norm": 0.38183891773223877, "learning_rate": 1.5209807232247985e-05, "loss": 0.4565, "step": 30770 }, { "epoch": 0.652605459057072, "grad_norm": 0.31987884640693665, "learning_rate": 1.5209522568266377e-05, "loss": 0.4401, "step": 30771 }, { "epoch": 0.652626667515005, "grad_norm": 0.38528597354888916, "learning_rate": 1.5209237898490784e-05, "loss": 0.5161, "step": 30772 }, { "epoch": 0.652647875972938, "grad_norm": 0.35840362310409546, "learning_rate": 1.520895322292152e-05, "loss": 0.4908, "step": 30773 }, { "epoch": 0.652669084430871, "grad_norm": 0.32467108964920044, "learning_rate": 1.5208668541558907e-05, "loss": 0.3902, "step": 30774 }, { "epoch": 0.6526902928888041, "grad_norm": 0.324444979429245, "learning_rate": 1.5208383854403257e-05, "loss": 0.4512, "step": 30775 }, { "epoch": 0.652711501346737, "grad_norm": 0.35900670289993286, "learning_rate": 1.5208099161454888e-05, "loss": 0.4652, "step": 30776 }, { "epoch": 0.6527327098046701, "grad_norm": 0.3522063195705414, "learning_rate": 1.5207814462714114e-05, "loss": 0.5502, "step": 30777 }, { "epoch": 0.6527539182626031, "grad_norm": 0.36979424953460693, "learning_rate": 1.5207529758181259e-05, "loss": 0.4966, "step": 30778 }, { "epoch": 0.6527751267205362, "grad_norm": 0.3326647877693176, "learning_rate": 1.5207245047856628e-05, "loss": 0.5484, "step": 30779 }, { "epoch": 0.6527963351784691, "grad_norm": 0.3823969066143036, "learning_rate": 1.520696033174055e-05, "loss": 0.5165, "step": 30780 }, { "epoch": 0.6528175436364022, "grad_norm": 0.3676307797431946, "learning_rate": 1.5206675609833332e-05, "loss": 0.446, "step": 30781 }, { "epoch": 0.6528387520943352, "grad_norm": 0.38256558775901794, "learning_rate": 1.5206390882135295e-05, "loss": 0.5451, "step": 30782 }, { "epoch": 0.6528599605522682, "grad_norm": 0.3391183018684387, "learning_rate": 1.5206106148646757e-05, "loss": 0.4554, "step": 30783 }, { "epoch": 0.6528811690102013, "grad_norm": 0.3584003150463104, "learning_rate": 1.5205821409368035e-05, "loss": 0.4474, "step": 30784 }, { "epoch": 0.6529023774681343, "grad_norm": 0.629336416721344, "learning_rate": 1.520553666429944e-05, "loss": 0.5228, "step": 30785 }, { "epoch": 0.6529235859260674, "grad_norm": 0.35123908519744873, "learning_rate": 1.5205251913441297e-05, "loss": 0.4733, "step": 30786 }, { "epoch": 0.6529447943840003, "grad_norm": 0.3352549374103546, "learning_rate": 1.520496715679391e-05, "loss": 0.4703, "step": 30787 }, { "epoch": 0.6529660028419334, "grad_norm": 0.3913559913635254, "learning_rate": 1.5204682394357608e-05, "loss": 0.4835, "step": 30788 }, { "epoch": 0.6529872112998664, "grad_norm": 0.33653631806373596, "learning_rate": 1.5204397626132706e-05, "loss": 0.4148, "step": 30789 }, { "epoch": 0.6530084197577994, "grad_norm": 0.4032600224018097, "learning_rate": 1.5204112852119517e-05, "loss": 0.4963, "step": 30790 }, { "epoch": 0.6530296282157324, "grad_norm": 0.4289267659187317, "learning_rate": 1.5203828072318358e-05, "loss": 0.4869, "step": 30791 }, { "epoch": 0.6530508366736655, "grad_norm": 0.3184252977371216, "learning_rate": 1.5203543286729545e-05, "loss": 0.4659, "step": 30792 }, { "epoch": 0.6530720451315984, "grad_norm": 0.3379184901714325, "learning_rate": 1.52032584953534e-05, "loss": 0.5037, "step": 30793 }, { "epoch": 0.6530932535895315, "grad_norm": 0.3115197718143463, "learning_rate": 1.5202973698190233e-05, "loss": 0.4255, "step": 30794 }, { "epoch": 0.6531144620474645, "grad_norm": 0.5834612846374512, "learning_rate": 1.5202688895240365e-05, "loss": 0.5439, "step": 30795 }, { "epoch": 0.6531356705053976, "grad_norm": 0.3908827602863312, "learning_rate": 1.520240408650411e-05, "loss": 0.5364, "step": 30796 }, { "epoch": 0.6531568789633305, "grad_norm": 0.35488754510879517, "learning_rate": 1.520211927198179e-05, "loss": 0.5072, "step": 30797 }, { "epoch": 0.6531780874212636, "grad_norm": 0.3486837148666382, "learning_rate": 1.5201834451673716e-05, "loss": 0.4769, "step": 30798 }, { "epoch": 0.6531992958791967, "grad_norm": 0.37938812375068665, "learning_rate": 1.5201549625580206e-05, "loss": 0.5724, "step": 30799 }, { "epoch": 0.6532205043371296, "grad_norm": 0.4020647704601288, "learning_rate": 1.5201264793701577e-05, "loss": 0.4398, "step": 30800 }, { "epoch": 0.6532417127950627, "grad_norm": 0.39713212847709656, "learning_rate": 1.5200979956038149e-05, "loss": 0.4757, "step": 30801 }, { "epoch": 0.6532629212529957, "grad_norm": 0.3601810336112976, "learning_rate": 1.5200695112590236e-05, "loss": 0.3874, "step": 30802 }, { "epoch": 0.6532841297109288, "grad_norm": 0.3888431787490845, "learning_rate": 1.5200410263358154e-05, "loss": 0.5417, "step": 30803 }, { "epoch": 0.6533053381688617, "grad_norm": 0.3528212904930115, "learning_rate": 1.5200125408342223e-05, "loss": 0.4996, "step": 30804 }, { "epoch": 0.6533265466267948, "grad_norm": 0.3425063490867615, "learning_rate": 1.5199840547542755e-05, "loss": 0.5061, "step": 30805 }, { "epoch": 0.6533477550847278, "grad_norm": 0.4095654785633087, "learning_rate": 1.5199555680960067e-05, "loss": 0.5331, "step": 30806 }, { "epoch": 0.6533689635426608, "grad_norm": 0.4023999571800232, "learning_rate": 1.5199270808594484e-05, "loss": 0.5565, "step": 30807 }, { "epoch": 0.6533901720005938, "grad_norm": 0.3828451931476593, "learning_rate": 1.5198985930446314e-05, "loss": 0.5832, "step": 30808 }, { "epoch": 0.6534113804585269, "grad_norm": 0.3779018819332123, "learning_rate": 1.519870104651588e-05, "loss": 0.5495, "step": 30809 }, { "epoch": 0.6534325889164598, "grad_norm": 0.4021572172641754, "learning_rate": 1.5198416156803494e-05, "loss": 0.522, "step": 30810 }, { "epoch": 0.6534537973743929, "grad_norm": 0.33817803859710693, "learning_rate": 1.5198131261309474e-05, "loss": 0.535, "step": 30811 }, { "epoch": 0.653475005832326, "grad_norm": 0.32392528653144836, "learning_rate": 1.5197846360034139e-05, "loss": 0.4879, "step": 30812 }, { "epoch": 0.653496214290259, "grad_norm": 0.3639078140258789, "learning_rate": 1.51975614529778e-05, "loss": 0.5459, "step": 30813 }, { "epoch": 0.653517422748192, "grad_norm": 0.364311158657074, "learning_rate": 1.5197276540140785e-05, "loss": 0.6206, "step": 30814 }, { "epoch": 0.653538631206125, "grad_norm": 0.3768276274204254, "learning_rate": 1.51969916215234e-05, "loss": 0.5342, "step": 30815 }, { "epoch": 0.6535598396640581, "grad_norm": 0.4232710897922516, "learning_rate": 1.5196706697125967e-05, "loss": 0.4919, "step": 30816 }, { "epoch": 0.653581048121991, "grad_norm": 0.3371218144893646, "learning_rate": 1.5196421766948804e-05, "loss": 0.4891, "step": 30817 }, { "epoch": 0.6536022565799241, "grad_norm": 0.35176151990890503, "learning_rate": 1.5196136830992224e-05, "loss": 0.4963, "step": 30818 }, { "epoch": 0.6536234650378571, "grad_norm": 0.3919209837913513, "learning_rate": 1.519585188925654e-05, "loss": 0.496, "step": 30819 }, { "epoch": 0.6536446734957901, "grad_norm": 0.3518226444721222, "learning_rate": 1.5195566941742084e-05, "loss": 0.4654, "step": 30820 }, { "epoch": 0.6536658819537231, "grad_norm": 0.35361629724502563, "learning_rate": 1.519528198844916e-05, "loss": 0.5845, "step": 30821 }, { "epoch": 0.6536870904116562, "grad_norm": 0.39472126960754395, "learning_rate": 1.5194997029378087e-05, "loss": 0.4486, "step": 30822 }, { "epoch": 0.6537082988695891, "grad_norm": 0.3803035020828247, "learning_rate": 1.5194712064529187e-05, "loss": 0.4698, "step": 30823 }, { "epoch": 0.6537295073275222, "grad_norm": 0.41389045119285583, "learning_rate": 1.519442709390277e-05, "loss": 0.4944, "step": 30824 }, { "epoch": 0.6537507157854553, "grad_norm": 0.3850642144680023, "learning_rate": 1.5194142117499157e-05, "loss": 0.4993, "step": 30825 }, { "epoch": 0.6537719242433883, "grad_norm": 0.3815425634384155, "learning_rate": 1.5193857135318666e-05, "loss": 0.496, "step": 30826 }, { "epoch": 0.6537931327013213, "grad_norm": 0.3238205909729004, "learning_rate": 1.5193572147361608e-05, "loss": 0.4863, "step": 30827 }, { "epoch": 0.6538143411592543, "grad_norm": 0.35569867491722107, "learning_rate": 1.519328715362831e-05, "loss": 0.4641, "step": 30828 }, { "epoch": 0.6538355496171874, "grad_norm": 0.38935911655426025, "learning_rate": 1.519300215411908e-05, "loss": 0.4911, "step": 30829 }, { "epoch": 0.6538567580751203, "grad_norm": 0.39711257815361023, "learning_rate": 1.5192717148834239e-05, "loss": 0.5356, "step": 30830 }, { "epoch": 0.6538779665330534, "grad_norm": 0.3602195382118225, "learning_rate": 1.5192432137774105e-05, "loss": 0.5381, "step": 30831 }, { "epoch": 0.6538991749909864, "grad_norm": 0.38244181871414185, "learning_rate": 1.519214712093899e-05, "loss": 0.4614, "step": 30832 }, { "epoch": 0.6539203834489195, "grad_norm": 0.3545876443386078, "learning_rate": 1.5191862098329213e-05, "loss": 0.4296, "step": 30833 }, { "epoch": 0.6539415919068524, "grad_norm": 0.3761061728000641, "learning_rate": 1.5191577069945093e-05, "loss": 0.5349, "step": 30834 }, { "epoch": 0.6539628003647855, "grad_norm": 0.3696966767311096, "learning_rate": 1.5191292035786948e-05, "loss": 0.4872, "step": 30835 }, { "epoch": 0.6539840088227185, "grad_norm": 0.3691329061985016, "learning_rate": 1.5191006995855092e-05, "loss": 0.4803, "step": 30836 }, { "epoch": 0.6540052172806515, "grad_norm": 0.4063434898853302, "learning_rate": 1.5190721950149843e-05, "loss": 0.5413, "step": 30837 }, { "epoch": 0.6540264257385845, "grad_norm": 0.3935689628124237, "learning_rate": 1.519043689867152e-05, "loss": 0.5655, "step": 30838 }, { "epoch": 0.6540476341965176, "grad_norm": 0.3549754023551941, "learning_rate": 1.5190151841420432e-05, "loss": 0.5541, "step": 30839 }, { "epoch": 0.6540688426544506, "grad_norm": 0.3514864444732666, "learning_rate": 1.5189866778396909e-05, "loss": 0.5243, "step": 30840 }, { "epoch": 0.6540900511123836, "grad_norm": 0.3379696309566498, "learning_rate": 1.5189581709601258e-05, "loss": 0.4673, "step": 30841 }, { "epoch": 0.6541112595703167, "grad_norm": 0.39967459440231323, "learning_rate": 1.5189296635033798e-05, "loss": 0.4973, "step": 30842 }, { "epoch": 0.6541324680282496, "grad_norm": 0.36847829818725586, "learning_rate": 1.5189011554694853e-05, "loss": 0.5472, "step": 30843 }, { "epoch": 0.6541536764861827, "grad_norm": 0.35148078203201294, "learning_rate": 1.5188726468584732e-05, "loss": 0.5036, "step": 30844 }, { "epoch": 0.6541748849441157, "grad_norm": 0.32114914059638977, "learning_rate": 1.5188441376703752e-05, "loss": 0.4503, "step": 30845 }, { "epoch": 0.6541960934020488, "grad_norm": 0.45437055826187134, "learning_rate": 1.5188156279052232e-05, "loss": 0.5478, "step": 30846 }, { "epoch": 0.6542173018599817, "grad_norm": 0.3387989103794098, "learning_rate": 1.5187871175630492e-05, "loss": 0.5008, "step": 30847 }, { "epoch": 0.6542385103179148, "grad_norm": 0.36907249689102173, "learning_rate": 1.5187586066438847e-05, "loss": 0.5127, "step": 30848 }, { "epoch": 0.6542597187758478, "grad_norm": 0.4731956124305725, "learning_rate": 1.5187300951477615e-05, "loss": 0.4442, "step": 30849 }, { "epoch": 0.6542809272337808, "grad_norm": 0.33745279908180237, "learning_rate": 1.5187015830747109e-05, "loss": 0.4852, "step": 30850 }, { "epoch": 0.6543021356917138, "grad_norm": 0.38268133997917175, "learning_rate": 1.518673070424765e-05, "loss": 0.5506, "step": 30851 }, { "epoch": 0.6543233441496469, "grad_norm": 0.3576461374759674, "learning_rate": 1.5186445571979556e-05, "loss": 0.5077, "step": 30852 }, { "epoch": 0.65434455260758, "grad_norm": 0.38404059410095215, "learning_rate": 1.5186160433943138e-05, "loss": 0.4455, "step": 30853 }, { "epoch": 0.6543657610655129, "grad_norm": 0.3624953031539917, "learning_rate": 1.518587529013872e-05, "loss": 0.4706, "step": 30854 }, { "epoch": 0.654386969523446, "grad_norm": 0.3249918222427368, "learning_rate": 1.5185590140566617e-05, "loss": 0.4466, "step": 30855 }, { "epoch": 0.654408177981379, "grad_norm": 0.33464962244033813, "learning_rate": 1.5185304985227144e-05, "loss": 0.5087, "step": 30856 }, { "epoch": 0.654429386439312, "grad_norm": 0.36260682344436646, "learning_rate": 1.5185019824120626e-05, "loss": 0.4662, "step": 30857 }, { "epoch": 0.654450594897245, "grad_norm": 0.3414340317249298, "learning_rate": 1.5184734657247364e-05, "loss": 0.4492, "step": 30858 }, { "epoch": 0.6544718033551781, "grad_norm": 0.35866230726242065, "learning_rate": 1.5184449484607692e-05, "loss": 0.5228, "step": 30859 }, { "epoch": 0.654493011813111, "grad_norm": 0.33114591240882874, "learning_rate": 1.518416430620192e-05, "loss": 0.504, "step": 30860 }, { "epoch": 0.6545142202710441, "grad_norm": 0.2940737009048462, "learning_rate": 1.5183879122030364e-05, "loss": 0.4336, "step": 30861 }, { "epoch": 0.6545354287289771, "grad_norm": 0.37460857629776, "learning_rate": 1.5183593932093343e-05, "loss": 0.5006, "step": 30862 }, { "epoch": 0.6545566371869102, "grad_norm": 0.3504829406738281, "learning_rate": 1.5183308736391174e-05, "loss": 0.5415, "step": 30863 }, { "epoch": 0.6545778456448431, "grad_norm": 0.3420712649822235, "learning_rate": 1.518302353492417e-05, "loss": 0.5538, "step": 30864 }, { "epoch": 0.6545990541027762, "grad_norm": 0.4158226251602173, "learning_rate": 1.5182738327692658e-05, "loss": 0.5406, "step": 30865 }, { "epoch": 0.6546202625607093, "grad_norm": 0.3853166401386261, "learning_rate": 1.5182453114696949e-05, "loss": 0.504, "step": 30866 }, { "epoch": 0.6546414710186422, "grad_norm": 0.3645739257335663, "learning_rate": 1.5182167895937358e-05, "loss": 0.5133, "step": 30867 }, { "epoch": 0.6546626794765753, "grad_norm": 0.32117828726768494, "learning_rate": 1.5181882671414208e-05, "loss": 0.4344, "step": 30868 }, { "epoch": 0.6546838879345083, "grad_norm": 0.40520498156547546, "learning_rate": 1.518159744112781e-05, "loss": 0.4963, "step": 30869 }, { "epoch": 0.6547050963924413, "grad_norm": 0.3397625684738159, "learning_rate": 1.5181312205078485e-05, "loss": 0.5109, "step": 30870 }, { "epoch": 0.6547263048503743, "grad_norm": 0.38532209396362305, "learning_rate": 1.5181026963266549e-05, "loss": 0.4406, "step": 30871 }, { "epoch": 0.6547475133083074, "grad_norm": 0.3764117360115051, "learning_rate": 1.5180741715692322e-05, "loss": 0.5156, "step": 30872 }, { "epoch": 0.6547687217662403, "grad_norm": 0.3500991463661194, "learning_rate": 1.5180456462356117e-05, "loss": 0.5277, "step": 30873 }, { "epoch": 0.6547899302241734, "grad_norm": 0.36717137694358826, "learning_rate": 1.5180171203258256e-05, "loss": 0.5343, "step": 30874 }, { "epoch": 0.6548111386821064, "grad_norm": 0.3864423632621765, "learning_rate": 1.517988593839905e-05, "loss": 0.5731, "step": 30875 }, { "epoch": 0.6548323471400395, "grad_norm": 0.5279845595359802, "learning_rate": 1.5179600667778825e-05, "loss": 0.4537, "step": 30876 }, { "epoch": 0.6548535555979724, "grad_norm": 0.34492841362953186, "learning_rate": 1.5179315391397889e-05, "loss": 0.5582, "step": 30877 }, { "epoch": 0.6548747640559055, "grad_norm": 0.3291676640510559, "learning_rate": 1.5179030109256561e-05, "loss": 0.4762, "step": 30878 }, { "epoch": 0.6548959725138385, "grad_norm": 0.37253615260124207, "learning_rate": 1.5178744821355165e-05, "loss": 0.4468, "step": 30879 }, { "epoch": 0.6549171809717715, "grad_norm": 0.3378172218799591, "learning_rate": 1.5178459527694016e-05, "loss": 0.4382, "step": 30880 }, { "epoch": 0.6549383894297046, "grad_norm": 0.36944380402565, "learning_rate": 1.5178174228273423e-05, "loss": 0.4531, "step": 30881 }, { "epoch": 0.6549595978876376, "grad_norm": 0.3392965495586395, "learning_rate": 1.5177888923093714e-05, "loss": 0.5567, "step": 30882 }, { "epoch": 0.6549808063455707, "grad_norm": 0.6855689287185669, "learning_rate": 1.51776036121552e-05, "loss": 0.5003, "step": 30883 }, { "epoch": 0.6550020148035036, "grad_norm": 0.35854658484458923, "learning_rate": 1.5177318295458201e-05, "loss": 0.5288, "step": 30884 }, { "epoch": 0.6550232232614367, "grad_norm": 0.3309565782546997, "learning_rate": 1.5177032973003035e-05, "loss": 0.4064, "step": 30885 }, { "epoch": 0.6550444317193697, "grad_norm": 0.3540829122066498, "learning_rate": 1.5176747644790016e-05, "loss": 0.4942, "step": 30886 }, { "epoch": 0.6550656401773027, "grad_norm": 0.40934857726097107, "learning_rate": 1.5176462310819462e-05, "loss": 0.5161, "step": 30887 }, { "epoch": 0.6550868486352357, "grad_norm": 0.3564331829547882, "learning_rate": 1.5176176971091696e-05, "loss": 0.5097, "step": 30888 }, { "epoch": 0.6551080570931688, "grad_norm": 0.6677303910255432, "learning_rate": 1.5175891625607027e-05, "loss": 0.5184, "step": 30889 }, { "epoch": 0.6551292655511017, "grad_norm": 0.34411853551864624, "learning_rate": 1.5175606274365775e-05, "loss": 0.4945, "step": 30890 }, { "epoch": 0.6551504740090348, "grad_norm": 0.35754603147506714, "learning_rate": 1.5175320917368264e-05, "loss": 0.4586, "step": 30891 }, { "epoch": 0.6551716824669678, "grad_norm": 0.5603342652320862, "learning_rate": 1.51750355546148e-05, "loss": 0.4907, "step": 30892 }, { "epoch": 0.6551928909249009, "grad_norm": 0.3762834072113037, "learning_rate": 1.5174750186105711e-05, "loss": 0.5287, "step": 30893 }, { "epoch": 0.6552140993828339, "grad_norm": 0.40543872117996216, "learning_rate": 1.517446481184131e-05, "loss": 0.5496, "step": 30894 }, { "epoch": 0.6552353078407669, "grad_norm": 0.4043252468109131, "learning_rate": 1.5174179431821912e-05, "loss": 0.5232, "step": 30895 }, { "epoch": 0.6552565162987, "grad_norm": 0.3441772162914276, "learning_rate": 1.5173894046047837e-05, "loss": 0.4847, "step": 30896 }, { "epoch": 0.6552777247566329, "grad_norm": 0.44911307096481323, "learning_rate": 1.5173608654519402e-05, "loss": 0.4899, "step": 30897 }, { "epoch": 0.655298933214566, "grad_norm": 0.3373662531375885, "learning_rate": 1.5173323257236925e-05, "loss": 0.4775, "step": 30898 }, { "epoch": 0.655320141672499, "grad_norm": 0.40986406803131104, "learning_rate": 1.5173037854200723e-05, "loss": 0.5175, "step": 30899 }, { "epoch": 0.655341350130432, "grad_norm": 0.3652881979942322, "learning_rate": 1.5172752445411112e-05, "loss": 0.5089, "step": 30900 }, { "epoch": 0.655362558588365, "grad_norm": 0.3791871666908264, "learning_rate": 1.5172467030868412e-05, "loss": 0.5186, "step": 30901 }, { "epoch": 0.6553837670462981, "grad_norm": 0.4222126305103302, "learning_rate": 1.5172181610572941e-05, "loss": 0.42, "step": 30902 }, { "epoch": 0.655404975504231, "grad_norm": 0.38790977001190186, "learning_rate": 1.517189618452501e-05, "loss": 0.4365, "step": 30903 }, { "epoch": 0.6554261839621641, "grad_norm": 0.3430001139640808, "learning_rate": 1.517161075272494e-05, "loss": 0.4402, "step": 30904 }, { "epoch": 0.6554473924200971, "grad_norm": 0.3523159325122833, "learning_rate": 1.5171325315173055e-05, "loss": 0.4505, "step": 30905 }, { "epoch": 0.6554686008780302, "grad_norm": 0.3786531686782837, "learning_rate": 1.5171039871869662e-05, "loss": 0.4454, "step": 30906 }, { "epoch": 0.6554898093359632, "grad_norm": 0.3103121221065521, "learning_rate": 1.5170754422815088e-05, "loss": 0.5261, "step": 30907 }, { "epoch": 0.6555110177938962, "grad_norm": 0.3658815920352936, "learning_rate": 1.5170468968009644e-05, "loss": 0.4517, "step": 30908 }, { "epoch": 0.6555322262518293, "grad_norm": 0.4121705889701843, "learning_rate": 1.5170183507453647e-05, "loss": 0.4902, "step": 30909 }, { "epoch": 0.6555534347097622, "grad_norm": 0.41683855652809143, "learning_rate": 1.5169898041147419e-05, "loss": 0.4621, "step": 30910 }, { "epoch": 0.6555746431676953, "grad_norm": 0.33778202533721924, "learning_rate": 1.5169612569091277e-05, "loss": 0.4319, "step": 30911 }, { "epoch": 0.6555958516256283, "grad_norm": 0.4141288995742798, "learning_rate": 1.5169327091285533e-05, "loss": 0.4813, "step": 30912 }, { "epoch": 0.6556170600835614, "grad_norm": 0.31833958625793457, "learning_rate": 1.5169041607730512e-05, "loss": 0.4217, "step": 30913 }, { "epoch": 0.6556382685414943, "grad_norm": 0.3269660472869873, "learning_rate": 1.5168756118426525e-05, "loss": 0.4202, "step": 30914 }, { "epoch": 0.6556594769994274, "grad_norm": 0.3754196763038635, "learning_rate": 1.516847062337389e-05, "loss": 0.496, "step": 30915 }, { "epoch": 0.6556806854573604, "grad_norm": 0.3618216812610626, "learning_rate": 1.5168185122572934e-05, "loss": 0.4845, "step": 30916 }, { "epoch": 0.6557018939152934, "grad_norm": 0.3279569149017334, "learning_rate": 1.5167899616023962e-05, "loss": 0.5133, "step": 30917 }, { "epoch": 0.6557231023732264, "grad_norm": 0.40798628330230713, "learning_rate": 1.51676141037273e-05, "loss": 0.5649, "step": 30918 }, { "epoch": 0.6557443108311595, "grad_norm": 0.3240320384502411, "learning_rate": 1.516732858568326e-05, "loss": 0.4102, "step": 30919 }, { "epoch": 0.6557655192890924, "grad_norm": 0.508560061454773, "learning_rate": 1.5167043061892162e-05, "loss": 0.4516, "step": 30920 }, { "epoch": 0.6557867277470255, "grad_norm": 0.36254772543907166, "learning_rate": 1.5166757532354326e-05, "loss": 0.4642, "step": 30921 }, { "epoch": 0.6558079362049586, "grad_norm": 0.37405091524124146, "learning_rate": 1.5166471997070065e-05, "loss": 0.4302, "step": 30922 }, { "epoch": 0.6558291446628915, "grad_norm": 0.3451913297176361, "learning_rate": 1.5166186456039698e-05, "loss": 0.4797, "step": 30923 }, { "epoch": 0.6558503531208246, "grad_norm": 0.3593364655971527, "learning_rate": 1.5165900909263546e-05, "loss": 0.4901, "step": 30924 }, { "epoch": 0.6558715615787576, "grad_norm": 0.37013816833496094, "learning_rate": 1.5165615356741926e-05, "loss": 0.4471, "step": 30925 }, { "epoch": 0.6558927700366907, "grad_norm": 0.34854379296302795, "learning_rate": 1.5165329798475145e-05, "loss": 0.449, "step": 30926 }, { "epoch": 0.6559139784946236, "grad_norm": 0.33895376324653625, "learning_rate": 1.5165044234463538e-05, "loss": 0.5822, "step": 30927 }, { "epoch": 0.6559351869525567, "grad_norm": 0.378065824508667, "learning_rate": 1.5164758664707409e-05, "loss": 0.491, "step": 30928 }, { "epoch": 0.6559563954104897, "grad_norm": 0.3569754660129547, "learning_rate": 1.5164473089207082e-05, "loss": 0.5459, "step": 30929 }, { "epoch": 0.6559776038684227, "grad_norm": 0.3511907756328583, "learning_rate": 1.516418750796287e-05, "loss": 0.4971, "step": 30930 }, { "epoch": 0.6559988123263557, "grad_norm": 0.39865005016326904, "learning_rate": 1.5163901920975097e-05, "loss": 0.459, "step": 30931 }, { "epoch": 0.6560200207842888, "grad_norm": 0.37351521849632263, "learning_rate": 1.5163616328244075e-05, "loss": 0.5152, "step": 30932 }, { "epoch": 0.6560412292422217, "grad_norm": 0.3249971866607666, "learning_rate": 1.5163330729770124e-05, "loss": 0.4404, "step": 30933 }, { "epoch": 0.6560624377001548, "grad_norm": 0.33099329471588135, "learning_rate": 1.5163045125553564e-05, "loss": 0.484, "step": 30934 }, { "epoch": 0.6560836461580879, "grad_norm": 0.3553638756275177, "learning_rate": 1.5162759515594707e-05, "loss": 0.5621, "step": 30935 }, { "epoch": 0.6561048546160209, "grad_norm": 0.35152363777160645, "learning_rate": 1.5162473899893875e-05, "loss": 0.5402, "step": 30936 }, { "epoch": 0.6561260630739539, "grad_norm": 0.3845002055168152, "learning_rate": 1.5162188278451382e-05, "loss": 0.5056, "step": 30937 }, { "epoch": 0.6561472715318869, "grad_norm": 0.39777472615242004, "learning_rate": 1.5161902651267553e-05, "loss": 0.6077, "step": 30938 }, { "epoch": 0.65616847998982, "grad_norm": 0.4097360670566559, "learning_rate": 1.5161617018342699e-05, "loss": 0.4203, "step": 30939 }, { "epoch": 0.6561896884477529, "grad_norm": 0.3755055069923401, "learning_rate": 1.5161331379677136e-05, "loss": 0.5688, "step": 30940 }, { "epoch": 0.656210896905686, "grad_norm": 0.3198438286781311, "learning_rate": 1.5161045735271188e-05, "loss": 0.479, "step": 30941 }, { "epoch": 0.656232105363619, "grad_norm": 0.3399980366230011, "learning_rate": 1.5160760085125167e-05, "loss": 0.5212, "step": 30942 }, { "epoch": 0.656253313821552, "grad_norm": 0.3636089265346527, "learning_rate": 1.5160474429239396e-05, "loss": 0.4559, "step": 30943 }, { "epoch": 0.656274522279485, "grad_norm": 0.3531060814857483, "learning_rate": 1.5160188767614191e-05, "loss": 0.4727, "step": 30944 }, { "epoch": 0.6562957307374181, "grad_norm": 0.34969350695610046, "learning_rate": 1.5159903100249866e-05, "loss": 0.4792, "step": 30945 }, { "epoch": 0.6563169391953511, "grad_norm": 0.46569836139678955, "learning_rate": 1.5159617427146745e-05, "loss": 0.5372, "step": 30946 }, { "epoch": 0.6563381476532841, "grad_norm": 0.37363043427467346, "learning_rate": 1.5159331748305137e-05, "loss": 0.46, "step": 30947 }, { "epoch": 0.6563593561112172, "grad_norm": 0.5134377479553223, "learning_rate": 1.5159046063725371e-05, "loss": 0.4559, "step": 30948 }, { "epoch": 0.6563805645691502, "grad_norm": 0.3813326060771942, "learning_rate": 1.5158760373407755e-05, "loss": 0.4595, "step": 30949 }, { "epoch": 0.6564017730270832, "grad_norm": 0.337053507566452, "learning_rate": 1.5158474677352613e-05, "loss": 0.4712, "step": 30950 }, { "epoch": 0.6564229814850162, "grad_norm": 0.37485384941101074, "learning_rate": 1.515818897556026e-05, "loss": 0.5239, "step": 30951 }, { "epoch": 0.6564441899429493, "grad_norm": 0.3312617838382721, "learning_rate": 1.5157903268031014e-05, "loss": 0.5191, "step": 30952 }, { "epoch": 0.6564653984008822, "grad_norm": 0.3711763322353363, "learning_rate": 1.5157617554765191e-05, "loss": 0.5745, "step": 30953 }, { "epoch": 0.6564866068588153, "grad_norm": 0.39228442311286926, "learning_rate": 1.5157331835763109e-05, "loss": 0.5864, "step": 30954 }, { "epoch": 0.6565078153167483, "grad_norm": 0.44025295972824097, "learning_rate": 1.515704611102509e-05, "loss": 0.5222, "step": 30955 }, { "epoch": 0.6565290237746814, "grad_norm": 0.34012994170188904, "learning_rate": 1.5156760380551452e-05, "loss": 0.4894, "step": 30956 }, { "epoch": 0.6565502322326143, "grad_norm": 0.33827486634254456, "learning_rate": 1.5156474644342505e-05, "loss": 0.574, "step": 30957 }, { "epoch": 0.6565714406905474, "grad_norm": 0.32644715905189514, "learning_rate": 1.5156188902398574e-05, "loss": 0.4638, "step": 30958 }, { "epoch": 0.6565926491484804, "grad_norm": 0.3592660427093506, "learning_rate": 1.5155903154719975e-05, "loss": 0.5137, "step": 30959 }, { "epoch": 0.6566138576064134, "grad_norm": 0.3702181875705719, "learning_rate": 1.5155617401307022e-05, "loss": 0.484, "step": 30960 }, { "epoch": 0.6566350660643464, "grad_norm": 0.34907275438308716, "learning_rate": 1.515533164216004e-05, "loss": 0.4756, "step": 30961 }, { "epoch": 0.6566562745222795, "grad_norm": 0.35438063740730286, "learning_rate": 1.5155045877279341e-05, "loss": 0.5312, "step": 30962 }, { "epoch": 0.6566774829802126, "grad_norm": 0.3807617425918579, "learning_rate": 1.5154760106665244e-05, "loss": 0.51, "step": 30963 }, { "epoch": 0.6566986914381455, "grad_norm": 0.500521183013916, "learning_rate": 1.515447433031807e-05, "loss": 0.4554, "step": 30964 }, { "epoch": 0.6567198998960786, "grad_norm": 0.36568987369537354, "learning_rate": 1.5154188548238133e-05, "loss": 0.5238, "step": 30965 }, { "epoch": 0.6567411083540116, "grad_norm": 0.35302528738975525, "learning_rate": 1.5153902760425754e-05, "loss": 0.5277, "step": 30966 }, { "epoch": 0.6567623168119446, "grad_norm": 0.354469358921051, "learning_rate": 1.5153616966881245e-05, "loss": 0.5174, "step": 30967 }, { "epoch": 0.6567835252698776, "grad_norm": 0.39768874645233154, "learning_rate": 1.515333116760493e-05, "loss": 0.5297, "step": 30968 }, { "epoch": 0.6568047337278107, "grad_norm": 0.35172146558761597, "learning_rate": 1.5153045362597127e-05, "loss": 0.4651, "step": 30969 }, { "epoch": 0.6568259421857436, "grad_norm": 0.3878030776977539, "learning_rate": 1.5152759551858151e-05, "loss": 0.3951, "step": 30970 }, { "epoch": 0.6568471506436767, "grad_norm": 0.3440057635307312, "learning_rate": 1.515247373538832e-05, "loss": 0.4603, "step": 30971 }, { "epoch": 0.6568683591016097, "grad_norm": 0.3944774568080902, "learning_rate": 1.5152187913187952e-05, "loss": 0.5614, "step": 30972 }, { "epoch": 0.6568895675595428, "grad_norm": 0.3554096519947052, "learning_rate": 1.5151902085257365e-05, "loss": 0.4886, "step": 30973 }, { "epoch": 0.6569107760174757, "grad_norm": 0.3853331506252289, "learning_rate": 1.5151616251596878e-05, "loss": 0.5364, "step": 30974 }, { "epoch": 0.6569319844754088, "grad_norm": 0.34482112526893616, "learning_rate": 1.5151330412206809e-05, "loss": 0.3683, "step": 30975 }, { "epoch": 0.6569531929333419, "grad_norm": 0.3221244812011719, "learning_rate": 1.5151044567087474e-05, "loss": 0.4378, "step": 30976 }, { "epoch": 0.6569744013912748, "grad_norm": 0.38290566205978394, "learning_rate": 1.515075871623919e-05, "loss": 0.4486, "step": 30977 }, { "epoch": 0.6569956098492079, "grad_norm": 0.37191271781921387, "learning_rate": 1.5150472859662281e-05, "loss": 0.5165, "step": 30978 }, { "epoch": 0.6570168183071409, "grad_norm": 0.3749105930328369, "learning_rate": 1.5150186997357058e-05, "loss": 0.489, "step": 30979 }, { "epoch": 0.657038026765074, "grad_norm": 0.34153974056243896, "learning_rate": 1.514990112932384e-05, "loss": 0.481, "step": 30980 }, { "epoch": 0.6570592352230069, "grad_norm": 0.42635810375213623, "learning_rate": 1.5149615255562951e-05, "loss": 0.5028, "step": 30981 }, { "epoch": 0.65708044368094, "grad_norm": 0.38343754410743713, "learning_rate": 1.5149329376074702e-05, "loss": 0.5378, "step": 30982 }, { "epoch": 0.657101652138873, "grad_norm": 0.370462030172348, "learning_rate": 1.5149043490859416e-05, "loss": 0.4875, "step": 30983 }, { "epoch": 0.657122860596806, "grad_norm": 0.4446285665035248, "learning_rate": 1.5148757599917408e-05, "loss": 0.51, "step": 30984 }, { "epoch": 0.657144069054739, "grad_norm": 0.345066636800766, "learning_rate": 1.5148471703248992e-05, "loss": 0.51, "step": 30985 }, { "epoch": 0.6571652775126721, "grad_norm": 0.3502061069011688, "learning_rate": 1.5148185800854496e-05, "loss": 0.5197, "step": 30986 }, { "epoch": 0.657186485970605, "grad_norm": 0.3090013563632965, "learning_rate": 1.514789989273423e-05, "loss": 0.4102, "step": 30987 }, { "epoch": 0.6572076944285381, "grad_norm": 0.43904909491539, "learning_rate": 1.5147613978888514e-05, "loss": 0.4517, "step": 30988 }, { "epoch": 0.6572289028864712, "grad_norm": 0.3545408546924591, "learning_rate": 1.5147328059317668e-05, "loss": 0.6316, "step": 30989 }, { "epoch": 0.6572501113444041, "grad_norm": 0.35943603515625, "learning_rate": 1.5147042134022009e-05, "loss": 0.5008, "step": 30990 }, { "epoch": 0.6572713198023372, "grad_norm": 0.31351974606513977, "learning_rate": 1.5146756203001854e-05, "loss": 0.4531, "step": 30991 }, { "epoch": 0.6572925282602702, "grad_norm": 0.4167483448982239, "learning_rate": 1.514647026625752e-05, "loss": 0.5069, "step": 30992 }, { "epoch": 0.6573137367182033, "grad_norm": 0.5287099480628967, "learning_rate": 1.5146184323789329e-05, "loss": 0.4422, "step": 30993 }, { "epoch": 0.6573349451761362, "grad_norm": 0.42638152837753296, "learning_rate": 1.5145898375597592e-05, "loss": 0.464, "step": 30994 }, { "epoch": 0.6573561536340693, "grad_norm": 0.37975162267684937, "learning_rate": 1.5145612421682634e-05, "loss": 0.5753, "step": 30995 }, { "epoch": 0.6573773620920023, "grad_norm": 0.35641300678253174, "learning_rate": 1.5145326462044772e-05, "loss": 0.4665, "step": 30996 }, { "epoch": 0.6573985705499353, "grad_norm": 0.35142555832862854, "learning_rate": 1.5145040496684322e-05, "loss": 0.5359, "step": 30997 }, { "epoch": 0.6574197790078683, "grad_norm": 0.5291144251823425, "learning_rate": 1.5144754525601601e-05, "loss": 0.5306, "step": 30998 }, { "epoch": 0.6574409874658014, "grad_norm": 0.3431820869445801, "learning_rate": 1.5144468548796929e-05, "loss": 0.5378, "step": 30999 }, { "epoch": 0.6574621959237343, "grad_norm": 0.3828166425228119, "learning_rate": 1.5144182566270625e-05, "loss": 0.5426, "step": 31000 }, { "epoch": 0.6574834043816674, "grad_norm": 0.3937338888645172, "learning_rate": 1.5143896578023008e-05, "loss": 0.4341, "step": 31001 }, { "epoch": 0.6575046128396004, "grad_norm": 0.32213863730430603, "learning_rate": 1.5143610584054388e-05, "loss": 0.4834, "step": 31002 }, { "epoch": 0.6575258212975335, "grad_norm": 1.546392560005188, "learning_rate": 1.5143324584365093e-05, "loss": 0.5351, "step": 31003 }, { "epoch": 0.6575470297554665, "grad_norm": 0.36890923976898193, "learning_rate": 1.514303857895544e-05, "loss": 0.5083, "step": 31004 }, { "epoch": 0.6575682382133995, "grad_norm": 0.4209634065628052, "learning_rate": 1.5142752567825737e-05, "loss": 0.5127, "step": 31005 }, { "epoch": 0.6575894466713326, "grad_norm": 0.349515825510025, "learning_rate": 1.5142466550976314e-05, "loss": 0.5631, "step": 31006 }, { "epoch": 0.6576106551292655, "grad_norm": 0.3547879159450531, "learning_rate": 1.5142180528407485e-05, "loss": 0.4885, "step": 31007 }, { "epoch": 0.6576318635871986, "grad_norm": 0.33943259716033936, "learning_rate": 1.5141894500119564e-05, "loss": 0.5248, "step": 31008 }, { "epoch": 0.6576530720451316, "grad_norm": 0.3310109078884125, "learning_rate": 1.5141608466112874e-05, "loss": 0.4166, "step": 31009 }, { "epoch": 0.6576742805030646, "grad_norm": 0.39738374948501587, "learning_rate": 1.5141322426387733e-05, "loss": 0.5179, "step": 31010 }, { "epoch": 0.6576954889609976, "grad_norm": 0.35681408643722534, "learning_rate": 1.5141036380944457e-05, "loss": 0.4836, "step": 31011 }, { "epoch": 0.6577166974189307, "grad_norm": 0.3351283669471741, "learning_rate": 1.5140750329783363e-05, "loss": 0.4763, "step": 31012 }, { "epoch": 0.6577379058768636, "grad_norm": 0.3771321773529053, "learning_rate": 1.5140464272904773e-05, "loss": 0.4957, "step": 31013 }, { "epoch": 0.6577591143347967, "grad_norm": 0.3289129137992859, "learning_rate": 1.5140178210309002e-05, "loss": 0.4436, "step": 31014 }, { "epoch": 0.6577803227927297, "grad_norm": 0.3509964644908905, "learning_rate": 1.5139892141996373e-05, "loss": 0.5281, "step": 31015 }, { "epoch": 0.6578015312506628, "grad_norm": 0.3480202853679657, "learning_rate": 1.5139606067967196e-05, "loss": 0.5853, "step": 31016 }, { "epoch": 0.6578227397085958, "grad_norm": 0.3278373181819916, "learning_rate": 1.5139319988221799e-05, "loss": 0.4646, "step": 31017 }, { "epoch": 0.6578439481665288, "grad_norm": 0.37396037578582764, "learning_rate": 1.5139033902760492e-05, "loss": 0.5305, "step": 31018 }, { "epoch": 0.6578651566244619, "grad_norm": 0.3875443637371063, "learning_rate": 1.5138747811583593e-05, "loss": 0.4968, "step": 31019 }, { "epoch": 0.6578863650823948, "grad_norm": 0.3480128049850464, "learning_rate": 1.5138461714691431e-05, "loss": 0.5017, "step": 31020 }, { "epoch": 0.6579075735403279, "grad_norm": 0.4453469514846802, "learning_rate": 1.5138175612084309e-05, "loss": 0.5828, "step": 31021 }, { "epoch": 0.6579287819982609, "grad_norm": 0.36307692527770996, "learning_rate": 1.5137889503762557e-05, "loss": 0.5029, "step": 31022 }, { "epoch": 0.657949990456194, "grad_norm": 0.42958080768585205, "learning_rate": 1.5137603389726487e-05, "loss": 0.4844, "step": 31023 }, { "epoch": 0.6579711989141269, "grad_norm": 0.38178378343582153, "learning_rate": 1.5137317269976422e-05, "loss": 0.4948, "step": 31024 }, { "epoch": 0.65799240737206, "grad_norm": 0.38282108306884766, "learning_rate": 1.5137031144512673e-05, "loss": 0.5289, "step": 31025 }, { "epoch": 0.658013615829993, "grad_norm": 0.5985697507858276, "learning_rate": 1.5136745013335565e-05, "loss": 0.4875, "step": 31026 }, { "epoch": 0.658034824287926, "grad_norm": 0.5254981517791748, "learning_rate": 1.5136458876445414e-05, "loss": 0.5238, "step": 31027 }, { "epoch": 0.658056032745859, "grad_norm": 0.3917195200920105, "learning_rate": 1.5136172733842538e-05, "loss": 0.5068, "step": 31028 }, { "epoch": 0.6580772412037921, "grad_norm": 0.763985812664032, "learning_rate": 1.5135886585527258e-05, "loss": 0.5597, "step": 31029 }, { "epoch": 0.6580984496617251, "grad_norm": 0.35731613636016846, "learning_rate": 1.5135600431499884e-05, "loss": 0.474, "step": 31030 }, { "epoch": 0.6581196581196581, "grad_norm": 0.3392851948738098, "learning_rate": 1.5135314271760742e-05, "loss": 0.394, "step": 31031 }, { "epoch": 0.6581408665775912, "grad_norm": 0.3475976884365082, "learning_rate": 1.5135028106310148e-05, "loss": 0.5336, "step": 31032 }, { "epoch": 0.6581620750355242, "grad_norm": 0.3051431477069855, "learning_rate": 1.513474193514842e-05, "loss": 0.4954, "step": 31033 }, { "epoch": 0.6581832834934572, "grad_norm": 0.337377667427063, "learning_rate": 1.5134455758275879e-05, "loss": 0.5526, "step": 31034 }, { "epoch": 0.6582044919513902, "grad_norm": 0.35803017020225525, "learning_rate": 1.513416957569284e-05, "loss": 0.5563, "step": 31035 }, { "epoch": 0.6582257004093233, "grad_norm": 0.3639412224292755, "learning_rate": 1.513388338739962e-05, "loss": 0.5043, "step": 31036 }, { "epoch": 0.6582469088672562, "grad_norm": 0.4190426766872406, "learning_rate": 1.5133597193396542e-05, "loss": 0.5163, "step": 31037 }, { "epoch": 0.6582681173251893, "grad_norm": 0.39247506856918335, "learning_rate": 1.513331099368392e-05, "loss": 0.5581, "step": 31038 }, { "epoch": 0.6582893257831223, "grad_norm": 0.3515731990337372, "learning_rate": 1.5133024788262074e-05, "loss": 0.5011, "step": 31039 }, { "epoch": 0.6583105342410553, "grad_norm": 0.3801261782646179, "learning_rate": 1.5132738577131325e-05, "loss": 0.514, "step": 31040 }, { "epoch": 0.6583317426989883, "grad_norm": 0.4583807587623596, "learning_rate": 1.513245236029199e-05, "loss": 0.5712, "step": 31041 }, { "epoch": 0.6583529511569214, "grad_norm": 0.3607480525970459, "learning_rate": 1.513216613774438e-05, "loss": 0.5505, "step": 31042 }, { "epoch": 0.6583741596148543, "grad_norm": 0.33367031812667847, "learning_rate": 1.5131879909488824e-05, "loss": 0.5581, "step": 31043 }, { "epoch": 0.6583953680727874, "grad_norm": 0.4056541919708252, "learning_rate": 1.5131593675525633e-05, "loss": 0.5376, "step": 31044 }, { "epoch": 0.6584165765307205, "grad_norm": 0.33473190665245056, "learning_rate": 1.5131307435855127e-05, "loss": 0.4882, "step": 31045 }, { "epoch": 0.6584377849886535, "grad_norm": 0.5656855702400208, "learning_rate": 1.513102119047763e-05, "loss": 0.4895, "step": 31046 }, { "epoch": 0.6584589934465865, "grad_norm": 0.3555468022823334, "learning_rate": 1.5130734939393454e-05, "loss": 0.5063, "step": 31047 }, { "epoch": 0.6584802019045195, "grad_norm": 0.36403653025627136, "learning_rate": 1.5130448682602918e-05, "loss": 0.4639, "step": 31048 }, { "epoch": 0.6585014103624526, "grad_norm": 0.33726966381073, "learning_rate": 1.5130162420106345e-05, "loss": 0.4047, "step": 31049 }, { "epoch": 0.6585226188203855, "grad_norm": 0.3747701048851013, "learning_rate": 1.5129876151904044e-05, "loss": 0.4926, "step": 31050 }, { "epoch": 0.6585438272783186, "grad_norm": 0.3584110140800476, "learning_rate": 1.5129589877996345e-05, "loss": 0.5248, "step": 31051 }, { "epoch": 0.6585650357362516, "grad_norm": 0.30813267827033997, "learning_rate": 1.5129303598383558e-05, "loss": 0.4931, "step": 31052 }, { "epoch": 0.6585862441941847, "grad_norm": 0.3416588604450226, "learning_rate": 1.5129017313066003e-05, "loss": 0.3717, "step": 31053 }, { "epoch": 0.6586074526521176, "grad_norm": 0.36703893542289734, "learning_rate": 1.5128731022044001e-05, "loss": 0.4753, "step": 31054 }, { "epoch": 0.6586286611100507, "grad_norm": 0.4264104962348938, "learning_rate": 1.512844472531787e-05, "loss": 0.4011, "step": 31055 }, { "epoch": 0.6586498695679837, "grad_norm": 0.33560118079185486, "learning_rate": 1.5128158422887925e-05, "loss": 0.4575, "step": 31056 }, { "epoch": 0.6586710780259167, "grad_norm": 0.3631516396999359, "learning_rate": 1.5127872114754487e-05, "loss": 0.4608, "step": 31057 }, { "epoch": 0.6586922864838498, "grad_norm": 0.4146793484687805, "learning_rate": 1.5127585800917876e-05, "loss": 0.5435, "step": 31058 }, { "epoch": 0.6587134949417828, "grad_norm": 0.37857934832572937, "learning_rate": 1.5127299481378408e-05, "loss": 0.4702, "step": 31059 }, { "epoch": 0.6587347033997158, "grad_norm": 0.398801326751709, "learning_rate": 1.51270131561364e-05, "loss": 0.4096, "step": 31060 }, { "epoch": 0.6587559118576488, "grad_norm": 0.3874805271625519, "learning_rate": 1.5126726825192175e-05, "loss": 0.5427, "step": 31061 }, { "epoch": 0.6587771203155819, "grad_norm": 0.3886314332485199, "learning_rate": 1.512644048854605e-05, "loss": 0.5532, "step": 31062 }, { "epoch": 0.6587983287735149, "grad_norm": 0.3735312521457672, "learning_rate": 1.5126154146198339e-05, "loss": 0.4246, "step": 31063 }, { "epoch": 0.6588195372314479, "grad_norm": 0.3440195918083191, "learning_rate": 1.5125867798149365e-05, "loss": 0.5467, "step": 31064 }, { "epoch": 0.6588407456893809, "grad_norm": 0.40367192029953003, "learning_rate": 1.5125581444399449e-05, "loss": 0.49, "step": 31065 }, { "epoch": 0.658861954147314, "grad_norm": 0.36460891366004944, "learning_rate": 1.51252950849489e-05, "loss": 0.4689, "step": 31066 }, { "epoch": 0.6588831626052469, "grad_norm": 0.3893060088157654, "learning_rate": 1.5125008719798046e-05, "loss": 0.516, "step": 31067 }, { "epoch": 0.65890437106318, "grad_norm": 0.3625108003616333, "learning_rate": 1.5124722348947201e-05, "loss": 0.4968, "step": 31068 }, { "epoch": 0.658925579521113, "grad_norm": 0.45296409726142883, "learning_rate": 1.5124435972396683e-05, "loss": 0.5103, "step": 31069 }, { "epoch": 0.658946787979046, "grad_norm": 0.3402121961116791, "learning_rate": 1.5124149590146813e-05, "loss": 0.5179, "step": 31070 }, { "epoch": 0.6589679964369791, "grad_norm": 0.3774961829185486, "learning_rate": 1.512386320219791e-05, "loss": 0.4865, "step": 31071 }, { "epoch": 0.6589892048949121, "grad_norm": 0.39422306418418884, "learning_rate": 1.512357680855029e-05, "loss": 0.4924, "step": 31072 }, { "epoch": 0.6590104133528452, "grad_norm": 0.42228370904922485, "learning_rate": 1.512329040920427e-05, "loss": 0.5511, "step": 31073 }, { "epoch": 0.6590316218107781, "grad_norm": 0.3924335837364197, "learning_rate": 1.5123004004160175e-05, "loss": 0.4546, "step": 31074 }, { "epoch": 0.6590528302687112, "grad_norm": 0.33083927631378174, "learning_rate": 1.5122717593418315e-05, "loss": 0.4379, "step": 31075 }, { "epoch": 0.6590740387266442, "grad_norm": 0.3248051702976227, "learning_rate": 1.5122431176979016e-05, "loss": 0.4486, "step": 31076 }, { "epoch": 0.6590952471845772, "grad_norm": 0.3647865653038025, "learning_rate": 1.5122144754842592e-05, "loss": 0.501, "step": 31077 }, { "epoch": 0.6591164556425102, "grad_norm": 0.3329252600669861, "learning_rate": 1.5121858327009362e-05, "loss": 0.4601, "step": 31078 }, { "epoch": 0.6591376641004433, "grad_norm": 0.47206369042396545, "learning_rate": 1.5121571893479647e-05, "loss": 0.4716, "step": 31079 }, { "epoch": 0.6591588725583762, "grad_norm": 0.3523132801055908, "learning_rate": 1.5121285454253767e-05, "loss": 0.499, "step": 31080 }, { "epoch": 0.6591800810163093, "grad_norm": 0.4055483937263489, "learning_rate": 1.5120999009332033e-05, "loss": 0.4912, "step": 31081 }, { "epoch": 0.6592012894742423, "grad_norm": 0.3550778329372406, "learning_rate": 1.5120712558714772e-05, "loss": 0.5629, "step": 31082 }, { "epoch": 0.6592224979321754, "grad_norm": 0.3748222887516022, "learning_rate": 1.5120426102402297e-05, "loss": 0.5169, "step": 31083 }, { "epoch": 0.6592437063901084, "grad_norm": 0.4140700101852417, "learning_rate": 1.512013964039493e-05, "loss": 0.4316, "step": 31084 }, { "epoch": 0.6592649148480414, "grad_norm": 0.3335109055042267, "learning_rate": 1.5119853172692986e-05, "loss": 0.5086, "step": 31085 }, { "epoch": 0.6592861233059745, "grad_norm": 0.4000014364719391, "learning_rate": 1.511956669929679e-05, "loss": 0.467, "step": 31086 }, { "epoch": 0.6593073317639074, "grad_norm": 0.3528788387775421, "learning_rate": 1.5119280220206652e-05, "loss": 0.57, "step": 31087 }, { "epoch": 0.6593285402218405, "grad_norm": 0.3361489176750183, "learning_rate": 1.5118993735422898e-05, "loss": 0.4651, "step": 31088 }, { "epoch": 0.6593497486797735, "grad_norm": 0.34929266571998596, "learning_rate": 1.511870724494584e-05, "loss": 0.4191, "step": 31089 }, { "epoch": 0.6593709571377065, "grad_norm": 0.36530688405036926, "learning_rate": 1.5118420748775805e-05, "loss": 0.5293, "step": 31090 }, { "epoch": 0.6593921655956395, "grad_norm": 0.3302883207798004, "learning_rate": 1.5118134246913102e-05, "loss": 0.4251, "step": 31091 }, { "epoch": 0.6594133740535726, "grad_norm": 0.7272689938545227, "learning_rate": 1.5117847739358058e-05, "loss": 0.5508, "step": 31092 }, { "epoch": 0.6594345825115056, "grad_norm": 0.35866618156433105, "learning_rate": 1.5117561226110989e-05, "loss": 0.4736, "step": 31093 }, { "epoch": 0.6594557909694386, "grad_norm": 0.3161814212799072, "learning_rate": 1.511727470717221e-05, "loss": 0.359, "step": 31094 }, { "epoch": 0.6594769994273716, "grad_norm": 0.4026046395301819, "learning_rate": 1.5116988182542043e-05, "loss": 0.5117, "step": 31095 }, { "epoch": 0.6594982078853047, "grad_norm": 0.3288918733596802, "learning_rate": 1.5116701652220807e-05, "loss": 0.5024, "step": 31096 }, { "epoch": 0.6595194163432376, "grad_norm": 0.3926127552986145, "learning_rate": 1.5116415116208818e-05, "loss": 0.5782, "step": 31097 }, { "epoch": 0.6595406248011707, "grad_norm": 0.4666759669780731, "learning_rate": 1.51161285745064e-05, "loss": 0.5273, "step": 31098 }, { "epoch": 0.6595618332591038, "grad_norm": 0.36996787786483765, "learning_rate": 1.5115842027113865e-05, "loss": 0.5597, "step": 31099 }, { "epoch": 0.6595830417170367, "grad_norm": 0.46195781230926514, "learning_rate": 1.5115555474031538e-05, "loss": 0.5352, "step": 31100 }, { "epoch": 0.6596042501749698, "grad_norm": 0.329761803150177, "learning_rate": 1.5115268915259732e-05, "loss": 0.4125, "step": 31101 }, { "epoch": 0.6596254586329028, "grad_norm": 0.38636836409568787, "learning_rate": 1.511498235079877e-05, "loss": 0.5653, "step": 31102 }, { "epoch": 0.6596466670908359, "grad_norm": 0.3354775011539459, "learning_rate": 1.5114695780648969e-05, "loss": 0.5348, "step": 31103 }, { "epoch": 0.6596678755487688, "grad_norm": 0.3480316400527954, "learning_rate": 1.5114409204810647e-05, "loss": 0.4913, "step": 31104 }, { "epoch": 0.6596890840067019, "grad_norm": 0.3419518768787384, "learning_rate": 1.5114122623284124e-05, "loss": 0.4608, "step": 31105 }, { "epoch": 0.6597102924646349, "grad_norm": 0.3278255760669708, "learning_rate": 1.5113836036069715e-05, "loss": 0.4969, "step": 31106 }, { "epoch": 0.6597315009225679, "grad_norm": 0.3379564583301544, "learning_rate": 1.5113549443167747e-05, "loss": 0.5281, "step": 31107 }, { "epoch": 0.6597527093805009, "grad_norm": 0.3970581591129303, "learning_rate": 1.5113262844578531e-05, "loss": 0.5245, "step": 31108 }, { "epoch": 0.659773917838434, "grad_norm": 0.34929129481315613, "learning_rate": 1.511297624030239e-05, "loss": 0.5395, "step": 31109 }, { "epoch": 0.6597951262963669, "grad_norm": 0.3678182065486908, "learning_rate": 1.511268963033964e-05, "loss": 0.4625, "step": 31110 }, { "epoch": 0.6598163347543, "grad_norm": 0.36530306935310364, "learning_rate": 1.5112403014690602e-05, "loss": 0.5506, "step": 31111 }, { "epoch": 0.6598375432122331, "grad_norm": 0.3905671536922455, "learning_rate": 1.5112116393355591e-05, "loss": 0.5787, "step": 31112 }, { "epoch": 0.659858751670166, "grad_norm": 0.37767571210861206, "learning_rate": 1.5111829766334933e-05, "loss": 0.4681, "step": 31113 }, { "epoch": 0.6598799601280991, "grad_norm": 0.3433244228363037, "learning_rate": 1.511154313362894e-05, "loss": 0.5087, "step": 31114 }, { "epoch": 0.6599011685860321, "grad_norm": 0.3674318790435791, "learning_rate": 1.5111256495237931e-05, "loss": 0.5385, "step": 31115 }, { "epoch": 0.6599223770439652, "grad_norm": 0.39333823323249817, "learning_rate": 1.5110969851162231e-05, "loss": 0.5317, "step": 31116 }, { "epoch": 0.6599435855018981, "grad_norm": 0.3618752658367157, "learning_rate": 1.5110683201402152e-05, "loss": 0.5353, "step": 31117 }, { "epoch": 0.6599647939598312, "grad_norm": 0.350238561630249, "learning_rate": 1.5110396545958018e-05, "loss": 0.4794, "step": 31118 }, { "epoch": 0.6599860024177642, "grad_norm": 0.31622475385665894, "learning_rate": 1.5110109884830143e-05, "loss": 0.4337, "step": 31119 }, { "epoch": 0.6600072108756972, "grad_norm": 0.34796765446662903, "learning_rate": 1.5109823218018849e-05, "loss": 0.5348, "step": 31120 }, { "epoch": 0.6600284193336302, "grad_norm": 0.3726625144481659, "learning_rate": 1.5109536545524453e-05, "loss": 0.5093, "step": 31121 }, { "epoch": 0.6600496277915633, "grad_norm": 0.38076329231262207, "learning_rate": 1.5109249867347276e-05, "loss": 0.4934, "step": 31122 }, { "epoch": 0.6600708362494963, "grad_norm": 0.3699652850627899, "learning_rate": 1.5108963183487637e-05, "loss": 0.4286, "step": 31123 }, { "epoch": 0.6600920447074293, "grad_norm": 0.3900053799152374, "learning_rate": 1.5108676493945851e-05, "loss": 0.5371, "step": 31124 }, { "epoch": 0.6601132531653624, "grad_norm": 0.3835621774196625, "learning_rate": 1.5108389798722241e-05, "loss": 0.5524, "step": 31125 }, { "epoch": 0.6601344616232954, "grad_norm": 0.3731754422187805, "learning_rate": 1.5108103097817122e-05, "loss": 0.5824, "step": 31126 }, { "epoch": 0.6601556700812284, "grad_norm": 0.34804508090019226, "learning_rate": 1.5107816391230818e-05, "loss": 0.5255, "step": 31127 }, { "epoch": 0.6601768785391614, "grad_norm": 0.352554589509964, "learning_rate": 1.5107529678963643e-05, "loss": 0.454, "step": 31128 }, { "epoch": 0.6601980869970945, "grad_norm": 0.30387941002845764, "learning_rate": 1.5107242961015918e-05, "loss": 0.4634, "step": 31129 }, { "epoch": 0.6602192954550274, "grad_norm": 0.34662872552871704, "learning_rate": 1.5106956237387964e-05, "loss": 0.4938, "step": 31130 }, { "epoch": 0.6602405039129605, "grad_norm": 0.3250106871128082, "learning_rate": 1.5106669508080097e-05, "loss": 0.4724, "step": 31131 }, { "epoch": 0.6602617123708935, "grad_norm": 0.3524758517742157, "learning_rate": 1.5106382773092633e-05, "loss": 0.5094, "step": 31132 }, { "epoch": 0.6602829208288266, "grad_norm": 0.3880271315574646, "learning_rate": 1.5106096032425897e-05, "loss": 0.569, "step": 31133 }, { "epoch": 0.6603041292867595, "grad_norm": 0.3558800518512726, "learning_rate": 1.5105809286080206e-05, "loss": 0.4691, "step": 31134 }, { "epoch": 0.6603253377446926, "grad_norm": 0.3430183529853821, "learning_rate": 1.5105522534055876e-05, "loss": 0.5042, "step": 31135 }, { "epoch": 0.6603465462026256, "grad_norm": 0.32489702105522156, "learning_rate": 1.5105235776353233e-05, "loss": 0.4328, "step": 31136 }, { "epoch": 0.6603677546605586, "grad_norm": 0.3690802752971649, "learning_rate": 1.5104949012972588e-05, "loss": 0.4962, "step": 31137 }, { "epoch": 0.6603889631184916, "grad_norm": 0.3457011878490448, "learning_rate": 1.5104662243914261e-05, "loss": 0.5645, "step": 31138 }, { "epoch": 0.6604101715764247, "grad_norm": 0.4084739089012146, "learning_rate": 1.5104375469178575e-05, "loss": 0.4681, "step": 31139 }, { "epoch": 0.6604313800343578, "grad_norm": 0.36857858300209045, "learning_rate": 1.5104088688765848e-05, "loss": 0.474, "step": 31140 }, { "epoch": 0.6604525884922907, "grad_norm": 0.4698144793510437, "learning_rate": 1.5103801902676395e-05, "loss": 0.5531, "step": 31141 }, { "epoch": 0.6604737969502238, "grad_norm": 0.321662575006485, "learning_rate": 1.5103515110910541e-05, "loss": 0.44, "step": 31142 }, { "epoch": 0.6604950054081568, "grad_norm": 0.3790728449821472, "learning_rate": 1.5103228313468599e-05, "loss": 0.5028, "step": 31143 }, { "epoch": 0.6605162138660898, "grad_norm": 0.3703655004501343, "learning_rate": 1.5102941510350894e-05, "loss": 0.4692, "step": 31144 }, { "epoch": 0.6605374223240228, "grad_norm": 0.3996274173259735, "learning_rate": 1.510265470155774e-05, "loss": 0.5717, "step": 31145 }, { "epoch": 0.6605586307819559, "grad_norm": 0.36989665031433105, "learning_rate": 1.5102367887089455e-05, "loss": 0.5627, "step": 31146 }, { "epoch": 0.6605798392398888, "grad_norm": 0.37637120485305786, "learning_rate": 1.5102081066946364e-05, "loss": 0.4637, "step": 31147 }, { "epoch": 0.6606010476978219, "grad_norm": 0.35576581954956055, "learning_rate": 1.5101794241128784e-05, "loss": 0.4494, "step": 31148 }, { "epoch": 0.6606222561557549, "grad_norm": 0.3526695668697357, "learning_rate": 1.5101507409637031e-05, "loss": 0.5296, "step": 31149 }, { "epoch": 0.660643464613688, "grad_norm": 0.3630104959011078, "learning_rate": 1.5101220572471427e-05, "loss": 0.5099, "step": 31150 }, { "epoch": 0.6606646730716209, "grad_norm": 0.42009395360946655, "learning_rate": 1.5100933729632288e-05, "loss": 0.5198, "step": 31151 }, { "epoch": 0.660685881529554, "grad_norm": 0.30610984563827515, "learning_rate": 1.5100646881119936e-05, "loss": 0.4236, "step": 31152 }, { "epoch": 0.6607070899874871, "grad_norm": 0.3760165274143219, "learning_rate": 1.5100360026934686e-05, "loss": 0.5198, "step": 31153 }, { "epoch": 0.66072829844542, "grad_norm": 0.33241406083106995, "learning_rate": 1.5100073167076863e-05, "loss": 0.4047, "step": 31154 }, { "epoch": 0.6607495069033531, "grad_norm": 0.35177144408226013, "learning_rate": 1.5099786301546781e-05, "loss": 0.5333, "step": 31155 }, { "epoch": 0.6607707153612861, "grad_norm": 0.38000601530075073, "learning_rate": 1.5099499430344764e-05, "loss": 0.5544, "step": 31156 }, { "epoch": 0.6607919238192191, "grad_norm": 0.3421911597251892, "learning_rate": 1.5099212553471125e-05, "loss": 0.5402, "step": 31157 }, { "epoch": 0.6608131322771521, "grad_norm": 0.5627687573432922, "learning_rate": 1.509892567092619e-05, "loss": 0.4624, "step": 31158 }, { "epoch": 0.6608343407350852, "grad_norm": 0.31423962116241455, "learning_rate": 1.5098638782710268e-05, "loss": 0.4247, "step": 31159 }, { "epoch": 0.6608555491930181, "grad_norm": 0.36107271909713745, "learning_rate": 1.5098351888823688e-05, "loss": 0.462, "step": 31160 }, { "epoch": 0.6608767576509512, "grad_norm": 0.3780340552330017, "learning_rate": 1.5098064989266767e-05, "loss": 0.4626, "step": 31161 }, { "epoch": 0.6608979661088842, "grad_norm": 0.3933364748954773, "learning_rate": 1.509777808403982e-05, "loss": 0.57, "step": 31162 }, { "epoch": 0.6609191745668173, "grad_norm": 0.386772096157074, "learning_rate": 1.5097491173143168e-05, "loss": 0.452, "step": 31163 }, { "epoch": 0.6609403830247502, "grad_norm": 0.41887688636779785, "learning_rate": 1.5097204256577133e-05, "loss": 0.5296, "step": 31164 }, { "epoch": 0.6609615914826833, "grad_norm": 0.393683522939682, "learning_rate": 1.509691733434203e-05, "loss": 0.5055, "step": 31165 }, { "epoch": 0.6609827999406164, "grad_norm": 0.31417784094810486, "learning_rate": 1.5096630406438178e-05, "loss": 0.4771, "step": 31166 }, { "epoch": 0.6610040083985493, "grad_norm": 0.3965742886066437, "learning_rate": 1.50963434728659e-05, "loss": 0.4373, "step": 31167 }, { "epoch": 0.6610252168564824, "grad_norm": 0.3497108817100525, "learning_rate": 1.5096056533625514e-05, "loss": 0.5568, "step": 31168 }, { "epoch": 0.6610464253144154, "grad_norm": 0.3712679147720337, "learning_rate": 1.5095769588717337e-05, "loss": 0.491, "step": 31169 }, { "epoch": 0.6610676337723485, "grad_norm": 0.34756284952163696, "learning_rate": 1.509548263814169e-05, "loss": 0.4442, "step": 31170 }, { "epoch": 0.6610888422302814, "grad_norm": 0.3586261570453644, "learning_rate": 1.509519568189889e-05, "loss": 0.4706, "step": 31171 }, { "epoch": 0.6611100506882145, "grad_norm": 0.3766014277935028, "learning_rate": 1.509490871998926e-05, "loss": 0.5387, "step": 31172 }, { "epoch": 0.6611312591461475, "grad_norm": 0.35744327306747437, "learning_rate": 1.5094621752413114e-05, "loss": 0.5759, "step": 31173 }, { "epoch": 0.6611524676040805, "grad_norm": 0.5166847705841064, "learning_rate": 1.5094334779170777e-05, "loss": 0.5511, "step": 31174 }, { "epoch": 0.6611736760620135, "grad_norm": 0.3170066475868225, "learning_rate": 1.5094047800262561e-05, "loss": 0.4234, "step": 31175 }, { "epoch": 0.6611948845199466, "grad_norm": 0.36927366256713867, "learning_rate": 1.5093760815688795e-05, "loss": 0.5519, "step": 31176 }, { "epoch": 0.6612160929778795, "grad_norm": 0.4279690384864807, "learning_rate": 1.5093473825449787e-05, "loss": 0.4655, "step": 31177 }, { "epoch": 0.6612373014358126, "grad_norm": 0.3670717477798462, "learning_rate": 1.5093186829545862e-05, "loss": 0.4965, "step": 31178 }, { "epoch": 0.6612585098937456, "grad_norm": 0.38208335638046265, "learning_rate": 1.5092899827977343e-05, "loss": 0.529, "step": 31179 }, { "epoch": 0.6612797183516786, "grad_norm": 0.34681910276412964, "learning_rate": 1.509261282074454e-05, "loss": 0.5129, "step": 31180 }, { "epoch": 0.6613009268096117, "grad_norm": 0.3252142369747162, "learning_rate": 1.5092325807847783e-05, "loss": 0.4308, "step": 31181 }, { "epoch": 0.6613221352675447, "grad_norm": 0.3682915270328522, "learning_rate": 1.5092038789287382e-05, "loss": 0.479, "step": 31182 }, { "epoch": 0.6613433437254778, "grad_norm": 0.3838884234428406, "learning_rate": 1.509175176506366e-05, "loss": 0.5626, "step": 31183 }, { "epoch": 0.6613645521834107, "grad_norm": 0.36277472972869873, "learning_rate": 1.509146473517694e-05, "loss": 0.5098, "step": 31184 }, { "epoch": 0.6613857606413438, "grad_norm": 0.3549627959728241, "learning_rate": 1.5091177699627531e-05, "loss": 0.5218, "step": 31185 }, { "epoch": 0.6614069690992768, "grad_norm": 0.3320147395133972, "learning_rate": 1.5090890658415758e-05, "loss": 0.4721, "step": 31186 }, { "epoch": 0.6614281775572098, "grad_norm": 0.36728429794311523, "learning_rate": 1.5090603611541946e-05, "loss": 0.5105, "step": 31187 }, { "epoch": 0.6614493860151428, "grad_norm": 0.42784345149993896, "learning_rate": 1.5090316559006404e-05, "loss": 0.5595, "step": 31188 }, { "epoch": 0.6614705944730759, "grad_norm": 0.3412941098213196, "learning_rate": 1.509002950080946e-05, "loss": 0.5079, "step": 31189 }, { "epoch": 0.6614918029310088, "grad_norm": 0.3660529553890228, "learning_rate": 1.508974243695143e-05, "loss": 0.4928, "step": 31190 }, { "epoch": 0.6615130113889419, "grad_norm": 0.3690445125102997, "learning_rate": 1.5089455367432629e-05, "loss": 0.4846, "step": 31191 }, { "epoch": 0.6615342198468749, "grad_norm": 0.31702232360839844, "learning_rate": 1.508916829225338e-05, "loss": 0.4488, "step": 31192 }, { "epoch": 0.661555428304808, "grad_norm": 0.3371978998184204, "learning_rate": 1.5088881211414004e-05, "loss": 0.4351, "step": 31193 }, { "epoch": 0.661576636762741, "grad_norm": 0.3370042145252228, "learning_rate": 1.5088594124914817e-05, "loss": 0.4653, "step": 31194 }, { "epoch": 0.661597845220674, "grad_norm": 0.34297001361846924, "learning_rate": 1.5088307032756141e-05, "loss": 0.4081, "step": 31195 }, { "epoch": 0.6616190536786071, "grad_norm": 0.4089285731315613, "learning_rate": 1.5088019934938296e-05, "loss": 0.5906, "step": 31196 }, { "epoch": 0.66164026213654, "grad_norm": 0.35548871755599976, "learning_rate": 1.5087732831461597e-05, "loss": 0.4603, "step": 31197 }, { "epoch": 0.6616614705944731, "grad_norm": 0.3693574070930481, "learning_rate": 1.5087445722326365e-05, "loss": 0.442, "step": 31198 }, { "epoch": 0.6616826790524061, "grad_norm": 0.38718146085739136, "learning_rate": 1.5087158607532923e-05, "loss": 0.4559, "step": 31199 }, { "epoch": 0.6617038875103392, "grad_norm": 0.3253514766693115, "learning_rate": 1.5086871487081585e-05, "loss": 0.4416, "step": 31200 }, { "epoch": 0.6617250959682721, "grad_norm": 0.39197415113449097, "learning_rate": 1.5086584360972674e-05, "loss": 0.4945, "step": 31201 }, { "epoch": 0.6617463044262052, "grad_norm": 0.37063565850257874, "learning_rate": 1.5086297229206506e-05, "loss": 0.4863, "step": 31202 }, { "epoch": 0.6617675128841382, "grad_norm": 0.4649405777454376, "learning_rate": 1.5086010091783406e-05, "loss": 0.4376, "step": 31203 }, { "epoch": 0.6617887213420712, "grad_norm": 0.34216707944869995, "learning_rate": 1.5085722948703688e-05, "loss": 0.525, "step": 31204 }, { "epoch": 0.6618099298000042, "grad_norm": 0.3469821810722351, "learning_rate": 1.5085435799967672e-05, "loss": 0.515, "step": 31205 }, { "epoch": 0.6618311382579373, "grad_norm": 0.3503965735435486, "learning_rate": 1.5085148645575678e-05, "loss": 0.5661, "step": 31206 }, { "epoch": 0.6618523467158703, "grad_norm": 0.42054280638694763, "learning_rate": 1.5084861485528027e-05, "loss": 0.4986, "step": 31207 }, { "epoch": 0.6618735551738033, "grad_norm": 0.351471483707428, "learning_rate": 1.5084574319825036e-05, "loss": 0.4748, "step": 31208 }, { "epoch": 0.6618947636317364, "grad_norm": 0.4330297112464905, "learning_rate": 1.5084287148467029e-05, "loss": 0.5011, "step": 31209 }, { "epoch": 0.6619159720896693, "grad_norm": 0.40717029571533203, "learning_rate": 1.5083999971454319e-05, "loss": 0.4081, "step": 31210 }, { "epoch": 0.6619371805476024, "grad_norm": 0.36367717385292053, "learning_rate": 1.5083712788787229e-05, "loss": 0.5522, "step": 31211 }, { "epoch": 0.6619583890055354, "grad_norm": 0.32641956210136414, "learning_rate": 1.5083425600466076e-05, "loss": 0.4916, "step": 31212 }, { "epoch": 0.6619795974634685, "grad_norm": 0.3629480004310608, "learning_rate": 1.5083138406491186e-05, "loss": 0.4893, "step": 31213 }, { "epoch": 0.6620008059214014, "grad_norm": 0.3732512891292572, "learning_rate": 1.5082851206862871e-05, "loss": 0.5065, "step": 31214 }, { "epoch": 0.6620220143793345, "grad_norm": 0.44288933277130127, "learning_rate": 1.5082564001581453e-05, "loss": 0.5109, "step": 31215 }, { "epoch": 0.6620432228372675, "grad_norm": 0.3423256278038025, "learning_rate": 1.5082276790647252e-05, "loss": 0.5045, "step": 31216 }, { "epoch": 0.6620644312952005, "grad_norm": 0.5263311266899109, "learning_rate": 1.5081989574060585e-05, "loss": 0.5021, "step": 31217 }, { "epoch": 0.6620856397531335, "grad_norm": 0.4076615571975708, "learning_rate": 1.5081702351821776e-05, "loss": 0.3957, "step": 31218 }, { "epoch": 0.6621068482110666, "grad_norm": 0.37847164273262024, "learning_rate": 1.508141512393114e-05, "loss": 0.5517, "step": 31219 }, { "epoch": 0.6621280566689995, "grad_norm": 0.41495490074157715, "learning_rate": 1.5081127890388999e-05, "loss": 0.4983, "step": 31220 }, { "epoch": 0.6621492651269326, "grad_norm": 0.3286391496658325, "learning_rate": 1.5080840651195671e-05, "loss": 0.4278, "step": 31221 }, { "epoch": 0.6621704735848657, "grad_norm": 0.3192439675331116, "learning_rate": 1.5080553406351477e-05, "loss": 0.4019, "step": 31222 }, { "epoch": 0.6621916820427987, "grad_norm": 0.3954927325248718, "learning_rate": 1.5080266155856733e-05, "loss": 0.4693, "step": 31223 }, { "epoch": 0.6622128905007317, "grad_norm": 0.4181641638278961, "learning_rate": 1.5079978899711765e-05, "loss": 0.4714, "step": 31224 }, { "epoch": 0.6622340989586647, "grad_norm": 0.3452252149581909, "learning_rate": 1.5079691637916887e-05, "loss": 0.4836, "step": 31225 }, { "epoch": 0.6622553074165978, "grad_norm": 0.3366665244102478, "learning_rate": 1.5079404370472422e-05, "loss": 0.4949, "step": 31226 }, { "epoch": 0.6622765158745307, "grad_norm": 0.46181899309158325, "learning_rate": 1.5079117097378686e-05, "loss": 0.4793, "step": 31227 }, { "epoch": 0.6622977243324638, "grad_norm": 0.32776084542274475, "learning_rate": 1.5078829818635998e-05, "loss": 0.4539, "step": 31228 }, { "epoch": 0.6623189327903968, "grad_norm": 0.3520270586013794, "learning_rate": 1.5078542534244682e-05, "loss": 0.5029, "step": 31229 }, { "epoch": 0.6623401412483299, "grad_norm": 0.37860167026519775, "learning_rate": 1.5078255244205053e-05, "loss": 0.4237, "step": 31230 }, { "epoch": 0.6623613497062628, "grad_norm": 0.3683299422264099, "learning_rate": 1.5077967948517433e-05, "loss": 0.5323, "step": 31231 }, { "epoch": 0.6623825581641959, "grad_norm": 0.334124356508255, "learning_rate": 1.5077680647182145e-05, "loss": 0.5093, "step": 31232 }, { "epoch": 0.6624037666221289, "grad_norm": 0.31558096408843994, "learning_rate": 1.5077393340199502e-05, "loss": 0.52, "step": 31233 }, { "epoch": 0.6624249750800619, "grad_norm": 0.33833572268486023, "learning_rate": 1.5077106027569826e-05, "loss": 0.4997, "step": 31234 }, { "epoch": 0.662446183537995, "grad_norm": 0.3250260651111603, "learning_rate": 1.5076818709293437e-05, "loss": 0.4334, "step": 31235 }, { "epoch": 0.662467391995928, "grad_norm": 0.35300931334495544, "learning_rate": 1.5076531385370653e-05, "loss": 0.6049, "step": 31236 }, { "epoch": 0.662488600453861, "grad_norm": 0.36579129099845886, "learning_rate": 1.5076244055801797e-05, "loss": 0.4936, "step": 31237 }, { "epoch": 0.662509808911794, "grad_norm": 0.338890939950943, "learning_rate": 1.5075956720587183e-05, "loss": 0.5491, "step": 31238 }, { "epoch": 0.6625310173697271, "grad_norm": 0.35224035382270813, "learning_rate": 1.5075669379727137e-05, "loss": 0.4791, "step": 31239 }, { "epoch": 0.66255222582766, "grad_norm": 0.42930489778518677, "learning_rate": 1.5075382033221978e-05, "loss": 0.5449, "step": 31240 }, { "epoch": 0.6625734342855931, "grad_norm": 0.3482414782047272, "learning_rate": 1.5075094681072023e-05, "loss": 0.5231, "step": 31241 }, { "epoch": 0.6625946427435261, "grad_norm": 0.3580891788005829, "learning_rate": 1.5074807323277582e-05, "loss": 0.4074, "step": 31242 }, { "epoch": 0.6626158512014592, "grad_norm": 0.3462021052837372, "learning_rate": 1.5074519959838996e-05, "loss": 0.5836, "step": 31243 }, { "epoch": 0.6626370596593921, "grad_norm": 0.35798755288124084, "learning_rate": 1.5074232590756568e-05, "loss": 0.4367, "step": 31244 }, { "epoch": 0.6626582681173252, "grad_norm": 0.3644591271877289, "learning_rate": 1.5073945216030623e-05, "loss": 0.5455, "step": 31245 }, { "epoch": 0.6626794765752582, "grad_norm": 0.37000900506973267, "learning_rate": 1.5073657835661481e-05, "loss": 0.512, "step": 31246 }, { "epoch": 0.6627006850331912, "grad_norm": 0.3307648003101349, "learning_rate": 1.5073370449649463e-05, "loss": 0.4958, "step": 31247 }, { "epoch": 0.6627218934911243, "grad_norm": 0.35702863335609436, "learning_rate": 1.5073083057994882e-05, "loss": 0.4785, "step": 31248 }, { "epoch": 0.6627431019490573, "grad_norm": 0.4030423164367676, "learning_rate": 1.5072795660698063e-05, "loss": 0.5567, "step": 31249 }, { "epoch": 0.6627643104069904, "grad_norm": 0.3626299798488617, "learning_rate": 1.5072508257759326e-05, "loss": 0.5035, "step": 31250 }, { "epoch": 0.6627855188649233, "grad_norm": 0.39599233865737915, "learning_rate": 1.5072220849178987e-05, "loss": 0.4774, "step": 31251 }, { "epoch": 0.6628067273228564, "grad_norm": 0.3947031795978546, "learning_rate": 1.507193343495737e-05, "loss": 0.4994, "step": 31252 }, { "epoch": 0.6628279357807894, "grad_norm": 0.3944541811943054, "learning_rate": 1.5071646015094792e-05, "loss": 0.5282, "step": 31253 }, { "epoch": 0.6628491442387224, "grad_norm": 0.3455604612827301, "learning_rate": 1.5071358589591575e-05, "loss": 0.4878, "step": 31254 }, { "epoch": 0.6628703526966554, "grad_norm": 0.33106866478919983, "learning_rate": 1.5071071158448036e-05, "loss": 0.528, "step": 31255 }, { "epoch": 0.6628915611545885, "grad_norm": 0.3588072657585144, "learning_rate": 1.5070783721664495e-05, "loss": 0.4988, "step": 31256 }, { "epoch": 0.6629127696125214, "grad_norm": 0.39160463213920593, "learning_rate": 1.5070496279241274e-05, "loss": 0.5301, "step": 31257 }, { "epoch": 0.6629339780704545, "grad_norm": 0.3740365207195282, "learning_rate": 1.507020883117869e-05, "loss": 0.5025, "step": 31258 }, { "epoch": 0.6629551865283875, "grad_norm": 0.4648030400276184, "learning_rate": 1.5069921377477064e-05, "loss": 0.4464, "step": 31259 }, { "epoch": 0.6629763949863205, "grad_norm": 0.32626819610595703, "learning_rate": 1.5069633918136717e-05, "loss": 0.4969, "step": 31260 }, { "epoch": 0.6629976034442535, "grad_norm": 0.41998961567878723, "learning_rate": 1.5069346453157967e-05, "loss": 0.4761, "step": 31261 }, { "epoch": 0.6630188119021866, "grad_norm": 0.34761109948158264, "learning_rate": 1.5069058982541127e-05, "loss": 0.4721, "step": 31262 }, { "epoch": 0.6630400203601197, "grad_norm": 0.33172112703323364, "learning_rate": 1.506877150628653e-05, "loss": 0.5502, "step": 31263 }, { "epoch": 0.6630612288180526, "grad_norm": 0.35534557700157166, "learning_rate": 1.5068484024394492e-05, "loss": 0.4717, "step": 31264 }, { "epoch": 0.6630824372759857, "grad_norm": 0.35955536365509033, "learning_rate": 1.5068196536865325e-05, "loss": 0.4796, "step": 31265 }, { "epoch": 0.6631036457339187, "grad_norm": 0.37777459621429443, "learning_rate": 1.5067909043699356e-05, "loss": 0.5493, "step": 31266 }, { "epoch": 0.6631248541918517, "grad_norm": 0.3405229151248932, "learning_rate": 1.5067621544896902e-05, "loss": 0.5022, "step": 31267 }, { "epoch": 0.6631460626497847, "grad_norm": 0.3780924081802368, "learning_rate": 1.5067334040458282e-05, "loss": 0.5519, "step": 31268 }, { "epoch": 0.6631672711077178, "grad_norm": 0.34176865220069885, "learning_rate": 1.506704653038382e-05, "loss": 0.4677, "step": 31269 }, { "epoch": 0.6631884795656507, "grad_norm": 0.3694921135902405, "learning_rate": 1.506675901467383e-05, "loss": 0.557, "step": 31270 }, { "epoch": 0.6632096880235838, "grad_norm": 0.3484252691268921, "learning_rate": 1.5066471493328636e-05, "loss": 0.4736, "step": 31271 }, { "epoch": 0.6632308964815168, "grad_norm": 0.3603697419166565, "learning_rate": 1.5066183966348558e-05, "loss": 0.4609, "step": 31272 }, { "epoch": 0.6632521049394499, "grad_norm": 0.3393430709838867, "learning_rate": 1.5065896433733914e-05, "loss": 0.4749, "step": 31273 }, { "epoch": 0.6632733133973828, "grad_norm": 0.3372974395751953, "learning_rate": 1.506560889548502e-05, "loss": 0.4853, "step": 31274 }, { "epoch": 0.6632945218553159, "grad_norm": 0.3656252324581146, "learning_rate": 1.5065321351602204e-05, "loss": 0.3974, "step": 31275 }, { "epoch": 0.663315730313249, "grad_norm": 0.3461034297943115, "learning_rate": 1.506503380208578e-05, "loss": 0.4664, "step": 31276 }, { "epoch": 0.6633369387711819, "grad_norm": 0.3853290379047394, "learning_rate": 1.506474624693607e-05, "loss": 0.5361, "step": 31277 }, { "epoch": 0.663358147229115, "grad_norm": 0.33948132395744324, "learning_rate": 1.506445868615339e-05, "loss": 0.5201, "step": 31278 }, { "epoch": 0.663379355687048, "grad_norm": 0.30198997259140015, "learning_rate": 1.5064171119738068e-05, "loss": 0.406, "step": 31279 }, { "epoch": 0.663400564144981, "grad_norm": 0.36186525225639343, "learning_rate": 1.5063883547690416e-05, "loss": 0.498, "step": 31280 }, { "epoch": 0.663421772602914, "grad_norm": 0.35641440749168396, "learning_rate": 1.5063595970010758e-05, "loss": 0.4361, "step": 31281 }, { "epoch": 0.6634429810608471, "grad_norm": 0.34728750586509705, "learning_rate": 1.5063308386699411e-05, "loss": 0.4675, "step": 31282 }, { "epoch": 0.6634641895187801, "grad_norm": 0.35584214329719543, "learning_rate": 1.5063020797756698e-05, "loss": 0.5346, "step": 31283 }, { "epoch": 0.6634853979767131, "grad_norm": 0.5870267152786255, "learning_rate": 1.5062733203182935e-05, "loss": 0.4464, "step": 31284 }, { "epoch": 0.6635066064346461, "grad_norm": 0.34131336212158203, "learning_rate": 1.5062445602978448e-05, "loss": 0.4758, "step": 31285 }, { "epoch": 0.6635278148925792, "grad_norm": 0.36115938425064087, "learning_rate": 1.5062157997143549e-05, "loss": 0.4549, "step": 31286 }, { "epoch": 0.6635490233505121, "grad_norm": 0.35795319080352783, "learning_rate": 1.5061870385678563e-05, "loss": 0.4942, "step": 31287 }, { "epoch": 0.6635702318084452, "grad_norm": 0.3205522298812866, "learning_rate": 1.5061582768583809e-05, "loss": 0.4697, "step": 31288 }, { "epoch": 0.6635914402663783, "grad_norm": 0.3978959619998932, "learning_rate": 1.5061295145859606e-05, "loss": 0.5405, "step": 31289 }, { "epoch": 0.6636126487243112, "grad_norm": 0.3311200737953186, "learning_rate": 1.5061007517506276e-05, "loss": 0.4911, "step": 31290 }, { "epoch": 0.6636338571822443, "grad_norm": 0.3817730247974396, "learning_rate": 1.5060719883524138e-05, "loss": 0.4128, "step": 31291 }, { "epoch": 0.6636550656401773, "grad_norm": 0.313728928565979, "learning_rate": 1.506043224391351e-05, "loss": 0.4384, "step": 31292 }, { "epoch": 0.6636762740981104, "grad_norm": 0.4016503691673279, "learning_rate": 1.5060144598674714e-05, "loss": 0.5292, "step": 31293 }, { "epoch": 0.6636974825560433, "grad_norm": 0.3709240257740021, "learning_rate": 1.5059856947808066e-05, "loss": 0.4618, "step": 31294 }, { "epoch": 0.6637186910139764, "grad_norm": 0.31854453682899475, "learning_rate": 1.5059569291313893e-05, "loss": 0.5077, "step": 31295 }, { "epoch": 0.6637398994719094, "grad_norm": 0.3324914872646332, "learning_rate": 1.505928162919251e-05, "loss": 0.4544, "step": 31296 }, { "epoch": 0.6637611079298424, "grad_norm": 0.3349169194698334, "learning_rate": 1.5058993961444237e-05, "loss": 0.4531, "step": 31297 }, { "epoch": 0.6637823163877754, "grad_norm": 1.2083402872085571, "learning_rate": 1.5058706288069393e-05, "loss": 0.5209, "step": 31298 }, { "epoch": 0.6638035248457085, "grad_norm": 0.5458425879478455, "learning_rate": 1.5058418609068306e-05, "loss": 0.4436, "step": 31299 }, { "epoch": 0.6638247333036414, "grad_norm": 0.37602630257606506, "learning_rate": 1.5058130924441284e-05, "loss": 0.5306, "step": 31300 }, { "epoch": 0.6638459417615745, "grad_norm": 0.3626985549926758, "learning_rate": 1.5057843234188655e-05, "loss": 0.5191, "step": 31301 }, { "epoch": 0.6638671502195075, "grad_norm": 0.3147234320640564, "learning_rate": 1.5057555538310736e-05, "loss": 0.4515, "step": 31302 }, { "epoch": 0.6638883586774406, "grad_norm": 0.363075315952301, "learning_rate": 1.5057267836807849e-05, "loss": 0.5124, "step": 31303 }, { "epoch": 0.6639095671353736, "grad_norm": 0.34649065136909485, "learning_rate": 1.5056980129680309e-05, "loss": 0.4879, "step": 31304 }, { "epoch": 0.6639307755933066, "grad_norm": 0.34069332480430603, "learning_rate": 1.5056692416928442e-05, "loss": 0.4968, "step": 31305 }, { "epoch": 0.6639519840512397, "grad_norm": 0.36128005385398865, "learning_rate": 1.5056404698552567e-05, "loss": 0.4483, "step": 31306 }, { "epoch": 0.6639731925091726, "grad_norm": 0.33775368332862854, "learning_rate": 1.5056116974552999e-05, "loss": 0.4445, "step": 31307 }, { "epoch": 0.6639944009671057, "grad_norm": 0.37306755781173706, "learning_rate": 1.5055829244930066e-05, "loss": 0.5892, "step": 31308 }, { "epoch": 0.6640156094250387, "grad_norm": 0.3279542326927185, "learning_rate": 1.5055541509684082e-05, "loss": 0.3858, "step": 31309 }, { "epoch": 0.6640368178829718, "grad_norm": 0.35711967945098877, "learning_rate": 1.5055253768815368e-05, "loss": 0.4954, "step": 31310 }, { "epoch": 0.6640580263409047, "grad_norm": 0.6721135377883911, "learning_rate": 1.5054966022324244e-05, "loss": 0.4491, "step": 31311 }, { "epoch": 0.6640792347988378, "grad_norm": 0.3449360728263855, "learning_rate": 1.5054678270211032e-05, "loss": 0.4133, "step": 31312 }, { "epoch": 0.6641004432567708, "grad_norm": 0.2929699718952179, "learning_rate": 1.505439051247605e-05, "loss": 0.405, "step": 31313 }, { "epoch": 0.6641216517147038, "grad_norm": 0.3670022487640381, "learning_rate": 1.505410274911962e-05, "loss": 0.5199, "step": 31314 }, { "epoch": 0.6641428601726368, "grad_norm": 0.36041998863220215, "learning_rate": 1.505381498014206e-05, "loss": 0.5096, "step": 31315 }, { "epoch": 0.6641640686305699, "grad_norm": 0.4115472137928009, "learning_rate": 1.505352720554369e-05, "loss": 0.5189, "step": 31316 }, { "epoch": 0.664185277088503, "grad_norm": 0.48921799659729004, "learning_rate": 1.5053239425324835e-05, "loss": 0.669, "step": 31317 }, { "epoch": 0.6642064855464359, "grad_norm": 0.35862693190574646, "learning_rate": 1.5052951639485805e-05, "loss": 0.5098, "step": 31318 }, { "epoch": 0.664227694004369, "grad_norm": 0.34648117423057556, "learning_rate": 1.5052663848026926e-05, "loss": 0.5259, "step": 31319 }, { "epoch": 0.664248902462302, "grad_norm": 0.3810034692287445, "learning_rate": 1.5052376050948525e-05, "loss": 0.4729, "step": 31320 }, { "epoch": 0.664270110920235, "grad_norm": 0.3379320204257965, "learning_rate": 1.5052088248250911e-05, "loss": 0.528, "step": 31321 }, { "epoch": 0.664291319378168, "grad_norm": 0.32436490058898926, "learning_rate": 1.5051800439934407e-05, "loss": 0.4908, "step": 31322 }, { "epoch": 0.6643125278361011, "grad_norm": 0.4352670907974243, "learning_rate": 1.505151262599934e-05, "loss": 0.5994, "step": 31323 }, { "epoch": 0.664333736294034, "grad_norm": 0.35308584570884705, "learning_rate": 1.5051224806446018e-05, "loss": 0.4915, "step": 31324 }, { "epoch": 0.6643549447519671, "grad_norm": 0.3849683403968811, "learning_rate": 1.5050936981274773e-05, "loss": 0.5807, "step": 31325 }, { "epoch": 0.6643761532099001, "grad_norm": 0.3336464762687683, "learning_rate": 1.5050649150485917e-05, "loss": 0.5436, "step": 31326 }, { "epoch": 0.6643973616678331, "grad_norm": 0.3687901198863983, "learning_rate": 1.5050361314079772e-05, "loss": 0.503, "step": 31327 }, { "epoch": 0.6644185701257661, "grad_norm": 0.3600413203239441, "learning_rate": 1.5050073472056658e-05, "loss": 0.4749, "step": 31328 }, { "epoch": 0.6644397785836992, "grad_norm": 0.3854934871196747, "learning_rate": 1.5049785624416902e-05, "loss": 0.5662, "step": 31329 }, { "epoch": 0.6644609870416323, "grad_norm": 0.47761479020118713, "learning_rate": 1.5049497771160812e-05, "loss": 0.5257, "step": 31330 }, { "epoch": 0.6644821954995652, "grad_norm": 0.3993612825870514, "learning_rate": 1.5049209912288717e-05, "loss": 0.5249, "step": 31331 }, { "epoch": 0.6645034039574983, "grad_norm": 0.42992690205574036, "learning_rate": 1.5048922047800933e-05, "loss": 0.5903, "step": 31332 }, { "epoch": 0.6645246124154313, "grad_norm": 0.3853323757648468, "learning_rate": 1.5048634177697783e-05, "loss": 0.4815, "step": 31333 }, { "epoch": 0.6645458208733643, "grad_norm": 0.32022207975387573, "learning_rate": 1.5048346301979586e-05, "loss": 0.3999, "step": 31334 }, { "epoch": 0.6645670293312973, "grad_norm": 0.35381177067756653, "learning_rate": 1.5048058420646663e-05, "loss": 0.5081, "step": 31335 }, { "epoch": 0.6645882377892304, "grad_norm": 0.47565120458602905, "learning_rate": 1.5047770533699332e-05, "loss": 0.574, "step": 31336 }, { "epoch": 0.6646094462471633, "grad_norm": 0.39965713024139404, "learning_rate": 1.5047482641137917e-05, "loss": 0.5173, "step": 31337 }, { "epoch": 0.6646306547050964, "grad_norm": 0.36643630266189575, "learning_rate": 1.5047194742962731e-05, "loss": 0.4875, "step": 31338 }, { "epoch": 0.6646518631630294, "grad_norm": 0.38305220007896423, "learning_rate": 1.5046906839174102e-05, "loss": 0.5511, "step": 31339 }, { "epoch": 0.6646730716209625, "grad_norm": 0.3902629017829895, "learning_rate": 1.504661892977235e-05, "loss": 0.5721, "step": 31340 }, { "epoch": 0.6646942800788954, "grad_norm": 0.35736778378486633, "learning_rate": 1.5046331014757787e-05, "loss": 0.5022, "step": 31341 }, { "epoch": 0.6647154885368285, "grad_norm": 0.3243306875228882, "learning_rate": 1.504604309413074e-05, "loss": 0.4544, "step": 31342 }, { "epoch": 0.6647366969947615, "grad_norm": 0.3564865291118622, "learning_rate": 1.504575516789153e-05, "loss": 0.5314, "step": 31343 }, { "epoch": 0.6647579054526945, "grad_norm": 0.8963223099708557, "learning_rate": 1.5045467236040469e-05, "loss": 0.4946, "step": 31344 }, { "epoch": 0.6647791139106276, "grad_norm": 0.3373877704143524, "learning_rate": 1.504517929857789e-05, "loss": 0.4606, "step": 31345 }, { "epoch": 0.6648003223685606, "grad_norm": 0.34629127383232117, "learning_rate": 1.5044891355504103e-05, "loss": 0.4637, "step": 31346 }, { "epoch": 0.6648215308264936, "grad_norm": 0.37412771582603455, "learning_rate": 1.5044603406819431e-05, "loss": 0.4822, "step": 31347 }, { "epoch": 0.6648427392844266, "grad_norm": 0.33074262738227844, "learning_rate": 1.5044315452524196e-05, "loss": 0.5222, "step": 31348 }, { "epoch": 0.6648639477423597, "grad_norm": 0.3821967840194702, "learning_rate": 1.5044027492618718e-05, "loss": 0.5572, "step": 31349 }, { "epoch": 0.6648851562002926, "grad_norm": 0.4674723148345947, "learning_rate": 1.5043739527103316e-05, "loss": 0.5266, "step": 31350 }, { "epoch": 0.6649063646582257, "grad_norm": 0.36866018176078796, "learning_rate": 1.504345155597831e-05, "loss": 0.5774, "step": 31351 }, { "epoch": 0.6649275731161587, "grad_norm": 0.356025755405426, "learning_rate": 1.504316357924402e-05, "loss": 0.5059, "step": 31352 }, { "epoch": 0.6649487815740918, "grad_norm": 0.3548955023288727, "learning_rate": 1.504287559690077e-05, "loss": 0.4608, "step": 31353 }, { "epoch": 0.6649699900320247, "grad_norm": 0.3276045024394989, "learning_rate": 1.5042587608948877e-05, "loss": 0.4724, "step": 31354 }, { "epoch": 0.6649911984899578, "grad_norm": 0.36368775367736816, "learning_rate": 1.5042299615388661e-05, "loss": 0.5639, "step": 31355 }, { "epoch": 0.6650124069478908, "grad_norm": 0.31106239557266235, "learning_rate": 1.5042011616220444e-05, "loss": 0.4518, "step": 31356 }, { "epoch": 0.6650336154058238, "grad_norm": 0.38821491599082947, "learning_rate": 1.5041723611444545e-05, "loss": 0.5446, "step": 31357 }, { "epoch": 0.6650548238637569, "grad_norm": 0.34910905361175537, "learning_rate": 1.5041435601061283e-05, "loss": 0.4391, "step": 31358 }, { "epoch": 0.6650760323216899, "grad_norm": 0.35453131794929504, "learning_rate": 1.504114758507098e-05, "loss": 0.51, "step": 31359 }, { "epoch": 0.665097240779623, "grad_norm": 0.5319293141365051, "learning_rate": 1.504085956347396e-05, "loss": 0.5006, "step": 31360 }, { "epoch": 0.6651184492375559, "grad_norm": 0.37347525358200073, "learning_rate": 1.5040571536270539e-05, "loss": 0.5752, "step": 31361 }, { "epoch": 0.665139657695489, "grad_norm": 0.3483318090438843, "learning_rate": 1.5040283503461036e-05, "loss": 0.5012, "step": 31362 }, { "epoch": 0.665160866153422, "grad_norm": 0.40138235688209534, "learning_rate": 1.5039995465045775e-05, "loss": 0.474, "step": 31363 }, { "epoch": 0.665182074611355, "grad_norm": 0.36113646626472473, "learning_rate": 1.5039707421025074e-05, "loss": 0.4921, "step": 31364 }, { "epoch": 0.665203283069288, "grad_norm": 0.40210577845573425, "learning_rate": 1.5039419371399255e-05, "loss": 0.5056, "step": 31365 }, { "epoch": 0.6652244915272211, "grad_norm": 0.3640401065349579, "learning_rate": 1.5039131316168635e-05, "loss": 0.4551, "step": 31366 }, { "epoch": 0.665245699985154, "grad_norm": 0.34436342120170593, "learning_rate": 1.503884325533354e-05, "loss": 0.4628, "step": 31367 }, { "epoch": 0.6652669084430871, "grad_norm": 0.331716388463974, "learning_rate": 1.5038555188894289e-05, "loss": 0.5409, "step": 31368 }, { "epoch": 0.6652881169010201, "grad_norm": 0.34924232959747314, "learning_rate": 1.5038267116851195e-05, "loss": 0.4063, "step": 31369 }, { "epoch": 0.6653093253589532, "grad_norm": 0.33993247151374817, "learning_rate": 1.503797903920459e-05, "loss": 0.4154, "step": 31370 }, { "epoch": 0.6653305338168862, "grad_norm": 0.40400320291519165, "learning_rate": 1.5037690955954785e-05, "loss": 0.4578, "step": 31371 }, { "epoch": 0.6653517422748192, "grad_norm": 0.35001617670059204, "learning_rate": 1.5037402867102102e-05, "loss": 0.4662, "step": 31372 }, { "epoch": 0.6653729507327523, "grad_norm": 0.31413108110427856, "learning_rate": 1.5037114772646867e-05, "loss": 0.4223, "step": 31373 }, { "epoch": 0.6653941591906852, "grad_norm": 0.3444933593273163, "learning_rate": 1.5036826672589398e-05, "loss": 0.4764, "step": 31374 }, { "epoch": 0.6654153676486183, "grad_norm": 0.3720281720161438, "learning_rate": 1.503653856693001e-05, "loss": 0.5438, "step": 31375 }, { "epoch": 0.6654365761065513, "grad_norm": 0.3669331967830658, "learning_rate": 1.5036250455669028e-05, "loss": 0.6007, "step": 31376 }, { "epoch": 0.6654577845644843, "grad_norm": 0.35744473338127136, "learning_rate": 1.5035962338806773e-05, "loss": 0.4604, "step": 31377 }, { "epoch": 0.6654789930224173, "grad_norm": 0.38688892126083374, "learning_rate": 1.5035674216343563e-05, "loss": 0.5295, "step": 31378 }, { "epoch": 0.6655002014803504, "grad_norm": 0.34318727254867554, "learning_rate": 1.5035386088279724e-05, "loss": 0.527, "step": 31379 }, { "epoch": 0.6655214099382833, "grad_norm": 0.3322467505931854, "learning_rate": 1.5035097954615569e-05, "loss": 0.5429, "step": 31380 }, { "epoch": 0.6655426183962164, "grad_norm": 0.43862614035606384, "learning_rate": 1.5034809815351423e-05, "loss": 0.5271, "step": 31381 }, { "epoch": 0.6655638268541494, "grad_norm": 0.4040328562259674, "learning_rate": 1.5034521670487605e-05, "loss": 0.5755, "step": 31382 }, { "epoch": 0.6655850353120825, "grad_norm": 0.312911719083786, "learning_rate": 1.5034233520024436e-05, "loss": 0.4807, "step": 31383 }, { "epoch": 0.6656062437700155, "grad_norm": 0.37524551153182983, "learning_rate": 1.5033945363962234e-05, "loss": 0.5569, "step": 31384 }, { "epoch": 0.6656274522279485, "grad_norm": 0.37079864740371704, "learning_rate": 1.5033657202301326e-05, "loss": 0.5871, "step": 31385 }, { "epoch": 0.6656486606858816, "grad_norm": 0.3852872848510742, "learning_rate": 1.5033369035042022e-05, "loss": 0.4354, "step": 31386 }, { "epoch": 0.6656698691438145, "grad_norm": 0.32191771268844604, "learning_rate": 1.5033080862184654e-05, "loss": 0.5036, "step": 31387 }, { "epoch": 0.6656910776017476, "grad_norm": 0.36296355724334717, "learning_rate": 1.5032792683729538e-05, "loss": 0.5139, "step": 31388 }, { "epoch": 0.6657122860596806, "grad_norm": 0.39388561248779297, "learning_rate": 1.503250449967699e-05, "loss": 0.4626, "step": 31389 }, { "epoch": 0.6657334945176137, "grad_norm": 0.3705878257751465, "learning_rate": 1.5032216310027334e-05, "loss": 0.5245, "step": 31390 }, { "epoch": 0.6657547029755466, "grad_norm": 0.4461311995983124, "learning_rate": 1.5031928114780893e-05, "loss": 0.4244, "step": 31391 }, { "epoch": 0.6657759114334797, "grad_norm": 0.3913557529449463, "learning_rate": 1.5031639913937986e-05, "loss": 0.4213, "step": 31392 }, { "epoch": 0.6657971198914127, "grad_norm": 0.3856721818447113, "learning_rate": 1.5031351707498929e-05, "loss": 0.5923, "step": 31393 }, { "epoch": 0.6658183283493457, "grad_norm": 0.3391091227531433, "learning_rate": 1.5031063495464048e-05, "loss": 0.5442, "step": 31394 }, { "epoch": 0.6658395368072787, "grad_norm": 0.3877600431442261, "learning_rate": 1.5030775277833664e-05, "loss": 0.5227, "step": 31395 }, { "epoch": 0.6658607452652118, "grad_norm": 0.3505764901638031, "learning_rate": 1.5030487054608091e-05, "loss": 0.5116, "step": 31396 }, { "epoch": 0.6658819537231447, "grad_norm": 0.31575608253479004, "learning_rate": 1.5030198825787658e-05, "loss": 0.5016, "step": 31397 }, { "epoch": 0.6659031621810778, "grad_norm": 0.38155439496040344, "learning_rate": 1.5029910591372683e-05, "loss": 0.5539, "step": 31398 }, { "epoch": 0.6659243706390109, "grad_norm": 0.36979255080223083, "learning_rate": 1.5029622351363483e-05, "loss": 0.4997, "step": 31399 }, { "epoch": 0.6659455790969439, "grad_norm": 0.3760511875152588, "learning_rate": 1.5029334105760378e-05, "loss": 0.4983, "step": 31400 }, { "epoch": 0.6659667875548769, "grad_norm": 0.36642739176750183, "learning_rate": 1.5029045854563695e-05, "loss": 0.5453, "step": 31401 }, { "epoch": 0.6659879960128099, "grad_norm": 0.3686811625957489, "learning_rate": 1.5028757597773751e-05, "loss": 0.5267, "step": 31402 }, { "epoch": 0.666009204470743, "grad_norm": 0.3182411789894104, "learning_rate": 1.5028469335390861e-05, "loss": 0.4383, "step": 31403 }, { "epoch": 0.6660304129286759, "grad_norm": 0.41959893703460693, "learning_rate": 1.5028181067415357e-05, "loss": 0.5299, "step": 31404 }, { "epoch": 0.666051621386609, "grad_norm": 0.3718204200267792, "learning_rate": 1.5027892793847554e-05, "loss": 0.4917, "step": 31405 }, { "epoch": 0.666072829844542, "grad_norm": 0.35949981212615967, "learning_rate": 1.5027604514687769e-05, "loss": 0.5284, "step": 31406 }, { "epoch": 0.666094038302475, "grad_norm": 0.3503345251083374, "learning_rate": 1.5027316229936326e-05, "loss": 0.496, "step": 31407 }, { "epoch": 0.666115246760408, "grad_norm": 0.32438111305236816, "learning_rate": 1.5027027939593546e-05, "loss": 0.4803, "step": 31408 }, { "epoch": 0.6661364552183411, "grad_norm": 0.327621191740036, "learning_rate": 1.5026739643659748e-05, "loss": 0.419, "step": 31409 }, { "epoch": 0.666157663676274, "grad_norm": 0.382455050945282, "learning_rate": 1.5026451342135258e-05, "loss": 0.5791, "step": 31410 }, { "epoch": 0.6661788721342071, "grad_norm": 0.3581567406654358, "learning_rate": 1.502616303502039e-05, "loss": 0.5043, "step": 31411 }, { "epoch": 0.6662000805921402, "grad_norm": 0.4120246171951294, "learning_rate": 1.5025874722315467e-05, "loss": 0.4816, "step": 31412 }, { "epoch": 0.6662212890500732, "grad_norm": 0.3613661825656891, "learning_rate": 1.5025586404020812e-05, "loss": 0.5789, "step": 31413 }, { "epoch": 0.6662424975080062, "grad_norm": 0.3548557460308075, "learning_rate": 1.5025298080136738e-05, "loss": 0.5011, "step": 31414 }, { "epoch": 0.6662637059659392, "grad_norm": 0.3801884055137634, "learning_rate": 1.5025009750663577e-05, "loss": 0.5915, "step": 31415 }, { "epoch": 0.6662849144238723, "grad_norm": 0.4214839041233063, "learning_rate": 1.5024721415601641e-05, "loss": 0.5494, "step": 31416 }, { "epoch": 0.6663061228818052, "grad_norm": 0.3461107015609741, "learning_rate": 1.5024433074951255e-05, "loss": 0.5244, "step": 31417 }, { "epoch": 0.6663273313397383, "grad_norm": 0.363463819026947, "learning_rate": 1.5024144728712737e-05, "loss": 0.5002, "step": 31418 }, { "epoch": 0.6663485397976713, "grad_norm": 0.2988174855709076, "learning_rate": 1.502385637688641e-05, "loss": 0.4168, "step": 31419 }, { "epoch": 0.6663697482556044, "grad_norm": 0.39305493235588074, "learning_rate": 1.5023568019472592e-05, "loss": 0.4631, "step": 31420 }, { "epoch": 0.6663909567135373, "grad_norm": 0.3276900351047516, "learning_rate": 1.5023279656471605e-05, "loss": 0.4291, "step": 31421 }, { "epoch": 0.6664121651714704, "grad_norm": 0.5700995922088623, "learning_rate": 1.502299128788377e-05, "loss": 0.5201, "step": 31422 }, { "epoch": 0.6664333736294034, "grad_norm": 0.35016313195228577, "learning_rate": 1.5022702913709409e-05, "loss": 0.4939, "step": 31423 }, { "epoch": 0.6664545820873364, "grad_norm": 0.33688196539878845, "learning_rate": 1.5022414533948842e-05, "loss": 0.4736, "step": 31424 }, { "epoch": 0.6664757905452695, "grad_norm": 0.3638324737548828, "learning_rate": 1.5022126148602385e-05, "loss": 0.5027, "step": 31425 }, { "epoch": 0.6664969990032025, "grad_norm": 0.3196314573287964, "learning_rate": 1.502183775767037e-05, "loss": 0.4431, "step": 31426 }, { "epoch": 0.6665182074611355, "grad_norm": 0.3341817557811737, "learning_rate": 1.5021549361153108e-05, "loss": 0.4383, "step": 31427 }, { "epoch": 0.6665394159190685, "grad_norm": 0.40787574648857117, "learning_rate": 1.5021260959050919e-05, "loss": 0.6126, "step": 31428 }, { "epoch": 0.6665606243770016, "grad_norm": 0.38186150789260864, "learning_rate": 1.5020972551364128e-05, "loss": 0.5346, "step": 31429 }, { "epoch": 0.6665818328349346, "grad_norm": 0.3121464252471924, "learning_rate": 1.5020684138093058e-05, "loss": 0.3955, "step": 31430 }, { "epoch": 0.6666030412928676, "grad_norm": 0.42507025599479675, "learning_rate": 1.5020395719238023e-05, "loss": 0.4992, "step": 31431 }, { "epoch": 0.6666242497508006, "grad_norm": 0.36798492074012756, "learning_rate": 1.5020107294799351e-05, "loss": 0.5332, "step": 31432 }, { "epoch": 0.6666454582087337, "grad_norm": 0.35629990696907043, "learning_rate": 1.501981886477736e-05, "loss": 0.4319, "step": 31433 }, { "epoch": 0.6666666666666666, "grad_norm": 0.3404196798801422, "learning_rate": 1.5019530429172366e-05, "loss": 0.4642, "step": 31434 }, { "epoch": 0.6666878751245997, "grad_norm": 0.42428797483444214, "learning_rate": 1.5019241987984697e-05, "loss": 0.5305, "step": 31435 }, { "epoch": 0.6667090835825327, "grad_norm": 0.45905399322509766, "learning_rate": 1.5018953541214667e-05, "loss": 0.4636, "step": 31436 }, { "epoch": 0.6667302920404657, "grad_norm": 0.37905365228652954, "learning_rate": 1.5018665088862603e-05, "loss": 0.5372, "step": 31437 }, { "epoch": 0.6667515004983987, "grad_norm": 0.4408007860183716, "learning_rate": 1.5018376630928826e-05, "loss": 0.4237, "step": 31438 }, { "epoch": 0.6667727089563318, "grad_norm": 0.3548413813114166, "learning_rate": 1.501808816741365e-05, "loss": 0.4986, "step": 31439 }, { "epoch": 0.6667939174142649, "grad_norm": 0.32579275965690613, "learning_rate": 1.5017799698317402e-05, "loss": 0.4593, "step": 31440 }, { "epoch": 0.6668151258721978, "grad_norm": 0.3530700206756592, "learning_rate": 1.50175112236404e-05, "loss": 0.4948, "step": 31441 }, { "epoch": 0.6668363343301309, "grad_norm": 0.3344106078147888, "learning_rate": 1.5017222743382966e-05, "loss": 0.4708, "step": 31442 }, { "epoch": 0.6668575427880639, "grad_norm": 0.3812095522880554, "learning_rate": 1.5016934257545419e-05, "loss": 0.4715, "step": 31443 }, { "epoch": 0.6668787512459969, "grad_norm": 0.41430166363716125, "learning_rate": 1.5016645766128081e-05, "loss": 0.5867, "step": 31444 }, { "epoch": 0.6668999597039299, "grad_norm": 0.3891535997390747, "learning_rate": 1.5016357269131275e-05, "loss": 0.4689, "step": 31445 }, { "epoch": 0.666921168161863, "grad_norm": 0.38697293400764465, "learning_rate": 1.5016068766555321e-05, "loss": 0.505, "step": 31446 }, { "epoch": 0.6669423766197959, "grad_norm": 0.35290881991386414, "learning_rate": 1.5015780258400536e-05, "loss": 0.498, "step": 31447 }, { "epoch": 0.666963585077729, "grad_norm": 0.3735674023628235, "learning_rate": 1.5015491744667246e-05, "loss": 0.5151, "step": 31448 }, { "epoch": 0.666984793535662, "grad_norm": 0.39017775654792786, "learning_rate": 1.5015203225355768e-05, "loss": 0.5152, "step": 31449 }, { "epoch": 0.667006001993595, "grad_norm": 0.4190739691257477, "learning_rate": 1.5014914700466428e-05, "loss": 0.5502, "step": 31450 }, { "epoch": 0.667027210451528, "grad_norm": 0.3677905797958374, "learning_rate": 1.501462616999954e-05, "loss": 0.4379, "step": 31451 }, { "epoch": 0.6670484189094611, "grad_norm": 0.37963828444480896, "learning_rate": 1.5014337633955429e-05, "loss": 0.5256, "step": 31452 }, { "epoch": 0.6670696273673942, "grad_norm": 0.340555340051651, "learning_rate": 1.5014049092334416e-05, "loss": 0.5314, "step": 31453 }, { "epoch": 0.6670908358253271, "grad_norm": 0.3668331801891327, "learning_rate": 1.5013760545136817e-05, "loss": 0.4612, "step": 31454 }, { "epoch": 0.6671120442832602, "grad_norm": 0.32179540395736694, "learning_rate": 1.5013471992362962e-05, "loss": 0.4547, "step": 31455 }, { "epoch": 0.6671332527411932, "grad_norm": 0.35105130076408386, "learning_rate": 1.5013183434013167e-05, "loss": 0.5682, "step": 31456 }, { "epoch": 0.6671544611991262, "grad_norm": 0.3491574823856354, "learning_rate": 1.5012894870087751e-05, "loss": 0.5216, "step": 31457 }, { "epoch": 0.6671756696570592, "grad_norm": 0.4082924425601959, "learning_rate": 1.5012606300587038e-05, "loss": 0.5132, "step": 31458 }, { "epoch": 0.6671968781149923, "grad_norm": 0.31901079416275024, "learning_rate": 1.5012317725511345e-05, "loss": 0.4352, "step": 31459 }, { "epoch": 0.6672180865729253, "grad_norm": 0.33847060799598694, "learning_rate": 1.5012029144860998e-05, "loss": 0.4726, "step": 31460 }, { "epoch": 0.6672392950308583, "grad_norm": 0.3925914466381073, "learning_rate": 1.5011740558636318e-05, "loss": 0.4633, "step": 31461 }, { "epoch": 0.6672605034887913, "grad_norm": 0.3417418301105499, "learning_rate": 1.501145196683762e-05, "loss": 0.5068, "step": 31462 }, { "epoch": 0.6672817119467244, "grad_norm": 0.3717491030693054, "learning_rate": 1.501116336946523e-05, "loss": 0.5392, "step": 31463 }, { "epoch": 0.6673029204046573, "grad_norm": 0.38089632987976074, "learning_rate": 1.501087476651947e-05, "loss": 0.56, "step": 31464 }, { "epoch": 0.6673241288625904, "grad_norm": 0.3475588858127594, "learning_rate": 1.5010586158000655e-05, "loss": 0.5081, "step": 31465 }, { "epoch": 0.6673453373205235, "grad_norm": 0.3831573724746704, "learning_rate": 1.5010297543909111e-05, "loss": 0.4703, "step": 31466 }, { "epoch": 0.6673665457784564, "grad_norm": 0.36120501160621643, "learning_rate": 1.5010008924245156e-05, "loss": 0.4399, "step": 31467 }, { "epoch": 0.6673877542363895, "grad_norm": 0.34296923875808716, "learning_rate": 1.5009720299009116e-05, "loss": 0.4699, "step": 31468 }, { "epoch": 0.6674089626943225, "grad_norm": 0.39664071798324585, "learning_rate": 1.5009431668201306e-05, "loss": 0.5334, "step": 31469 }, { "epoch": 0.6674301711522556, "grad_norm": 0.37149012088775635, "learning_rate": 1.5009143031822051e-05, "loss": 0.4912, "step": 31470 }, { "epoch": 0.6674513796101885, "grad_norm": 0.6620043516159058, "learning_rate": 1.500885438987167e-05, "loss": 0.4636, "step": 31471 }, { "epoch": 0.6674725880681216, "grad_norm": 0.3666362166404724, "learning_rate": 1.5008565742350485e-05, "loss": 0.5523, "step": 31472 }, { "epoch": 0.6674937965260546, "grad_norm": 0.5383173227310181, "learning_rate": 1.5008277089258816e-05, "loss": 0.4755, "step": 31473 }, { "epoch": 0.6675150049839876, "grad_norm": 0.35273393988609314, "learning_rate": 1.5007988430596983e-05, "loss": 0.5026, "step": 31474 }, { "epoch": 0.6675362134419206, "grad_norm": 0.33341068029403687, "learning_rate": 1.5007699766365313e-05, "loss": 0.5322, "step": 31475 }, { "epoch": 0.6675574218998537, "grad_norm": 0.3647859990596771, "learning_rate": 1.5007411096564119e-05, "loss": 0.5007, "step": 31476 }, { "epoch": 0.6675786303577866, "grad_norm": 0.4562145471572876, "learning_rate": 1.5007122421193729e-05, "loss": 0.503, "step": 31477 }, { "epoch": 0.6675998388157197, "grad_norm": 0.35325542092323303, "learning_rate": 1.5006833740254461e-05, "loss": 0.4783, "step": 31478 }, { "epoch": 0.6676210472736527, "grad_norm": 0.35648950934410095, "learning_rate": 1.5006545053746633e-05, "loss": 0.4603, "step": 31479 }, { "epoch": 0.6676422557315858, "grad_norm": 0.4036984145641327, "learning_rate": 1.500625636167057e-05, "loss": 0.4975, "step": 31480 }, { "epoch": 0.6676634641895188, "grad_norm": 0.36736011505126953, "learning_rate": 1.5005967664026595e-05, "loss": 0.4657, "step": 31481 }, { "epoch": 0.6676846726474518, "grad_norm": 0.32794737815856934, "learning_rate": 1.5005678960815023e-05, "loss": 0.4568, "step": 31482 }, { "epoch": 0.6677058811053849, "grad_norm": 0.4531364142894745, "learning_rate": 1.5005390252036178e-05, "loss": 0.4619, "step": 31483 }, { "epoch": 0.6677270895633178, "grad_norm": 0.34693196415901184, "learning_rate": 1.5005101537690385e-05, "loss": 0.4143, "step": 31484 }, { "epoch": 0.6677482980212509, "grad_norm": 0.3545147180557251, "learning_rate": 1.500481281777796e-05, "loss": 0.5281, "step": 31485 }, { "epoch": 0.6677695064791839, "grad_norm": 0.3295195400714874, "learning_rate": 1.5004524092299223e-05, "loss": 0.4683, "step": 31486 }, { "epoch": 0.667790714937117, "grad_norm": 0.3215349316596985, "learning_rate": 1.50042353612545e-05, "loss": 0.4464, "step": 31487 }, { "epoch": 0.6678119233950499, "grad_norm": 0.3338182866573334, "learning_rate": 1.500394662464411e-05, "loss": 0.4647, "step": 31488 }, { "epoch": 0.667833131852983, "grad_norm": 0.34902313351631165, "learning_rate": 1.5003657882468371e-05, "loss": 0.4509, "step": 31489 }, { "epoch": 0.667854340310916, "grad_norm": 0.3889133334159851, "learning_rate": 1.500336913472761e-05, "loss": 0.4695, "step": 31490 }, { "epoch": 0.667875548768849, "grad_norm": 0.3389693796634674, "learning_rate": 1.5003080381422149e-05, "loss": 0.604, "step": 31491 }, { "epoch": 0.667896757226782, "grad_norm": 0.4288913905620575, "learning_rate": 1.5002791622552301e-05, "loss": 0.5386, "step": 31492 }, { "epoch": 0.6679179656847151, "grad_norm": 0.3144177198410034, "learning_rate": 1.500250285811839e-05, "loss": 0.4833, "step": 31493 }, { "epoch": 0.6679391741426481, "grad_norm": 0.4051709771156311, "learning_rate": 1.500221408812074e-05, "loss": 0.4803, "step": 31494 }, { "epoch": 0.6679603826005811, "grad_norm": 0.3982628583908081, "learning_rate": 1.5001925312559675e-05, "loss": 0.5122, "step": 31495 }, { "epoch": 0.6679815910585142, "grad_norm": 0.3204004764556885, "learning_rate": 1.5001636531435509e-05, "loss": 0.4506, "step": 31496 }, { "epoch": 0.6680027995164471, "grad_norm": 0.39594507217407227, "learning_rate": 1.5001347744748567e-05, "loss": 0.5143, "step": 31497 }, { "epoch": 0.6680240079743802, "grad_norm": 0.34559446573257446, "learning_rate": 1.500105895249917e-05, "loss": 0.4022, "step": 31498 }, { "epoch": 0.6680452164323132, "grad_norm": 0.3771417438983917, "learning_rate": 1.5000770154687634e-05, "loss": 0.4805, "step": 31499 }, { "epoch": 0.6680664248902463, "grad_norm": 0.47318774461746216, "learning_rate": 1.5000481351314289e-05, "loss": 0.4785, "step": 31500 }, { "epoch": 0.6680876333481792, "grad_norm": 0.3446599841117859, "learning_rate": 1.5000192542379452e-05, "loss": 0.4848, "step": 31501 }, { "epoch": 0.6681088418061123, "grad_norm": 0.38735148310661316, "learning_rate": 1.4999903727883443e-05, "loss": 0.5609, "step": 31502 }, { "epoch": 0.6681300502640453, "grad_norm": 0.3464392423629761, "learning_rate": 1.4999614907826587e-05, "loss": 0.4852, "step": 31503 }, { "epoch": 0.6681512587219783, "grad_norm": 0.34955596923828125, "learning_rate": 1.49993260822092e-05, "loss": 0.5605, "step": 31504 }, { "epoch": 0.6681724671799113, "grad_norm": 0.3054451048374176, "learning_rate": 1.4999037251031605e-05, "loss": 0.4315, "step": 31505 }, { "epoch": 0.6681936756378444, "grad_norm": 0.33978304266929626, "learning_rate": 1.4998748414294129e-05, "loss": 0.4959, "step": 31506 }, { "epoch": 0.6682148840957775, "grad_norm": 0.33195555210113525, "learning_rate": 1.4998459571997084e-05, "loss": 0.5203, "step": 31507 }, { "epoch": 0.6682360925537104, "grad_norm": 0.36933985352516174, "learning_rate": 1.4998170724140799e-05, "loss": 0.4757, "step": 31508 }, { "epoch": 0.6682573010116435, "grad_norm": 0.34849974513053894, "learning_rate": 1.4997881870725591e-05, "loss": 0.5203, "step": 31509 }, { "epoch": 0.6682785094695765, "grad_norm": 0.3434915542602539, "learning_rate": 1.499759301175178e-05, "loss": 0.4405, "step": 31510 }, { "epoch": 0.6682997179275095, "grad_norm": 0.3506681025028229, "learning_rate": 1.4997304147219694e-05, "loss": 0.5164, "step": 31511 }, { "epoch": 0.6683209263854425, "grad_norm": 0.42853590846061707, "learning_rate": 1.4997015277129644e-05, "loss": 0.5229, "step": 31512 }, { "epoch": 0.6683421348433756, "grad_norm": 0.3431357145309448, "learning_rate": 1.4996726401481958e-05, "loss": 0.4735, "step": 31513 }, { "epoch": 0.6683633433013085, "grad_norm": 0.3701755702495575, "learning_rate": 1.4996437520276961e-05, "loss": 0.5113, "step": 31514 }, { "epoch": 0.6683845517592416, "grad_norm": 0.386138916015625, "learning_rate": 1.4996148633514967e-05, "loss": 0.5569, "step": 31515 }, { "epoch": 0.6684057602171746, "grad_norm": 0.3289632201194763, "learning_rate": 1.4995859741196297e-05, "loss": 0.4701, "step": 31516 }, { "epoch": 0.6684269686751076, "grad_norm": 0.3361353278160095, "learning_rate": 1.4995570843321278e-05, "loss": 0.4237, "step": 31517 }, { "epoch": 0.6684481771330406, "grad_norm": 0.3570650815963745, "learning_rate": 1.499528193989023e-05, "loss": 0.5726, "step": 31518 }, { "epoch": 0.6684693855909737, "grad_norm": 0.3454199731349945, "learning_rate": 1.4994993030903469e-05, "loss": 0.5211, "step": 31519 }, { "epoch": 0.6684905940489067, "grad_norm": 0.40369266271591187, "learning_rate": 1.4994704116361322e-05, "loss": 0.564, "step": 31520 }, { "epoch": 0.6685118025068397, "grad_norm": 0.3540686070919037, "learning_rate": 1.499441519626411e-05, "loss": 0.5, "step": 31521 }, { "epoch": 0.6685330109647728, "grad_norm": 0.3122526705265045, "learning_rate": 1.499412627061215e-05, "loss": 0.4317, "step": 31522 }, { "epoch": 0.6685542194227058, "grad_norm": 0.339417040348053, "learning_rate": 1.499383733940577e-05, "loss": 0.4478, "step": 31523 }, { "epoch": 0.6685754278806388, "grad_norm": 0.35768070816993713, "learning_rate": 1.4993548402645283e-05, "loss": 0.5589, "step": 31524 }, { "epoch": 0.6685966363385718, "grad_norm": 0.37500983476638794, "learning_rate": 1.4993259460331018e-05, "loss": 0.5334, "step": 31525 }, { "epoch": 0.6686178447965049, "grad_norm": 0.3450699746608734, "learning_rate": 1.4992970512463291e-05, "loss": 0.5372, "step": 31526 }, { "epoch": 0.6686390532544378, "grad_norm": 0.35899850726127625, "learning_rate": 1.4992681559042427e-05, "loss": 0.5055, "step": 31527 }, { "epoch": 0.6686602617123709, "grad_norm": 0.3966153562068939, "learning_rate": 1.4992392600068745e-05, "loss": 0.5483, "step": 31528 }, { "epoch": 0.6686814701703039, "grad_norm": 0.34726399183273315, "learning_rate": 1.4992103635542573e-05, "loss": 0.4856, "step": 31529 }, { "epoch": 0.668702678628237, "grad_norm": 0.35866379737854004, "learning_rate": 1.499181466546422e-05, "loss": 0.5618, "step": 31530 }, { "epoch": 0.6687238870861699, "grad_norm": 0.34396877884864807, "learning_rate": 1.4991525689834014e-05, "loss": 0.5182, "step": 31531 }, { "epoch": 0.668745095544103, "grad_norm": 0.3588632047176361, "learning_rate": 1.4991236708652278e-05, "loss": 0.4888, "step": 31532 }, { "epoch": 0.668766304002036, "grad_norm": 0.38323208689689636, "learning_rate": 1.4990947721919331e-05, "loss": 0.547, "step": 31533 }, { "epoch": 0.668787512459969, "grad_norm": 0.3774503171443939, "learning_rate": 1.49906587296355e-05, "loss": 0.4953, "step": 31534 }, { "epoch": 0.6688087209179021, "grad_norm": 0.3832002878189087, "learning_rate": 1.4990369731801096e-05, "loss": 0.4489, "step": 31535 }, { "epoch": 0.6688299293758351, "grad_norm": 0.36025387048721313, "learning_rate": 1.499008072841645e-05, "loss": 0.5105, "step": 31536 }, { "epoch": 0.6688511378337682, "grad_norm": 0.3094754219055176, "learning_rate": 1.4989791719481876e-05, "loss": 0.4903, "step": 31537 }, { "epoch": 0.6688723462917011, "grad_norm": 0.36712151765823364, "learning_rate": 1.49895027049977e-05, "loss": 0.4635, "step": 31538 }, { "epoch": 0.6688935547496342, "grad_norm": 0.5533491969108582, "learning_rate": 1.4989213684964244e-05, "loss": 0.5973, "step": 31539 }, { "epoch": 0.6689147632075672, "grad_norm": 0.3482095003128052, "learning_rate": 1.498892465938183e-05, "loss": 0.455, "step": 31540 }, { "epoch": 0.6689359716655002, "grad_norm": 0.3454503118991852, "learning_rate": 1.4988635628250771e-05, "loss": 0.4868, "step": 31541 }, { "epoch": 0.6689571801234332, "grad_norm": 0.3389888405799866, "learning_rate": 1.4988346591571401e-05, "loss": 0.5687, "step": 31542 }, { "epoch": 0.6689783885813663, "grad_norm": 0.34083807468414307, "learning_rate": 1.4988057549344033e-05, "loss": 0.4956, "step": 31543 }, { "epoch": 0.6689995970392992, "grad_norm": 0.3312586843967438, "learning_rate": 1.4987768501568987e-05, "loss": 0.4695, "step": 31544 }, { "epoch": 0.6690208054972323, "grad_norm": 0.3566938638687134, "learning_rate": 1.4987479448246594e-05, "loss": 0.5282, "step": 31545 }, { "epoch": 0.6690420139551653, "grad_norm": 0.3473120927810669, "learning_rate": 1.4987190389377167e-05, "loss": 0.4625, "step": 31546 }, { "epoch": 0.6690632224130983, "grad_norm": 0.3439527750015259, "learning_rate": 1.4986901324961029e-05, "loss": 0.4248, "step": 31547 }, { "epoch": 0.6690844308710314, "grad_norm": 0.3928254246711731, "learning_rate": 1.4986612254998506e-05, "loss": 0.4308, "step": 31548 }, { "epoch": 0.6691056393289644, "grad_norm": 0.4120970666408539, "learning_rate": 1.4986323179489912e-05, "loss": 0.5206, "step": 31549 }, { "epoch": 0.6691268477868975, "grad_norm": 0.39522719383239746, "learning_rate": 1.4986034098435574e-05, "loss": 0.5404, "step": 31550 }, { "epoch": 0.6691480562448304, "grad_norm": 0.37759163975715637, "learning_rate": 1.4985745011835814e-05, "loss": 0.5757, "step": 31551 }, { "epoch": 0.6691692647027635, "grad_norm": 0.33460068702697754, "learning_rate": 1.4985455919690952e-05, "loss": 0.4545, "step": 31552 }, { "epoch": 0.6691904731606965, "grad_norm": 0.3616659641265869, "learning_rate": 1.4985166822001306e-05, "loss": 0.4841, "step": 31553 }, { "epoch": 0.6692116816186295, "grad_norm": 0.38785552978515625, "learning_rate": 1.4984877718767205e-05, "loss": 0.6063, "step": 31554 }, { "epoch": 0.6692328900765625, "grad_norm": 0.3703517019748688, "learning_rate": 1.4984588609988964e-05, "loss": 0.4881, "step": 31555 }, { "epoch": 0.6692540985344956, "grad_norm": 0.3487444221973419, "learning_rate": 1.4984299495666908e-05, "loss": 0.5129, "step": 31556 }, { "epoch": 0.6692753069924285, "grad_norm": 0.30046504735946655, "learning_rate": 1.4984010375801354e-05, "loss": 0.3954, "step": 31557 }, { "epoch": 0.6692965154503616, "grad_norm": 0.4236103892326355, "learning_rate": 1.4983721250392629e-05, "loss": 0.4769, "step": 31558 }, { "epoch": 0.6693177239082946, "grad_norm": 0.33943215012550354, "learning_rate": 1.4983432119441053e-05, "loss": 0.5427, "step": 31559 }, { "epoch": 0.6693389323662277, "grad_norm": 0.39516088366508484, "learning_rate": 1.4983142982946948e-05, "loss": 0.4558, "step": 31560 }, { "epoch": 0.6693601408241606, "grad_norm": 0.6039677262306213, "learning_rate": 1.4982853840910632e-05, "loss": 0.5348, "step": 31561 }, { "epoch": 0.6693813492820937, "grad_norm": 0.40891286730766296, "learning_rate": 1.4982564693332433e-05, "loss": 0.3813, "step": 31562 }, { "epoch": 0.6694025577400268, "grad_norm": 0.42152899503707886, "learning_rate": 1.4982275540212666e-05, "loss": 0.5138, "step": 31563 }, { "epoch": 0.6694237661979597, "grad_norm": 0.4820868968963623, "learning_rate": 1.4981986381551654e-05, "loss": 0.4523, "step": 31564 }, { "epoch": 0.6694449746558928, "grad_norm": 0.3769017457962036, "learning_rate": 1.4981697217349723e-05, "loss": 0.5503, "step": 31565 }, { "epoch": 0.6694661831138258, "grad_norm": 0.47281765937805176, "learning_rate": 1.4981408047607193e-05, "loss": 0.4636, "step": 31566 }, { "epoch": 0.6694873915717588, "grad_norm": 0.38701918721199036, "learning_rate": 1.4981118872324381e-05, "loss": 0.4829, "step": 31567 }, { "epoch": 0.6695086000296918, "grad_norm": 0.3504185378551483, "learning_rate": 1.4980829691501613e-05, "loss": 0.4804, "step": 31568 }, { "epoch": 0.6695298084876249, "grad_norm": 0.4434146285057068, "learning_rate": 1.4980540505139209e-05, "loss": 0.4819, "step": 31569 }, { "epoch": 0.6695510169455579, "grad_norm": 0.34461668133735657, "learning_rate": 1.498025131323749e-05, "loss": 0.4792, "step": 31570 }, { "epoch": 0.6695722254034909, "grad_norm": 0.4492930471897125, "learning_rate": 1.497996211579678e-05, "loss": 0.5066, "step": 31571 }, { "epoch": 0.6695934338614239, "grad_norm": 0.36071380972862244, "learning_rate": 1.49796729128174e-05, "loss": 0.464, "step": 31572 }, { "epoch": 0.669614642319357, "grad_norm": 0.3772047460079193, "learning_rate": 1.497938370429967e-05, "loss": 0.5304, "step": 31573 }, { "epoch": 0.6696358507772899, "grad_norm": 0.3830878734588623, "learning_rate": 1.4979094490243915e-05, "loss": 0.5112, "step": 31574 }, { "epoch": 0.669657059235223, "grad_norm": 0.3452194929122925, "learning_rate": 1.4978805270650452e-05, "loss": 0.4587, "step": 31575 }, { "epoch": 0.6696782676931561, "grad_norm": 0.35278791189193726, "learning_rate": 1.4978516045519604e-05, "loss": 0.4946, "step": 31576 }, { "epoch": 0.669699476151089, "grad_norm": 0.3645645081996918, "learning_rate": 1.4978226814851697e-05, "loss": 0.6023, "step": 31577 }, { "epoch": 0.6697206846090221, "grad_norm": 0.37379124760627747, "learning_rate": 1.4977937578647047e-05, "loss": 0.5222, "step": 31578 }, { "epoch": 0.6697418930669551, "grad_norm": 0.37190258502960205, "learning_rate": 1.4977648336905981e-05, "loss": 0.5056, "step": 31579 }, { "epoch": 0.6697631015248882, "grad_norm": 0.4571114778518677, "learning_rate": 1.4977359089628816e-05, "loss": 0.5185, "step": 31580 }, { "epoch": 0.6697843099828211, "grad_norm": 0.34124863147735596, "learning_rate": 1.4977069836815874e-05, "loss": 0.4809, "step": 31581 }, { "epoch": 0.6698055184407542, "grad_norm": 0.31018975377082825, "learning_rate": 1.4976780578467477e-05, "loss": 0.4914, "step": 31582 }, { "epoch": 0.6698267268986872, "grad_norm": 0.3272627890110016, "learning_rate": 1.4976491314583952e-05, "loss": 0.514, "step": 31583 }, { "epoch": 0.6698479353566202, "grad_norm": 0.3501245081424713, "learning_rate": 1.4976202045165614e-05, "loss": 0.5112, "step": 31584 }, { "epoch": 0.6698691438145532, "grad_norm": 0.3212033212184906, "learning_rate": 1.4975912770212788e-05, "loss": 0.4223, "step": 31585 }, { "epoch": 0.6698903522724863, "grad_norm": 0.3934362828731537, "learning_rate": 1.4975623489725794e-05, "loss": 0.583, "step": 31586 }, { "epoch": 0.6699115607304192, "grad_norm": 0.38245323300361633, "learning_rate": 1.4975334203704956e-05, "loss": 0.4924, "step": 31587 }, { "epoch": 0.6699327691883523, "grad_norm": 0.36505600810050964, "learning_rate": 1.4975044912150596e-05, "loss": 0.4369, "step": 31588 }, { "epoch": 0.6699539776462854, "grad_norm": 0.3529907166957855, "learning_rate": 1.497475561506303e-05, "loss": 0.5134, "step": 31589 }, { "epoch": 0.6699751861042184, "grad_norm": 0.32724031805992126, "learning_rate": 1.4974466312442588e-05, "loss": 0.4511, "step": 31590 }, { "epoch": 0.6699963945621514, "grad_norm": 0.35012543201446533, "learning_rate": 1.4974177004289587e-05, "loss": 0.5352, "step": 31591 }, { "epoch": 0.6700176030200844, "grad_norm": 0.36056214570999146, "learning_rate": 1.497388769060435e-05, "loss": 0.4796, "step": 31592 }, { "epoch": 0.6700388114780175, "grad_norm": 0.38970887660980225, "learning_rate": 1.4973598371387198e-05, "loss": 0.4536, "step": 31593 }, { "epoch": 0.6700600199359504, "grad_norm": 0.34805023670196533, "learning_rate": 1.4973309046638451e-05, "loss": 0.4735, "step": 31594 }, { "epoch": 0.6700812283938835, "grad_norm": 0.36636602878570557, "learning_rate": 1.4973019716358433e-05, "loss": 0.5518, "step": 31595 }, { "epoch": 0.6701024368518165, "grad_norm": 0.38187602162361145, "learning_rate": 1.4972730380547468e-05, "loss": 0.4641, "step": 31596 }, { "epoch": 0.6701236453097495, "grad_norm": 0.3389843702316284, "learning_rate": 1.4972441039205877e-05, "loss": 0.4902, "step": 31597 }, { "epoch": 0.6701448537676825, "grad_norm": 0.3365877568721771, "learning_rate": 1.4972151692333973e-05, "loss": 0.4826, "step": 31598 }, { "epoch": 0.6701660622256156, "grad_norm": 0.3565428555011749, "learning_rate": 1.4971862339932093e-05, "loss": 0.532, "step": 31599 }, { "epoch": 0.6701872706835486, "grad_norm": 0.3568069636821747, "learning_rate": 1.4971572982000547e-05, "loss": 0.5008, "step": 31600 }, { "epoch": 0.6702084791414816, "grad_norm": 0.3434080481529236, "learning_rate": 1.4971283618539662e-05, "loss": 0.4646, "step": 31601 }, { "epoch": 0.6702296875994146, "grad_norm": 0.35680267214775085, "learning_rate": 1.4970994249549757e-05, "loss": 0.4617, "step": 31602 }, { "epoch": 0.6702508960573477, "grad_norm": 0.39556947350502014, "learning_rate": 1.4970704875031155e-05, "loss": 0.524, "step": 31603 }, { "epoch": 0.6702721045152807, "grad_norm": 0.40942487120628357, "learning_rate": 1.4970415494984181e-05, "loss": 0.4979, "step": 31604 }, { "epoch": 0.6702933129732137, "grad_norm": 0.3542736768722534, "learning_rate": 1.4970126109409154e-05, "loss": 0.4935, "step": 31605 }, { "epoch": 0.6703145214311468, "grad_norm": 0.3656432330608368, "learning_rate": 1.4969836718306393e-05, "loss": 0.4525, "step": 31606 }, { "epoch": 0.6703357298890797, "grad_norm": 0.3827558159828186, "learning_rate": 1.4969547321676225e-05, "loss": 0.4989, "step": 31607 }, { "epoch": 0.6703569383470128, "grad_norm": 0.3095795214176178, "learning_rate": 1.4969257919518967e-05, "loss": 0.4014, "step": 31608 }, { "epoch": 0.6703781468049458, "grad_norm": 0.34369876980781555, "learning_rate": 1.4968968511834945e-05, "loss": 0.5314, "step": 31609 }, { "epoch": 0.6703993552628789, "grad_norm": 0.3547042906284332, "learning_rate": 1.4968679098624482e-05, "loss": 0.4793, "step": 31610 }, { "epoch": 0.6704205637208118, "grad_norm": 0.3430691659450531, "learning_rate": 1.4968389679887896e-05, "loss": 0.4384, "step": 31611 }, { "epoch": 0.6704417721787449, "grad_norm": 0.36969903111457825, "learning_rate": 1.4968100255625509e-05, "loss": 0.5655, "step": 31612 }, { "epoch": 0.6704629806366779, "grad_norm": 0.393645703792572, "learning_rate": 1.4967810825837643e-05, "loss": 0.4829, "step": 31613 }, { "epoch": 0.6704841890946109, "grad_norm": 0.3618629574775696, "learning_rate": 1.4967521390524623e-05, "loss": 0.585, "step": 31614 }, { "epoch": 0.6705053975525439, "grad_norm": 0.35587137937545776, "learning_rate": 1.4967231949686767e-05, "loss": 0.4964, "step": 31615 }, { "epoch": 0.670526606010477, "grad_norm": 0.3821412920951843, "learning_rate": 1.49669425033244e-05, "loss": 0.4332, "step": 31616 }, { "epoch": 0.67054781446841, "grad_norm": 0.3411640226840973, "learning_rate": 1.4966653051437843e-05, "loss": 0.5171, "step": 31617 }, { "epoch": 0.670569022926343, "grad_norm": 0.34521687030792236, "learning_rate": 1.4966363594027418e-05, "loss": 0.5336, "step": 31618 }, { "epoch": 0.6705902313842761, "grad_norm": 0.4107334315776825, "learning_rate": 1.4966074131093447e-05, "loss": 0.4862, "step": 31619 }, { "epoch": 0.6706114398422091, "grad_norm": 0.32597029209136963, "learning_rate": 1.4965784662636248e-05, "loss": 0.4455, "step": 31620 }, { "epoch": 0.6706326483001421, "grad_norm": 0.3358534276485443, "learning_rate": 1.4965495188656147e-05, "loss": 0.4795, "step": 31621 }, { "epoch": 0.6706538567580751, "grad_norm": 0.38017794489860535, "learning_rate": 1.496520570915347e-05, "loss": 0.4569, "step": 31622 }, { "epoch": 0.6706750652160082, "grad_norm": 0.37452030181884766, "learning_rate": 1.496491622412853e-05, "loss": 0.4648, "step": 31623 }, { "epoch": 0.6706962736739411, "grad_norm": 0.33521509170532227, "learning_rate": 1.4964626733581656e-05, "loss": 0.4699, "step": 31624 }, { "epoch": 0.6707174821318742, "grad_norm": 0.4204295873641968, "learning_rate": 1.4964337237513168e-05, "loss": 0.4625, "step": 31625 }, { "epoch": 0.6707386905898072, "grad_norm": 0.333365261554718, "learning_rate": 1.4964047735923383e-05, "loss": 0.4318, "step": 31626 }, { "epoch": 0.6707598990477402, "grad_norm": 0.3562980890274048, "learning_rate": 1.4963758228812628e-05, "loss": 0.553, "step": 31627 }, { "epoch": 0.6707811075056732, "grad_norm": 0.3648454248905182, "learning_rate": 1.4963468716181226e-05, "loss": 0.5537, "step": 31628 }, { "epoch": 0.6708023159636063, "grad_norm": 0.38459303975105286, "learning_rate": 1.4963179198029497e-05, "loss": 0.5105, "step": 31629 }, { "epoch": 0.6708235244215394, "grad_norm": 0.4466315805912018, "learning_rate": 1.4962889674357763e-05, "loss": 0.4439, "step": 31630 }, { "epoch": 0.6708447328794723, "grad_norm": 0.34805363416671753, "learning_rate": 1.4962600145166346e-05, "loss": 0.4554, "step": 31631 }, { "epoch": 0.6708659413374054, "grad_norm": 0.36236804723739624, "learning_rate": 1.496231061045557e-05, "loss": 0.4379, "step": 31632 }, { "epoch": 0.6708871497953384, "grad_norm": 0.345232754945755, "learning_rate": 1.4962021070225754e-05, "loss": 0.5375, "step": 31633 }, { "epoch": 0.6709083582532714, "grad_norm": 0.37317341566085815, "learning_rate": 1.4961731524477222e-05, "loss": 0.4769, "step": 31634 }, { "epoch": 0.6709295667112044, "grad_norm": 0.351693332195282, "learning_rate": 1.4961441973210291e-05, "loss": 0.489, "step": 31635 }, { "epoch": 0.6709507751691375, "grad_norm": 0.3699629604816437, "learning_rate": 1.4961152416425294e-05, "loss": 0.4578, "step": 31636 }, { "epoch": 0.6709719836270704, "grad_norm": 0.34801656007766724, "learning_rate": 1.496086285412254e-05, "loss": 0.394, "step": 31637 }, { "epoch": 0.6709931920850035, "grad_norm": 0.3332451581954956, "learning_rate": 1.4960573286302362e-05, "loss": 0.4583, "step": 31638 }, { "epoch": 0.6710144005429365, "grad_norm": 0.3561025857925415, "learning_rate": 1.4960283712965079e-05, "loss": 0.479, "step": 31639 }, { "epoch": 0.6710356090008696, "grad_norm": 0.39421817660331726, "learning_rate": 1.4959994134111004e-05, "loss": 0.5142, "step": 31640 }, { "epoch": 0.6710568174588025, "grad_norm": 0.34578606486320496, "learning_rate": 1.4959704549740472e-05, "loss": 0.5444, "step": 31641 }, { "epoch": 0.6710780259167356, "grad_norm": 0.4105169177055359, "learning_rate": 1.49594149598538e-05, "loss": 0.5177, "step": 31642 }, { "epoch": 0.6710992343746686, "grad_norm": 1.2304041385650635, "learning_rate": 1.4959125364451308e-05, "loss": 0.4713, "step": 31643 }, { "epoch": 0.6711204428326016, "grad_norm": 0.3718684911727905, "learning_rate": 1.495883576353332e-05, "loss": 0.5243, "step": 31644 }, { "epoch": 0.6711416512905347, "grad_norm": 0.32853808999061584, "learning_rate": 1.495854615710016e-05, "loss": 0.5013, "step": 31645 }, { "epoch": 0.6711628597484677, "grad_norm": 0.35391998291015625, "learning_rate": 1.4958256545152145e-05, "loss": 0.5023, "step": 31646 }, { "epoch": 0.6711840682064008, "grad_norm": 0.34431925415992737, "learning_rate": 1.4957966927689601e-05, "loss": 0.5063, "step": 31647 }, { "epoch": 0.6712052766643337, "grad_norm": 0.31089577078819275, "learning_rate": 1.4957677304712852e-05, "loss": 0.5045, "step": 31648 }, { "epoch": 0.6712264851222668, "grad_norm": 0.3373580276966095, "learning_rate": 1.4957387676222216e-05, "loss": 0.5142, "step": 31649 }, { "epoch": 0.6712476935801998, "grad_norm": 0.3703997731208801, "learning_rate": 1.4957098042218015e-05, "loss": 0.4401, "step": 31650 }, { "epoch": 0.6712689020381328, "grad_norm": 0.3961935043334961, "learning_rate": 1.4956808402700574e-05, "loss": 0.4597, "step": 31651 }, { "epoch": 0.6712901104960658, "grad_norm": 0.31562909483909607, "learning_rate": 1.4956518757670213e-05, "loss": 0.4558, "step": 31652 }, { "epoch": 0.6713113189539989, "grad_norm": 0.3713390827178955, "learning_rate": 1.4956229107127254e-05, "loss": 0.5173, "step": 31653 }, { "epoch": 0.6713325274119318, "grad_norm": 0.35297465324401855, "learning_rate": 1.495593945107202e-05, "loss": 0.4821, "step": 31654 }, { "epoch": 0.6713537358698649, "grad_norm": 0.37565070390701294, "learning_rate": 1.4955649789504833e-05, "loss": 0.4995, "step": 31655 }, { "epoch": 0.6713749443277979, "grad_norm": 0.6298775672912598, "learning_rate": 1.4955360122426019e-05, "loss": 0.4937, "step": 31656 }, { "epoch": 0.671396152785731, "grad_norm": 0.33974167704582214, "learning_rate": 1.4955070449835893e-05, "loss": 0.4991, "step": 31657 }, { "epoch": 0.671417361243664, "grad_norm": 0.353209912776947, "learning_rate": 1.4954780771734783e-05, "loss": 0.4986, "step": 31658 }, { "epoch": 0.671438569701597, "grad_norm": 0.35813581943511963, "learning_rate": 1.4954491088123007e-05, "loss": 0.4691, "step": 31659 }, { "epoch": 0.6714597781595301, "grad_norm": 0.31154176592826843, "learning_rate": 1.495420139900089e-05, "loss": 0.3798, "step": 31660 }, { "epoch": 0.671480986617463, "grad_norm": 0.3900638222694397, "learning_rate": 1.4953911704368753e-05, "loss": 0.4806, "step": 31661 }, { "epoch": 0.6715021950753961, "grad_norm": 0.3742135167121887, "learning_rate": 1.495362200422692e-05, "loss": 0.5039, "step": 31662 }, { "epoch": 0.6715234035333291, "grad_norm": 0.35544946789741516, "learning_rate": 1.495333229857571e-05, "loss": 0.4424, "step": 31663 }, { "epoch": 0.6715446119912621, "grad_norm": 0.37452107667922974, "learning_rate": 1.4953042587415447e-05, "loss": 0.4855, "step": 31664 }, { "epoch": 0.6715658204491951, "grad_norm": 0.33050045371055603, "learning_rate": 1.495275287074645e-05, "loss": 0.45, "step": 31665 }, { "epoch": 0.6715870289071282, "grad_norm": 0.3823925256729126, "learning_rate": 1.4952463148569047e-05, "loss": 0.5502, "step": 31666 }, { "epoch": 0.6716082373650611, "grad_norm": 0.36938005685806274, "learning_rate": 1.4952173420883558e-05, "loss": 0.4978, "step": 31667 }, { "epoch": 0.6716294458229942, "grad_norm": 0.32731345295906067, "learning_rate": 1.4951883687690305e-05, "loss": 0.5029, "step": 31668 }, { "epoch": 0.6716506542809272, "grad_norm": 0.36619144678115845, "learning_rate": 1.4951593948989609e-05, "loss": 0.5227, "step": 31669 }, { "epoch": 0.6716718627388603, "grad_norm": 0.3429992198944092, "learning_rate": 1.4951304204781795e-05, "loss": 0.5415, "step": 31670 }, { "epoch": 0.6716930711967933, "grad_norm": 0.36164605617523193, "learning_rate": 1.4951014455067182e-05, "loss": 0.464, "step": 31671 }, { "epoch": 0.6717142796547263, "grad_norm": 0.4280906021595001, "learning_rate": 1.4950724699846093e-05, "loss": 0.4688, "step": 31672 }, { "epoch": 0.6717354881126594, "grad_norm": 0.3634220063686371, "learning_rate": 1.4950434939118852e-05, "loss": 0.4905, "step": 31673 }, { "epoch": 0.6717566965705923, "grad_norm": 0.37943553924560547, "learning_rate": 1.495014517288578e-05, "loss": 0.5136, "step": 31674 }, { "epoch": 0.6717779050285254, "grad_norm": 0.3808179795742035, "learning_rate": 1.49498554011472e-05, "loss": 0.5227, "step": 31675 }, { "epoch": 0.6717991134864584, "grad_norm": 0.4398047924041748, "learning_rate": 1.4949565623903435e-05, "loss": 0.5407, "step": 31676 }, { "epoch": 0.6718203219443915, "grad_norm": 0.3491871953010559, "learning_rate": 1.4949275841154802e-05, "loss": 0.4825, "step": 31677 }, { "epoch": 0.6718415304023244, "grad_norm": 0.3637043237686157, "learning_rate": 1.4948986052901633e-05, "loss": 0.5462, "step": 31678 }, { "epoch": 0.6718627388602575, "grad_norm": 0.31277334690093994, "learning_rate": 1.494869625914424e-05, "loss": 0.428, "step": 31679 }, { "epoch": 0.6718839473181905, "grad_norm": 0.31888243556022644, "learning_rate": 1.4948406459882951e-05, "loss": 0.4852, "step": 31680 }, { "epoch": 0.6719051557761235, "grad_norm": 0.35711154341697693, "learning_rate": 1.4948116655118087e-05, "loss": 0.501, "step": 31681 }, { "epoch": 0.6719263642340565, "grad_norm": 0.4453291594982147, "learning_rate": 1.4947826844849973e-05, "loss": 0.5061, "step": 31682 }, { "epoch": 0.6719475726919896, "grad_norm": 0.348413348197937, "learning_rate": 1.4947537029078929e-05, "loss": 0.4987, "step": 31683 }, { "epoch": 0.6719687811499225, "grad_norm": 0.30154862999916077, "learning_rate": 1.4947247207805275e-05, "loss": 0.443, "step": 31684 }, { "epoch": 0.6719899896078556, "grad_norm": 0.3503015637397766, "learning_rate": 1.4946957381029335e-05, "loss": 0.5425, "step": 31685 }, { "epoch": 0.6720111980657887, "grad_norm": 0.3928053081035614, "learning_rate": 1.4946667548751435e-05, "loss": 0.4876, "step": 31686 }, { "epoch": 0.6720324065237216, "grad_norm": 0.37182673811912537, "learning_rate": 1.4946377710971894e-05, "loss": 0.4336, "step": 31687 }, { "epoch": 0.6720536149816547, "grad_norm": 0.3710591495037079, "learning_rate": 1.4946087867691032e-05, "loss": 0.4583, "step": 31688 }, { "epoch": 0.6720748234395877, "grad_norm": 0.35328784584999084, "learning_rate": 1.4945798018909177e-05, "loss": 0.5033, "step": 31689 }, { "epoch": 0.6720960318975208, "grad_norm": 0.4025546908378601, "learning_rate": 1.4945508164626648e-05, "loss": 0.5345, "step": 31690 }, { "epoch": 0.6721172403554537, "grad_norm": 0.36697256565093994, "learning_rate": 1.4945218304843763e-05, "loss": 0.5263, "step": 31691 }, { "epoch": 0.6721384488133868, "grad_norm": 0.39437562227249146, "learning_rate": 1.4944928439560855e-05, "loss": 0.4853, "step": 31692 }, { "epoch": 0.6721596572713198, "grad_norm": 0.3502204418182373, "learning_rate": 1.494463856877824e-05, "loss": 0.4588, "step": 31693 }, { "epoch": 0.6721808657292528, "grad_norm": 0.3637091815471649, "learning_rate": 1.4944348692496238e-05, "loss": 0.4865, "step": 31694 }, { "epoch": 0.6722020741871858, "grad_norm": 0.37295982241630554, "learning_rate": 1.4944058810715176e-05, "loss": 0.4527, "step": 31695 }, { "epoch": 0.6722232826451189, "grad_norm": 0.37558481097221375, "learning_rate": 1.4943768923435373e-05, "loss": 0.574, "step": 31696 }, { "epoch": 0.6722444911030518, "grad_norm": 0.33283141255378723, "learning_rate": 1.4943479030657157e-05, "loss": 0.4689, "step": 31697 }, { "epoch": 0.6722656995609849, "grad_norm": 0.4104428291320801, "learning_rate": 1.4943189132380842e-05, "loss": 0.5929, "step": 31698 }, { "epoch": 0.672286908018918, "grad_norm": 0.36744797229766846, "learning_rate": 1.4942899228606758e-05, "loss": 0.5467, "step": 31699 }, { "epoch": 0.672308116476851, "grad_norm": 0.32162895798683167, "learning_rate": 1.4942609319335224e-05, "loss": 0.4068, "step": 31700 }, { "epoch": 0.672329324934784, "grad_norm": 0.3413480818271637, "learning_rate": 1.4942319404566564e-05, "loss": 0.4682, "step": 31701 }, { "epoch": 0.672350533392717, "grad_norm": 0.37199169397354126, "learning_rate": 1.4942029484301097e-05, "loss": 0.5655, "step": 31702 }, { "epoch": 0.6723717418506501, "grad_norm": 0.37930670380592346, "learning_rate": 1.4941739558539148e-05, "loss": 0.5063, "step": 31703 }, { "epoch": 0.672392950308583, "grad_norm": 0.345088928937912, "learning_rate": 1.494144962728104e-05, "loss": 0.5834, "step": 31704 }, { "epoch": 0.6724141587665161, "grad_norm": 0.4350307583808899, "learning_rate": 1.4941159690527095e-05, "loss": 0.4153, "step": 31705 }, { "epoch": 0.6724353672244491, "grad_norm": 0.34929972887039185, "learning_rate": 1.4940869748277633e-05, "loss": 0.5311, "step": 31706 }, { "epoch": 0.6724565756823822, "grad_norm": 0.3367330729961395, "learning_rate": 1.4940579800532981e-05, "loss": 0.5215, "step": 31707 }, { "epoch": 0.6724777841403151, "grad_norm": 0.43434053659439087, "learning_rate": 1.4940289847293459e-05, "loss": 0.5067, "step": 31708 }, { "epoch": 0.6724989925982482, "grad_norm": 0.3998033106327057, "learning_rate": 1.493999988855939e-05, "loss": 0.5313, "step": 31709 }, { "epoch": 0.6725202010561812, "grad_norm": 0.43502184748649597, "learning_rate": 1.4939709924331092e-05, "loss": 0.5182, "step": 31710 }, { "epoch": 0.6725414095141142, "grad_norm": 0.36466294527053833, "learning_rate": 1.4939419954608893e-05, "loss": 0.4451, "step": 31711 }, { "epoch": 0.6725626179720473, "grad_norm": 0.3282298147678375, "learning_rate": 1.4939129979393116e-05, "loss": 0.4448, "step": 31712 }, { "epoch": 0.6725838264299803, "grad_norm": 0.3472640812397003, "learning_rate": 1.4938839998684081e-05, "loss": 0.5378, "step": 31713 }, { "epoch": 0.6726050348879133, "grad_norm": 0.34406256675720215, "learning_rate": 1.493855001248211e-05, "loss": 0.4937, "step": 31714 }, { "epoch": 0.6726262433458463, "grad_norm": 0.3580465614795685, "learning_rate": 1.493826002078753e-05, "loss": 0.5152, "step": 31715 }, { "epoch": 0.6726474518037794, "grad_norm": 0.3710487484931946, "learning_rate": 1.4937970023600657e-05, "loss": 0.5848, "step": 31716 }, { "epoch": 0.6726686602617123, "grad_norm": 0.3547058403491974, "learning_rate": 1.4937680020921817e-05, "loss": 0.5187, "step": 31717 }, { "epoch": 0.6726898687196454, "grad_norm": 0.3768715262413025, "learning_rate": 1.4937390012751332e-05, "loss": 0.4896, "step": 31718 }, { "epoch": 0.6727110771775784, "grad_norm": 0.3582775890827179, "learning_rate": 1.4937099999089526e-05, "loss": 0.5035, "step": 31719 }, { "epoch": 0.6727322856355115, "grad_norm": 0.34595683217048645, "learning_rate": 1.493680997993672e-05, "loss": 0.4078, "step": 31720 }, { "epoch": 0.6727534940934444, "grad_norm": 0.36191776394844055, "learning_rate": 1.4936519955293237e-05, "loss": 0.4685, "step": 31721 }, { "epoch": 0.6727747025513775, "grad_norm": 0.333872526884079, "learning_rate": 1.4936229925159396e-05, "loss": 0.4545, "step": 31722 }, { "epoch": 0.6727959110093105, "grad_norm": 0.39978229999542236, "learning_rate": 1.4935939889535526e-05, "loss": 0.5504, "step": 31723 }, { "epoch": 0.6728171194672435, "grad_norm": 0.3834437131881714, "learning_rate": 1.4935649848421946e-05, "loss": 0.4779, "step": 31724 }, { "epoch": 0.6728383279251766, "grad_norm": 0.3488974869251251, "learning_rate": 1.4935359801818978e-05, "loss": 0.4939, "step": 31725 }, { "epoch": 0.6728595363831096, "grad_norm": 0.3513503968715668, "learning_rate": 1.493506974972695e-05, "loss": 0.5094, "step": 31726 }, { "epoch": 0.6728807448410427, "grad_norm": 0.36280468106269836, "learning_rate": 1.4934779692146175e-05, "loss": 0.4926, "step": 31727 }, { "epoch": 0.6729019532989756, "grad_norm": 0.3539341390132904, "learning_rate": 1.4934489629076985e-05, "loss": 0.5278, "step": 31728 }, { "epoch": 0.6729231617569087, "grad_norm": 0.35649535059928894, "learning_rate": 1.4934199560519695e-05, "loss": 0.4491, "step": 31729 }, { "epoch": 0.6729443702148417, "grad_norm": 0.32593250274658203, "learning_rate": 1.4933909486474634e-05, "loss": 0.46, "step": 31730 }, { "epoch": 0.6729655786727747, "grad_norm": 0.3138003349304199, "learning_rate": 1.493361940694212e-05, "loss": 0.4433, "step": 31731 }, { "epoch": 0.6729867871307077, "grad_norm": 0.3476756811141968, "learning_rate": 1.4933329321922479e-05, "loss": 0.4934, "step": 31732 }, { "epoch": 0.6730079955886408, "grad_norm": 0.47866708040237427, "learning_rate": 1.493303923141603e-05, "loss": 0.426, "step": 31733 }, { "epoch": 0.6730292040465737, "grad_norm": 0.3321318030357361, "learning_rate": 1.4932749135423098e-05, "loss": 0.4726, "step": 31734 }, { "epoch": 0.6730504125045068, "grad_norm": 0.4098079800605774, "learning_rate": 1.4932459033944006e-05, "loss": 0.5418, "step": 31735 }, { "epoch": 0.6730716209624398, "grad_norm": 0.3713051378726959, "learning_rate": 1.4932168926979074e-05, "loss": 0.5546, "step": 31736 }, { "epoch": 0.6730928294203729, "grad_norm": 0.39732542634010315, "learning_rate": 1.4931878814528629e-05, "loss": 0.5555, "step": 31737 }, { "epoch": 0.6731140378783058, "grad_norm": 0.4541674852371216, "learning_rate": 1.493158869659299e-05, "loss": 0.4461, "step": 31738 }, { "epoch": 0.6731352463362389, "grad_norm": 0.3442663848400116, "learning_rate": 1.493129857317248e-05, "loss": 0.4629, "step": 31739 }, { "epoch": 0.673156454794172, "grad_norm": 0.362785279750824, "learning_rate": 1.4931008444267426e-05, "loss": 0.5657, "step": 31740 }, { "epoch": 0.6731776632521049, "grad_norm": 0.3287016749382019, "learning_rate": 1.4930718309878142e-05, "loss": 0.4291, "step": 31741 }, { "epoch": 0.673198871710038, "grad_norm": 0.3654300272464752, "learning_rate": 1.4930428170004962e-05, "loss": 0.5605, "step": 31742 }, { "epoch": 0.673220080167971, "grad_norm": 0.3715333342552185, "learning_rate": 1.49301380246482e-05, "loss": 0.6005, "step": 31743 }, { "epoch": 0.673241288625904, "grad_norm": 0.34832563996315, "learning_rate": 1.4929847873808181e-05, "loss": 0.4583, "step": 31744 }, { "epoch": 0.673262497083837, "grad_norm": 0.3604608476161957, "learning_rate": 1.4929557717485225e-05, "loss": 0.5207, "step": 31745 }, { "epoch": 0.6732837055417701, "grad_norm": 0.36910632252693176, "learning_rate": 1.4929267555679662e-05, "loss": 0.4715, "step": 31746 }, { "epoch": 0.673304913999703, "grad_norm": 0.339834988117218, "learning_rate": 1.492897738839181e-05, "loss": 0.5177, "step": 31747 }, { "epoch": 0.6733261224576361, "grad_norm": 0.5743690133094788, "learning_rate": 1.492868721562199e-05, "loss": 0.4635, "step": 31748 }, { "epoch": 0.6733473309155691, "grad_norm": 0.36248788237571716, "learning_rate": 1.4928397037370527e-05, "loss": 0.4386, "step": 31749 }, { "epoch": 0.6733685393735022, "grad_norm": 0.358277291059494, "learning_rate": 1.4928106853637748e-05, "loss": 0.4936, "step": 31750 }, { "epoch": 0.6733897478314351, "grad_norm": 0.37117999792099, "learning_rate": 1.4927816664423967e-05, "loss": 0.4676, "step": 31751 }, { "epoch": 0.6734109562893682, "grad_norm": 0.36858126521110535, "learning_rate": 1.4927526469729514e-05, "loss": 0.606, "step": 31752 }, { "epoch": 0.6734321647473013, "grad_norm": 0.36999645829200745, "learning_rate": 1.4927236269554705e-05, "loss": 0.4466, "step": 31753 }, { "epoch": 0.6734533732052342, "grad_norm": 0.34633705019950867, "learning_rate": 1.492694606389987e-05, "loss": 0.5444, "step": 31754 }, { "epoch": 0.6734745816631673, "grad_norm": 0.356777161359787, "learning_rate": 1.4926655852765327e-05, "loss": 0.493, "step": 31755 }, { "epoch": 0.6734957901211003, "grad_norm": 0.3865630626678467, "learning_rate": 1.4926365636151399e-05, "loss": 0.4427, "step": 31756 }, { "epoch": 0.6735169985790334, "grad_norm": 0.37015968561172485, "learning_rate": 1.4926075414058411e-05, "loss": 0.5539, "step": 31757 }, { "epoch": 0.6735382070369663, "grad_norm": 0.4444383680820465, "learning_rate": 1.4925785186486688e-05, "loss": 0.5466, "step": 31758 }, { "epoch": 0.6735594154948994, "grad_norm": 0.3269652724266052, "learning_rate": 1.4925494953436545e-05, "loss": 0.3944, "step": 31759 }, { "epoch": 0.6735806239528324, "grad_norm": 0.45698320865631104, "learning_rate": 1.4925204714908313e-05, "loss": 0.4378, "step": 31760 }, { "epoch": 0.6736018324107654, "grad_norm": 0.34382233023643494, "learning_rate": 1.4924914470902305e-05, "loss": 0.5122, "step": 31761 }, { "epoch": 0.6736230408686984, "grad_norm": 0.41428324580192566, "learning_rate": 1.4924624221418853e-05, "loss": 0.4955, "step": 31762 }, { "epoch": 0.6736442493266315, "grad_norm": 0.35844823718070984, "learning_rate": 1.492433396645828e-05, "loss": 0.4528, "step": 31763 }, { "epoch": 0.6736654577845644, "grad_norm": 0.39362233877182007, "learning_rate": 1.49240437060209e-05, "loss": 0.4722, "step": 31764 }, { "epoch": 0.6736866662424975, "grad_norm": 0.35773926973342896, "learning_rate": 1.4923753440107048e-05, "loss": 0.5393, "step": 31765 }, { "epoch": 0.6737078747004306, "grad_norm": 0.36356043815612793, "learning_rate": 1.4923463168717036e-05, "loss": 0.4824, "step": 31766 }, { "epoch": 0.6737290831583636, "grad_norm": 0.4003280997276306, "learning_rate": 1.4923172891851192e-05, "loss": 0.5199, "step": 31767 }, { "epoch": 0.6737502916162966, "grad_norm": 0.3380512595176697, "learning_rate": 1.4922882609509837e-05, "loss": 0.4511, "step": 31768 }, { "epoch": 0.6737715000742296, "grad_norm": 0.340236634016037, "learning_rate": 1.4922592321693294e-05, "loss": 0.5057, "step": 31769 }, { "epoch": 0.6737927085321627, "grad_norm": 0.4237521290779114, "learning_rate": 1.4922302028401889e-05, "loss": 0.5384, "step": 31770 }, { "epoch": 0.6738139169900956, "grad_norm": 0.35593181848526, "learning_rate": 1.4922011729635941e-05, "loss": 0.4959, "step": 31771 }, { "epoch": 0.6738351254480287, "grad_norm": 0.3944643437862396, "learning_rate": 1.4921721425395776e-05, "loss": 0.5023, "step": 31772 }, { "epoch": 0.6738563339059617, "grad_norm": 0.4253825843334198, "learning_rate": 1.4921431115681713e-05, "loss": 0.5595, "step": 31773 }, { "epoch": 0.6738775423638947, "grad_norm": 0.9721068143844604, "learning_rate": 1.492114080049408e-05, "loss": 0.5542, "step": 31774 }, { "epoch": 0.6738987508218277, "grad_norm": 0.38331228494644165, "learning_rate": 1.4920850479833193e-05, "loss": 0.4621, "step": 31775 }, { "epoch": 0.6739199592797608, "grad_norm": 0.33089664578437805, "learning_rate": 1.492056015369938e-05, "loss": 0.5058, "step": 31776 }, { "epoch": 0.6739411677376937, "grad_norm": 0.3824467360973358, "learning_rate": 1.4920269822092964e-05, "loss": 0.569, "step": 31777 }, { "epoch": 0.6739623761956268, "grad_norm": 0.37162867188453674, "learning_rate": 1.4919979485014264e-05, "loss": 0.6022, "step": 31778 }, { "epoch": 0.6739835846535598, "grad_norm": 0.3527522385120392, "learning_rate": 1.4919689142463609e-05, "loss": 0.5407, "step": 31779 }, { "epoch": 0.6740047931114929, "grad_norm": 0.3422203063964844, "learning_rate": 1.4919398794441318e-05, "loss": 0.4914, "step": 31780 }, { "epoch": 0.6740260015694259, "grad_norm": 0.4138469994068146, "learning_rate": 1.4919108440947711e-05, "loss": 0.4772, "step": 31781 }, { "epoch": 0.6740472100273589, "grad_norm": 0.5482822060585022, "learning_rate": 1.4918818081983117e-05, "loss": 0.5965, "step": 31782 }, { "epoch": 0.674068418485292, "grad_norm": 0.35502704977989197, "learning_rate": 1.4918527717547858e-05, "loss": 0.5555, "step": 31783 }, { "epoch": 0.6740896269432249, "grad_norm": 0.3663989007472992, "learning_rate": 1.4918237347642252e-05, "loss": 0.5212, "step": 31784 }, { "epoch": 0.674110835401158, "grad_norm": 0.4746294617652893, "learning_rate": 1.4917946972266629e-05, "loss": 0.5094, "step": 31785 }, { "epoch": 0.674132043859091, "grad_norm": 0.3796805739402771, "learning_rate": 1.4917656591421305e-05, "loss": 0.4493, "step": 31786 }, { "epoch": 0.674153252317024, "grad_norm": 0.37996816635131836, "learning_rate": 1.4917366205106607e-05, "loss": 0.5249, "step": 31787 }, { "epoch": 0.674174460774957, "grad_norm": 0.3357990086078644, "learning_rate": 1.4917075813322855e-05, "loss": 0.4926, "step": 31788 }, { "epoch": 0.6741956692328901, "grad_norm": 0.3371296525001526, "learning_rate": 1.4916785416070376e-05, "loss": 0.4357, "step": 31789 }, { "epoch": 0.6742168776908231, "grad_norm": 0.3502202033996582, "learning_rate": 1.491649501334949e-05, "loss": 0.5362, "step": 31790 }, { "epoch": 0.6742380861487561, "grad_norm": 0.3916439414024353, "learning_rate": 1.4916204605160523e-05, "loss": 0.4661, "step": 31791 }, { "epoch": 0.6742592946066891, "grad_norm": 0.3949117362499237, "learning_rate": 1.4915914191503792e-05, "loss": 0.492, "step": 31792 }, { "epoch": 0.6742805030646222, "grad_norm": 0.5388868451118469, "learning_rate": 1.4915623772379629e-05, "loss": 0.5272, "step": 31793 }, { "epoch": 0.6743017115225552, "grad_norm": 0.33602404594421387, "learning_rate": 1.4915333347788348e-05, "loss": 0.4424, "step": 31794 }, { "epoch": 0.6743229199804882, "grad_norm": 0.3707066774368286, "learning_rate": 1.4915042917730276e-05, "loss": 0.5451, "step": 31795 }, { "epoch": 0.6743441284384213, "grad_norm": 0.33094871044158936, "learning_rate": 1.4914752482205738e-05, "loss": 0.4794, "step": 31796 }, { "epoch": 0.6743653368963543, "grad_norm": 0.5182928442955017, "learning_rate": 1.4914462041215057e-05, "loss": 0.5222, "step": 31797 }, { "epoch": 0.6743865453542873, "grad_norm": 0.364785373210907, "learning_rate": 1.4914171594758549e-05, "loss": 0.4361, "step": 31798 }, { "epoch": 0.6744077538122203, "grad_norm": 0.36360684037208557, "learning_rate": 1.4913881142836546e-05, "loss": 0.5633, "step": 31799 }, { "epoch": 0.6744289622701534, "grad_norm": 0.37713274359703064, "learning_rate": 1.4913590685449364e-05, "loss": 0.5104, "step": 31800 }, { "epoch": 0.6744501707280863, "grad_norm": 0.3353174924850464, "learning_rate": 1.491330022259733e-05, "loss": 0.4819, "step": 31801 }, { "epoch": 0.6744713791860194, "grad_norm": 0.5290539264678955, "learning_rate": 1.4913009754280767e-05, "loss": 0.5274, "step": 31802 }, { "epoch": 0.6744925876439524, "grad_norm": 0.3707732558250427, "learning_rate": 1.49127192805e-05, "loss": 0.4561, "step": 31803 }, { "epoch": 0.6745137961018854, "grad_norm": 0.35159677267074585, "learning_rate": 1.4912428801255343e-05, "loss": 0.4156, "step": 31804 }, { "epoch": 0.6745350045598184, "grad_norm": 0.41750210523605347, "learning_rate": 1.4912138316547129e-05, "loss": 0.4782, "step": 31805 }, { "epoch": 0.6745562130177515, "grad_norm": 0.619488000869751, "learning_rate": 1.4911847826375676e-05, "loss": 0.5646, "step": 31806 }, { "epoch": 0.6745774214756846, "grad_norm": 0.3660779297351837, "learning_rate": 1.491155733074131e-05, "loss": 0.484, "step": 31807 }, { "epoch": 0.6745986299336175, "grad_norm": 0.38576099276542664, "learning_rate": 1.4911266829644353e-05, "loss": 0.5019, "step": 31808 }, { "epoch": 0.6746198383915506, "grad_norm": 0.3752632141113281, "learning_rate": 1.4910976323085125e-05, "loss": 0.5082, "step": 31809 }, { "epoch": 0.6746410468494836, "grad_norm": 0.37495532631874084, "learning_rate": 1.4910685811063954e-05, "loss": 0.5167, "step": 31810 }, { "epoch": 0.6746622553074166, "grad_norm": 0.4306018054485321, "learning_rate": 1.4910395293581164e-05, "loss": 0.4839, "step": 31811 }, { "epoch": 0.6746834637653496, "grad_norm": 0.3789418637752533, "learning_rate": 1.4910104770637068e-05, "loss": 0.4951, "step": 31812 }, { "epoch": 0.6747046722232827, "grad_norm": 0.34231245517730713, "learning_rate": 1.4909814242232e-05, "loss": 0.5102, "step": 31813 }, { "epoch": 0.6747258806812156, "grad_norm": 0.3910559415817261, "learning_rate": 1.4909523708366281e-05, "loss": 0.5459, "step": 31814 }, { "epoch": 0.6747470891391487, "grad_norm": 0.335123211145401, "learning_rate": 1.4909233169040227e-05, "loss": 0.468, "step": 31815 }, { "epoch": 0.6747682975970817, "grad_norm": 0.37601935863494873, "learning_rate": 1.4908942624254172e-05, "loss": 0.4102, "step": 31816 }, { "epoch": 0.6747895060550148, "grad_norm": 0.414715051651001, "learning_rate": 1.4908652074008432e-05, "loss": 0.5133, "step": 31817 }, { "epoch": 0.6748107145129477, "grad_norm": 0.38565605878829956, "learning_rate": 1.490836151830333e-05, "loss": 0.502, "step": 31818 }, { "epoch": 0.6748319229708808, "grad_norm": 0.3756258487701416, "learning_rate": 1.4908070957139194e-05, "loss": 0.5129, "step": 31819 }, { "epoch": 0.6748531314288138, "grad_norm": 0.36369192600250244, "learning_rate": 1.4907780390516343e-05, "loss": 0.465, "step": 31820 }, { "epoch": 0.6748743398867468, "grad_norm": 0.30824923515319824, "learning_rate": 1.4907489818435096e-05, "loss": 0.4235, "step": 31821 }, { "epoch": 0.6748955483446799, "grad_norm": 0.3416115939617157, "learning_rate": 1.4907199240895788e-05, "loss": 0.4673, "step": 31822 }, { "epoch": 0.6749167568026129, "grad_norm": 0.3372320532798767, "learning_rate": 1.4906908657898733e-05, "loss": 0.4255, "step": 31823 }, { "epoch": 0.674937965260546, "grad_norm": 0.3876940906047821, "learning_rate": 1.4906618069444259e-05, "loss": 0.4125, "step": 31824 }, { "epoch": 0.6749591737184789, "grad_norm": 0.3340616524219513, "learning_rate": 1.4906327475532688e-05, "loss": 0.4573, "step": 31825 }, { "epoch": 0.674980382176412, "grad_norm": 0.3754553496837616, "learning_rate": 1.4906036876164336e-05, "loss": 0.4936, "step": 31826 }, { "epoch": 0.675001590634345, "grad_norm": 0.380561888217926, "learning_rate": 1.4905746271339537e-05, "loss": 0.5373, "step": 31827 }, { "epoch": 0.675022799092278, "grad_norm": 0.38432422280311584, "learning_rate": 1.490545566105861e-05, "loss": 0.4826, "step": 31828 }, { "epoch": 0.675044007550211, "grad_norm": 0.3685680031776428, "learning_rate": 1.4905165045321874e-05, "loss": 0.4466, "step": 31829 }, { "epoch": 0.6750652160081441, "grad_norm": 0.355935662984848, "learning_rate": 1.490487442412966e-05, "loss": 0.4089, "step": 31830 }, { "epoch": 0.675086424466077, "grad_norm": 0.47066688537597656, "learning_rate": 1.4904583797482289e-05, "loss": 0.5184, "step": 31831 }, { "epoch": 0.6751076329240101, "grad_norm": 0.4090930223464966, "learning_rate": 1.4904293165380074e-05, "loss": 0.4838, "step": 31832 }, { "epoch": 0.6751288413819431, "grad_norm": 0.36093413829803467, "learning_rate": 1.4904002527823354e-05, "loss": 0.5014, "step": 31833 }, { "epoch": 0.6751500498398761, "grad_norm": 0.3691614866256714, "learning_rate": 1.4903711884812444e-05, "loss": 0.5196, "step": 31834 }, { "epoch": 0.6751712582978092, "grad_norm": 0.37537670135498047, "learning_rate": 1.4903421236347665e-05, "loss": 0.5405, "step": 31835 }, { "epoch": 0.6751924667557422, "grad_norm": 0.35913628339767456, "learning_rate": 1.4903130582429348e-05, "loss": 0.5229, "step": 31836 }, { "epoch": 0.6752136752136753, "grad_norm": 0.37586647272109985, "learning_rate": 1.490283992305781e-05, "loss": 0.4888, "step": 31837 }, { "epoch": 0.6752348836716082, "grad_norm": 0.34885069727897644, "learning_rate": 1.4902549258233377e-05, "loss": 0.4488, "step": 31838 }, { "epoch": 0.6752560921295413, "grad_norm": 0.3526410758495331, "learning_rate": 1.4902258587956368e-05, "loss": 0.4476, "step": 31839 }, { "epoch": 0.6752773005874743, "grad_norm": 0.39005523920059204, "learning_rate": 1.4901967912227112e-05, "loss": 0.4819, "step": 31840 }, { "epoch": 0.6752985090454073, "grad_norm": 0.4990205764770508, "learning_rate": 1.490167723104593e-05, "loss": 0.5092, "step": 31841 }, { "epoch": 0.6753197175033403, "grad_norm": 0.39471399784088135, "learning_rate": 1.4901386544413145e-05, "loss": 0.4822, "step": 31842 }, { "epoch": 0.6753409259612734, "grad_norm": 0.34393149614334106, "learning_rate": 1.490109585232908e-05, "loss": 0.4668, "step": 31843 }, { "epoch": 0.6753621344192063, "grad_norm": 0.3438152074813843, "learning_rate": 1.4900805154794058e-05, "loss": 0.4174, "step": 31844 }, { "epoch": 0.6753833428771394, "grad_norm": 0.33594176173210144, "learning_rate": 1.4900514451808403e-05, "loss": 0.5156, "step": 31845 }, { "epoch": 0.6754045513350724, "grad_norm": 0.4582802653312683, "learning_rate": 1.490022374337244e-05, "loss": 0.5188, "step": 31846 }, { "epoch": 0.6754257597930055, "grad_norm": 0.3659437298774719, "learning_rate": 1.489993302948649e-05, "loss": 0.5103, "step": 31847 }, { "epoch": 0.6754469682509385, "grad_norm": 0.35166803002357483, "learning_rate": 1.4899642310150877e-05, "loss": 0.5809, "step": 31848 }, { "epoch": 0.6754681767088715, "grad_norm": 0.3533613085746765, "learning_rate": 1.4899351585365924e-05, "loss": 0.5065, "step": 31849 }, { "epoch": 0.6754893851668046, "grad_norm": 0.41240808367729187, "learning_rate": 1.4899060855131958e-05, "loss": 0.4935, "step": 31850 }, { "epoch": 0.6755105936247375, "grad_norm": 0.32415205240249634, "learning_rate": 1.4898770119449294e-05, "loss": 0.4421, "step": 31851 }, { "epoch": 0.6755318020826706, "grad_norm": 0.3590165376663208, "learning_rate": 1.4898479378318263e-05, "loss": 0.5063, "step": 31852 }, { "epoch": 0.6755530105406036, "grad_norm": 0.356842041015625, "learning_rate": 1.4898188631739186e-05, "loss": 0.512, "step": 31853 }, { "epoch": 0.6755742189985366, "grad_norm": 0.3523884415626526, "learning_rate": 1.4897897879712385e-05, "loss": 0.463, "step": 31854 }, { "epoch": 0.6755954274564696, "grad_norm": 0.3406190574169159, "learning_rate": 1.4897607122238186e-05, "loss": 0.5116, "step": 31855 }, { "epoch": 0.6756166359144027, "grad_norm": 0.3883155286312103, "learning_rate": 1.4897316359316912e-05, "loss": 0.4267, "step": 31856 }, { "epoch": 0.6756378443723356, "grad_norm": 0.4178774952888489, "learning_rate": 1.4897025590948882e-05, "loss": 0.5525, "step": 31857 }, { "epoch": 0.6756590528302687, "grad_norm": 0.3152787983417511, "learning_rate": 1.4896734817134421e-05, "loss": 0.437, "step": 31858 }, { "epoch": 0.6756802612882017, "grad_norm": 0.3915962278842926, "learning_rate": 1.4896444037873859e-05, "loss": 0.515, "step": 31859 }, { "epoch": 0.6757014697461348, "grad_norm": 0.42194366455078125, "learning_rate": 1.489615325316751e-05, "loss": 0.5459, "step": 31860 }, { "epoch": 0.6757226782040677, "grad_norm": 0.41693511605262756, "learning_rate": 1.4895862463015707e-05, "loss": 0.46, "step": 31861 }, { "epoch": 0.6757438866620008, "grad_norm": 0.3744071125984192, "learning_rate": 1.4895571667418766e-05, "loss": 0.5437, "step": 31862 }, { "epoch": 0.6757650951199339, "grad_norm": 0.4256312847137451, "learning_rate": 1.4895280866377013e-05, "loss": 0.469, "step": 31863 }, { "epoch": 0.6757863035778668, "grad_norm": 0.3598833680152893, "learning_rate": 1.4894990059890772e-05, "loss": 0.4292, "step": 31864 }, { "epoch": 0.6758075120357999, "grad_norm": 0.3571462035179138, "learning_rate": 1.4894699247960363e-05, "loss": 0.5289, "step": 31865 }, { "epoch": 0.6758287204937329, "grad_norm": 0.36686015129089355, "learning_rate": 1.4894408430586112e-05, "loss": 0.4446, "step": 31866 }, { "epoch": 0.675849928951666, "grad_norm": 0.35127729177474976, "learning_rate": 1.4894117607768346e-05, "loss": 0.5204, "step": 31867 }, { "epoch": 0.6758711374095989, "grad_norm": 0.3142299950122833, "learning_rate": 1.4893826779507381e-05, "loss": 0.4862, "step": 31868 }, { "epoch": 0.675892345867532, "grad_norm": 0.33342495560646057, "learning_rate": 1.4893535945803546e-05, "loss": 0.4477, "step": 31869 }, { "epoch": 0.675913554325465, "grad_norm": 0.37128719687461853, "learning_rate": 1.4893245106657164e-05, "loss": 0.5614, "step": 31870 }, { "epoch": 0.675934762783398, "grad_norm": 0.4710826575756073, "learning_rate": 1.4892954262068555e-05, "loss": 0.6173, "step": 31871 }, { "epoch": 0.675955971241331, "grad_norm": 0.5457205176353455, "learning_rate": 1.4892663412038047e-05, "loss": 0.5779, "step": 31872 }, { "epoch": 0.6759771796992641, "grad_norm": 0.3429800868034363, "learning_rate": 1.489237255656596e-05, "loss": 0.4551, "step": 31873 }, { "epoch": 0.675998388157197, "grad_norm": 0.3467903435230255, "learning_rate": 1.4892081695652615e-05, "loss": 0.4833, "step": 31874 }, { "epoch": 0.6760195966151301, "grad_norm": 0.3734580874443054, "learning_rate": 1.4891790829298347e-05, "loss": 0.425, "step": 31875 }, { "epoch": 0.6760408050730632, "grad_norm": 0.4614364504814148, "learning_rate": 1.4891499957503467e-05, "loss": 0.4405, "step": 31876 }, { "epoch": 0.6760620135309962, "grad_norm": 0.35742655396461487, "learning_rate": 1.4891209080268303e-05, "loss": 0.4697, "step": 31877 }, { "epoch": 0.6760832219889292, "grad_norm": 0.34813883900642395, "learning_rate": 1.489091819759318e-05, "loss": 0.5247, "step": 31878 }, { "epoch": 0.6761044304468622, "grad_norm": 0.33075785636901855, "learning_rate": 1.489062730947842e-05, "loss": 0.4699, "step": 31879 }, { "epoch": 0.6761256389047953, "grad_norm": 0.36560067534446716, "learning_rate": 1.4890336415924347e-05, "loss": 0.4468, "step": 31880 }, { "epoch": 0.6761468473627282, "grad_norm": 0.38653117418289185, "learning_rate": 1.4890045516931287e-05, "loss": 0.4553, "step": 31881 }, { "epoch": 0.6761680558206613, "grad_norm": 0.34885716438293457, "learning_rate": 1.4889754612499557e-05, "loss": 0.4775, "step": 31882 }, { "epoch": 0.6761892642785943, "grad_norm": 0.36513417959213257, "learning_rate": 1.4889463702629485e-05, "loss": 0.531, "step": 31883 }, { "epoch": 0.6762104727365273, "grad_norm": 0.34561410546302795, "learning_rate": 1.4889172787321393e-05, "loss": 0.5019, "step": 31884 }, { "epoch": 0.6762316811944603, "grad_norm": 0.3544040322303772, "learning_rate": 1.4888881866575609e-05, "loss": 0.5458, "step": 31885 }, { "epoch": 0.6762528896523934, "grad_norm": 0.41496649384498596, "learning_rate": 1.488859094039245e-05, "loss": 0.4642, "step": 31886 }, { "epoch": 0.6762740981103263, "grad_norm": 0.3473582863807678, "learning_rate": 1.4888300008772245e-05, "loss": 0.5793, "step": 31887 }, { "epoch": 0.6762953065682594, "grad_norm": 0.34419697523117065, "learning_rate": 1.4888009071715312e-05, "loss": 0.5113, "step": 31888 }, { "epoch": 0.6763165150261925, "grad_norm": 0.3606477379798889, "learning_rate": 1.488771812922198e-05, "loss": 0.4961, "step": 31889 }, { "epoch": 0.6763377234841255, "grad_norm": 0.3432041108608246, "learning_rate": 1.4887427181292569e-05, "loss": 0.5145, "step": 31890 }, { "epoch": 0.6763589319420585, "grad_norm": 0.37856799364089966, "learning_rate": 1.4887136227927403e-05, "loss": 0.4901, "step": 31891 }, { "epoch": 0.6763801403999915, "grad_norm": 0.3830212354660034, "learning_rate": 1.488684526912681e-05, "loss": 0.5599, "step": 31892 }, { "epoch": 0.6764013488579246, "grad_norm": 0.3604097068309784, "learning_rate": 1.4886554304891112e-05, "loss": 0.5754, "step": 31893 }, { "epoch": 0.6764225573158575, "grad_norm": 0.3655005991458893, "learning_rate": 1.4886263335220625e-05, "loss": 0.4742, "step": 31894 }, { "epoch": 0.6764437657737906, "grad_norm": 0.35734349489212036, "learning_rate": 1.4885972360115681e-05, "loss": 0.5209, "step": 31895 }, { "epoch": 0.6764649742317236, "grad_norm": 0.343789279460907, "learning_rate": 1.4885681379576599e-05, "loss": 0.5077, "step": 31896 }, { "epoch": 0.6764861826896567, "grad_norm": 0.3872135281562805, "learning_rate": 1.4885390393603708e-05, "loss": 0.4421, "step": 31897 }, { "epoch": 0.6765073911475896, "grad_norm": 0.3822137117385864, "learning_rate": 1.4885099402197327e-05, "loss": 0.4518, "step": 31898 }, { "epoch": 0.6765285996055227, "grad_norm": 1.0256356000900269, "learning_rate": 1.488480840535778e-05, "loss": 0.5602, "step": 31899 }, { "epoch": 0.6765498080634557, "grad_norm": 0.36095112562179565, "learning_rate": 1.4884517403085393e-05, "loss": 0.6017, "step": 31900 }, { "epoch": 0.6765710165213887, "grad_norm": 0.39953431487083435, "learning_rate": 1.4884226395380487e-05, "loss": 0.5034, "step": 31901 }, { "epoch": 0.6765922249793217, "grad_norm": 0.35885322093963623, "learning_rate": 1.4883935382243386e-05, "loss": 0.4923, "step": 31902 }, { "epoch": 0.6766134334372548, "grad_norm": 0.3647463917732239, "learning_rate": 1.4883644363674414e-05, "loss": 0.4408, "step": 31903 }, { "epoch": 0.6766346418951878, "grad_norm": 0.37512335181236267, "learning_rate": 1.4883353339673898e-05, "loss": 0.4299, "step": 31904 }, { "epoch": 0.6766558503531208, "grad_norm": 0.3669965863227844, "learning_rate": 1.4883062310242155e-05, "loss": 0.4373, "step": 31905 }, { "epoch": 0.6766770588110539, "grad_norm": 0.3376965820789337, "learning_rate": 1.4882771275379517e-05, "loss": 0.4918, "step": 31906 }, { "epoch": 0.6766982672689869, "grad_norm": 0.3281807601451874, "learning_rate": 1.4882480235086302e-05, "loss": 0.4514, "step": 31907 }, { "epoch": 0.6767194757269199, "grad_norm": 0.3827698826789856, "learning_rate": 1.4882189189362834e-05, "loss": 0.5211, "step": 31908 }, { "epoch": 0.6767406841848529, "grad_norm": 0.42115193605422974, "learning_rate": 1.4881898138209436e-05, "loss": 0.5013, "step": 31909 }, { "epoch": 0.676761892642786, "grad_norm": 0.310931921005249, "learning_rate": 1.4881607081626436e-05, "loss": 0.4317, "step": 31910 }, { "epoch": 0.6767831011007189, "grad_norm": 0.331228643655777, "learning_rate": 1.4881316019614154e-05, "loss": 0.5057, "step": 31911 }, { "epoch": 0.676804309558652, "grad_norm": 0.3733862340450287, "learning_rate": 1.4881024952172915e-05, "loss": 0.5375, "step": 31912 }, { "epoch": 0.676825518016585, "grad_norm": 0.38040146231651306, "learning_rate": 1.4880733879303044e-05, "loss": 0.5676, "step": 31913 }, { "epoch": 0.676846726474518, "grad_norm": 0.3462938666343689, "learning_rate": 1.4880442801004859e-05, "loss": 0.4427, "step": 31914 }, { "epoch": 0.676867934932451, "grad_norm": 0.3598534166812897, "learning_rate": 1.4880151717278692e-05, "loss": 0.5331, "step": 31915 }, { "epoch": 0.6768891433903841, "grad_norm": 0.3983961045742035, "learning_rate": 1.487986062812486e-05, "loss": 0.5238, "step": 31916 }, { "epoch": 0.6769103518483172, "grad_norm": 0.3847472369670868, "learning_rate": 1.487956953354369e-05, "loss": 0.5481, "step": 31917 }, { "epoch": 0.6769315603062501, "grad_norm": 0.3851906359195709, "learning_rate": 1.4879278433535506e-05, "loss": 0.5664, "step": 31918 }, { "epoch": 0.6769527687641832, "grad_norm": 0.3802940547466278, "learning_rate": 1.487898732810063e-05, "loss": 0.5042, "step": 31919 }, { "epoch": 0.6769739772221162, "grad_norm": 0.4050510823726654, "learning_rate": 1.4878696217239388e-05, "loss": 0.5165, "step": 31920 }, { "epoch": 0.6769951856800492, "grad_norm": 0.31947848200798035, "learning_rate": 1.4878405100952102e-05, "loss": 0.4499, "step": 31921 }, { "epoch": 0.6770163941379822, "grad_norm": 0.3557001054286957, "learning_rate": 1.4878113979239093e-05, "loss": 0.492, "step": 31922 }, { "epoch": 0.6770376025959153, "grad_norm": 0.36890220642089844, "learning_rate": 1.4877822852100691e-05, "loss": 0.4991, "step": 31923 }, { "epoch": 0.6770588110538482, "grad_norm": 0.4023647904396057, "learning_rate": 1.4877531719537217e-05, "loss": 0.505, "step": 31924 }, { "epoch": 0.6770800195117813, "grad_norm": 0.36970216035842896, "learning_rate": 1.4877240581548993e-05, "loss": 0.5452, "step": 31925 }, { "epoch": 0.6771012279697143, "grad_norm": 0.39852553606033325, "learning_rate": 1.4876949438136348e-05, "loss": 0.5447, "step": 31926 }, { "epoch": 0.6771224364276474, "grad_norm": 0.3586265742778778, "learning_rate": 1.4876658289299601e-05, "loss": 0.5611, "step": 31927 }, { "epoch": 0.6771436448855803, "grad_norm": 0.3483467102050781, "learning_rate": 1.4876367135039073e-05, "loss": 0.4952, "step": 31928 }, { "epoch": 0.6771648533435134, "grad_norm": 0.3518883287906647, "learning_rate": 1.4876075975355093e-05, "loss": 0.5549, "step": 31929 }, { "epoch": 0.6771860618014465, "grad_norm": 0.3223027288913727, "learning_rate": 1.4875784810247989e-05, "loss": 0.4981, "step": 31930 }, { "epoch": 0.6772072702593794, "grad_norm": 0.3783951997756958, "learning_rate": 1.4875493639718073e-05, "loss": 0.5119, "step": 31931 }, { "epoch": 0.6772284787173125, "grad_norm": 0.3857550621032715, "learning_rate": 1.4875202463765678e-05, "loss": 0.5873, "step": 31932 }, { "epoch": 0.6772496871752455, "grad_norm": 0.35220661759376526, "learning_rate": 1.4874911282391127e-05, "loss": 0.4141, "step": 31933 }, { "epoch": 0.6772708956331785, "grad_norm": 0.3357587158679962, "learning_rate": 1.487462009559474e-05, "loss": 0.417, "step": 31934 }, { "epoch": 0.6772921040911115, "grad_norm": 0.349433034658432, "learning_rate": 1.4874328903376843e-05, "loss": 0.443, "step": 31935 }, { "epoch": 0.6773133125490446, "grad_norm": 0.39226213097572327, "learning_rate": 1.4874037705737758e-05, "loss": 0.4827, "step": 31936 }, { "epoch": 0.6773345210069776, "grad_norm": 0.362977534532547, "learning_rate": 1.4873746502677813e-05, "loss": 0.5078, "step": 31937 }, { "epoch": 0.6773557294649106, "grad_norm": 0.3464064598083496, "learning_rate": 1.487345529419733e-05, "loss": 0.5219, "step": 31938 }, { "epoch": 0.6773769379228436, "grad_norm": 0.3960304260253906, "learning_rate": 1.4873164080296631e-05, "loss": 0.4921, "step": 31939 }, { "epoch": 0.6773981463807767, "grad_norm": 0.35811400413513184, "learning_rate": 1.4872872860976043e-05, "loss": 0.4333, "step": 31940 }, { "epoch": 0.6774193548387096, "grad_norm": 0.36311841011047363, "learning_rate": 1.4872581636235885e-05, "loss": 0.5401, "step": 31941 }, { "epoch": 0.6774405632966427, "grad_norm": 0.35263025760650635, "learning_rate": 1.4872290406076486e-05, "loss": 0.496, "step": 31942 }, { "epoch": 0.6774617717545757, "grad_norm": 0.3852806091308594, "learning_rate": 1.4871999170498167e-05, "loss": 0.5411, "step": 31943 }, { "epoch": 0.6774829802125087, "grad_norm": 0.3552277982234955, "learning_rate": 1.4871707929501255e-05, "loss": 0.4524, "step": 31944 }, { "epoch": 0.6775041886704418, "grad_norm": 0.38716524839401245, "learning_rate": 1.4871416683086068e-05, "loss": 0.48, "step": 31945 }, { "epoch": 0.6775253971283748, "grad_norm": 0.3395322263240814, "learning_rate": 1.4871125431252938e-05, "loss": 0.4566, "step": 31946 }, { "epoch": 0.6775466055863079, "grad_norm": 0.37259185314178467, "learning_rate": 1.4870834174002182e-05, "loss": 0.5023, "step": 31947 }, { "epoch": 0.6775678140442408, "grad_norm": 0.31916195154190063, "learning_rate": 1.4870542911334128e-05, "loss": 0.4533, "step": 31948 }, { "epoch": 0.6775890225021739, "grad_norm": 0.32134637236595154, "learning_rate": 1.4870251643249097e-05, "loss": 0.4171, "step": 31949 }, { "epoch": 0.6776102309601069, "grad_norm": 0.31628161668777466, "learning_rate": 1.4869960369747414e-05, "loss": 0.4983, "step": 31950 }, { "epoch": 0.6776314394180399, "grad_norm": 0.33184975385665894, "learning_rate": 1.4869669090829404e-05, "loss": 0.4418, "step": 31951 }, { "epoch": 0.6776526478759729, "grad_norm": 0.39837175607681274, "learning_rate": 1.4869377806495393e-05, "loss": 0.4542, "step": 31952 }, { "epoch": 0.677673856333906, "grad_norm": 0.3396870195865631, "learning_rate": 1.4869086516745698e-05, "loss": 0.4696, "step": 31953 }, { "epoch": 0.6776950647918389, "grad_norm": 0.3837229311466217, "learning_rate": 1.486879522158065e-05, "loss": 0.4875, "step": 31954 }, { "epoch": 0.677716273249772, "grad_norm": 0.3303116261959076, "learning_rate": 1.486850392100057e-05, "loss": 0.5489, "step": 31955 }, { "epoch": 0.677737481707705, "grad_norm": 0.3793686032295227, "learning_rate": 1.4868212615005782e-05, "loss": 0.5411, "step": 31956 }, { "epoch": 0.6777586901656381, "grad_norm": 0.3669840097427368, "learning_rate": 1.4867921303596608e-05, "loss": 0.4984, "step": 31957 }, { "epoch": 0.6777798986235711, "grad_norm": 0.34398919343948364, "learning_rate": 1.4867629986773379e-05, "loss": 0.5084, "step": 31958 }, { "epoch": 0.6778011070815041, "grad_norm": 0.37973591685295105, "learning_rate": 1.486733866453641e-05, "loss": 0.5275, "step": 31959 }, { "epoch": 0.6778223155394372, "grad_norm": 0.34525126218795776, "learning_rate": 1.4867047336886032e-05, "loss": 0.5916, "step": 31960 }, { "epoch": 0.6778435239973701, "grad_norm": 0.41047123074531555, "learning_rate": 1.4866756003822564e-05, "loss": 0.529, "step": 31961 }, { "epoch": 0.6778647324553032, "grad_norm": 0.38714882731437683, "learning_rate": 1.4866464665346332e-05, "loss": 0.463, "step": 31962 }, { "epoch": 0.6778859409132362, "grad_norm": 0.36628854274749756, "learning_rate": 1.4866173321457661e-05, "loss": 0.4649, "step": 31963 }, { "epoch": 0.6779071493711692, "grad_norm": 0.4188278019428253, "learning_rate": 1.4865881972156875e-05, "loss": 0.5367, "step": 31964 }, { "epoch": 0.6779283578291022, "grad_norm": 0.36415648460388184, "learning_rate": 1.4865590617444299e-05, "loss": 0.5168, "step": 31965 }, { "epoch": 0.6779495662870353, "grad_norm": 0.33359187841415405, "learning_rate": 1.4865299257320253e-05, "loss": 0.4884, "step": 31966 }, { "epoch": 0.6779707747449683, "grad_norm": 0.35620689392089844, "learning_rate": 1.4865007891785064e-05, "loss": 0.4983, "step": 31967 }, { "epoch": 0.6779919832029013, "grad_norm": 0.4337049722671509, "learning_rate": 1.4864716520839054e-05, "loss": 0.4653, "step": 31968 }, { "epoch": 0.6780131916608343, "grad_norm": 0.34910711646080017, "learning_rate": 1.486442514448255e-05, "loss": 0.5406, "step": 31969 }, { "epoch": 0.6780344001187674, "grad_norm": 0.3491210639476776, "learning_rate": 1.4864133762715873e-05, "loss": 0.5265, "step": 31970 }, { "epoch": 0.6780556085767004, "grad_norm": 0.4658852219581604, "learning_rate": 1.4863842375539352e-05, "loss": 0.4323, "step": 31971 }, { "epoch": 0.6780768170346334, "grad_norm": 0.37571242451667786, "learning_rate": 1.4863550982953308e-05, "loss": 0.4865, "step": 31972 }, { "epoch": 0.6780980254925665, "grad_norm": 0.3770710229873657, "learning_rate": 1.4863259584958062e-05, "loss": 0.4646, "step": 31973 }, { "epoch": 0.6781192339504994, "grad_norm": 0.4742129445075989, "learning_rate": 1.486296818155394e-05, "loss": 0.5586, "step": 31974 }, { "epoch": 0.6781404424084325, "grad_norm": 0.3503362536430359, "learning_rate": 1.486267677274127e-05, "loss": 0.541, "step": 31975 }, { "epoch": 0.6781616508663655, "grad_norm": 0.324108749628067, "learning_rate": 1.486238535852037e-05, "loss": 0.4729, "step": 31976 }, { "epoch": 0.6781828593242986, "grad_norm": 0.6003075838088989, "learning_rate": 1.486209393889157e-05, "loss": 0.4475, "step": 31977 }, { "epoch": 0.6782040677822315, "grad_norm": 0.32871222496032715, "learning_rate": 1.486180251385519e-05, "loss": 0.4993, "step": 31978 }, { "epoch": 0.6782252762401646, "grad_norm": 0.35784927010536194, "learning_rate": 1.4861511083411558e-05, "loss": 0.5363, "step": 31979 }, { "epoch": 0.6782464846980976, "grad_norm": 0.3604305684566498, "learning_rate": 1.4861219647560992e-05, "loss": 0.5407, "step": 31980 }, { "epoch": 0.6782676931560306, "grad_norm": 0.3312554657459259, "learning_rate": 1.4860928206303823e-05, "loss": 0.4262, "step": 31981 }, { "epoch": 0.6782889016139636, "grad_norm": 0.3367413580417633, "learning_rate": 1.4860636759640368e-05, "loss": 0.4336, "step": 31982 }, { "epoch": 0.6783101100718967, "grad_norm": 0.35447174310684204, "learning_rate": 1.4860345307570959e-05, "loss": 0.5186, "step": 31983 }, { "epoch": 0.6783313185298296, "grad_norm": 0.34462881088256836, "learning_rate": 1.4860053850095912e-05, "loss": 0.4702, "step": 31984 }, { "epoch": 0.6783525269877627, "grad_norm": 0.34841740131378174, "learning_rate": 1.485976238721556e-05, "loss": 0.5104, "step": 31985 }, { "epoch": 0.6783737354456958, "grad_norm": 0.5441340208053589, "learning_rate": 1.4859470918930219e-05, "loss": 0.5188, "step": 31986 }, { "epoch": 0.6783949439036288, "grad_norm": 0.4357331693172455, "learning_rate": 1.4859179445240217e-05, "loss": 0.5211, "step": 31987 }, { "epoch": 0.6784161523615618, "grad_norm": 0.3275495767593384, "learning_rate": 1.4858887966145878e-05, "loss": 0.5044, "step": 31988 }, { "epoch": 0.6784373608194948, "grad_norm": 0.45555579662323, "learning_rate": 1.485859648164753e-05, "loss": 0.5398, "step": 31989 }, { "epoch": 0.6784585692774279, "grad_norm": 0.34037795662879944, "learning_rate": 1.485830499174549e-05, "loss": 0.5176, "step": 31990 }, { "epoch": 0.6784797777353608, "grad_norm": 0.3477896749973297, "learning_rate": 1.4858013496440084e-05, "loss": 0.4267, "step": 31991 }, { "epoch": 0.6785009861932939, "grad_norm": 0.4398662745952606, "learning_rate": 1.4857721995731637e-05, "loss": 0.4506, "step": 31992 }, { "epoch": 0.6785221946512269, "grad_norm": 0.3572339117527008, "learning_rate": 1.4857430489620476e-05, "loss": 0.5175, "step": 31993 }, { "epoch": 0.67854340310916, "grad_norm": 0.35989630222320557, "learning_rate": 1.4857138978106925e-05, "loss": 0.4737, "step": 31994 }, { "epoch": 0.6785646115670929, "grad_norm": 0.3735528588294983, "learning_rate": 1.4856847461191303e-05, "loss": 0.6103, "step": 31995 }, { "epoch": 0.678585820025026, "grad_norm": 0.37364670634269714, "learning_rate": 1.4856555938873937e-05, "loss": 0.5387, "step": 31996 }, { "epoch": 0.678607028482959, "grad_norm": 0.37314924597740173, "learning_rate": 1.4856264411155156e-05, "loss": 0.454, "step": 31997 }, { "epoch": 0.678628236940892, "grad_norm": 0.36230340600013733, "learning_rate": 1.4855972878035276e-05, "loss": 0.5081, "step": 31998 }, { "epoch": 0.6786494453988251, "grad_norm": 0.3855917453765869, "learning_rate": 1.4855681339514624e-05, "loss": 0.536, "step": 31999 }, { "epoch": 0.6786706538567581, "grad_norm": 0.4052799940109253, "learning_rate": 1.4855389795593529e-05, "loss": 0.4212, "step": 32000 }, { "epoch": 0.6786918623146911, "grad_norm": 0.36830228567123413, "learning_rate": 1.4855098246272311e-05, "loss": 0.5003, "step": 32001 }, { "epoch": 0.6787130707726241, "grad_norm": 0.3434827923774719, "learning_rate": 1.4854806691551293e-05, "loss": 0.4377, "step": 32002 }, { "epoch": 0.6787342792305572, "grad_norm": 0.47031843662261963, "learning_rate": 1.4854515131430805e-05, "loss": 0.5082, "step": 32003 }, { "epoch": 0.6787554876884901, "grad_norm": 0.4589504599571228, "learning_rate": 1.485422356591116e-05, "loss": 0.422, "step": 32004 }, { "epoch": 0.6787766961464232, "grad_norm": 0.4841410219669342, "learning_rate": 1.4853931994992698e-05, "loss": 0.4939, "step": 32005 }, { "epoch": 0.6787979046043562, "grad_norm": 0.5781131386756897, "learning_rate": 1.485364041867573e-05, "loss": 0.4792, "step": 32006 }, { "epoch": 0.6788191130622893, "grad_norm": 0.32792577147483826, "learning_rate": 1.4853348836960586e-05, "loss": 0.4513, "step": 32007 }, { "epoch": 0.6788403215202222, "grad_norm": 0.39053672552108765, "learning_rate": 1.4853057249847592e-05, "loss": 0.4805, "step": 32008 }, { "epoch": 0.6788615299781553, "grad_norm": 0.376697301864624, "learning_rate": 1.485276565733707e-05, "loss": 0.4707, "step": 32009 }, { "epoch": 0.6788827384360883, "grad_norm": 0.37261345982551575, "learning_rate": 1.485247405942934e-05, "loss": 0.4541, "step": 32010 }, { "epoch": 0.6789039468940213, "grad_norm": 0.3792981803417206, "learning_rate": 1.4852182456124732e-05, "loss": 0.4842, "step": 32011 }, { "epoch": 0.6789251553519544, "grad_norm": 0.4028254747390747, "learning_rate": 1.4851890847423572e-05, "loss": 0.5136, "step": 32012 }, { "epoch": 0.6789463638098874, "grad_norm": 0.35188934206962585, "learning_rate": 1.4851599233326177e-05, "loss": 0.4568, "step": 32013 }, { "epoch": 0.6789675722678205, "grad_norm": 0.40496519207954407, "learning_rate": 1.4851307613832876e-05, "loss": 0.5191, "step": 32014 }, { "epoch": 0.6789887807257534, "grad_norm": 0.33490288257598877, "learning_rate": 1.4851015988943995e-05, "loss": 0.4765, "step": 32015 }, { "epoch": 0.6790099891836865, "grad_norm": 0.35563746094703674, "learning_rate": 1.4850724358659856e-05, "loss": 0.4764, "step": 32016 }, { "epoch": 0.6790311976416195, "grad_norm": 0.319955974817276, "learning_rate": 1.4850432722980783e-05, "loss": 0.4781, "step": 32017 }, { "epoch": 0.6790524060995525, "grad_norm": 0.4197269082069397, "learning_rate": 1.48501410819071e-05, "loss": 0.5066, "step": 32018 }, { "epoch": 0.6790736145574855, "grad_norm": 0.32602089643478394, "learning_rate": 1.4849849435439132e-05, "loss": 0.431, "step": 32019 }, { "epoch": 0.6790948230154186, "grad_norm": 0.37109047174453735, "learning_rate": 1.4849557783577206e-05, "loss": 0.4916, "step": 32020 }, { "epoch": 0.6791160314733515, "grad_norm": 0.34728333353996277, "learning_rate": 1.4849266126321643e-05, "loss": 0.4788, "step": 32021 }, { "epoch": 0.6791372399312846, "grad_norm": 0.31433528661727905, "learning_rate": 1.4848974463672768e-05, "loss": 0.4974, "step": 32022 }, { "epoch": 0.6791584483892176, "grad_norm": 0.34203505516052246, "learning_rate": 1.4848682795630907e-05, "loss": 0.3915, "step": 32023 }, { "epoch": 0.6791796568471506, "grad_norm": 0.358999103307724, "learning_rate": 1.484839112219638e-05, "loss": 0.4841, "step": 32024 }, { "epoch": 0.6792008653050836, "grad_norm": 0.4872410297393799, "learning_rate": 1.4848099443369515e-05, "loss": 0.5084, "step": 32025 }, { "epoch": 0.6792220737630167, "grad_norm": 0.35951146483421326, "learning_rate": 1.4847807759150636e-05, "loss": 0.4964, "step": 32026 }, { "epoch": 0.6792432822209498, "grad_norm": 0.37220823764801025, "learning_rate": 1.4847516069540069e-05, "loss": 0.4685, "step": 32027 }, { "epoch": 0.6792644906788827, "grad_norm": 0.38095781207084656, "learning_rate": 1.4847224374538137e-05, "loss": 0.5623, "step": 32028 }, { "epoch": 0.6792856991368158, "grad_norm": 0.3650440275669098, "learning_rate": 1.4846932674145162e-05, "loss": 0.4154, "step": 32029 }, { "epoch": 0.6793069075947488, "grad_norm": 0.37948620319366455, "learning_rate": 1.4846640968361473e-05, "loss": 0.5584, "step": 32030 }, { "epoch": 0.6793281160526818, "grad_norm": 0.3527291715145111, "learning_rate": 1.484634925718739e-05, "loss": 0.5119, "step": 32031 }, { "epoch": 0.6793493245106148, "grad_norm": 0.33033475279808044, "learning_rate": 1.484605754062324e-05, "loss": 0.4571, "step": 32032 }, { "epoch": 0.6793705329685479, "grad_norm": 0.35496193170547485, "learning_rate": 1.4845765818669348e-05, "loss": 0.4802, "step": 32033 }, { "epoch": 0.6793917414264808, "grad_norm": 0.3877480924129486, "learning_rate": 1.4845474091326038e-05, "loss": 0.4642, "step": 32034 }, { "epoch": 0.6794129498844139, "grad_norm": 0.3679034113883972, "learning_rate": 1.484518235859363e-05, "loss": 0.5677, "step": 32035 }, { "epoch": 0.6794341583423469, "grad_norm": 0.4679988622665405, "learning_rate": 1.4844890620472456e-05, "loss": 0.5272, "step": 32036 }, { "epoch": 0.67945536680028, "grad_norm": 0.33979591727256775, "learning_rate": 1.4844598876962837e-05, "loss": 0.5061, "step": 32037 }, { "epoch": 0.6794765752582129, "grad_norm": 0.5965592265129089, "learning_rate": 1.4844307128065092e-05, "loss": 0.4482, "step": 32038 }, { "epoch": 0.679497783716146, "grad_norm": 0.35344332456588745, "learning_rate": 1.4844015373779556e-05, "loss": 0.529, "step": 32039 }, { "epoch": 0.6795189921740791, "grad_norm": 0.37610363960266113, "learning_rate": 1.4843723614106547e-05, "loss": 0.4855, "step": 32040 }, { "epoch": 0.679540200632012, "grad_norm": 0.32016417384147644, "learning_rate": 1.4843431849046388e-05, "loss": 0.4243, "step": 32041 }, { "epoch": 0.6795614090899451, "grad_norm": 0.3747832477092743, "learning_rate": 1.4843140078599412e-05, "loss": 0.5294, "step": 32042 }, { "epoch": 0.6795826175478781, "grad_norm": 0.3753858804702759, "learning_rate": 1.4842848302765931e-05, "loss": 0.4688, "step": 32043 }, { "epoch": 0.6796038260058112, "grad_norm": 0.3424992263317108, "learning_rate": 1.4842556521546279e-05, "loss": 0.5009, "step": 32044 }, { "epoch": 0.6796250344637441, "grad_norm": 0.6392465233802795, "learning_rate": 1.484226473494078e-05, "loss": 0.5645, "step": 32045 }, { "epoch": 0.6796462429216772, "grad_norm": 0.36610904335975647, "learning_rate": 1.4841972942949752e-05, "loss": 0.4834, "step": 32046 }, { "epoch": 0.6796674513796102, "grad_norm": 0.36446473002433777, "learning_rate": 1.4841681145573525e-05, "loss": 0.5995, "step": 32047 }, { "epoch": 0.6796886598375432, "grad_norm": 0.3230355978012085, "learning_rate": 1.4841389342812424e-05, "loss": 0.451, "step": 32048 }, { "epoch": 0.6797098682954762, "grad_norm": 0.3984319567680359, "learning_rate": 1.4841097534666772e-05, "loss": 0.4895, "step": 32049 }, { "epoch": 0.6797310767534093, "grad_norm": 0.3717949688434601, "learning_rate": 1.4840805721136893e-05, "loss": 0.4741, "step": 32050 }, { "epoch": 0.6797522852113422, "grad_norm": 0.4011231064796448, "learning_rate": 1.484051390222311e-05, "loss": 0.5135, "step": 32051 }, { "epoch": 0.6797734936692753, "grad_norm": 7.947997570037842, "learning_rate": 1.484022207792575e-05, "loss": 0.5184, "step": 32052 }, { "epoch": 0.6797947021272084, "grad_norm": 0.34894952178001404, "learning_rate": 1.483993024824514e-05, "loss": 0.5141, "step": 32053 }, { "epoch": 0.6798159105851413, "grad_norm": 0.43116527795791626, "learning_rate": 1.48396384131816e-05, "loss": 0.4556, "step": 32054 }, { "epoch": 0.6798371190430744, "grad_norm": 0.37232720851898193, "learning_rate": 1.4839346572735455e-05, "loss": 0.4924, "step": 32055 }, { "epoch": 0.6798583275010074, "grad_norm": 0.32917115092277527, "learning_rate": 1.4839054726907032e-05, "loss": 0.324, "step": 32056 }, { "epoch": 0.6798795359589405, "grad_norm": 0.33604317903518677, "learning_rate": 1.4838762875696653e-05, "loss": 0.5512, "step": 32057 }, { "epoch": 0.6799007444168734, "grad_norm": 0.40098097920417786, "learning_rate": 1.4838471019104643e-05, "loss": 0.5251, "step": 32058 }, { "epoch": 0.6799219528748065, "grad_norm": 0.3624286353588104, "learning_rate": 1.4838179157131331e-05, "loss": 0.6503, "step": 32059 }, { "epoch": 0.6799431613327395, "grad_norm": 0.30528199672698975, "learning_rate": 1.4837887289777036e-05, "loss": 0.4413, "step": 32060 }, { "epoch": 0.6799643697906725, "grad_norm": 0.36755961179733276, "learning_rate": 1.4837595417042086e-05, "loss": 0.463, "step": 32061 }, { "epoch": 0.6799855782486055, "grad_norm": 0.3557108938694, "learning_rate": 1.4837303538926804e-05, "loss": 0.4637, "step": 32062 }, { "epoch": 0.6800067867065386, "grad_norm": 0.33225446939468384, "learning_rate": 1.4837011655431515e-05, "loss": 0.4728, "step": 32063 }, { "epoch": 0.6800279951644715, "grad_norm": 0.4012733995914459, "learning_rate": 1.4836719766556542e-05, "loss": 0.5429, "step": 32064 }, { "epoch": 0.6800492036224046, "grad_norm": 0.3637585937976837, "learning_rate": 1.4836427872302213e-05, "loss": 0.5304, "step": 32065 }, { "epoch": 0.6800704120803377, "grad_norm": 0.35607102513313293, "learning_rate": 1.4836135972668851e-05, "loss": 0.4432, "step": 32066 }, { "epoch": 0.6800916205382707, "grad_norm": 0.36335012316703796, "learning_rate": 1.483584406765678e-05, "loss": 0.5163, "step": 32067 }, { "epoch": 0.6801128289962037, "grad_norm": 0.37062713503837585, "learning_rate": 1.4835552157266327e-05, "loss": 0.5055, "step": 32068 }, { "epoch": 0.6801340374541367, "grad_norm": 0.38470762968063354, "learning_rate": 1.4835260241497813e-05, "loss": 0.5825, "step": 32069 }, { "epoch": 0.6801552459120698, "grad_norm": 0.3480818271636963, "learning_rate": 1.483496832035156e-05, "loss": 0.4389, "step": 32070 }, { "epoch": 0.6801764543700027, "grad_norm": 0.3224277198314667, "learning_rate": 1.4834676393827904e-05, "loss": 0.475, "step": 32071 }, { "epoch": 0.6801976628279358, "grad_norm": 0.4433680474758148, "learning_rate": 1.4834384461927162e-05, "loss": 0.4634, "step": 32072 }, { "epoch": 0.6802188712858688, "grad_norm": 0.3931924104690552, "learning_rate": 1.4834092524649659e-05, "loss": 0.5099, "step": 32073 }, { "epoch": 0.6802400797438019, "grad_norm": 0.3842540681362152, "learning_rate": 1.4833800581995718e-05, "loss": 0.4865, "step": 32074 }, { "epoch": 0.6802612882017348, "grad_norm": 0.4142497479915619, "learning_rate": 1.483350863396567e-05, "loss": 0.4942, "step": 32075 }, { "epoch": 0.6802824966596679, "grad_norm": 0.3685329258441925, "learning_rate": 1.4833216680559832e-05, "loss": 0.426, "step": 32076 }, { "epoch": 0.6803037051176009, "grad_norm": 0.3730786442756653, "learning_rate": 1.4832924721778537e-05, "loss": 0.4386, "step": 32077 }, { "epoch": 0.6803249135755339, "grad_norm": 0.323635458946228, "learning_rate": 1.4832632757622101e-05, "loss": 0.4325, "step": 32078 }, { "epoch": 0.6803461220334669, "grad_norm": 0.3572860360145569, "learning_rate": 1.4832340788090855e-05, "loss": 0.5308, "step": 32079 }, { "epoch": 0.6803673304914, "grad_norm": 0.36167576909065247, "learning_rate": 1.4832048813185119e-05, "loss": 0.4371, "step": 32080 }, { "epoch": 0.680388538949333, "grad_norm": 0.42486390471458435, "learning_rate": 1.4831756832905223e-05, "loss": 0.5445, "step": 32081 }, { "epoch": 0.680409747407266, "grad_norm": 0.45664718747138977, "learning_rate": 1.4831464847251489e-05, "loss": 0.4934, "step": 32082 }, { "epoch": 0.6804309558651991, "grad_norm": 0.40310871601104736, "learning_rate": 1.4831172856224237e-05, "loss": 0.5563, "step": 32083 }, { "epoch": 0.680452164323132, "grad_norm": 0.47395071387290955, "learning_rate": 1.48308808598238e-05, "loss": 0.5394, "step": 32084 }, { "epoch": 0.6804733727810651, "grad_norm": 0.4404654800891876, "learning_rate": 1.48305888580505e-05, "loss": 0.5064, "step": 32085 }, { "epoch": 0.6804945812389981, "grad_norm": 0.3651242256164551, "learning_rate": 1.483029685090466e-05, "loss": 0.5256, "step": 32086 }, { "epoch": 0.6805157896969312, "grad_norm": 0.3953164219856262, "learning_rate": 1.4830004838386609e-05, "loss": 0.57, "step": 32087 }, { "epoch": 0.6805369981548641, "grad_norm": 0.3789040446281433, "learning_rate": 1.4829712820496665e-05, "loss": 0.4374, "step": 32088 }, { "epoch": 0.6805582066127972, "grad_norm": 0.3088158667087555, "learning_rate": 1.4829420797235154e-05, "loss": 0.4177, "step": 32089 }, { "epoch": 0.6805794150707302, "grad_norm": 0.33288124203681946, "learning_rate": 1.4829128768602407e-05, "loss": 0.5036, "step": 32090 }, { "epoch": 0.6806006235286632, "grad_norm": 0.31438618898391724, "learning_rate": 1.4828836734598745e-05, "loss": 0.5151, "step": 32091 }, { "epoch": 0.6806218319865962, "grad_norm": 0.33170056343078613, "learning_rate": 1.4828544695224491e-05, "loss": 0.5083, "step": 32092 }, { "epoch": 0.6806430404445293, "grad_norm": 0.37721705436706543, "learning_rate": 1.4828252650479975e-05, "loss": 0.4793, "step": 32093 }, { "epoch": 0.6806642489024624, "grad_norm": 0.3326786160469055, "learning_rate": 1.4827960600365515e-05, "loss": 0.5257, "step": 32094 }, { "epoch": 0.6806854573603953, "grad_norm": 0.348574697971344, "learning_rate": 1.482766854488144e-05, "loss": 0.5433, "step": 32095 }, { "epoch": 0.6807066658183284, "grad_norm": 0.32579556107521057, "learning_rate": 1.4827376484028073e-05, "loss": 0.459, "step": 32096 }, { "epoch": 0.6807278742762614, "grad_norm": 0.36521464586257935, "learning_rate": 1.4827084417805742e-05, "loss": 0.4468, "step": 32097 }, { "epoch": 0.6807490827341944, "grad_norm": 0.3540935814380646, "learning_rate": 1.4826792346214766e-05, "loss": 0.5272, "step": 32098 }, { "epoch": 0.6807702911921274, "grad_norm": 0.43442434072494507, "learning_rate": 1.482650026925548e-05, "loss": 0.5391, "step": 32099 }, { "epoch": 0.6807914996500605, "grad_norm": 0.41029083728790283, "learning_rate": 1.4826208186928197e-05, "loss": 0.5449, "step": 32100 }, { "epoch": 0.6808127081079934, "grad_norm": 0.3554363548755646, "learning_rate": 1.4825916099233249e-05, "loss": 0.5079, "step": 32101 }, { "epoch": 0.6808339165659265, "grad_norm": 0.388143926858902, "learning_rate": 1.4825624006170958e-05, "loss": 0.514, "step": 32102 }, { "epoch": 0.6808551250238595, "grad_norm": 0.31172823905944824, "learning_rate": 1.4825331907741648e-05, "loss": 0.4641, "step": 32103 }, { "epoch": 0.6808763334817926, "grad_norm": 0.34302711486816406, "learning_rate": 1.482503980394565e-05, "loss": 0.4357, "step": 32104 }, { "epoch": 0.6808975419397255, "grad_norm": 0.363835871219635, "learning_rate": 1.4824747694783284e-05, "loss": 0.5098, "step": 32105 }, { "epoch": 0.6809187503976586, "grad_norm": 0.501274824142456, "learning_rate": 1.4824455580254873e-05, "loss": 0.4451, "step": 32106 }, { "epoch": 0.6809399588555917, "grad_norm": 0.35555633902549744, "learning_rate": 1.4824163460360748e-05, "loss": 0.4868, "step": 32107 }, { "epoch": 0.6809611673135246, "grad_norm": 0.36144259572029114, "learning_rate": 1.4823871335101226e-05, "loss": 0.5122, "step": 32108 }, { "epoch": 0.6809823757714577, "grad_norm": 0.35769012570381165, "learning_rate": 1.4823579204476638e-05, "loss": 0.4401, "step": 32109 }, { "epoch": 0.6810035842293907, "grad_norm": 0.293241024017334, "learning_rate": 1.4823287068487307e-05, "loss": 0.4438, "step": 32110 }, { "epoch": 0.6810247926873237, "grad_norm": 0.370311975479126, "learning_rate": 1.4822994927133559e-05, "loss": 0.5341, "step": 32111 }, { "epoch": 0.6810460011452567, "grad_norm": 0.39543694257736206, "learning_rate": 1.4822702780415717e-05, "loss": 0.4225, "step": 32112 }, { "epoch": 0.6810672096031898, "grad_norm": 0.3958241641521454, "learning_rate": 1.4822410628334107e-05, "loss": 0.4766, "step": 32113 }, { "epoch": 0.6810884180611227, "grad_norm": 0.34277671575546265, "learning_rate": 1.4822118470889054e-05, "loss": 0.5098, "step": 32114 }, { "epoch": 0.6811096265190558, "grad_norm": 0.3454163372516632, "learning_rate": 1.4821826308080883e-05, "loss": 0.4896, "step": 32115 }, { "epoch": 0.6811308349769888, "grad_norm": 0.42188194394111633, "learning_rate": 1.4821534139909917e-05, "loss": 0.4958, "step": 32116 }, { "epoch": 0.6811520434349219, "grad_norm": 0.3182724118232727, "learning_rate": 1.4821241966376483e-05, "loss": 0.442, "step": 32117 }, { "epoch": 0.6811732518928548, "grad_norm": 0.40064263343811035, "learning_rate": 1.4820949787480906e-05, "loss": 0.5541, "step": 32118 }, { "epoch": 0.6811944603507879, "grad_norm": 0.5659735798835754, "learning_rate": 1.4820657603223513e-05, "loss": 0.5829, "step": 32119 }, { "epoch": 0.6812156688087209, "grad_norm": 0.3424190282821655, "learning_rate": 1.4820365413604625e-05, "loss": 0.4427, "step": 32120 }, { "epoch": 0.6812368772666539, "grad_norm": 0.3752221465110779, "learning_rate": 1.4820073218624567e-05, "loss": 0.5832, "step": 32121 }, { "epoch": 0.681258085724587, "grad_norm": 0.3846334218978882, "learning_rate": 1.4819781018283668e-05, "loss": 0.4723, "step": 32122 }, { "epoch": 0.68127929418252, "grad_norm": 0.4202343821525574, "learning_rate": 1.4819488812582249e-05, "loss": 0.4404, "step": 32123 }, { "epoch": 0.681300502640453, "grad_norm": 0.32692450284957886, "learning_rate": 1.4819196601520635e-05, "loss": 0.4998, "step": 32124 }, { "epoch": 0.681321711098386, "grad_norm": 0.3208681643009186, "learning_rate": 1.4818904385099153e-05, "loss": 0.3853, "step": 32125 }, { "epoch": 0.6813429195563191, "grad_norm": 0.32427939772605896, "learning_rate": 1.4818612163318129e-05, "loss": 0.5266, "step": 32126 }, { "epoch": 0.6813641280142521, "grad_norm": 0.39474424719810486, "learning_rate": 1.4818319936177885e-05, "loss": 0.4826, "step": 32127 }, { "epoch": 0.6813853364721851, "grad_norm": 0.3863750100135803, "learning_rate": 1.4818027703678745e-05, "loss": 0.4601, "step": 32128 }, { "epoch": 0.6814065449301181, "grad_norm": 0.3208330571651459, "learning_rate": 1.481773546582104e-05, "loss": 0.4546, "step": 32129 }, { "epoch": 0.6814277533880512, "grad_norm": 0.3608415424823761, "learning_rate": 1.4817443222605094e-05, "loss": 0.4528, "step": 32130 }, { "epoch": 0.6814489618459841, "grad_norm": 0.35455113649368286, "learning_rate": 1.4817150974031225e-05, "loss": 0.3756, "step": 32131 }, { "epoch": 0.6814701703039172, "grad_norm": 0.3452363908290863, "learning_rate": 1.4816858720099765e-05, "loss": 0.4236, "step": 32132 }, { "epoch": 0.6814913787618502, "grad_norm": 0.3449961841106415, "learning_rate": 1.4816566460811038e-05, "loss": 0.5025, "step": 32133 }, { "epoch": 0.6815125872197833, "grad_norm": 0.3479604125022888, "learning_rate": 1.4816274196165361e-05, "loss": 0.5542, "step": 32134 }, { "epoch": 0.6815337956777163, "grad_norm": 0.3557599186897278, "learning_rate": 1.481598192616307e-05, "loss": 0.4349, "step": 32135 }, { "epoch": 0.6815550041356493, "grad_norm": 0.3366803526878357, "learning_rate": 1.4815689650804488e-05, "loss": 0.5452, "step": 32136 }, { "epoch": 0.6815762125935824, "grad_norm": 0.3561885356903076, "learning_rate": 1.4815397370089932e-05, "loss": 0.4353, "step": 32137 }, { "epoch": 0.6815974210515153, "grad_norm": 0.36203533411026, "learning_rate": 1.481510508401974e-05, "loss": 0.5024, "step": 32138 }, { "epoch": 0.6816186295094484, "grad_norm": 0.41984790563583374, "learning_rate": 1.4814812792594225e-05, "loss": 0.5651, "step": 32139 }, { "epoch": 0.6816398379673814, "grad_norm": 0.36545878648757935, "learning_rate": 1.4814520495813715e-05, "loss": 0.5107, "step": 32140 }, { "epoch": 0.6816610464253144, "grad_norm": 0.37469619512557983, "learning_rate": 1.4814228193678542e-05, "loss": 0.5724, "step": 32141 }, { "epoch": 0.6816822548832474, "grad_norm": 0.35522887110710144, "learning_rate": 1.4813935886189024e-05, "loss": 0.5257, "step": 32142 }, { "epoch": 0.6817034633411805, "grad_norm": 0.509290874004364, "learning_rate": 1.4813643573345488e-05, "loss": 0.485, "step": 32143 }, { "epoch": 0.6817246717991134, "grad_norm": 0.36929836869239807, "learning_rate": 1.4813351255148262e-05, "loss": 0.4434, "step": 32144 }, { "epoch": 0.6817458802570465, "grad_norm": 0.34643518924713135, "learning_rate": 1.4813058931597666e-05, "loss": 0.5071, "step": 32145 }, { "epoch": 0.6817670887149795, "grad_norm": 0.32749176025390625, "learning_rate": 1.481276660269403e-05, "loss": 0.5081, "step": 32146 }, { "epoch": 0.6817882971729126, "grad_norm": 0.3811682462692261, "learning_rate": 1.4812474268437673e-05, "loss": 0.477, "step": 32147 }, { "epoch": 0.6818095056308456, "grad_norm": 0.4129648804664612, "learning_rate": 1.4812181928828927e-05, "loss": 0.5514, "step": 32148 }, { "epoch": 0.6818307140887786, "grad_norm": 0.4008437991142273, "learning_rate": 1.4811889583868113e-05, "loss": 0.5156, "step": 32149 }, { "epoch": 0.6818519225467117, "grad_norm": 0.3306157886981964, "learning_rate": 1.481159723355556e-05, "loss": 0.4815, "step": 32150 }, { "epoch": 0.6818731310046446, "grad_norm": 0.40282875299453735, "learning_rate": 1.4811304877891585e-05, "loss": 0.5292, "step": 32151 }, { "epoch": 0.6818943394625777, "grad_norm": 0.41090717911720276, "learning_rate": 1.4811012516876523e-05, "loss": 0.5665, "step": 32152 }, { "epoch": 0.6819155479205107, "grad_norm": 0.36211922764778137, "learning_rate": 1.481072015051069e-05, "loss": 0.5212, "step": 32153 }, { "epoch": 0.6819367563784438, "grad_norm": 0.35514959692955017, "learning_rate": 1.481042777879442e-05, "loss": 0.4934, "step": 32154 }, { "epoch": 0.6819579648363767, "grad_norm": 0.35298267006874084, "learning_rate": 1.4810135401728035e-05, "loss": 0.5452, "step": 32155 }, { "epoch": 0.6819791732943098, "grad_norm": 0.3440067172050476, "learning_rate": 1.4809843019311856e-05, "loss": 0.5322, "step": 32156 }, { "epoch": 0.6820003817522428, "grad_norm": 0.5217160582542419, "learning_rate": 1.4809550631546212e-05, "loss": 0.4521, "step": 32157 }, { "epoch": 0.6820215902101758, "grad_norm": 0.591044008731842, "learning_rate": 1.480925823843143e-05, "loss": 0.4773, "step": 32158 }, { "epoch": 0.6820427986681088, "grad_norm": 0.35572701692581177, "learning_rate": 1.4808965839967831e-05, "loss": 0.5207, "step": 32159 }, { "epoch": 0.6820640071260419, "grad_norm": 0.35077187418937683, "learning_rate": 1.4808673436155739e-05, "loss": 0.5757, "step": 32160 }, { "epoch": 0.6820852155839748, "grad_norm": 0.3409883677959442, "learning_rate": 1.4808381026995486e-05, "loss": 0.5645, "step": 32161 }, { "epoch": 0.6821064240419079, "grad_norm": 0.35524794459342957, "learning_rate": 1.4808088612487392e-05, "loss": 0.5371, "step": 32162 }, { "epoch": 0.682127632499841, "grad_norm": 0.37605032324790955, "learning_rate": 1.4807796192631785e-05, "loss": 0.5097, "step": 32163 }, { "epoch": 0.682148840957774, "grad_norm": 0.3411612808704376, "learning_rate": 1.4807503767428987e-05, "loss": 0.5725, "step": 32164 }, { "epoch": 0.682170049415707, "grad_norm": 0.44776615500450134, "learning_rate": 1.4807211336879326e-05, "loss": 0.4798, "step": 32165 }, { "epoch": 0.68219125787364, "grad_norm": 0.3375033736228943, "learning_rate": 1.4806918900983125e-05, "loss": 0.4469, "step": 32166 }, { "epoch": 0.6822124663315731, "grad_norm": 0.36774200201034546, "learning_rate": 1.4806626459740714e-05, "loss": 0.4177, "step": 32167 }, { "epoch": 0.682233674789506, "grad_norm": 0.5150462985038757, "learning_rate": 1.480633401315241e-05, "loss": 0.501, "step": 32168 }, { "epoch": 0.6822548832474391, "grad_norm": 0.37193259596824646, "learning_rate": 1.4806041561218548e-05, "loss": 0.5036, "step": 32169 }, { "epoch": 0.6822760917053721, "grad_norm": 0.3326205313205719, "learning_rate": 1.4805749103939444e-05, "loss": 0.469, "step": 32170 }, { "epoch": 0.6822973001633051, "grad_norm": 0.3418008089065552, "learning_rate": 1.480545664131543e-05, "loss": 0.5335, "step": 32171 }, { "epoch": 0.6823185086212381, "grad_norm": 0.3757447898387909, "learning_rate": 1.480516417334683e-05, "loss": 0.4831, "step": 32172 }, { "epoch": 0.6823397170791712, "grad_norm": 0.373422235250473, "learning_rate": 1.4804871700033964e-05, "loss": 0.391, "step": 32173 }, { "epoch": 0.6823609255371041, "grad_norm": 0.3460083305835724, "learning_rate": 1.4804579221377165e-05, "loss": 0.3744, "step": 32174 }, { "epoch": 0.6823821339950372, "grad_norm": 0.3252638280391693, "learning_rate": 1.4804286737376755e-05, "loss": 0.4488, "step": 32175 }, { "epoch": 0.6824033424529703, "grad_norm": 0.3296408951282501, "learning_rate": 1.4803994248033056e-05, "loss": 0.5177, "step": 32176 }, { "epoch": 0.6824245509109033, "grad_norm": 0.3215049207210541, "learning_rate": 1.48037017533464e-05, "loss": 0.4923, "step": 32177 }, { "epoch": 0.6824457593688363, "grad_norm": 15.222801208496094, "learning_rate": 1.4803409253317106e-05, "loss": 0.5024, "step": 32178 }, { "epoch": 0.6824669678267693, "grad_norm": 0.4063987135887146, "learning_rate": 1.4803116747945501e-05, "loss": 0.5043, "step": 32179 }, { "epoch": 0.6824881762847024, "grad_norm": 0.3987216651439667, "learning_rate": 1.4802824237231911e-05, "loss": 0.4707, "step": 32180 }, { "epoch": 0.6825093847426353, "grad_norm": 0.34991657733917236, "learning_rate": 1.4802531721176668e-05, "loss": 0.4877, "step": 32181 }, { "epoch": 0.6825305932005684, "grad_norm": 0.38375595211982727, "learning_rate": 1.4802239199780083e-05, "loss": 0.4691, "step": 32182 }, { "epoch": 0.6825518016585014, "grad_norm": 0.34995248913764954, "learning_rate": 1.4801946673042496e-05, "loss": 0.4736, "step": 32183 }, { "epoch": 0.6825730101164345, "grad_norm": 0.38065165281295776, "learning_rate": 1.4801654140964221e-05, "loss": 0.5538, "step": 32184 }, { "epoch": 0.6825942185743674, "grad_norm": 0.37348854541778564, "learning_rate": 1.4801361603545588e-05, "loss": 0.5214, "step": 32185 }, { "epoch": 0.6826154270323005, "grad_norm": 0.36712029576301575, "learning_rate": 1.4801069060786923e-05, "loss": 0.4719, "step": 32186 }, { "epoch": 0.6826366354902335, "grad_norm": 0.5659695267677307, "learning_rate": 1.4800776512688553e-05, "loss": 0.6102, "step": 32187 }, { "epoch": 0.6826578439481665, "grad_norm": 0.3519807457923889, "learning_rate": 1.48004839592508e-05, "loss": 0.5098, "step": 32188 }, { "epoch": 0.6826790524060996, "grad_norm": 0.401034951210022, "learning_rate": 1.480019140047399e-05, "loss": 0.4721, "step": 32189 }, { "epoch": 0.6827002608640326, "grad_norm": 0.4108051359653473, "learning_rate": 1.4799898836358448e-05, "loss": 0.5429, "step": 32190 }, { "epoch": 0.6827214693219656, "grad_norm": 0.3823801875114441, "learning_rate": 1.4799606266904501e-05, "loss": 0.4959, "step": 32191 }, { "epoch": 0.6827426777798986, "grad_norm": 0.362135112285614, "learning_rate": 1.4799313692112474e-05, "loss": 0.5111, "step": 32192 }, { "epoch": 0.6827638862378317, "grad_norm": 0.37809860706329346, "learning_rate": 1.479902111198269e-05, "loss": 0.434, "step": 32193 }, { "epoch": 0.6827850946957646, "grad_norm": 0.39734503626823425, "learning_rate": 1.4798728526515478e-05, "loss": 0.5096, "step": 32194 }, { "epoch": 0.6828063031536977, "grad_norm": 0.5001488924026489, "learning_rate": 1.4798435935711164e-05, "loss": 0.537, "step": 32195 }, { "epoch": 0.6828275116116307, "grad_norm": 0.36820530891418457, "learning_rate": 1.4798143339570068e-05, "loss": 0.5117, "step": 32196 }, { "epoch": 0.6828487200695638, "grad_norm": 0.3214596211910248, "learning_rate": 1.4797850738092519e-05, "loss": 0.4381, "step": 32197 }, { "epoch": 0.6828699285274967, "grad_norm": 0.37690117955207825, "learning_rate": 1.4797558131278842e-05, "loss": 0.5837, "step": 32198 }, { "epoch": 0.6828911369854298, "grad_norm": 0.37184080481529236, "learning_rate": 1.4797265519129362e-05, "loss": 0.5196, "step": 32199 }, { "epoch": 0.6829123454433628, "grad_norm": 0.4187394976615906, "learning_rate": 1.4796972901644408e-05, "loss": 0.4679, "step": 32200 }, { "epoch": 0.6829335539012958, "grad_norm": 0.4777820408344269, "learning_rate": 1.4796680278824302e-05, "loss": 0.5174, "step": 32201 }, { "epoch": 0.6829547623592288, "grad_norm": 0.4083362817764282, "learning_rate": 1.4796387650669365e-05, "loss": 0.501, "step": 32202 }, { "epoch": 0.6829759708171619, "grad_norm": 0.35412630438804626, "learning_rate": 1.4796095017179934e-05, "loss": 0.459, "step": 32203 }, { "epoch": 0.682997179275095, "grad_norm": 0.31708306074142456, "learning_rate": 1.4795802378356323e-05, "loss": 0.4887, "step": 32204 }, { "epoch": 0.6830183877330279, "grad_norm": 0.3714311420917511, "learning_rate": 1.4795509734198862e-05, "loss": 0.5043, "step": 32205 }, { "epoch": 0.683039596190961, "grad_norm": 0.36959022283554077, "learning_rate": 1.479521708470788e-05, "loss": 0.5387, "step": 32206 }, { "epoch": 0.683060804648894, "grad_norm": 0.337799608707428, "learning_rate": 1.4794924429883697e-05, "loss": 0.4803, "step": 32207 }, { "epoch": 0.683082013106827, "grad_norm": 0.4199913740158081, "learning_rate": 1.4794631769726644e-05, "loss": 0.5177, "step": 32208 }, { "epoch": 0.68310322156476, "grad_norm": 0.3212253153324127, "learning_rate": 1.4794339104237042e-05, "loss": 0.4617, "step": 32209 }, { "epoch": 0.6831244300226931, "grad_norm": 0.6025630235671997, "learning_rate": 1.4794046433415214e-05, "loss": 0.5077, "step": 32210 }, { "epoch": 0.683145638480626, "grad_norm": 0.39049914479255676, "learning_rate": 1.479375375726149e-05, "loss": 0.5637, "step": 32211 }, { "epoch": 0.6831668469385591, "grad_norm": 0.39060381054878235, "learning_rate": 1.47934610757762e-05, "loss": 0.4807, "step": 32212 }, { "epoch": 0.6831880553964921, "grad_norm": 0.39617201685905457, "learning_rate": 1.4793168388959657e-05, "loss": 0.4715, "step": 32213 }, { "epoch": 0.6832092638544252, "grad_norm": 0.3354383707046509, "learning_rate": 1.47928756968122e-05, "loss": 0.456, "step": 32214 }, { "epoch": 0.6832304723123581, "grad_norm": 0.3294125199317932, "learning_rate": 1.4792582999334145e-05, "loss": 0.5058, "step": 32215 }, { "epoch": 0.6832516807702912, "grad_norm": 0.32773861289024353, "learning_rate": 1.4792290296525821e-05, "loss": 0.5802, "step": 32216 }, { "epoch": 0.6832728892282243, "grad_norm": 0.3951842784881592, "learning_rate": 1.4791997588387554e-05, "loss": 0.441, "step": 32217 }, { "epoch": 0.6832940976861572, "grad_norm": 0.35957226157188416, "learning_rate": 1.4791704874919668e-05, "loss": 0.467, "step": 32218 }, { "epoch": 0.6833153061440903, "grad_norm": 0.3513661026954651, "learning_rate": 1.4791412156122489e-05, "loss": 0.4272, "step": 32219 }, { "epoch": 0.6833365146020233, "grad_norm": 0.3145290017127991, "learning_rate": 1.4791119431996346e-05, "loss": 0.4715, "step": 32220 }, { "epoch": 0.6833577230599563, "grad_norm": 0.3777403235435486, "learning_rate": 1.479082670254156e-05, "loss": 0.5235, "step": 32221 }, { "epoch": 0.6833789315178893, "grad_norm": 0.362369567155838, "learning_rate": 1.4790533967758458e-05, "loss": 0.5032, "step": 32222 }, { "epoch": 0.6834001399758224, "grad_norm": 0.3387986421585083, "learning_rate": 1.4790241227647367e-05, "loss": 0.3939, "step": 32223 }, { "epoch": 0.6834213484337553, "grad_norm": 0.35470065474510193, "learning_rate": 1.4789948482208606e-05, "loss": 0.4449, "step": 32224 }, { "epoch": 0.6834425568916884, "grad_norm": 0.40955567359924316, "learning_rate": 1.4789655731442512e-05, "loss": 0.5254, "step": 32225 }, { "epoch": 0.6834637653496214, "grad_norm": 0.35652291774749756, "learning_rate": 1.4789362975349403e-05, "loss": 0.5187, "step": 32226 }, { "epoch": 0.6834849738075545, "grad_norm": 0.3561791181564331, "learning_rate": 1.4789070213929604e-05, "loss": 0.4877, "step": 32227 }, { "epoch": 0.6835061822654874, "grad_norm": 0.34223711490631104, "learning_rate": 1.4788777447183447e-05, "loss": 0.4604, "step": 32228 }, { "epoch": 0.6835273907234205, "grad_norm": 0.35381218791007996, "learning_rate": 1.478848467511125e-05, "loss": 0.5376, "step": 32229 }, { "epoch": 0.6835485991813536, "grad_norm": 0.3554231524467468, "learning_rate": 1.478819189771334e-05, "loss": 0.4763, "step": 32230 }, { "epoch": 0.6835698076392865, "grad_norm": 0.3488641679286957, "learning_rate": 1.4787899114990047e-05, "loss": 0.5306, "step": 32231 }, { "epoch": 0.6835910160972196, "grad_norm": 0.3338567614555359, "learning_rate": 1.4787606326941694e-05, "loss": 0.4883, "step": 32232 }, { "epoch": 0.6836122245551526, "grad_norm": 0.40558645129203796, "learning_rate": 1.4787313533568609e-05, "loss": 0.4614, "step": 32233 }, { "epoch": 0.6836334330130857, "grad_norm": 0.341216504573822, "learning_rate": 1.4787020734871113e-05, "loss": 0.4443, "step": 32234 }, { "epoch": 0.6836546414710186, "grad_norm": 0.3522684872150421, "learning_rate": 1.4786727930849533e-05, "loss": 0.5323, "step": 32235 }, { "epoch": 0.6836758499289517, "grad_norm": 0.33624905347824097, "learning_rate": 1.4786435121504198e-05, "loss": 0.5542, "step": 32236 }, { "epoch": 0.6836970583868847, "grad_norm": 0.3764737546443939, "learning_rate": 1.478614230683543e-05, "loss": 0.4869, "step": 32237 }, { "epoch": 0.6837182668448177, "grad_norm": 0.40135958790779114, "learning_rate": 1.4785849486843558e-05, "loss": 0.5522, "step": 32238 }, { "epoch": 0.6837394753027507, "grad_norm": 0.5540624260902405, "learning_rate": 1.4785556661528906e-05, "loss": 0.6546, "step": 32239 }, { "epoch": 0.6837606837606838, "grad_norm": 0.42058876156806946, "learning_rate": 1.4785263830891799e-05, "loss": 0.4914, "step": 32240 }, { "epoch": 0.6837818922186167, "grad_norm": 0.3585306704044342, "learning_rate": 1.4784970994932562e-05, "loss": 0.4716, "step": 32241 }, { "epoch": 0.6838031006765498, "grad_norm": 0.4248732924461365, "learning_rate": 1.4784678153651522e-05, "loss": 0.4991, "step": 32242 }, { "epoch": 0.6838243091344828, "grad_norm": 0.36846303939819336, "learning_rate": 1.4784385307049002e-05, "loss": 0.5385, "step": 32243 }, { "epoch": 0.6838455175924159, "grad_norm": 0.3722621500492096, "learning_rate": 1.4784092455125334e-05, "loss": 0.477, "step": 32244 }, { "epoch": 0.6838667260503489, "grad_norm": 0.39333468675613403, "learning_rate": 1.478379959788084e-05, "loss": 0.541, "step": 32245 }, { "epoch": 0.6838879345082819, "grad_norm": 0.34356021881103516, "learning_rate": 1.4783506735315844e-05, "loss": 0.5397, "step": 32246 }, { "epoch": 0.683909142966215, "grad_norm": 0.5010818243026733, "learning_rate": 1.4783213867430675e-05, "loss": 0.5597, "step": 32247 }, { "epoch": 0.6839303514241479, "grad_norm": 0.3554399609565735, "learning_rate": 1.4782920994225656e-05, "loss": 0.4356, "step": 32248 }, { "epoch": 0.683951559882081, "grad_norm": 0.37799936532974243, "learning_rate": 1.4782628115701112e-05, "loss": 0.5328, "step": 32249 }, { "epoch": 0.683972768340014, "grad_norm": 0.38671207427978516, "learning_rate": 1.4782335231857371e-05, "loss": 0.4964, "step": 32250 }, { "epoch": 0.683993976797947, "grad_norm": 0.3021906316280365, "learning_rate": 1.4782042342694762e-05, "loss": 0.4139, "step": 32251 }, { "epoch": 0.68401518525588, "grad_norm": 0.36168086528778076, "learning_rate": 1.4781749448213605e-05, "loss": 0.5429, "step": 32252 }, { "epoch": 0.6840363937138131, "grad_norm": 0.36263716220855713, "learning_rate": 1.4781456548414226e-05, "loss": 0.5808, "step": 32253 }, { "epoch": 0.684057602171746, "grad_norm": 0.3715856075286865, "learning_rate": 1.4781163643296957e-05, "loss": 0.5486, "step": 32254 }, { "epoch": 0.6840788106296791, "grad_norm": 0.3485148251056671, "learning_rate": 1.4780870732862114e-05, "loss": 0.4356, "step": 32255 }, { "epoch": 0.6841000190876121, "grad_norm": 0.3914114534854889, "learning_rate": 1.4780577817110029e-05, "loss": 0.4864, "step": 32256 }, { "epoch": 0.6841212275455452, "grad_norm": 0.3747715651988983, "learning_rate": 1.4780284896041031e-05, "loss": 0.4245, "step": 32257 }, { "epoch": 0.6841424360034782, "grad_norm": 0.30948883295059204, "learning_rate": 1.4779991969655437e-05, "loss": 0.4428, "step": 32258 }, { "epoch": 0.6841636444614112, "grad_norm": 0.41933029890060425, "learning_rate": 1.477969903795358e-05, "loss": 0.5535, "step": 32259 }, { "epoch": 0.6841848529193443, "grad_norm": 0.32936903834342957, "learning_rate": 1.4779406100935784e-05, "loss": 0.4513, "step": 32260 }, { "epoch": 0.6842060613772772, "grad_norm": 0.3649487793445587, "learning_rate": 1.4779113158602372e-05, "loss": 0.4717, "step": 32261 }, { "epoch": 0.6842272698352103, "grad_norm": 0.36392688751220703, "learning_rate": 1.4778820210953672e-05, "loss": 0.4971, "step": 32262 }, { "epoch": 0.6842484782931433, "grad_norm": 0.3635280430316925, "learning_rate": 1.4778527257990011e-05, "loss": 0.4709, "step": 32263 }, { "epoch": 0.6842696867510764, "grad_norm": 0.36899542808532715, "learning_rate": 1.477823429971171e-05, "loss": 0.499, "step": 32264 }, { "epoch": 0.6842908952090093, "grad_norm": 0.34659525752067566, "learning_rate": 1.4777941336119104e-05, "loss": 0.5393, "step": 32265 }, { "epoch": 0.6843121036669424, "grad_norm": 0.35047733783721924, "learning_rate": 1.477764836721251e-05, "loss": 0.5382, "step": 32266 }, { "epoch": 0.6843333121248754, "grad_norm": 0.36862432956695557, "learning_rate": 1.477735539299226e-05, "loss": 0.4893, "step": 32267 }, { "epoch": 0.6843545205828084, "grad_norm": 0.38456955552101135, "learning_rate": 1.4777062413458675e-05, "loss": 0.5059, "step": 32268 }, { "epoch": 0.6843757290407414, "grad_norm": 0.41987478733062744, "learning_rate": 1.4776769428612079e-05, "loss": 0.4867, "step": 32269 }, { "epoch": 0.6843969374986745, "grad_norm": 0.35247254371643066, "learning_rate": 1.4776476438452803e-05, "loss": 0.5343, "step": 32270 }, { "epoch": 0.6844181459566075, "grad_norm": 0.4216797649860382, "learning_rate": 1.4776183442981173e-05, "loss": 0.4212, "step": 32271 }, { "epoch": 0.6844393544145405, "grad_norm": 0.39402341842651367, "learning_rate": 1.4775890442197514e-05, "loss": 0.5149, "step": 32272 }, { "epoch": 0.6844605628724736, "grad_norm": 0.4050622284412384, "learning_rate": 1.4775597436102151e-05, "loss": 0.5149, "step": 32273 }, { "epoch": 0.6844817713304066, "grad_norm": 0.3632861375808716, "learning_rate": 1.477530442469541e-05, "loss": 0.4547, "step": 32274 }, { "epoch": 0.6845029797883396, "grad_norm": 0.34070849418640137, "learning_rate": 1.4775011407977614e-05, "loss": 0.4148, "step": 32275 }, { "epoch": 0.6845241882462726, "grad_norm": 0.4322166442871094, "learning_rate": 1.4774718385949094e-05, "loss": 0.4926, "step": 32276 }, { "epoch": 0.6845453967042057, "grad_norm": 0.3725898265838623, "learning_rate": 1.4774425358610174e-05, "loss": 0.556, "step": 32277 }, { "epoch": 0.6845666051621386, "grad_norm": 0.37138956785202026, "learning_rate": 1.4774132325961179e-05, "loss": 0.5494, "step": 32278 }, { "epoch": 0.6845878136200717, "grad_norm": 0.3337695002555847, "learning_rate": 1.4773839288002436e-05, "loss": 0.4536, "step": 32279 }, { "epoch": 0.6846090220780047, "grad_norm": 0.440367728471756, "learning_rate": 1.4773546244734269e-05, "loss": 0.5455, "step": 32280 }, { "epoch": 0.6846302305359377, "grad_norm": 0.3994503617286682, "learning_rate": 1.4773253196157004e-05, "loss": 0.4719, "step": 32281 }, { "epoch": 0.6846514389938707, "grad_norm": 0.3857792317867279, "learning_rate": 1.4772960142270974e-05, "loss": 0.4451, "step": 32282 }, { "epoch": 0.6846726474518038, "grad_norm": 0.34147968888282776, "learning_rate": 1.4772667083076495e-05, "loss": 0.4024, "step": 32283 }, { "epoch": 0.6846938559097367, "grad_norm": 0.3769838213920593, "learning_rate": 1.4772374018573898e-05, "loss": 0.4611, "step": 32284 }, { "epoch": 0.6847150643676698, "grad_norm": 0.35996729135513306, "learning_rate": 1.4772080948763509e-05, "loss": 0.5613, "step": 32285 }, { "epoch": 0.6847362728256029, "grad_norm": 0.3476680815219879, "learning_rate": 1.4771787873645651e-05, "loss": 0.4812, "step": 32286 }, { "epoch": 0.6847574812835359, "grad_norm": 0.5076174139976501, "learning_rate": 1.4771494793220654e-05, "loss": 0.4904, "step": 32287 }, { "epoch": 0.6847786897414689, "grad_norm": 0.35132500529289246, "learning_rate": 1.4771201707488841e-05, "loss": 0.4046, "step": 32288 }, { "epoch": 0.6847998981994019, "grad_norm": 0.34436896443367004, "learning_rate": 1.4770908616450536e-05, "loss": 0.5033, "step": 32289 }, { "epoch": 0.684821106657335, "grad_norm": 0.38688069581985474, "learning_rate": 1.4770615520106073e-05, "loss": 0.5332, "step": 32290 }, { "epoch": 0.6848423151152679, "grad_norm": 0.35730865597724915, "learning_rate": 1.4770322418455772e-05, "loss": 0.5616, "step": 32291 }, { "epoch": 0.684863523573201, "grad_norm": 0.3772932291030884, "learning_rate": 1.4770029311499956e-05, "loss": 0.5681, "step": 32292 }, { "epoch": 0.684884732031134, "grad_norm": 0.42116740345954895, "learning_rate": 1.476973619923896e-05, "loss": 0.53, "step": 32293 }, { "epoch": 0.684905940489067, "grad_norm": 0.36613720655441284, "learning_rate": 1.4769443081673101e-05, "loss": 0.4734, "step": 32294 }, { "epoch": 0.684927148947, "grad_norm": 0.3489188253879547, "learning_rate": 1.4769149958802708e-05, "loss": 0.4163, "step": 32295 }, { "epoch": 0.6849483574049331, "grad_norm": 0.38509178161621094, "learning_rate": 1.4768856830628111e-05, "loss": 0.4932, "step": 32296 }, { "epoch": 0.6849695658628661, "grad_norm": 0.3183988332748413, "learning_rate": 1.4768563697149634e-05, "loss": 0.4056, "step": 32297 }, { "epoch": 0.6849907743207991, "grad_norm": 0.3519369661808014, "learning_rate": 1.4768270558367597e-05, "loss": 0.4921, "step": 32298 }, { "epoch": 0.6850119827787322, "grad_norm": 0.3967190384864807, "learning_rate": 1.4767977414282336e-05, "loss": 0.4942, "step": 32299 }, { "epoch": 0.6850331912366652, "grad_norm": 0.3502650856971741, "learning_rate": 1.4767684264894166e-05, "loss": 0.4778, "step": 32300 }, { "epoch": 0.6850543996945982, "grad_norm": 0.33844631910324097, "learning_rate": 1.4767391110203422e-05, "loss": 0.5145, "step": 32301 }, { "epoch": 0.6850756081525312, "grad_norm": 0.4052467346191406, "learning_rate": 1.476709795021043e-05, "loss": 0.5194, "step": 32302 }, { "epoch": 0.6850968166104643, "grad_norm": 0.39903563261032104, "learning_rate": 1.476680478491551e-05, "loss": 0.4643, "step": 32303 }, { "epoch": 0.6851180250683973, "grad_norm": 0.3708931505680084, "learning_rate": 1.476651161431899e-05, "loss": 0.4647, "step": 32304 }, { "epoch": 0.6851392335263303, "grad_norm": 0.40555477142333984, "learning_rate": 1.4766218438421201e-05, "loss": 0.5573, "step": 32305 }, { "epoch": 0.6851604419842633, "grad_norm": 0.34938153624534607, "learning_rate": 1.4765925257222461e-05, "loss": 0.55, "step": 32306 }, { "epoch": 0.6851816504421964, "grad_norm": 0.34234777092933655, "learning_rate": 1.4765632070723101e-05, "loss": 0.4043, "step": 32307 }, { "epoch": 0.6852028589001293, "grad_norm": 0.38664206862449646, "learning_rate": 1.476533887892345e-05, "loss": 0.4748, "step": 32308 }, { "epoch": 0.6852240673580624, "grad_norm": 0.3829158544540405, "learning_rate": 1.476504568182383e-05, "loss": 0.4639, "step": 32309 }, { "epoch": 0.6852452758159954, "grad_norm": 0.4022611379623413, "learning_rate": 1.4764752479424565e-05, "loss": 0.6012, "step": 32310 }, { "epoch": 0.6852664842739284, "grad_norm": 0.38387978076934814, "learning_rate": 1.4764459271725984e-05, "loss": 0.4526, "step": 32311 }, { "epoch": 0.6852876927318615, "grad_norm": 0.3484421968460083, "learning_rate": 1.4764166058728413e-05, "loss": 0.4623, "step": 32312 }, { "epoch": 0.6853089011897945, "grad_norm": 0.327584832906723, "learning_rate": 1.4763872840432178e-05, "loss": 0.4414, "step": 32313 }, { "epoch": 0.6853301096477276, "grad_norm": 0.3691639006137848, "learning_rate": 1.4763579616837606e-05, "loss": 0.5074, "step": 32314 }, { "epoch": 0.6853513181056605, "grad_norm": 0.33433762192726135, "learning_rate": 1.476328638794502e-05, "loss": 0.5683, "step": 32315 }, { "epoch": 0.6853725265635936, "grad_norm": 0.3592630624771118, "learning_rate": 1.4762993153754752e-05, "loss": 0.4803, "step": 32316 }, { "epoch": 0.6853937350215266, "grad_norm": 0.320807546377182, "learning_rate": 1.476269991426712e-05, "loss": 0.5129, "step": 32317 }, { "epoch": 0.6854149434794596, "grad_norm": 0.3609378933906555, "learning_rate": 1.4762406669482459e-05, "loss": 0.5568, "step": 32318 }, { "epoch": 0.6854361519373926, "grad_norm": 0.34277334809303284, "learning_rate": 1.4762113419401087e-05, "loss": 0.5148, "step": 32319 }, { "epoch": 0.6854573603953257, "grad_norm": 0.2913286089897156, "learning_rate": 1.4761820164023332e-05, "loss": 0.4041, "step": 32320 }, { "epoch": 0.6854785688532586, "grad_norm": 0.38322269916534424, "learning_rate": 1.4761526903349527e-05, "loss": 0.5589, "step": 32321 }, { "epoch": 0.6854997773111917, "grad_norm": 0.32524242997169495, "learning_rate": 1.4761233637379993e-05, "loss": 0.4714, "step": 32322 }, { "epoch": 0.6855209857691247, "grad_norm": 0.3880200982093811, "learning_rate": 1.4760940366115052e-05, "loss": 0.517, "step": 32323 }, { "epoch": 0.6855421942270578, "grad_norm": 0.3770185112953186, "learning_rate": 1.4760647089555038e-05, "loss": 0.4918, "step": 32324 }, { "epoch": 0.6855634026849907, "grad_norm": 0.3244597017765045, "learning_rate": 1.4760353807700274e-05, "loss": 0.462, "step": 32325 }, { "epoch": 0.6855846111429238, "grad_norm": 0.39841991662979126, "learning_rate": 1.4760060520551082e-05, "loss": 0.5086, "step": 32326 }, { "epoch": 0.6856058196008569, "grad_norm": 0.4000811278820038, "learning_rate": 1.4759767228107796e-05, "loss": 0.4794, "step": 32327 }, { "epoch": 0.6856270280587898, "grad_norm": 0.33781424164772034, "learning_rate": 1.4759473930370738e-05, "loss": 0.4806, "step": 32328 }, { "epoch": 0.6856482365167229, "grad_norm": 0.3344743847846985, "learning_rate": 1.4759180627340232e-05, "loss": 0.4744, "step": 32329 }, { "epoch": 0.6856694449746559, "grad_norm": 0.41933122277259827, "learning_rate": 1.4758887319016607e-05, "loss": 0.4723, "step": 32330 }, { "epoch": 0.685690653432589, "grad_norm": 0.4251478612422943, "learning_rate": 1.475859400540019e-05, "loss": 0.4786, "step": 32331 }, { "epoch": 0.6857118618905219, "grad_norm": 0.4647431969642639, "learning_rate": 1.4758300686491307e-05, "loss": 0.6026, "step": 32332 }, { "epoch": 0.685733070348455, "grad_norm": 0.32857343554496765, "learning_rate": 1.475800736229028e-05, "loss": 0.3983, "step": 32333 }, { "epoch": 0.685754278806388, "grad_norm": 0.32126837968826294, "learning_rate": 1.475771403279744e-05, "loss": 0.4385, "step": 32334 }, { "epoch": 0.685775487264321, "grad_norm": 0.3421034514904022, "learning_rate": 1.4757420698013114e-05, "loss": 0.4047, "step": 32335 }, { "epoch": 0.685796695722254, "grad_norm": 0.35176780819892883, "learning_rate": 1.4757127357937626e-05, "loss": 0.4544, "step": 32336 }, { "epoch": 0.6858179041801871, "grad_norm": 0.34415680170059204, "learning_rate": 1.47568340125713e-05, "loss": 0.5183, "step": 32337 }, { "epoch": 0.68583911263812, "grad_norm": 0.37881192564964294, "learning_rate": 1.4756540661914467e-05, "loss": 0.4863, "step": 32338 }, { "epoch": 0.6858603210960531, "grad_norm": 0.3664691746234894, "learning_rate": 1.4756247305967448e-05, "loss": 0.4937, "step": 32339 }, { "epoch": 0.6858815295539862, "grad_norm": 0.4244897663593292, "learning_rate": 1.4755953944730571e-05, "loss": 0.4903, "step": 32340 }, { "epoch": 0.6859027380119191, "grad_norm": 0.39432254433631897, "learning_rate": 1.4755660578204167e-05, "loss": 0.4246, "step": 32341 }, { "epoch": 0.6859239464698522, "grad_norm": 0.3979189097881317, "learning_rate": 1.4755367206388559e-05, "loss": 0.5267, "step": 32342 }, { "epoch": 0.6859451549277852, "grad_norm": 0.34038904309272766, "learning_rate": 1.475507382928407e-05, "loss": 0.497, "step": 32343 }, { "epoch": 0.6859663633857183, "grad_norm": 0.3780842125415802, "learning_rate": 1.475478044689103e-05, "loss": 0.5805, "step": 32344 }, { "epoch": 0.6859875718436512, "grad_norm": 0.41851112246513367, "learning_rate": 1.4754487059209765e-05, "loss": 0.5448, "step": 32345 }, { "epoch": 0.6860087803015843, "grad_norm": 0.3884361684322357, "learning_rate": 1.47541936662406e-05, "loss": 0.4551, "step": 32346 }, { "epoch": 0.6860299887595173, "grad_norm": 0.3876180350780487, "learning_rate": 1.4753900267983863e-05, "loss": 0.4706, "step": 32347 }, { "epoch": 0.6860511972174503, "grad_norm": 0.34728866815567017, "learning_rate": 1.4753606864439878e-05, "loss": 0.4506, "step": 32348 }, { "epoch": 0.6860724056753833, "grad_norm": 0.3483945429325104, "learning_rate": 1.4753313455608975e-05, "loss": 0.5052, "step": 32349 }, { "epoch": 0.6860936141333164, "grad_norm": 0.4111911654472351, "learning_rate": 1.4753020041491478e-05, "loss": 0.5318, "step": 32350 }, { "epoch": 0.6861148225912493, "grad_norm": 0.4509470760822296, "learning_rate": 1.475272662208771e-05, "loss": 0.5029, "step": 32351 }, { "epoch": 0.6861360310491824, "grad_norm": 0.3563007116317749, "learning_rate": 1.4752433197398001e-05, "loss": 0.4402, "step": 32352 }, { "epoch": 0.6861572395071155, "grad_norm": 0.35152551531791687, "learning_rate": 1.4752139767422681e-05, "loss": 0.5428, "step": 32353 }, { "epoch": 0.6861784479650485, "grad_norm": 0.41348686814308167, "learning_rate": 1.475184633216207e-05, "loss": 0.5105, "step": 32354 }, { "epoch": 0.6861996564229815, "grad_norm": 0.3774513006210327, "learning_rate": 1.47515528916165e-05, "loss": 0.5046, "step": 32355 }, { "epoch": 0.6862208648809145, "grad_norm": 0.33259859681129456, "learning_rate": 1.475125944578629e-05, "loss": 0.498, "step": 32356 }, { "epoch": 0.6862420733388476, "grad_norm": 0.40189453959465027, "learning_rate": 1.4750965994671772e-05, "loss": 0.5556, "step": 32357 }, { "epoch": 0.6862632817967805, "grad_norm": 0.35048237442970276, "learning_rate": 1.4750672538273269e-05, "loss": 0.4738, "step": 32358 }, { "epoch": 0.6862844902547136, "grad_norm": 0.3522440195083618, "learning_rate": 1.4750379076591112e-05, "loss": 0.4353, "step": 32359 }, { "epoch": 0.6863056987126466, "grad_norm": 0.42947864532470703, "learning_rate": 1.4750085609625625e-05, "loss": 0.5413, "step": 32360 }, { "epoch": 0.6863269071705796, "grad_norm": 0.3544577956199646, "learning_rate": 1.4749792137377132e-05, "loss": 0.5327, "step": 32361 }, { "epoch": 0.6863481156285126, "grad_norm": 0.40432673692703247, "learning_rate": 1.4749498659845962e-05, "loss": 0.5357, "step": 32362 }, { "epoch": 0.6863693240864457, "grad_norm": 0.3917664885520935, "learning_rate": 1.4749205177032442e-05, "loss": 0.4763, "step": 32363 }, { "epoch": 0.6863905325443787, "grad_norm": 0.3823937177658081, "learning_rate": 1.4748911688936898e-05, "loss": 0.4738, "step": 32364 }, { "epoch": 0.6864117410023117, "grad_norm": 0.36985865235328674, "learning_rate": 1.474861819555965e-05, "loss": 0.4276, "step": 32365 }, { "epoch": 0.6864329494602447, "grad_norm": 0.32357361912727356, "learning_rate": 1.4748324696901034e-05, "loss": 0.4659, "step": 32366 }, { "epoch": 0.6864541579181778, "grad_norm": 0.3783539831638336, "learning_rate": 1.4748031192961376e-05, "loss": 0.5092, "step": 32367 }, { "epoch": 0.6864753663761108, "grad_norm": 0.3696247637271881, "learning_rate": 1.4747737683740994e-05, "loss": 0.4945, "step": 32368 }, { "epoch": 0.6864965748340438, "grad_norm": 0.450177401304245, "learning_rate": 1.4747444169240221e-05, "loss": 0.4545, "step": 32369 }, { "epoch": 0.6865177832919769, "grad_norm": 0.3944597542285919, "learning_rate": 1.4747150649459383e-05, "loss": 0.5433, "step": 32370 }, { "epoch": 0.6865389917499098, "grad_norm": 0.3275596797466278, "learning_rate": 1.4746857124398801e-05, "loss": 0.4969, "step": 32371 }, { "epoch": 0.6865602002078429, "grad_norm": 0.5214217901229858, "learning_rate": 1.4746563594058811e-05, "loss": 0.5014, "step": 32372 }, { "epoch": 0.6865814086657759, "grad_norm": 0.36012086272239685, "learning_rate": 1.4746270058439733e-05, "loss": 0.4879, "step": 32373 }, { "epoch": 0.686602617123709, "grad_norm": 0.343363493680954, "learning_rate": 1.4745976517541891e-05, "loss": 0.5814, "step": 32374 }, { "epoch": 0.6866238255816419, "grad_norm": 0.3218153119087219, "learning_rate": 1.474568297136562e-05, "loss": 0.419, "step": 32375 }, { "epoch": 0.686645034039575, "grad_norm": 0.36501628160476685, "learning_rate": 1.4745389419911238e-05, "loss": 0.4211, "step": 32376 }, { "epoch": 0.686666242497508, "grad_norm": 0.38490983843803406, "learning_rate": 1.474509586317908e-05, "loss": 0.5119, "step": 32377 }, { "epoch": 0.686687450955441, "grad_norm": 0.332874596118927, "learning_rate": 1.4744802301169463e-05, "loss": 0.4658, "step": 32378 }, { "epoch": 0.686708659413374, "grad_norm": 0.3390609920024872, "learning_rate": 1.4744508733882716e-05, "loss": 0.4426, "step": 32379 }, { "epoch": 0.6867298678713071, "grad_norm": 0.3290902078151703, "learning_rate": 1.4744215161319173e-05, "loss": 0.4505, "step": 32380 }, { "epoch": 0.6867510763292402, "grad_norm": 0.33636534214019775, "learning_rate": 1.4743921583479153e-05, "loss": 0.426, "step": 32381 }, { "epoch": 0.6867722847871731, "grad_norm": 0.8008083701133728, "learning_rate": 1.4743628000362985e-05, "loss": 0.3979, "step": 32382 }, { "epoch": 0.6867934932451062, "grad_norm": 0.42176756262779236, "learning_rate": 1.4743334411970996e-05, "loss": 0.5738, "step": 32383 }, { "epoch": 0.6868147017030392, "grad_norm": 0.3752913475036621, "learning_rate": 1.474304081830351e-05, "loss": 0.5139, "step": 32384 }, { "epoch": 0.6868359101609722, "grad_norm": 0.3538237512111664, "learning_rate": 1.4742747219360853e-05, "loss": 0.5684, "step": 32385 }, { "epoch": 0.6868571186189052, "grad_norm": 0.34857067465782166, "learning_rate": 1.4742453615143358e-05, "loss": 0.4666, "step": 32386 }, { "epoch": 0.6868783270768383, "grad_norm": 0.3746418356895447, "learning_rate": 1.4742160005651348e-05, "loss": 0.4532, "step": 32387 }, { "epoch": 0.6868995355347712, "grad_norm": 0.38292214274406433, "learning_rate": 1.4741866390885145e-05, "loss": 0.5664, "step": 32388 }, { "epoch": 0.6869207439927043, "grad_norm": 0.38823094964027405, "learning_rate": 1.4741572770845083e-05, "loss": 0.529, "step": 32389 }, { "epoch": 0.6869419524506373, "grad_norm": 0.3637970983982086, "learning_rate": 1.4741279145531482e-05, "loss": 0.5669, "step": 32390 }, { "epoch": 0.6869631609085703, "grad_norm": 0.442738801240921, "learning_rate": 1.474098551494467e-05, "loss": 0.599, "step": 32391 }, { "epoch": 0.6869843693665033, "grad_norm": 0.3866789937019348, "learning_rate": 1.4740691879084979e-05, "loss": 0.5285, "step": 32392 }, { "epoch": 0.6870055778244364, "grad_norm": 0.33757641911506653, "learning_rate": 1.4740398237952729e-05, "loss": 0.4249, "step": 32393 }, { "epoch": 0.6870267862823695, "grad_norm": 0.4605298936367035, "learning_rate": 1.4740104591548251e-05, "loss": 0.5682, "step": 32394 }, { "epoch": 0.6870479947403024, "grad_norm": 0.34056586027145386, "learning_rate": 1.473981093987187e-05, "loss": 0.4938, "step": 32395 }, { "epoch": 0.6870692031982355, "grad_norm": 0.37691712379455566, "learning_rate": 1.4739517282923912e-05, "loss": 0.4224, "step": 32396 }, { "epoch": 0.6870904116561685, "grad_norm": 0.3523162603378296, "learning_rate": 1.4739223620704701e-05, "loss": 0.4596, "step": 32397 }, { "epoch": 0.6871116201141015, "grad_norm": 0.36282801628112793, "learning_rate": 1.4738929953214571e-05, "loss": 0.5468, "step": 32398 }, { "epoch": 0.6871328285720345, "grad_norm": 0.4504876732826233, "learning_rate": 1.4738636280453842e-05, "loss": 0.498, "step": 32399 }, { "epoch": 0.6871540370299676, "grad_norm": 0.3764294683933258, "learning_rate": 1.4738342602422843e-05, "loss": 0.5268, "step": 32400 }, { "epoch": 0.6871752454879005, "grad_norm": 0.40952274203300476, "learning_rate": 1.4738048919121904e-05, "loss": 0.4638, "step": 32401 }, { "epoch": 0.6871964539458336, "grad_norm": 0.3526099622249603, "learning_rate": 1.4737755230551345e-05, "loss": 0.4691, "step": 32402 }, { "epoch": 0.6872176624037666, "grad_norm": 0.3887559771537781, "learning_rate": 1.4737461536711494e-05, "loss": 0.5081, "step": 32403 }, { "epoch": 0.6872388708616997, "grad_norm": 0.3648439049720764, "learning_rate": 1.4737167837602684e-05, "loss": 0.5013, "step": 32404 }, { "epoch": 0.6872600793196326, "grad_norm": 0.42374321818351746, "learning_rate": 1.4736874133225235e-05, "loss": 0.5036, "step": 32405 }, { "epoch": 0.6872812877775657, "grad_norm": 0.35825520753860474, "learning_rate": 1.4736580423579473e-05, "loss": 0.5384, "step": 32406 }, { "epoch": 0.6873024962354988, "grad_norm": 0.3447223901748657, "learning_rate": 1.4736286708665731e-05, "loss": 0.5223, "step": 32407 }, { "epoch": 0.6873237046934317, "grad_norm": 0.3520655632019043, "learning_rate": 1.4735992988484331e-05, "loss": 0.5428, "step": 32408 }, { "epoch": 0.6873449131513648, "grad_norm": 0.3091657757759094, "learning_rate": 1.4735699263035601e-05, "loss": 0.3922, "step": 32409 }, { "epoch": 0.6873661216092978, "grad_norm": 0.33832070231437683, "learning_rate": 1.4735405532319866e-05, "loss": 0.4528, "step": 32410 }, { "epoch": 0.6873873300672309, "grad_norm": 0.5800816416740417, "learning_rate": 1.4735111796337455e-05, "loss": 0.5782, "step": 32411 }, { "epoch": 0.6874085385251638, "grad_norm": 0.34164831042289734, "learning_rate": 1.4734818055088696e-05, "loss": 0.501, "step": 32412 }, { "epoch": 0.6874297469830969, "grad_norm": 0.4054618775844574, "learning_rate": 1.4734524308573908e-05, "loss": 0.4899, "step": 32413 }, { "epoch": 0.6874509554410299, "grad_norm": 0.3694246709346771, "learning_rate": 1.4734230556793429e-05, "loss": 0.5261, "step": 32414 }, { "epoch": 0.6874721638989629, "grad_norm": 0.3995826244354248, "learning_rate": 1.4733936799747577e-05, "loss": 0.5274, "step": 32415 }, { "epoch": 0.6874933723568959, "grad_norm": 0.36788609623908997, "learning_rate": 1.473364303743668e-05, "loss": 0.5325, "step": 32416 }, { "epoch": 0.687514580814829, "grad_norm": 0.44529175758361816, "learning_rate": 1.473334926986107e-05, "loss": 0.5565, "step": 32417 }, { "epoch": 0.6875357892727619, "grad_norm": 0.34317341446876526, "learning_rate": 1.473305549702107e-05, "loss": 0.4638, "step": 32418 }, { "epoch": 0.687556997730695, "grad_norm": 0.38552483916282654, "learning_rate": 1.4732761718917004e-05, "loss": 0.5475, "step": 32419 }, { "epoch": 0.687578206188628, "grad_norm": 0.38895919919013977, "learning_rate": 1.4732467935549201e-05, "loss": 0.5067, "step": 32420 }, { "epoch": 0.687599414646561, "grad_norm": 0.33896932005882263, "learning_rate": 1.4732174146917992e-05, "loss": 0.4979, "step": 32421 }, { "epoch": 0.6876206231044941, "grad_norm": 0.3789087235927582, "learning_rate": 1.4731880353023697e-05, "loss": 0.5642, "step": 32422 }, { "epoch": 0.6876418315624271, "grad_norm": 0.35944798588752747, "learning_rate": 1.4731586553866648e-05, "loss": 0.4805, "step": 32423 }, { "epoch": 0.6876630400203602, "grad_norm": 0.39730486273765564, "learning_rate": 1.4731292749447168e-05, "loss": 0.5042, "step": 32424 }, { "epoch": 0.6876842484782931, "grad_norm": 0.34373408555984497, "learning_rate": 1.4730998939765585e-05, "loss": 0.5605, "step": 32425 }, { "epoch": 0.6877054569362262, "grad_norm": 0.3450961112976074, "learning_rate": 1.4730705124822229e-05, "loss": 0.5814, "step": 32426 }, { "epoch": 0.6877266653941592, "grad_norm": 0.40123680233955383, "learning_rate": 1.4730411304617421e-05, "loss": 0.518, "step": 32427 }, { "epoch": 0.6877478738520922, "grad_norm": 0.40567484498023987, "learning_rate": 1.4730117479151491e-05, "loss": 0.4656, "step": 32428 }, { "epoch": 0.6877690823100252, "grad_norm": 0.4114452600479126, "learning_rate": 1.4729823648424764e-05, "loss": 0.5572, "step": 32429 }, { "epoch": 0.6877902907679583, "grad_norm": 0.34412628412246704, "learning_rate": 1.4729529812437568e-05, "loss": 0.4631, "step": 32430 }, { "epoch": 0.6878114992258912, "grad_norm": 0.38401955366134644, "learning_rate": 1.4729235971190234e-05, "loss": 0.503, "step": 32431 }, { "epoch": 0.6878327076838243, "grad_norm": 0.38216254115104675, "learning_rate": 1.4728942124683086e-05, "loss": 0.6016, "step": 32432 }, { "epoch": 0.6878539161417573, "grad_norm": 0.38259121775627136, "learning_rate": 1.4728648272916444e-05, "loss": 0.4877, "step": 32433 }, { "epoch": 0.6878751245996904, "grad_norm": 0.36098718643188477, "learning_rate": 1.4728354415890645e-05, "loss": 0.4805, "step": 32434 }, { "epoch": 0.6878963330576234, "grad_norm": 0.3210671544075012, "learning_rate": 1.4728060553606007e-05, "loss": 0.4537, "step": 32435 }, { "epoch": 0.6879175415155564, "grad_norm": 0.3278014659881592, "learning_rate": 1.4727766686062864e-05, "loss": 0.4447, "step": 32436 }, { "epoch": 0.6879387499734895, "grad_norm": 0.3642212748527527, "learning_rate": 1.4727472813261538e-05, "loss": 0.4755, "step": 32437 }, { "epoch": 0.6879599584314224, "grad_norm": 0.36792126297950745, "learning_rate": 1.4727178935202362e-05, "loss": 0.514, "step": 32438 }, { "epoch": 0.6879811668893555, "grad_norm": 0.3684883713722229, "learning_rate": 1.4726885051885654e-05, "loss": 0.4983, "step": 32439 }, { "epoch": 0.6880023753472885, "grad_norm": 0.3446855843067169, "learning_rate": 1.4726591163311748e-05, "loss": 0.4438, "step": 32440 }, { "epoch": 0.6880235838052216, "grad_norm": 0.33115309476852417, "learning_rate": 1.4726297269480967e-05, "loss": 0.4416, "step": 32441 }, { "epoch": 0.6880447922631545, "grad_norm": 0.3817940056324005, "learning_rate": 1.472600337039364e-05, "loss": 0.4959, "step": 32442 }, { "epoch": 0.6880660007210876, "grad_norm": 0.39118269085884094, "learning_rate": 1.4725709466050092e-05, "loss": 0.5335, "step": 32443 }, { "epoch": 0.6880872091790206, "grad_norm": 0.3800295293331146, "learning_rate": 1.4725415556450653e-05, "loss": 0.4755, "step": 32444 }, { "epoch": 0.6881084176369536, "grad_norm": 0.3789689242839813, "learning_rate": 1.4725121641595648e-05, "loss": 0.5476, "step": 32445 }, { "epoch": 0.6881296260948866, "grad_norm": 0.3290901780128479, "learning_rate": 1.4724827721485403e-05, "loss": 0.4778, "step": 32446 }, { "epoch": 0.6881508345528197, "grad_norm": 0.4833594262599945, "learning_rate": 1.4724533796120244e-05, "loss": 0.4417, "step": 32447 }, { "epoch": 0.6881720430107527, "grad_norm": 0.3657519817352295, "learning_rate": 1.4724239865500499e-05, "loss": 0.551, "step": 32448 }, { "epoch": 0.6881932514686857, "grad_norm": 0.3836977779865265, "learning_rate": 1.4723945929626498e-05, "loss": 0.5058, "step": 32449 }, { "epoch": 0.6882144599266188, "grad_norm": 0.5834830403327942, "learning_rate": 1.4723651988498565e-05, "loss": 0.5313, "step": 32450 }, { "epoch": 0.6882356683845517, "grad_norm": 0.36191433668136597, "learning_rate": 1.4723358042117028e-05, "loss": 0.5171, "step": 32451 }, { "epoch": 0.6882568768424848, "grad_norm": 0.3622128665447235, "learning_rate": 1.4723064090482214e-05, "loss": 0.4509, "step": 32452 }, { "epoch": 0.6882780853004178, "grad_norm": 0.35225340723991394, "learning_rate": 1.4722770133594443e-05, "loss": 0.4651, "step": 32453 }, { "epoch": 0.6882992937583509, "grad_norm": 0.3354741334915161, "learning_rate": 1.4722476171454054e-05, "loss": 0.5004, "step": 32454 }, { "epoch": 0.6883205022162838, "grad_norm": 0.35827213525772095, "learning_rate": 1.4722182204061366e-05, "loss": 0.5407, "step": 32455 }, { "epoch": 0.6883417106742169, "grad_norm": 0.353929340839386, "learning_rate": 1.4721888231416707e-05, "loss": 0.4987, "step": 32456 }, { "epoch": 0.6883629191321499, "grad_norm": 0.33896109461784363, "learning_rate": 1.4721594253520409e-05, "loss": 0.374, "step": 32457 }, { "epoch": 0.6883841275900829, "grad_norm": 0.34425023198127747, "learning_rate": 1.4721300270372792e-05, "loss": 0.5095, "step": 32458 }, { "epoch": 0.6884053360480159, "grad_norm": 0.349672794342041, "learning_rate": 1.4721006281974187e-05, "loss": 0.54, "step": 32459 }, { "epoch": 0.688426544505949, "grad_norm": 0.3593655228614807, "learning_rate": 1.4720712288324918e-05, "loss": 0.5499, "step": 32460 }, { "epoch": 0.6884477529638819, "grad_norm": 0.30607864260673523, "learning_rate": 1.4720418289425312e-05, "loss": 0.473, "step": 32461 }, { "epoch": 0.688468961421815, "grad_norm": 0.3322305679321289, "learning_rate": 1.4720124285275703e-05, "loss": 0.4838, "step": 32462 }, { "epoch": 0.6884901698797481, "grad_norm": 0.3532087206840515, "learning_rate": 1.471983027587641e-05, "loss": 0.4988, "step": 32463 }, { "epoch": 0.6885113783376811, "grad_norm": 0.3764362335205078, "learning_rate": 1.4719536261227763e-05, "loss": 0.5334, "step": 32464 }, { "epoch": 0.6885325867956141, "grad_norm": 0.3150674104690552, "learning_rate": 1.471924224133009e-05, "loss": 0.4607, "step": 32465 }, { "epoch": 0.6885537952535471, "grad_norm": 0.3903793692588806, "learning_rate": 1.4718948216183715e-05, "loss": 0.5832, "step": 32466 }, { "epoch": 0.6885750037114802, "grad_norm": 0.39216336607933044, "learning_rate": 1.4718654185788965e-05, "loss": 0.4983, "step": 32467 }, { "epoch": 0.6885962121694131, "grad_norm": 0.3648401200771332, "learning_rate": 1.4718360150146172e-05, "loss": 0.5034, "step": 32468 }, { "epoch": 0.6886174206273462, "grad_norm": 0.40610647201538086, "learning_rate": 1.471806610925566e-05, "loss": 0.4729, "step": 32469 }, { "epoch": 0.6886386290852792, "grad_norm": 0.3933335542678833, "learning_rate": 1.4717772063117754e-05, "loss": 0.4783, "step": 32470 }, { "epoch": 0.6886598375432122, "grad_norm": 0.3206954002380371, "learning_rate": 1.4717478011732784e-05, "loss": 0.4766, "step": 32471 }, { "epoch": 0.6886810460011452, "grad_norm": 0.34180760383605957, "learning_rate": 1.4717183955101075e-05, "loss": 0.4439, "step": 32472 }, { "epoch": 0.6887022544590783, "grad_norm": 0.8090556859970093, "learning_rate": 1.4716889893222956e-05, "loss": 0.6497, "step": 32473 }, { "epoch": 0.6887234629170113, "grad_norm": 0.3828328251838684, "learning_rate": 1.4716595826098754e-05, "loss": 0.4567, "step": 32474 }, { "epoch": 0.6887446713749443, "grad_norm": 0.40752947330474854, "learning_rate": 1.4716301753728793e-05, "loss": 0.4754, "step": 32475 }, { "epoch": 0.6887658798328774, "grad_norm": 0.38112515211105347, "learning_rate": 1.4716007676113401e-05, "loss": 0.4964, "step": 32476 }, { "epoch": 0.6887870882908104, "grad_norm": 0.39242851734161377, "learning_rate": 1.4715713593252909e-05, "loss": 0.5651, "step": 32477 }, { "epoch": 0.6888082967487434, "grad_norm": 0.3312508463859558, "learning_rate": 1.4715419505147641e-05, "loss": 0.5078, "step": 32478 }, { "epoch": 0.6888295052066764, "grad_norm": 0.3974078595638275, "learning_rate": 1.4715125411797926e-05, "loss": 0.5444, "step": 32479 }, { "epoch": 0.6888507136646095, "grad_norm": 0.34365031123161316, "learning_rate": 1.4714831313204085e-05, "loss": 0.5269, "step": 32480 }, { "epoch": 0.6888719221225424, "grad_norm": 0.3953920304775238, "learning_rate": 1.471453720936645e-05, "loss": 0.4671, "step": 32481 }, { "epoch": 0.6888931305804755, "grad_norm": 0.3339475393295288, "learning_rate": 1.4714243100285353e-05, "loss": 0.4764, "step": 32482 }, { "epoch": 0.6889143390384085, "grad_norm": 0.32171210646629333, "learning_rate": 1.4713948985961112e-05, "loss": 0.4941, "step": 32483 }, { "epoch": 0.6889355474963416, "grad_norm": 0.4163627326488495, "learning_rate": 1.4713654866394057e-05, "loss": 0.58, "step": 32484 }, { "epoch": 0.6889567559542745, "grad_norm": 0.3526584208011627, "learning_rate": 1.4713360741584519e-05, "loss": 0.5183, "step": 32485 }, { "epoch": 0.6889779644122076, "grad_norm": 0.36569294333457947, "learning_rate": 1.4713066611532818e-05, "loss": 0.5389, "step": 32486 }, { "epoch": 0.6889991728701406, "grad_norm": 0.3272889256477356, "learning_rate": 1.4712772476239288e-05, "loss": 0.4116, "step": 32487 }, { "epoch": 0.6890203813280736, "grad_norm": 0.32941934466362, "learning_rate": 1.4712478335704255e-05, "loss": 0.443, "step": 32488 }, { "epoch": 0.6890415897860067, "grad_norm": 0.3530466854572296, "learning_rate": 1.471218418992804e-05, "loss": 0.438, "step": 32489 }, { "epoch": 0.6890627982439397, "grad_norm": 0.4096633195877075, "learning_rate": 1.4711890038910979e-05, "loss": 0.51, "step": 32490 }, { "epoch": 0.6890840067018728, "grad_norm": 0.3357887268066406, "learning_rate": 1.4711595882653393e-05, "loss": 0.5036, "step": 32491 }, { "epoch": 0.6891052151598057, "grad_norm": 0.5986785292625427, "learning_rate": 1.4711301721155612e-05, "loss": 0.4735, "step": 32492 }, { "epoch": 0.6891264236177388, "grad_norm": 0.32745006680488586, "learning_rate": 1.471100755441796e-05, "loss": 0.4908, "step": 32493 }, { "epoch": 0.6891476320756718, "grad_norm": 0.3538169860839844, "learning_rate": 1.4710713382440766e-05, "loss": 0.4295, "step": 32494 }, { "epoch": 0.6891688405336048, "grad_norm": 0.3678152859210968, "learning_rate": 1.471041920522436e-05, "loss": 0.4987, "step": 32495 }, { "epoch": 0.6891900489915378, "grad_norm": 0.44349586963653564, "learning_rate": 1.4710125022769066e-05, "loss": 0.4633, "step": 32496 }, { "epoch": 0.6892112574494709, "grad_norm": 0.3411237895488739, "learning_rate": 1.4709830835075213e-05, "loss": 0.4418, "step": 32497 }, { "epoch": 0.6892324659074038, "grad_norm": 0.3888954520225525, "learning_rate": 1.4709536642143125e-05, "loss": 0.4852, "step": 32498 }, { "epoch": 0.6892536743653369, "grad_norm": 0.39482587575912476, "learning_rate": 1.470924244397313e-05, "loss": 0.5484, "step": 32499 }, { "epoch": 0.6892748828232699, "grad_norm": 0.3740960955619812, "learning_rate": 1.470894824056556e-05, "loss": 0.5201, "step": 32500 }, { "epoch": 0.689296091281203, "grad_norm": 0.43683844804763794, "learning_rate": 1.4708654031920738e-05, "loss": 0.469, "step": 32501 }, { "epoch": 0.6893172997391359, "grad_norm": 0.4004722237586975, "learning_rate": 1.470835981803899e-05, "loss": 0.5565, "step": 32502 }, { "epoch": 0.689338508197069, "grad_norm": 0.39297041296958923, "learning_rate": 1.4708065598920644e-05, "loss": 0.5469, "step": 32503 }, { "epoch": 0.6893597166550021, "grad_norm": 0.383159875869751, "learning_rate": 1.4707771374566034e-05, "loss": 0.5008, "step": 32504 }, { "epoch": 0.689380925112935, "grad_norm": 0.40815311670303345, "learning_rate": 1.4707477144975477e-05, "loss": 0.6341, "step": 32505 }, { "epoch": 0.6894021335708681, "grad_norm": 0.34988316893577576, "learning_rate": 1.4707182910149307e-05, "loss": 0.4735, "step": 32506 }, { "epoch": 0.6894233420288011, "grad_norm": 0.38296112418174744, "learning_rate": 1.4706888670087844e-05, "loss": 0.53, "step": 32507 }, { "epoch": 0.6894445504867341, "grad_norm": 0.34607234597206116, "learning_rate": 1.4706594424791427e-05, "loss": 0.5092, "step": 32508 }, { "epoch": 0.6894657589446671, "grad_norm": 0.3640632927417755, "learning_rate": 1.4706300174260373e-05, "loss": 0.4609, "step": 32509 }, { "epoch": 0.6894869674026002, "grad_norm": 0.3342069387435913, "learning_rate": 1.4706005918495014e-05, "loss": 0.5162, "step": 32510 }, { "epoch": 0.6895081758605331, "grad_norm": 0.3940097391605377, "learning_rate": 1.4705711657495677e-05, "loss": 0.5127, "step": 32511 }, { "epoch": 0.6895293843184662, "grad_norm": 0.36225461959838867, "learning_rate": 1.4705417391262683e-05, "loss": 0.4961, "step": 32512 }, { "epoch": 0.6895505927763992, "grad_norm": 0.3917504847049713, "learning_rate": 1.4705123119796372e-05, "loss": 0.5852, "step": 32513 }, { "epoch": 0.6895718012343323, "grad_norm": 0.37473082542419434, "learning_rate": 1.4704828843097061e-05, "loss": 0.4765, "step": 32514 }, { "epoch": 0.6895930096922652, "grad_norm": 0.36490508913993835, "learning_rate": 1.4704534561165078e-05, "loss": 0.4916, "step": 32515 }, { "epoch": 0.6896142181501983, "grad_norm": 0.3549093008041382, "learning_rate": 1.4704240274000756e-05, "loss": 0.5329, "step": 32516 }, { "epoch": 0.6896354266081314, "grad_norm": 0.34996646642684937, "learning_rate": 1.4703945981604415e-05, "loss": 0.4813, "step": 32517 }, { "epoch": 0.6896566350660643, "grad_norm": 0.3521656095981598, "learning_rate": 1.470365168397639e-05, "loss": 0.4563, "step": 32518 }, { "epoch": 0.6896778435239974, "grad_norm": 0.3133228123188019, "learning_rate": 1.4703357381117002e-05, "loss": 0.4642, "step": 32519 }, { "epoch": 0.6896990519819304, "grad_norm": 0.3606390655040741, "learning_rate": 1.4703063073026582e-05, "loss": 0.4392, "step": 32520 }, { "epoch": 0.6897202604398635, "grad_norm": 0.3351198136806488, "learning_rate": 1.4702768759705455e-05, "loss": 0.4583, "step": 32521 }, { "epoch": 0.6897414688977964, "grad_norm": 0.3547264337539673, "learning_rate": 1.4702474441153952e-05, "loss": 0.4876, "step": 32522 }, { "epoch": 0.6897626773557295, "grad_norm": 0.32826530933380127, "learning_rate": 1.4702180117372393e-05, "loss": 0.4809, "step": 32523 }, { "epoch": 0.6897838858136625, "grad_norm": 0.4013896882534027, "learning_rate": 1.4701885788361114e-05, "loss": 0.5238, "step": 32524 }, { "epoch": 0.6898050942715955, "grad_norm": 0.3574635982513428, "learning_rate": 1.4701591454120438e-05, "loss": 0.4679, "step": 32525 }, { "epoch": 0.6898263027295285, "grad_norm": 0.38653916120529175, "learning_rate": 1.4701297114650693e-05, "loss": 0.5532, "step": 32526 }, { "epoch": 0.6898475111874616, "grad_norm": 0.32303574681282043, "learning_rate": 1.4701002769952205e-05, "loss": 0.5393, "step": 32527 }, { "epoch": 0.6898687196453945, "grad_norm": 0.6699938774108887, "learning_rate": 1.4700708420025302e-05, "loss": 0.4721, "step": 32528 }, { "epoch": 0.6898899281033276, "grad_norm": 0.44953811168670654, "learning_rate": 1.4700414064870314e-05, "loss": 0.4323, "step": 32529 }, { "epoch": 0.6899111365612607, "grad_norm": 0.365923672914505, "learning_rate": 1.4700119704487565e-05, "loss": 0.4535, "step": 32530 }, { "epoch": 0.6899323450191936, "grad_norm": 0.34460970759391785, "learning_rate": 1.4699825338877385e-05, "loss": 0.4333, "step": 32531 }, { "epoch": 0.6899535534771267, "grad_norm": 0.43801212310791016, "learning_rate": 1.4699530968040097e-05, "loss": 0.5907, "step": 32532 }, { "epoch": 0.6899747619350597, "grad_norm": 0.345610111951828, "learning_rate": 1.4699236591976034e-05, "loss": 0.488, "step": 32533 }, { "epoch": 0.6899959703929928, "grad_norm": 0.3215123414993286, "learning_rate": 1.4698942210685523e-05, "loss": 0.4736, "step": 32534 }, { "epoch": 0.6900171788509257, "grad_norm": 0.3557623624801636, "learning_rate": 1.4698647824168886e-05, "loss": 0.4688, "step": 32535 }, { "epoch": 0.6900383873088588, "grad_norm": 0.3558683693408966, "learning_rate": 1.4698353432426455e-05, "loss": 0.4404, "step": 32536 }, { "epoch": 0.6900595957667918, "grad_norm": 0.40502649545669556, "learning_rate": 1.4698059035458555e-05, "loss": 0.4865, "step": 32537 }, { "epoch": 0.6900808042247248, "grad_norm": 0.32705897092819214, "learning_rate": 1.4697764633265514e-05, "loss": 0.4955, "step": 32538 }, { "epoch": 0.6901020126826578, "grad_norm": 0.40877565741539, "learning_rate": 1.4697470225847662e-05, "loss": 0.5183, "step": 32539 }, { "epoch": 0.6901232211405909, "grad_norm": 0.34407660365104675, "learning_rate": 1.4697175813205322e-05, "loss": 0.4142, "step": 32540 }, { "epoch": 0.6901444295985238, "grad_norm": 0.37158918380737305, "learning_rate": 1.4696881395338826e-05, "loss": 0.5234, "step": 32541 }, { "epoch": 0.6901656380564569, "grad_norm": 0.35666602849960327, "learning_rate": 1.46965869722485e-05, "loss": 0.4668, "step": 32542 }, { "epoch": 0.6901868465143899, "grad_norm": 0.34205466508865356, "learning_rate": 1.4696292543934669e-05, "loss": 0.4309, "step": 32543 }, { "epoch": 0.690208054972323, "grad_norm": 0.35292938351631165, "learning_rate": 1.4695998110397662e-05, "loss": 0.4506, "step": 32544 }, { "epoch": 0.690229263430256, "grad_norm": 0.3808184862136841, "learning_rate": 1.4695703671637808e-05, "loss": 0.4733, "step": 32545 }, { "epoch": 0.690250471888189, "grad_norm": 0.42602813243865967, "learning_rate": 1.469540922765543e-05, "loss": 0.5469, "step": 32546 }, { "epoch": 0.6902716803461221, "grad_norm": 0.3304305672645569, "learning_rate": 1.4695114778450866e-05, "loss": 0.4921, "step": 32547 }, { "epoch": 0.690292888804055, "grad_norm": 0.38618576526641846, "learning_rate": 1.4694820324024431e-05, "loss": 0.4791, "step": 32548 }, { "epoch": 0.6903140972619881, "grad_norm": 0.3690682053565979, "learning_rate": 1.4694525864376456e-05, "loss": 0.5568, "step": 32549 }, { "epoch": 0.6903353057199211, "grad_norm": 0.34116697311401367, "learning_rate": 1.4694231399507274e-05, "loss": 0.5057, "step": 32550 }, { "epoch": 0.6903565141778542, "grad_norm": 0.5277573466300964, "learning_rate": 1.4693936929417205e-05, "loss": 0.4511, "step": 32551 }, { "epoch": 0.6903777226357871, "grad_norm": 0.3621184229850769, "learning_rate": 1.4693642454106583e-05, "loss": 0.4794, "step": 32552 }, { "epoch": 0.6903989310937202, "grad_norm": 0.37682539224624634, "learning_rate": 1.4693347973575732e-05, "loss": 0.4966, "step": 32553 }, { "epoch": 0.6904201395516532, "grad_norm": 0.36410656571388245, "learning_rate": 1.4693053487824978e-05, "loss": 0.4919, "step": 32554 }, { "epoch": 0.6904413480095862, "grad_norm": 0.3546227216720581, "learning_rate": 1.4692758996854652e-05, "loss": 0.4531, "step": 32555 }, { "epoch": 0.6904625564675192, "grad_norm": 0.5767495632171631, "learning_rate": 1.4692464500665082e-05, "loss": 0.4961, "step": 32556 }, { "epoch": 0.6904837649254523, "grad_norm": 0.48112359642982483, "learning_rate": 1.4692169999256589e-05, "loss": 0.4871, "step": 32557 }, { "epoch": 0.6905049733833853, "grad_norm": 0.3419763743877411, "learning_rate": 1.4691875492629511e-05, "loss": 0.482, "step": 32558 }, { "epoch": 0.6905261818413183, "grad_norm": 0.3504563570022583, "learning_rate": 1.4691580980784168e-05, "loss": 0.5086, "step": 32559 }, { "epoch": 0.6905473902992514, "grad_norm": 0.40786826610565186, "learning_rate": 1.4691286463720889e-05, "loss": 0.5289, "step": 32560 }, { "epoch": 0.6905685987571843, "grad_norm": 0.3370668590068817, "learning_rate": 1.469099194144e-05, "loss": 0.5036, "step": 32561 }, { "epoch": 0.6905898072151174, "grad_norm": 0.36520570516586304, "learning_rate": 1.4690697413941835e-05, "loss": 0.4287, "step": 32562 }, { "epoch": 0.6906110156730504, "grad_norm": 0.3809424042701721, "learning_rate": 1.4690402881226713e-05, "loss": 0.5101, "step": 32563 }, { "epoch": 0.6906322241309835, "grad_norm": 0.37754544615745544, "learning_rate": 1.4690108343294966e-05, "loss": 0.47, "step": 32564 }, { "epoch": 0.6906534325889164, "grad_norm": 0.39914774894714355, "learning_rate": 1.4689813800146924e-05, "loss": 0.4621, "step": 32565 }, { "epoch": 0.6906746410468495, "grad_norm": 0.38662049174308777, "learning_rate": 1.468951925178291e-05, "loss": 0.5435, "step": 32566 }, { "epoch": 0.6906958495047825, "grad_norm": 0.33615851402282715, "learning_rate": 1.4689224698203254e-05, "loss": 0.5628, "step": 32567 }, { "epoch": 0.6907170579627155, "grad_norm": 0.4568046033382416, "learning_rate": 1.468893013940828e-05, "loss": 0.5959, "step": 32568 }, { "epoch": 0.6907382664206485, "grad_norm": 0.36208435893058777, "learning_rate": 1.4688635575398322e-05, "loss": 0.4049, "step": 32569 }, { "epoch": 0.6907594748785816, "grad_norm": 0.4305209517478943, "learning_rate": 1.4688341006173703e-05, "loss": 0.5124, "step": 32570 }, { "epoch": 0.6907806833365147, "grad_norm": 0.36692261695861816, "learning_rate": 1.468804643173475e-05, "loss": 0.4765, "step": 32571 }, { "epoch": 0.6908018917944476, "grad_norm": 0.31503626704216003, "learning_rate": 1.4687751852081797e-05, "loss": 0.5144, "step": 32572 }, { "epoch": 0.6908231002523807, "grad_norm": 0.34662169218063354, "learning_rate": 1.4687457267215166e-05, "loss": 0.4746, "step": 32573 }, { "epoch": 0.6908443087103137, "grad_norm": 0.3448082208633423, "learning_rate": 1.4687162677135182e-05, "loss": 0.4951, "step": 32574 }, { "epoch": 0.6908655171682467, "grad_norm": 0.3342532217502594, "learning_rate": 1.4686868081842183e-05, "loss": 0.5985, "step": 32575 }, { "epoch": 0.6908867256261797, "grad_norm": 0.3658577501773834, "learning_rate": 1.4686573481336483e-05, "loss": 0.5086, "step": 32576 }, { "epoch": 0.6909079340841128, "grad_norm": 0.3523047864437103, "learning_rate": 1.468627887561842e-05, "loss": 0.5072, "step": 32577 }, { "epoch": 0.6909291425420457, "grad_norm": 0.4922981560230255, "learning_rate": 1.4685984264688317e-05, "loss": 0.4651, "step": 32578 }, { "epoch": 0.6909503509999788, "grad_norm": 0.36426427960395813, "learning_rate": 1.4685689648546506e-05, "loss": 0.4955, "step": 32579 }, { "epoch": 0.6909715594579118, "grad_norm": 0.3773086667060852, "learning_rate": 1.468539502719331e-05, "loss": 0.5667, "step": 32580 }, { "epoch": 0.6909927679158449, "grad_norm": 0.3507075309753418, "learning_rate": 1.4685100400629058e-05, "loss": 0.5021, "step": 32581 }, { "epoch": 0.6910139763737778, "grad_norm": 0.3378932774066925, "learning_rate": 1.4684805768854079e-05, "loss": 0.4978, "step": 32582 }, { "epoch": 0.6910351848317109, "grad_norm": 0.3525351881980896, "learning_rate": 1.4684511131868698e-05, "loss": 0.4645, "step": 32583 }, { "epoch": 0.6910563932896439, "grad_norm": 0.3481104075908661, "learning_rate": 1.4684216489673245e-05, "loss": 0.5015, "step": 32584 }, { "epoch": 0.6910776017475769, "grad_norm": 0.33759862184524536, "learning_rate": 1.4683921842268047e-05, "loss": 0.489, "step": 32585 }, { "epoch": 0.69109881020551, "grad_norm": 0.3509026765823364, "learning_rate": 1.4683627189653433e-05, "loss": 0.535, "step": 32586 }, { "epoch": 0.691120018663443, "grad_norm": 0.34425294399261475, "learning_rate": 1.4683332531829729e-05, "loss": 0.4875, "step": 32587 }, { "epoch": 0.691141227121376, "grad_norm": 0.32113391160964966, "learning_rate": 1.468303786879726e-05, "loss": 0.498, "step": 32588 }, { "epoch": 0.691162435579309, "grad_norm": 0.3651488423347473, "learning_rate": 1.468274320055636e-05, "loss": 0.5448, "step": 32589 }, { "epoch": 0.6911836440372421, "grad_norm": 0.37602367997169495, "learning_rate": 1.4682448527107356e-05, "loss": 0.477, "step": 32590 }, { "epoch": 0.691204852495175, "grad_norm": 0.6053308844566345, "learning_rate": 1.4682153848450572e-05, "loss": 0.4404, "step": 32591 }, { "epoch": 0.6912260609531081, "grad_norm": 0.37023359537124634, "learning_rate": 1.4681859164586337e-05, "loss": 0.4738, "step": 32592 }, { "epoch": 0.6912472694110411, "grad_norm": 0.31409162282943726, "learning_rate": 1.4681564475514978e-05, "loss": 0.4873, "step": 32593 }, { "epoch": 0.6912684778689742, "grad_norm": 0.3376099169254303, "learning_rate": 1.4681269781236822e-05, "loss": 0.4873, "step": 32594 }, { "epoch": 0.6912896863269071, "grad_norm": 0.41806352138519287, "learning_rate": 1.46809750817522e-05, "loss": 0.4377, "step": 32595 }, { "epoch": 0.6913108947848402, "grad_norm": 0.33388206362724304, "learning_rate": 1.4680680377061438e-05, "loss": 0.5119, "step": 32596 }, { "epoch": 0.6913321032427732, "grad_norm": 0.35566791892051697, "learning_rate": 1.4680385667164862e-05, "loss": 0.4706, "step": 32597 }, { "epoch": 0.6913533117007062, "grad_norm": 0.39352551102638245, "learning_rate": 1.4680090952062806e-05, "loss": 0.5616, "step": 32598 }, { "epoch": 0.6913745201586393, "grad_norm": 0.35407590866088867, "learning_rate": 1.4679796231755592e-05, "loss": 0.486, "step": 32599 }, { "epoch": 0.6913957286165723, "grad_norm": 0.37036705017089844, "learning_rate": 1.4679501506243549e-05, "loss": 0.5808, "step": 32600 }, { "epoch": 0.6914169370745054, "grad_norm": 0.3493553102016449, "learning_rate": 1.4679206775527007e-05, "loss": 0.4016, "step": 32601 }, { "epoch": 0.6914381455324383, "grad_norm": 0.3851490020751953, "learning_rate": 1.4678912039606287e-05, "loss": 0.5097, "step": 32602 }, { "epoch": 0.6914593539903714, "grad_norm": 0.3272537887096405, "learning_rate": 1.4678617298481724e-05, "loss": 0.4599, "step": 32603 }, { "epoch": 0.6914805624483044, "grad_norm": 0.3606945276260376, "learning_rate": 1.4678322552153646e-05, "loss": 0.492, "step": 32604 }, { "epoch": 0.6915017709062374, "grad_norm": 0.3313402831554413, "learning_rate": 1.4678027800622374e-05, "loss": 0.5301, "step": 32605 }, { "epoch": 0.6915229793641704, "grad_norm": 0.3505159318447113, "learning_rate": 1.4677733043888245e-05, "loss": 0.5148, "step": 32606 }, { "epoch": 0.6915441878221035, "grad_norm": 0.3403244912624359, "learning_rate": 1.467743828195158e-05, "loss": 0.5148, "step": 32607 }, { "epoch": 0.6915653962800364, "grad_norm": 0.3389135003089905, "learning_rate": 1.4677143514812707e-05, "loss": 0.4642, "step": 32608 }, { "epoch": 0.6915866047379695, "grad_norm": 0.3519229590892792, "learning_rate": 1.4676848742471954e-05, "loss": 0.5047, "step": 32609 }, { "epoch": 0.6916078131959025, "grad_norm": 0.34505385160446167, "learning_rate": 1.4676553964929656e-05, "loss": 0.552, "step": 32610 }, { "epoch": 0.6916290216538356, "grad_norm": 0.3218981623649597, "learning_rate": 1.4676259182186133e-05, "loss": 0.4128, "step": 32611 }, { "epoch": 0.6916502301117686, "grad_norm": 0.34026002883911133, "learning_rate": 1.4675964394241714e-05, "loss": 0.5093, "step": 32612 }, { "epoch": 0.6916714385697016, "grad_norm": 0.3669695556163788, "learning_rate": 1.4675669601096727e-05, "loss": 0.4838, "step": 32613 }, { "epoch": 0.6916926470276347, "grad_norm": 0.37983471155166626, "learning_rate": 1.4675374802751504e-05, "loss": 0.4445, "step": 32614 }, { "epoch": 0.6917138554855676, "grad_norm": 0.36724239587783813, "learning_rate": 1.4675079999206367e-05, "loss": 0.5176, "step": 32615 }, { "epoch": 0.6917350639435007, "grad_norm": 0.4204777181148529, "learning_rate": 1.467478519046165e-05, "loss": 0.4975, "step": 32616 }, { "epoch": 0.6917562724014337, "grad_norm": 0.3722306191921234, "learning_rate": 1.4674490376517674e-05, "loss": 0.4721, "step": 32617 }, { "epoch": 0.6917774808593667, "grad_norm": 0.3013908267021179, "learning_rate": 1.4674195557374773e-05, "loss": 0.4418, "step": 32618 }, { "epoch": 0.6917986893172997, "grad_norm": 0.3834690451622009, "learning_rate": 1.467390073303327e-05, "loss": 0.5365, "step": 32619 }, { "epoch": 0.6918198977752328, "grad_norm": 0.37669530510902405, "learning_rate": 1.4673605903493497e-05, "loss": 0.5096, "step": 32620 }, { "epoch": 0.6918411062331657, "grad_norm": 0.38580721616744995, "learning_rate": 1.4673311068755778e-05, "loss": 0.4948, "step": 32621 }, { "epoch": 0.6918623146910988, "grad_norm": 0.4965929687023163, "learning_rate": 1.4673016228820442e-05, "loss": 0.4691, "step": 32622 }, { "epoch": 0.6918835231490318, "grad_norm": 2.4165000915527344, "learning_rate": 1.4672721383687822e-05, "loss": 0.53, "step": 32623 }, { "epoch": 0.6919047316069649, "grad_norm": 0.3862788677215576, "learning_rate": 1.4672426533358244e-05, "loss": 0.4358, "step": 32624 }, { "epoch": 0.6919259400648978, "grad_norm": 0.40373942255973816, "learning_rate": 1.4672131677832027e-05, "loss": 0.5092, "step": 32625 }, { "epoch": 0.6919471485228309, "grad_norm": 0.32172468304634094, "learning_rate": 1.467183681710951e-05, "loss": 0.4705, "step": 32626 }, { "epoch": 0.691968356980764, "grad_norm": 0.3376171588897705, "learning_rate": 1.4671541951191016e-05, "loss": 0.4336, "step": 32627 }, { "epoch": 0.6919895654386969, "grad_norm": 0.3890398442745209, "learning_rate": 1.4671247080076872e-05, "loss": 0.4887, "step": 32628 }, { "epoch": 0.69201077389663, "grad_norm": 0.5574392080307007, "learning_rate": 1.467095220376741e-05, "loss": 0.5653, "step": 32629 }, { "epoch": 0.692031982354563, "grad_norm": 0.3681469261646271, "learning_rate": 1.4670657322262955e-05, "loss": 0.5535, "step": 32630 }, { "epoch": 0.692053190812496, "grad_norm": 0.3629341721534729, "learning_rate": 1.4670362435563834e-05, "loss": 0.4721, "step": 32631 }, { "epoch": 0.692074399270429, "grad_norm": 0.38240310549736023, "learning_rate": 1.467006754367038e-05, "loss": 0.5163, "step": 32632 }, { "epoch": 0.6920956077283621, "grad_norm": 0.41287553310394287, "learning_rate": 1.4669772646582912e-05, "loss": 0.4011, "step": 32633 }, { "epoch": 0.6921168161862951, "grad_norm": 0.35903018712997437, "learning_rate": 1.4669477744301764e-05, "loss": 0.4971, "step": 32634 }, { "epoch": 0.6921380246442281, "grad_norm": 0.3687216341495514, "learning_rate": 1.4669182836827267e-05, "loss": 0.4474, "step": 32635 }, { "epoch": 0.6921592331021611, "grad_norm": 0.41325825452804565, "learning_rate": 1.4668887924159743e-05, "loss": 0.5371, "step": 32636 }, { "epoch": 0.6921804415600942, "grad_norm": 0.3768935203552246, "learning_rate": 1.4668593006299526e-05, "loss": 0.554, "step": 32637 }, { "epoch": 0.6922016500180271, "grad_norm": 0.36810749769210815, "learning_rate": 1.4668298083246936e-05, "loss": 0.565, "step": 32638 }, { "epoch": 0.6922228584759602, "grad_norm": 0.3604728579521179, "learning_rate": 1.4668003155002305e-05, "loss": 0.5333, "step": 32639 }, { "epoch": 0.6922440669338933, "grad_norm": 0.3452613651752472, "learning_rate": 1.4667708221565966e-05, "loss": 0.5276, "step": 32640 }, { "epoch": 0.6922652753918263, "grad_norm": 0.3853418231010437, "learning_rate": 1.466741328293824e-05, "loss": 0.5298, "step": 32641 }, { "epoch": 0.6922864838497593, "grad_norm": 0.35117268562316895, "learning_rate": 1.4667118339119457e-05, "loss": 0.5942, "step": 32642 }, { "epoch": 0.6923076923076923, "grad_norm": 0.3096008896827698, "learning_rate": 1.4666823390109946e-05, "loss": 0.3936, "step": 32643 }, { "epoch": 0.6923289007656254, "grad_norm": 0.4844762086868286, "learning_rate": 1.4666528435910037e-05, "loss": 0.4761, "step": 32644 }, { "epoch": 0.6923501092235583, "grad_norm": 1.142741084098816, "learning_rate": 1.466623347652005e-05, "loss": 0.5671, "step": 32645 }, { "epoch": 0.6923713176814914, "grad_norm": 0.35146334767341614, "learning_rate": 1.4665938511940325e-05, "loss": 0.4524, "step": 32646 }, { "epoch": 0.6923925261394244, "grad_norm": 0.355157732963562, "learning_rate": 1.466564354217118e-05, "loss": 0.5052, "step": 32647 }, { "epoch": 0.6924137345973574, "grad_norm": 0.5005481839179993, "learning_rate": 1.4665348567212946e-05, "loss": 0.4202, "step": 32648 }, { "epoch": 0.6924349430552904, "grad_norm": 0.38728538155555725, "learning_rate": 1.4665053587065953e-05, "loss": 0.5761, "step": 32649 }, { "epoch": 0.6924561515132235, "grad_norm": 0.337051123380661, "learning_rate": 1.466475860173053e-05, "loss": 0.4576, "step": 32650 }, { "epoch": 0.6924773599711564, "grad_norm": 0.37152162194252014, "learning_rate": 1.4664463611207e-05, "loss": 0.4573, "step": 32651 }, { "epoch": 0.6924985684290895, "grad_norm": 0.34880053997039795, "learning_rate": 1.4664168615495697e-05, "loss": 0.4678, "step": 32652 }, { "epoch": 0.6925197768870226, "grad_norm": 0.3697662949562073, "learning_rate": 1.4663873614596942e-05, "loss": 0.4707, "step": 32653 }, { "epoch": 0.6925409853449556, "grad_norm": 0.36893266439437866, "learning_rate": 1.4663578608511071e-05, "loss": 0.4476, "step": 32654 }, { "epoch": 0.6925621938028886, "grad_norm": 0.3505067527294159, "learning_rate": 1.4663283597238407e-05, "loss": 0.4624, "step": 32655 }, { "epoch": 0.6925834022608216, "grad_norm": 0.36436864733695984, "learning_rate": 1.4662988580779281e-05, "loss": 0.4813, "step": 32656 }, { "epoch": 0.6926046107187547, "grad_norm": 0.36643609404563904, "learning_rate": 1.4662693559134018e-05, "loss": 0.5672, "step": 32657 }, { "epoch": 0.6926258191766876, "grad_norm": 0.33757686614990234, "learning_rate": 1.4662398532302948e-05, "loss": 0.4713, "step": 32658 }, { "epoch": 0.6926470276346207, "grad_norm": 0.3380388915538788, "learning_rate": 1.4662103500286398e-05, "loss": 0.5594, "step": 32659 }, { "epoch": 0.6926682360925537, "grad_norm": 0.3619435429573059, "learning_rate": 1.4661808463084697e-05, "loss": 0.4945, "step": 32660 }, { "epoch": 0.6926894445504868, "grad_norm": 0.33777090907096863, "learning_rate": 1.4661513420698174e-05, "loss": 0.4731, "step": 32661 }, { "epoch": 0.6927106530084197, "grad_norm": 0.34969526529312134, "learning_rate": 1.4661218373127157e-05, "loss": 0.481, "step": 32662 }, { "epoch": 0.6927318614663528, "grad_norm": 0.35557717084884644, "learning_rate": 1.4660923320371974e-05, "loss": 0.4877, "step": 32663 }, { "epoch": 0.6927530699242858, "grad_norm": 0.38346827030181885, "learning_rate": 1.466062826243295e-05, "loss": 0.473, "step": 32664 }, { "epoch": 0.6927742783822188, "grad_norm": 0.43262413144111633, "learning_rate": 1.4660333199310416e-05, "loss": 0.4596, "step": 32665 }, { "epoch": 0.6927954868401518, "grad_norm": 0.34630143642425537, "learning_rate": 1.4660038131004702e-05, "loss": 0.4684, "step": 32666 }, { "epoch": 0.6928166952980849, "grad_norm": 0.33351802825927734, "learning_rate": 1.4659743057516131e-05, "loss": 0.4408, "step": 32667 }, { "epoch": 0.692837903756018, "grad_norm": 0.36294904351234436, "learning_rate": 1.4659447978845039e-05, "loss": 0.4435, "step": 32668 }, { "epoch": 0.6928591122139509, "grad_norm": 0.40087953209877014, "learning_rate": 1.4659152894991747e-05, "loss": 0.5073, "step": 32669 }, { "epoch": 0.692880320671884, "grad_norm": 0.34170001745224, "learning_rate": 1.4658857805956583e-05, "loss": 0.4691, "step": 32670 }, { "epoch": 0.692901529129817, "grad_norm": 0.6631348729133606, "learning_rate": 1.4658562711739883e-05, "loss": 0.5294, "step": 32671 }, { "epoch": 0.69292273758775, "grad_norm": 0.43113237619400024, "learning_rate": 1.4658267612341967e-05, "loss": 0.522, "step": 32672 }, { "epoch": 0.692943946045683, "grad_norm": 0.8904686570167542, "learning_rate": 1.4657972507763163e-05, "loss": 0.5165, "step": 32673 }, { "epoch": 0.6929651545036161, "grad_norm": 0.38581690192222595, "learning_rate": 1.4657677398003806e-05, "loss": 0.5501, "step": 32674 }, { "epoch": 0.692986362961549, "grad_norm": 0.40184536576271057, "learning_rate": 1.4657382283064224e-05, "loss": 0.5506, "step": 32675 }, { "epoch": 0.6930075714194821, "grad_norm": 0.38537076115608215, "learning_rate": 1.4657087162944736e-05, "loss": 0.5075, "step": 32676 }, { "epoch": 0.6930287798774151, "grad_norm": 0.3539574444293976, "learning_rate": 1.465679203764568e-05, "loss": 0.4908, "step": 32677 }, { "epoch": 0.6930499883353481, "grad_norm": 0.3945372700691223, "learning_rate": 1.4656496907167377e-05, "loss": 0.5059, "step": 32678 }, { "epoch": 0.6930711967932811, "grad_norm": 0.36401990056037903, "learning_rate": 1.465620177151016e-05, "loss": 0.4884, "step": 32679 }, { "epoch": 0.6930924052512142, "grad_norm": 0.4035881459712982, "learning_rate": 1.4655906630674357e-05, "loss": 0.5132, "step": 32680 }, { "epoch": 0.6931136137091473, "grad_norm": 0.9787473082542419, "learning_rate": 1.4655611484660293e-05, "loss": 0.5234, "step": 32681 }, { "epoch": 0.6931348221670802, "grad_norm": 0.35062265396118164, "learning_rate": 1.4655316333468302e-05, "loss": 0.4898, "step": 32682 }, { "epoch": 0.6931560306250133, "grad_norm": 0.33409544825553894, "learning_rate": 1.4655021177098708e-05, "loss": 0.5625, "step": 32683 }, { "epoch": 0.6931772390829463, "grad_norm": 0.3625263571739197, "learning_rate": 1.4654726015551835e-05, "loss": 0.5148, "step": 32684 }, { "epoch": 0.6931984475408793, "grad_norm": 0.41066670417785645, "learning_rate": 1.4654430848828019e-05, "loss": 0.4354, "step": 32685 }, { "epoch": 0.6932196559988123, "grad_norm": 0.32381850481033325, "learning_rate": 1.4654135676927586e-05, "loss": 0.4804, "step": 32686 }, { "epoch": 0.6932408644567454, "grad_norm": 0.3594454526901245, "learning_rate": 1.4653840499850862e-05, "loss": 0.51, "step": 32687 }, { "epoch": 0.6932620729146783, "grad_norm": 0.35350674390792847, "learning_rate": 1.4653545317598181e-05, "loss": 0.4772, "step": 32688 }, { "epoch": 0.6932832813726114, "grad_norm": 0.380635142326355, "learning_rate": 1.4653250130169864e-05, "loss": 0.5021, "step": 32689 }, { "epoch": 0.6933044898305444, "grad_norm": 0.3939129412174225, "learning_rate": 1.4652954937566244e-05, "loss": 0.5042, "step": 32690 }, { "epoch": 0.6933256982884775, "grad_norm": 0.342207670211792, "learning_rate": 1.4652659739787648e-05, "loss": 0.4663, "step": 32691 }, { "epoch": 0.6933469067464104, "grad_norm": 0.49445435404777527, "learning_rate": 1.4652364536834402e-05, "loss": 0.4633, "step": 32692 }, { "epoch": 0.6933681152043435, "grad_norm": 0.4222080409526825, "learning_rate": 1.4652069328706838e-05, "loss": 0.5378, "step": 32693 }, { "epoch": 0.6933893236622766, "grad_norm": 0.3616659343242645, "learning_rate": 1.4651774115405284e-05, "loss": 0.5054, "step": 32694 }, { "epoch": 0.6934105321202095, "grad_norm": 0.3587528169155121, "learning_rate": 1.4651478896930064e-05, "loss": 0.5551, "step": 32695 }, { "epoch": 0.6934317405781426, "grad_norm": 0.3952985107898712, "learning_rate": 1.4651183673281516e-05, "loss": 0.5162, "step": 32696 }, { "epoch": 0.6934529490360756, "grad_norm": 0.3392181694507599, "learning_rate": 1.4650888444459956e-05, "loss": 0.4875, "step": 32697 }, { "epoch": 0.6934741574940086, "grad_norm": 0.4938915967941284, "learning_rate": 1.465059321046572e-05, "loss": 0.3995, "step": 32698 }, { "epoch": 0.6934953659519416, "grad_norm": 0.3528556823730469, "learning_rate": 1.4650297971299134e-05, "loss": 0.5349, "step": 32699 }, { "epoch": 0.6935165744098747, "grad_norm": 0.3639976382255554, "learning_rate": 1.4650002726960529e-05, "loss": 0.5544, "step": 32700 }, { "epoch": 0.6935377828678077, "grad_norm": 0.3584044575691223, "learning_rate": 1.464970747745023e-05, "loss": 0.4602, "step": 32701 }, { "epoch": 0.6935589913257407, "grad_norm": 0.34057652950286865, "learning_rate": 1.4649412222768566e-05, "loss": 0.4724, "step": 32702 }, { "epoch": 0.6935801997836737, "grad_norm": 0.5644245743751526, "learning_rate": 1.4649116962915871e-05, "loss": 0.5045, "step": 32703 }, { "epoch": 0.6936014082416068, "grad_norm": 0.3539164662361145, "learning_rate": 1.4648821697892462e-05, "loss": 0.5013, "step": 32704 }, { "epoch": 0.6936226166995397, "grad_norm": 0.34588584303855896, "learning_rate": 1.4648526427698675e-05, "loss": 0.4709, "step": 32705 }, { "epoch": 0.6936438251574728, "grad_norm": 0.33599525690078735, "learning_rate": 1.464823115233484e-05, "loss": 0.4757, "step": 32706 }, { "epoch": 0.6936650336154059, "grad_norm": 0.36991122364997864, "learning_rate": 1.464793587180128e-05, "loss": 0.5936, "step": 32707 }, { "epoch": 0.6936862420733388, "grad_norm": 0.37096187472343445, "learning_rate": 1.4647640586098328e-05, "loss": 0.5002, "step": 32708 }, { "epoch": 0.6937074505312719, "grad_norm": 0.6538639068603516, "learning_rate": 1.464734529522631e-05, "loss": 0.5144, "step": 32709 }, { "epoch": 0.6937286589892049, "grad_norm": 0.34253445267677307, "learning_rate": 1.4647049999185556e-05, "loss": 0.4717, "step": 32710 }, { "epoch": 0.693749867447138, "grad_norm": 0.3719275891780853, "learning_rate": 1.4646754697976391e-05, "loss": 0.5234, "step": 32711 }, { "epoch": 0.6937710759050709, "grad_norm": 0.425513356924057, "learning_rate": 1.4646459391599148e-05, "loss": 0.5052, "step": 32712 }, { "epoch": 0.693792284363004, "grad_norm": 0.3219747543334961, "learning_rate": 1.4646164080054153e-05, "loss": 0.4449, "step": 32713 }, { "epoch": 0.693813492820937, "grad_norm": 0.38481152057647705, "learning_rate": 1.4645868763341732e-05, "loss": 0.5289, "step": 32714 }, { "epoch": 0.69383470127887, "grad_norm": 0.35684552788734436, "learning_rate": 1.464557344146222e-05, "loss": 0.4498, "step": 32715 }, { "epoch": 0.693855909736803, "grad_norm": 0.36575525999069214, "learning_rate": 1.464527811441594e-05, "loss": 0.4882, "step": 32716 }, { "epoch": 0.6938771181947361, "grad_norm": 0.4194623529911041, "learning_rate": 1.4644982782203223e-05, "loss": 0.5594, "step": 32717 }, { "epoch": 0.693898326652669, "grad_norm": 0.3275359570980072, "learning_rate": 1.464468744482439e-05, "loss": 0.4881, "step": 32718 }, { "epoch": 0.6939195351106021, "grad_norm": 0.37489867210388184, "learning_rate": 1.4644392102279784e-05, "loss": 0.5498, "step": 32719 }, { "epoch": 0.6939407435685351, "grad_norm": 0.4703345000743866, "learning_rate": 1.4644096754569723e-05, "loss": 0.4251, "step": 32720 }, { "epoch": 0.6939619520264682, "grad_norm": 0.371947705745697, "learning_rate": 1.4643801401694538e-05, "loss": 0.458, "step": 32721 }, { "epoch": 0.6939831604844012, "grad_norm": 0.37921831011772156, "learning_rate": 1.4643506043654556e-05, "loss": 0.4338, "step": 32722 }, { "epoch": 0.6940043689423342, "grad_norm": 0.34813761711120605, "learning_rate": 1.4643210680450108e-05, "loss": 0.4517, "step": 32723 }, { "epoch": 0.6940255774002673, "grad_norm": 0.3573903441429138, "learning_rate": 1.464291531208152e-05, "loss": 0.5228, "step": 32724 }, { "epoch": 0.6940467858582002, "grad_norm": 0.3961842656135559, "learning_rate": 1.4642619938549122e-05, "loss": 0.4971, "step": 32725 }, { "epoch": 0.6940679943161333, "grad_norm": 0.3163754343986511, "learning_rate": 1.4642324559853248e-05, "loss": 0.4848, "step": 32726 }, { "epoch": 0.6940892027740663, "grad_norm": 0.39399203658103943, "learning_rate": 1.4642029175994214e-05, "loss": 0.5138, "step": 32727 }, { "epoch": 0.6941104112319993, "grad_norm": 0.3762657046318054, "learning_rate": 1.4641733786972361e-05, "loss": 0.4958, "step": 32728 }, { "epoch": 0.6941316196899323, "grad_norm": 0.37032002210617065, "learning_rate": 1.4641438392788008e-05, "loss": 0.456, "step": 32729 }, { "epoch": 0.6941528281478654, "grad_norm": 0.335348904132843, "learning_rate": 1.4641142993441485e-05, "loss": 0.5133, "step": 32730 }, { "epoch": 0.6941740366057984, "grad_norm": 0.3533836603164673, "learning_rate": 1.4640847588933129e-05, "loss": 0.4644, "step": 32731 }, { "epoch": 0.6941952450637314, "grad_norm": 0.3965209126472473, "learning_rate": 1.4640552179263262e-05, "loss": 0.427, "step": 32732 }, { "epoch": 0.6942164535216644, "grad_norm": 0.5110408067703247, "learning_rate": 1.4640256764432211e-05, "loss": 0.5007, "step": 32733 }, { "epoch": 0.6942376619795975, "grad_norm": 0.36957991123199463, "learning_rate": 1.4639961344440307e-05, "loss": 0.5039, "step": 32734 }, { "epoch": 0.6942588704375305, "grad_norm": 0.3539941608905792, "learning_rate": 1.4639665919287877e-05, "loss": 0.5401, "step": 32735 }, { "epoch": 0.6942800788954635, "grad_norm": 0.367084801197052, "learning_rate": 1.4639370488975255e-05, "loss": 0.4422, "step": 32736 }, { "epoch": 0.6943012873533966, "grad_norm": 0.3550318777561188, "learning_rate": 1.4639075053502763e-05, "loss": 0.4714, "step": 32737 }, { "epoch": 0.6943224958113295, "grad_norm": 0.3640097677707672, "learning_rate": 1.4638779612870728e-05, "loss": 0.4651, "step": 32738 }, { "epoch": 0.6943437042692626, "grad_norm": 0.38115596771240234, "learning_rate": 1.463848416707949e-05, "loss": 0.5371, "step": 32739 }, { "epoch": 0.6943649127271956, "grad_norm": 0.37615567445755005, "learning_rate": 1.4638188716129367e-05, "loss": 0.5352, "step": 32740 }, { "epoch": 0.6943861211851287, "grad_norm": 0.3395233154296875, "learning_rate": 1.4637893260020689e-05, "loss": 0.4866, "step": 32741 }, { "epoch": 0.6944073296430616, "grad_norm": 0.3692803382873535, "learning_rate": 1.4637597798753789e-05, "loss": 0.4891, "step": 32742 }, { "epoch": 0.6944285381009947, "grad_norm": 0.3075975179672241, "learning_rate": 1.4637302332328991e-05, "loss": 0.408, "step": 32743 }, { "epoch": 0.6944497465589277, "grad_norm": 0.35657161474227905, "learning_rate": 1.4637006860746626e-05, "loss": 0.4953, "step": 32744 }, { "epoch": 0.6944709550168607, "grad_norm": 0.3133741617202759, "learning_rate": 1.4636711384007023e-05, "loss": 0.3859, "step": 32745 }, { "epoch": 0.6944921634747937, "grad_norm": 0.45909276604652405, "learning_rate": 1.463641590211051e-05, "loss": 0.4662, "step": 32746 }, { "epoch": 0.6945133719327268, "grad_norm": 0.39340469241142273, "learning_rate": 1.4636120415057415e-05, "loss": 0.5307, "step": 32747 }, { "epoch": 0.6945345803906599, "grad_norm": 0.353393018245697, "learning_rate": 1.463582492284807e-05, "loss": 0.4566, "step": 32748 }, { "epoch": 0.6945557888485928, "grad_norm": 0.4131436049938202, "learning_rate": 1.4635529425482795e-05, "loss": 0.4667, "step": 32749 }, { "epoch": 0.6945769973065259, "grad_norm": 0.5058417916297913, "learning_rate": 1.463523392296193e-05, "loss": 0.5675, "step": 32750 }, { "epoch": 0.6945982057644589, "grad_norm": 0.37049710750579834, "learning_rate": 1.4634938415285795e-05, "loss": 0.5479, "step": 32751 }, { "epoch": 0.6946194142223919, "grad_norm": 0.7247309684753418, "learning_rate": 1.4634642902454722e-05, "loss": 0.5333, "step": 32752 }, { "epoch": 0.6946406226803249, "grad_norm": 0.36243051290512085, "learning_rate": 1.463434738446904e-05, "loss": 0.5678, "step": 32753 }, { "epoch": 0.694661831138258, "grad_norm": 0.6961904168128967, "learning_rate": 1.4634051861329077e-05, "loss": 0.5025, "step": 32754 }, { "epoch": 0.6946830395961909, "grad_norm": 0.3762875199317932, "learning_rate": 1.463375633303516e-05, "loss": 0.4958, "step": 32755 }, { "epoch": 0.694704248054124, "grad_norm": 0.39456096291542053, "learning_rate": 1.4633460799587622e-05, "loss": 0.5484, "step": 32756 }, { "epoch": 0.694725456512057, "grad_norm": 0.38863736391067505, "learning_rate": 1.4633165260986789e-05, "loss": 0.4639, "step": 32757 }, { "epoch": 0.69474666496999, "grad_norm": 2.0951340198516846, "learning_rate": 1.4632869717232989e-05, "loss": 0.5923, "step": 32758 }, { "epoch": 0.694767873427923, "grad_norm": 0.3911254107952118, "learning_rate": 1.4632574168326551e-05, "loss": 0.4882, "step": 32759 }, { "epoch": 0.6947890818858561, "grad_norm": 0.33689606189727783, "learning_rate": 1.4632278614267806e-05, "loss": 0.5044, "step": 32760 }, { "epoch": 0.694810290343789, "grad_norm": 0.3709070384502411, "learning_rate": 1.4631983055057079e-05, "loss": 0.4609, "step": 32761 }, { "epoch": 0.6948314988017221, "grad_norm": 0.3508577346801758, "learning_rate": 1.4631687490694701e-05, "loss": 0.5006, "step": 32762 }, { "epoch": 0.6948527072596552, "grad_norm": 0.4826800525188446, "learning_rate": 1.4631391921181002e-05, "loss": 0.6017, "step": 32763 }, { "epoch": 0.6948739157175882, "grad_norm": 0.355210542678833, "learning_rate": 1.4631096346516309e-05, "loss": 0.4624, "step": 32764 }, { "epoch": 0.6948951241755212, "grad_norm": 0.3645264506340027, "learning_rate": 1.463080076670095e-05, "loss": 0.5251, "step": 32765 }, { "epoch": 0.6949163326334542, "grad_norm": 0.37648388743400574, "learning_rate": 1.4630505181735254e-05, "loss": 0.4945, "step": 32766 }, { "epoch": 0.6949375410913873, "grad_norm": 0.3725570738315582, "learning_rate": 1.4630209591619553e-05, "loss": 0.5203, "step": 32767 }, { "epoch": 0.6949587495493202, "grad_norm": 0.3382812440395355, "learning_rate": 1.462991399635417e-05, "loss": 0.5545, "step": 32768 }, { "epoch": 0.6949799580072533, "grad_norm": 0.34338822960853577, "learning_rate": 1.4629618395939438e-05, "loss": 0.4428, "step": 32769 }, { "epoch": 0.6950011664651863, "grad_norm": 0.3344983458518982, "learning_rate": 1.4629322790375683e-05, "loss": 0.5009, "step": 32770 }, { "epoch": 0.6950223749231194, "grad_norm": 0.31883612275123596, "learning_rate": 1.4629027179663239e-05, "loss": 0.3941, "step": 32771 }, { "epoch": 0.6950435833810523, "grad_norm": 0.5129255652427673, "learning_rate": 1.4628731563802427e-05, "loss": 0.5352, "step": 32772 }, { "epoch": 0.6950647918389854, "grad_norm": 0.344066858291626, "learning_rate": 1.4628435942793586e-05, "loss": 0.4505, "step": 32773 }, { "epoch": 0.6950860002969184, "grad_norm": 0.3898334205150604, "learning_rate": 1.4628140316637032e-05, "loss": 0.5379, "step": 32774 }, { "epoch": 0.6951072087548514, "grad_norm": 0.3678383231163025, "learning_rate": 1.4627844685333103e-05, "loss": 0.5643, "step": 32775 }, { "epoch": 0.6951284172127845, "grad_norm": 0.3883223235607147, "learning_rate": 1.4627549048882128e-05, "loss": 0.5008, "step": 32776 }, { "epoch": 0.6951496256707175, "grad_norm": 0.33228155970573425, "learning_rate": 1.462725340728443e-05, "loss": 0.4087, "step": 32777 }, { "epoch": 0.6951708341286505, "grad_norm": 0.36026185750961304, "learning_rate": 1.4626957760540341e-05, "loss": 0.5001, "step": 32778 }, { "epoch": 0.6951920425865835, "grad_norm": 0.3682224750518799, "learning_rate": 1.4626662108650193e-05, "loss": 0.4163, "step": 32779 }, { "epoch": 0.6952132510445166, "grad_norm": 0.7674614787101746, "learning_rate": 1.462636645161431e-05, "loss": 0.4885, "step": 32780 }, { "epoch": 0.6952344595024496, "grad_norm": 0.37275969982147217, "learning_rate": 1.4626070789433023e-05, "loss": 0.4894, "step": 32781 }, { "epoch": 0.6952556679603826, "grad_norm": 0.39701569080352783, "learning_rate": 1.4625775122106655e-05, "loss": 0.5487, "step": 32782 }, { "epoch": 0.6952768764183156, "grad_norm": 0.3225656747817993, "learning_rate": 1.4625479449635544e-05, "loss": 0.3724, "step": 32783 }, { "epoch": 0.6952980848762487, "grad_norm": 0.35206228494644165, "learning_rate": 1.4625183772020018e-05, "loss": 0.5221, "step": 32784 }, { "epoch": 0.6953192933341816, "grad_norm": 0.3646147847175598, "learning_rate": 1.46248880892604e-05, "loss": 0.5692, "step": 32785 }, { "epoch": 0.6953405017921147, "grad_norm": 0.3748132586479187, "learning_rate": 1.4624592401357021e-05, "loss": 0.5064, "step": 32786 }, { "epoch": 0.6953617102500477, "grad_norm": 0.374394953250885, "learning_rate": 1.462429670831021e-05, "loss": 0.457, "step": 32787 }, { "epoch": 0.6953829187079807, "grad_norm": 0.33915600180625916, "learning_rate": 1.4624001010120297e-05, "loss": 0.5358, "step": 32788 }, { "epoch": 0.6954041271659138, "grad_norm": 0.3863455355167389, "learning_rate": 1.462370530678761e-05, "loss": 0.5192, "step": 32789 }, { "epoch": 0.6954253356238468, "grad_norm": 0.34637847542762756, "learning_rate": 1.462340959831248e-05, "loss": 0.5463, "step": 32790 }, { "epoch": 0.6954465440817799, "grad_norm": 0.40725287795066833, "learning_rate": 1.4623113884695232e-05, "loss": 0.4643, "step": 32791 }, { "epoch": 0.6954677525397128, "grad_norm": 0.3975953459739685, "learning_rate": 1.4622818165936198e-05, "loss": 0.5222, "step": 32792 }, { "epoch": 0.6954889609976459, "grad_norm": 0.33855515718460083, "learning_rate": 1.4622522442035707e-05, "loss": 0.5017, "step": 32793 }, { "epoch": 0.6955101694555789, "grad_norm": 0.38889279961586, "learning_rate": 1.4622226712994083e-05, "loss": 0.4792, "step": 32794 }, { "epoch": 0.6955313779135119, "grad_norm": 0.42892467975616455, "learning_rate": 1.462193097881166e-05, "loss": 0.6061, "step": 32795 }, { "epoch": 0.6955525863714449, "grad_norm": 0.3604762852191925, "learning_rate": 1.4621635239488767e-05, "loss": 0.4952, "step": 32796 }, { "epoch": 0.695573794829378, "grad_norm": 0.35763290524482727, "learning_rate": 1.4621339495025731e-05, "loss": 0.5323, "step": 32797 }, { "epoch": 0.6955950032873109, "grad_norm": 0.3632667660713196, "learning_rate": 1.4621043745422882e-05, "loss": 0.4668, "step": 32798 }, { "epoch": 0.695616211745244, "grad_norm": 0.36872464418411255, "learning_rate": 1.4620747990680549e-05, "loss": 0.4811, "step": 32799 }, { "epoch": 0.695637420203177, "grad_norm": 0.772298276424408, "learning_rate": 1.4620452230799057e-05, "loss": 0.652, "step": 32800 }, { "epoch": 0.6956586286611101, "grad_norm": 0.3556608259677887, "learning_rate": 1.462015646577874e-05, "loss": 0.4765, "step": 32801 }, { "epoch": 0.695679837119043, "grad_norm": 0.3772953748703003, "learning_rate": 1.4619860695619925e-05, "loss": 0.4771, "step": 32802 }, { "epoch": 0.6957010455769761, "grad_norm": 0.33577653765678406, "learning_rate": 1.461956492032294e-05, "loss": 0.4951, "step": 32803 }, { "epoch": 0.6957222540349092, "grad_norm": 0.36765745282173157, "learning_rate": 1.4619269139888117e-05, "loss": 0.5158, "step": 32804 }, { "epoch": 0.6957434624928421, "grad_norm": 0.3449118733406067, "learning_rate": 1.4618973354315782e-05, "loss": 0.5215, "step": 32805 }, { "epoch": 0.6957646709507752, "grad_norm": 0.3376595973968506, "learning_rate": 1.4618677563606265e-05, "loss": 0.4651, "step": 32806 }, { "epoch": 0.6957858794087082, "grad_norm": 0.41802656650543213, "learning_rate": 1.4618381767759893e-05, "loss": 0.586, "step": 32807 }, { "epoch": 0.6958070878666412, "grad_norm": 0.4617612659931183, "learning_rate": 1.4618085966777001e-05, "loss": 0.492, "step": 32808 }, { "epoch": 0.6958282963245742, "grad_norm": 0.47159358859062195, "learning_rate": 1.461779016065791e-05, "loss": 0.5372, "step": 32809 }, { "epoch": 0.6958495047825073, "grad_norm": 0.49185511469841003, "learning_rate": 1.4617494349402956e-05, "loss": 0.5573, "step": 32810 }, { "epoch": 0.6958707132404403, "grad_norm": 0.4332420229911804, "learning_rate": 1.4617198533012462e-05, "loss": 0.5468, "step": 32811 }, { "epoch": 0.6958919216983733, "grad_norm": 0.6717357635498047, "learning_rate": 1.4616902711486762e-05, "loss": 0.5034, "step": 32812 }, { "epoch": 0.6959131301563063, "grad_norm": 0.3711574971675873, "learning_rate": 1.4616606884826183e-05, "loss": 0.4673, "step": 32813 }, { "epoch": 0.6959343386142394, "grad_norm": 0.3628586232662201, "learning_rate": 1.4616311053031052e-05, "loss": 0.4685, "step": 32814 }, { "epoch": 0.6959555470721723, "grad_norm": 0.3760802745819092, "learning_rate": 1.4616015216101703e-05, "loss": 0.4789, "step": 32815 }, { "epoch": 0.6959767555301054, "grad_norm": 0.3594292998313904, "learning_rate": 1.461571937403846e-05, "loss": 0.5356, "step": 32816 }, { "epoch": 0.6959979639880385, "grad_norm": 0.3596811890602112, "learning_rate": 1.4615423526841652e-05, "loss": 0.5384, "step": 32817 }, { "epoch": 0.6960191724459714, "grad_norm": 0.40103691816329956, "learning_rate": 1.4615127674511613e-05, "loss": 0.5309, "step": 32818 }, { "epoch": 0.6960403809039045, "grad_norm": 0.3775281012058258, "learning_rate": 1.4614831817048666e-05, "loss": 0.5218, "step": 32819 }, { "epoch": 0.6960615893618375, "grad_norm": 0.37228474020957947, "learning_rate": 1.4614535954453147e-05, "loss": 0.4979, "step": 32820 }, { "epoch": 0.6960827978197706, "grad_norm": 0.3524494767189026, "learning_rate": 1.461424008672538e-05, "loss": 0.4158, "step": 32821 }, { "epoch": 0.6961040062777035, "grad_norm": 0.37501177191734314, "learning_rate": 1.4613944213865697e-05, "loss": 0.5575, "step": 32822 }, { "epoch": 0.6961252147356366, "grad_norm": 0.3339046239852905, "learning_rate": 1.461364833587442e-05, "loss": 0.5947, "step": 32823 }, { "epoch": 0.6961464231935696, "grad_norm": 0.34821921586990356, "learning_rate": 1.4613352452751887e-05, "loss": 0.5953, "step": 32824 }, { "epoch": 0.6961676316515026, "grad_norm": 0.3848564624786377, "learning_rate": 1.4613056564498425e-05, "loss": 0.4659, "step": 32825 }, { "epoch": 0.6961888401094356, "grad_norm": 0.36076685786247253, "learning_rate": 1.4612760671114359e-05, "loss": 0.561, "step": 32826 }, { "epoch": 0.6962100485673687, "grad_norm": 0.43253329396247864, "learning_rate": 1.4612464772600021e-05, "loss": 0.4835, "step": 32827 }, { "epoch": 0.6962312570253016, "grad_norm": 0.425265371799469, "learning_rate": 1.4612168868955738e-05, "loss": 0.4445, "step": 32828 }, { "epoch": 0.6962524654832347, "grad_norm": 0.386593759059906, "learning_rate": 1.4611872960181846e-05, "loss": 0.5245, "step": 32829 }, { "epoch": 0.6962736739411678, "grad_norm": 0.365311861038208, "learning_rate": 1.4611577046278664e-05, "loss": 0.468, "step": 32830 }, { "epoch": 0.6962948823991008, "grad_norm": 0.3909741938114166, "learning_rate": 1.4611281127246529e-05, "loss": 0.5632, "step": 32831 }, { "epoch": 0.6963160908570338, "grad_norm": 0.32711026072502136, "learning_rate": 1.4610985203085766e-05, "loss": 0.5475, "step": 32832 }, { "epoch": 0.6963372993149668, "grad_norm": 0.3610561192035675, "learning_rate": 1.4610689273796706e-05, "loss": 0.5153, "step": 32833 }, { "epoch": 0.6963585077728999, "grad_norm": 0.381891667842865, "learning_rate": 1.4610393339379675e-05, "loss": 0.4836, "step": 32834 }, { "epoch": 0.6963797162308328, "grad_norm": 0.3391386568546295, "learning_rate": 1.4610097399835009e-05, "loss": 0.3951, "step": 32835 }, { "epoch": 0.6964009246887659, "grad_norm": 0.48306429386138916, "learning_rate": 1.4609801455163029e-05, "loss": 0.4882, "step": 32836 }, { "epoch": 0.6964221331466989, "grad_norm": 0.4549141526222229, "learning_rate": 1.4609505505364068e-05, "loss": 0.5804, "step": 32837 }, { "epoch": 0.696443341604632, "grad_norm": 0.37109142541885376, "learning_rate": 1.4609209550438457e-05, "loss": 0.4592, "step": 32838 }, { "epoch": 0.6964645500625649, "grad_norm": 0.34702569246292114, "learning_rate": 1.4608913590386522e-05, "loss": 0.4979, "step": 32839 }, { "epoch": 0.696485758520498, "grad_norm": 0.4367233216762543, "learning_rate": 1.4608617625208592e-05, "loss": 0.5511, "step": 32840 }, { "epoch": 0.696506966978431, "grad_norm": 0.3642498254776001, "learning_rate": 1.4608321654905e-05, "loss": 0.5109, "step": 32841 }, { "epoch": 0.696528175436364, "grad_norm": 0.4443925619125366, "learning_rate": 1.4608025679476073e-05, "loss": 0.4908, "step": 32842 }, { "epoch": 0.696549383894297, "grad_norm": 0.354558527469635, "learning_rate": 1.4607729698922137e-05, "loss": 0.5357, "step": 32843 }, { "epoch": 0.6965705923522301, "grad_norm": 0.3386373817920685, "learning_rate": 1.4607433713243528e-05, "loss": 0.399, "step": 32844 }, { "epoch": 0.6965918008101631, "grad_norm": 0.3929193913936615, "learning_rate": 1.4607137722440568e-05, "loss": 0.4682, "step": 32845 }, { "epoch": 0.6966130092680961, "grad_norm": 0.3420632779598236, "learning_rate": 1.460684172651359e-05, "loss": 0.5129, "step": 32846 }, { "epoch": 0.6966342177260292, "grad_norm": 0.41548165678977966, "learning_rate": 1.4606545725462924e-05, "loss": 0.4511, "step": 32847 }, { "epoch": 0.6966554261839621, "grad_norm": 0.4121907353401184, "learning_rate": 1.4606249719288896e-05, "loss": 0.489, "step": 32848 }, { "epoch": 0.6966766346418952, "grad_norm": 0.3231969177722931, "learning_rate": 1.4605953707991839e-05, "loss": 0.4561, "step": 32849 }, { "epoch": 0.6966978430998282, "grad_norm": 0.3622879683971405, "learning_rate": 1.460565769157208e-05, "loss": 0.5213, "step": 32850 }, { "epoch": 0.6967190515577613, "grad_norm": 0.3456246852874756, "learning_rate": 1.4605361670029947e-05, "loss": 0.4365, "step": 32851 }, { "epoch": 0.6967402600156942, "grad_norm": 0.3557426333427429, "learning_rate": 1.460506564336577e-05, "loss": 0.4601, "step": 32852 }, { "epoch": 0.6967614684736273, "grad_norm": 0.3942054808139801, "learning_rate": 1.4604769611579882e-05, "loss": 0.5252, "step": 32853 }, { "epoch": 0.6967826769315603, "grad_norm": 0.38521239161491394, "learning_rate": 1.4604473574672607e-05, "loss": 0.4709, "step": 32854 }, { "epoch": 0.6968038853894933, "grad_norm": 0.3415036201477051, "learning_rate": 1.4604177532644279e-05, "loss": 0.4195, "step": 32855 }, { "epoch": 0.6968250938474263, "grad_norm": 0.3701038062572479, "learning_rate": 1.4603881485495222e-05, "loss": 0.4396, "step": 32856 }, { "epoch": 0.6968463023053594, "grad_norm": 0.40746188163757324, "learning_rate": 1.460358543322577e-05, "loss": 0.5304, "step": 32857 }, { "epoch": 0.6968675107632925, "grad_norm": 0.42546114325523376, "learning_rate": 1.460328937583625e-05, "loss": 0.5086, "step": 32858 }, { "epoch": 0.6968887192212254, "grad_norm": 0.34515848755836487, "learning_rate": 1.4602993313326987e-05, "loss": 0.5136, "step": 32859 }, { "epoch": 0.6969099276791585, "grad_norm": 0.354923278093338, "learning_rate": 1.460269724569832e-05, "loss": 0.4767, "step": 32860 }, { "epoch": 0.6969311361370915, "grad_norm": 0.3674331307411194, "learning_rate": 1.4602401172950574e-05, "loss": 0.4976, "step": 32861 }, { "epoch": 0.6969523445950245, "grad_norm": 0.3336859941482544, "learning_rate": 1.4602105095084074e-05, "loss": 0.422, "step": 32862 }, { "epoch": 0.6969735530529575, "grad_norm": 0.393648624420166, "learning_rate": 1.4601809012099155e-05, "loss": 0.628, "step": 32863 }, { "epoch": 0.6969947615108906, "grad_norm": 0.4049884080886841, "learning_rate": 1.4601512923996142e-05, "loss": 0.5191, "step": 32864 }, { "epoch": 0.6970159699688235, "grad_norm": 0.41909700632095337, "learning_rate": 1.4601216830775365e-05, "loss": 0.4361, "step": 32865 }, { "epoch": 0.6970371784267566, "grad_norm": 0.35478073358535767, "learning_rate": 1.4600920732437158e-05, "loss": 0.5137, "step": 32866 }, { "epoch": 0.6970583868846896, "grad_norm": 0.33003783226013184, "learning_rate": 1.4600624628981845e-05, "loss": 0.5085, "step": 32867 }, { "epoch": 0.6970795953426226, "grad_norm": 0.3295170068740845, "learning_rate": 1.460032852040976e-05, "loss": 0.5091, "step": 32868 }, { "epoch": 0.6971008038005556, "grad_norm": 0.3923591077327728, "learning_rate": 1.4600032406721227e-05, "loss": 0.4685, "step": 32869 }, { "epoch": 0.6971220122584887, "grad_norm": 0.323670357465744, "learning_rate": 1.4599736287916577e-05, "loss": 0.5087, "step": 32870 }, { "epoch": 0.6971432207164218, "grad_norm": 0.4069947898387909, "learning_rate": 1.4599440163996142e-05, "loss": 0.5315, "step": 32871 }, { "epoch": 0.6971644291743547, "grad_norm": 0.39027151465415955, "learning_rate": 1.459914403496025e-05, "loss": 0.4617, "step": 32872 }, { "epoch": 0.6971856376322878, "grad_norm": 0.4146556854248047, "learning_rate": 1.4598847900809228e-05, "loss": 0.5164, "step": 32873 }, { "epoch": 0.6972068460902208, "grad_norm": 0.3264399468898773, "learning_rate": 1.459855176154341e-05, "loss": 0.475, "step": 32874 }, { "epoch": 0.6972280545481538, "grad_norm": 0.3762967586517334, "learning_rate": 1.4598255617163122e-05, "loss": 0.5314, "step": 32875 }, { "epoch": 0.6972492630060868, "grad_norm": 0.6079289317131042, "learning_rate": 1.4597959467668692e-05, "loss": 0.6004, "step": 32876 }, { "epoch": 0.6972704714640199, "grad_norm": 0.33598634600639343, "learning_rate": 1.4597663313060455e-05, "loss": 0.4599, "step": 32877 }, { "epoch": 0.6972916799219528, "grad_norm": 0.3452511131763458, "learning_rate": 1.4597367153338734e-05, "loss": 0.5518, "step": 32878 }, { "epoch": 0.6973128883798859, "grad_norm": 0.3439781665802002, "learning_rate": 1.459707098850386e-05, "loss": 0.3716, "step": 32879 }, { "epoch": 0.6973340968378189, "grad_norm": 0.3627296984195709, "learning_rate": 1.4596774818556165e-05, "loss": 0.5171, "step": 32880 }, { "epoch": 0.697355305295752, "grad_norm": 0.4241958260536194, "learning_rate": 1.459647864349598e-05, "loss": 0.4306, "step": 32881 }, { "epoch": 0.6973765137536849, "grad_norm": 0.38143685460090637, "learning_rate": 1.4596182463323627e-05, "loss": 0.4879, "step": 32882 }, { "epoch": 0.697397722211618, "grad_norm": 0.3297632336616516, "learning_rate": 1.4595886278039441e-05, "loss": 0.4177, "step": 32883 }, { "epoch": 0.697418930669551, "grad_norm": 0.3271179795265198, "learning_rate": 1.4595590087643752e-05, "loss": 0.4994, "step": 32884 }, { "epoch": 0.697440139127484, "grad_norm": 0.39684298634529114, "learning_rate": 1.4595293892136884e-05, "loss": 0.4783, "step": 32885 }, { "epoch": 0.6974613475854171, "grad_norm": 0.32652655243873596, "learning_rate": 1.4594997691519176e-05, "loss": 0.4772, "step": 32886 }, { "epoch": 0.6974825560433501, "grad_norm": 0.43021950125694275, "learning_rate": 1.4594701485790945e-05, "loss": 0.551, "step": 32887 }, { "epoch": 0.6975037645012832, "grad_norm": 0.375279039144516, "learning_rate": 1.459440527495253e-05, "loss": 0.4787, "step": 32888 }, { "epoch": 0.6975249729592161, "grad_norm": 0.4168890416622162, "learning_rate": 1.459410905900426e-05, "loss": 0.5214, "step": 32889 }, { "epoch": 0.6975461814171492, "grad_norm": 0.35642826557159424, "learning_rate": 1.4593812837946457e-05, "loss": 0.4764, "step": 32890 }, { "epoch": 0.6975673898750822, "grad_norm": 0.4680519700050354, "learning_rate": 1.4593516611779457e-05, "loss": 0.5149, "step": 32891 }, { "epoch": 0.6975885983330152, "grad_norm": 0.3151416480541229, "learning_rate": 1.4593220380503587e-05, "loss": 0.5338, "step": 32892 }, { "epoch": 0.6976098067909482, "grad_norm": 0.33276790380477905, "learning_rate": 1.4592924144119178e-05, "loss": 0.4831, "step": 32893 }, { "epoch": 0.6976310152488813, "grad_norm": 0.3563171625137329, "learning_rate": 1.459262790262656e-05, "loss": 0.4815, "step": 32894 }, { "epoch": 0.6976522237068142, "grad_norm": 0.3715449869632721, "learning_rate": 1.4592331656026061e-05, "loss": 0.5218, "step": 32895 }, { "epoch": 0.6976734321647473, "grad_norm": 0.3610720932483673, "learning_rate": 1.4592035404318009e-05, "loss": 0.5485, "step": 32896 }, { "epoch": 0.6976946406226803, "grad_norm": 0.3651430010795593, "learning_rate": 1.4591739147502735e-05, "loss": 0.5023, "step": 32897 }, { "epoch": 0.6977158490806133, "grad_norm": 0.5766141414642334, "learning_rate": 1.4591442885580568e-05, "loss": 0.6182, "step": 32898 }, { "epoch": 0.6977370575385464, "grad_norm": 0.3785880208015442, "learning_rate": 1.4591146618551841e-05, "loss": 0.4323, "step": 32899 }, { "epoch": 0.6977582659964794, "grad_norm": 0.809123158454895, "learning_rate": 1.4590850346416878e-05, "loss": 0.4573, "step": 32900 }, { "epoch": 0.6977794744544125, "grad_norm": 0.3440638482570648, "learning_rate": 1.4590554069176013e-05, "loss": 0.515, "step": 32901 }, { "epoch": 0.6978006829123454, "grad_norm": 0.3826197385787964, "learning_rate": 1.4590257786829573e-05, "loss": 0.5754, "step": 32902 }, { "epoch": 0.6978218913702785, "grad_norm": 0.3724534809589386, "learning_rate": 1.4589961499377889e-05, "loss": 0.5126, "step": 32903 }, { "epoch": 0.6978430998282115, "grad_norm": 0.322174996137619, "learning_rate": 1.4589665206821286e-05, "loss": 0.4876, "step": 32904 }, { "epoch": 0.6978643082861445, "grad_norm": 0.43068769574165344, "learning_rate": 1.45893689091601e-05, "loss": 0.4959, "step": 32905 }, { "epoch": 0.6978855167440775, "grad_norm": 0.34365034103393555, "learning_rate": 1.4589072606394659e-05, "loss": 0.5036, "step": 32906 }, { "epoch": 0.6979067252020106, "grad_norm": 0.3416350483894348, "learning_rate": 1.458877629852529e-05, "loss": 0.4925, "step": 32907 }, { "epoch": 0.6979279336599435, "grad_norm": 0.43181952834129333, "learning_rate": 1.4588479985552325e-05, "loss": 0.5715, "step": 32908 }, { "epoch": 0.6979491421178766, "grad_norm": 0.34810417890548706, "learning_rate": 1.4588183667476093e-05, "loss": 0.5832, "step": 32909 }, { "epoch": 0.6979703505758096, "grad_norm": 0.34886807203292847, "learning_rate": 1.4587887344296917e-05, "loss": 0.461, "step": 32910 }, { "epoch": 0.6979915590337427, "grad_norm": 0.3840618431568146, "learning_rate": 1.4587591016015138e-05, "loss": 0.5127, "step": 32911 }, { "epoch": 0.6980127674916757, "grad_norm": 0.6270167231559753, "learning_rate": 1.458729468263108e-05, "loss": 0.5092, "step": 32912 }, { "epoch": 0.6980339759496087, "grad_norm": 0.3615965247154236, "learning_rate": 1.458699834414507e-05, "loss": 0.5321, "step": 32913 }, { "epoch": 0.6980551844075418, "grad_norm": 0.3607727587223053, "learning_rate": 1.4586702000557442e-05, "loss": 0.4962, "step": 32914 }, { "epoch": 0.6980763928654747, "grad_norm": 0.3393050730228424, "learning_rate": 1.4586405651868525e-05, "loss": 0.4807, "step": 32915 }, { "epoch": 0.6980976013234078, "grad_norm": 0.3681114912033081, "learning_rate": 1.4586109298078643e-05, "loss": 0.5086, "step": 32916 }, { "epoch": 0.6981188097813408, "grad_norm": 0.3370762765407562, "learning_rate": 1.4585812939188136e-05, "loss": 0.5543, "step": 32917 }, { "epoch": 0.6981400182392739, "grad_norm": 0.35052263736724854, "learning_rate": 1.4585516575197325e-05, "loss": 0.4724, "step": 32918 }, { "epoch": 0.6981612266972068, "grad_norm": 0.33937907218933105, "learning_rate": 1.458522020610654e-05, "loss": 0.4977, "step": 32919 }, { "epoch": 0.6981824351551399, "grad_norm": 0.3937776982784271, "learning_rate": 1.4584923831916117e-05, "loss": 0.5416, "step": 32920 }, { "epoch": 0.6982036436130729, "grad_norm": 0.7389968037605286, "learning_rate": 1.4584627452626378e-05, "loss": 0.4919, "step": 32921 }, { "epoch": 0.6982248520710059, "grad_norm": 0.3685263395309448, "learning_rate": 1.458433106823766e-05, "loss": 0.4764, "step": 32922 }, { "epoch": 0.6982460605289389, "grad_norm": 0.3616262376308441, "learning_rate": 1.4584034678750287e-05, "loss": 0.5069, "step": 32923 }, { "epoch": 0.698267268986872, "grad_norm": 0.33484598994255066, "learning_rate": 1.458373828416459e-05, "loss": 0.4845, "step": 32924 }, { "epoch": 0.6982884774448049, "grad_norm": 0.3692843019962311, "learning_rate": 1.45834418844809e-05, "loss": 0.5732, "step": 32925 }, { "epoch": 0.698309685902738, "grad_norm": 0.3508831560611725, "learning_rate": 1.4583145479699545e-05, "loss": 0.4241, "step": 32926 }, { "epoch": 0.6983308943606711, "grad_norm": 0.4582703709602356, "learning_rate": 1.4582849069820854e-05, "loss": 0.5335, "step": 32927 }, { "epoch": 0.698352102818604, "grad_norm": 0.37689945101737976, "learning_rate": 1.4582552654845161e-05, "loss": 0.5862, "step": 32928 }, { "epoch": 0.6983733112765371, "grad_norm": 0.343375563621521, "learning_rate": 1.4582256234772792e-05, "loss": 0.5096, "step": 32929 }, { "epoch": 0.6983945197344701, "grad_norm": 0.34558653831481934, "learning_rate": 1.4581959809604076e-05, "loss": 0.4941, "step": 32930 }, { "epoch": 0.6984157281924032, "grad_norm": 0.36492687463760376, "learning_rate": 1.4581663379339345e-05, "loss": 0.4943, "step": 32931 }, { "epoch": 0.6984369366503361, "grad_norm": 0.3418417274951935, "learning_rate": 1.4581366943978927e-05, "loss": 0.4753, "step": 32932 }, { "epoch": 0.6984581451082692, "grad_norm": 0.34643879532814026, "learning_rate": 1.4581070503523153e-05, "loss": 0.5557, "step": 32933 }, { "epoch": 0.6984793535662022, "grad_norm": 0.3731767237186432, "learning_rate": 1.4580774057972353e-05, "loss": 0.4939, "step": 32934 }, { "epoch": 0.6985005620241352, "grad_norm": 0.4010646343231201, "learning_rate": 1.4580477607326856e-05, "loss": 0.5472, "step": 32935 }, { "epoch": 0.6985217704820682, "grad_norm": 0.35465189814567566, "learning_rate": 1.4580181151586988e-05, "loss": 0.4255, "step": 32936 }, { "epoch": 0.6985429789400013, "grad_norm": 0.3959338366985321, "learning_rate": 1.4579884690753087e-05, "loss": 0.4859, "step": 32937 }, { "epoch": 0.6985641873979342, "grad_norm": 0.3372465670108795, "learning_rate": 1.4579588224825476e-05, "loss": 0.5291, "step": 32938 }, { "epoch": 0.6985853958558673, "grad_norm": 0.35712528228759766, "learning_rate": 1.4579291753804488e-05, "loss": 0.4995, "step": 32939 }, { "epoch": 0.6986066043138004, "grad_norm": 0.3696536123752594, "learning_rate": 1.4578995277690452e-05, "loss": 0.5304, "step": 32940 }, { "epoch": 0.6986278127717334, "grad_norm": 0.3686286509037018, "learning_rate": 1.4578698796483692e-05, "loss": 0.5283, "step": 32941 }, { "epoch": 0.6986490212296664, "grad_norm": 0.3393629789352417, "learning_rate": 1.4578402310184547e-05, "loss": 0.5129, "step": 32942 }, { "epoch": 0.6986702296875994, "grad_norm": 0.32435697317123413, "learning_rate": 1.4578105818793342e-05, "loss": 0.4694, "step": 32943 }, { "epoch": 0.6986914381455325, "grad_norm": 0.32642892003059387, "learning_rate": 1.4577809322310408e-05, "loss": 0.5268, "step": 32944 }, { "epoch": 0.6987126466034654, "grad_norm": 0.3486250340938568, "learning_rate": 1.4577512820736074e-05, "loss": 0.5153, "step": 32945 }, { "epoch": 0.6987338550613985, "grad_norm": 0.36600610613822937, "learning_rate": 1.4577216314070671e-05, "loss": 0.4879, "step": 32946 }, { "epoch": 0.6987550635193315, "grad_norm": 0.4479331076145172, "learning_rate": 1.4576919802314527e-05, "loss": 0.5149, "step": 32947 }, { "epoch": 0.6987762719772646, "grad_norm": 0.46626120805740356, "learning_rate": 1.457662328546797e-05, "loss": 0.5072, "step": 32948 }, { "epoch": 0.6987974804351975, "grad_norm": 0.32599925994873047, "learning_rate": 1.4576326763531337e-05, "loss": 0.5033, "step": 32949 }, { "epoch": 0.6988186888931306, "grad_norm": 0.3696301579475403, "learning_rate": 1.457603023650495e-05, "loss": 0.5671, "step": 32950 }, { "epoch": 0.6988398973510636, "grad_norm": 0.39438340067863464, "learning_rate": 1.4575733704389143e-05, "loss": 0.4718, "step": 32951 }, { "epoch": 0.6988611058089966, "grad_norm": 0.3681281507015228, "learning_rate": 1.4575437167184243e-05, "loss": 0.4359, "step": 32952 }, { "epoch": 0.6988823142669297, "grad_norm": 0.3583178222179413, "learning_rate": 1.4575140624890584e-05, "loss": 0.4998, "step": 32953 }, { "epoch": 0.6989035227248627, "grad_norm": 0.3912630081176758, "learning_rate": 1.4574844077508494e-05, "loss": 0.5347, "step": 32954 }, { "epoch": 0.6989247311827957, "grad_norm": 0.3091774582862854, "learning_rate": 1.4574547525038298e-05, "loss": 0.4901, "step": 32955 }, { "epoch": 0.6989459396407287, "grad_norm": 0.3939416706562042, "learning_rate": 1.4574250967480335e-05, "loss": 0.5064, "step": 32956 }, { "epoch": 0.6989671480986618, "grad_norm": 0.35763058066368103, "learning_rate": 1.4573954404834927e-05, "loss": 0.4725, "step": 32957 }, { "epoch": 0.6989883565565947, "grad_norm": 0.5040351152420044, "learning_rate": 1.4573657837102406e-05, "loss": 0.5671, "step": 32958 }, { "epoch": 0.6990095650145278, "grad_norm": 0.3327198624610901, "learning_rate": 1.4573361264283104e-05, "loss": 0.4656, "step": 32959 }, { "epoch": 0.6990307734724608, "grad_norm": 0.39379963278770447, "learning_rate": 1.4573064686377348e-05, "loss": 0.4988, "step": 32960 }, { "epoch": 0.6990519819303939, "grad_norm": 0.31953880190849304, "learning_rate": 1.4572768103385468e-05, "loss": 0.4295, "step": 32961 }, { "epoch": 0.6990731903883268, "grad_norm": 0.39649152755737305, "learning_rate": 1.4572471515307798e-05, "loss": 0.4991, "step": 32962 }, { "epoch": 0.6990943988462599, "grad_norm": 0.34508517384529114, "learning_rate": 1.4572174922144666e-05, "loss": 0.4752, "step": 32963 }, { "epoch": 0.6991156073041929, "grad_norm": 0.3405628800392151, "learning_rate": 1.4571878323896398e-05, "loss": 0.5255, "step": 32964 }, { "epoch": 0.6991368157621259, "grad_norm": 0.3491063714027405, "learning_rate": 1.4571581720563328e-05, "loss": 0.477, "step": 32965 }, { "epoch": 0.6991580242200589, "grad_norm": 0.354754775762558, "learning_rate": 1.4571285112145783e-05, "loss": 0.4761, "step": 32966 }, { "epoch": 0.699179232677992, "grad_norm": 0.3633783757686615, "learning_rate": 1.4570988498644097e-05, "loss": 0.4922, "step": 32967 }, { "epoch": 0.699200441135925, "grad_norm": 0.3363036513328552, "learning_rate": 1.4570691880058594e-05, "loss": 0.4594, "step": 32968 }, { "epoch": 0.699221649593858, "grad_norm": 0.41473525762557983, "learning_rate": 1.4570395256389607e-05, "loss": 0.5536, "step": 32969 }, { "epoch": 0.6992428580517911, "grad_norm": 0.3565893769264221, "learning_rate": 1.4570098627637471e-05, "loss": 0.5327, "step": 32970 }, { "epoch": 0.6992640665097241, "grad_norm": 0.3439490497112274, "learning_rate": 1.4569801993802507e-05, "loss": 0.4233, "step": 32971 }, { "epoch": 0.6992852749676571, "grad_norm": 0.32768014073371887, "learning_rate": 1.4569505354885047e-05, "loss": 0.4979, "step": 32972 }, { "epoch": 0.6993064834255901, "grad_norm": 0.4097657799720764, "learning_rate": 1.4569208710885428e-05, "loss": 0.5069, "step": 32973 }, { "epoch": 0.6993276918835232, "grad_norm": 0.38630545139312744, "learning_rate": 1.456891206180397e-05, "loss": 0.4931, "step": 32974 }, { "epoch": 0.6993489003414561, "grad_norm": 0.3629406690597534, "learning_rate": 1.4568615407641012e-05, "loss": 0.4607, "step": 32975 }, { "epoch": 0.6993701087993892, "grad_norm": 0.38992899656295776, "learning_rate": 1.4568318748396877e-05, "loss": 0.5211, "step": 32976 }, { "epoch": 0.6993913172573222, "grad_norm": 0.33297279477119446, "learning_rate": 1.45680220840719e-05, "loss": 0.5475, "step": 32977 }, { "epoch": 0.6994125257152553, "grad_norm": 0.35570117831230164, "learning_rate": 1.4567725414666405e-05, "loss": 0.5309, "step": 32978 }, { "epoch": 0.6994337341731882, "grad_norm": 0.3134852647781372, "learning_rate": 1.4567428740180729e-05, "loss": 0.4796, "step": 32979 }, { "epoch": 0.6994549426311213, "grad_norm": 0.361395001411438, "learning_rate": 1.4567132060615195e-05, "loss": 0.5756, "step": 32980 }, { "epoch": 0.6994761510890544, "grad_norm": 0.3390013575553894, "learning_rate": 1.4566835375970136e-05, "loss": 0.5039, "step": 32981 }, { "epoch": 0.6994973595469873, "grad_norm": 0.36009323596954346, "learning_rate": 1.4566538686245886e-05, "loss": 0.5459, "step": 32982 }, { "epoch": 0.6995185680049204, "grad_norm": 0.3292183578014374, "learning_rate": 1.456624199144277e-05, "loss": 0.4694, "step": 32983 }, { "epoch": 0.6995397764628534, "grad_norm": 0.3697098195552826, "learning_rate": 1.456594529156112e-05, "loss": 0.4802, "step": 32984 }, { "epoch": 0.6995609849207864, "grad_norm": 0.351681649684906, "learning_rate": 1.4565648586601264e-05, "loss": 0.5934, "step": 32985 }, { "epoch": 0.6995821933787194, "grad_norm": 0.37839213013648987, "learning_rate": 1.4565351876563533e-05, "loss": 0.5183, "step": 32986 }, { "epoch": 0.6996034018366525, "grad_norm": 0.3464716374874115, "learning_rate": 1.4565055161448258e-05, "loss": 0.5007, "step": 32987 }, { "epoch": 0.6996246102945854, "grad_norm": 0.3292224109172821, "learning_rate": 1.4564758441255768e-05, "loss": 0.4359, "step": 32988 }, { "epoch": 0.6996458187525185, "grad_norm": 0.3530173897743225, "learning_rate": 1.4564461715986395e-05, "loss": 0.5308, "step": 32989 }, { "epoch": 0.6996670272104515, "grad_norm": 0.3860490918159485, "learning_rate": 1.4564164985640465e-05, "loss": 0.5435, "step": 32990 }, { "epoch": 0.6996882356683846, "grad_norm": 0.35402974486351013, "learning_rate": 1.4563868250218314e-05, "loss": 0.5026, "step": 32991 }, { "epoch": 0.6997094441263175, "grad_norm": 0.3381396234035492, "learning_rate": 1.4563571509720265e-05, "loss": 0.4457, "step": 32992 }, { "epoch": 0.6997306525842506, "grad_norm": 0.4453727900981903, "learning_rate": 1.456327476414665e-05, "loss": 0.4569, "step": 32993 }, { "epoch": 0.6997518610421837, "grad_norm": 0.4463789463043213, "learning_rate": 1.4562978013497805e-05, "loss": 0.5146, "step": 32994 }, { "epoch": 0.6997730695001166, "grad_norm": 0.4296920895576477, "learning_rate": 1.4562681257774052e-05, "loss": 0.5398, "step": 32995 }, { "epoch": 0.6997942779580497, "grad_norm": 0.40621718764305115, "learning_rate": 1.4562384496975725e-05, "loss": 0.4252, "step": 32996 }, { "epoch": 0.6998154864159827, "grad_norm": 0.32448941469192505, "learning_rate": 1.4562087731103155e-05, "loss": 0.4852, "step": 32997 }, { "epoch": 0.6998366948739158, "grad_norm": 0.35493454337120056, "learning_rate": 1.456179096015667e-05, "loss": 0.4821, "step": 32998 }, { "epoch": 0.6998579033318487, "grad_norm": 0.3571750819683075, "learning_rate": 1.4561494184136602e-05, "loss": 0.4765, "step": 32999 }, { "epoch": 0.6998791117897818, "grad_norm": 0.39966052770614624, "learning_rate": 1.4561197403043275e-05, "loss": 0.4904, "step": 33000 }, { "epoch": 0.6999003202477148, "grad_norm": 0.33649274706840515, "learning_rate": 1.4560900616877028e-05, "loss": 0.4832, "step": 33001 }, { "epoch": 0.6999215287056478, "grad_norm": 0.3868507444858551, "learning_rate": 1.4560603825638188e-05, "loss": 0.5207, "step": 33002 }, { "epoch": 0.6999427371635808, "grad_norm": 0.3712369203567505, "learning_rate": 1.456030702932708e-05, "loss": 0.5271, "step": 33003 }, { "epoch": 0.6999639456215139, "grad_norm": 0.4110107719898224, "learning_rate": 1.4560010227944042e-05, "loss": 0.5023, "step": 33004 }, { "epoch": 0.6999851540794468, "grad_norm": 0.37839651107788086, "learning_rate": 1.45597134214894e-05, "loss": 0.4426, "step": 33005 }, { "epoch": 0.7000063625373799, "grad_norm": 0.35230743885040283, "learning_rate": 1.4559416609963479e-05, "loss": 0.4907, "step": 33006 }, { "epoch": 0.7000275709953129, "grad_norm": 0.3464234471321106, "learning_rate": 1.4559119793366621e-05, "loss": 0.4946, "step": 33007 }, { "epoch": 0.700048779453246, "grad_norm": 0.34994226694107056, "learning_rate": 1.4558822971699147e-05, "loss": 0.5293, "step": 33008 }, { "epoch": 0.700069987911179, "grad_norm": 0.35413625836372375, "learning_rate": 1.4558526144961389e-05, "loss": 0.4861, "step": 33009 }, { "epoch": 0.700091196369112, "grad_norm": 0.3521040380001068, "learning_rate": 1.455822931315368e-05, "loss": 0.4603, "step": 33010 }, { "epoch": 0.7001124048270451, "grad_norm": 0.4187015891075134, "learning_rate": 1.4557932476276344e-05, "loss": 0.5114, "step": 33011 }, { "epoch": 0.700133613284978, "grad_norm": 0.32720115780830383, "learning_rate": 1.4557635634329722e-05, "loss": 0.3931, "step": 33012 }, { "epoch": 0.7001548217429111, "grad_norm": 0.35002782940864563, "learning_rate": 1.455733878731413e-05, "loss": 0.4531, "step": 33013 }, { "epoch": 0.7001760302008441, "grad_norm": 0.31603243947029114, "learning_rate": 1.4557041935229906e-05, "loss": 0.4243, "step": 33014 }, { "epoch": 0.7001972386587771, "grad_norm": 0.3805725872516632, "learning_rate": 1.4556745078077383e-05, "loss": 0.4991, "step": 33015 }, { "epoch": 0.7002184471167101, "grad_norm": 0.3165493607521057, "learning_rate": 1.4556448215856888e-05, "loss": 0.4324, "step": 33016 }, { "epoch": 0.7002396555746432, "grad_norm": 0.4102911353111267, "learning_rate": 1.4556151348568748e-05, "loss": 0.4756, "step": 33017 }, { "epoch": 0.7002608640325761, "grad_norm": 0.3606081008911133, "learning_rate": 1.4555854476213298e-05, "loss": 0.4626, "step": 33018 }, { "epoch": 0.7002820724905092, "grad_norm": 0.3715139627456665, "learning_rate": 1.4555557598790866e-05, "loss": 0.5125, "step": 33019 }, { "epoch": 0.7003032809484422, "grad_norm": 0.3473382592201233, "learning_rate": 1.455526071630178e-05, "loss": 0.374, "step": 33020 }, { "epoch": 0.7003244894063753, "grad_norm": 0.36719629168510437, "learning_rate": 1.4554963828746376e-05, "loss": 0.508, "step": 33021 }, { "epoch": 0.7003456978643083, "grad_norm": 0.3645470440387726, "learning_rate": 1.4554666936124982e-05, "loss": 0.5202, "step": 33022 }, { "epoch": 0.7003669063222413, "grad_norm": 0.3151441216468811, "learning_rate": 1.4554370038437922e-05, "loss": 0.4354, "step": 33023 }, { "epoch": 0.7003881147801744, "grad_norm": 0.34090209007263184, "learning_rate": 1.4554073135685537e-05, "loss": 0.4891, "step": 33024 }, { "epoch": 0.7004093232381073, "grad_norm": 0.33184754848480225, "learning_rate": 1.4553776227868143e-05, "loss": 0.454, "step": 33025 }, { "epoch": 0.7004305316960404, "grad_norm": 0.3554907441139221, "learning_rate": 1.4553479314986083e-05, "loss": 0.4864, "step": 33026 }, { "epoch": 0.7004517401539734, "grad_norm": 0.3525618612766266, "learning_rate": 1.4553182397039685e-05, "loss": 0.4673, "step": 33027 }, { "epoch": 0.7004729486119065, "grad_norm": 0.34899839758872986, "learning_rate": 1.4552885474029275e-05, "loss": 0.4979, "step": 33028 }, { "epoch": 0.7004941570698394, "grad_norm": 0.40668487548828125, "learning_rate": 1.4552588545955187e-05, "loss": 0.4882, "step": 33029 }, { "epoch": 0.7005153655277725, "grad_norm": 0.37106940150260925, "learning_rate": 1.4552291612817751e-05, "loss": 0.5171, "step": 33030 }, { "epoch": 0.7005365739857055, "grad_norm": 0.2975705862045288, "learning_rate": 1.4551994674617291e-05, "loss": 0.4099, "step": 33031 }, { "epoch": 0.7005577824436385, "grad_norm": 0.32326680421829224, "learning_rate": 1.4551697731354144e-05, "loss": 0.5158, "step": 33032 }, { "epoch": 0.7005789909015715, "grad_norm": 0.3293575346469879, "learning_rate": 1.455140078302864e-05, "loss": 0.435, "step": 33033 }, { "epoch": 0.7006001993595046, "grad_norm": 0.3346252143383026, "learning_rate": 1.4551103829641105e-05, "loss": 0.4984, "step": 33034 }, { "epoch": 0.7006214078174376, "grad_norm": 0.3349182605743408, "learning_rate": 1.4550806871191873e-05, "loss": 0.4933, "step": 33035 }, { "epoch": 0.7006426162753706, "grad_norm": 0.3793272376060486, "learning_rate": 1.4550509907681276e-05, "loss": 0.4875, "step": 33036 }, { "epoch": 0.7006638247333037, "grad_norm": 0.33660632371902466, "learning_rate": 1.4550212939109638e-05, "loss": 0.5035, "step": 33037 }, { "epoch": 0.7006850331912367, "grad_norm": 0.4331516921520233, "learning_rate": 1.4549915965477293e-05, "loss": 0.4591, "step": 33038 }, { "epoch": 0.7007062416491697, "grad_norm": 0.4353526532649994, "learning_rate": 1.454961898678457e-05, "loss": 0.5145, "step": 33039 }, { "epoch": 0.7007274501071027, "grad_norm": 0.4096089005470276, "learning_rate": 1.4549322003031802e-05, "loss": 0.5672, "step": 33040 }, { "epoch": 0.7007486585650358, "grad_norm": 0.33666178584098816, "learning_rate": 1.4549025014219317e-05, "loss": 0.5042, "step": 33041 }, { "epoch": 0.7007698670229687, "grad_norm": 0.3845852315425873, "learning_rate": 1.4548728020347448e-05, "loss": 0.4704, "step": 33042 }, { "epoch": 0.7007910754809018, "grad_norm": 0.29988810420036316, "learning_rate": 1.4548431021416521e-05, "loss": 0.4103, "step": 33043 }, { "epoch": 0.7008122839388348, "grad_norm": 0.3753069043159485, "learning_rate": 1.454813401742687e-05, "loss": 0.5243, "step": 33044 }, { "epoch": 0.7008334923967678, "grad_norm": 0.36687958240509033, "learning_rate": 1.4547837008378822e-05, "loss": 0.4986, "step": 33045 }, { "epoch": 0.7008547008547008, "grad_norm": 0.33551904559135437, "learning_rate": 1.4547539994272707e-05, "loss": 0.4712, "step": 33046 }, { "epoch": 0.7008759093126339, "grad_norm": 0.3573923408985138, "learning_rate": 1.4547242975108864e-05, "loss": 0.4729, "step": 33047 }, { "epoch": 0.700897117770567, "grad_norm": 0.365400105714798, "learning_rate": 1.4546945950887612e-05, "loss": 0.454, "step": 33048 }, { "epoch": 0.7009183262284999, "grad_norm": 0.3464590311050415, "learning_rate": 1.4546648921609287e-05, "loss": 0.4931, "step": 33049 }, { "epoch": 0.700939534686433, "grad_norm": 0.35331737995147705, "learning_rate": 1.4546351887274222e-05, "loss": 0.4692, "step": 33050 }, { "epoch": 0.700960743144366, "grad_norm": 0.3236985504627228, "learning_rate": 1.4546054847882739e-05, "loss": 0.4485, "step": 33051 }, { "epoch": 0.700981951602299, "grad_norm": 0.3655557334423065, "learning_rate": 1.4545757803435175e-05, "loss": 0.5117, "step": 33052 }, { "epoch": 0.701003160060232, "grad_norm": 0.34019261598587036, "learning_rate": 1.454546075393186e-05, "loss": 0.5208, "step": 33053 }, { "epoch": 0.7010243685181651, "grad_norm": 0.3656967282295227, "learning_rate": 1.4545163699373122e-05, "loss": 0.5564, "step": 33054 }, { "epoch": 0.701045576976098, "grad_norm": 0.39378777146339417, "learning_rate": 1.4544866639759291e-05, "loss": 0.5272, "step": 33055 }, { "epoch": 0.7010667854340311, "grad_norm": 0.33989304304122925, "learning_rate": 1.4544569575090702e-05, "loss": 0.412, "step": 33056 }, { "epoch": 0.7010879938919641, "grad_norm": 0.39244771003723145, "learning_rate": 1.4544272505367679e-05, "loss": 0.4885, "step": 33057 }, { "epoch": 0.7011092023498972, "grad_norm": 0.36002427339553833, "learning_rate": 1.4543975430590557e-05, "loss": 0.4987, "step": 33058 }, { "epoch": 0.7011304108078301, "grad_norm": 0.4347066283226013, "learning_rate": 1.4543678350759666e-05, "loss": 0.501, "step": 33059 }, { "epoch": 0.7011516192657632, "grad_norm": 0.33983930945396423, "learning_rate": 1.4543381265875335e-05, "loss": 0.4375, "step": 33060 }, { "epoch": 0.7011728277236962, "grad_norm": 0.35199955105781555, "learning_rate": 1.4543084175937894e-05, "loss": 0.4494, "step": 33061 }, { "epoch": 0.7011940361816292, "grad_norm": 0.3119840919971466, "learning_rate": 1.4542787080947673e-05, "loss": 0.4468, "step": 33062 }, { "epoch": 0.7012152446395623, "grad_norm": 0.37969720363616943, "learning_rate": 1.4542489980905009e-05, "loss": 0.538, "step": 33063 }, { "epoch": 0.7012364530974953, "grad_norm": 0.39000093936920166, "learning_rate": 1.454219287581022e-05, "loss": 0.5275, "step": 33064 }, { "epoch": 0.7012576615554283, "grad_norm": 0.3459857106208801, "learning_rate": 1.4541895765663645e-05, "loss": 0.5782, "step": 33065 }, { "epoch": 0.7012788700133613, "grad_norm": 0.339557409286499, "learning_rate": 1.4541598650465619e-05, "loss": 0.4576, "step": 33066 }, { "epoch": 0.7013000784712944, "grad_norm": 0.3605565130710602, "learning_rate": 1.454130153021646e-05, "loss": 0.4649, "step": 33067 }, { "epoch": 0.7013212869292273, "grad_norm": 0.34060797095298767, "learning_rate": 1.4541004404916507e-05, "loss": 0.5042, "step": 33068 }, { "epoch": 0.7013424953871604, "grad_norm": 0.3611084222793579, "learning_rate": 1.4540707274566091e-05, "loss": 0.4824, "step": 33069 }, { "epoch": 0.7013637038450934, "grad_norm": 0.34188830852508545, "learning_rate": 1.4540410139165536e-05, "loss": 0.468, "step": 33070 }, { "epoch": 0.7013849123030265, "grad_norm": 0.38630199432373047, "learning_rate": 1.4540112998715177e-05, "loss": 0.4819, "step": 33071 }, { "epoch": 0.7014061207609594, "grad_norm": 0.3592623472213745, "learning_rate": 1.4539815853215344e-05, "loss": 0.4795, "step": 33072 }, { "epoch": 0.7014273292188925, "grad_norm": 0.40913283824920654, "learning_rate": 1.4539518702666369e-05, "loss": 0.6763, "step": 33073 }, { "epoch": 0.7014485376768255, "grad_norm": 0.3345910310745239, "learning_rate": 1.4539221547068577e-05, "loss": 0.4318, "step": 33074 }, { "epoch": 0.7014697461347585, "grad_norm": 0.3412688374519348, "learning_rate": 1.4538924386422309e-05, "loss": 0.4818, "step": 33075 }, { "epoch": 0.7014909545926916, "grad_norm": 0.3568980097770691, "learning_rate": 1.4538627220727881e-05, "loss": 0.4086, "step": 33076 }, { "epoch": 0.7015121630506246, "grad_norm": 0.3809269964694977, "learning_rate": 1.4538330049985633e-05, "loss": 0.4472, "step": 33077 }, { "epoch": 0.7015333715085577, "grad_norm": 0.350410521030426, "learning_rate": 1.4538032874195898e-05, "loss": 0.4742, "step": 33078 }, { "epoch": 0.7015545799664906, "grad_norm": 0.38435572385787964, "learning_rate": 1.4537735693359e-05, "loss": 0.5506, "step": 33079 }, { "epoch": 0.7015757884244237, "grad_norm": 0.33574748039245605, "learning_rate": 1.453743850747527e-05, "loss": 0.4632, "step": 33080 }, { "epoch": 0.7015969968823567, "grad_norm": 0.3575081527233124, "learning_rate": 1.4537141316545042e-05, "loss": 0.5038, "step": 33081 }, { "epoch": 0.7016182053402897, "grad_norm": 0.493501752614975, "learning_rate": 1.4536844120568645e-05, "loss": 0.5801, "step": 33082 }, { "epoch": 0.7016394137982227, "grad_norm": 0.36078882217407227, "learning_rate": 1.4536546919546408e-05, "loss": 0.503, "step": 33083 }, { "epoch": 0.7016606222561558, "grad_norm": 0.530698835849762, "learning_rate": 1.4536249713478663e-05, "loss": 0.5378, "step": 33084 }, { "epoch": 0.7016818307140887, "grad_norm": 0.3337254226207733, "learning_rate": 1.4535952502365742e-05, "loss": 0.4614, "step": 33085 }, { "epoch": 0.7017030391720218, "grad_norm": 0.35606878995895386, "learning_rate": 1.4535655286207973e-05, "loss": 0.5029, "step": 33086 }, { "epoch": 0.7017242476299548, "grad_norm": 0.32079678773880005, "learning_rate": 1.4535358065005689e-05, "loss": 0.4463, "step": 33087 }, { "epoch": 0.7017454560878879, "grad_norm": 0.36395296454429626, "learning_rate": 1.4535060838759219e-05, "loss": 0.4702, "step": 33088 }, { "epoch": 0.7017666645458209, "grad_norm": 0.3597978949546814, "learning_rate": 1.4534763607468891e-05, "loss": 0.5371, "step": 33089 }, { "epoch": 0.7017878730037539, "grad_norm": 0.3458199203014374, "learning_rate": 1.4534466371135041e-05, "loss": 0.4362, "step": 33090 }, { "epoch": 0.701809081461687, "grad_norm": 0.3935477137565613, "learning_rate": 1.4534169129757993e-05, "loss": 0.492, "step": 33091 }, { "epoch": 0.7018302899196199, "grad_norm": 0.3409208655357361, "learning_rate": 1.4533871883338086e-05, "loss": 0.4257, "step": 33092 }, { "epoch": 0.701851498377553, "grad_norm": 0.3947848379611969, "learning_rate": 1.4533574631875645e-05, "loss": 0.5685, "step": 33093 }, { "epoch": 0.701872706835486, "grad_norm": 0.3892092704772949, "learning_rate": 1.4533277375371001e-05, "loss": 0.4448, "step": 33094 }, { "epoch": 0.701893915293419, "grad_norm": 0.4108012318611145, "learning_rate": 1.4532980113824486e-05, "loss": 0.464, "step": 33095 }, { "epoch": 0.701915123751352, "grad_norm": 0.37267521023750305, "learning_rate": 1.4532682847236428e-05, "loss": 0.448, "step": 33096 }, { "epoch": 0.7019363322092851, "grad_norm": 0.3846169710159302, "learning_rate": 1.4532385575607162e-05, "loss": 0.4586, "step": 33097 }, { "epoch": 0.701957540667218, "grad_norm": 0.33537957072257996, "learning_rate": 1.4532088298937016e-05, "loss": 0.4614, "step": 33098 }, { "epoch": 0.7019787491251511, "grad_norm": 0.3657890558242798, "learning_rate": 1.4531791017226318e-05, "loss": 0.5468, "step": 33099 }, { "epoch": 0.7019999575830841, "grad_norm": 0.35518187284469604, "learning_rate": 1.4531493730475405e-05, "loss": 0.4753, "step": 33100 }, { "epoch": 0.7020211660410172, "grad_norm": 0.36465439200401306, "learning_rate": 1.4531196438684602e-05, "loss": 0.5288, "step": 33101 }, { "epoch": 0.7020423744989501, "grad_norm": 0.4268132448196411, "learning_rate": 1.453089914185424e-05, "loss": 0.4935, "step": 33102 }, { "epoch": 0.7020635829568832, "grad_norm": 0.6089202165603638, "learning_rate": 1.4530601839984654e-05, "loss": 0.5002, "step": 33103 }, { "epoch": 0.7020847914148163, "grad_norm": 0.354287713766098, "learning_rate": 1.4530304533076174e-05, "loss": 0.5131, "step": 33104 }, { "epoch": 0.7021059998727492, "grad_norm": 0.3594745695590973, "learning_rate": 1.4530007221129124e-05, "loss": 0.4097, "step": 33105 }, { "epoch": 0.7021272083306823, "grad_norm": 0.33844026923179626, "learning_rate": 1.4529709904143843e-05, "loss": 0.4983, "step": 33106 }, { "epoch": 0.7021484167886153, "grad_norm": 0.37871551513671875, "learning_rate": 1.4529412582120655e-05, "loss": 0.5465, "step": 33107 }, { "epoch": 0.7021696252465484, "grad_norm": 0.31659823656082153, "learning_rate": 1.4529115255059897e-05, "loss": 0.4062, "step": 33108 }, { "epoch": 0.7021908337044813, "grad_norm": 0.33332231640815735, "learning_rate": 1.452881792296189e-05, "loss": 0.4335, "step": 33109 }, { "epoch": 0.7022120421624144, "grad_norm": 0.35587742924690247, "learning_rate": 1.4528520585826975e-05, "loss": 0.4753, "step": 33110 }, { "epoch": 0.7022332506203474, "grad_norm": 0.3459301292896271, "learning_rate": 1.4528223243655482e-05, "loss": 0.4771, "step": 33111 }, { "epoch": 0.7022544590782804, "grad_norm": 0.3916700482368469, "learning_rate": 1.4527925896447737e-05, "loss": 0.5211, "step": 33112 }, { "epoch": 0.7022756675362134, "grad_norm": 0.35256215929985046, "learning_rate": 1.4527628544204069e-05, "loss": 0.5238, "step": 33113 }, { "epoch": 0.7022968759941465, "grad_norm": 0.3629836142063141, "learning_rate": 1.4527331186924814e-05, "loss": 0.4903, "step": 33114 }, { "epoch": 0.7023180844520794, "grad_norm": 0.34516462683677673, "learning_rate": 1.45270338246103e-05, "loss": 0.4798, "step": 33115 }, { "epoch": 0.7023392929100125, "grad_norm": 0.39345577359199524, "learning_rate": 1.4526736457260858e-05, "loss": 0.4886, "step": 33116 }, { "epoch": 0.7023605013679456, "grad_norm": 0.35823380947113037, "learning_rate": 1.4526439084876818e-05, "loss": 0.5915, "step": 33117 }, { "epoch": 0.7023817098258786, "grad_norm": 0.32948437333106995, "learning_rate": 1.4526141707458515e-05, "loss": 0.4323, "step": 33118 }, { "epoch": 0.7024029182838116, "grad_norm": 0.3920789361000061, "learning_rate": 1.4525844325006274e-05, "loss": 0.5223, "step": 33119 }, { "epoch": 0.7024241267417446, "grad_norm": 0.4411414563655853, "learning_rate": 1.452554693752043e-05, "loss": 0.5237, "step": 33120 }, { "epoch": 0.7024453351996777, "grad_norm": 0.6941103935241699, "learning_rate": 1.452524954500131e-05, "loss": 0.5221, "step": 33121 }, { "epoch": 0.7024665436576106, "grad_norm": 0.37255847454071045, "learning_rate": 1.4524952147449247e-05, "loss": 0.5027, "step": 33122 }, { "epoch": 0.7024877521155437, "grad_norm": 0.39771077036857605, "learning_rate": 1.4524654744864573e-05, "loss": 0.4884, "step": 33123 }, { "epoch": 0.7025089605734767, "grad_norm": 0.3543703556060791, "learning_rate": 1.4524357337247615e-05, "loss": 0.5985, "step": 33124 }, { "epoch": 0.7025301690314097, "grad_norm": 0.3298397362232208, "learning_rate": 1.452405992459871e-05, "loss": 0.4787, "step": 33125 }, { "epoch": 0.7025513774893427, "grad_norm": 0.389270156621933, "learning_rate": 1.4523762506918183e-05, "loss": 0.5115, "step": 33126 }, { "epoch": 0.7025725859472758, "grad_norm": 0.3574402630329132, "learning_rate": 1.4523465084206366e-05, "loss": 0.4834, "step": 33127 }, { "epoch": 0.7025937944052087, "grad_norm": 0.337509423494339, "learning_rate": 1.4523167656463588e-05, "loss": 0.5159, "step": 33128 }, { "epoch": 0.7026150028631418, "grad_norm": 0.3606877624988556, "learning_rate": 1.4522870223690186e-05, "loss": 0.5214, "step": 33129 }, { "epoch": 0.7026362113210749, "grad_norm": 0.34285762906074524, "learning_rate": 1.4522572785886486e-05, "loss": 0.4876, "step": 33130 }, { "epoch": 0.7026574197790079, "grad_norm": 0.42031729221343994, "learning_rate": 1.4522275343052821e-05, "loss": 0.5236, "step": 33131 }, { "epoch": 0.7026786282369409, "grad_norm": 0.43759554624557495, "learning_rate": 1.4521977895189518e-05, "loss": 0.468, "step": 33132 }, { "epoch": 0.7026998366948739, "grad_norm": 0.363178014755249, "learning_rate": 1.4521680442296912e-05, "loss": 0.4396, "step": 33133 }, { "epoch": 0.702721045152807, "grad_norm": 0.44341593980789185, "learning_rate": 1.4521382984375332e-05, "loss": 0.4542, "step": 33134 }, { "epoch": 0.7027422536107399, "grad_norm": 0.35583123564720154, "learning_rate": 1.4521085521425108e-05, "loss": 0.5529, "step": 33135 }, { "epoch": 0.702763462068673, "grad_norm": 0.35788494348526, "learning_rate": 1.4520788053446573e-05, "loss": 0.5369, "step": 33136 }, { "epoch": 0.702784670526606, "grad_norm": 0.3748089075088501, "learning_rate": 1.452049058044006e-05, "loss": 0.5068, "step": 33137 }, { "epoch": 0.7028058789845391, "grad_norm": 0.3836161196231842, "learning_rate": 1.452019310240589e-05, "loss": 0.4695, "step": 33138 }, { "epoch": 0.702827087442472, "grad_norm": 0.4069160521030426, "learning_rate": 1.4519895619344407e-05, "loss": 0.5422, "step": 33139 }, { "epoch": 0.7028482959004051, "grad_norm": 0.34288737177848816, "learning_rate": 1.4519598131255933e-05, "loss": 0.4736, "step": 33140 }, { "epoch": 0.7028695043583381, "grad_norm": 0.3460433781147003, "learning_rate": 1.4519300638140799e-05, "loss": 0.4573, "step": 33141 }, { "epoch": 0.7028907128162711, "grad_norm": 0.7374523878097534, "learning_rate": 1.451900313999934e-05, "loss": 0.5722, "step": 33142 }, { "epoch": 0.7029119212742041, "grad_norm": 0.35034045577049255, "learning_rate": 1.4518705636831885e-05, "loss": 0.4928, "step": 33143 }, { "epoch": 0.7029331297321372, "grad_norm": 0.8281859755516052, "learning_rate": 1.4518408128638763e-05, "loss": 0.5237, "step": 33144 }, { "epoch": 0.7029543381900702, "grad_norm": 0.4005947411060333, "learning_rate": 1.4518110615420309e-05, "loss": 0.4872, "step": 33145 }, { "epoch": 0.7029755466480032, "grad_norm": 0.43294233083724976, "learning_rate": 1.4517813097176853e-05, "loss": 0.4249, "step": 33146 }, { "epoch": 0.7029967551059363, "grad_norm": 0.3654398024082184, "learning_rate": 1.451751557390872e-05, "loss": 0.4487, "step": 33147 }, { "epoch": 0.7030179635638693, "grad_norm": 0.33062979578971863, "learning_rate": 1.4517218045616248e-05, "loss": 0.4671, "step": 33148 }, { "epoch": 0.7030391720218023, "grad_norm": 0.3764755427837372, "learning_rate": 1.4516920512299768e-05, "loss": 0.5403, "step": 33149 }, { "epoch": 0.7030603804797353, "grad_norm": 0.3646509051322937, "learning_rate": 1.4516622973959602e-05, "loss": 0.4878, "step": 33150 }, { "epoch": 0.7030815889376684, "grad_norm": 0.3596521317958832, "learning_rate": 1.4516325430596092e-05, "loss": 0.469, "step": 33151 }, { "epoch": 0.7031027973956013, "grad_norm": 0.3479968011379242, "learning_rate": 1.4516027882209562e-05, "loss": 0.5158, "step": 33152 }, { "epoch": 0.7031240058535344, "grad_norm": 0.33450180292129517, "learning_rate": 1.4515730328800346e-05, "loss": 0.5157, "step": 33153 }, { "epoch": 0.7031452143114674, "grad_norm": 0.3971370756626129, "learning_rate": 1.4515432770368774e-05, "loss": 0.444, "step": 33154 }, { "epoch": 0.7031664227694004, "grad_norm": 0.5077195763587952, "learning_rate": 1.4515135206915179e-05, "loss": 0.4742, "step": 33155 }, { "epoch": 0.7031876312273334, "grad_norm": 0.39746004343032837, "learning_rate": 1.4514837638439884e-05, "loss": 0.5494, "step": 33156 }, { "epoch": 0.7032088396852665, "grad_norm": 0.3951699137687683, "learning_rate": 1.451454006494323e-05, "loss": 0.5239, "step": 33157 }, { "epoch": 0.7032300481431996, "grad_norm": 0.38747072219848633, "learning_rate": 1.4514242486425543e-05, "loss": 0.4918, "step": 33158 }, { "epoch": 0.7032512566011325, "grad_norm": 0.351389080286026, "learning_rate": 1.4513944902887156e-05, "loss": 0.5406, "step": 33159 }, { "epoch": 0.7032724650590656, "grad_norm": 0.3871843218803406, "learning_rate": 1.4513647314328397e-05, "loss": 0.6109, "step": 33160 }, { "epoch": 0.7032936735169986, "grad_norm": 0.389960914850235, "learning_rate": 1.4513349720749599e-05, "loss": 0.5468, "step": 33161 }, { "epoch": 0.7033148819749316, "grad_norm": 0.34425872564315796, "learning_rate": 1.4513052122151093e-05, "loss": 0.5287, "step": 33162 }, { "epoch": 0.7033360904328646, "grad_norm": 0.3486156165599823, "learning_rate": 1.451275451853321e-05, "loss": 0.5189, "step": 33163 }, { "epoch": 0.7033572988907977, "grad_norm": 0.3670141398906708, "learning_rate": 1.451245690989628e-05, "loss": 0.4959, "step": 33164 }, { "epoch": 0.7033785073487306, "grad_norm": 1.0729639530181885, "learning_rate": 1.4512159296240635e-05, "loss": 0.5724, "step": 33165 }, { "epoch": 0.7033997158066637, "grad_norm": 0.5110083222389221, "learning_rate": 1.4511861677566604e-05, "loss": 0.4464, "step": 33166 }, { "epoch": 0.7034209242645967, "grad_norm": 0.4902791380882263, "learning_rate": 1.451156405387452e-05, "loss": 0.4669, "step": 33167 }, { "epoch": 0.7034421327225298, "grad_norm": 0.40870198607444763, "learning_rate": 1.4511266425164717e-05, "loss": 0.4917, "step": 33168 }, { "epoch": 0.7034633411804627, "grad_norm": 0.35264575481414795, "learning_rate": 1.451096879143752e-05, "loss": 0.4766, "step": 33169 }, { "epoch": 0.7034845496383958, "grad_norm": 0.33952030539512634, "learning_rate": 1.4510671152693261e-05, "loss": 0.4847, "step": 33170 }, { "epoch": 0.7035057580963289, "grad_norm": 0.35037675499916077, "learning_rate": 1.4510373508932276e-05, "loss": 0.5328, "step": 33171 }, { "epoch": 0.7035269665542618, "grad_norm": 0.526141881942749, "learning_rate": 1.451007586015489e-05, "loss": 0.5333, "step": 33172 }, { "epoch": 0.7035481750121949, "grad_norm": 0.3906261622905731, "learning_rate": 1.4509778206361439e-05, "loss": 0.5264, "step": 33173 }, { "epoch": 0.7035693834701279, "grad_norm": 0.697596549987793, "learning_rate": 1.450948054755225e-05, "loss": 0.449, "step": 33174 }, { "epoch": 0.703590591928061, "grad_norm": 0.34326308965682983, "learning_rate": 1.4509182883727657e-05, "loss": 0.5201, "step": 33175 }, { "epoch": 0.7036118003859939, "grad_norm": 0.35949215292930603, "learning_rate": 1.4508885214887991e-05, "loss": 0.4919, "step": 33176 }, { "epoch": 0.703633008843927, "grad_norm": 0.3774084448814392, "learning_rate": 1.450858754103358e-05, "loss": 0.4994, "step": 33177 }, { "epoch": 0.70365421730186, "grad_norm": 0.408200204372406, "learning_rate": 1.4508289862164757e-05, "loss": 0.5462, "step": 33178 }, { "epoch": 0.703675425759793, "grad_norm": 0.5525862574577332, "learning_rate": 1.4507992178281855e-05, "loss": 0.482, "step": 33179 }, { "epoch": 0.703696634217726, "grad_norm": 0.3364844024181366, "learning_rate": 1.4507694489385205e-05, "loss": 0.4457, "step": 33180 }, { "epoch": 0.7037178426756591, "grad_norm": 0.41901442408561707, "learning_rate": 1.4507396795475131e-05, "loss": 0.5323, "step": 33181 }, { "epoch": 0.703739051133592, "grad_norm": 0.3553917109966278, "learning_rate": 1.4507099096551974e-05, "loss": 0.4718, "step": 33182 }, { "epoch": 0.7037602595915251, "grad_norm": 0.35700541734695435, "learning_rate": 1.4506801392616059e-05, "loss": 0.5085, "step": 33183 }, { "epoch": 0.7037814680494581, "grad_norm": 0.3399339020252228, "learning_rate": 1.450650368366772e-05, "loss": 0.4882, "step": 33184 }, { "epoch": 0.7038026765073911, "grad_norm": 0.3977285921573639, "learning_rate": 1.4506205969707283e-05, "loss": 0.5475, "step": 33185 }, { "epoch": 0.7038238849653242, "grad_norm": 0.40528470277786255, "learning_rate": 1.4505908250735086e-05, "loss": 0.529, "step": 33186 }, { "epoch": 0.7038450934232572, "grad_norm": 0.37278833985328674, "learning_rate": 1.4505610526751455e-05, "loss": 0.4516, "step": 33187 }, { "epoch": 0.7038663018811903, "grad_norm": 0.4600997269153595, "learning_rate": 1.4505312797756725e-05, "loss": 0.6227, "step": 33188 }, { "epoch": 0.7038875103391232, "grad_norm": 0.34740740060806274, "learning_rate": 1.4505015063751225e-05, "loss": 0.5185, "step": 33189 }, { "epoch": 0.7039087187970563, "grad_norm": 0.35715970396995544, "learning_rate": 1.4504717324735285e-05, "loss": 0.3679, "step": 33190 }, { "epoch": 0.7039299272549893, "grad_norm": 0.34259161353111267, "learning_rate": 1.450441958070924e-05, "loss": 0.421, "step": 33191 }, { "epoch": 0.7039511357129223, "grad_norm": 0.3712601065635681, "learning_rate": 1.4504121831673414e-05, "loss": 0.5571, "step": 33192 }, { "epoch": 0.7039723441708553, "grad_norm": 0.35726025700569153, "learning_rate": 1.4503824077628146e-05, "loss": 0.5208, "step": 33193 }, { "epoch": 0.7039935526287884, "grad_norm": 0.4226032495498657, "learning_rate": 1.4503526318573764e-05, "loss": 0.4695, "step": 33194 }, { "epoch": 0.7040147610867213, "grad_norm": 0.32140201330184937, "learning_rate": 1.4503228554510599e-05, "loss": 0.4418, "step": 33195 }, { "epoch": 0.7040359695446544, "grad_norm": 0.340083509683609, "learning_rate": 1.4502930785438985e-05, "loss": 0.473, "step": 33196 }, { "epoch": 0.7040571780025874, "grad_norm": 0.33724913001060486, "learning_rate": 1.4502633011359248e-05, "loss": 0.4661, "step": 33197 }, { "epoch": 0.7040783864605205, "grad_norm": 0.33035436272621155, "learning_rate": 1.450233523227172e-05, "loss": 0.4109, "step": 33198 }, { "epoch": 0.7040995949184535, "grad_norm": 0.37295758724212646, "learning_rate": 1.4502037448176734e-05, "loss": 0.4834, "step": 33199 }, { "epoch": 0.7041208033763865, "grad_norm": 0.38689306378364563, "learning_rate": 1.4501739659074622e-05, "loss": 0.482, "step": 33200 }, { "epoch": 0.7041420118343196, "grad_norm": 0.3556545674800873, "learning_rate": 1.4501441864965716e-05, "loss": 0.5135, "step": 33201 }, { "epoch": 0.7041632202922525, "grad_norm": 0.643876314163208, "learning_rate": 1.4501144065850343e-05, "loss": 0.4567, "step": 33202 }, { "epoch": 0.7041844287501856, "grad_norm": 0.35371458530426025, "learning_rate": 1.4500846261728838e-05, "loss": 0.5423, "step": 33203 }, { "epoch": 0.7042056372081186, "grad_norm": 0.3564322292804718, "learning_rate": 1.4500548452601533e-05, "loss": 0.4698, "step": 33204 }, { "epoch": 0.7042268456660516, "grad_norm": 0.35293444991111755, "learning_rate": 1.4500250638468752e-05, "loss": 0.47, "step": 33205 }, { "epoch": 0.7042480541239846, "grad_norm": 0.42251887917518616, "learning_rate": 1.4499952819330833e-05, "loss": 0.5285, "step": 33206 }, { "epoch": 0.7042692625819177, "grad_norm": 0.3522476851940155, "learning_rate": 1.4499654995188109e-05, "loss": 0.4934, "step": 33207 }, { "epoch": 0.7042904710398507, "grad_norm": 0.33911946415901184, "learning_rate": 1.4499357166040905e-05, "loss": 0.472, "step": 33208 }, { "epoch": 0.7043116794977837, "grad_norm": 0.3797013759613037, "learning_rate": 1.4499059331889556e-05, "loss": 0.4785, "step": 33209 }, { "epoch": 0.7043328879557167, "grad_norm": 0.35983529686927795, "learning_rate": 1.4498761492734392e-05, "loss": 0.5421, "step": 33210 }, { "epoch": 0.7043540964136498, "grad_norm": 0.35647517442703247, "learning_rate": 1.4498463648575744e-05, "loss": 0.5277, "step": 33211 }, { "epoch": 0.7043753048715828, "grad_norm": 0.4500887393951416, "learning_rate": 1.4498165799413943e-05, "loss": 0.485, "step": 33212 }, { "epoch": 0.7043965133295158, "grad_norm": 0.31904199719429016, "learning_rate": 1.4497867945249321e-05, "loss": 0.4639, "step": 33213 }, { "epoch": 0.7044177217874489, "grad_norm": 0.37705764174461365, "learning_rate": 1.4497570086082213e-05, "loss": 0.5263, "step": 33214 }, { "epoch": 0.7044389302453818, "grad_norm": 0.3633035123348236, "learning_rate": 1.4497272221912943e-05, "loss": 0.5381, "step": 33215 }, { "epoch": 0.7044601387033149, "grad_norm": 0.3644137680530548, "learning_rate": 1.4496974352741849e-05, "loss": 0.4879, "step": 33216 }, { "epoch": 0.7044813471612479, "grad_norm": 0.38081154227256775, "learning_rate": 1.4496676478569255e-05, "loss": 0.5282, "step": 33217 }, { "epoch": 0.704502555619181, "grad_norm": 1.205701470375061, "learning_rate": 1.44963785993955e-05, "loss": 0.4909, "step": 33218 }, { "epoch": 0.7045237640771139, "grad_norm": 0.4180729389190674, "learning_rate": 1.4496080715220911e-05, "loss": 0.4943, "step": 33219 }, { "epoch": 0.704544972535047, "grad_norm": 0.4814801812171936, "learning_rate": 1.4495782826045819e-05, "loss": 0.4769, "step": 33220 }, { "epoch": 0.70456618099298, "grad_norm": 0.3999220132827759, "learning_rate": 1.4495484931870558e-05, "loss": 0.5329, "step": 33221 }, { "epoch": 0.704587389450913, "grad_norm": 0.3567029535770416, "learning_rate": 1.4495187032695457e-05, "loss": 0.5271, "step": 33222 }, { "epoch": 0.704608597908846, "grad_norm": 0.6400579214096069, "learning_rate": 1.4494889128520848e-05, "loss": 0.5038, "step": 33223 }, { "epoch": 0.7046298063667791, "grad_norm": 0.37933945655822754, "learning_rate": 1.4494591219347061e-05, "loss": 0.4301, "step": 33224 }, { "epoch": 0.704651014824712, "grad_norm": 0.34702983498573303, "learning_rate": 1.4494293305174431e-05, "loss": 0.4628, "step": 33225 }, { "epoch": 0.7046722232826451, "grad_norm": 0.3248801827430725, "learning_rate": 1.4493995386003285e-05, "loss": 0.4127, "step": 33226 }, { "epoch": 0.7046934317405782, "grad_norm": 0.3418050706386566, "learning_rate": 1.4493697461833957e-05, "loss": 0.4523, "step": 33227 }, { "epoch": 0.7047146401985112, "grad_norm": 0.3174855709075928, "learning_rate": 1.4493399532666779e-05, "loss": 0.4969, "step": 33228 }, { "epoch": 0.7047358486564442, "grad_norm": 0.40625107288360596, "learning_rate": 1.4493101598502079e-05, "loss": 0.52, "step": 33229 }, { "epoch": 0.7047570571143772, "grad_norm": 0.33976784348487854, "learning_rate": 1.4492803659340193e-05, "loss": 0.4838, "step": 33230 }, { "epoch": 0.7047782655723103, "grad_norm": 0.3271856904029846, "learning_rate": 1.4492505715181447e-05, "loss": 0.4293, "step": 33231 }, { "epoch": 0.7047994740302432, "grad_norm": 0.37458181381225586, "learning_rate": 1.4492207766026176e-05, "loss": 0.5104, "step": 33232 }, { "epoch": 0.7048206824881763, "grad_norm": 0.3187735974788666, "learning_rate": 1.449190981187471e-05, "loss": 0.4523, "step": 33233 }, { "epoch": 0.7048418909461093, "grad_norm": 0.3628552556037903, "learning_rate": 1.4491611852727382e-05, "loss": 0.5108, "step": 33234 }, { "epoch": 0.7048630994040423, "grad_norm": 0.36631453037261963, "learning_rate": 1.4491313888584523e-05, "loss": 0.5216, "step": 33235 }, { "epoch": 0.7048843078619753, "grad_norm": 0.33857977390289307, "learning_rate": 1.4491015919446465e-05, "loss": 0.4571, "step": 33236 }, { "epoch": 0.7049055163199084, "grad_norm": 0.37742435932159424, "learning_rate": 1.4490717945313535e-05, "loss": 0.5374, "step": 33237 }, { "epoch": 0.7049267247778414, "grad_norm": 0.42872434854507446, "learning_rate": 1.449041996618607e-05, "loss": 0.5592, "step": 33238 }, { "epoch": 0.7049479332357744, "grad_norm": 0.3459973633289337, "learning_rate": 1.4490121982064396e-05, "loss": 0.4662, "step": 33239 }, { "epoch": 0.7049691416937075, "grad_norm": 0.3666993975639343, "learning_rate": 1.4489823992948848e-05, "loss": 0.5127, "step": 33240 }, { "epoch": 0.7049903501516405, "grad_norm": 0.5499230027198792, "learning_rate": 1.448952599883976e-05, "loss": 0.5472, "step": 33241 }, { "epoch": 0.7050115586095735, "grad_norm": 0.3934412896633148, "learning_rate": 1.4489227999737459e-05, "loss": 0.6182, "step": 33242 }, { "epoch": 0.7050327670675065, "grad_norm": 0.44202741980552673, "learning_rate": 1.4488929995642273e-05, "loss": 0.4779, "step": 33243 }, { "epoch": 0.7050539755254396, "grad_norm": 0.3173256814479828, "learning_rate": 1.4488631986554543e-05, "loss": 0.4441, "step": 33244 }, { "epoch": 0.7050751839833725, "grad_norm": 0.33401066064834595, "learning_rate": 1.4488333972474593e-05, "loss": 0.4523, "step": 33245 }, { "epoch": 0.7050963924413056, "grad_norm": 0.42041951417922974, "learning_rate": 1.4488035953402757e-05, "loss": 0.4615, "step": 33246 }, { "epoch": 0.7051176008992386, "grad_norm": 0.3497149646282196, "learning_rate": 1.4487737929339369e-05, "loss": 0.5241, "step": 33247 }, { "epoch": 0.7051388093571717, "grad_norm": 0.6942338943481445, "learning_rate": 1.4487439900284756e-05, "loss": 0.4969, "step": 33248 }, { "epoch": 0.7051600178151046, "grad_norm": 0.3459709882736206, "learning_rate": 1.4487141866239251e-05, "loss": 0.4504, "step": 33249 }, { "epoch": 0.7051812262730377, "grad_norm": 0.34936800599098206, "learning_rate": 1.4486843827203185e-05, "loss": 0.4661, "step": 33250 }, { "epoch": 0.7052024347309707, "grad_norm": 0.3244631588459015, "learning_rate": 1.4486545783176893e-05, "loss": 0.457, "step": 33251 }, { "epoch": 0.7052236431889037, "grad_norm": 0.38310620188713074, "learning_rate": 1.4486247734160702e-05, "loss": 0.4614, "step": 33252 }, { "epoch": 0.7052448516468368, "grad_norm": 0.5384990572929382, "learning_rate": 1.4485949680154946e-05, "loss": 0.468, "step": 33253 }, { "epoch": 0.7052660601047698, "grad_norm": 0.3915776014328003, "learning_rate": 1.4485651621159955e-05, "loss": 0.4799, "step": 33254 }, { "epoch": 0.7052872685627029, "grad_norm": 0.3464297950267792, "learning_rate": 1.4485353557176064e-05, "loss": 0.5157, "step": 33255 }, { "epoch": 0.7053084770206358, "grad_norm": 0.34382450580596924, "learning_rate": 1.4485055488203597e-05, "loss": 0.5161, "step": 33256 }, { "epoch": 0.7053296854785689, "grad_norm": 0.39308643341064453, "learning_rate": 1.4484757414242892e-05, "loss": 0.5361, "step": 33257 }, { "epoch": 0.7053508939365019, "grad_norm": 0.38705456256866455, "learning_rate": 1.4484459335294282e-05, "loss": 0.4898, "step": 33258 }, { "epoch": 0.7053721023944349, "grad_norm": 0.31644466519355774, "learning_rate": 1.448416125135809e-05, "loss": 0.4172, "step": 33259 }, { "epoch": 0.7053933108523679, "grad_norm": 0.35662466287612915, "learning_rate": 1.4483863162434655e-05, "loss": 0.4824, "step": 33260 }, { "epoch": 0.705414519310301, "grad_norm": 0.3459625244140625, "learning_rate": 1.4483565068524307e-05, "loss": 0.5168, "step": 33261 }, { "epoch": 0.7054357277682339, "grad_norm": 0.3411200940608978, "learning_rate": 1.4483266969627376e-05, "loss": 0.4972, "step": 33262 }, { "epoch": 0.705456936226167, "grad_norm": 0.34450703859329224, "learning_rate": 1.4482968865744194e-05, "loss": 0.4668, "step": 33263 }, { "epoch": 0.7054781446841, "grad_norm": 0.34009531140327454, "learning_rate": 1.4482670756875096e-05, "loss": 0.5265, "step": 33264 }, { "epoch": 0.705499353142033, "grad_norm": 0.38064098358154297, "learning_rate": 1.448237264302041e-05, "loss": 0.5419, "step": 33265 }, { "epoch": 0.705520561599966, "grad_norm": 0.36595410108566284, "learning_rate": 1.4482074524180465e-05, "loss": 0.5126, "step": 33266 }, { "epoch": 0.7055417700578991, "grad_norm": 0.3625349998474121, "learning_rate": 1.4481776400355598e-05, "loss": 0.4432, "step": 33267 }, { "epoch": 0.7055629785158322, "grad_norm": 0.42542341351509094, "learning_rate": 1.4481478271546137e-05, "loss": 0.4931, "step": 33268 }, { "epoch": 0.7055841869737651, "grad_norm": 0.35198354721069336, "learning_rate": 1.4481180137752416e-05, "loss": 0.4613, "step": 33269 }, { "epoch": 0.7056053954316982, "grad_norm": 0.35996949672698975, "learning_rate": 1.4480881998974763e-05, "loss": 0.5019, "step": 33270 }, { "epoch": 0.7056266038896312, "grad_norm": 0.37007710337638855, "learning_rate": 1.4480583855213515e-05, "loss": 0.5373, "step": 33271 }, { "epoch": 0.7056478123475642, "grad_norm": 0.42989614605903625, "learning_rate": 1.4480285706469e-05, "loss": 0.5338, "step": 33272 }, { "epoch": 0.7056690208054972, "grad_norm": 0.33966386318206787, "learning_rate": 1.447998755274155e-05, "loss": 0.4743, "step": 33273 }, { "epoch": 0.7056902292634303, "grad_norm": 0.3707456886768341, "learning_rate": 1.4479689394031495e-05, "loss": 0.5001, "step": 33274 }, { "epoch": 0.7057114377213632, "grad_norm": 0.3325212597846985, "learning_rate": 1.4479391230339168e-05, "loss": 0.4345, "step": 33275 }, { "epoch": 0.7057326461792963, "grad_norm": 0.37076717615127563, "learning_rate": 1.4479093061664902e-05, "loss": 0.4876, "step": 33276 }, { "epoch": 0.7057538546372293, "grad_norm": 0.37372881174087524, "learning_rate": 1.447879488800903e-05, "loss": 0.4907, "step": 33277 }, { "epoch": 0.7057750630951624, "grad_norm": 0.36700478196144104, "learning_rate": 1.4478496709371878e-05, "loss": 0.4329, "step": 33278 }, { "epoch": 0.7057962715530953, "grad_norm": 0.3819054067134857, "learning_rate": 1.4478198525753783e-05, "loss": 0.6038, "step": 33279 }, { "epoch": 0.7058174800110284, "grad_norm": 0.3728719651699066, "learning_rate": 1.4477900337155074e-05, "loss": 0.4678, "step": 33280 }, { "epoch": 0.7058386884689615, "grad_norm": 0.3770662546157837, "learning_rate": 1.4477602143576083e-05, "loss": 0.5127, "step": 33281 }, { "epoch": 0.7058598969268944, "grad_norm": 0.39783087372779846, "learning_rate": 1.447730394501714e-05, "loss": 0.5124, "step": 33282 }, { "epoch": 0.7058811053848275, "grad_norm": 0.33179980516433716, "learning_rate": 1.447700574147858e-05, "loss": 0.467, "step": 33283 }, { "epoch": 0.7059023138427605, "grad_norm": 0.37390363216400146, "learning_rate": 1.4476707532960734e-05, "loss": 0.4697, "step": 33284 }, { "epoch": 0.7059235223006936, "grad_norm": 0.37561455368995667, "learning_rate": 1.4476409319463931e-05, "loss": 0.4571, "step": 33285 }, { "epoch": 0.7059447307586265, "grad_norm": 0.3963591158390045, "learning_rate": 1.4476111100988506e-05, "loss": 0.455, "step": 33286 }, { "epoch": 0.7059659392165596, "grad_norm": 0.34916800260543823, "learning_rate": 1.4475812877534789e-05, "loss": 0.4853, "step": 33287 }, { "epoch": 0.7059871476744926, "grad_norm": 0.3897393047809601, "learning_rate": 1.4475514649103108e-05, "loss": 0.5325, "step": 33288 }, { "epoch": 0.7060083561324256, "grad_norm": 0.3734719157218933, "learning_rate": 1.4475216415693805e-05, "loss": 0.523, "step": 33289 }, { "epoch": 0.7060295645903586, "grad_norm": 0.40384116768836975, "learning_rate": 1.4474918177307203e-05, "loss": 0.5119, "step": 33290 }, { "epoch": 0.7060507730482917, "grad_norm": 0.3216381072998047, "learning_rate": 1.4474619933943632e-05, "loss": 0.5213, "step": 33291 }, { "epoch": 0.7060719815062246, "grad_norm": 0.3822421133518219, "learning_rate": 1.4474321685603432e-05, "loss": 0.5227, "step": 33292 }, { "epoch": 0.7060931899641577, "grad_norm": 0.37724074721336365, "learning_rate": 1.4474023432286929e-05, "loss": 0.455, "step": 33293 }, { "epoch": 0.7061143984220908, "grad_norm": 0.35013097524642944, "learning_rate": 1.4473725173994455e-05, "loss": 0.4739, "step": 33294 }, { "epoch": 0.7061356068800237, "grad_norm": 0.38962334394454956, "learning_rate": 1.4473426910726342e-05, "loss": 0.5472, "step": 33295 }, { "epoch": 0.7061568153379568, "grad_norm": 0.349287748336792, "learning_rate": 1.4473128642482926e-05, "loss": 0.5976, "step": 33296 }, { "epoch": 0.7061780237958898, "grad_norm": 0.3887385427951813, "learning_rate": 1.4472830369264532e-05, "loss": 0.4982, "step": 33297 }, { "epoch": 0.7061992322538229, "grad_norm": 12.77962875366211, "learning_rate": 1.4472532091071497e-05, "loss": 0.4579, "step": 33298 }, { "epoch": 0.7062204407117558, "grad_norm": 0.3905037045478821, "learning_rate": 1.4472233807904149e-05, "loss": 0.65, "step": 33299 }, { "epoch": 0.7062416491696889, "grad_norm": 0.3745127022266388, "learning_rate": 1.4471935519762824e-05, "loss": 0.4947, "step": 33300 }, { "epoch": 0.7062628576276219, "grad_norm": 0.443043053150177, "learning_rate": 1.4471637226647848e-05, "loss": 0.5025, "step": 33301 }, { "epoch": 0.7062840660855549, "grad_norm": 0.37105217576026917, "learning_rate": 1.4471338928559555e-05, "loss": 0.4888, "step": 33302 }, { "epoch": 0.7063052745434879, "grad_norm": 0.3290255069732666, "learning_rate": 1.447104062549828e-05, "loss": 0.4246, "step": 33303 }, { "epoch": 0.706326483001421, "grad_norm": 0.3456742763519287, "learning_rate": 1.4470742317464354e-05, "loss": 0.4199, "step": 33304 }, { "epoch": 0.7063476914593539, "grad_norm": 0.5228545069694519, "learning_rate": 1.4470444004458104e-05, "loss": 0.4295, "step": 33305 }, { "epoch": 0.706368899917287, "grad_norm": 0.37903839349746704, "learning_rate": 1.4470145686479868e-05, "loss": 0.5786, "step": 33306 }, { "epoch": 0.70639010837522, "grad_norm": 0.41407594084739685, "learning_rate": 1.4469847363529975e-05, "loss": 0.492, "step": 33307 }, { "epoch": 0.7064113168331531, "grad_norm": 0.362052321434021, "learning_rate": 1.4469549035608752e-05, "loss": 0.4339, "step": 33308 }, { "epoch": 0.7064325252910861, "grad_norm": 0.35344889760017395, "learning_rate": 1.4469250702716538e-05, "loss": 0.4242, "step": 33309 }, { "epoch": 0.7064537337490191, "grad_norm": 0.37586915493011475, "learning_rate": 1.4468952364853666e-05, "loss": 0.4533, "step": 33310 }, { "epoch": 0.7064749422069522, "grad_norm": 0.4261590242385864, "learning_rate": 1.4468654022020458e-05, "loss": 0.4984, "step": 33311 }, { "epoch": 0.7064961506648851, "grad_norm": 0.3549209237098694, "learning_rate": 1.4468355674217254e-05, "loss": 0.3963, "step": 33312 }, { "epoch": 0.7065173591228182, "grad_norm": 0.33805030584335327, "learning_rate": 1.4468057321444384e-05, "loss": 0.5223, "step": 33313 }, { "epoch": 0.7065385675807512, "grad_norm": 0.35897278785705566, "learning_rate": 1.4467758963702179e-05, "loss": 0.5179, "step": 33314 }, { "epoch": 0.7065597760386843, "grad_norm": 0.34405672550201416, "learning_rate": 1.4467460600990973e-05, "loss": 0.4586, "step": 33315 }, { "epoch": 0.7065809844966172, "grad_norm": 0.349283903837204, "learning_rate": 1.4467162233311092e-05, "loss": 0.4826, "step": 33316 }, { "epoch": 0.7066021929545503, "grad_norm": 0.8092278838157654, "learning_rate": 1.4466863860662877e-05, "loss": 0.5404, "step": 33317 }, { "epoch": 0.7066234014124833, "grad_norm": 0.7125250697135925, "learning_rate": 1.4466565483046653e-05, "loss": 0.429, "step": 33318 }, { "epoch": 0.7066446098704163, "grad_norm": 0.38550394773483276, "learning_rate": 1.4466267100462755e-05, "loss": 0.4701, "step": 33319 }, { "epoch": 0.7066658183283493, "grad_norm": 0.36806604266166687, "learning_rate": 1.4465968712911508e-05, "loss": 0.4704, "step": 33320 }, { "epoch": 0.7066870267862824, "grad_norm": 0.37289655208587646, "learning_rate": 1.4465670320393254e-05, "loss": 0.4871, "step": 33321 }, { "epoch": 0.7067082352442154, "grad_norm": 0.39946404099464417, "learning_rate": 1.446537192290832e-05, "loss": 0.5499, "step": 33322 }, { "epoch": 0.7067294437021484, "grad_norm": 0.4082796573638916, "learning_rate": 1.4465073520457037e-05, "loss": 0.4912, "step": 33323 }, { "epoch": 0.7067506521600815, "grad_norm": 0.37520888447761536, "learning_rate": 1.4464775113039743e-05, "loss": 0.5715, "step": 33324 }, { "epoch": 0.7067718606180144, "grad_norm": 0.3431238830089569, "learning_rate": 1.4464476700656758e-05, "loss": 0.454, "step": 33325 }, { "epoch": 0.7067930690759475, "grad_norm": 0.3385297656059265, "learning_rate": 1.4464178283308424e-05, "loss": 0.451, "step": 33326 }, { "epoch": 0.7068142775338805, "grad_norm": 0.3448464870452881, "learning_rate": 1.4463879860995067e-05, "loss": 0.5453, "step": 33327 }, { "epoch": 0.7068354859918136, "grad_norm": 0.3575848340988159, "learning_rate": 1.4463581433717022e-05, "loss": 0.5043, "step": 33328 }, { "epoch": 0.7068566944497465, "grad_norm": 0.37878888845443726, "learning_rate": 1.4463283001474624e-05, "loss": 0.5022, "step": 33329 }, { "epoch": 0.7068779029076796, "grad_norm": 0.34994545578956604, "learning_rate": 1.44629845642682e-05, "loss": 0.5346, "step": 33330 }, { "epoch": 0.7068991113656126, "grad_norm": 0.34497368335723877, "learning_rate": 1.4462686122098081e-05, "loss": 0.4775, "step": 33331 }, { "epoch": 0.7069203198235456, "grad_norm": 0.34217405319213867, "learning_rate": 1.4462387674964606e-05, "loss": 0.485, "step": 33332 }, { "epoch": 0.7069415282814786, "grad_norm": 0.36884427070617676, "learning_rate": 1.4462089222868099e-05, "loss": 0.4396, "step": 33333 }, { "epoch": 0.7069627367394117, "grad_norm": 0.4978308081626892, "learning_rate": 1.4461790765808895e-05, "loss": 0.5646, "step": 33334 }, { "epoch": 0.7069839451973448, "grad_norm": 0.37025392055511475, "learning_rate": 1.4461492303787328e-05, "loss": 0.4799, "step": 33335 }, { "epoch": 0.7070051536552777, "grad_norm": 0.38586100935935974, "learning_rate": 1.4461193836803724e-05, "loss": 0.5309, "step": 33336 }, { "epoch": 0.7070263621132108, "grad_norm": 0.38700127601623535, "learning_rate": 1.4460895364858423e-05, "loss": 0.529, "step": 33337 }, { "epoch": 0.7070475705711438, "grad_norm": 0.34245893359184265, "learning_rate": 1.4460596887951753e-05, "loss": 0.483, "step": 33338 }, { "epoch": 0.7070687790290768, "grad_norm": 0.3683866560459137, "learning_rate": 1.4460298406084042e-05, "loss": 0.476, "step": 33339 }, { "epoch": 0.7070899874870098, "grad_norm": 0.4686339795589447, "learning_rate": 1.4459999919255628e-05, "loss": 0.4033, "step": 33340 }, { "epoch": 0.7071111959449429, "grad_norm": 0.37829577922821045, "learning_rate": 1.4459701427466841e-05, "loss": 0.5313, "step": 33341 }, { "epoch": 0.7071324044028758, "grad_norm": 0.33633920550346375, "learning_rate": 1.4459402930718013e-05, "loss": 0.4983, "step": 33342 }, { "epoch": 0.7071536128608089, "grad_norm": 0.7273540496826172, "learning_rate": 1.4459104429009477e-05, "loss": 0.4203, "step": 33343 }, { "epoch": 0.7071748213187419, "grad_norm": 0.34229975938796997, "learning_rate": 1.445880592234156e-05, "loss": 0.4882, "step": 33344 }, { "epoch": 0.707196029776675, "grad_norm": 0.3658803105354309, "learning_rate": 1.4458507410714602e-05, "loss": 0.5599, "step": 33345 }, { "epoch": 0.7072172382346079, "grad_norm": 0.372329980134964, "learning_rate": 1.445820889412893e-05, "loss": 0.471, "step": 33346 }, { "epoch": 0.707238446692541, "grad_norm": 0.3702630400657654, "learning_rate": 1.4457910372584875e-05, "loss": 0.536, "step": 33347 }, { "epoch": 0.707259655150474, "grad_norm": 0.43990346789360046, "learning_rate": 1.4457611846082771e-05, "loss": 0.399, "step": 33348 }, { "epoch": 0.707280863608407, "grad_norm": 0.33696699142456055, "learning_rate": 1.4457313314622952e-05, "loss": 0.4528, "step": 33349 }, { "epoch": 0.7073020720663401, "grad_norm": 0.4039713740348816, "learning_rate": 1.4457014778205744e-05, "loss": 0.4954, "step": 33350 }, { "epoch": 0.7073232805242731, "grad_norm": 0.39777761697769165, "learning_rate": 1.4456716236831487e-05, "loss": 0.493, "step": 33351 }, { "epoch": 0.7073444889822061, "grad_norm": 0.3633415400981903, "learning_rate": 1.4456417690500506e-05, "loss": 0.5241, "step": 33352 }, { "epoch": 0.7073656974401391, "grad_norm": 1.084758996963501, "learning_rate": 1.4456119139213137e-05, "loss": 0.4986, "step": 33353 }, { "epoch": 0.7073869058980722, "grad_norm": 0.3937302231788635, "learning_rate": 1.445582058296971e-05, "loss": 0.4118, "step": 33354 }, { "epoch": 0.7074081143560051, "grad_norm": 0.33413976430892944, "learning_rate": 1.445552202177056e-05, "loss": 0.4509, "step": 33355 }, { "epoch": 0.7074293228139382, "grad_norm": 0.33220359683036804, "learning_rate": 1.4455223455616017e-05, "loss": 0.4591, "step": 33356 }, { "epoch": 0.7074505312718712, "grad_norm": 0.31616589426994324, "learning_rate": 1.4454924884506412e-05, "loss": 0.4331, "step": 33357 }, { "epoch": 0.7074717397298043, "grad_norm": 0.34578076004981995, "learning_rate": 1.4454626308442079e-05, "loss": 0.5214, "step": 33358 }, { "epoch": 0.7074929481877372, "grad_norm": 0.4138778746128082, "learning_rate": 1.4454327727423347e-05, "loss": 0.5708, "step": 33359 }, { "epoch": 0.7075141566456703, "grad_norm": 0.4978801906108856, "learning_rate": 1.4454029141450554e-05, "loss": 0.5561, "step": 33360 }, { "epoch": 0.7075353651036033, "grad_norm": 0.37010616064071655, "learning_rate": 1.4453730550524028e-05, "loss": 0.4892, "step": 33361 }, { "epoch": 0.7075565735615363, "grad_norm": 0.3602825105190277, "learning_rate": 1.44534319546441e-05, "loss": 0.4716, "step": 33362 }, { "epoch": 0.7075777820194694, "grad_norm": 1.096981167793274, "learning_rate": 1.4453133353811104e-05, "loss": 0.5215, "step": 33363 }, { "epoch": 0.7075989904774024, "grad_norm": 0.35612013936042786, "learning_rate": 1.4452834748025371e-05, "loss": 0.5143, "step": 33364 }, { "epoch": 0.7076201989353355, "grad_norm": 0.36584925651550293, "learning_rate": 1.4452536137287233e-05, "loss": 0.5312, "step": 33365 }, { "epoch": 0.7076414073932684, "grad_norm": 0.3450635075569153, "learning_rate": 1.4452237521597026e-05, "loss": 0.4376, "step": 33366 }, { "epoch": 0.7076626158512015, "grad_norm": 0.3195110261440277, "learning_rate": 1.4451938900955079e-05, "loss": 0.3978, "step": 33367 }, { "epoch": 0.7076838243091345, "grad_norm": 0.38894951343536377, "learning_rate": 1.4451640275361722e-05, "loss": 0.5077, "step": 33368 }, { "epoch": 0.7077050327670675, "grad_norm": 0.3646276295185089, "learning_rate": 1.4451341644817291e-05, "loss": 0.4908, "step": 33369 }, { "epoch": 0.7077262412250005, "grad_norm": 0.32850679755210876, "learning_rate": 1.4451043009322116e-05, "loss": 0.4741, "step": 33370 }, { "epoch": 0.7077474496829336, "grad_norm": 2.5040581226348877, "learning_rate": 1.445074436887653e-05, "loss": 0.5242, "step": 33371 }, { "epoch": 0.7077686581408665, "grad_norm": 0.3724825978279114, "learning_rate": 1.4450445723480864e-05, "loss": 0.4524, "step": 33372 }, { "epoch": 0.7077898665987996, "grad_norm": 0.4165378510951996, "learning_rate": 1.445014707313545e-05, "loss": 0.5648, "step": 33373 }, { "epoch": 0.7078110750567326, "grad_norm": 0.40970849990844727, "learning_rate": 1.4449848417840624e-05, "loss": 0.5203, "step": 33374 }, { "epoch": 0.7078322835146656, "grad_norm": 0.3792453408241272, "learning_rate": 1.4449549757596716e-05, "loss": 0.5363, "step": 33375 }, { "epoch": 0.7078534919725987, "grad_norm": 0.33919933438301086, "learning_rate": 1.4449251092404052e-05, "loss": 0.5137, "step": 33376 }, { "epoch": 0.7078747004305317, "grad_norm": 0.3724757134914398, "learning_rate": 1.4448952422262974e-05, "loss": 0.5068, "step": 33377 }, { "epoch": 0.7078959088884648, "grad_norm": 0.3437865376472473, "learning_rate": 1.4448653747173806e-05, "loss": 0.3947, "step": 33378 }, { "epoch": 0.7079171173463977, "grad_norm": 0.34243643283843994, "learning_rate": 1.4448355067136886e-05, "loss": 0.5042, "step": 33379 }, { "epoch": 0.7079383258043308, "grad_norm": 0.36180979013442993, "learning_rate": 1.4448056382152547e-05, "loss": 0.4897, "step": 33380 }, { "epoch": 0.7079595342622638, "grad_norm": 0.3678578734397888, "learning_rate": 1.4447757692221113e-05, "loss": 0.5713, "step": 33381 }, { "epoch": 0.7079807427201968, "grad_norm": 0.38609132170677185, "learning_rate": 1.4447458997342927e-05, "loss": 0.5862, "step": 33382 }, { "epoch": 0.7080019511781298, "grad_norm": 0.5175221562385559, "learning_rate": 1.4447160297518312e-05, "loss": 0.5003, "step": 33383 }, { "epoch": 0.7080231596360629, "grad_norm": 0.3425558805465698, "learning_rate": 1.4446861592747606e-05, "loss": 0.467, "step": 33384 }, { "epoch": 0.7080443680939958, "grad_norm": 0.34007528424263, "learning_rate": 1.4446562883031137e-05, "loss": 0.5115, "step": 33385 }, { "epoch": 0.7080655765519289, "grad_norm": 0.37003958225250244, "learning_rate": 1.4446264168369243e-05, "loss": 0.4138, "step": 33386 }, { "epoch": 0.7080867850098619, "grad_norm": 0.4277306795120239, "learning_rate": 1.4445965448762248e-05, "loss": 0.5195, "step": 33387 }, { "epoch": 0.708107993467795, "grad_norm": 0.38257116079330444, "learning_rate": 1.4445666724210492e-05, "loss": 0.5222, "step": 33388 }, { "epoch": 0.708129201925728, "grad_norm": 0.3377333879470825, "learning_rate": 1.4445367994714304e-05, "loss": 0.4905, "step": 33389 }, { "epoch": 0.708150410383661, "grad_norm": 0.4037807285785675, "learning_rate": 1.4445069260274014e-05, "loss": 0.5395, "step": 33390 }, { "epoch": 0.7081716188415941, "grad_norm": 0.3564296066761017, "learning_rate": 1.4444770520889957e-05, "loss": 0.5442, "step": 33391 }, { "epoch": 0.708192827299527, "grad_norm": 0.34528911113739014, "learning_rate": 1.4444471776562465e-05, "loss": 0.5041, "step": 33392 }, { "epoch": 0.7082140357574601, "grad_norm": 0.4144270122051239, "learning_rate": 1.4444173027291872e-05, "loss": 0.5221, "step": 33393 }, { "epoch": 0.7082352442153931, "grad_norm": 0.3599056899547577, "learning_rate": 1.4443874273078509e-05, "loss": 0.4843, "step": 33394 }, { "epoch": 0.7082564526733262, "grad_norm": 0.32212239503860474, "learning_rate": 1.4443575513922702e-05, "loss": 0.5003, "step": 33395 }, { "epoch": 0.7082776611312591, "grad_norm": 0.3561287522315979, "learning_rate": 1.4443276749824795e-05, "loss": 0.4313, "step": 33396 }, { "epoch": 0.7082988695891922, "grad_norm": 0.35606276988983154, "learning_rate": 1.444297798078511e-05, "loss": 0.5403, "step": 33397 }, { "epoch": 0.7083200780471252, "grad_norm": 0.3448812663555145, "learning_rate": 1.4442679206803988e-05, "loss": 0.4686, "step": 33398 }, { "epoch": 0.7083412865050582, "grad_norm": 0.5990395545959473, "learning_rate": 1.4442380427881752e-05, "loss": 0.4991, "step": 33399 }, { "epoch": 0.7083624949629912, "grad_norm": 0.3820856213569641, "learning_rate": 1.4442081644018743e-05, "loss": 0.5526, "step": 33400 }, { "epoch": 0.7083837034209243, "grad_norm": 0.38207024335861206, "learning_rate": 1.4441782855215288e-05, "loss": 0.4818, "step": 33401 }, { "epoch": 0.7084049118788572, "grad_norm": 0.4288853704929352, "learning_rate": 1.4441484061471722e-05, "loss": 0.4868, "step": 33402 }, { "epoch": 0.7084261203367903, "grad_norm": 0.40751907229423523, "learning_rate": 1.4441185262788376e-05, "loss": 0.4798, "step": 33403 }, { "epoch": 0.7084473287947234, "grad_norm": 0.33056458830833435, "learning_rate": 1.4440886459165576e-05, "loss": 0.4031, "step": 33404 }, { "epoch": 0.7084685372526563, "grad_norm": 0.6020429134368896, "learning_rate": 1.4440587650603669e-05, "loss": 0.4609, "step": 33405 }, { "epoch": 0.7084897457105894, "grad_norm": 0.363604873418808, "learning_rate": 1.4440288837102973e-05, "loss": 0.425, "step": 33406 }, { "epoch": 0.7085109541685224, "grad_norm": 0.4173799157142639, "learning_rate": 1.443999001866383e-05, "loss": 0.4346, "step": 33407 }, { "epoch": 0.7085321626264555, "grad_norm": 0.36534565687179565, "learning_rate": 1.4439691195286567e-05, "loss": 0.5028, "step": 33408 }, { "epoch": 0.7085533710843884, "grad_norm": 0.3527165949344635, "learning_rate": 1.4439392366971518e-05, "loss": 0.4652, "step": 33409 }, { "epoch": 0.7085745795423215, "grad_norm": 0.38699036836624146, "learning_rate": 1.4439093533719015e-05, "loss": 0.5214, "step": 33410 }, { "epoch": 0.7085957880002545, "grad_norm": 0.3498465120792389, "learning_rate": 1.4438794695529392e-05, "loss": 0.5128, "step": 33411 }, { "epoch": 0.7086169964581875, "grad_norm": 0.4135100543498993, "learning_rate": 1.4438495852402979e-05, "loss": 0.5753, "step": 33412 }, { "epoch": 0.7086382049161205, "grad_norm": 0.365215539932251, "learning_rate": 1.443819700434011e-05, "loss": 0.4777, "step": 33413 }, { "epoch": 0.7086594133740536, "grad_norm": 0.3794177770614624, "learning_rate": 1.443789815134112e-05, "loss": 0.4885, "step": 33414 }, { "epoch": 0.7086806218319865, "grad_norm": 0.3494754433631897, "learning_rate": 1.4437599293406334e-05, "loss": 0.5447, "step": 33415 }, { "epoch": 0.7087018302899196, "grad_norm": 0.35814547538757324, "learning_rate": 1.4437300430536091e-05, "loss": 0.4273, "step": 33416 }, { "epoch": 0.7087230387478527, "grad_norm": 0.38143104314804077, "learning_rate": 1.443700156273072e-05, "loss": 0.5555, "step": 33417 }, { "epoch": 0.7087442472057857, "grad_norm": 0.34898287057876587, "learning_rate": 1.4436702689990551e-05, "loss": 0.4504, "step": 33418 }, { "epoch": 0.7087654556637187, "grad_norm": 0.3595426678657532, "learning_rate": 1.4436403812315924e-05, "loss": 0.4406, "step": 33419 }, { "epoch": 0.7087866641216517, "grad_norm": 0.4144337773323059, "learning_rate": 1.443610492970717e-05, "loss": 0.5174, "step": 33420 }, { "epoch": 0.7088078725795848, "grad_norm": 0.3389454483985901, "learning_rate": 1.4435806042164614e-05, "loss": 0.472, "step": 33421 }, { "epoch": 0.7088290810375177, "grad_norm": 0.36119315028190613, "learning_rate": 1.4435507149688594e-05, "loss": 0.4914, "step": 33422 }, { "epoch": 0.7088502894954508, "grad_norm": 0.3756628632545471, "learning_rate": 1.443520825227944e-05, "loss": 0.5358, "step": 33423 }, { "epoch": 0.7088714979533838, "grad_norm": 0.34506410360336304, "learning_rate": 1.4434909349937487e-05, "loss": 0.4856, "step": 33424 }, { "epoch": 0.7088927064113169, "grad_norm": 0.3562566041946411, "learning_rate": 1.4434610442663069e-05, "loss": 0.5107, "step": 33425 }, { "epoch": 0.7089139148692498, "grad_norm": 0.3084139823913574, "learning_rate": 1.4434311530456511e-05, "loss": 0.456, "step": 33426 }, { "epoch": 0.7089351233271829, "grad_norm": 0.3592776656150818, "learning_rate": 1.4434012613318153e-05, "loss": 0.4891, "step": 33427 }, { "epoch": 0.7089563317851159, "grad_norm": 0.41439369320869446, "learning_rate": 1.4433713691248326e-05, "loss": 0.4689, "step": 33428 }, { "epoch": 0.7089775402430489, "grad_norm": 0.3634118139743805, "learning_rate": 1.4433414764247355e-05, "loss": 0.376, "step": 33429 }, { "epoch": 0.708998748700982, "grad_norm": 0.3533252477645874, "learning_rate": 1.4433115832315586e-05, "loss": 0.5338, "step": 33430 }, { "epoch": 0.709019957158915, "grad_norm": 0.32595375180244446, "learning_rate": 1.443281689545334e-05, "loss": 0.4643, "step": 33431 }, { "epoch": 0.709041165616848, "grad_norm": 0.3958744704723358, "learning_rate": 1.4432517953660956e-05, "loss": 0.5668, "step": 33432 }, { "epoch": 0.709062374074781, "grad_norm": 0.3660143315792084, "learning_rate": 1.4432219006938764e-05, "loss": 0.4992, "step": 33433 }, { "epoch": 0.7090835825327141, "grad_norm": 0.36725497245788574, "learning_rate": 1.4431920055287097e-05, "loss": 0.4529, "step": 33434 }, { "epoch": 0.709104790990647, "grad_norm": 0.4033259153366089, "learning_rate": 1.4431621098706282e-05, "loss": 0.4109, "step": 33435 }, { "epoch": 0.7091259994485801, "grad_norm": 0.37133121490478516, "learning_rate": 1.443132213719666e-05, "loss": 0.5079, "step": 33436 }, { "epoch": 0.7091472079065131, "grad_norm": 0.3868767321109772, "learning_rate": 1.4431023170758562e-05, "loss": 0.5429, "step": 33437 }, { "epoch": 0.7091684163644462, "grad_norm": 0.5073667168617249, "learning_rate": 1.4430724199392314e-05, "loss": 0.4677, "step": 33438 }, { "epoch": 0.7091896248223791, "grad_norm": 0.40245842933654785, "learning_rate": 1.443042522309826e-05, "loss": 0.4501, "step": 33439 }, { "epoch": 0.7092108332803122, "grad_norm": 0.3666021525859833, "learning_rate": 1.4430126241876718e-05, "loss": 0.5138, "step": 33440 }, { "epoch": 0.7092320417382452, "grad_norm": 0.3797398507595062, "learning_rate": 1.4429827255728033e-05, "loss": 0.4708, "step": 33441 }, { "epoch": 0.7092532501961782, "grad_norm": 0.36931878328323364, "learning_rate": 1.4429528264652528e-05, "loss": 0.4806, "step": 33442 }, { "epoch": 0.7092744586541112, "grad_norm": 0.3970915973186493, "learning_rate": 1.4429229268650541e-05, "loss": 0.5828, "step": 33443 }, { "epoch": 0.7092956671120443, "grad_norm": 0.36161765456199646, "learning_rate": 1.4428930267722407e-05, "loss": 0.4578, "step": 33444 }, { "epoch": 0.7093168755699774, "grad_norm": 0.39603275060653687, "learning_rate": 1.4428631261868456e-05, "loss": 0.4997, "step": 33445 }, { "epoch": 0.7093380840279103, "grad_norm": 0.31869152188301086, "learning_rate": 1.4428332251089017e-05, "loss": 0.4627, "step": 33446 }, { "epoch": 0.7093592924858434, "grad_norm": 0.6139504909515381, "learning_rate": 1.4428033235384427e-05, "loss": 0.4429, "step": 33447 }, { "epoch": 0.7093805009437764, "grad_norm": 0.3608258366584778, "learning_rate": 1.4427734214755016e-05, "loss": 0.5307, "step": 33448 }, { "epoch": 0.7094017094017094, "grad_norm": 0.3420652449131012, "learning_rate": 1.4427435189201115e-05, "loss": 0.4882, "step": 33449 }, { "epoch": 0.7094229178596424, "grad_norm": 0.33020246028900146, "learning_rate": 1.4427136158723062e-05, "loss": 0.4951, "step": 33450 }, { "epoch": 0.7094441263175755, "grad_norm": 0.4305381178855896, "learning_rate": 1.4426837123321187e-05, "loss": 0.4693, "step": 33451 }, { "epoch": 0.7094653347755084, "grad_norm": 0.36208799481391907, "learning_rate": 1.442653808299582e-05, "loss": 0.4991, "step": 33452 }, { "epoch": 0.7094865432334415, "grad_norm": 0.41389814019203186, "learning_rate": 1.4426239037747297e-05, "loss": 0.5371, "step": 33453 }, { "epoch": 0.7095077516913745, "grad_norm": 0.398345410823822, "learning_rate": 1.4425939987575949e-05, "loss": 0.4305, "step": 33454 }, { "epoch": 0.7095289601493076, "grad_norm": 0.3625275492668152, "learning_rate": 1.442564093248211e-05, "loss": 0.4922, "step": 33455 }, { "epoch": 0.7095501686072405, "grad_norm": 0.33651724457740784, "learning_rate": 1.442534187246611e-05, "loss": 0.4596, "step": 33456 }, { "epoch": 0.7095713770651736, "grad_norm": 0.32811760902404785, "learning_rate": 1.4425042807528284e-05, "loss": 0.5075, "step": 33457 }, { "epoch": 0.7095925855231067, "grad_norm": 0.3873765766620636, "learning_rate": 1.4424743737668964e-05, "loss": 0.5065, "step": 33458 }, { "epoch": 0.7096137939810396, "grad_norm": 0.3085615038871765, "learning_rate": 1.4424444662888487e-05, "loss": 0.4039, "step": 33459 }, { "epoch": 0.7096350024389727, "grad_norm": 0.34118252992630005, "learning_rate": 1.4424145583187174e-05, "loss": 0.5411, "step": 33460 }, { "epoch": 0.7096562108969057, "grad_norm": 0.33138802647590637, "learning_rate": 1.4423846498565368e-05, "loss": 0.4498, "step": 33461 }, { "epoch": 0.7096774193548387, "grad_norm": 0.40915513038635254, "learning_rate": 1.4423547409023395e-05, "loss": 0.5221, "step": 33462 }, { "epoch": 0.7096986278127717, "grad_norm": 0.4007766842842102, "learning_rate": 1.4423248314561593e-05, "loss": 0.5427, "step": 33463 }, { "epoch": 0.7097198362707048, "grad_norm": 0.3557966351509094, "learning_rate": 1.4422949215180295e-05, "loss": 0.5137, "step": 33464 }, { "epoch": 0.7097410447286377, "grad_norm": 0.36509573459625244, "learning_rate": 1.442265011087983e-05, "loss": 0.4469, "step": 33465 }, { "epoch": 0.7097622531865708, "grad_norm": 0.3859393298625946, "learning_rate": 1.442235100166053e-05, "loss": 0.4449, "step": 33466 }, { "epoch": 0.7097834616445038, "grad_norm": 0.37518152594566345, "learning_rate": 1.4422051887522735e-05, "loss": 0.5697, "step": 33467 }, { "epoch": 0.7098046701024369, "grad_norm": 0.33527112007141113, "learning_rate": 1.4421752768466765e-05, "loss": 0.4199, "step": 33468 }, { "epoch": 0.7098258785603698, "grad_norm": 0.4012036621570587, "learning_rate": 1.4421453644492964e-05, "loss": 0.516, "step": 33469 }, { "epoch": 0.7098470870183029, "grad_norm": 0.3577072024345398, "learning_rate": 1.4421154515601661e-05, "loss": 0.46, "step": 33470 }, { "epoch": 0.709868295476236, "grad_norm": 0.434640496969223, "learning_rate": 1.4420855381793185e-05, "loss": 0.4836, "step": 33471 }, { "epoch": 0.7098895039341689, "grad_norm": 0.3295081853866577, "learning_rate": 1.4420556243067876e-05, "loss": 0.5315, "step": 33472 }, { "epoch": 0.709910712392102, "grad_norm": 0.3128688633441925, "learning_rate": 1.4420257099426063e-05, "loss": 0.423, "step": 33473 }, { "epoch": 0.709931920850035, "grad_norm": 0.341800332069397, "learning_rate": 1.4419957950868075e-05, "loss": 0.4609, "step": 33474 }, { "epoch": 0.7099531293079681, "grad_norm": 1.5134668350219727, "learning_rate": 1.4419658797394249e-05, "loss": 0.4332, "step": 33475 }, { "epoch": 0.709974337765901, "grad_norm": 0.3322238028049469, "learning_rate": 1.4419359639004919e-05, "loss": 0.4704, "step": 33476 }, { "epoch": 0.7099955462238341, "grad_norm": 0.33614689111709595, "learning_rate": 1.4419060475700413e-05, "loss": 0.5403, "step": 33477 }, { "epoch": 0.7100167546817671, "grad_norm": 0.352478951215744, "learning_rate": 1.4418761307481068e-05, "loss": 0.6117, "step": 33478 }, { "epoch": 0.7100379631397001, "grad_norm": 0.3786967694759369, "learning_rate": 1.4418462134347217e-05, "loss": 0.5644, "step": 33479 }, { "epoch": 0.7100591715976331, "grad_norm": 0.3569854497909546, "learning_rate": 1.4418162956299189e-05, "loss": 0.4234, "step": 33480 }, { "epoch": 0.7100803800555662, "grad_norm": 0.3268125653266907, "learning_rate": 1.4417863773337315e-05, "loss": 0.4222, "step": 33481 }, { "epoch": 0.7101015885134991, "grad_norm": 0.40606915950775146, "learning_rate": 1.4417564585461937e-05, "loss": 0.3697, "step": 33482 }, { "epoch": 0.7101227969714322, "grad_norm": 0.3886057436466217, "learning_rate": 1.441726539267338e-05, "loss": 0.4532, "step": 33483 }, { "epoch": 0.7101440054293652, "grad_norm": 0.34590068459510803, "learning_rate": 1.4416966194971978e-05, "loss": 0.4429, "step": 33484 }, { "epoch": 0.7101652138872983, "grad_norm": 0.45410022139549255, "learning_rate": 1.4416666992358064e-05, "loss": 0.4894, "step": 33485 }, { "epoch": 0.7101864223452313, "grad_norm": 0.3689802885055542, "learning_rate": 1.4416367784831973e-05, "loss": 0.4831, "step": 33486 }, { "epoch": 0.7102076308031643, "grad_norm": 0.35784146189689636, "learning_rate": 1.4416068572394034e-05, "loss": 0.531, "step": 33487 }, { "epoch": 0.7102288392610974, "grad_norm": 0.3845853805541992, "learning_rate": 1.4415769355044584e-05, "loss": 0.5142, "step": 33488 }, { "epoch": 0.7102500477190303, "grad_norm": 0.3949902653694153, "learning_rate": 1.441547013278395e-05, "loss": 0.5145, "step": 33489 }, { "epoch": 0.7102712561769634, "grad_norm": 0.4257797598838806, "learning_rate": 1.4415170905612474e-05, "loss": 0.5061, "step": 33490 }, { "epoch": 0.7102924646348964, "grad_norm": 0.3519674837589264, "learning_rate": 1.441487167353048e-05, "loss": 0.4298, "step": 33491 }, { "epoch": 0.7103136730928294, "grad_norm": 0.37388846278190613, "learning_rate": 1.4414572436538305e-05, "loss": 0.5249, "step": 33492 }, { "epoch": 0.7103348815507624, "grad_norm": 0.3567834496498108, "learning_rate": 1.4414273194636282e-05, "loss": 0.456, "step": 33493 }, { "epoch": 0.7103560900086955, "grad_norm": 0.3409009277820587, "learning_rate": 1.4413973947824737e-05, "loss": 0.397, "step": 33494 }, { "epoch": 0.7103772984666284, "grad_norm": 0.3781222403049469, "learning_rate": 1.4413674696104013e-05, "loss": 0.5407, "step": 33495 }, { "epoch": 0.7103985069245615, "grad_norm": 0.37758803367614746, "learning_rate": 1.441337543947444e-05, "loss": 0.5271, "step": 33496 }, { "epoch": 0.7104197153824945, "grad_norm": 0.34645140171051025, "learning_rate": 1.4413076177936347e-05, "loss": 0.5082, "step": 33497 }, { "epoch": 0.7104409238404276, "grad_norm": 0.336865097284317, "learning_rate": 1.4412776911490069e-05, "loss": 0.3958, "step": 33498 }, { "epoch": 0.7104621322983606, "grad_norm": 0.3565344214439392, "learning_rate": 1.441247764013594e-05, "loss": 0.4968, "step": 33499 }, { "epoch": 0.7104833407562936, "grad_norm": 0.3758656084537506, "learning_rate": 1.4412178363874289e-05, "loss": 0.4799, "step": 33500 }, { "epoch": 0.7105045492142267, "grad_norm": 0.33084040880203247, "learning_rate": 1.4411879082705455e-05, "loss": 0.55, "step": 33501 }, { "epoch": 0.7105257576721596, "grad_norm": 0.36739665269851685, "learning_rate": 1.4411579796629766e-05, "loss": 0.463, "step": 33502 }, { "epoch": 0.7105469661300927, "grad_norm": 0.33980605006217957, "learning_rate": 1.4411280505647555e-05, "loss": 0.4761, "step": 33503 }, { "epoch": 0.7105681745880257, "grad_norm": 0.354952335357666, "learning_rate": 1.4410981209759159e-05, "loss": 0.5095, "step": 33504 }, { "epoch": 0.7105893830459588, "grad_norm": 0.3597692847251892, "learning_rate": 1.4410681908964905e-05, "loss": 0.5062, "step": 33505 }, { "epoch": 0.7106105915038917, "grad_norm": 0.4011993706226349, "learning_rate": 1.441038260326513e-05, "loss": 0.5173, "step": 33506 }, { "epoch": 0.7106317999618248, "grad_norm": 0.42286500334739685, "learning_rate": 1.4410083292660166e-05, "loss": 0.574, "step": 33507 }, { "epoch": 0.7106530084197578, "grad_norm": 0.3677610158920288, "learning_rate": 1.4409783977150344e-05, "loss": 0.4968, "step": 33508 }, { "epoch": 0.7106742168776908, "grad_norm": 0.3822353780269623, "learning_rate": 1.4409484656736004e-05, "loss": 0.5141, "step": 33509 }, { "epoch": 0.7106954253356238, "grad_norm": 0.3319835364818573, "learning_rate": 1.4409185331417468e-05, "loss": 0.3955, "step": 33510 }, { "epoch": 0.7107166337935569, "grad_norm": 0.3592335283756256, "learning_rate": 1.4408886001195077e-05, "loss": 0.4648, "step": 33511 }, { "epoch": 0.71073784225149, "grad_norm": 0.3774506151676178, "learning_rate": 1.440858666606916e-05, "loss": 0.5043, "step": 33512 }, { "epoch": 0.7107590507094229, "grad_norm": 0.37393197417259216, "learning_rate": 1.440828732604005e-05, "loss": 0.5041, "step": 33513 }, { "epoch": 0.710780259167356, "grad_norm": 0.46682295203208923, "learning_rate": 1.4407987981108083e-05, "loss": 0.4898, "step": 33514 }, { "epoch": 0.710801467625289, "grad_norm": 0.3555475175380707, "learning_rate": 1.4407688631273591e-05, "loss": 0.5082, "step": 33515 }, { "epoch": 0.710822676083222, "grad_norm": 0.4041367173194885, "learning_rate": 1.4407389276536905e-05, "loss": 0.506, "step": 33516 }, { "epoch": 0.710843884541155, "grad_norm": 0.36210888624191284, "learning_rate": 1.4407089916898359e-05, "loss": 0.4993, "step": 33517 }, { "epoch": 0.7108650929990881, "grad_norm": 0.36970487236976624, "learning_rate": 1.4406790552358286e-05, "loss": 0.4974, "step": 33518 }, { "epoch": 0.710886301457021, "grad_norm": 0.35080215334892273, "learning_rate": 1.4406491182917018e-05, "loss": 0.512, "step": 33519 }, { "epoch": 0.7109075099149541, "grad_norm": 0.3348270654678345, "learning_rate": 1.4406191808574888e-05, "loss": 0.5607, "step": 33520 }, { "epoch": 0.7109287183728871, "grad_norm": 0.3732982277870178, "learning_rate": 1.4405892429332231e-05, "loss": 0.507, "step": 33521 }, { "epoch": 0.7109499268308201, "grad_norm": 0.33971068263053894, "learning_rate": 1.4405593045189376e-05, "loss": 0.4542, "step": 33522 }, { "epoch": 0.7109711352887531, "grad_norm": 0.3253713846206665, "learning_rate": 1.4405293656146664e-05, "loss": 0.4584, "step": 33523 }, { "epoch": 0.7109923437466862, "grad_norm": 0.3749181032180786, "learning_rate": 1.4404994262204421e-05, "loss": 0.4889, "step": 33524 }, { "epoch": 0.7110135522046191, "grad_norm": 0.3861149251461029, "learning_rate": 1.4404694863362978e-05, "loss": 0.5116, "step": 33525 }, { "epoch": 0.7110347606625522, "grad_norm": 0.3277295231819153, "learning_rate": 1.4404395459622676e-05, "loss": 0.4547, "step": 33526 }, { "epoch": 0.7110559691204853, "grad_norm": 0.3445010483264923, "learning_rate": 1.4404096050983842e-05, "loss": 0.4975, "step": 33527 }, { "epoch": 0.7110771775784183, "grad_norm": 0.32479918003082275, "learning_rate": 1.440379663744681e-05, "loss": 0.42, "step": 33528 }, { "epoch": 0.7110983860363513, "grad_norm": 0.35891351103782654, "learning_rate": 1.4403497219011916e-05, "loss": 0.4893, "step": 33529 }, { "epoch": 0.7111195944942843, "grad_norm": 0.36598607897758484, "learning_rate": 1.4403197795679487e-05, "loss": 0.5241, "step": 33530 }, { "epoch": 0.7111408029522174, "grad_norm": 0.5410429239273071, "learning_rate": 1.4402898367449862e-05, "loss": 0.5279, "step": 33531 }, { "epoch": 0.7111620114101503, "grad_norm": 0.34886857867240906, "learning_rate": 1.4402598934323368e-05, "loss": 0.4837, "step": 33532 }, { "epoch": 0.7111832198680834, "grad_norm": 0.33832064270973206, "learning_rate": 1.4402299496300348e-05, "loss": 0.4841, "step": 33533 }, { "epoch": 0.7112044283260164, "grad_norm": 0.36132970452308655, "learning_rate": 1.4402000053381125e-05, "loss": 0.5566, "step": 33534 }, { "epoch": 0.7112256367839495, "grad_norm": 0.38829725980758667, "learning_rate": 1.4401700605566036e-05, "loss": 0.6099, "step": 33535 }, { "epoch": 0.7112468452418824, "grad_norm": 0.7237057685852051, "learning_rate": 1.4401401152855415e-05, "loss": 0.4731, "step": 33536 }, { "epoch": 0.7112680536998155, "grad_norm": 0.3683619201183319, "learning_rate": 1.4401101695249593e-05, "loss": 0.4539, "step": 33537 }, { "epoch": 0.7112892621577485, "grad_norm": 0.3992690443992615, "learning_rate": 1.4400802232748902e-05, "loss": 0.4957, "step": 33538 }, { "epoch": 0.7113104706156815, "grad_norm": 0.32819318771362305, "learning_rate": 1.440050276535368e-05, "loss": 0.4927, "step": 33539 }, { "epoch": 0.7113316790736146, "grad_norm": 0.35194751620292664, "learning_rate": 1.4400203293064255e-05, "loss": 0.4398, "step": 33540 }, { "epoch": 0.7113528875315476, "grad_norm": 0.3577019274234772, "learning_rate": 1.4399903815880965e-05, "loss": 0.459, "step": 33541 }, { "epoch": 0.7113740959894806, "grad_norm": 0.3384600877761841, "learning_rate": 1.4399604333804135e-05, "loss": 0.4774, "step": 33542 }, { "epoch": 0.7113953044474136, "grad_norm": 0.3851642310619354, "learning_rate": 1.439930484683411e-05, "loss": 0.5389, "step": 33543 }, { "epoch": 0.7114165129053467, "grad_norm": 0.362203449010849, "learning_rate": 1.4399005354971214e-05, "loss": 0.4163, "step": 33544 }, { "epoch": 0.7114377213632797, "grad_norm": 0.3473454713821411, "learning_rate": 1.4398705858215778e-05, "loss": 0.4979, "step": 33545 }, { "epoch": 0.7114589298212127, "grad_norm": 0.3744564652442932, "learning_rate": 1.4398406356568144e-05, "loss": 0.4781, "step": 33546 }, { "epoch": 0.7114801382791457, "grad_norm": 0.3617211580276489, "learning_rate": 1.4398106850028642e-05, "loss": 0.5436, "step": 33547 }, { "epoch": 0.7115013467370788, "grad_norm": 0.34045490622520447, "learning_rate": 1.4397807338597601e-05, "loss": 0.4487, "step": 33548 }, { "epoch": 0.7115225551950117, "grad_norm": 0.3228031396865845, "learning_rate": 1.4397507822275356e-05, "loss": 0.5031, "step": 33549 }, { "epoch": 0.7115437636529448, "grad_norm": 0.35624679923057556, "learning_rate": 1.4397208301062242e-05, "loss": 0.5217, "step": 33550 }, { "epoch": 0.7115649721108778, "grad_norm": 0.3788914978504181, "learning_rate": 1.4396908774958591e-05, "loss": 0.4525, "step": 33551 }, { "epoch": 0.7115861805688108, "grad_norm": 0.34408050775527954, "learning_rate": 1.4396609243964737e-05, "loss": 0.4543, "step": 33552 }, { "epoch": 0.7116073890267439, "grad_norm": 0.34609153866767883, "learning_rate": 1.4396309708081012e-05, "loss": 0.451, "step": 33553 }, { "epoch": 0.7116285974846769, "grad_norm": 0.357997328042984, "learning_rate": 1.439601016730775e-05, "loss": 0.4813, "step": 33554 }, { "epoch": 0.71164980594261, "grad_norm": 0.3131263256072998, "learning_rate": 1.4395710621645285e-05, "loss": 0.3942, "step": 33555 }, { "epoch": 0.7116710144005429, "grad_norm": 0.3657223582267761, "learning_rate": 1.4395411071093945e-05, "loss": 0.487, "step": 33556 }, { "epoch": 0.711692222858476, "grad_norm": 0.41303879022598267, "learning_rate": 1.439511151565407e-05, "loss": 0.4997, "step": 33557 }, { "epoch": 0.711713431316409, "grad_norm": 0.37660014629364014, "learning_rate": 1.439481195532599e-05, "loss": 0.4904, "step": 33558 }, { "epoch": 0.711734639774342, "grad_norm": 0.31544673442840576, "learning_rate": 1.4394512390110037e-05, "loss": 0.5049, "step": 33559 }, { "epoch": 0.711755848232275, "grad_norm": 0.35202649235725403, "learning_rate": 1.4394212820006546e-05, "loss": 0.5563, "step": 33560 }, { "epoch": 0.7117770566902081, "grad_norm": 0.4220138192176819, "learning_rate": 1.4393913245015849e-05, "loss": 0.4895, "step": 33561 }, { "epoch": 0.711798265148141, "grad_norm": 0.3476995527744293, "learning_rate": 1.4393613665138281e-05, "loss": 0.5576, "step": 33562 }, { "epoch": 0.7118194736060741, "grad_norm": 0.34440040588378906, "learning_rate": 1.4393314080374175e-05, "loss": 0.4911, "step": 33563 }, { "epoch": 0.7118406820640071, "grad_norm": 0.369463711977005, "learning_rate": 1.439301449072386e-05, "loss": 0.5585, "step": 33564 }, { "epoch": 0.7118618905219402, "grad_norm": 0.4074900150299072, "learning_rate": 1.4392714896187674e-05, "loss": 0.5111, "step": 33565 }, { "epoch": 0.7118830989798731, "grad_norm": 0.3582083284854889, "learning_rate": 1.439241529676595e-05, "loss": 0.4369, "step": 33566 }, { "epoch": 0.7119043074378062, "grad_norm": 0.37092000246047974, "learning_rate": 1.439211569245902e-05, "loss": 0.4676, "step": 33567 }, { "epoch": 0.7119255158957393, "grad_norm": 0.37354281544685364, "learning_rate": 1.4391816083267215e-05, "loss": 0.4861, "step": 33568 }, { "epoch": 0.7119467243536722, "grad_norm": 0.37214890122413635, "learning_rate": 1.4391516469190872e-05, "loss": 0.4983, "step": 33569 }, { "epoch": 0.7119679328116053, "grad_norm": 0.33594322204589844, "learning_rate": 1.4391216850230321e-05, "loss": 0.505, "step": 33570 }, { "epoch": 0.7119891412695383, "grad_norm": 0.35660648345947266, "learning_rate": 1.4390917226385896e-05, "loss": 0.4949, "step": 33571 }, { "epoch": 0.7120103497274713, "grad_norm": 0.3391028940677643, "learning_rate": 1.4390617597657934e-05, "loss": 0.4703, "step": 33572 }, { "epoch": 0.7120315581854043, "grad_norm": 0.3669838607311249, "learning_rate": 1.4390317964046763e-05, "loss": 0.5537, "step": 33573 }, { "epoch": 0.7120527666433374, "grad_norm": 0.32865607738494873, "learning_rate": 1.4390018325552721e-05, "loss": 0.4551, "step": 33574 }, { "epoch": 0.7120739751012704, "grad_norm": 0.369550496339798, "learning_rate": 1.4389718682176137e-05, "loss": 0.5111, "step": 33575 }, { "epoch": 0.7120951835592034, "grad_norm": 0.4978082776069641, "learning_rate": 1.4389419033917347e-05, "loss": 0.5545, "step": 33576 }, { "epoch": 0.7121163920171364, "grad_norm": 0.34657058119773865, "learning_rate": 1.438911938077668e-05, "loss": 0.4352, "step": 33577 }, { "epoch": 0.7121376004750695, "grad_norm": 0.35376739501953125, "learning_rate": 1.4388819722754476e-05, "loss": 0.5708, "step": 33578 }, { "epoch": 0.7121588089330024, "grad_norm": 0.33315473794937134, "learning_rate": 1.4388520059851063e-05, "loss": 0.4407, "step": 33579 }, { "epoch": 0.7121800173909355, "grad_norm": 0.3721611499786377, "learning_rate": 1.438822039206678e-05, "loss": 0.5048, "step": 33580 }, { "epoch": 0.7122012258488686, "grad_norm": 0.3551332354545593, "learning_rate": 1.4387920719401951e-05, "loss": 0.5023, "step": 33581 }, { "epoch": 0.7122224343068015, "grad_norm": 0.43346744775772095, "learning_rate": 1.4387621041856916e-05, "loss": 0.5134, "step": 33582 }, { "epoch": 0.7122436427647346, "grad_norm": 0.30584579706192017, "learning_rate": 1.4387321359432007e-05, "loss": 0.488, "step": 33583 }, { "epoch": 0.7122648512226676, "grad_norm": 0.38414502143859863, "learning_rate": 1.438702167212756e-05, "loss": 0.5815, "step": 33584 }, { "epoch": 0.7122860596806007, "grad_norm": 0.35401609539985657, "learning_rate": 1.4386721979943902e-05, "loss": 0.5727, "step": 33585 }, { "epoch": 0.7123072681385336, "grad_norm": 0.3581056296825409, "learning_rate": 1.4386422282881374e-05, "loss": 0.5034, "step": 33586 }, { "epoch": 0.7123284765964667, "grad_norm": 0.41654980182647705, "learning_rate": 1.4386122580940303e-05, "loss": 0.5059, "step": 33587 }, { "epoch": 0.7123496850543997, "grad_norm": 0.37897545099258423, "learning_rate": 1.4385822874121024e-05, "loss": 0.508, "step": 33588 }, { "epoch": 0.7123708935123327, "grad_norm": 0.36355358362197876, "learning_rate": 1.4385523162423873e-05, "loss": 0.5446, "step": 33589 }, { "epoch": 0.7123921019702657, "grad_norm": 0.3450964689254761, "learning_rate": 1.4385223445849177e-05, "loss": 0.5462, "step": 33590 }, { "epoch": 0.7124133104281988, "grad_norm": 0.3581700921058655, "learning_rate": 1.4384923724397277e-05, "loss": 0.4804, "step": 33591 }, { "epoch": 0.7124345188861317, "grad_norm": 0.3442496955394745, "learning_rate": 1.4384623998068502e-05, "loss": 0.53, "step": 33592 }, { "epoch": 0.7124557273440648, "grad_norm": 0.3899000287055969, "learning_rate": 1.4384324266863188e-05, "loss": 0.4744, "step": 33593 }, { "epoch": 0.7124769358019979, "grad_norm": 0.33649393916130066, "learning_rate": 1.4384024530781665e-05, "loss": 0.5155, "step": 33594 }, { "epoch": 0.7124981442599309, "grad_norm": 0.342985600233078, "learning_rate": 1.4383724789824267e-05, "loss": 0.4552, "step": 33595 }, { "epoch": 0.7125193527178639, "grad_norm": 0.3676164746284485, "learning_rate": 1.4383425043991329e-05, "loss": 0.3955, "step": 33596 }, { "epoch": 0.7125405611757969, "grad_norm": 0.372882604598999, "learning_rate": 1.4383125293283184e-05, "loss": 0.5614, "step": 33597 }, { "epoch": 0.71256176963373, "grad_norm": 0.35818782448768616, "learning_rate": 1.4382825537700166e-05, "loss": 0.4411, "step": 33598 }, { "epoch": 0.7125829780916629, "grad_norm": 0.38939496874809265, "learning_rate": 1.4382525777242608e-05, "loss": 0.4845, "step": 33599 }, { "epoch": 0.712604186549596, "grad_norm": 0.42820900678634644, "learning_rate": 1.4382226011910842e-05, "loss": 0.5502, "step": 33600 }, { "epoch": 0.712625395007529, "grad_norm": 0.32708337903022766, "learning_rate": 1.4381926241705202e-05, "loss": 0.5252, "step": 33601 }, { "epoch": 0.712646603465462, "grad_norm": 0.353900671005249, "learning_rate": 1.4381626466626021e-05, "loss": 0.462, "step": 33602 }, { "epoch": 0.712667811923395, "grad_norm": 0.39701125025749207, "learning_rate": 1.4381326686673634e-05, "loss": 0.4375, "step": 33603 }, { "epoch": 0.7126890203813281, "grad_norm": 0.3361580967903137, "learning_rate": 1.4381026901848372e-05, "loss": 0.402, "step": 33604 }, { "epoch": 0.712710228839261, "grad_norm": 0.42616382241249084, "learning_rate": 1.4380727112150572e-05, "loss": 0.5414, "step": 33605 }, { "epoch": 0.7127314372971941, "grad_norm": 0.3314507305622101, "learning_rate": 1.4380427317580568e-05, "loss": 0.4742, "step": 33606 }, { "epoch": 0.7127526457551271, "grad_norm": 0.3837074935436249, "learning_rate": 1.4380127518138685e-05, "loss": 0.5237, "step": 33607 }, { "epoch": 0.7127738542130602, "grad_norm": 0.41754692792892456, "learning_rate": 1.4379827713825268e-05, "loss": 0.5327, "step": 33608 }, { "epoch": 0.7127950626709932, "grad_norm": 0.42780601978302, "learning_rate": 1.4379527904640638e-05, "loss": 0.5011, "step": 33609 }, { "epoch": 0.7128162711289262, "grad_norm": 0.32703137397766113, "learning_rate": 1.4379228090585138e-05, "loss": 0.4602, "step": 33610 }, { "epoch": 0.7128374795868593, "grad_norm": 0.35841768980026245, "learning_rate": 1.4378928271659101e-05, "loss": 0.4847, "step": 33611 }, { "epoch": 0.7128586880447922, "grad_norm": 0.34226635098457336, "learning_rate": 1.4378628447862857e-05, "loss": 0.4965, "step": 33612 }, { "epoch": 0.7128798965027253, "grad_norm": 0.4279695749282837, "learning_rate": 1.4378328619196737e-05, "loss": 0.4705, "step": 33613 }, { "epoch": 0.7129011049606583, "grad_norm": 0.3655720055103302, "learning_rate": 1.4378028785661082e-05, "loss": 0.5249, "step": 33614 }, { "epoch": 0.7129223134185914, "grad_norm": 0.349244624376297, "learning_rate": 1.437772894725622e-05, "loss": 0.4233, "step": 33615 }, { "epoch": 0.7129435218765243, "grad_norm": 0.5109189748764038, "learning_rate": 1.4377429103982486e-05, "loss": 0.51, "step": 33616 }, { "epoch": 0.7129647303344574, "grad_norm": 0.3790094256401062, "learning_rate": 1.4377129255840213e-05, "loss": 0.5211, "step": 33617 }, { "epoch": 0.7129859387923904, "grad_norm": 0.350893497467041, "learning_rate": 1.4376829402829732e-05, "loss": 0.5162, "step": 33618 }, { "epoch": 0.7130071472503234, "grad_norm": 0.4867115616798401, "learning_rate": 1.4376529544951386e-05, "loss": 0.5716, "step": 33619 }, { "epoch": 0.7130283557082564, "grad_norm": 0.3967335820198059, "learning_rate": 1.4376229682205498e-05, "loss": 0.5455, "step": 33620 }, { "epoch": 0.7130495641661895, "grad_norm": 0.35871776938438416, "learning_rate": 1.4375929814592404e-05, "loss": 0.4867, "step": 33621 }, { "epoch": 0.7130707726241226, "grad_norm": 0.47961705923080444, "learning_rate": 1.437562994211244e-05, "loss": 0.5324, "step": 33622 }, { "epoch": 0.7130919810820555, "grad_norm": 0.47042784094810486, "learning_rate": 1.437533006476594e-05, "loss": 0.4504, "step": 33623 }, { "epoch": 0.7131131895399886, "grad_norm": 0.35330405831336975, "learning_rate": 1.4375030182553233e-05, "loss": 0.5441, "step": 33624 }, { "epoch": 0.7131343979979216, "grad_norm": 0.42206981778144836, "learning_rate": 1.4374730295474659e-05, "loss": 0.4752, "step": 33625 }, { "epoch": 0.7131556064558546, "grad_norm": 0.5039937496185303, "learning_rate": 1.4374430403530548e-05, "loss": 0.4706, "step": 33626 }, { "epoch": 0.7131768149137876, "grad_norm": 0.338290274143219, "learning_rate": 1.437413050672123e-05, "loss": 0.5412, "step": 33627 }, { "epoch": 0.7131980233717207, "grad_norm": 0.7550973892211914, "learning_rate": 1.4373830605047044e-05, "loss": 0.4763, "step": 33628 }, { "epoch": 0.7132192318296536, "grad_norm": 0.341977596282959, "learning_rate": 1.4373530698508323e-05, "loss": 0.434, "step": 33629 }, { "epoch": 0.7132404402875867, "grad_norm": 0.352777898311615, "learning_rate": 1.4373230787105397e-05, "loss": 0.4839, "step": 33630 }, { "epoch": 0.7132616487455197, "grad_norm": 0.3723834455013275, "learning_rate": 1.4372930870838601e-05, "loss": 0.452, "step": 33631 }, { "epoch": 0.7132828572034527, "grad_norm": 0.36932265758514404, "learning_rate": 1.4372630949708271e-05, "loss": 0.463, "step": 33632 }, { "epoch": 0.7133040656613857, "grad_norm": 0.32792794704437256, "learning_rate": 1.4372331023714738e-05, "loss": 0.4304, "step": 33633 }, { "epoch": 0.7133252741193188, "grad_norm": 0.3875110447406769, "learning_rate": 1.4372031092858339e-05, "loss": 0.5462, "step": 33634 }, { "epoch": 0.7133464825772519, "grad_norm": 0.3572297990322113, "learning_rate": 1.4371731157139399e-05, "loss": 0.4871, "step": 33635 }, { "epoch": 0.7133676910351848, "grad_norm": 0.36423298716545105, "learning_rate": 1.4371431216558263e-05, "loss": 0.4257, "step": 33636 }, { "epoch": 0.7133888994931179, "grad_norm": 0.3743039667606354, "learning_rate": 1.437113127111526e-05, "loss": 0.6485, "step": 33637 }, { "epoch": 0.7134101079510509, "grad_norm": 0.37833571434020996, "learning_rate": 1.4370831320810718e-05, "loss": 0.5588, "step": 33638 }, { "epoch": 0.7134313164089839, "grad_norm": 0.3456414043903351, "learning_rate": 1.4370531365644978e-05, "loss": 0.5486, "step": 33639 }, { "epoch": 0.7134525248669169, "grad_norm": 0.3272501528263092, "learning_rate": 1.4370231405618371e-05, "loss": 0.4932, "step": 33640 }, { "epoch": 0.71347373332485, "grad_norm": 0.39322981238365173, "learning_rate": 1.4369931440731228e-05, "loss": 0.5107, "step": 33641 }, { "epoch": 0.7134949417827829, "grad_norm": 0.34421730041503906, "learning_rate": 1.4369631470983889e-05, "loss": 0.4941, "step": 33642 }, { "epoch": 0.713516150240716, "grad_norm": 0.36575552821159363, "learning_rate": 1.4369331496376684e-05, "loss": 0.5564, "step": 33643 }, { "epoch": 0.713537358698649, "grad_norm": 0.37641745805740356, "learning_rate": 1.436903151690994e-05, "loss": 0.6192, "step": 33644 }, { "epoch": 0.7135585671565821, "grad_norm": 0.4041604995727539, "learning_rate": 1.4368731532584005e-05, "loss": 0.4795, "step": 33645 }, { "epoch": 0.713579775614515, "grad_norm": 0.35242101550102234, "learning_rate": 1.43684315433992e-05, "loss": 0.4829, "step": 33646 }, { "epoch": 0.7136009840724481, "grad_norm": 0.45482996106147766, "learning_rate": 1.4368131549355866e-05, "loss": 0.4625, "step": 33647 }, { "epoch": 0.7136221925303811, "grad_norm": 0.3402804434299469, "learning_rate": 1.4367831550454333e-05, "loss": 0.4459, "step": 33648 }, { "epoch": 0.7136434009883141, "grad_norm": 0.34344062209129333, "learning_rate": 1.4367531546694935e-05, "loss": 0.4856, "step": 33649 }, { "epoch": 0.7136646094462472, "grad_norm": 0.43062594532966614, "learning_rate": 1.4367231538078008e-05, "loss": 0.4757, "step": 33650 }, { "epoch": 0.7136858179041802, "grad_norm": 0.3513198494911194, "learning_rate": 1.4366931524603884e-05, "loss": 0.4943, "step": 33651 }, { "epoch": 0.7137070263621133, "grad_norm": 0.3861829340457916, "learning_rate": 1.4366631506272897e-05, "loss": 0.462, "step": 33652 }, { "epoch": 0.7137282348200462, "grad_norm": 0.34746140241622925, "learning_rate": 1.4366331483085378e-05, "loss": 0.4438, "step": 33653 }, { "epoch": 0.7137494432779793, "grad_norm": 0.38136157393455505, "learning_rate": 1.4366031455041665e-05, "loss": 0.5364, "step": 33654 }, { "epoch": 0.7137706517359123, "grad_norm": 0.3953205943107605, "learning_rate": 1.4365731422142087e-05, "loss": 0.5566, "step": 33655 }, { "epoch": 0.7137918601938453, "grad_norm": 0.36089223623275757, "learning_rate": 1.4365431384386983e-05, "loss": 0.4881, "step": 33656 }, { "epoch": 0.7138130686517783, "grad_norm": 0.3644390404224396, "learning_rate": 1.4365131341776683e-05, "loss": 0.4323, "step": 33657 }, { "epoch": 0.7138342771097114, "grad_norm": 0.36520150303840637, "learning_rate": 1.4364831294311523e-05, "loss": 0.5322, "step": 33658 }, { "epoch": 0.7138554855676443, "grad_norm": 0.34460344910621643, "learning_rate": 1.4364531241991837e-05, "loss": 0.4633, "step": 33659 }, { "epoch": 0.7138766940255774, "grad_norm": 0.32934409379959106, "learning_rate": 1.4364231184817954e-05, "loss": 0.5228, "step": 33660 }, { "epoch": 0.7138979024835104, "grad_norm": 0.468461275100708, "learning_rate": 1.436393112279021e-05, "loss": 0.4947, "step": 33661 }, { "epoch": 0.7139191109414434, "grad_norm": 0.3352469205856323, "learning_rate": 1.4363631055908944e-05, "loss": 0.5071, "step": 33662 }, { "epoch": 0.7139403193993765, "grad_norm": 0.36489200592041016, "learning_rate": 1.4363330984174482e-05, "loss": 0.4933, "step": 33663 }, { "epoch": 0.7139615278573095, "grad_norm": 0.4081796109676361, "learning_rate": 1.4363030907587163e-05, "loss": 0.5135, "step": 33664 }, { "epoch": 0.7139827363152426, "grad_norm": 0.44402024149894714, "learning_rate": 1.436273082614732e-05, "loss": 0.4716, "step": 33665 }, { "epoch": 0.7140039447731755, "grad_norm": 0.36320310831069946, "learning_rate": 1.4362430739855282e-05, "loss": 0.527, "step": 33666 }, { "epoch": 0.7140251532311086, "grad_norm": 0.5028741955757141, "learning_rate": 1.4362130648711387e-05, "loss": 0.4785, "step": 33667 }, { "epoch": 0.7140463616890416, "grad_norm": 0.3457149565219879, "learning_rate": 1.4361830552715973e-05, "loss": 0.515, "step": 33668 }, { "epoch": 0.7140675701469746, "grad_norm": 0.3500482439994812, "learning_rate": 1.4361530451869365e-05, "loss": 0.471, "step": 33669 }, { "epoch": 0.7140887786049076, "grad_norm": 0.37114062905311584, "learning_rate": 1.4361230346171902e-05, "loss": 0.5178, "step": 33670 }, { "epoch": 0.7141099870628407, "grad_norm": 0.35436832904815674, "learning_rate": 1.4360930235623917e-05, "loss": 0.4533, "step": 33671 }, { "epoch": 0.7141311955207736, "grad_norm": 0.34054845571517944, "learning_rate": 1.436063012022574e-05, "loss": 0.4254, "step": 33672 }, { "epoch": 0.7141524039787067, "grad_norm": 0.4019850492477417, "learning_rate": 1.4360329999977709e-05, "loss": 0.4534, "step": 33673 }, { "epoch": 0.7141736124366397, "grad_norm": 0.36658644676208496, "learning_rate": 1.436002987488016e-05, "loss": 0.5552, "step": 33674 }, { "epoch": 0.7141948208945728, "grad_norm": 0.4270951747894287, "learning_rate": 1.4359729744933422e-05, "loss": 0.5589, "step": 33675 }, { "epoch": 0.7142160293525058, "grad_norm": 0.3281971514225006, "learning_rate": 1.4359429610137831e-05, "loss": 0.4344, "step": 33676 }, { "epoch": 0.7142372378104388, "grad_norm": 0.3473316431045532, "learning_rate": 1.4359129470493718e-05, "loss": 0.4563, "step": 33677 }, { "epoch": 0.7142584462683719, "grad_norm": 0.365166574716568, "learning_rate": 1.4358829326001423e-05, "loss": 0.5252, "step": 33678 }, { "epoch": 0.7142796547263048, "grad_norm": 0.3461538553237915, "learning_rate": 1.4358529176661273e-05, "loss": 0.5164, "step": 33679 }, { "epoch": 0.7143008631842379, "grad_norm": 0.338539183139801, "learning_rate": 1.4358229022473604e-05, "loss": 0.4024, "step": 33680 }, { "epoch": 0.7143220716421709, "grad_norm": 0.30462807416915894, "learning_rate": 1.435792886343875e-05, "loss": 0.4137, "step": 33681 }, { "epoch": 0.714343280100104, "grad_norm": 0.368468314409256, "learning_rate": 1.4357628699557048e-05, "loss": 0.5262, "step": 33682 }, { "epoch": 0.7143644885580369, "grad_norm": 0.3852402865886688, "learning_rate": 1.435732853082883e-05, "loss": 0.4829, "step": 33683 }, { "epoch": 0.71438569701597, "grad_norm": 0.38061901926994324, "learning_rate": 1.4357028357254428e-05, "loss": 0.486, "step": 33684 }, { "epoch": 0.714406905473903, "grad_norm": 0.3290785849094391, "learning_rate": 1.4356728178834175e-05, "loss": 0.4367, "step": 33685 }, { "epoch": 0.714428113931836, "grad_norm": 0.34958136081695557, "learning_rate": 1.4356427995568405e-05, "loss": 0.4686, "step": 33686 }, { "epoch": 0.714449322389769, "grad_norm": 0.3865014910697937, "learning_rate": 1.4356127807457459e-05, "loss": 0.4332, "step": 33687 }, { "epoch": 0.7144705308477021, "grad_norm": 0.509975016117096, "learning_rate": 1.4355827614501662e-05, "loss": 0.5322, "step": 33688 }, { "epoch": 0.714491739305635, "grad_norm": 0.3766617476940155, "learning_rate": 1.4355527416701354e-05, "loss": 0.5419, "step": 33689 }, { "epoch": 0.7145129477635681, "grad_norm": 0.3368615508079529, "learning_rate": 1.4355227214056866e-05, "loss": 0.5151, "step": 33690 }, { "epoch": 0.7145341562215012, "grad_norm": 0.3690142035484314, "learning_rate": 1.4354927006568528e-05, "loss": 0.5381, "step": 33691 }, { "epoch": 0.7145553646794341, "grad_norm": 0.35102519392967224, "learning_rate": 1.4354626794236682e-05, "loss": 0.5006, "step": 33692 }, { "epoch": 0.7145765731373672, "grad_norm": 0.3723345100879669, "learning_rate": 1.4354326577061656e-05, "loss": 0.5106, "step": 33693 }, { "epoch": 0.7145977815953002, "grad_norm": 0.3551766872406006, "learning_rate": 1.4354026355043788e-05, "loss": 0.5509, "step": 33694 }, { "epoch": 0.7146189900532333, "grad_norm": 0.3704645037651062, "learning_rate": 1.4353726128183407e-05, "loss": 0.4687, "step": 33695 }, { "epoch": 0.7146401985111662, "grad_norm": 0.3552320897579193, "learning_rate": 1.4353425896480853e-05, "loss": 0.4762, "step": 33696 }, { "epoch": 0.7146614069690993, "grad_norm": 0.37783893942832947, "learning_rate": 1.4353125659936454e-05, "loss": 0.4725, "step": 33697 }, { "epoch": 0.7146826154270323, "grad_norm": 0.3895421028137207, "learning_rate": 1.4352825418550547e-05, "loss": 0.4705, "step": 33698 }, { "epoch": 0.7147038238849653, "grad_norm": 0.3662063777446747, "learning_rate": 1.4352525172323466e-05, "loss": 0.5019, "step": 33699 }, { "epoch": 0.7147250323428983, "grad_norm": 0.32232391834259033, "learning_rate": 1.4352224921255543e-05, "loss": 0.5129, "step": 33700 }, { "epoch": 0.7147462408008314, "grad_norm": 0.3792240619659424, "learning_rate": 1.4351924665347112e-05, "loss": 0.5066, "step": 33701 }, { "epoch": 0.7147674492587643, "grad_norm": 0.36590588092803955, "learning_rate": 1.4351624404598512e-05, "loss": 0.5015, "step": 33702 }, { "epoch": 0.7147886577166974, "grad_norm": 0.3735977113246918, "learning_rate": 1.435132413901007e-05, "loss": 0.4772, "step": 33703 }, { "epoch": 0.7148098661746305, "grad_norm": 0.3599182963371277, "learning_rate": 1.4351023868582126e-05, "loss": 0.5526, "step": 33704 }, { "epoch": 0.7148310746325635, "grad_norm": 0.4047577381134033, "learning_rate": 1.4350723593315008e-05, "loss": 0.5259, "step": 33705 }, { "epoch": 0.7148522830904965, "grad_norm": 0.365947425365448, "learning_rate": 1.4350423313209054e-05, "loss": 0.5675, "step": 33706 }, { "epoch": 0.7148734915484295, "grad_norm": 0.3528253138065338, "learning_rate": 1.4350123028264598e-05, "loss": 0.4454, "step": 33707 }, { "epoch": 0.7148947000063626, "grad_norm": 0.4761005938053131, "learning_rate": 1.4349822738481973e-05, "loss": 0.4947, "step": 33708 }, { "epoch": 0.7149159084642955, "grad_norm": 0.34092167019844055, "learning_rate": 1.4349522443861513e-05, "loss": 0.5126, "step": 33709 }, { "epoch": 0.7149371169222286, "grad_norm": 0.38981688022613525, "learning_rate": 1.4349222144403552e-05, "loss": 0.4794, "step": 33710 }, { "epoch": 0.7149583253801616, "grad_norm": 0.41055014729499817, "learning_rate": 1.434892184010842e-05, "loss": 0.4996, "step": 33711 }, { "epoch": 0.7149795338380946, "grad_norm": 0.3950602114200592, "learning_rate": 1.4348621530976458e-05, "loss": 0.4445, "step": 33712 }, { "epoch": 0.7150007422960276, "grad_norm": 0.46400171518325806, "learning_rate": 1.4348321217007998e-05, "loss": 0.484, "step": 33713 }, { "epoch": 0.7150219507539607, "grad_norm": 0.3689132332801819, "learning_rate": 1.434802089820337e-05, "loss": 0.5052, "step": 33714 }, { "epoch": 0.7150431592118937, "grad_norm": 0.37567681074142456, "learning_rate": 1.4347720574562915e-05, "loss": 0.5167, "step": 33715 }, { "epoch": 0.7150643676698267, "grad_norm": 0.32608506083488464, "learning_rate": 1.434742024608696e-05, "loss": 0.5119, "step": 33716 }, { "epoch": 0.7150855761277598, "grad_norm": 0.35867244005203247, "learning_rate": 1.4347119912775842e-05, "loss": 0.5144, "step": 33717 }, { "epoch": 0.7151067845856928, "grad_norm": 0.33331865072250366, "learning_rate": 1.4346819574629896e-05, "loss": 0.5199, "step": 33718 }, { "epoch": 0.7151279930436258, "grad_norm": 0.3268204927444458, "learning_rate": 1.4346519231649456e-05, "loss": 0.4948, "step": 33719 }, { "epoch": 0.7151492015015588, "grad_norm": 0.35641956329345703, "learning_rate": 1.4346218883834853e-05, "loss": 0.4337, "step": 33720 }, { "epoch": 0.7151704099594919, "grad_norm": 0.40419936180114746, "learning_rate": 1.4345918531186425e-05, "loss": 0.4111, "step": 33721 }, { "epoch": 0.7151916184174248, "grad_norm": 0.35767024755477905, "learning_rate": 1.4345618173704504e-05, "loss": 0.3929, "step": 33722 }, { "epoch": 0.7152128268753579, "grad_norm": 0.3445245921611786, "learning_rate": 1.4345317811389421e-05, "loss": 0.4714, "step": 33723 }, { "epoch": 0.7152340353332909, "grad_norm": 0.35278502106666565, "learning_rate": 1.4345017444241515e-05, "loss": 0.558, "step": 33724 }, { "epoch": 0.715255243791224, "grad_norm": 0.4688962996006012, "learning_rate": 1.4344717072261121e-05, "loss": 0.4475, "step": 33725 }, { "epoch": 0.7152764522491569, "grad_norm": 0.567333459854126, "learning_rate": 1.4344416695448566e-05, "loss": 0.4954, "step": 33726 }, { "epoch": 0.71529766070709, "grad_norm": 0.3863639831542969, "learning_rate": 1.4344116313804193e-05, "loss": 0.4781, "step": 33727 }, { "epoch": 0.715318869165023, "grad_norm": 0.38965359330177307, "learning_rate": 1.4343815927328329e-05, "loss": 0.5878, "step": 33728 }, { "epoch": 0.715340077622956, "grad_norm": 0.3575064241886139, "learning_rate": 1.4343515536021311e-05, "loss": 0.5033, "step": 33729 }, { "epoch": 0.7153612860808891, "grad_norm": 0.37239399552345276, "learning_rate": 1.4343215139883472e-05, "loss": 0.4686, "step": 33730 }, { "epoch": 0.7153824945388221, "grad_norm": 0.3469911515712738, "learning_rate": 1.4342914738915146e-05, "loss": 0.5044, "step": 33731 }, { "epoch": 0.7154037029967552, "grad_norm": 0.35069456696510315, "learning_rate": 1.4342614333116671e-05, "loss": 0.5234, "step": 33732 }, { "epoch": 0.7154249114546881, "grad_norm": 0.35984599590301514, "learning_rate": 1.4342313922488377e-05, "loss": 0.445, "step": 33733 }, { "epoch": 0.7154461199126212, "grad_norm": 0.31333673000335693, "learning_rate": 1.4342013507030598e-05, "loss": 0.4702, "step": 33734 }, { "epoch": 0.7154673283705542, "grad_norm": 0.44065389037132263, "learning_rate": 1.4341713086743671e-05, "loss": 0.5259, "step": 33735 }, { "epoch": 0.7154885368284872, "grad_norm": 0.35611769556999207, "learning_rate": 1.4341412661627929e-05, "loss": 0.5202, "step": 33736 }, { "epoch": 0.7155097452864202, "grad_norm": 0.3416388928890228, "learning_rate": 1.4341112231683701e-05, "loss": 0.507, "step": 33737 }, { "epoch": 0.7155309537443533, "grad_norm": 0.3661821782588959, "learning_rate": 1.434081179691133e-05, "loss": 0.5625, "step": 33738 }, { "epoch": 0.7155521622022862, "grad_norm": 0.40475165843963623, "learning_rate": 1.4340511357311146e-05, "loss": 0.5696, "step": 33739 }, { "epoch": 0.7155733706602193, "grad_norm": 0.3558216094970703, "learning_rate": 1.4340210912883482e-05, "loss": 0.5425, "step": 33740 }, { "epoch": 0.7155945791181523, "grad_norm": 0.3716941475868225, "learning_rate": 1.4339910463628673e-05, "loss": 0.4834, "step": 33741 }, { "epoch": 0.7156157875760853, "grad_norm": 0.4309934079647064, "learning_rate": 1.4339610009547052e-05, "loss": 0.5517, "step": 33742 }, { "epoch": 0.7156369960340183, "grad_norm": 0.3833702504634857, "learning_rate": 1.4339309550638956e-05, "loss": 0.5811, "step": 33743 }, { "epoch": 0.7156582044919514, "grad_norm": 0.4497668445110321, "learning_rate": 1.4339009086904715e-05, "loss": 0.5403, "step": 33744 }, { "epoch": 0.7156794129498845, "grad_norm": 0.3834977447986603, "learning_rate": 1.4338708618344667e-05, "loss": 0.5003, "step": 33745 }, { "epoch": 0.7157006214078174, "grad_norm": 0.3296397924423218, "learning_rate": 1.4338408144959148e-05, "loss": 0.4023, "step": 33746 }, { "epoch": 0.7157218298657505, "grad_norm": 0.36564019322395325, "learning_rate": 1.4338107666748486e-05, "loss": 0.5045, "step": 33747 }, { "epoch": 0.7157430383236835, "grad_norm": 0.3821687400341034, "learning_rate": 1.433780718371302e-05, "loss": 0.5067, "step": 33748 }, { "epoch": 0.7157642467816165, "grad_norm": 0.345857173204422, "learning_rate": 1.4337506695853083e-05, "loss": 0.4957, "step": 33749 }, { "epoch": 0.7157854552395495, "grad_norm": 0.3719659745693207, "learning_rate": 1.4337206203169008e-05, "loss": 0.4582, "step": 33750 }, { "epoch": 0.7158066636974826, "grad_norm": 0.39137688279151917, "learning_rate": 1.4336905705661126e-05, "loss": 0.5002, "step": 33751 }, { "epoch": 0.7158278721554155, "grad_norm": 0.31890156865119934, "learning_rate": 1.4336605203329781e-05, "loss": 0.5018, "step": 33752 }, { "epoch": 0.7158490806133486, "grad_norm": 0.37078747153282166, "learning_rate": 1.4336304696175298e-05, "loss": 0.5112, "step": 33753 }, { "epoch": 0.7158702890712816, "grad_norm": 0.34005555510520935, "learning_rate": 1.4336004184198016e-05, "loss": 0.5076, "step": 33754 }, { "epoch": 0.7158914975292147, "grad_norm": 0.3547091484069824, "learning_rate": 1.4335703667398268e-05, "loss": 0.4812, "step": 33755 }, { "epoch": 0.7159127059871476, "grad_norm": 0.36411142349243164, "learning_rate": 1.4335403145776388e-05, "loss": 0.535, "step": 33756 }, { "epoch": 0.7159339144450807, "grad_norm": 0.35058674216270447, "learning_rate": 1.4335102619332708e-05, "loss": 0.4856, "step": 33757 }, { "epoch": 0.7159551229030138, "grad_norm": 0.32658395171165466, "learning_rate": 1.4334802088067568e-05, "loss": 0.4689, "step": 33758 }, { "epoch": 0.7159763313609467, "grad_norm": 0.3652041256427765, "learning_rate": 1.4334501551981294e-05, "loss": 0.4244, "step": 33759 }, { "epoch": 0.7159975398188798, "grad_norm": 0.4449830949306488, "learning_rate": 1.433420101107423e-05, "loss": 0.5268, "step": 33760 }, { "epoch": 0.7160187482768128, "grad_norm": 0.35398104786872864, "learning_rate": 1.4333900465346705e-05, "loss": 0.55, "step": 33761 }, { "epoch": 0.7160399567347459, "grad_norm": 0.35220006108283997, "learning_rate": 1.433359991479905e-05, "loss": 0.4418, "step": 33762 }, { "epoch": 0.7160611651926788, "grad_norm": 0.3399583697319031, "learning_rate": 1.4333299359431605e-05, "loss": 0.5192, "step": 33763 }, { "epoch": 0.7160823736506119, "grad_norm": 0.3282327353954315, "learning_rate": 1.4332998799244701e-05, "loss": 0.3877, "step": 33764 }, { "epoch": 0.7161035821085449, "grad_norm": 0.3416955769062042, "learning_rate": 1.4332698234238676e-05, "loss": 0.4831, "step": 33765 }, { "epoch": 0.7161247905664779, "grad_norm": 0.32137396931648254, "learning_rate": 1.4332397664413859e-05, "loss": 0.4032, "step": 33766 }, { "epoch": 0.7161459990244109, "grad_norm": 0.4183245301246643, "learning_rate": 1.433209708977059e-05, "loss": 0.4971, "step": 33767 }, { "epoch": 0.716167207482344, "grad_norm": 0.3825729489326477, "learning_rate": 1.4331796510309195e-05, "loss": 0.4629, "step": 33768 }, { "epoch": 0.7161884159402769, "grad_norm": 0.4201238751411438, "learning_rate": 1.4331495926030016e-05, "loss": 0.4875, "step": 33769 }, { "epoch": 0.71620962439821, "grad_norm": 0.38068437576293945, "learning_rate": 1.4331195336933388e-05, "loss": 0.5238, "step": 33770 }, { "epoch": 0.7162308328561431, "grad_norm": 0.3539615273475647, "learning_rate": 1.4330894743019638e-05, "loss": 0.5423, "step": 33771 }, { "epoch": 0.716252041314076, "grad_norm": 0.35747620463371277, "learning_rate": 1.4330594144289108e-05, "loss": 0.4481, "step": 33772 }, { "epoch": 0.7162732497720091, "grad_norm": 0.35282042622566223, "learning_rate": 1.4330293540742127e-05, "loss": 0.4859, "step": 33773 }, { "epoch": 0.7162944582299421, "grad_norm": 0.347562313079834, "learning_rate": 1.4329992932379031e-05, "loss": 0.4661, "step": 33774 }, { "epoch": 0.7163156666878752, "grad_norm": 0.33419471979141235, "learning_rate": 1.4329692319200154e-05, "loss": 0.459, "step": 33775 }, { "epoch": 0.7163368751458081, "grad_norm": 0.3701404631137848, "learning_rate": 1.4329391701205833e-05, "loss": 0.4249, "step": 33776 }, { "epoch": 0.7163580836037412, "grad_norm": 0.3985719680786133, "learning_rate": 1.4329091078396396e-05, "loss": 0.527, "step": 33777 }, { "epoch": 0.7163792920616742, "grad_norm": 0.4895978271961212, "learning_rate": 1.4328790450772185e-05, "loss": 0.4965, "step": 33778 }, { "epoch": 0.7164005005196072, "grad_norm": 0.4400878846645355, "learning_rate": 1.4328489818333528e-05, "loss": 0.4599, "step": 33779 }, { "epoch": 0.7164217089775402, "grad_norm": 0.3583018183708191, "learning_rate": 1.4328189181080765e-05, "loss": 0.4706, "step": 33780 }, { "epoch": 0.7164429174354733, "grad_norm": 2.98299241065979, "learning_rate": 1.432788853901423e-05, "loss": 0.4763, "step": 33781 }, { "epoch": 0.7164641258934062, "grad_norm": 0.3617479205131531, "learning_rate": 1.4327587892134247e-05, "loss": 0.4213, "step": 33782 }, { "epoch": 0.7164853343513393, "grad_norm": 0.37460628151893616, "learning_rate": 1.4327287240441163e-05, "loss": 0.3869, "step": 33783 }, { "epoch": 0.7165065428092723, "grad_norm": 0.34730976819992065, "learning_rate": 1.432698658393531e-05, "loss": 0.5323, "step": 33784 }, { "epoch": 0.7165277512672054, "grad_norm": 0.3612266480922699, "learning_rate": 1.4326685922617016e-05, "loss": 0.4984, "step": 33785 }, { "epoch": 0.7165489597251384, "grad_norm": 0.3937101662158966, "learning_rate": 1.4326385256486624e-05, "loss": 0.4843, "step": 33786 }, { "epoch": 0.7165701681830714, "grad_norm": 0.37525737285614014, "learning_rate": 1.432608458554446e-05, "loss": 0.5147, "step": 33787 }, { "epoch": 0.7165913766410045, "grad_norm": 0.40248316526412964, "learning_rate": 1.4325783909790865e-05, "loss": 0.5562, "step": 33788 }, { "epoch": 0.7166125850989374, "grad_norm": 0.3891634941101074, "learning_rate": 1.432548322922617e-05, "loss": 0.5763, "step": 33789 }, { "epoch": 0.7166337935568705, "grad_norm": 0.34361428022384644, "learning_rate": 1.4325182543850708e-05, "loss": 0.5669, "step": 33790 }, { "epoch": 0.7166550020148035, "grad_norm": 0.3489525020122528, "learning_rate": 1.4324881853664816e-05, "loss": 0.5044, "step": 33791 }, { "epoch": 0.7166762104727366, "grad_norm": 0.4239770770072937, "learning_rate": 1.432458115866883e-05, "loss": 0.5176, "step": 33792 }, { "epoch": 0.7166974189306695, "grad_norm": 0.34480151534080505, "learning_rate": 1.432428045886308e-05, "loss": 0.4545, "step": 33793 }, { "epoch": 0.7167186273886026, "grad_norm": 0.3198498487472534, "learning_rate": 1.4323979754247906e-05, "loss": 0.4969, "step": 33794 }, { "epoch": 0.7167398358465356, "grad_norm": 0.34212788939476013, "learning_rate": 1.4323679044823636e-05, "loss": 0.4658, "step": 33795 }, { "epoch": 0.7167610443044686, "grad_norm": 0.36050015687942505, "learning_rate": 1.4323378330590609e-05, "loss": 0.4518, "step": 33796 }, { "epoch": 0.7167822527624016, "grad_norm": 0.38942164182662964, "learning_rate": 1.4323077611549159e-05, "loss": 0.5557, "step": 33797 }, { "epoch": 0.7168034612203347, "grad_norm": 0.42314326763153076, "learning_rate": 1.4322776887699621e-05, "loss": 0.5409, "step": 33798 }, { "epoch": 0.7168246696782677, "grad_norm": 0.32916194200515747, "learning_rate": 1.4322476159042325e-05, "loss": 0.4856, "step": 33799 }, { "epoch": 0.7168458781362007, "grad_norm": 0.40807363390922546, "learning_rate": 1.432217542557761e-05, "loss": 0.6051, "step": 33800 }, { "epoch": 0.7168670865941338, "grad_norm": 0.3441551625728607, "learning_rate": 1.4321874687305808e-05, "loss": 0.5033, "step": 33801 }, { "epoch": 0.7168882950520667, "grad_norm": 0.38396328687667847, "learning_rate": 1.4321573944227254e-05, "loss": 0.4528, "step": 33802 }, { "epoch": 0.7169095035099998, "grad_norm": 0.33379876613616943, "learning_rate": 1.4321273196342285e-05, "loss": 0.3957, "step": 33803 }, { "epoch": 0.7169307119679328, "grad_norm": 0.38698747754096985, "learning_rate": 1.4320972443651234e-05, "loss": 0.5576, "step": 33804 }, { "epoch": 0.7169519204258659, "grad_norm": 0.38226526975631714, "learning_rate": 1.4320671686154432e-05, "loss": 0.6409, "step": 33805 }, { "epoch": 0.7169731288837988, "grad_norm": 0.3785615861415863, "learning_rate": 1.432037092385222e-05, "loss": 0.48, "step": 33806 }, { "epoch": 0.7169943373417319, "grad_norm": 0.3644806146621704, "learning_rate": 1.4320070156744926e-05, "loss": 0.5296, "step": 33807 }, { "epoch": 0.7170155457996649, "grad_norm": 0.3602387011051178, "learning_rate": 1.4319769384832887e-05, "loss": 0.5153, "step": 33808 }, { "epoch": 0.7170367542575979, "grad_norm": 0.3645018935203552, "learning_rate": 1.4319468608116442e-05, "loss": 0.4479, "step": 33809 }, { "epoch": 0.7170579627155309, "grad_norm": 0.39572256803512573, "learning_rate": 1.4319167826595917e-05, "loss": 0.4882, "step": 33810 }, { "epoch": 0.717079171173464, "grad_norm": 0.3412324786186218, "learning_rate": 1.4318867040271655e-05, "loss": 0.524, "step": 33811 }, { "epoch": 0.7171003796313971, "grad_norm": 0.3372730314731598, "learning_rate": 1.4318566249143987e-05, "loss": 0.4186, "step": 33812 }, { "epoch": 0.71712158808933, "grad_norm": 0.3749106824398041, "learning_rate": 1.4318265453213243e-05, "loss": 0.5281, "step": 33813 }, { "epoch": 0.7171427965472631, "grad_norm": 0.3921642601490021, "learning_rate": 1.4317964652479763e-05, "loss": 0.5334, "step": 33814 }, { "epoch": 0.7171640050051961, "grad_norm": 0.3161538541316986, "learning_rate": 1.431766384694388e-05, "loss": 0.474, "step": 33815 }, { "epoch": 0.7171852134631291, "grad_norm": 0.36127224564552307, "learning_rate": 1.4317363036605931e-05, "loss": 0.5722, "step": 33816 }, { "epoch": 0.7172064219210621, "grad_norm": 0.34809717535972595, "learning_rate": 1.4317062221466247e-05, "loss": 0.5357, "step": 33817 }, { "epoch": 0.7172276303789952, "grad_norm": 0.38947516679763794, "learning_rate": 1.4316761401525165e-05, "loss": 0.4432, "step": 33818 }, { "epoch": 0.7172488388369281, "grad_norm": 0.35200774669647217, "learning_rate": 1.4316460576783016e-05, "loss": 0.4059, "step": 33819 }, { "epoch": 0.7172700472948612, "grad_norm": 0.33553245663642883, "learning_rate": 1.431615974724014e-05, "loss": 0.4781, "step": 33820 }, { "epoch": 0.7172912557527942, "grad_norm": 0.3692135512828827, "learning_rate": 1.4315858912896867e-05, "loss": 0.521, "step": 33821 }, { "epoch": 0.7173124642107273, "grad_norm": 0.3420858681201935, "learning_rate": 1.4315558073753533e-05, "loss": 0.4908, "step": 33822 }, { "epoch": 0.7173336726686602, "grad_norm": 0.3664875328540802, "learning_rate": 1.4315257229810473e-05, "loss": 0.5132, "step": 33823 }, { "epoch": 0.7173548811265933, "grad_norm": 0.4005574584007263, "learning_rate": 1.4314956381068021e-05, "loss": 0.4418, "step": 33824 }, { "epoch": 0.7173760895845263, "grad_norm": 0.5511711239814758, "learning_rate": 1.4314655527526513e-05, "loss": 0.4801, "step": 33825 }, { "epoch": 0.7173972980424593, "grad_norm": 0.3559495210647583, "learning_rate": 1.4314354669186283e-05, "loss": 0.4775, "step": 33826 }, { "epoch": 0.7174185065003924, "grad_norm": 0.4475823640823364, "learning_rate": 1.4314053806047661e-05, "loss": 0.5991, "step": 33827 }, { "epoch": 0.7174397149583254, "grad_norm": 0.40989989042282104, "learning_rate": 1.4313752938110992e-05, "loss": 0.5002, "step": 33828 }, { "epoch": 0.7174609234162584, "grad_norm": 0.45402684807777405, "learning_rate": 1.4313452065376601e-05, "loss": 0.5775, "step": 33829 }, { "epoch": 0.7174821318741914, "grad_norm": 0.33873364329338074, "learning_rate": 1.4313151187844824e-05, "loss": 0.4828, "step": 33830 }, { "epoch": 0.7175033403321245, "grad_norm": 0.3534795641899109, "learning_rate": 1.4312850305516003e-05, "loss": 0.4259, "step": 33831 }, { "epoch": 0.7175245487900574, "grad_norm": 0.42639243602752686, "learning_rate": 1.4312549418390467e-05, "loss": 0.5264, "step": 33832 }, { "epoch": 0.7175457572479905, "grad_norm": 0.33491915464401245, "learning_rate": 1.4312248526468544e-05, "loss": 0.4974, "step": 33833 }, { "epoch": 0.7175669657059235, "grad_norm": 0.39696648716926575, "learning_rate": 1.4311947629750581e-05, "loss": 0.5058, "step": 33834 }, { "epoch": 0.7175881741638566, "grad_norm": 0.6420319676399231, "learning_rate": 1.431164672823691e-05, "loss": 0.6231, "step": 33835 }, { "epoch": 0.7176093826217895, "grad_norm": 0.3688288927078247, "learning_rate": 1.4311345821927857e-05, "loss": 0.4221, "step": 33836 }, { "epoch": 0.7176305910797226, "grad_norm": 0.4380243718624115, "learning_rate": 1.4311044910823765e-05, "loss": 0.5472, "step": 33837 }, { "epoch": 0.7176517995376556, "grad_norm": 0.3387422263622284, "learning_rate": 1.4310743994924967e-05, "loss": 0.5229, "step": 33838 }, { "epoch": 0.7176730079955886, "grad_norm": 0.40307992696762085, "learning_rate": 1.43104430742318e-05, "loss": 0.5304, "step": 33839 }, { "epoch": 0.7176942164535217, "grad_norm": 0.42490801215171814, "learning_rate": 1.431014214874459e-05, "loss": 0.5376, "step": 33840 }, { "epoch": 0.7177154249114547, "grad_norm": 0.335250586271286, "learning_rate": 1.4309841218463679e-05, "loss": 0.4946, "step": 33841 }, { "epoch": 0.7177366333693878, "grad_norm": 0.3894090950489044, "learning_rate": 1.4309540283389401e-05, "loss": 0.5674, "step": 33842 }, { "epoch": 0.7177578418273207, "grad_norm": 0.4411236047744751, "learning_rate": 1.430923934352209e-05, "loss": 0.5003, "step": 33843 }, { "epoch": 0.7177790502852538, "grad_norm": 0.3267517387866974, "learning_rate": 1.430893839886208e-05, "loss": 0.4639, "step": 33844 }, { "epoch": 0.7178002587431868, "grad_norm": 0.33693769574165344, "learning_rate": 1.4308637449409705e-05, "loss": 0.4944, "step": 33845 }, { "epoch": 0.7178214672011198, "grad_norm": 0.3556760847568512, "learning_rate": 1.4308336495165303e-05, "loss": 0.4485, "step": 33846 }, { "epoch": 0.7178426756590528, "grad_norm": 0.41014647483825684, "learning_rate": 1.4308035536129204e-05, "loss": 0.4869, "step": 33847 }, { "epoch": 0.7178638841169859, "grad_norm": 0.4871944785118103, "learning_rate": 1.4307734572301746e-05, "loss": 0.5168, "step": 33848 }, { "epoch": 0.7178850925749188, "grad_norm": 0.34913164377212524, "learning_rate": 1.4307433603683265e-05, "loss": 0.5068, "step": 33849 }, { "epoch": 0.7179063010328519, "grad_norm": 0.3960586190223694, "learning_rate": 1.4307132630274092e-05, "loss": 0.4759, "step": 33850 }, { "epoch": 0.7179275094907849, "grad_norm": 0.3231634497642517, "learning_rate": 1.4306831652074563e-05, "loss": 0.5046, "step": 33851 }, { "epoch": 0.717948717948718, "grad_norm": 0.34385934472084045, "learning_rate": 1.4306530669085014e-05, "loss": 0.4746, "step": 33852 }, { "epoch": 0.717969926406651, "grad_norm": 0.3489772081375122, "learning_rate": 1.4306229681305779e-05, "loss": 0.4929, "step": 33853 }, { "epoch": 0.717991134864584, "grad_norm": 0.3563960790634155, "learning_rate": 1.4305928688737194e-05, "loss": 0.5504, "step": 33854 }, { "epoch": 0.7180123433225171, "grad_norm": 0.3715457320213318, "learning_rate": 1.430562769137959e-05, "loss": 0.5656, "step": 33855 }, { "epoch": 0.71803355178045, "grad_norm": 0.34160318970680237, "learning_rate": 1.4305326689233307e-05, "loss": 0.5054, "step": 33856 }, { "epoch": 0.7180547602383831, "grad_norm": 0.33908092975616455, "learning_rate": 1.4305025682298679e-05, "loss": 0.4395, "step": 33857 }, { "epoch": 0.7180759686963161, "grad_norm": 0.39472368359565735, "learning_rate": 1.4304724670576033e-05, "loss": 0.4452, "step": 33858 }, { "epoch": 0.7180971771542491, "grad_norm": 0.3843071162700653, "learning_rate": 1.430442365406571e-05, "loss": 0.4586, "step": 33859 }, { "epoch": 0.7181183856121821, "grad_norm": 0.44146791100502014, "learning_rate": 1.430412263276805e-05, "loss": 0.4198, "step": 33860 }, { "epoch": 0.7181395940701152, "grad_norm": 0.36361566185951233, "learning_rate": 1.4303821606683377e-05, "loss": 0.4948, "step": 33861 }, { "epoch": 0.7181608025280481, "grad_norm": 0.35173022747039795, "learning_rate": 1.4303520575812036e-05, "loss": 0.4715, "step": 33862 }, { "epoch": 0.7181820109859812, "grad_norm": 0.39948219060897827, "learning_rate": 1.4303219540154355e-05, "loss": 0.5106, "step": 33863 }, { "epoch": 0.7182032194439142, "grad_norm": 0.37869492173194885, "learning_rate": 1.4302918499710666e-05, "loss": 0.4688, "step": 33864 }, { "epoch": 0.7182244279018473, "grad_norm": 0.39253658056259155, "learning_rate": 1.4302617454481314e-05, "loss": 0.5225, "step": 33865 }, { "epoch": 0.7182456363597802, "grad_norm": 0.4419540762901306, "learning_rate": 1.4302316404466625e-05, "loss": 0.5141, "step": 33866 }, { "epoch": 0.7182668448177133, "grad_norm": 0.3944427967071533, "learning_rate": 1.4302015349666936e-05, "loss": 0.5387, "step": 33867 }, { "epoch": 0.7182880532756464, "grad_norm": 0.34550759196281433, "learning_rate": 1.4301714290082586e-05, "loss": 0.4896, "step": 33868 }, { "epoch": 0.7183092617335793, "grad_norm": 0.38117489218711853, "learning_rate": 1.4301413225713905e-05, "loss": 0.4669, "step": 33869 }, { "epoch": 0.7183304701915124, "grad_norm": 0.37297695875167847, "learning_rate": 1.430111215656123e-05, "loss": 0.5296, "step": 33870 }, { "epoch": 0.7183516786494454, "grad_norm": 0.39047443866729736, "learning_rate": 1.4300811082624897e-05, "loss": 0.5314, "step": 33871 }, { "epoch": 0.7183728871073785, "grad_norm": 0.31672462821006775, "learning_rate": 1.4300510003905239e-05, "loss": 0.4423, "step": 33872 }, { "epoch": 0.7183940955653114, "grad_norm": 0.34079790115356445, "learning_rate": 1.4300208920402588e-05, "loss": 0.4999, "step": 33873 }, { "epoch": 0.7184153040232445, "grad_norm": 0.43586400151252747, "learning_rate": 1.4299907832117286e-05, "loss": 0.5049, "step": 33874 }, { "epoch": 0.7184365124811775, "grad_norm": 0.33977144956588745, "learning_rate": 1.4299606739049661e-05, "loss": 0.4645, "step": 33875 }, { "epoch": 0.7184577209391105, "grad_norm": 0.3739490807056427, "learning_rate": 1.4299305641200052e-05, "loss": 0.5351, "step": 33876 }, { "epoch": 0.7184789293970435, "grad_norm": 0.3304778039455414, "learning_rate": 1.4299004538568792e-05, "loss": 0.4984, "step": 33877 }, { "epoch": 0.7185001378549766, "grad_norm": 0.3470504879951477, "learning_rate": 1.4298703431156215e-05, "loss": 0.4766, "step": 33878 }, { "epoch": 0.7185213463129095, "grad_norm": 0.35403019189834595, "learning_rate": 1.429840231896266e-05, "loss": 0.4645, "step": 33879 }, { "epoch": 0.7185425547708426, "grad_norm": 0.3564216196537018, "learning_rate": 1.429810120198846e-05, "loss": 0.4903, "step": 33880 }, { "epoch": 0.7185637632287757, "grad_norm": 0.32407769560813904, "learning_rate": 1.4297800080233946e-05, "loss": 0.4389, "step": 33881 }, { "epoch": 0.7185849716867087, "grad_norm": 0.37550872564315796, "learning_rate": 1.429749895369946e-05, "loss": 0.5387, "step": 33882 }, { "epoch": 0.7186061801446417, "grad_norm": 0.43110692501068115, "learning_rate": 1.429719782238533e-05, "loss": 0.5385, "step": 33883 }, { "epoch": 0.7186273886025747, "grad_norm": 0.3791677951812744, "learning_rate": 1.4296896686291894e-05, "loss": 0.5137, "step": 33884 }, { "epoch": 0.7186485970605078, "grad_norm": 0.35692575573921204, "learning_rate": 1.4296595545419488e-05, "loss": 0.5514, "step": 33885 }, { "epoch": 0.7186698055184407, "grad_norm": 0.37731730937957764, "learning_rate": 1.4296294399768444e-05, "loss": 0.5101, "step": 33886 }, { "epoch": 0.7186910139763738, "grad_norm": 0.3670589029788971, "learning_rate": 1.4295993249339099e-05, "loss": 0.5439, "step": 33887 }, { "epoch": 0.7187122224343068, "grad_norm": 0.4263220429420471, "learning_rate": 1.429569209413179e-05, "loss": 0.5803, "step": 33888 }, { "epoch": 0.7187334308922398, "grad_norm": 0.47824013233184814, "learning_rate": 1.4295390934146847e-05, "loss": 0.4676, "step": 33889 }, { "epoch": 0.7187546393501728, "grad_norm": 0.36227935552597046, "learning_rate": 1.4295089769384612e-05, "loss": 0.5203, "step": 33890 }, { "epoch": 0.7187758478081059, "grad_norm": 0.37926068902015686, "learning_rate": 1.429478859984541e-05, "loss": 0.4664, "step": 33891 }, { "epoch": 0.7187970562660388, "grad_norm": 0.3674318790435791, "learning_rate": 1.4294487425529582e-05, "loss": 0.5291, "step": 33892 }, { "epoch": 0.7188182647239719, "grad_norm": 0.3390873074531555, "learning_rate": 1.4294186246437464e-05, "loss": 0.4305, "step": 33893 }, { "epoch": 0.718839473181905, "grad_norm": 0.3768008351325989, "learning_rate": 1.429388506256939e-05, "loss": 0.563, "step": 33894 }, { "epoch": 0.718860681639838, "grad_norm": 0.35762593150138855, "learning_rate": 1.4293583873925694e-05, "loss": 0.5029, "step": 33895 }, { "epoch": 0.718881890097771, "grad_norm": 0.4112311899662018, "learning_rate": 1.429328268050671e-05, "loss": 0.5441, "step": 33896 }, { "epoch": 0.718903098555704, "grad_norm": 0.380100816488266, "learning_rate": 1.4292981482312776e-05, "loss": 0.517, "step": 33897 }, { "epoch": 0.7189243070136371, "grad_norm": 0.37077680230140686, "learning_rate": 1.4292680279344224e-05, "loss": 0.525, "step": 33898 }, { "epoch": 0.71894551547157, "grad_norm": 0.3778093457221985, "learning_rate": 1.4292379071601392e-05, "loss": 0.4941, "step": 33899 }, { "epoch": 0.7189667239295031, "grad_norm": 0.3699813783168793, "learning_rate": 1.4292077859084612e-05, "loss": 0.4958, "step": 33900 }, { "epoch": 0.7189879323874361, "grad_norm": 0.41580918431282043, "learning_rate": 1.429177664179422e-05, "loss": 0.4181, "step": 33901 }, { "epoch": 0.7190091408453692, "grad_norm": 0.3511919379234314, "learning_rate": 1.4291475419730553e-05, "loss": 0.5495, "step": 33902 }, { "epoch": 0.7190303493033021, "grad_norm": 0.5593119263648987, "learning_rate": 1.4291174192893943e-05, "loss": 0.4776, "step": 33903 }, { "epoch": 0.7190515577612352, "grad_norm": 0.36887452006340027, "learning_rate": 1.4290872961284725e-05, "loss": 0.5148, "step": 33904 }, { "epoch": 0.7190727662191682, "grad_norm": 0.34262460470199585, "learning_rate": 1.4290571724903236e-05, "loss": 0.5504, "step": 33905 }, { "epoch": 0.7190939746771012, "grad_norm": 0.3343961536884308, "learning_rate": 1.429027048374981e-05, "loss": 0.5108, "step": 33906 }, { "epoch": 0.7191151831350342, "grad_norm": 0.3867509663105011, "learning_rate": 1.4289969237824785e-05, "loss": 0.5237, "step": 33907 }, { "epoch": 0.7191363915929673, "grad_norm": 0.3696354031562805, "learning_rate": 1.4289667987128493e-05, "loss": 0.4641, "step": 33908 }, { "epoch": 0.7191576000509003, "grad_norm": 0.6157118678092957, "learning_rate": 1.4289366731661267e-05, "loss": 0.4842, "step": 33909 }, { "epoch": 0.7191788085088333, "grad_norm": 0.3678025007247925, "learning_rate": 1.4289065471423447e-05, "loss": 0.4874, "step": 33910 }, { "epoch": 0.7192000169667664, "grad_norm": 0.39609724283218384, "learning_rate": 1.4288764206415364e-05, "loss": 0.4892, "step": 33911 }, { "epoch": 0.7192212254246994, "grad_norm": 0.41303160786628723, "learning_rate": 1.4288462936637356e-05, "loss": 0.4381, "step": 33912 }, { "epoch": 0.7192424338826324, "grad_norm": 0.37080448865890503, "learning_rate": 1.4288161662089757e-05, "loss": 0.451, "step": 33913 }, { "epoch": 0.7192636423405654, "grad_norm": 0.35562247037887573, "learning_rate": 1.4287860382772903e-05, "loss": 0.4505, "step": 33914 }, { "epoch": 0.7192848507984985, "grad_norm": 0.37613022327423096, "learning_rate": 1.4287559098687122e-05, "loss": 0.5181, "step": 33915 }, { "epoch": 0.7193060592564314, "grad_norm": 0.34697192907333374, "learning_rate": 1.4287257809832761e-05, "loss": 0.5179, "step": 33916 }, { "epoch": 0.7193272677143645, "grad_norm": 0.4127090275287628, "learning_rate": 1.4286956516210149e-05, "loss": 0.5564, "step": 33917 }, { "epoch": 0.7193484761722975, "grad_norm": 0.3388305902481079, "learning_rate": 1.4286655217819617e-05, "loss": 0.4992, "step": 33918 }, { "epoch": 0.7193696846302305, "grad_norm": 0.3679385185241699, "learning_rate": 1.4286353914661505e-05, "loss": 0.5022, "step": 33919 }, { "epoch": 0.7193908930881635, "grad_norm": 0.35190579295158386, "learning_rate": 1.428605260673615e-05, "loss": 0.4867, "step": 33920 }, { "epoch": 0.7194121015460966, "grad_norm": 0.39343681931495667, "learning_rate": 1.4285751294043885e-05, "loss": 0.5347, "step": 33921 }, { "epoch": 0.7194333100040297, "grad_norm": 0.38254663348197937, "learning_rate": 1.4285449976585044e-05, "loss": 0.5487, "step": 33922 }, { "epoch": 0.7194545184619626, "grad_norm": 0.3348856270313263, "learning_rate": 1.428514865435996e-05, "loss": 0.5205, "step": 33923 }, { "epoch": 0.7194757269198957, "grad_norm": 0.39595329761505127, "learning_rate": 1.4284847327368974e-05, "loss": 0.4587, "step": 33924 }, { "epoch": 0.7194969353778287, "grad_norm": 0.4012247920036316, "learning_rate": 1.4284545995612418e-05, "loss": 0.5748, "step": 33925 }, { "epoch": 0.7195181438357617, "grad_norm": 0.34559008479118347, "learning_rate": 1.4284244659090624e-05, "loss": 0.4739, "step": 33926 }, { "epoch": 0.7195393522936947, "grad_norm": 0.5066092610359192, "learning_rate": 1.4283943317803934e-05, "loss": 0.4777, "step": 33927 }, { "epoch": 0.7195605607516278, "grad_norm": 0.3849007487297058, "learning_rate": 1.428364197175268e-05, "loss": 0.4796, "step": 33928 }, { "epoch": 0.7195817692095607, "grad_norm": 0.3996535539627075, "learning_rate": 1.4283340620937192e-05, "loss": 0.518, "step": 33929 }, { "epoch": 0.7196029776674938, "grad_norm": 0.3826121389865875, "learning_rate": 1.4283039265357813e-05, "loss": 0.4832, "step": 33930 }, { "epoch": 0.7196241861254268, "grad_norm": 0.3804948627948761, "learning_rate": 1.4282737905014874e-05, "loss": 0.4705, "step": 33931 }, { "epoch": 0.7196453945833599, "grad_norm": 0.3830236494541168, "learning_rate": 1.428243653990871e-05, "loss": 0.6217, "step": 33932 }, { "epoch": 0.7196666030412928, "grad_norm": 0.36072516441345215, "learning_rate": 1.428213517003966e-05, "loss": 0.51, "step": 33933 }, { "epoch": 0.7196878114992259, "grad_norm": 0.32685112953186035, "learning_rate": 1.4281833795408054e-05, "loss": 0.5191, "step": 33934 }, { "epoch": 0.719709019957159, "grad_norm": 0.36357593536376953, "learning_rate": 1.4281532416014233e-05, "loss": 0.3781, "step": 33935 }, { "epoch": 0.7197302284150919, "grad_norm": 0.3514832556247711, "learning_rate": 1.4281231031858525e-05, "loss": 0.5367, "step": 33936 }, { "epoch": 0.719751436873025, "grad_norm": 0.32740500569343567, "learning_rate": 1.4280929642941268e-05, "loss": 0.5111, "step": 33937 }, { "epoch": 0.719772645330958, "grad_norm": 0.3808612823486328, "learning_rate": 1.4280628249262803e-05, "loss": 0.4414, "step": 33938 }, { "epoch": 0.719793853788891, "grad_norm": 0.4105263650417328, "learning_rate": 1.4280326850823458e-05, "loss": 0.5641, "step": 33939 }, { "epoch": 0.719815062246824, "grad_norm": 0.3498478829860687, "learning_rate": 1.428002544762357e-05, "loss": 0.4095, "step": 33940 }, { "epoch": 0.7198362707047571, "grad_norm": 0.39145150780677795, "learning_rate": 1.4279724039663477e-05, "loss": 0.6061, "step": 33941 }, { "epoch": 0.71985747916269, "grad_norm": 0.3324107825756073, "learning_rate": 1.427942262694351e-05, "loss": 0.4369, "step": 33942 }, { "epoch": 0.7198786876206231, "grad_norm": 0.3211066722869873, "learning_rate": 1.4279121209464007e-05, "loss": 0.5055, "step": 33943 }, { "epoch": 0.7198998960785561, "grad_norm": 0.44263285398483276, "learning_rate": 1.4278819787225304e-05, "loss": 0.5075, "step": 33944 }, { "epoch": 0.7199211045364892, "grad_norm": 0.32944002747535706, "learning_rate": 1.4278518360227734e-05, "loss": 0.4773, "step": 33945 }, { "epoch": 0.7199423129944221, "grad_norm": 0.33735907077789307, "learning_rate": 1.4278216928471632e-05, "loss": 0.5123, "step": 33946 }, { "epoch": 0.7199635214523552, "grad_norm": 0.34633517265319824, "learning_rate": 1.4277915491957334e-05, "loss": 0.4159, "step": 33947 }, { "epoch": 0.7199847299102882, "grad_norm": 0.3560340702533722, "learning_rate": 1.4277614050685178e-05, "loss": 0.5077, "step": 33948 }, { "epoch": 0.7200059383682212, "grad_norm": 0.3880748152732849, "learning_rate": 1.4277312604655493e-05, "loss": 0.4218, "step": 33949 }, { "epoch": 0.7200271468261543, "grad_norm": 0.38081488013267517, "learning_rate": 1.4277011153868623e-05, "loss": 0.4354, "step": 33950 }, { "epoch": 0.7200483552840873, "grad_norm": 0.370431512594223, "learning_rate": 1.4276709698324893e-05, "loss": 0.4677, "step": 33951 }, { "epoch": 0.7200695637420204, "grad_norm": 0.3617686927318573, "learning_rate": 1.4276408238024648e-05, "loss": 0.4808, "step": 33952 }, { "epoch": 0.7200907721999533, "grad_norm": 0.40104997158050537, "learning_rate": 1.4276106772968219e-05, "loss": 0.5042, "step": 33953 }, { "epoch": 0.7201119806578864, "grad_norm": 0.42721444368362427, "learning_rate": 1.4275805303155938e-05, "loss": 0.4236, "step": 33954 }, { "epoch": 0.7201331891158194, "grad_norm": 0.4031420350074768, "learning_rate": 1.4275503828588145e-05, "loss": 0.5211, "step": 33955 }, { "epoch": 0.7201543975737524, "grad_norm": 0.3100716769695282, "learning_rate": 1.4275202349265173e-05, "loss": 0.3928, "step": 33956 }, { "epoch": 0.7201756060316854, "grad_norm": 0.3726797103881836, "learning_rate": 1.4274900865187358e-05, "loss": 0.4307, "step": 33957 }, { "epoch": 0.7201968144896185, "grad_norm": 0.34877046942710876, "learning_rate": 1.4274599376355039e-05, "loss": 0.4663, "step": 33958 }, { "epoch": 0.7202180229475514, "grad_norm": 0.3876665532588959, "learning_rate": 1.4274297882768544e-05, "loss": 0.4816, "step": 33959 }, { "epoch": 0.7202392314054845, "grad_norm": 0.3980971872806549, "learning_rate": 1.4273996384428212e-05, "loss": 0.5064, "step": 33960 }, { "epoch": 0.7202604398634175, "grad_norm": 0.3686386048793793, "learning_rate": 1.427369488133438e-05, "loss": 0.4424, "step": 33961 }, { "epoch": 0.7202816483213506, "grad_norm": 0.3415880799293518, "learning_rate": 1.4273393373487381e-05, "loss": 0.5019, "step": 33962 }, { "epoch": 0.7203028567792836, "grad_norm": 0.3660585880279541, "learning_rate": 1.427309186088755e-05, "loss": 0.4334, "step": 33963 }, { "epoch": 0.7203240652372166, "grad_norm": 0.33703917264938354, "learning_rate": 1.4272790343535226e-05, "loss": 0.4655, "step": 33964 }, { "epoch": 0.7203452736951497, "grad_norm": 0.34938016533851624, "learning_rate": 1.427248882143074e-05, "loss": 0.4992, "step": 33965 }, { "epoch": 0.7203664821530826, "grad_norm": 0.3786797523498535, "learning_rate": 1.4272187294574428e-05, "loss": 0.5354, "step": 33966 }, { "epoch": 0.7203876906110157, "grad_norm": 0.32712167501449585, "learning_rate": 1.4271885762966628e-05, "loss": 0.423, "step": 33967 }, { "epoch": 0.7204088990689487, "grad_norm": 0.5059604644775391, "learning_rate": 1.4271584226607672e-05, "loss": 0.5086, "step": 33968 }, { "epoch": 0.7204301075268817, "grad_norm": 0.448294073343277, "learning_rate": 1.4271282685497898e-05, "loss": 0.4879, "step": 33969 }, { "epoch": 0.7204513159848147, "grad_norm": 0.5753865242004395, "learning_rate": 1.4270981139637643e-05, "loss": 0.4802, "step": 33970 }, { "epoch": 0.7204725244427478, "grad_norm": 0.3584662973880768, "learning_rate": 1.4270679589027236e-05, "loss": 0.5513, "step": 33971 }, { "epoch": 0.7204937329006808, "grad_norm": 0.4042210578918457, "learning_rate": 1.4270378033667018e-05, "loss": 0.508, "step": 33972 }, { "epoch": 0.7205149413586138, "grad_norm": 0.3712449073791504, "learning_rate": 1.4270076473557324e-05, "loss": 0.4327, "step": 33973 }, { "epoch": 0.7205361498165468, "grad_norm": 0.37708598375320435, "learning_rate": 1.4269774908698486e-05, "loss": 0.4831, "step": 33974 }, { "epoch": 0.7205573582744799, "grad_norm": 0.36132925748825073, "learning_rate": 1.4269473339090842e-05, "loss": 0.4593, "step": 33975 }, { "epoch": 0.7205785667324129, "grad_norm": 0.32584622502326965, "learning_rate": 1.4269171764734728e-05, "loss": 0.5317, "step": 33976 }, { "epoch": 0.7205997751903459, "grad_norm": 0.39859598875045776, "learning_rate": 1.4268870185630475e-05, "loss": 0.4959, "step": 33977 }, { "epoch": 0.720620983648279, "grad_norm": 0.5553255081176758, "learning_rate": 1.4268568601778425e-05, "loss": 0.5081, "step": 33978 }, { "epoch": 0.7206421921062119, "grad_norm": 0.39946863055229187, "learning_rate": 1.426826701317891e-05, "loss": 0.4433, "step": 33979 }, { "epoch": 0.720663400564145, "grad_norm": 0.43122443556785583, "learning_rate": 1.4267965419832264e-05, "loss": 0.5173, "step": 33980 }, { "epoch": 0.720684609022078, "grad_norm": 0.3431943356990814, "learning_rate": 1.4267663821738823e-05, "loss": 0.4421, "step": 33981 }, { "epoch": 0.7207058174800111, "grad_norm": 0.44528627395629883, "learning_rate": 1.4267362218898924e-05, "loss": 0.5468, "step": 33982 }, { "epoch": 0.720727025937944, "grad_norm": 0.41163259744644165, "learning_rate": 1.4267060611312904e-05, "loss": 0.5104, "step": 33983 }, { "epoch": 0.7207482343958771, "grad_norm": 0.377995103597641, "learning_rate": 1.4266758998981095e-05, "loss": 0.494, "step": 33984 }, { "epoch": 0.7207694428538101, "grad_norm": 0.4059879779815674, "learning_rate": 1.4266457381903832e-05, "loss": 0.4809, "step": 33985 }, { "epoch": 0.7207906513117431, "grad_norm": 0.48526275157928467, "learning_rate": 1.4266155760081457e-05, "loss": 0.4808, "step": 33986 }, { "epoch": 0.7208118597696761, "grad_norm": 0.338824063539505, "learning_rate": 1.4265854133514296e-05, "loss": 0.4271, "step": 33987 }, { "epoch": 0.7208330682276092, "grad_norm": 0.41643956303596497, "learning_rate": 1.4265552502202688e-05, "loss": 0.3987, "step": 33988 }, { "epoch": 0.7208542766855421, "grad_norm": 0.34250783920288086, "learning_rate": 1.4265250866146973e-05, "loss": 0.4151, "step": 33989 }, { "epoch": 0.7208754851434752, "grad_norm": 0.3989376127719879, "learning_rate": 1.4264949225347485e-05, "loss": 0.4533, "step": 33990 }, { "epoch": 0.7208966936014083, "grad_norm": 0.9060774445533752, "learning_rate": 1.4264647579804553e-05, "loss": 0.5334, "step": 33991 }, { "epoch": 0.7209179020593413, "grad_norm": 0.379487007856369, "learning_rate": 1.426434592951852e-05, "loss": 0.5292, "step": 33992 }, { "epoch": 0.7209391105172743, "grad_norm": 0.3843913972377777, "learning_rate": 1.4264044274489717e-05, "loss": 0.495, "step": 33993 }, { "epoch": 0.7209603189752073, "grad_norm": 0.3581888973712921, "learning_rate": 1.4263742614718478e-05, "loss": 0.484, "step": 33994 }, { "epoch": 0.7209815274331404, "grad_norm": 0.4155145585536957, "learning_rate": 1.4263440950205146e-05, "loss": 0.5672, "step": 33995 }, { "epoch": 0.7210027358910733, "grad_norm": 0.39686450362205505, "learning_rate": 1.4263139280950048e-05, "loss": 0.6062, "step": 33996 }, { "epoch": 0.7210239443490064, "grad_norm": 0.3297789692878723, "learning_rate": 1.4262837606953528e-05, "loss": 0.4927, "step": 33997 }, { "epoch": 0.7210451528069394, "grad_norm": 0.35973379015922546, "learning_rate": 1.4262535928215917e-05, "loss": 0.5327, "step": 33998 }, { "epoch": 0.7210663612648724, "grad_norm": 0.3451743721961975, "learning_rate": 1.4262234244737548e-05, "loss": 0.4663, "step": 33999 }, { "epoch": 0.7210875697228054, "grad_norm": 0.40120285749435425, "learning_rate": 1.4261932556518758e-05, "loss": 0.5138, "step": 34000 }, { "epoch": 0.7211087781807385, "grad_norm": 0.3979575037956238, "learning_rate": 1.4261630863559888e-05, "loss": 0.5019, "step": 34001 }, { "epoch": 0.7211299866386714, "grad_norm": 0.37768852710723877, "learning_rate": 1.4261329165861266e-05, "loss": 0.5113, "step": 34002 }, { "epoch": 0.7211511950966045, "grad_norm": 0.35015445947647095, "learning_rate": 1.4261027463423232e-05, "loss": 0.5254, "step": 34003 }, { "epoch": 0.7211724035545376, "grad_norm": 0.3388063609600067, "learning_rate": 1.426072575624612e-05, "loss": 0.42, "step": 34004 }, { "epoch": 0.7211936120124706, "grad_norm": 0.42782899737358093, "learning_rate": 1.4260424044330266e-05, "loss": 0.5103, "step": 34005 }, { "epoch": 0.7212148204704036, "grad_norm": 0.34146130084991455, "learning_rate": 1.4260122327676005e-05, "loss": 0.4646, "step": 34006 }, { "epoch": 0.7212360289283366, "grad_norm": 0.4299647808074951, "learning_rate": 1.4259820606283671e-05, "loss": 0.5001, "step": 34007 }, { "epoch": 0.7212572373862697, "grad_norm": 0.3478701412677765, "learning_rate": 1.4259518880153603e-05, "loss": 0.4403, "step": 34008 }, { "epoch": 0.7212784458442026, "grad_norm": 0.4261243939399719, "learning_rate": 1.4259217149286135e-05, "loss": 0.484, "step": 34009 }, { "epoch": 0.7212996543021357, "grad_norm": 0.38544005155563354, "learning_rate": 1.4258915413681602e-05, "loss": 0.4506, "step": 34010 }, { "epoch": 0.7213208627600687, "grad_norm": 0.38631749153137207, "learning_rate": 1.4258613673340342e-05, "loss": 0.5605, "step": 34011 }, { "epoch": 0.7213420712180018, "grad_norm": 0.36113542318344116, "learning_rate": 1.425831192826269e-05, "loss": 0.5761, "step": 34012 }, { "epoch": 0.7213632796759347, "grad_norm": 0.34387829899787903, "learning_rate": 1.425801017844898e-05, "loss": 0.4821, "step": 34013 }, { "epoch": 0.7213844881338678, "grad_norm": 0.3476245403289795, "learning_rate": 1.4257708423899544e-05, "loss": 0.5146, "step": 34014 }, { "epoch": 0.7214056965918008, "grad_norm": 0.344042032957077, "learning_rate": 1.4257406664614725e-05, "loss": 0.5036, "step": 34015 }, { "epoch": 0.7214269050497338, "grad_norm": 0.35032469034194946, "learning_rate": 1.4257104900594856e-05, "loss": 0.5406, "step": 34016 }, { "epoch": 0.7214481135076669, "grad_norm": 0.34110957384109497, "learning_rate": 1.425680313184027e-05, "loss": 0.4905, "step": 34017 }, { "epoch": 0.7214693219655999, "grad_norm": 0.4613057076931, "learning_rate": 1.4256501358351308e-05, "loss": 0.5393, "step": 34018 }, { "epoch": 0.721490530423533, "grad_norm": 0.37770116329193115, "learning_rate": 1.4256199580128296e-05, "loss": 0.4345, "step": 34019 }, { "epoch": 0.7215117388814659, "grad_norm": 0.36106687784194946, "learning_rate": 1.425589779717158e-05, "loss": 0.4782, "step": 34020 }, { "epoch": 0.721532947339399, "grad_norm": 0.3776205778121948, "learning_rate": 1.4255596009481492e-05, "loss": 0.4831, "step": 34021 }, { "epoch": 0.721554155797332, "grad_norm": 0.36674490571022034, "learning_rate": 1.4255294217058363e-05, "loss": 0.4571, "step": 34022 }, { "epoch": 0.721575364255265, "grad_norm": 0.6083322763442993, "learning_rate": 1.4254992419902538e-05, "loss": 0.5274, "step": 34023 }, { "epoch": 0.721596572713198, "grad_norm": 0.3434499204158783, "learning_rate": 1.4254690618014344e-05, "loss": 0.5044, "step": 34024 }, { "epoch": 0.7216177811711311, "grad_norm": 1.1170223951339722, "learning_rate": 1.425438881139412e-05, "loss": 0.5183, "step": 34025 }, { "epoch": 0.721638989629064, "grad_norm": 0.39663445949554443, "learning_rate": 1.4254087000042203e-05, "loss": 0.5004, "step": 34026 }, { "epoch": 0.7216601980869971, "grad_norm": 0.37129080295562744, "learning_rate": 1.4253785183958928e-05, "loss": 0.4505, "step": 34027 }, { "epoch": 0.7216814065449301, "grad_norm": 0.3240833580493927, "learning_rate": 1.4253483363144627e-05, "loss": 0.4524, "step": 34028 }, { "epoch": 0.7217026150028631, "grad_norm": 0.3830414414405823, "learning_rate": 1.4253181537599642e-05, "loss": 0.55, "step": 34029 }, { "epoch": 0.7217238234607961, "grad_norm": 0.34638407826423645, "learning_rate": 1.4252879707324305e-05, "loss": 0.4422, "step": 34030 }, { "epoch": 0.7217450319187292, "grad_norm": 0.3757031261920929, "learning_rate": 1.4252577872318951e-05, "loss": 0.5421, "step": 34031 }, { "epoch": 0.7217662403766623, "grad_norm": 0.5941628813743591, "learning_rate": 1.4252276032583916e-05, "loss": 0.5081, "step": 34032 }, { "epoch": 0.7217874488345952, "grad_norm": 0.41357243061065674, "learning_rate": 1.4251974188119537e-05, "loss": 0.5674, "step": 34033 }, { "epoch": 0.7218086572925283, "grad_norm": 0.3205088973045349, "learning_rate": 1.4251672338926152e-05, "loss": 0.5537, "step": 34034 }, { "epoch": 0.7218298657504613, "grad_norm": 0.3737262785434723, "learning_rate": 1.4251370485004093e-05, "loss": 0.4864, "step": 34035 }, { "epoch": 0.7218510742083943, "grad_norm": 0.3466591536998749, "learning_rate": 1.4251068626353694e-05, "loss": 0.4782, "step": 34036 }, { "epoch": 0.7218722826663273, "grad_norm": 0.3331802487373352, "learning_rate": 1.4250766762975296e-05, "loss": 0.4443, "step": 34037 }, { "epoch": 0.7218934911242604, "grad_norm": 0.4619660973548889, "learning_rate": 1.4250464894869231e-05, "loss": 0.481, "step": 34038 }, { "epoch": 0.7219146995821933, "grad_norm": 0.3136468827724457, "learning_rate": 1.4250163022035835e-05, "loss": 0.4361, "step": 34039 }, { "epoch": 0.7219359080401264, "grad_norm": 0.345504492521286, "learning_rate": 1.4249861144475446e-05, "loss": 0.5476, "step": 34040 }, { "epoch": 0.7219571164980594, "grad_norm": 0.40393415093421936, "learning_rate": 1.42495592621884e-05, "loss": 0.4721, "step": 34041 }, { "epoch": 0.7219783249559925, "grad_norm": 0.3934206962585449, "learning_rate": 1.4249257375175028e-05, "loss": 0.5438, "step": 34042 }, { "epoch": 0.7219995334139254, "grad_norm": 0.4751424193382263, "learning_rate": 1.4248955483435671e-05, "loss": 0.513, "step": 34043 }, { "epoch": 0.7220207418718585, "grad_norm": 0.33533743023872375, "learning_rate": 1.424865358697066e-05, "loss": 0.4587, "step": 34044 }, { "epoch": 0.7220419503297916, "grad_norm": 0.33028504252433777, "learning_rate": 1.4248351685780336e-05, "loss": 0.4552, "step": 34045 }, { "epoch": 0.7220631587877245, "grad_norm": 0.45698660612106323, "learning_rate": 1.4248049779865029e-05, "loss": 0.4635, "step": 34046 }, { "epoch": 0.7220843672456576, "grad_norm": 0.3889613151550293, "learning_rate": 1.4247747869225081e-05, "loss": 0.4996, "step": 34047 }, { "epoch": 0.7221055757035906, "grad_norm": 0.4096633195877075, "learning_rate": 1.4247445953860824e-05, "loss": 0.5975, "step": 34048 }, { "epoch": 0.7221267841615236, "grad_norm": 0.35941290855407715, "learning_rate": 1.4247144033772596e-05, "loss": 0.5026, "step": 34049 }, { "epoch": 0.7221479926194566, "grad_norm": 0.7400872111320496, "learning_rate": 1.4246842108960728e-05, "loss": 0.59, "step": 34050 }, { "epoch": 0.7221692010773897, "grad_norm": 0.35638609528541565, "learning_rate": 1.4246540179425564e-05, "loss": 0.4156, "step": 34051 }, { "epoch": 0.7221904095353227, "grad_norm": 0.4016144871711731, "learning_rate": 1.424623824516743e-05, "loss": 0.6165, "step": 34052 }, { "epoch": 0.7222116179932557, "grad_norm": 0.38458624482154846, "learning_rate": 1.4245936306186665e-05, "loss": 0.4837, "step": 34053 }, { "epoch": 0.7222328264511887, "grad_norm": 0.3735050857067108, "learning_rate": 1.4245634362483612e-05, "loss": 0.5714, "step": 34054 }, { "epoch": 0.7222540349091218, "grad_norm": 0.3408365845680237, "learning_rate": 1.4245332414058602e-05, "loss": 0.4766, "step": 34055 }, { "epoch": 0.7222752433670547, "grad_norm": 0.3653256595134735, "learning_rate": 1.4245030460911964e-05, "loss": 0.4705, "step": 34056 }, { "epoch": 0.7222964518249878, "grad_norm": 0.2993626296520233, "learning_rate": 1.4244728503044046e-05, "loss": 0.4595, "step": 34057 }, { "epoch": 0.7223176602829209, "grad_norm": 0.3298027217388153, "learning_rate": 1.4244426540455174e-05, "loss": 0.4792, "step": 34058 }, { "epoch": 0.7223388687408538, "grad_norm": 0.4209442734718323, "learning_rate": 1.4244124573145688e-05, "loss": 0.4751, "step": 34059 }, { "epoch": 0.7223600771987869, "grad_norm": 0.3692396581172943, "learning_rate": 1.4243822601115924e-05, "loss": 0.5596, "step": 34060 }, { "epoch": 0.7223812856567199, "grad_norm": 0.3379906117916107, "learning_rate": 1.4243520624366218e-05, "loss": 0.5003, "step": 34061 }, { "epoch": 0.722402494114653, "grad_norm": 0.44185420870780945, "learning_rate": 1.4243218642896906e-05, "loss": 0.5404, "step": 34062 }, { "epoch": 0.7224237025725859, "grad_norm": 0.4666762948036194, "learning_rate": 1.4242916656708323e-05, "loss": 0.5047, "step": 34063 }, { "epoch": 0.722444911030519, "grad_norm": 0.3456195294857025, "learning_rate": 1.42426146658008e-05, "loss": 0.5057, "step": 34064 }, { "epoch": 0.722466119488452, "grad_norm": 0.36076751351356506, "learning_rate": 1.4242312670174686e-05, "loss": 0.4653, "step": 34065 }, { "epoch": 0.722487327946385, "grad_norm": 0.35042452812194824, "learning_rate": 1.4242010669830305e-05, "loss": 0.4398, "step": 34066 }, { "epoch": 0.722508536404318, "grad_norm": 0.402090460062027, "learning_rate": 1.4241708664767994e-05, "loss": 0.4658, "step": 34067 }, { "epoch": 0.7225297448622511, "grad_norm": 0.33249273896217346, "learning_rate": 1.4241406654988095e-05, "loss": 0.5013, "step": 34068 }, { "epoch": 0.722550953320184, "grad_norm": 0.3860692083835602, "learning_rate": 1.4241104640490938e-05, "loss": 0.4773, "step": 34069 }, { "epoch": 0.7225721617781171, "grad_norm": 0.3251819908618927, "learning_rate": 1.4240802621276864e-05, "loss": 0.4832, "step": 34070 }, { "epoch": 0.7225933702360502, "grad_norm": 0.3872503340244293, "learning_rate": 1.4240500597346203e-05, "loss": 0.5511, "step": 34071 }, { "epoch": 0.7226145786939832, "grad_norm": 0.36096784472465515, "learning_rate": 1.4240198568699296e-05, "loss": 0.481, "step": 34072 }, { "epoch": 0.7226357871519162, "grad_norm": 0.38917627930641174, "learning_rate": 1.4239896535336474e-05, "loss": 0.5402, "step": 34073 }, { "epoch": 0.7226569956098492, "grad_norm": 0.36055153608322144, "learning_rate": 1.423959449725808e-05, "loss": 0.5283, "step": 34074 }, { "epoch": 0.7226782040677823, "grad_norm": 0.3921171724796295, "learning_rate": 1.4239292454464444e-05, "loss": 0.4744, "step": 34075 }, { "epoch": 0.7226994125257152, "grad_norm": 0.4404958486557007, "learning_rate": 1.4238990406955905e-05, "loss": 0.5197, "step": 34076 }, { "epoch": 0.7227206209836483, "grad_norm": 0.32360565662384033, "learning_rate": 1.4238688354732794e-05, "loss": 0.4217, "step": 34077 }, { "epoch": 0.7227418294415813, "grad_norm": 0.36601540446281433, "learning_rate": 1.4238386297795455e-05, "loss": 0.4819, "step": 34078 }, { "epoch": 0.7227630378995143, "grad_norm": 0.4315260350704193, "learning_rate": 1.4238084236144218e-05, "loss": 0.5177, "step": 34079 }, { "epoch": 0.7227842463574473, "grad_norm": 0.36710602045059204, "learning_rate": 1.4237782169779421e-05, "loss": 0.5493, "step": 34080 }, { "epoch": 0.7228054548153804, "grad_norm": 0.3607295751571655, "learning_rate": 1.4237480098701398e-05, "loss": 0.5051, "step": 34081 }, { "epoch": 0.7228266632733134, "grad_norm": 0.3944012224674225, "learning_rate": 1.4237178022910488e-05, "loss": 0.5836, "step": 34082 }, { "epoch": 0.7228478717312464, "grad_norm": 0.35086992383003235, "learning_rate": 1.4236875942407027e-05, "loss": 0.4586, "step": 34083 }, { "epoch": 0.7228690801891794, "grad_norm": 0.3667744994163513, "learning_rate": 1.4236573857191345e-05, "loss": 0.4757, "step": 34084 }, { "epoch": 0.7228902886471125, "grad_norm": 0.3342592716217041, "learning_rate": 1.4236271767263782e-05, "loss": 0.3939, "step": 34085 }, { "epoch": 0.7229114971050455, "grad_norm": 0.40348851680755615, "learning_rate": 1.4235969672624678e-05, "loss": 0.4663, "step": 34086 }, { "epoch": 0.7229327055629785, "grad_norm": 0.4770916998386383, "learning_rate": 1.4235667573274365e-05, "loss": 0.5235, "step": 34087 }, { "epoch": 0.7229539140209116, "grad_norm": 0.4298953711986542, "learning_rate": 1.423536546921318e-05, "loss": 0.4629, "step": 34088 }, { "epoch": 0.7229751224788445, "grad_norm": 0.3671870231628418, "learning_rate": 1.4235063360441454e-05, "loss": 0.4894, "step": 34089 }, { "epoch": 0.7229963309367776, "grad_norm": 0.3582763373851776, "learning_rate": 1.423476124695953e-05, "loss": 0.546, "step": 34090 }, { "epoch": 0.7230175393947106, "grad_norm": 0.4468033015727997, "learning_rate": 1.4234459128767743e-05, "loss": 0.5267, "step": 34091 }, { "epoch": 0.7230387478526437, "grad_norm": 0.3513222336769104, "learning_rate": 1.4234157005866424e-05, "loss": 0.4722, "step": 34092 }, { "epoch": 0.7230599563105766, "grad_norm": 0.3487415611743927, "learning_rate": 1.4233854878255917e-05, "loss": 0.5319, "step": 34093 }, { "epoch": 0.7230811647685097, "grad_norm": 0.3501598536968231, "learning_rate": 1.423355274593655e-05, "loss": 0.5229, "step": 34094 }, { "epoch": 0.7231023732264427, "grad_norm": 0.32686808705329895, "learning_rate": 1.4233250608908662e-05, "loss": 0.4967, "step": 34095 }, { "epoch": 0.7231235816843757, "grad_norm": 0.47739407420158386, "learning_rate": 1.423294846717259e-05, "loss": 0.4795, "step": 34096 }, { "epoch": 0.7231447901423087, "grad_norm": 0.35320085287094116, "learning_rate": 1.4232646320728672e-05, "loss": 0.5172, "step": 34097 }, { "epoch": 0.7231659986002418, "grad_norm": 0.33039626479148865, "learning_rate": 1.4232344169577237e-05, "loss": 0.4535, "step": 34098 }, { "epoch": 0.7231872070581749, "grad_norm": 0.33034634590148926, "learning_rate": 1.423204201371863e-05, "loss": 0.4337, "step": 34099 }, { "epoch": 0.7232084155161078, "grad_norm": 0.3427146077156067, "learning_rate": 1.4231739853153182e-05, "loss": 0.508, "step": 34100 }, { "epoch": 0.7232296239740409, "grad_norm": 0.4272497594356537, "learning_rate": 1.4231437687881227e-05, "loss": 0.4945, "step": 34101 }, { "epoch": 0.7232508324319739, "grad_norm": 0.3885217607021332, "learning_rate": 1.4231135517903106e-05, "loss": 0.5472, "step": 34102 }, { "epoch": 0.7232720408899069, "grad_norm": 0.4764280617237091, "learning_rate": 1.423083334321915e-05, "loss": 0.465, "step": 34103 }, { "epoch": 0.7232932493478399, "grad_norm": 0.3783680200576782, "learning_rate": 1.42305311638297e-05, "loss": 0.5104, "step": 34104 }, { "epoch": 0.723314457805773, "grad_norm": 0.3368844985961914, "learning_rate": 1.423022897973509e-05, "loss": 0.4406, "step": 34105 }, { "epoch": 0.7233356662637059, "grad_norm": 0.376730740070343, "learning_rate": 1.4229926790935655e-05, "loss": 0.5627, "step": 34106 }, { "epoch": 0.723356874721639, "grad_norm": 0.3391949534416199, "learning_rate": 1.4229624597431736e-05, "loss": 0.4277, "step": 34107 }, { "epoch": 0.723378083179572, "grad_norm": 0.35566702485084534, "learning_rate": 1.4229322399223661e-05, "loss": 0.5143, "step": 34108 }, { "epoch": 0.723399291637505, "grad_norm": 0.3784796893596649, "learning_rate": 1.4229020196311771e-05, "loss": 0.5012, "step": 34109 }, { "epoch": 0.723420500095438, "grad_norm": 0.33616864681243896, "learning_rate": 1.42287179886964e-05, "loss": 0.474, "step": 34110 }, { "epoch": 0.7234417085533711, "grad_norm": 0.38571232557296753, "learning_rate": 1.422841577637789e-05, "loss": 0.4242, "step": 34111 }, { "epoch": 0.7234629170113042, "grad_norm": 0.3635321855545044, "learning_rate": 1.422811355935657e-05, "loss": 0.4396, "step": 34112 }, { "epoch": 0.7234841254692371, "grad_norm": 0.44680947065353394, "learning_rate": 1.422781133763278e-05, "loss": 0.5389, "step": 34113 }, { "epoch": 0.7235053339271702, "grad_norm": 0.34240415692329407, "learning_rate": 1.4227509111206854e-05, "loss": 0.4572, "step": 34114 }, { "epoch": 0.7235265423851032, "grad_norm": 0.3570288419723511, "learning_rate": 1.422720688007913e-05, "loss": 0.4504, "step": 34115 }, { "epoch": 0.7235477508430362, "grad_norm": 0.3280167877674103, "learning_rate": 1.4226904644249941e-05, "loss": 0.4855, "step": 34116 }, { "epoch": 0.7235689593009692, "grad_norm": 0.41515159606933594, "learning_rate": 1.4226602403719629e-05, "loss": 0.5156, "step": 34117 }, { "epoch": 0.7235901677589023, "grad_norm": 0.3117266297340393, "learning_rate": 1.4226300158488524e-05, "loss": 0.4011, "step": 34118 }, { "epoch": 0.7236113762168352, "grad_norm": 0.32199805974960327, "learning_rate": 1.4225997908556966e-05, "loss": 0.4404, "step": 34119 }, { "epoch": 0.7236325846747683, "grad_norm": 0.4124177396297455, "learning_rate": 1.4225695653925286e-05, "loss": 0.499, "step": 34120 }, { "epoch": 0.7236537931327013, "grad_norm": 0.380430668592453, "learning_rate": 1.4225393394593829e-05, "loss": 0.4355, "step": 34121 }, { "epoch": 0.7236750015906344, "grad_norm": 0.39589908719062805, "learning_rate": 1.4225091130562923e-05, "loss": 0.5316, "step": 34122 }, { "epoch": 0.7236962100485673, "grad_norm": 0.3559618592262268, "learning_rate": 1.422478886183291e-05, "loss": 0.5218, "step": 34123 }, { "epoch": 0.7237174185065004, "grad_norm": 0.38977575302124023, "learning_rate": 1.4224486588404121e-05, "loss": 0.4185, "step": 34124 }, { "epoch": 0.7237386269644334, "grad_norm": 0.4208275377750397, "learning_rate": 1.4224184310276896e-05, "loss": 0.5094, "step": 34125 }, { "epoch": 0.7237598354223664, "grad_norm": 0.3783855736255646, "learning_rate": 1.422388202745157e-05, "loss": 0.3482, "step": 34126 }, { "epoch": 0.7237810438802995, "grad_norm": 0.4302089214324951, "learning_rate": 1.4223579739928479e-05, "loss": 0.4193, "step": 34127 }, { "epoch": 0.7238022523382325, "grad_norm": 0.46104028820991516, "learning_rate": 1.422327744770796e-05, "loss": 0.452, "step": 34128 }, { "epoch": 0.7238234607961656, "grad_norm": 0.3311885893344879, "learning_rate": 1.4222975150790346e-05, "loss": 0.4937, "step": 34129 }, { "epoch": 0.7238446692540985, "grad_norm": 0.3535686135292053, "learning_rate": 1.4222672849175978e-05, "loss": 0.4693, "step": 34130 }, { "epoch": 0.7238658777120316, "grad_norm": 0.35165897011756897, "learning_rate": 1.4222370542865189e-05, "loss": 0.4843, "step": 34131 }, { "epoch": 0.7238870861699646, "grad_norm": 0.3856979012489319, "learning_rate": 1.4222068231858317e-05, "loss": 0.5374, "step": 34132 }, { "epoch": 0.7239082946278976, "grad_norm": 0.41808751225471497, "learning_rate": 1.4221765916155697e-05, "loss": 0.4474, "step": 34133 }, { "epoch": 0.7239295030858306, "grad_norm": 0.3815859258174896, "learning_rate": 1.4221463595757664e-05, "loss": 0.5733, "step": 34134 }, { "epoch": 0.7239507115437637, "grad_norm": 0.39507970213890076, "learning_rate": 1.4221161270664556e-05, "loss": 0.5223, "step": 34135 }, { "epoch": 0.7239719200016966, "grad_norm": 0.33507150411605835, "learning_rate": 1.4220858940876712e-05, "loss": 0.4895, "step": 34136 }, { "epoch": 0.7239931284596297, "grad_norm": 0.3580555319786072, "learning_rate": 1.4220556606394465e-05, "loss": 0.4751, "step": 34137 }, { "epoch": 0.7240143369175627, "grad_norm": 0.332986444234848, "learning_rate": 1.422025426721815e-05, "loss": 0.4207, "step": 34138 }, { "epoch": 0.7240355453754957, "grad_norm": 0.3538168966770172, "learning_rate": 1.4219951923348106e-05, "loss": 0.4854, "step": 34139 }, { "epoch": 0.7240567538334288, "grad_norm": 0.36688846349716187, "learning_rate": 1.4219649574784667e-05, "loss": 0.5736, "step": 34140 }, { "epoch": 0.7240779622913618, "grad_norm": 0.32636529207229614, "learning_rate": 1.4219347221528169e-05, "loss": 0.4902, "step": 34141 }, { "epoch": 0.7240991707492949, "grad_norm": 0.3620082437992096, "learning_rate": 1.4219044863578952e-05, "loss": 0.4545, "step": 34142 }, { "epoch": 0.7241203792072278, "grad_norm": 0.39318928122520447, "learning_rate": 1.4218742500937348e-05, "loss": 0.5467, "step": 34143 }, { "epoch": 0.7241415876651609, "grad_norm": 0.3455660939216614, "learning_rate": 1.42184401336037e-05, "loss": 0.5086, "step": 34144 }, { "epoch": 0.7241627961230939, "grad_norm": 0.34298840165138245, "learning_rate": 1.4218137761578334e-05, "loss": 0.5712, "step": 34145 }, { "epoch": 0.7241840045810269, "grad_norm": 0.3746447265148163, "learning_rate": 1.4217835384861594e-05, "loss": 0.5229, "step": 34146 }, { "epoch": 0.7242052130389599, "grad_norm": 0.4401850998401642, "learning_rate": 1.4217533003453813e-05, "loss": 0.4494, "step": 34147 }, { "epoch": 0.724226421496893, "grad_norm": 0.31618717312812805, "learning_rate": 1.421723061735533e-05, "loss": 0.4051, "step": 34148 }, { "epoch": 0.7242476299548259, "grad_norm": 0.34012773633003235, "learning_rate": 1.421692822656648e-05, "loss": 0.4425, "step": 34149 }, { "epoch": 0.724268838412759, "grad_norm": 0.3467774987220764, "learning_rate": 1.4216625831087598e-05, "loss": 0.4265, "step": 34150 }, { "epoch": 0.724290046870692, "grad_norm": 0.3368333578109741, "learning_rate": 1.4216323430919024e-05, "loss": 0.4562, "step": 34151 }, { "epoch": 0.7243112553286251, "grad_norm": 0.3499937355518341, "learning_rate": 1.4216021026061086e-05, "loss": 0.3983, "step": 34152 }, { "epoch": 0.7243324637865581, "grad_norm": 0.3435252010822296, "learning_rate": 1.4215718616514131e-05, "loss": 0.5596, "step": 34153 }, { "epoch": 0.7243536722444911, "grad_norm": 0.33815404772758484, "learning_rate": 1.421541620227849e-05, "loss": 0.516, "step": 34154 }, { "epoch": 0.7243748807024242, "grad_norm": 0.5831462740898132, "learning_rate": 1.4215113783354497e-05, "loss": 0.4908, "step": 34155 }, { "epoch": 0.7243960891603571, "grad_norm": 0.3863084316253662, "learning_rate": 1.4214811359742492e-05, "loss": 0.4722, "step": 34156 }, { "epoch": 0.7244172976182902, "grad_norm": 0.41582027077674866, "learning_rate": 1.421450893144281e-05, "loss": 0.4797, "step": 34157 }, { "epoch": 0.7244385060762232, "grad_norm": 0.4139367640018463, "learning_rate": 1.421420649845579e-05, "loss": 0.5359, "step": 34158 }, { "epoch": 0.7244597145341563, "grad_norm": 0.39967092871665955, "learning_rate": 1.4213904060781767e-05, "loss": 0.5758, "step": 34159 }, { "epoch": 0.7244809229920892, "grad_norm": 0.34140223264694214, "learning_rate": 1.4213601618421074e-05, "loss": 0.5312, "step": 34160 }, { "epoch": 0.7245021314500223, "grad_norm": 0.41085904836654663, "learning_rate": 1.421329917137405e-05, "loss": 0.5045, "step": 34161 }, { "epoch": 0.7245233399079553, "grad_norm": 0.3441304862499237, "learning_rate": 1.421299671964103e-05, "loss": 0.4998, "step": 34162 }, { "epoch": 0.7245445483658883, "grad_norm": 0.3487130105495453, "learning_rate": 1.4212694263222353e-05, "loss": 0.5148, "step": 34163 }, { "epoch": 0.7245657568238213, "grad_norm": 0.3687219023704529, "learning_rate": 1.4212391802118358e-05, "loss": 0.6015, "step": 34164 }, { "epoch": 0.7245869652817544, "grad_norm": 0.382879376411438, "learning_rate": 1.4212089336329374e-05, "loss": 0.4339, "step": 34165 }, { "epoch": 0.7246081737396873, "grad_norm": 0.3596951961517334, "learning_rate": 1.4211786865855739e-05, "loss": 0.4699, "step": 34166 }, { "epoch": 0.7246293821976204, "grad_norm": 0.369089275598526, "learning_rate": 1.4211484390697791e-05, "loss": 0.4374, "step": 34167 }, { "epoch": 0.7246505906555535, "grad_norm": 0.3566543161869049, "learning_rate": 1.4211181910855869e-05, "loss": 0.5285, "step": 34168 }, { "epoch": 0.7246717991134864, "grad_norm": 0.3185521066188812, "learning_rate": 1.4210879426330306e-05, "loss": 0.4713, "step": 34169 }, { "epoch": 0.7246930075714195, "grad_norm": 0.3422900140285492, "learning_rate": 1.421057693712144e-05, "loss": 0.5487, "step": 34170 }, { "epoch": 0.7247142160293525, "grad_norm": 0.36462026834487915, "learning_rate": 1.4210274443229608e-05, "loss": 0.5094, "step": 34171 }, { "epoch": 0.7247354244872856, "grad_norm": 0.5777276754379272, "learning_rate": 1.4209971944655143e-05, "loss": 0.4751, "step": 34172 }, { "epoch": 0.7247566329452185, "grad_norm": 0.3411692678928375, "learning_rate": 1.4209669441398387e-05, "loss": 0.4371, "step": 34173 }, { "epoch": 0.7247778414031516, "grad_norm": 0.40490013360977173, "learning_rate": 1.4209366933459668e-05, "loss": 0.5221, "step": 34174 }, { "epoch": 0.7247990498610846, "grad_norm": 0.36062994599342346, "learning_rate": 1.4209064420839332e-05, "loss": 0.555, "step": 34175 }, { "epoch": 0.7248202583190176, "grad_norm": 0.3606029152870178, "learning_rate": 1.420876190353771e-05, "loss": 0.5078, "step": 34176 }, { "epoch": 0.7248414667769506, "grad_norm": 0.45950624346733093, "learning_rate": 1.420845938155514e-05, "loss": 0.5143, "step": 34177 }, { "epoch": 0.7248626752348837, "grad_norm": 0.46789461374282837, "learning_rate": 1.420815685489196e-05, "loss": 0.4887, "step": 34178 }, { "epoch": 0.7248838836928166, "grad_norm": 0.39889097213745117, "learning_rate": 1.4207854323548502e-05, "loss": 0.4684, "step": 34179 }, { "epoch": 0.7249050921507497, "grad_norm": 0.4154769480228424, "learning_rate": 1.4207551787525102e-05, "loss": 0.4935, "step": 34180 }, { "epoch": 0.7249263006086828, "grad_norm": 0.32290908694267273, "learning_rate": 1.4207249246822103e-05, "loss": 0.4879, "step": 34181 }, { "epoch": 0.7249475090666158, "grad_norm": 0.4145284593105316, "learning_rate": 1.4206946701439839e-05, "loss": 0.502, "step": 34182 }, { "epoch": 0.7249687175245488, "grad_norm": 0.3340376615524292, "learning_rate": 1.4206644151378644e-05, "loss": 0.5105, "step": 34183 }, { "epoch": 0.7249899259824818, "grad_norm": 0.37813228368759155, "learning_rate": 1.4206341596638855e-05, "loss": 0.4749, "step": 34184 }, { "epoch": 0.7250111344404149, "grad_norm": 0.4221928119659424, "learning_rate": 1.420603903722081e-05, "loss": 0.5195, "step": 34185 }, { "epoch": 0.7250323428983478, "grad_norm": 0.35842952132225037, "learning_rate": 1.4205736473124844e-05, "loss": 0.5149, "step": 34186 }, { "epoch": 0.7250535513562809, "grad_norm": 0.40252071619033813, "learning_rate": 1.4205433904351296e-05, "loss": 0.5195, "step": 34187 }, { "epoch": 0.7250747598142139, "grad_norm": 0.3715756833553314, "learning_rate": 1.42051313309005e-05, "loss": 0.4442, "step": 34188 }, { "epoch": 0.725095968272147, "grad_norm": 0.3677174150943756, "learning_rate": 1.4204828752772795e-05, "loss": 0.4567, "step": 34189 }, { "epoch": 0.7251171767300799, "grad_norm": 0.35595494508743286, "learning_rate": 1.4204526169968515e-05, "loss": 0.4948, "step": 34190 }, { "epoch": 0.725138385188013, "grad_norm": 0.3701219856739044, "learning_rate": 1.4204223582488e-05, "loss": 0.5851, "step": 34191 }, { "epoch": 0.725159593645946, "grad_norm": 0.341623991727829, "learning_rate": 1.4203920990331582e-05, "loss": 0.4795, "step": 34192 }, { "epoch": 0.725180802103879, "grad_norm": 0.3766734302043915, "learning_rate": 1.4203618393499596e-05, "loss": 0.4624, "step": 34193 }, { "epoch": 0.7252020105618121, "grad_norm": 0.3458237946033478, "learning_rate": 1.4203315791992385e-05, "loss": 0.4908, "step": 34194 }, { "epoch": 0.7252232190197451, "grad_norm": 0.3833199441432953, "learning_rate": 1.4203013185810284e-05, "loss": 0.4968, "step": 34195 }, { "epoch": 0.7252444274776781, "grad_norm": 0.3630247414112091, "learning_rate": 1.420271057495363e-05, "loss": 0.4705, "step": 34196 }, { "epoch": 0.7252656359356111, "grad_norm": 0.3463222086429596, "learning_rate": 1.4202407959422753e-05, "loss": 0.4518, "step": 34197 }, { "epoch": 0.7252868443935442, "grad_norm": 0.3845936059951782, "learning_rate": 1.4202105339217997e-05, "loss": 0.5194, "step": 34198 }, { "epoch": 0.7253080528514771, "grad_norm": 0.38157349824905396, "learning_rate": 1.4201802714339694e-05, "loss": 0.5092, "step": 34199 }, { "epoch": 0.7253292613094102, "grad_norm": 0.35845255851745605, "learning_rate": 1.4201500084788182e-05, "loss": 0.5955, "step": 34200 }, { "epoch": 0.7253504697673432, "grad_norm": 0.3888978958129883, "learning_rate": 1.4201197450563802e-05, "loss": 0.5232, "step": 34201 }, { "epoch": 0.7253716782252763, "grad_norm": 0.4351007044315338, "learning_rate": 1.4200894811666884e-05, "loss": 0.4794, "step": 34202 }, { "epoch": 0.7253928866832092, "grad_norm": 0.3267412483692169, "learning_rate": 1.4200592168097768e-05, "loss": 0.4134, "step": 34203 }, { "epoch": 0.7254140951411423, "grad_norm": 0.3469926714897156, "learning_rate": 1.4200289519856792e-05, "loss": 0.5258, "step": 34204 }, { "epoch": 0.7254353035990753, "grad_norm": 0.34035906195640564, "learning_rate": 1.4199986866944286e-05, "loss": 0.5444, "step": 34205 }, { "epoch": 0.7254565120570083, "grad_norm": 0.44659507274627686, "learning_rate": 1.4199684209360593e-05, "loss": 0.5911, "step": 34206 }, { "epoch": 0.7254777205149413, "grad_norm": 0.38983431458473206, "learning_rate": 1.4199381547106051e-05, "loss": 0.4777, "step": 34207 }, { "epoch": 0.7254989289728744, "grad_norm": 0.4093554615974426, "learning_rate": 1.4199078880180988e-05, "loss": 0.5582, "step": 34208 }, { "epoch": 0.7255201374308075, "grad_norm": 0.44583627581596375, "learning_rate": 1.4198776208585749e-05, "loss": 0.4542, "step": 34209 }, { "epoch": 0.7255413458887404, "grad_norm": 0.3283502459526062, "learning_rate": 1.4198473532320669e-05, "loss": 0.3812, "step": 34210 }, { "epoch": 0.7255625543466735, "grad_norm": 0.32191240787506104, "learning_rate": 1.419817085138608e-05, "loss": 0.489, "step": 34211 }, { "epoch": 0.7255837628046065, "grad_norm": 0.43944811820983887, "learning_rate": 1.4197868165782323e-05, "loss": 0.5509, "step": 34212 }, { "epoch": 0.7256049712625395, "grad_norm": 0.35246044397354126, "learning_rate": 1.4197565475509733e-05, "loss": 0.5536, "step": 34213 }, { "epoch": 0.7256261797204725, "grad_norm": 0.3370286524295807, "learning_rate": 1.4197262780568645e-05, "loss": 0.4348, "step": 34214 }, { "epoch": 0.7256473881784056, "grad_norm": 0.37199273705482483, "learning_rate": 1.4196960080959403e-05, "loss": 0.4856, "step": 34215 }, { "epoch": 0.7256685966363385, "grad_norm": 0.3566233217716217, "learning_rate": 1.4196657376682335e-05, "loss": 0.4902, "step": 34216 }, { "epoch": 0.7256898050942716, "grad_norm": 0.353380411863327, "learning_rate": 1.4196354667737784e-05, "loss": 0.4833, "step": 34217 }, { "epoch": 0.7257110135522046, "grad_norm": 0.39826640486717224, "learning_rate": 1.419605195412608e-05, "loss": 0.5497, "step": 34218 }, { "epoch": 0.7257322220101377, "grad_norm": 0.34021618962287903, "learning_rate": 1.4195749235847567e-05, "loss": 0.4086, "step": 34219 }, { "epoch": 0.7257534304680706, "grad_norm": 0.46749216318130493, "learning_rate": 1.4195446512902575e-05, "loss": 0.5093, "step": 34220 }, { "epoch": 0.7257746389260037, "grad_norm": 0.3559313714504242, "learning_rate": 1.4195143785291445e-05, "loss": 0.4653, "step": 34221 }, { "epoch": 0.7257958473839368, "grad_norm": 0.35156697034835815, "learning_rate": 1.4194841053014514e-05, "loss": 0.4886, "step": 34222 }, { "epoch": 0.7258170558418697, "grad_norm": 0.35869643092155457, "learning_rate": 1.4194538316072118e-05, "loss": 0.5109, "step": 34223 }, { "epoch": 0.7258382642998028, "grad_norm": 0.34036552906036377, "learning_rate": 1.419423557446459e-05, "loss": 0.4604, "step": 34224 }, { "epoch": 0.7258594727577358, "grad_norm": 0.3852632939815521, "learning_rate": 1.4193932828192267e-05, "loss": 0.494, "step": 34225 }, { "epoch": 0.7258806812156688, "grad_norm": 0.3508843779563904, "learning_rate": 1.4193630077255493e-05, "loss": 0.4603, "step": 34226 }, { "epoch": 0.7259018896736018, "grad_norm": 0.38827094435691833, "learning_rate": 1.4193327321654603e-05, "loss": 0.5856, "step": 34227 }, { "epoch": 0.7259230981315349, "grad_norm": 0.39823517203330994, "learning_rate": 1.4193024561389924e-05, "loss": 0.517, "step": 34228 }, { "epoch": 0.7259443065894678, "grad_norm": 0.3238708972930908, "learning_rate": 1.4192721796461804e-05, "loss": 0.4328, "step": 34229 }, { "epoch": 0.7259655150474009, "grad_norm": 0.3650343120098114, "learning_rate": 1.4192419026870572e-05, "loss": 0.506, "step": 34230 }, { "epoch": 0.7259867235053339, "grad_norm": 0.3810986876487732, "learning_rate": 1.4192116252616568e-05, "loss": 0.428, "step": 34231 }, { "epoch": 0.726007931963267, "grad_norm": 0.39431342482566833, "learning_rate": 1.419181347370013e-05, "loss": 0.5168, "step": 34232 }, { "epoch": 0.7260291404211999, "grad_norm": 0.3913861811161041, "learning_rate": 1.4191510690121595e-05, "loss": 0.494, "step": 34233 }, { "epoch": 0.726050348879133, "grad_norm": 0.5393601655960083, "learning_rate": 1.4191207901881297e-05, "loss": 0.4705, "step": 34234 }, { "epoch": 0.7260715573370661, "grad_norm": 0.3314327597618103, "learning_rate": 1.4190905108979576e-05, "loss": 0.4667, "step": 34235 }, { "epoch": 0.726092765794999, "grad_norm": 0.3798283338546753, "learning_rate": 1.4190602311416764e-05, "loss": 0.5535, "step": 34236 }, { "epoch": 0.7261139742529321, "grad_norm": 0.3463650047779083, "learning_rate": 1.4190299509193202e-05, "loss": 0.4495, "step": 34237 }, { "epoch": 0.7261351827108651, "grad_norm": 0.3385571837425232, "learning_rate": 1.4189996702309224e-05, "loss": 0.447, "step": 34238 }, { "epoch": 0.7261563911687982, "grad_norm": 0.31532207131385803, "learning_rate": 1.4189693890765167e-05, "loss": 0.4285, "step": 34239 }, { "epoch": 0.7261775996267311, "grad_norm": 0.3313130736351013, "learning_rate": 1.418939107456137e-05, "loss": 0.4912, "step": 34240 }, { "epoch": 0.7261988080846642, "grad_norm": 0.32177600264549255, "learning_rate": 1.4189088253698172e-05, "loss": 0.46, "step": 34241 }, { "epoch": 0.7262200165425972, "grad_norm": 0.35922378301620483, "learning_rate": 1.41887854281759e-05, "loss": 0.4404, "step": 34242 }, { "epoch": 0.7262412250005302, "grad_norm": 0.37139803171157837, "learning_rate": 1.4188482597994902e-05, "loss": 0.4633, "step": 34243 }, { "epoch": 0.7262624334584632, "grad_norm": 0.34678536653518677, "learning_rate": 1.4188179763155508e-05, "loss": 0.5118, "step": 34244 }, { "epoch": 0.7262836419163963, "grad_norm": 0.3414613902568817, "learning_rate": 1.4187876923658058e-05, "loss": 0.5132, "step": 34245 }, { "epoch": 0.7263048503743292, "grad_norm": 0.39395833015441895, "learning_rate": 1.4187574079502887e-05, "loss": 0.5356, "step": 34246 }, { "epoch": 0.7263260588322623, "grad_norm": 0.46559256315231323, "learning_rate": 1.4187271230690334e-05, "loss": 0.5099, "step": 34247 }, { "epoch": 0.7263472672901953, "grad_norm": 0.3463895916938782, "learning_rate": 1.4186968377220732e-05, "loss": 0.4583, "step": 34248 }, { "epoch": 0.7263684757481284, "grad_norm": 0.3735153079032898, "learning_rate": 1.4186665519094423e-05, "loss": 0.5213, "step": 34249 }, { "epoch": 0.7263896842060614, "grad_norm": 0.3641592562198639, "learning_rate": 1.4186362656311736e-05, "loss": 0.4338, "step": 34250 }, { "epoch": 0.7264108926639944, "grad_norm": 0.3890625834465027, "learning_rate": 1.4186059788873016e-05, "loss": 0.5453, "step": 34251 }, { "epoch": 0.7264321011219275, "grad_norm": 0.37063607573509216, "learning_rate": 1.4185756916778598e-05, "loss": 0.5091, "step": 34252 }, { "epoch": 0.7264533095798604, "grad_norm": 0.35641467571258545, "learning_rate": 1.4185454040028814e-05, "loss": 0.4756, "step": 34253 }, { "epoch": 0.7264745180377935, "grad_norm": 0.5003340840339661, "learning_rate": 1.418515115862401e-05, "loss": 0.5356, "step": 34254 }, { "epoch": 0.7264957264957265, "grad_norm": 0.911945104598999, "learning_rate": 1.4184848272564513e-05, "loss": 0.5564, "step": 34255 }, { "epoch": 0.7265169349536595, "grad_norm": 0.3313599228858948, "learning_rate": 1.4184545381850665e-05, "loss": 0.4957, "step": 34256 }, { "epoch": 0.7265381434115925, "grad_norm": 0.8997819423675537, "learning_rate": 1.41842424864828e-05, "loss": 0.4929, "step": 34257 }, { "epoch": 0.7265593518695256, "grad_norm": 0.4330955743789673, "learning_rate": 1.4183939586461259e-05, "loss": 0.5087, "step": 34258 }, { "epoch": 0.7265805603274585, "grad_norm": 0.3586328625679016, "learning_rate": 1.4183636681786376e-05, "loss": 0.4854, "step": 34259 }, { "epoch": 0.7266017687853916, "grad_norm": 0.34534233808517456, "learning_rate": 1.418333377245849e-05, "loss": 0.5346, "step": 34260 }, { "epoch": 0.7266229772433246, "grad_norm": 0.35752981901168823, "learning_rate": 1.4183030858477938e-05, "loss": 0.4813, "step": 34261 }, { "epoch": 0.7266441857012577, "grad_norm": 0.34419748187065125, "learning_rate": 1.418272793984505e-05, "loss": 0.4561, "step": 34262 }, { "epoch": 0.7266653941591907, "grad_norm": 0.6464256048202515, "learning_rate": 1.418242501656017e-05, "loss": 0.453, "step": 34263 }, { "epoch": 0.7266866026171237, "grad_norm": 0.3828044533729553, "learning_rate": 1.4182122088623635e-05, "loss": 0.456, "step": 34264 }, { "epoch": 0.7267078110750568, "grad_norm": 0.4046173691749573, "learning_rate": 1.418181915603578e-05, "loss": 0.4658, "step": 34265 }, { "epoch": 0.7267290195329897, "grad_norm": 0.362753301858902, "learning_rate": 1.4181516218796942e-05, "loss": 0.4944, "step": 34266 }, { "epoch": 0.7267502279909228, "grad_norm": 0.36127007007598877, "learning_rate": 1.4181213276907457e-05, "loss": 0.4388, "step": 34267 }, { "epoch": 0.7267714364488558, "grad_norm": 0.3708306849002838, "learning_rate": 1.4180910330367665e-05, "loss": 0.4663, "step": 34268 }, { "epoch": 0.7267926449067889, "grad_norm": 0.3468780517578125, "learning_rate": 1.4180607379177902e-05, "loss": 0.4933, "step": 34269 }, { "epoch": 0.7268138533647218, "grad_norm": 0.38300561904907227, "learning_rate": 1.4180304423338498e-05, "loss": 0.492, "step": 34270 }, { "epoch": 0.7268350618226549, "grad_norm": 0.9011298418045044, "learning_rate": 1.41800014628498e-05, "loss": 0.5449, "step": 34271 }, { "epoch": 0.7268562702805879, "grad_norm": 0.34137964248657227, "learning_rate": 1.417969849771214e-05, "loss": 0.4478, "step": 34272 }, { "epoch": 0.7268774787385209, "grad_norm": 0.3412584662437439, "learning_rate": 1.4179395527925854e-05, "loss": 0.5009, "step": 34273 }, { "epoch": 0.7268986871964539, "grad_norm": 0.33268308639526367, "learning_rate": 1.4179092553491285e-05, "loss": 0.4626, "step": 34274 }, { "epoch": 0.726919895654387, "grad_norm": 0.388366162776947, "learning_rate": 1.4178789574408763e-05, "loss": 0.4385, "step": 34275 }, { "epoch": 0.72694110411232, "grad_norm": 0.3529418408870697, "learning_rate": 1.4178486590678624e-05, "loss": 0.517, "step": 34276 }, { "epoch": 0.726962312570253, "grad_norm": 0.3246611952781677, "learning_rate": 1.4178183602301211e-05, "loss": 0.4612, "step": 34277 }, { "epoch": 0.7269835210281861, "grad_norm": 0.3347035348415375, "learning_rate": 1.4177880609276863e-05, "loss": 0.4478, "step": 34278 }, { "epoch": 0.727004729486119, "grad_norm": 0.35118746757507324, "learning_rate": 1.4177577611605906e-05, "loss": 0.5336, "step": 34279 }, { "epoch": 0.7270259379440521, "grad_norm": 0.36040788888931274, "learning_rate": 1.4177274609288688e-05, "loss": 0.4592, "step": 34280 }, { "epoch": 0.7270471464019851, "grad_norm": 0.3268410265445709, "learning_rate": 1.4176971602325538e-05, "loss": 0.4892, "step": 34281 }, { "epoch": 0.7270683548599182, "grad_norm": 0.3496328890323639, "learning_rate": 1.4176668590716795e-05, "loss": 0.403, "step": 34282 }, { "epoch": 0.7270895633178511, "grad_norm": 0.36969077587127686, "learning_rate": 1.4176365574462803e-05, "loss": 0.4616, "step": 34283 }, { "epoch": 0.7271107717757842, "grad_norm": 0.3750416934490204, "learning_rate": 1.4176062553563889e-05, "loss": 0.5925, "step": 34284 }, { "epoch": 0.7271319802337172, "grad_norm": 0.33430245518684387, "learning_rate": 1.4175759528020399e-05, "loss": 0.4623, "step": 34285 }, { "epoch": 0.7271531886916502, "grad_norm": 0.35892701148986816, "learning_rate": 1.4175456497832662e-05, "loss": 0.4911, "step": 34286 }, { "epoch": 0.7271743971495832, "grad_norm": 0.41206881403923035, "learning_rate": 1.4175153463001019e-05, "loss": 0.5009, "step": 34287 }, { "epoch": 0.7271956056075163, "grad_norm": 0.3984629809856415, "learning_rate": 1.4174850423525808e-05, "loss": 0.5236, "step": 34288 }, { "epoch": 0.7272168140654492, "grad_norm": 0.33364880084991455, "learning_rate": 1.4174547379407365e-05, "loss": 0.4979, "step": 34289 }, { "epoch": 0.7272380225233823, "grad_norm": 0.4428841471672058, "learning_rate": 1.4174244330646024e-05, "loss": 0.481, "step": 34290 }, { "epoch": 0.7272592309813154, "grad_norm": 0.3507373631000519, "learning_rate": 1.4173941277242124e-05, "loss": 0.3999, "step": 34291 }, { "epoch": 0.7272804394392484, "grad_norm": 0.33623185753822327, "learning_rate": 1.417363821919601e-05, "loss": 0.4918, "step": 34292 }, { "epoch": 0.7273016478971814, "grad_norm": 0.36410412192344666, "learning_rate": 1.4173335156508003e-05, "loss": 0.4672, "step": 34293 }, { "epoch": 0.7273228563551144, "grad_norm": 0.3893430829048157, "learning_rate": 1.4173032089178453e-05, "loss": 0.4629, "step": 34294 }, { "epoch": 0.7273440648130475, "grad_norm": 0.34509462118148804, "learning_rate": 1.4172729017207693e-05, "loss": 0.5221, "step": 34295 }, { "epoch": 0.7273652732709804, "grad_norm": 0.4208521842956543, "learning_rate": 1.417242594059606e-05, "loss": 0.4369, "step": 34296 }, { "epoch": 0.7273864817289135, "grad_norm": 0.36967745423316956, "learning_rate": 1.4172122859343891e-05, "loss": 0.4809, "step": 34297 }, { "epoch": 0.7274076901868465, "grad_norm": 0.39976274967193604, "learning_rate": 1.4171819773451521e-05, "loss": 0.4925, "step": 34298 }, { "epoch": 0.7274288986447796, "grad_norm": 0.41754505038261414, "learning_rate": 1.4171516682919292e-05, "loss": 0.5266, "step": 34299 }, { "epoch": 0.7274501071027125, "grad_norm": 0.37157291173934937, "learning_rate": 1.4171213587747539e-05, "loss": 0.49, "step": 34300 }, { "epoch": 0.7274713155606456, "grad_norm": 0.5117781162261963, "learning_rate": 1.4170910487936596e-05, "loss": 0.4804, "step": 34301 }, { "epoch": 0.7274925240185786, "grad_norm": 0.6158490777015686, "learning_rate": 1.4170607383486803e-05, "loss": 0.4818, "step": 34302 }, { "epoch": 0.7275137324765116, "grad_norm": 0.39484813809394836, "learning_rate": 1.4170304274398498e-05, "loss": 0.5281, "step": 34303 }, { "epoch": 0.7275349409344447, "grad_norm": 0.3146176040172577, "learning_rate": 1.4170001160672015e-05, "loss": 0.3648, "step": 34304 }, { "epoch": 0.7275561493923777, "grad_norm": 0.3557356894016266, "learning_rate": 1.4169698042307695e-05, "loss": 0.4939, "step": 34305 }, { "epoch": 0.7275773578503107, "grad_norm": 0.36158812046051025, "learning_rate": 1.4169394919305875e-05, "loss": 0.4995, "step": 34306 }, { "epoch": 0.7275985663082437, "grad_norm": 0.34809935092926025, "learning_rate": 1.4169091791666884e-05, "loss": 0.5155, "step": 34307 }, { "epoch": 0.7276197747661768, "grad_norm": 0.3563297390937805, "learning_rate": 1.4168788659391068e-05, "loss": 0.4602, "step": 34308 }, { "epoch": 0.7276409832241097, "grad_norm": 0.41003599762916565, "learning_rate": 1.4168485522478764e-05, "loss": 0.4977, "step": 34309 }, { "epoch": 0.7276621916820428, "grad_norm": 0.3614754378795624, "learning_rate": 1.4168182380930305e-05, "loss": 0.5236, "step": 34310 }, { "epoch": 0.7276834001399758, "grad_norm": 0.37871620059013367, "learning_rate": 1.416787923474603e-05, "loss": 0.4684, "step": 34311 }, { "epoch": 0.7277046085979089, "grad_norm": 0.3540631830692291, "learning_rate": 1.4167576083926273e-05, "loss": 0.4373, "step": 34312 }, { "epoch": 0.7277258170558418, "grad_norm": 0.35385429859161377, "learning_rate": 1.4167272928471378e-05, "loss": 0.5146, "step": 34313 }, { "epoch": 0.7277470255137749, "grad_norm": 0.33129942417144775, "learning_rate": 1.4166969768381678e-05, "loss": 0.4816, "step": 34314 }, { "epoch": 0.7277682339717079, "grad_norm": 0.3375386893749237, "learning_rate": 1.4166666603657508e-05, "loss": 0.4528, "step": 34315 }, { "epoch": 0.7277894424296409, "grad_norm": 0.3608688712120056, "learning_rate": 1.4166363434299209e-05, "loss": 0.4915, "step": 34316 }, { "epoch": 0.727810650887574, "grad_norm": 0.38844355940818787, "learning_rate": 1.4166060260307118e-05, "loss": 0.4953, "step": 34317 }, { "epoch": 0.727831859345507, "grad_norm": 0.39593181014060974, "learning_rate": 1.4165757081681569e-05, "loss": 0.4785, "step": 34318 }, { "epoch": 0.7278530678034401, "grad_norm": 0.3680156469345093, "learning_rate": 1.41654538984229e-05, "loss": 0.5319, "step": 34319 }, { "epoch": 0.727874276261373, "grad_norm": 0.3244469165802002, "learning_rate": 1.4165150710531453e-05, "loss": 0.5166, "step": 34320 }, { "epoch": 0.7278954847193061, "grad_norm": 0.3600076735019684, "learning_rate": 1.4164847518007556e-05, "loss": 0.5196, "step": 34321 }, { "epoch": 0.7279166931772391, "grad_norm": 0.3683546781539917, "learning_rate": 1.4164544320851557e-05, "loss": 0.4624, "step": 34322 }, { "epoch": 0.7279379016351721, "grad_norm": 0.3508504331111908, "learning_rate": 1.4164241119063789e-05, "loss": 0.4269, "step": 34323 }, { "epoch": 0.7279591100931051, "grad_norm": 0.33652645349502563, "learning_rate": 1.4163937912644583e-05, "loss": 0.4429, "step": 34324 }, { "epoch": 0.7279803185510382, "grad_norm": 0.4051559567451477, "learning_rate": 1.4163634701594284e-05, "loss": 0.4746, "step": 34325 }, { "epoch": 0.7280015270089711, "grad_norm": 0.3364330232143402, "learning_rate": 1.4163331485913227e-05, "loss": 0.4473, "step": 34326 }, { "epoch": 0.7280227354669042, "grad_norm": 0.3251619040966034, "learning_rate": 1.4163028265601748e-05, "loss": 0.3554, "step": 34327 }, { "epoch": 0.7280439439248372, "grad_norm": 0.3281957507133484, "learning_rate": 1.4162725040660186e-05, "loss": 0.4341, "step": 34328 }, { "epoch": 0.7280651523827703, "grad_norm": 0.36612504720687866, "learning_rate": 1.4162421811088878e-05, "loss": 0.4367, "step": 34329 }, { "epoch": 0.7280863608407032, "grad_norm": 0.3696710169315338, "learning_rate": 1.4162118576888159e-05, "loss": 0.4433, "step": 34330 }, { "epoch": 0.7281075692986363, "grad_norm": 0.334246426820755, "learning_rate": 1.4161815338058369e-05, "loss": 0.4486, "step": 34331 }, { "epoch": 0.7281287777565694, "grad_norm": 0.3542560040950775, "learning_rate": 1.4161512094599842e-05, "loss": 0.497, "step": 34332 }, { "epoch": 0.7281499862145023, "grad_norm": 0.36193832755088806, "learning_rate": 1.4161208846512921e-05, "loss": 0.5084, "step": 34333 }, { "epoch": 0.7281711946724354, "grad_norm": 0.4241581857204437, "learning_rate": 1.4160905593797939e-05, "loss": 0.5239, "step": 34334 }, { "epoch": 0.7281924031303684, "grad_norm": 0.33471599221229553, "learning_rate": 1.4160602336455229e-05, "loss": 0.4792, "step": 34335 }, { "epoch": 0.7282136115883014, "grad_norm": 0.39006558060646057, "learning_rate": 1.416029907448514e-05, "loss": 0.4863, "step": 34336 }, { "epoch": 0.7282348200462344, "grad_norm": 0.43630415201187134, "learning_rate": 1.4159995807888e-05, "loss": 0.5502, "step": 34337 }, { "epoch": 0.7282560285041675, "grad_norm": 0.3365549147129059, "learning_rate": 1.4159692536664147e-05, "loss": 0.5384, "step": 34338 }, { "epoch": 0.7282772369621004, "grad_norm": 0.34901368618011475, "learning_rate": 1.4159389260813923e-05, "loss": 0.4676, "step": 34339 }, { "epoch": 0.7282984454200335, "grad_norm": 0.36448216438293457, "learning_rate": 1.415908598033766e-05, "loss": 0.4505, "step": 34340 }, { "epoch": 0.7283196538779665, "grad_norm": 0.3771301209926605, "learning_rate": 1.4158782695235695e-05, "loss": 0.5022, "step": 34341 }, { "epoch": 0.7283408623358996, "grad_norm": 0.3440416753292084, "learning_rate": 1.4158479405508375e-05, "loss": 0.5017, "step": 34342 }, { "epoch": 0.7283620707938325, "grad_norm": 0.47467029094696045, "learning_rate": 1.4158176111156027e-05, "loss": 0.6143, "step": 34343 }, { "epoch": 0.7283832792517656, "grad_norm": 0.3260943591594696, "learning_rate": 1.415787281217899e-05, "loss": 0.4433, "step": 34344 }, { "epoch": 0.7284044877096987, "grad_norm": 0.37944281101226807, "learning_rate": 1.4157569508577606e-05, "loss": 0.6033, "step": 34345 }, { "epoch": 0.7284256961676316, "grad_norm": 0.3751361072063446, "learning_rate": 1.4157266200352206e-05, "loss": 0.4581, "step": 34346 }, { "epoch": 0.7284469046255647, "grad_norm": 0.31278717517852783, "learning_rate": 1.4156962887503132e-05, "loss": 0.4979, "step": 34347 }, { "epoch": 0.7284681130834977, "grad_norm": 0.36712655425071716, "learning_rate": 1.4156659570030721e-05, "loss": 0.4775, "step": 34348 }, { "epoch": 0.7284893215414308, "grad_norm": 0.4016838073730469, "learning_rate": 1.4156356247935307e-05, "loss": 0.473, "step": 34349 }, { "epoch": 0.7285105299993637, "grad_norm": 0.3892732560634613, "learning_rate": 1.4156052921217233e-05, "loss": 0.4805, "step": 34350 }, { "epoch": 0.7285317384572968, "grad_norm": 0.4022362530231476, "learning_rate": 1.4155749589876832e-05, "loss": 0.5235, "step": 34351 }, { "epoch": 0.7285529469152298, "grad_norm": 0.35923057794570923, "learning_rate": 1.4155446253914442e-05, "loss": 0.4878, "step": 34352 }, { "epoch": 0.7285741553731628, "grad_norm": 0.45254436135292053, "learning_rate": 1.41551429133304e-05, "loss": 0.4897, "step": 34353 }, { "epoch": 0.7285953638310958, "grad_norm": 0.3820341229438782, "learning_rate": 1.4154839568125046e-05, "loss": 0.5195, "step": 34354 }, { "epoch": 0.7286165722890289, "grad_norm": 0.34482160210609436, "learning_rate": 1.4154536218298713e-05, "loss": 0.4573, "step": 34355 }, { "epoch": 0.7286377807469618, "grad_norm": 0.37155893445014954, "learning_rate": 1.4154232863851742e-05, "loss": 0.4813, "step": 34356 }, { "epoch": 0.7286589892048949, "grad_norm": 0.37086766958236694, "learning_rate": 1.4153929504784472e-05, "loss": 0.4742, "step": 34357 }, { "epoch": 0.728680197662828, "grad_norm": 0.3852025866508484, "learning_rate": 1.4153626141097234e-05, "loss": 0.492, "step": 34358 }, { "epoch": 0.728701406120761, "grad_norm": 0.4155476689338684, "learning_rate": 1.415332277279037e-05, "loss": 0.5346, "step": 34359 }, { "epoch": 0.728722614578694, "grad_norm": 4.174079895019531, "learning_rate": 1.4153019399864215e-05, "loss": 0.5534, "step": 34360 }, { "epoch": 0.728743823036627, "grad_norm": 0.35320988297462463, "learning_rate": 1.4152716022319111e-05, "loss": 0.4597, "step": 34361 }, { "epoch": 0.7287650314945601, "grad_norm": 0.45924344658851624, "learning_rate": 1.415241264015539e-05, "loss": 0.5158, "step": 34362 }, { "epoch": 0.728786239952493, "grad_norm": 0.395937442779541, "learning_rate": 1.4152109253373393e-05, "loss": 0.5231, "step": 34363 }, { "epoch": 0.7288074484104261, "grad_norm": 0.35459795594215393, "learning_rate": 1.4151805861973457e-05, "loss": 0.4941, "step": 34364 }, { "epoch": 0.7288286568683591, "grad_norm": 0.4422512948513031, "learning_rate": 1.4151502465955918e-05, "loss": 0.4746, "step": 34365 }, { "epoch": 0.7288498653262921, "grad_norm": 0.37271231412887573, "learning_rate": 1.415119906532111e-05, "loss": 0.4999, "step": 34366 }, { "epoch": 0.7288710737842251, "grad_norm": 0.37206342816352844, "learning_rate": 1.4150895660069379e-05, "loss": 0.527, "step": 34367 }, { "epoch": 0.7288922822421582, "grad_norm": 0.42261722683906555, "learning_rate": 1.4150592250201059e-05, "loss": 0.515, "step": 34368 }, { "epoch": 0.7289134907000911, "grad_norm": 0.37904930114746094, "learning_rate": 1.4150288835716483e-05, "loss": 0.5691, "step": 34369 }, { "epoch": 0.7289346991580242, "grad_norm": 0.38727131485939026, "learning_rate": 1.4149985416615995e-05, "loss": 0.5394, "step": 34370 }, { "epoch": 0.7289559076159573, "grad_norm": 0.43101054430007935, "learning_rate": 1.4149681992899926e-05, "loss": 0.5524, "step": 34371 }, { "epoch": 0.7289771160738903, "grad_norm": 0.3440755605697632, "learning_rate": 1.4149378564568617e-05, "loss": 0.4695, "step": 34372 }, { "epoch": 0.7289983245318233, "grad_norm": 0.3944222331047058, "learning_rate": 1.4149075131622408e-05, "loss": 0.6063, "step": 34373 }, { "epoch": 0.7290195329897563, "grad_norm": 0.3729591369628906, "learning_rate": 1.4148771694061631e-05, "loss": 0.4958, "step": 34374 }, { "epoch": 0.7290407414476894, "grad_norm": 0.6378604769706726, "learning_rate": 1.4148468251886627e-05, "loss": 0.4571, "step": 34375 }, { "epoch": 0.7290619499056223, "grad_norm": 0.32979634404182434, "learning_rate": 1.4148164805097735e-05, "loss": 0.4681, "step": 34376 }, { "epoch": 0.7290831583635554, "grad_norm": 0.3228085935115814, "learning_rate": 1.4147861353695284e-05, "loss": 0.4994, "step": 34377 }, { "epoch": 0.7291043668214884, "grad_norm": 0.40429770946502686, "learning_rate": 1.4147557897679624e-05, "loss": 0.4781, "step": 34378 }, { "epoch": 0.7291255752794215, "grad_norm": 0.39841434359550476, "learning_rate": 1.4147254437051083e-05, "loss": 0.5917, "step": 34379 }, { "epoch": 0.7291467837373544, "grad_norm": 0.36515259742736816, "learning_rate": 1.4146950971810001e-05, "loss": 0.4858, "step": 34380 }, { "epoch": 0.7291679921952875, "grad_norm": 0.33119839429855347, "learning_rate": 1.4146647501956718e-05, "loss": 0.5315, "step": 34381 }, { "epoch": 0.7291892006532205, "grad_norm": 0.37456026673316956, "learning_rate": 1.414634402749157e-05, "loss": 0.495, "step": 34382 }, { "epoch": 0.7292104091111535, "grad_norm": 0.44116055965423584, "learning_rate": 1.4146040548414893e-05, "loss": 0.512, "step": 34383 }, { "epoch": 0.7292316175690865, "grad_norm": 0.37214380502700806, "learning_rate": 1.4145737064727028e-05, "loss": 0.4261, "step": 34384 }, { "epoch": 0.7292528260270196, "grad_norm": 0.4270153045654297, "learning_rate": 1.4145433576428306e-05, "loss": 0.5466, "step": 34385 }, { "epoch": 0.7292740344849526, "grad_norm": 0.3386342227458954, "learning_rate": 1.414513008351907e-05, "loss": 0.4634, "step": 34386 }, { "epoch": 0.7292952429428856, "grad_norm": 0.3757152259349823, "learning_rate": 1.4144826585999658e-05, "loss": 0.5164, "step": 34387 }, { "epoch": 0.7293164514008187, "grad_norm": 0.34958797693252563, "learning_rate": 1.4144523083870407e-05, "loss": 0.4534, "step": 34388 }, { "epoch": 0.7293376598587517, "grad_norm": 0.34285807609558105, "learning_rate": 1.4144219577131651e-05, "loss": 0.5028, "step": 34389 }, { "epoch": 0.7293588683166847, "grad_norm": 0.3564666509628296, "learning_rate": 1.4143916065783732e-05, "loss": 0.5143, "step": 34390 }, { "epoch": 0.7293800767746177, "grad_norm": 0.36049941182136536, "learning_rate": 1.4143612549826981e-05, "loss": 0.5233, "step": 34391 }, { "epoch": 0.7294012852325508, "grad_norm": 0.3525122404098511, "learning_rate": 1.4143309029261744e-05, "loss": 0.5036, "step": 34392 }, { "epoch": 0.7294224936904837, "grad_norm": 0.3431747257709503, "learning_rate": 1.4143005504088354e-05, "loss": 0.5305, "step": 34393 }, { "epoch": 0.7294437021484168, "grad_norm": 0.3771885931491852, "learning_rate": 1.414270197430715e-05, "loss": 0.5002, "step": 34394 }, { "epoch": 0.7294649106063498, "grad_norm": 0.36433273553848267, "learning_rate": 1.414239843991847e-05, "loss": 0.4706, "step": 34395 }, { "epoch": 0.7294861190642828, "grad_norm": 0.3178885877132416, "learning_rate": 1.4142094900922647e-05, "loss": 0.4683, "step": 34396 }, { "epoch": 0.7295073275222158, "grad_norm": 0.4252718389034271, "learning_rate": 1.4141791357320025e-05, "loss": 0.4249, "step": 34397 }, { "epoch": 0.7295285359801489, "grad_norm": 0.38581496477127075, "learning_rate": 1.4141487809110936e-05, "loss": 0.4437, "step": 34398 }, { "epoch": 0.729549744438082, "grad_norm": 0.41942352056503296, "learning_rate": 1.4141184256295721e-05, "loss": 0.5305, "step": 34399 }, { "epoch": 0.7295709528960149, "grad_norm": 0.3480715751647949, "learning_rate": 1.4140880698874717e-05, "loss": 0.5361, "step": 34400 }, { "epoch": 0.729592161353948, "grad_norm": 0.3467384874820709, "learning_rate": 1.4140577136848264e-05, "loss": 0.4626, "step": 34401 }, { "epoch": 0.729613369811881, "grad_norm": 0.33039191365242004, "learning_rate": 1.4140273570216695e-05, "loss": 0.5365, "step": 34402 }, { "epoch": 0.729634578269814, "grad_norm": 0.3779318034648895, "learning_rate": 1.4139969998980347e-05, "loss": 0.4557, "step": 34403 }, { "epoch": 0.729655786727747, "grad_norm": 0.3630748391151428, "learning_rate": 1.4139666423139562e-05, "loss": 0.6047, "step": 34404 }, { "epoch": 0.7296769951856801, "grad_norm": 0.36705777049064636, "learning_rate": 1.413936284269468e-05, "loss": 0.5293, "step": 34405 }, { "epoch": 0.729698203643613, "grad_norm": 0.35355138778686523, "learning_rate": 1.4139059257646031e-05, "loss": 0.5046, "step": 34406 }, { "epoch": 0.7297194121015461, "grad_norm": 0.3784805238246918, "learning_rate": 1.4138755667993954e-05, "loss": 0.5109, "step": 34407 }, { "epoch": 0.7297406205594791, "grad_norm": 0.5157181620597839, "learning_rate": 1.4138452073738793e-05, "loss": 0.4841, "step": 34408 }, { "epoch": 0.7297618290174122, "grad_norm": 0.3839479386806488, "learning_rate": 1.4138148474880881e-05, "loss": 0.5234, "step": 34409 }, { "epoch": 0.7297830374753451, "grad_norm": 0.3331602215766907, "learning_rate": 1.4137844871420556e-05, "loss": 0.5134, "step": 34410 }, { "epoch": 0.7298042459332782, "grad_norm": 0.3590942621231079, "learning_rate": 1.4137541263358154e-05, "loss": 0.4763, "step": 34411 }, { "epoch": 0.7298254543912113, "grad_norm": 0.34858623147010803, "learning_rate": 1.4137237650694016e-05, "loss": 0.5034, "step": 34412 }, { "epoch": 0.7298466628491442, "grad_norm": 0.36799320578575134, "learning_rate": 1.413693403342848e-05, "loss": 0.4723, "step": 34413 }, { "epoch": 0.7298678713070773, "grad_norm": 0.36233848333358765, "learning_rate": 1.4136630411561877e-05, "loss": 0.4329, "step": 34414 }, { "epoch": 0.7298890797650103, "grad_norm": 0.44409477710723877, "learning_rate": 1.4136326785094554e-05, "loss": 0.5054, "step": 34415 }, { "epoch": 0.7299102882229433, "grad_norm": 0.36222758889198303, "learning_rate": 1.4136023154026846e-05, "loss": 0.4773, "step": 34416 }, { "epoch": 0.7299314966808763, "grad_norm": 0.36419254541397095, "learning_rate": 1.4135719518359081e-05, "loss": 0.4898, "step": 34417 }, { "epoch": 0.7299527051388094, "grad_norm": 0.42753392457962036, "learning_rate": 1.4135415878091609e-05, "loss": 0.5391, "step": 34418 }, { "epoch": 0.7299739135967424, "grad_norm": 0.3851882517337799, "learning_rate": 1.4135112233224768e-05, "loss": 0.5886, "step": 34419 }, { "epoch": 0.7299951220546754, "grad_norm": 0.32742512226104736, "learning_rate": 1.4134808583758886e-05, "loss": 0.4648, "step": 34420 }, { "epoch": 0.7300163305126084, "grad_norm": 0.3441561460494995, "learning_rate": 1.4134504929694306e-05, "loss": 0.4824, "step": 34421 }, { "epoch": 0.7300375389705415, "grad_norm": 0.33926787972450256, "learning_rate": 1.4134201271031365e-05, "loss": 0.515, "step": 34422 }, { "epoch": 0.7300587474284744, "grad_norm": 0.41128936409950256, "learning_rate": 1.4133897607770404e-05, "loss": 0.5426, "step": 34423 }, { "epoch": 0.7300799558864075, "grad_norm": 0.35510143637657166, "learning_rate": 1.4133593939911755e-05, "loss": 0.4773, "step": 34424 }, { "epoch": 0.7301011643443405, "grad_norm": 0.4036628007888794, "learning_rate": 1.413329026745576e-05, "loss": 0.5339, "step": 34425 }, { "epoch": 0.7301223728022735, "grad_norm": 0.42867302894592285, "learning_rate": 1.4132986590402757e-05, "loss": 0.5121, "step": 34426 }, { "epoch": 0.7301435812602066, "grad_norm": 0.35332000255584717, "learning_rate": 1.4132682908753084e-05, "loss": 0.49, "step": 34427 }, { "epoch": 0.7301647897181396, "grad_norm": 0.3681371510028839, "learning_rate": 1.4132379222507071e-05, "loss": 0.5254, "step": 34428 }, { "epoch": 0.7301859981760727, "grad_norm": 0.33411967754364014, "learning_rate": 1.4132075531665066e-05, "loss": 0.5253, "step": 34429 }, { "epoch": 0.7302072066340056, "grad_norm": 0.3206518590450287, "learning_rate": 1.4131771836227399e-05, "loss": 0.4122, "step": 34430 }, { "epoch": 0.7302284150919387, "grad_norm": 0.3552631437778473, "learning_rate": 1.4131468136194415e-05, "loss": 0.472, "step": 34431 }, { "epoch": 0.7302496235498717, "grad_norm": 0.3429866135120392, "learning_rate": 1.4131164431566447e-05, "loss": 0.5377, "step": 34432 }, { "epoch": 0.7302708320078047, "grad_norm": 0.3576328158378601, "learning_rate": 1.4130860722343834e-05, "loss": 0.5271, "step": 34433 }, { "epoch": 0.7302920404657377, "grad_norm": 0.42561075091362, "learning_rate": 1.413055700852691e-05, "loss": 0.5, "step": 34434 }, { "epoch": 0.7303132489236708, "grad_norm": 0.3541956841945648, "learning_rate": 1.4130253290116022e-05, "loss": 0.4429, "step": 34435 }, { "epoch": 0.7303344573816037, "grad_norm": 0.3793352246284485, "learning_rate": 1.4129949567111498e-05, "loss": 0.6039, "step": 34436 }, { "epoch": 0.7303556658395368, "grad_norm": 0.3330529034137726, "learning_rate": 1.4129645839513679e-05, "loss": 0.5024, "step": 34437 }, { "epoch": 0.7303768742974698, "grad_norm": 0.3826841115951538, "learning_rate": 1.4129342107322908e-05, "loss": 0.5483, "step": 34438 }, { "epoch": 0.7303980827554029, "grad_norm": 0.3543556034564972, "learning_rate": 1.4129038370539518e-05, "loss": 0.5454, "step": 34439 }, { "epoch": 0.7304192912133359, "grad_norm": 0.36504265666007996, "learning_rate": 1.4128734629163844e-05, "loss": 0.555, "step": 34440 }, { "epoch": 0.7304404996712689, "grad_norm": 0.41573888063430786, "learning_rate": 1.4128430883196232e-05, "loss": 0.5113, "step": 34441 }, { "epoch": 0.730461708129202, "grad_norm": 0.3572021424770355, "learning_rate": 1.4128127132637011e-05, "loss": 0.4702, "step": 34442 }, { "epoch": 0.7304829165871349, "grad_norm": 0.3557113707065582, "learning_rate": 1.4127823377486523e-05, "loss": 0.5027, "step": 34443 }, { "epoch": 0.730504125045068, "grad_norm": 0.3775683045387268, "learning_rate": 1.412751961774511e-05, "loss": 0.503, "step": 34444 }, { "epoch": 0.730525333503001, "grad_norm": 0.34413763880729675, "learning_rate": 1.4127215853413101e-05, "loss": 0.4921, "step": 34445 }, { "epoch": 0.730546541960934, "grad_norm": 0.35115766525268555, "learning_rate": 1.412691208449084e-05, "loss": 0.4311, "step": 34446 }, { "epoch": 0.730567750418867, "grad_norm": 0.33423763513565063, "learning_rate": 1.4126608310978665e-05, "loss": 0.4716, "step": 34447 }, { "epoch": 0.7305889588768001, "grad_norm": 0.3258475661277771, "learning_rate": 1.412630453287691e-05, "loss": 0.5095, "step": 34448 }, { "epoch": 0.730610167334733, "grad_norm": 0.3385506570339203, "learning_rate": 1.4126000750185914e-05, "loss": 0.5073, "step": 34449 }, { "epoch": 0.7306313757926661, "grad_norm": 0.36509841680526733, "learning_rate": 1.4125696962906018e-05, "loss": 0.5072, "step": 34450 }, { "epoch": 0.7306525842505991, "grad_norm": 0.35271432995796204, "learning_rate": 1.4125393171037554e-05, "loss": 0.4923, "step": 34451 }, { "epoch": 0.7306737927085322, "grad_norm": 0.3451032340526581, "learning_rate": 1.4125089374580868e-05, "loss": 0.4403, "step": 34452 }, { "epoch": 0.7306950011664652, "grad_norm": 0.3596329987049103, "learning_rate": 1.4124785573536293e-05, "loss": 0.4548, "step": 34453 }, { "epoch": 0.7307162096243982, "grad_norm": 0.36080461740493774, "learning_rate": 1.4124481767904164e-05, "loss": 0.4874, "step": 34454 }, { "epoch": 0.7307374180823313, "grad_norm": 0.33797577023506165, "learning_rate": 1.4124177957684823e-05, "loss": 0.5284, "step": 34455 }, { "epoch": 0.7307586265402642, "grad_norm": 0.35804399847984314, "learning_rate": 1.4123874142878608e-05, "loss": 0.571, "step": 34456 }, { "epoch": 0.7307798349981973, "grad_norm": 0.3823244273662567, "learning_rate": 1.4123570323485856e-05, "loss": 0.4222, "step": 34457 }, { "epoch": 0.7308010434561303, "grad_norm": 0.396179735660553, "learning_rate": 1.4123266499506907e-05, "loss": 0.5864, "step": 34458 }, { "epoch": 0.7308222519140634, "grad_norm": 0.33754828572273254, "learning_rate": 1.4122962670942093e-05, "loss": 0.4514, "step": 34459 }, { "epoch": 0.7308434603719963, "grad_norm": 0.3306475877761841, "learning_rate": 1.4122658837791758e-05, "loss": 0.4419, "step": 34460 }, { "epoch": 0.7308646688299294, "grad_norm": 0.35484713315963745, "learning_rate": 1.412235500005624e-05, "loss": 0.4248, "step": 34461 }, { "epoch": 0.7308858772878624, "grad_norm": 0.8524975776672363, "learning_rate": 1.4122051157735867e-05, "loss": 0.5025, "step": 34462 }, { "epoch": 0.7309070857457954, "grad_norm": 0.37519538402557373, "learning_rate": 1.412174731083099e-05, "loss": 0.4711, "step": 34463 }, { "epoch": 0.7309282942037284, "grad_norm": 0.6481720805168152, "learning_rate": 1.4121443459341943e-05, "loss": 0.4663, "step": 34464 }, { "epoch": 0.7309495026616615, "grad_norm": 0.354330837726593, "learning_rate": 1.412113960326906e-05, "loss": 0.4822, "step": 34465 }, { "epoch": 0.7309707111195944, "grad_norm": 0.38091033697128296, "learning_rate": 1.4120835742612682e-05, "loss": 0.5135, "step": 34466 }, { "epoch": 0.7309919195775275, "grad_norm": 0.349038302898407, "learning_rate": 1.4120531877373147e-05, "loss": 0.5239, "step": 34467 }, { "epoch": 0.7310131280354606, "grad_norm": 0.45685335993766785, "learning_rate": 1.412022800755079e-05, "loss": 0.6061, "step": 34468 }, { "epoch": 0.7310343364933936, "grad_norm": 0.3464837670326233, "learning_rate": 1.4119924133145952e-05, "loss": 0.5087, "step": 34469 }, { "epoch": 0.7310555449513266, "grad_norm": 0.34200793504714966, "learning_rate": 1.4119620254158972e-05, "loss": 0.5234, "step": 34470 }, { "epoch": 0.7310767534092596, "grad_norm": 0.35843726992607117, "learning_rate": 1.4119316370590183e-05, "loss": 0.4814, "step": 34471 }, { "epoch": 0.7310979618671927, "grad_norm": 0.4518395960330963, "learning_rate": 1.4119012482439929e-05, "loss": 0.5343, "step": 34472 }, { "epoch": 0.7311191703251256, "grad_norm": 0.3436630666255951, "learning_rate": 1.4118708589708544e-05, "loss": 0.4839, "step": 34473 }, { "epoch": 0.7311403787830587, "grad_norm": 0.37398025393486023, "learning_rate": 1.4118404692396367e-05, "loss": 0.4714, "step": 34474 }, { "epoch": 0.7311615872409917, "grad_norm": 0.42151355743408203, "learning_rate": 1.4118100790503735e-05, "loss": 0.4629, "step": 34475 }, { "epoch": 0.7311827956989247, "grad_norm": 0.39035356044769287, "learning_rate": 1.4117796884030987e-05, "loss": 0.5123, "step": 34476 }, { "epoch": 0.7312040041568577, "grad_norm": 0.3387792408466339, "learning_rate": 1.4117492972978464e-05, "loss": 0.4695, "step": 34477 }, { "epoch": 0.7312252126147908, "grad_norm": 0.47930675745010376, "learning_rate": 1.4117189057346498e-05, "loss": 0.4369, "step": 34478 }, { "epoch": 0.7312464210727238, "grad_norm": 0.41998448967933655, "learning_rate": 1.4116885137135431e-05, "loss": 0.5355, "step": 34479 }, { "epoch": 0.7312676295306568, "grad_norm": 0.36778101325035095, "learning_rate": 1.4116581212345604e-05, "loss": 0.4876, "step": 34480 }, { "epoch": 0.7312888379885899, "grad_norm": 0.32333460450172424, "learning_rate": 1.4116277282977344e-05, "loss": 0.4592, "step": 34481 }, { "epoch": 0.7313100464465229, "grad_norm": 0.37285175919532776, "learning_rate": 1.4115973349031e-05, "loss": 0.5069, "step": 34482 }, { "epoch": 0.7313312549044559, "grad_norm": 0.47975218296051025, "learning_rate": 1.4115669410506908e-05, "loss": 0.4719, "step": 34483 }, { "epoch": 0.7313524633623889, "grad_norm": 0.3738967478275299, "learning_rate": 1.4115365467405402e-05, "loss": 0.4289, "step": 34484 }, { "epoch": 0.731373671820322, "grad_norm": 0.33336547017097473, "learning_rate": 1.4115061519726822e-05, "loss": 0.4914, "step": 34485 }, { "epoch": 0.7313948802782549, "grad_norm": 0.3827798664569855, "learning_rate": 1.4114757567471507e-05, "loss": 0.4118, "step": 34486 }, { "epoch": 0.731416088736188, "grad_norm": 0.3587115406990051, "learning_rate": 1.4114453610639794e-05, "loss": 0.5072, "step": 34487 }, { "epoch": 0.731437297194121, "grad_norm": 0.3273632228374481, "learning_rate": 1.4114149649232021e-05, "loss": 0.4394, "step": 34488 }, { "epoch": 0.7314585056520541, "grad_norm": 0.35312867164611816, "learning_rate": 1.4113845683248525e-05, "loss": 0.4609, "step": 34489 }, { "epoch": 0.731479714109987, "grad_norm": 0.3766709268093109, "learning_rate": 1.411354171268965e-05, "loss": 0.5546, "step": 34490 }, { "epoch": 0.7315009225679201, "grad_norm": 0.36624547839164734, "learning_rate": 1.4113237737555728e-05, "loss": 0.4712, "step": 34491 }, { "epoch": 0.7315221310258531, "grad_norm": 0.40178442001342773, "learning_rate": 1.4112933757847099e-05, "loss": 0.4797, "step": 34492 }, { "epoch": 0.7315433394837861, "grad_norm": 0.9848448038101196, "learning_rate": 1.41126297735641e-05, "loss": 0.4855, "step": 34493 }, { "epoch": 0.7315645479417192, "grad_norm": 0.35171568393707275, "learning_rate": 1.4112325784707066e-05, "loss": 0.5111, "step": 34494 }, { "epoch": 0.7315857563996522, "grad_norm": 0.36253470182418823, "learning_rate": 1.4112021791276345e-05, "loss": 0.4943, "step": 34495 }, { "epoch": 0.7316069648575853, "grad_norm": 0.38628819584846497, "learning_rate": 1.4111717793272266e-05, "loss": 0.4626, "step": 34496 }, { "epoch": 0.7316281733155182, "grad_norm": 0.362567663192749, "learning_rate": 1.4111413790695172e-05, "loss": 0.4819, "step": 34497 }, { "epoch": 0.7316493817734513, "grad_norm": 0.34857770800590515, "learning_rate": 1.4111109783545398e-05, "loss": 0.5045, "step": 34498 }, { "epoch": 0.7316705902313843, "grad_norm": 0.34470972418785095, "learning_rate": 1.4110805771823283e-05, "loss": 0.4399, "step": 34499 }, { "epoch": 0.7316917986893173, "grad_norm": 0.4311218559741974, "learning_rate": 1.4110501755529166e-05, "loss": 0.4288, "step": 34500 }, { "epoch": 0.7317130071472503, "grad_norm": 0.36172425746917725, "learning_rate": 1.4110197734663387e-05, "loss": 0.4949, "step": 34501 }, { "epoch": 0.7317342156051834, "grad_norm": 0.3590179681777954, "learning_rate": 1.4109893709226277e-05, "loss": 0.4688, "step": 34502 }, { "epoch": 0.7317554240631163, "grad_norm": 0.357453316450119, "learning_rate": 1.4109589679218183e-05, "loss": 0.5538, "step": 34503 }, { "epoch": 0.7317766325210494, "grad_norm": 0.42950960993766785, "learning_rate": 1.4109285644639437e-05, "loss": 0.5552, "step": 34504 }, { "epoch": 0.7317978409789824, "grad_norm": 0.35056453943252563, "learning_rate": 1.4108981605490382e-05, "loss": 0.5024, "step": 34505 }, { "epoch": 0.7318190494369154, "grad_norm": 0.417333722114563, "learning_rate": 1.4108677561771351e-05, "loss": 0.5037, "step": 34506 }, { "epoch": 0.7318402578948484, "grad_norm": 0.33351173996925354, "learning_rate": 1.4108373513482682e-05, "loss": 0.4539, "step": 34507 }, { "epoch": 0.7318614663527815, "grad_norm": 0.43795809149742126, "learning_rate": 1.4108069460624721e-05, "loss": 0.538, "step": 34508 }, { "epoch": 0.7318826748107146, "grad_norm": 0.4348100423812866, "learning_rate": 1.4107765403197798e-05, "loss": 0.4603, "step": 34509 }, { "epoch": 0.7319038832686475, "grad_norm": 0.32293638586997986, "learning_rate": 1.4107461341202254e-05, "loss": 0.4029, "step": 34510 }, { "epoch": 0.7319250917265806, "grad_norm": 0.4185413420200348, "learning_rate": 1.4107157274638427e-05, "loss": 0.5217, "step": 34511 }, { "epoch": 0.7319463001845136, "grad_norm": 0.38306570053100586, "learning_rate": 1.4106853203506657e-05, "loss": 0.488, "step": 34512 }, { "epoch": 0.7319675086424466, "grad_norm": 0.342860609292984, "learning_rate": 1.4106549127807277e-05, "loss": 0.5005, "step": 34513 }, { "epoch": 0.7319887171003796, "grad_norm": 0.32793477177619934, "learning_rate": 1.4106245047540629e-05, "loss": 0.489, "step": 34514 }, { "epoch": 0.7320099255583127, "grad_norm": 0.3712313175201416, "learning_rate": 1.4105940962707056e-05, "loss": 0.4819, "step": 34515 }, { "epoch": 0.7320311340162456, "grad_norm": 0.40680018067359924, "learning_rate": 1.4105636873306887e-05, "loss": 0.4542, "step": 34516 }, { "epoch": 0.7320523424741787, "grad_norm": 0.42224353551864624, "learning_rate": 1.4105332779340466e-05, "loss": 0.5012, "step": 34517 }, { "epoch": 0.7320735509321117, "grad_norm": 0.36976107954978943, "learning_rate": 1.4105028680808128e-05, "loss": 0.5169, "step": 34518 }, { "epoch": 0.7320947593900448, "grad_norm": 0.37073925137519836, "learning_rate": 1.4104724577710215e-05, "loss": 0.5291, "step": 34519 }, { "epoch": 0.7321159678479777, "grad_norm": 0.3192881643772125, "learning_rate": 1.4104420470047063e-05, "loss": 0.5024, "step": 34520 }, { "epoch": 0.7321371763059108, "grad_norm": 0.37952208518981934, "learning_rate": 1.4104116357819007e-05, "loss": 0.5288, "step": 34521 }, { "epoch": 0.7321583847638439, "grad_norm": 0.3519119620323181, "learning_rate": 1.4103812241026391e-05, "loss": 0.447, "step": 34522 }, { "epoch": 0.7321795932217768, "grad_norm": 0.31597620248794556, "learning_rate": 1.4103508119669552e-05, "loss": 0.4768, "step": 34523 }, { "epoch": 0.7322008016797099, "grad_norm": 0.3871822953224182, "learning_rate": 1.4103203993748822e-05, "loss": 0.5028, "step": 34524 }, { "epoch": 0.7322220101376429, "grad_norm": 0.3660498559474945, "learning_rate": 1.410289986326455e-05, "loss": 0.4869, "step": 34525 }, { "epoch": 0.732243218595576, "grad_norm": 0.3107996881008148, "learning_rate": 1.4102595728217063e-05, "loss": 0.4117, "step": 34526 }, { "epoch": 0.7322644270535089, "grad_norm": 0.3476203680038452, "learning_rate": 1.4102291588606708e-05, "loss": 0.4811, "step": 34527 }, { "epoch": 0.732285635511442, "grad_norm": 0.38304486870765686, "learning_rate": 1.410198744443382e-05, "loss": 0.5698, "step": 34528 }, { "epoch": 0.732306843969375, "grad_norm": 0.3290051519870758, "learning_rate": 1.4101683295698735e-05, "loss": 0.4668, "step": 34529 }, { "epoch": 0.732328052427308, "grad_norm": 0.31326285004615784, "learning_rate": 1.4101379142401794e-05, "loss": 0.4026, "step": 34530 }, { "epoch": 0.732349260885241, "grad_norm": 0.34400641918182373, "learning_rate": 1.4101074984543337e-05, "loss": 0.4895, "step": 34531 }, { "epoch": 0.7323704693431741, "grad_norm": 0.3648986518383026, "learning_rate": 1.4100770822123697e-05, "loss": 0.5299, "step": 34532 }, { "epoch": 0.732391677801107, "grad_norm": 0.3782519996166229, "learning_rate": 1.4100466655143217e-05, "loss": 0.4737, "step": 34533 }, { "epoch": 0.7324128862590401, "grad_norm": 0.39940890669822693, "learning_rate": 1.4100162483602235e-05, "loss": 0.5764, "step": 34534 }, { "epoch": 0.7324340947169732, "grad_norm": 0.38574209809303284, "learning_rate": 1.4099858307501087e-05, "loss": 0.5345, "step": 34535 }, { "epoch": 0.7324553031749061, "grad_norm": 0.40134841203689575, "learning_rate": 1.4099554126840111e-05, "loss": 0.4852, "step": 34536 }, { "epoch": 0.7324765116328392, "grad_norm": 0.36146533489227295, "learning_rate": 1.4099249941619649e-05, "loss": 0.4935, "step": 34537 }, { "epoch": 0.7324977200907722, "grad_norm": 0.35028594732284546, "learning_rate": 1.4098945751840032e-05, "loss": 0.5339, "step": 34538 }, { "epoch": 0.7325189285487053, "grad_norm": 0.391190767288208, "learning_rate": 1.4098641557501607e-05, "loss": 0.4836, "step": 34539 }, { "epoch": 0.7325401370066382, "grad_norm": 0.3336934447288513, "learning_rate": 1.4098337358604708e-05, "loss": 0.3958, "step": 34540 }, { "epoch": 0.7325613454645713, "grad_norm": 0.3579310476779938, "learning_rate": 1.4098033155149673e-05, "loss": 0.4626, "step": 34541 }, { "epoch": 0.7325825539225043, "grad_norm": 0.3610944151878357, "learning_rate": 1.4097728947136841e-05, "loss": 0.553, "step": 34542 }, { "epoch": 0.7326037623804373, "grad_norm": 0.380324125289917, "learning_rate": 1.4097424734566554e-05, "loss": 0.5083, "step": 34543 }, { "epoch": 0.7326249708383703, "grad_norm": 0.3456405997276306, "learning_rate": 1.409712051743914e-05, "loss": 0.4548, "step": 34544 }, { "epoch": 0.7326461792963034, "grad_norm": 0.3259057402610779, "learning_rate": 1.409681629575495e-05, "loss": 0.5107, "step": 34545 }, { "epoch": 0.7326673877542363, "grad_norm": 0.3296470642089844, "learning_rate": 1.4096512069514314e-05, "loss": 0.4507, "step": 34546 }, { "epoch": 0.7326885962121694, "grad_norm": 0.4529671370983124, "learning_rate": 1.4096207838717572e-05, "loss": 0.5456, "step": 34547 }, { "epoch": 0.7327098046701024, "grad_norm": 0.3868809938430786, "learning_rate": 1.4095903603365067e-05, "loss": 0.4872, "step": 34548 }, { "epoch": 0.7327310131280355, "grad_norm": 0.35996389389038086, "learning_rate": 1.4095599363457128e-05, "loss": 0.5093, "step": 34549 }, { "epoch": 0.7327522215859685, "grad_norm": 0.39706581830978394, "learning_rate": 1.4095295118994104e-05, "loss": 0.4923, "step": 34550 }, { "epoch": 0.7327734300439015, "grad_norm": 0.35336950421333313, "learning_rate": 1.4094990869976326e-05, "loss": 0.5051, "step": 34551 }, { "epoch": 0.7327946385018346, "grad_norm": 0.3325464129447937, "learning_rate": 1.4094686616404137e-05, "loss": 0.3949, "step": 34552 }, { "epoch": 0.7328158469597675, "grad_norm": 0.335519939661026, "learning_rate": 1.409438235827787e-05, "loss": 0.4466, "step": 34553 }, { "epoch": 0.7328370554177006, "grad_norm": 0.4816214144229889, "learning_rate": 1.4094078095597867e-05, "loss": 0.4722, "step": 34554 }, { "epoch": 0.7328582638756336, "grad_norm": 0.3457939028739929, "learning_rate": 1.4093773828364465e-05, "loss": 0.4567, "step": 34555 }, { "epoch": 0.7328794723335667, "grad_norm": 0.4789802134037018, "learning_rate": 1.4093469556578008e-05, "loss": 0.4407, "step": 34556 }, { "epoch": 0.7329006807914996, "grad_norm": 0.3537534773349762, "learning_rate": 1.4093165280238828e-05, "loss": 0.5233, "step": 34557 }, { "epoch": 0.7329218892494327, "grad_norm": 0.3425075113773346, "learning_rate": 1.409286099934726e-05, "loss": 0.4461, "step": 34558 }, { "epoch": 0.7329430977073657, "grad_norm": 0.39138954877853394, "learning_rate": 1.4092556713903652e-05, "loss": 0.5388, "step": 34559 }, { "epoch": 0.7329643061652987, "grad_norm": 0.37308210134506226, "learning_rate": 1.4092252423908339e-05, "loss": 0.4961, "step": 34560 }, { "epoch": 0.7329855146232317, "grad_norm": 0.34350019693374634, "learning_rate": 1.4091948129361654e-05, "loss": 0.4722, "step": 34561 }, { "epoch": 0.7330067230811648, "grad_norm": 0.35735780000686646, "learning_rate": 1.4091643830263943e-05, "loss": 0.4852, "step": 34562 }, { "epoch": 0.7330279315390978, "grad_norm": 0.49515318870544434, "learning_rate": 1.4091339526615542e-05, "loss": 0.4353, "step": 34563 }, { "epoch": 0.7330491399970308, "grad_norm": 0.6480256915092468, "learning_rate": 1.4091035218416788e-05, "loss": 0.4898, "step": 34564 }, { "epoch": 0.7330703484549639, "grad_norm": 0.42318418622016907, "learning_rate": 1.409073090566802e-05, "loss": 0.5277, "step": 34565 }, { "epoch": 0.7330915569128968, "grad_norm": 0.39706891775131226, "learning_rate": 1.4090426588369578e-05, "loss": 0.4914, "step": 34566 }, { "epoch": 0.7331127653708299, "grad_norm": 0.36426129937171936, "learning_rate": 1.4090122266521794e-05, "loss": 0.4387, "step": 34567 }, { "epoch": 0.7331339738287629, "grad_norm": 0.3342731297016144, "learning_rate": 1.4089817940125017e-05, "loss": 0.4642, "step": 34568 }, { "epoch": 0.733155182286696, "grad_norm": 0.4135279655456543, "learning_rate": 1.4089513609179577e-05, "loss": 0.5819, "step": 34569 }, { "epoch": 0.7331763907446289, "grad_norm": 0.3690704107284546, "learning_rate": 1.4089209273685817e-05, "loss": 0.5871, "step": 34570 }, { "epoch": 0.733197599202562, "grad_norm": 0.4172821640968323, "learning_rate": 1.4088904933644073e-05, "loss": 0.5903, "step": 34571 }, { "epoch": 0.733218807660495, "grad_norm": 0.3854944407939911, "learning_rate": 1.4088600589054683e-05, "loss": 0.4873, "step": 34572 }, { "epoch": 0.733240016118428, "grad_norm": 0.32586824893951416, "learning_rate": 1.408829623991799e-05, "loss": 0.4528, "step": 34573 }, { "epoch": 0.733261224576361, "grad_norm": 0.5824245810508728, "learning_rate": 1.4087991886234332e-05, "loss": 0.4204, "step": 34574 }, { "epoch": 0.7332824330342941, "grad_norm": 0.3547188639640808, "learning_rate": 1.4087687528004039e-05, "loss": 0.5233, "step": 34575 }, { "epoch": 0.7333036414922272, "grad_norm": 0.34162646532058716, "learning_rate": 1.408738316522746e-05, "loss": 0.4245, "step": 34576 }, { "epoch": 0.7333248499501601, "grad_norm": 0.48525160551071167, "learning_rate": 1.4087078797904924e-05, "loss": 0.4223, "step": 34577 }, { "epoch": 0.7333460584080932, "grad_norm": 0.3882746398448944, "learning_rate": 1.4086774426036775e-05, "loss": 0.4666, "step": 34578 }, { "epoch": 0.7333672668660262, "grad_norm": 0.3601061701774597, "learning_rate": 1.4086470049623355e-05, "loss": 0.5013, "step": 34579 }, { "epoch": 0.7333884753239592, "grad_norm": 0.3747398257255554, "learning_rate": 1.4086165668665e-05, "loss": 0.5049, "step": 34580 }, { "epoch": 0.7334096837818922, "grad_norm": 0.3869113624095917, "learning_rate": 1.408586128316204e-05, "loss": 0.4616, "step": 34581 }, { "epoch": 0.7334308922398253, "grad_norm": 0.3504123091697693, "learning_rate": 1.4085556893114827e-05, "loss": 0.4616, "step": 34582 }, { "epoch": 0.7334521006977582, "grad_norm": 0.44975894689559937, "learning_rate": 1.4085252498523686e-05, "loss": 0.4661, "step": 34583 }, { "epoch": 0.7334733091556913, "grad_norm": 0.42309385538101196, "learning_rate": 1.4084948099388968e-05, "loss": 0.4087, "step": 34584 }, { "epoch": 0.7334945176136243, "grad_norm": 0.3791409134864807, "learning_rate": 1.4084643695711006e-05, "loss": 0.516, "step": 34585 }, { "epoch": 0.7335157260715574, "grad_norm": 0.36005955934524536, "learning_rate": 1.4084339287490138e-05, "loss": 0.5376, "step": 34586 }, { "epoch": 0.7335369345294903, "grad_norm": 0.3625212609767914, "learning_rate": 1.4084034874726704e-05, "loss": 0.4312, "step": 34587 }, { "epoch": 0.7335581429874234, "grad_norm": 0.3346025347709656, "learning_rate": 1.4083730457421042e-05, "loss": 0.4965, "step": 34588 }, { "epoch": 0.7335793514453564, "grad_norm": 0.42552322149276733, "learning_rate": 1.4083426035573489e-05, "loss": 0.5493, "step": 34589 }, { "epoch": 0.7336005599032894, "grad_norm": 0.4920556843280792, "learning_rate": 1.4083121609184384e-05, "loss": 0.55, "step": 34590 }, { "epoch": 0.7336217683612225, "grad_norm": 0.3767347037792206, "learning_rate": 1.4082817178254069e-05, "loss": 0.4654, "step": 34591 }, { "epoch": 0.7336429768191555, "grad_norm": 0.36314138770103455, "learning_rate": 1.4082512742782879e-05, "loss": 0.5068, "step": 34592 }, { "epoch": 0.7336641852770885, "grad_norm": 0.3918871581554413, "learning_rate": 1.4082208302771157e-05, "loss": 0.4483, "step": 34593 }, { "epoch": 0.7336853937350215, "grad_norm": 0.5550833940505981, "learning_rate": 1.4081903858219237e-05, "loss": 0.4586, "step": 34594 }, { "epoch": 0.7337066021929546, "grad_norm": 0.37071818113327026, "learning_rate": 1.4081599409127455e-05, "loss": 0.5113, "step": 34595 }, { "epoch": 0.7337278106508875, "grad_norm": 0.3952220380306244, "learning_rate": 1.4081294955496159e-05, "loss": 0.485, "step": 34596 }, { "epoch": 0.7337490191088206, "grad_norm": 0.34352022409439087, "learning_rate": 1.4080990497325677e-05, "loss": 0.4952, "step": 34597 }, { "epoch": 0.7337702275667536, "grad_norm": 0.3463533818721771, "learning_rate": 1.4080686034616355e-05, "loss": 0.4964, "step": 34598 }, { "epoch": 0.7337914360246867, "grad_norm": 0.33317849040031433, "learning_rate": 1.4080381567368531e-05, "loss": 0.464, "step": 34599 }, { "epoch": 0.7338126444826196, "grad_norm": 0.3517283499240875, "learning_rate": 1.4080077095582538e-05, "loss": 0.4403, "step": 34600 }, { "epoch": 0.7338338529405527, "grad_norm": 0.38936299085617065, "learning_rate": 1.4079772619258726e-05, "loss": 0.5115, "step": 34601 }, { "epoch": 0.7338550613984857, "grad_norm": 0.36013633012771606, "learning_rate": 1.407946813839742e-05, "loss": 0.5396, "step": 34602 }, { "epoch": 0.7338762698564187, "grad_norm": 0.4395913779735565, "learning_rate": 1.4079163652998964e-05, "loss": 0.5028, "step": 34603 }, { "epoch": 0.7338974783143518, "grad_norm": 0.33024197816848755, "learning_rate": 1.4078859163063703e-05, "loss": 0.4585, "step": 34604 }, { "epoch": 0.7339186867722848, "grad_norm": 0.38253358006477356, "learning_rate": 1.4078554668591969e-05, "loss": 0.4714, "step": 34605 }, { "epoch": 0.7339398952302179, "grad_norm": 0.32601219415664673, "learning_rate": 1.4078250169584097e-05, "loss": 0.4996, "step": 34606 }, { "epoch": 0.7339611036881508, "grad_norm": 0.5409297347068787, "learning_rate": 1.4077945666040435e-05, "loss": 0.4203, "step": 34607 }, { "epoch": 0.7339823121460839, "grad_norm": 0.3239884674549103, "learning_rate": 1.4077641157961319e-05, "loss": 0.4228, "step": 34608 }, { "epoch": 0.7340035206040169, "grad_norm": 0.38228604197502136, "learning_rate": 1.4077336645347082e-05, "loss": 0.4504, "step": 34609 }, { "epoch": 0.7340247290619499, "grad_norm": 0.38751277327537537, "learning_rate": 1.4077032128198067e-05, "loss": 0.4772, "step": 34610 }, { "epoch": 0.7340459375198829, "grad_norm": 0.3516826927661896, "learning_rate": 1.4076727606514616e-05, "loss": 0.4458, "step": 34611 }, { "epoch": 0.734067145977816, "grad_norm": 0.49411675333976746, "learning_rate": 1.4076423080297059e-05, "loss": 0.4958, "step": 34612 }, { "epoch": 0.7340883544357489, "grad_norm": 0.4354550838470459, "learning_rate": 1.4076118549545743e-05, "loss": 0.4661, "step": 34613 }, { "epoch": 0.734109562893682, "grad_norm": 0.36644795536994934, "learning_rate": 1.4075814014261e-05, "loss": 0.4927, "step": 34614 }, { "epoch": 0.734130771351615, "grad_norm": 0.35360029339790344, "learning_rate": 1.4075509474443177e-05, "loss": 0.5326, "step": 34615 }, { "epoch": 0.734151979809548, "grad_norm": 0.3614826500415802, "learning_rate": 1.4075204930092603e-05, "loss": 0.4958, "step": 34616 }, { "epoch": 0.7341731882674811, "grad_norm": 0.4289206862449646, "learning_rate": 1.4074900381209624e-05, "loss": 0.4393, "step": 34617 }, { "epoch": 0.7341943967254141, "grad_norm": 0.48988908529281616, "learning_rate": 1.4074595827794575e-05, "loss": 0.3647, "step": 34618 }, { "epoch": 0.7342156051833472, "grad_norm": 0.32730886340141296, "learning_rate": 1.4074291269847799e-05, "loss": 0.4856, "step": 34619 }, { "epoch": 0.7342368136412801, "grad_norm": 0.3863188624382019, "learning_rate": 1.407398670736963e-05, "loss": 0.5175, "step": 34620 }, { "epoch": 0.7342580220992132, "grad_norm": 0.3780432939529419, "learning_rate": 1.4073682140360408e-05, "loss": 0.5369, "step": 34621 }, { "epoch": 0.7342792305571462, "grad_norm": 0.3604928255081177, "learning_rate": 1.407337756882047e-05, "loss": 0.4918, "step": 34622 }, { "epoch": 0.7343004390150792, "grad_norm": 0.3900342583656311, "learning_rate": 1.4073072992750159e-05, "loss": 0.5208, "step": 34623 }, { "epoch": 0.7343216474730122, "grad_norm": 0.38658228516578674, "learning_rate": 1.4072768412149811e-05, "loss": 0.4707, "step": 34624 }, { "epoch": 0.7343428559309453, "grad_norm": 0.40585124492645264, "learning_rate": 1.4072463827019766e-05, "loss": 0.5407, "step": 34625 }, { "epoch": 0.7343640643888782, "grad_norm": 0.37625646591186523, "learning_rate": 1.4072159237360362e-05, "loss": 0.5518, "step": 34626 }, { "epoch": 0.7343852728468113, "grad_norm": 0.47363102436065674, "learning_rate": 1.4071854643171938e-05, "loss": 0.4847, "step": 34627 }, { "epoch": 0.7344064813047443, "grad_norm": 0.37432757019996643, "learning_rate": 1.4071550044454831e-05, "loss": 0.59, "step": 34628 }, { "epoch": 0.7344276897626774, "grad_norm": 0.3535494804382324, "learning_rate": 1.4071245441209382e-05, "loss": 0.4016, "step": 34629 }, { "epoch": 0.7344488982206103, "grad_norm": 0.7079110741615295, "learning_rate": 1.4070940833435931e-05, "loss": 0.5118, "step": 34630 }, { "epoch": 0.7344701066785434, "grad_norm": 0.32835665345191956, "learning_rate": 1.4070636221134813e-05, "loss": 0.4535, "step": 34631 }, { "epoch": 0.7344913151364765, "grad_norm": 0.4204087257385254, "learning_rate": 1.407033160430637e-05, "loss": 0.49, "step": 34632 }, { "epoch": 0.7345125235944094, "grad_norm": 0.38060295581817627, "learning_rate": 1.4070026982950943e-05, "loss": 0.615, "step": 34633 }, { "epoch": 0.7345337320523425, "grad_norm": 0.3822139799594879, "learning_rate": 1.406972235706886e-05, "loss": 0.4687, "step": 34634 }, { "epoch": 0.7345549405102755, "grad_norm": 0.37567079067230225, "learning_rate": 1.4069417726660471e-05, "loss": 0.4754, "step": 34635 }, { "epoch": 0.7345761489682086, "grad_norm": 0.3763696551322937, "learning_rate": 1.406911309172611e-05, "loss": 0.5248, "step": 34636 }, { "epoch": 0.7345973574261415, "grad_norm": 0.4056810140609741, "learning_rate": 1.4068808452266115e-05, "loss": 0.5229, "step": 34637 }, { "epoch": 0.7346185658840746, "grad_norm": 0.35906657576560974, "learning_rate": 1.4068503808280832e-05, "loss": 0.4323, "step": 34638 }, { "epoch": 0.7346397743420076, "grad_norm": 0.34741199016571045, "learning_rate": 1.4068199159770593e-05, "loss": 0.5384, "step": 34639 }, { "epoch": 0.7346609827999406, "grad_norm": 0.45480605959892273, "learning_rate": 1.4067894506735736e-05, "loss": 0.4597, "step": 34640 }, { "epoch": 0.7346821912578736, "grad_norm": 0.29262521862983704, "learning_rate": 1.4067589849176602e-05, "loss": 0.4054, "step": 34641 }, { "epoch": 0.7347033997158067, "grad_norm": 0.4029269516468048, "learning_rate": 1.406728518709353e-05, "loss": 0.5265, "step": 34642 }, { "epoch": 0.7347246081737396, "grad_norm": 0.35544732213020325, "learning_rate": 1.406698052048686e-05, "loss": 0.5922, "step": 34643 }, { "epoch": 0.7347458166316727, "grad_norm": 0.3520910143852234, "learning_rate": 1.4066675849356928e-05, "loss": 0.4413, "step": 34644 }, { "epoch": 0.7347670250896058, "grad_norm": 0.3414822518825531, "learning_rate": 1.4066371173704074e-05, "loss": 0.5232, "step": 34645 }, { "epoch": 0.7347882335475387, "grad_norm": 0.3588935434818268, "learning_rate": 1.406606649352864e-05, "loss": 0.4845, "step": 34646 }, { "epoch": 0.7348094420054718, "grad_norm": 0.341928631067276, "learning_rate": 1.4065761808830962e-05, "loss": 0.4984, "step": 34647 }, { "epoch": 0.7348306504634048, "grad_norm": 0.4256488084793091, "learning_rate": 1.4065457119611376e-05, "loss": 0.4766, "step": 34648 }, { "epoch": 0.7348518589213379, "grad_norm": 0.3540032207965851, "learning_rate": 1.4065152425870225e-05, "loss": 0.4868, "step": 34649 }, { "epoch": 0.7348730673792708, "grad_norm": 0.3719438314437866, "learning_rate": 1.4064847727607849e-05, "loss": 0.4858, "step": 34650 }, { "epoch": 0.7348942758372039, "grad_norm": 0.3449941575527191, "learning_rate": 1.4064543024824582e-05, "loss": 0.4324, "step": 34651 }, { "epoch": 0.7349154842951369, "grad_norm": 0.41645193099975586, "learning_rate": 1.4064238317520768e-05, "loss": 0.4413, "step": 34652 }, { "epoch": 0.7349366927530699, "grad_norm": 0.4602136015892029, "learning_rate": 1.4063933605696743e-05, "loss": 0.4967, "step": 34653 }, { "epoch": 0.7349579012110029, "grad_norm": 0.36720341444015503, "learning_rate": 1.4063628889352843e-05, "loss": 0.5159, "step": 34654 }, { "epoch": 0.734979109668936, "grad_norm": 0.3855712115764618, "learning_rate": 1.4063324168489414e-05, "loss": 0.5474, "step": 34655 }, { "epoch": 0.735000318126869, "grad_norm": 0.39626944065093994, "learning_rate": 1.4063019443106792e-05, "loss": 0.4835, "step": 34656 }, { "epoch": 0.735021526584802, "grad_norm": 0.38755571842193604, "learning_rate": 1.406271471320531e-05, "loss": 0.5865, "step": 34657 }, { "epoch": 0.7350427350427351, "grad_norm": 0.3775474429130554, "learning_rate": 1.4062409978785318e-05, "loss": 0.5541, "step": 34658 }, { "epoch": 0.7350639435006681, "grad_norm": 0.600101113319397, "learning_rate": 1.4062105239847144e-05, "loss": 0.5513, "step": 34659 }, { "epoch": 0.7350851519586011, "grad_norm": 0.35556891560554504, "learning_rate": 1.4061800496391136e-05, "loss": 0.5481, "step": 34660 }, { "epoch": 0.7351063604165341, "grad_norm": 0.30544722080230713, "learning_rate": 1.4061495748417625e-05, "loss": 0.4406, "step": 34661 }, { "epoch": 0.7351275688744672, "grad_norm": 0.3619149923324585, "learning_rate": 1.4061190995926956e-05, "loss": 0.4793, "step": 34662 }, { "epoch": 0.7351487773324001, "grad_norm": 0.4003630578517914, "learning_rate": 1.4060886238919464e-05, "loss": 0.4572, "step": 34663 }, { "epoch": 0.7351699857903332, "grad_norm": 0.3505929708480835, "learning_rate": 1.4060581477395493e-05, "loss": 0.5558, "step": 34664 }, { "epoch": 0.7351911942482662, "grad_norm": 0.36103740334510803, "learning_rate": 1.4060276711355373e-05, "loss": 0.541, "step": 34665 }, { "epoch": 0.7352124027061993, "grad_norm": 0.3869062066078186, "learning_rate": 1.4059971940799452e-05, "loss": 0.4873, "step": 34666 }, { "epoch": 0.7352336111641322, "grad_norm": 0.34190240502357483, "learning_rate": 1.4059667165728066e-05, "loss": 0.4422, "step": 34667 }, { "epoch": 0.7352548196220653, "grad_norm": 0.4044143259525299, "learning_rate": 1.405936238614155e-05, "loss": 0.5303, "step": 34668 }, { "epoch": 0.7352760280799983, "grad_norm": 0.42945441603660583, "learning_rate": 1.4059057602040251e-05, "loss": 0.5094, "step": 34669 }, { "epoch": 0.7352972365379313, "grad_norm": 0.3395453989505768, "learning_rate": 1.4058752813424503e-05, "loss": 0.5092, "step": 34670 }, { "epoch": 0.7353184449958643, "grad_norm": 0.3926393389701843, "learning_rate": 1.4058448020294642e-05, "loss": 0.4919, "step": 34671 }, { "epoch": 0.7353396534537974, "grad_norm": 0.37551993131637573, "learning_rate": 1.4058143222651012e-05, "loss": 0.5405, "step": 34672 }, { "epoch": 0.7353608619117304, "grad_norm": 0.5433579087257385, "learning_rate": 1.405783842049395e-05, "loss": 0.5309, "step": 34673 }, { "epoch": 0.7353820703696634, "grad_norm": 0.362453430891037, "learning_rate": 1.4057533613823795e-05, "loss": 0.5624, "step": 34674 }, { "epoch": 0.7354032788275965, "grad_norm": 0.34454673528671265, "learning_rate": 1.4057228802640887e-05, "loss": 0.4978, "step": 34675 }, { "epoch": 0.7354244872855294, "grad_norm": 0.37449437379837036, "learning_rate": 1.4056923986945567e-05, "loss": 0.4635, "step": 34676 }, { "epoch": 0.7354456957434625, "grad_norm": 0.42978358268737793, "learning_rate": 1.4056619166738169e-05, "loss": 0.5993, "step": 34677 }, { "epoch": 0.7354669042013955, "grad_norm": 0.4290503263473511, "learning_rate": 1.4056314342019036e-05, "loss": 0.6067, "step": 34678 }, { "epoch": 0.7354881126593286, "grad_norm": 0.4149825870990753, "learning_rate": 1.4056009512788502e-05, "loss": 0.4952, "step": 34679 }, { "epoch": 0.7355093211172615, "grad_norm": 0.35387730598449707, "learning_rate": 1.405570467904691e-05, "loss": 0.4974, "step": 34680 }, { "epoch": 0.7355305295751946, "grad_norm": 0.37935808300971985, "learning_rate": 1.4055399840794599e-05, "loss": 0.4929, "step": 34681 }, { "epoch": 0.7355517380331276, "grad_norm": 0.353604257106781, "learning_rate": 1.4055094998031909e-05, "loss": 0.4381, "step": 34682 }, { "epoch": 0.7355729464910606, "grad_norm": 0.3894546329975128, "learning_rate": 1.4054790150759177e-05, "loss": 0.5572, "step": 34683 }, { "epoch": 0.7355941549489936, "grad_norm": 0.3644544184207916, "learning_rate": 1.4054485298976743e-05, "loss": 0.5058, "step": 34684 }, { "epoch": 0.7356153634069267, "grad_norm": 0.33814117312431335, "learning_rate": 1.4054180442684943e-05, "loss": 0.4396, "step": 34685 }, { "epoch": 0.7356365718648598, "grad_norm": 0.37985706329345703, "learning_rate": 1.4053875581884122e-05, "loss": 0.4733, "step": 34686 }, { "epoch": 0.7356577803227927, "grad_norm": 0.4152633547782898, "learning_rate": 1.4053570716574617e-05, "loss": 0.4817, "step": 34687 }, { "epoch": 0.7356789887807258, "grad_norm": 0.37896209955215454, "learning_rate": 1.4053265846756761e-05, "loss": 0.5654, "step": 34688 }, { "epoch": 0.7357001972386588, "grad_norm": 0.47046324610710144, "learning_rate": 1.40529609724309e-05, "loss": 0.4507, "step": 34689 }, { "epoch": 0.7357214056965918, "grad_norm": 0.3665921092033386, "learning_rate": 1.4052656093597374e-05, "loss": 0.4152, "step": 34690 }, { "epoch": 0.7357426141545248, "grad_norm": 0.3112294673919678, "learning_rate": 1.4052351210256516e-05, "loss": 0.3715, "step": 34691 }, { "epoch": 0.7357638226124579, "grad_norm": 0.33676591515541077, "learning_rate": 1.4052046322408668e-05, "loss": 0.4758, "step": 34692 }, { "epoch": 0.7357850310703908, "grad_norm": 0.3679918348789215, "learning_rate": 1.405174143005417e-05, "loss": 0.4711, "step": 34693 }, { "epoch": 0.7358062395283239, "grad_norm": 0.36237409710884094, "learning_rate": 1.4051436533193358e-05, "loss": 0.4987, "step": 34694 }, { "epoch": 0.7358274479862569, "grad_norm": 0.36853206157684326, "learning_rate": 1.4051131631826578e-05, "loss": 0.5236, "step": 34695 }, { "epoch": 0.73584865644419, "grad_norm": 0.37629643082618713, "learning_rate": 1.405082672595416e-05, "loss": 0.4641, "step": 34696 }, { "epoch": 0.7358698649021229, "grad_norm": 0.42617249488830566, "learning_rate": 1.4050521815576452e-05, "loss": 0.541, "step": 34697 }, { "epoch": 0.735891073360056, "grad_norm": 0.3357684314250946, "learning_rate": 1.4050216900693789e-05, "loss": 0.4589, "step": 34698 }, { "epoch": 0.7359122818179891, "grad_norm": 0.38958805799484253, "learning_rate": 1.4049911981306505e-05, "loss": 0.5728, "step": 34699 }, { "epoch": 0.735933490275922, "grad_norm": 0.3429783880710602, "learning_rate": 1.4049607057414947e-05, "loss": 0.4066, "step": 34700 }, { "epoch": 0.7359546987338551, "grad_norm": 0.37395820021629333, "learning_rate": 1.4049302129019454e-05, "loss": 0.5043, "step": 34701 }, { "epoch": 0.7359759071917881, "grad_norm": 0.4206854999065399, "learning_rate": 1.4048997196120358e-05, "loss": 0.5531, "step": 34702 }, { "epoch": 0.7359971156497211, "grad_norm": 0.35452356934547424, "learning_rate": 1.4048692258718004e-05, "loss": 0.5189, "step": 34703 }, { "epoch": 0.7360183241076541, "grad_norm": 0.36572763323783875, "learning_rate": 1.4048387316812733e-05, "loss": 0.5038, "step": 34704 }, { "epoch": 0.7360395325655872, "grad_norm": 0.4042035937309265, "learning_rate": 1.4048082370404875e-05, "loss": 0.5625, "step": 34705 }, { "epoch": 0.7360607410235201, "grad_norm": 0.41985100507736206, "learning_rate": 1.4047777419494776e-05, "loss": 0.4421, "step": 34706 }, { "epoch": 0.7360819494814532, "grad_norm": 0.7811985611915588, "learning_rate": 1.4047472464082776e-05, "loss": 0.4836, "step": 34707 }, { "epoch": 0.7361031579393862, "grad_norm": 0.33674395084381104, "learning_rate": 1.4047167504169212e-05, "loss": 0.4502, "step": 34708 }, { "epoch": 0.7361243663973193, "grad_norm": 0.33445677161216736, "learning_rate": 1.4046862539754424e-05, "loss": 0.4014, "step": 34709 }, { "epoch": 0.7361455748552522, "grad_norm": 0.3835000991821289, "learning_rate": 1.4046557570838748e-05, "loss": 0.4878, "step": 34710 }, { "epoch": 0.7361667833131853, "grad_norm": 0.3589371144771576, "learning_rate": 1.404625259742253e-05, "loss": 0.4908, "step": 34711 }, { "epoch": 0.7361879917711184, "grad_norm": 0.35595425963401794, "learning_rate": 1.4045947619506102e-05, "loss": 0.5345, "step": 34712 }, { "epoch": 0.7362092002290513, "grad_norm": 0.3333186209201813, "learning_rate": 1.4045642637089807e-05, "loss": 0.5334, "step": 34713 }, { "epoch": 0.7362304086869844, "grad_norm": 0.44779953360557556, "learning_rate": 1.4045337650173983e-05, "loss": 0.4692, "step": 34714 }, { "epoch": 0.7362516171449174, "grad_norm": 0.3890337646007538, "learning_rate": 1.4045032658758971e-05, "loss": 0.5262, "step": 34715 }, { "epoch": 0.7362728256028505, "grad_norm": 0.39505136013031006, "learning_rate": 1.4044727662845108e-05, "loss": 0.5251, "step": 34716 }, { "epoch": 0.7362940340607834, "grad_norm": 0.41714999079704285, "learning_rate": 1.4044422662432732e-05, "loss": 0.6019, "step": 34717 }, { "epoch": 0.7363152425187165, "grad_norm": 0.34000909328460693, "learning_rate": 1.404411765752219e-05, "loss": 0.4681, "step": 34718 }, { "epoch": 0.7363364509766495, "grad_norm": 0.3531130850315094, "learning_rate": 1.4043812648113808e-05, "loss": 0.4811, "step": 34719 }, { "epoch": 0.7363576594345825, "grad_norm": 0.3713248372077942, "learning_rate": 1.4043507634207938e-05, "loss": 0.4911, "step": 34720 }, { "epoch": 0.7363788678925155, "grad_norm": 0.34153005480766296, "learning_rate": 1.4043202615804911e-05, "loss": 0.4137, "step": 34721 }, { "epoch": 0.7364000763504486, "grad_norm": 0.33525583148002625, "learning_rate": 1.404289759290507e-05, "loss": 0.5208, "step": 34722 }, { "epoch": 0.7364212848083815, "grad_norm": 0.356151819229126, "learning_rate": 1.4042592565508755e-05, "loss": 0.5372, "step": 34723 }, { "epoch": 0.7364424932663146, "grad_norm": 0.37261077761650085, "learning_rate": 1.4042287533616302e-05, "loss": 0.5176, "step": 34724 }, { "epoch": 0.7364637017242476, "grad_norm": 0.36054688692092896, "learning_rate": 1.404198249722805e-05, "loss": 0.5726, "step": 34725 }, { "epoch": 0.7364849101821807, "grad_norm": 0.38432180881500244, "learning_rate": 1.4041677456344345e-05, "loss": 0.4587, "step": 34726 }, { "epoch": 0.7365061186401137, "grad_norm": 0.3543001711368561, "learning_rate": 1.4041372410965516e-05, "loss": 0.4649, "step": 34727 }, { "epoch": 0.7365273270980467, "grad_norm": 0.40961822867393494, "learning_rate": 1.4041067361091914e-05, "loss": 0.5581, "step": 34728 }, { "epoch": 0.7365485355559798, "grad_norm": 0.36088475584983826, "learning_rate": 1.404076230672387e-05, "loss": 0.5294, "step": 34729 }, { "epoch": 0.7365697440139127, "grad_norm": 0.3871706426143646, "learning_rate": 1.4040457247861723e-05, "loss": 0.5032, "step": 34730 }, { "epoch": 0.7365909524718458, "grad_norm": 0.44910427927970886, "learning_rate": 1.4040152184505813e-05, "loss": 0.5034, "step": 34731 }, { "epoch": 0.7366121609297788, "grad_norm": 0.32637837529182434, "learning_rate": 1.4039847116656487e-05, "loss": 0.4191, "step": 34732 }, { "epoch": 0.7366333693877118, "grad_norm": 0.3445141911506653, "learning_rate": 1.4039542044314072e-05, "loss": 0.5103, "step": 34733 }, { "epoch": 0.7366545778456448, "grad_norm": 0.3846801519393921, "learning_rate": 1.4039236967478918e-05, "loss": 0.3967, "step": 34734 }, { "epoch": 0.7366757863035779, "grad_norm": 0.37777096033096313, "learning_rate": 1.4038931886151357e-05, "loss": 0.4786, "step": 34735 }, { "epoch": 0.7366969947615108, "grad_norm": 0.3576177954673767, "learning_rate": 1.4038626800331732e-05, "loss": 0.4379, "step": 34736 }, { "epoch": 0.7367182032194439, "grad_norm": 0.3517511785030365, "learning_rate": 1.4038321710020382e-05, "loss": 0.5126, "step": 34737 }, { "epoch": 0.7367394116773769, "grad_norm": 0.3761323392391205, "learning_rate": 1.4038016615217642e-05, "loss": 0.4627, "step": 34738 }, { "epoch": 0.73676062013531, "grad_norm": 0.3246130645275116, "learning_rate": 1.4037711515923858e-05, "loss": 0.458, "step": 34739 }, { "epoch": 0.736781828593243, "grad_norm": 0.4095142185688019, "learning_rate": 1.4037406412139368e-05, "loss": 0.5619, "step": 34740 }, { "epoch": 0.736803037051176, "grad_norm": 0.37398287653923035, "learning_rate": 1.4037101303864504e-05, "loss": 0.6006, "step": 34741 }, { "epoch": 0.7368242455091091, "grad_norm": 0.3558891713619232, "learning_rate": 1.4036796191099618e-05, "loss": 0.4462, "step": 34742 }, { "epoch": 0.736845453967042, "grad_norm": 0.34832602739334106, "learning_rate": 1.403649107384504e-05, "loss": 0.4991, "step": 34743 }, { "epoch": 0.7368666624249751, "grad_norm": 0.36859843134880066, "learning_rate": 1.403618595210111e-05, "loss": 0.4618, "step": 34744 }, { "epoch": 0.7368878708829081, "grad_norm": 0.36762863397598267, "learning_rate": 1.4035880825868166e-05, "loss": 0.5657, "step": 34745 }, { "epoch": 0.7369090793408412, "grad_norm": 0.3884771466255188, "learning_rate": 1.4035575695146557e-05, "loss": 0.5113, "step": 34746 }, { "epoch": 0.7369302877987741, "grad_norm": 0.3944641947746277, "learning_rate": 1.403527055993661e-05, "loss": 0.5516, "step": 34747 }, { "epoch": 0.7369514962567072, "grad_norm": 0.4537968337535858, "learning_rate": 1.4034965420238676e-05, "loss": 0.5123, "step": 34748 }, { "epoch": 0.7369727047146402, "grad_norm": 0.3705909848213196, "learning_rate": 1.4034660276053088e-05, "loss": 0.5191, "step": 34749 }, { "epoch": 0.7369939131725732, "grad_norm": 0.33131951093673706, "learning_rate": 1.4034355127380182e-05, "loss": 0.3866, "step": 34750 }, { "epoch": 0.7370151216305062, "grad_norm": 0.36544468998908997, "learning_rate": 1.4034049974220301e-05, "loss": 0.5026, "step": 34751 }, { "epoch": 0.7370363300884393, "grad_norm": 0.5996817350387573, "learning_rate": 1.403374481657379e-05, "loss": 0.523, "step": 34752 }, { "epoch": 0.7370575385463723, "grad_norm": 0.40791839361190796, "learning_rate": 1.4033439654440977e-05, "loss": 0.5054, "step": 34753 }, { "epoch": 0.7370787470043053, "grad_norm": 0.32864952087402344, "learning_rate": 1.4033134487822211e-05, "loss": 0.4579, "step": 34754 }, { "epoch": 0.7370999554622384, "grad_norm": 0.518913984298706, "learning_rate": 1.4032829316717825e-05, "loss": 0.4544, "step": 34755 }, { "epoch": 0.7371211639201714, "grad_norm": 0.3972143828868866, "learning_rate": 1.4032524141128163e-05, "loss": 0.4632, "step": 34756 }, { "epoch": 0.7371423723781044, "grad_norm": 0.33935993909835815, "learning_rate": 1.4032218961053564e-05, "loss": 0.5135, "step": 34757 }, { "epoch": 0.7371635808360374, "grad_norm": 0.3718257248401642, "learning_rate": 1.4031913776494363e-05, "loss": 0.5363, "step": 34758 }, { "epoch": 0.7371847892939705, "grad_norm": 0.3960944712162018, "learning_rate": 1.4031608587450905e-05, "loss": 0.4541, "step": 34759 }, { "epoch": 0.7372059977519034, "grad_norm": 0.35642504692077637, "learning_rate": 1.4031303393923528e-05, "loss": 0.4633, "step": 34760 }, { "epoch": 0.7372272062098365, "grad_norm": 0.32289159297943115, "learning_rate": 1.4030998195912569e-05, "loss": 0.5296, "step": 34761 }, { "epoch": 0.7372484146677695, "grad_norm": 0.4320509731769562, "learning_rate": 1.4030692993418366e-05, "loss": 0.5257, "step": 34762 }, { "epoch": 0.7372696231257025, "grad_norm": 0.4226927161216736, "learning_rate": 1.4030387786441263e-05, "loss": 0.4891, "step": 34763 }, { "epoch": 0.7372908315836355, "grad_norm": 0.3282569348812103, "learning_rate": 1.4030082574981601e-05, "loss": 0.499, "step": 34764 }, { "epoch": 0.7373120400415686, "grad_norm": 0.3570803999900818, "learning_rate": 1.4029777359039714e-05, "loss": 0.4666, "step": 34765 }, { "epoch": 0.7373332484995015, "grad_norm": 0.3654530942440033, "learning_rate": 1.4029472138615942e-05, "loss": 0.5203, "step": 34766 }, { "epoch": 0.7373544569574346, "grad_norm": 0.3886776864528656, "learning_rate": 1.4029166913710628e-05, "loss": 0.519, "step": 34767 }, { "epoch": 0.7373756654153677, "grad_norm": 0.5480781197547913, "learning_rate": 1.4028861684324109e-05, "loss": 0.4639, "step": 34768 }, { "epoch": 0.7373968738733007, "grad_norm": 0.3616722822189331, "learning_rate": 1.4028556450456725e-05, "loss": 0.4663, "step": 34769 }, { "epoch": 0.7374180823312337, "grad_norm": 0.4186049997806549, "learning_rate": 1.4028251212108814e-05, "loss": 0.3983, "step": 34770 }, { "epoch": 0.7374392907891667, "grad_norm": 0.32213476300239563, "learning_rate": 1.4027945969280721e-05, "loss": 0.4318, "step": 34771 }, { "epoch": 0.7374604992470998, "grad_norm": 0.4038793742656708, "learning_rate": 1.402764072197278e-05, "loss": 0.4727, "step": 34772 }, { "epoch": 0.7374817077050327, "grad_norm": 0.3669919967651367, "learning_rate": 1.4027335470185329e-05, "loss": 0.4661, "step": 34773 }, { "epoch": 0.7375029161629658, "grad_norm": 0.305875688791275, "learning_rate": 1.4027030213918713e-05, "loss": 0.4034, "step": 34774 }, { "epoch": 0.7375241246208988, "grad_norm": 0.3617906868457794, "learning_rate": 1.4026724953173267e-05, "loss": 0.5054, "step": 34775 }, { "epoch": 0.7375453330788319, "grad_norm": 0.37376677989959717, "learning_rate": 1.4026419687949333e-05, "loss": 0.501, "step": 34776 }, { "epoch": 0.7375665415367648, "grad_norm": 0.36044007539749146, "learning_rate": 1.4026114418247252e-05, "loss": 0.4907, "step": 34777 }, { "epoch": 0.7375877499946979, "grad_norm": 0.33871230483055115, "learning_rate": 1.4025809144067362e-05, "loss": 0.504, "step": 34778 }, { "epoch": 0.7376089584526309, "grad_norm": 0.3911064565181732, "learning_rate": 1.402550386541e-05, "loss": 0.5199, "step": 34779 }, { "epoch": 0.7376301669105639, "grad_norm": 0.38412490487098694, "learning_rate": 1.4025198582275507e-05, "loss": 0.5517, "step": 34780 }, { "epoch": 0.737651375368497, "grad_norm": 0.3454422354698181, "learning_rate": 1.4024893294664225e-05, "loss": 0.4267, "step": 34781 }, { "epoch": 0.73767258382643, "grad_norm": 0.3670419156551361, "learning_rate": 1.4024588002576491e-05, "loss": 0.4889, "step": 34782 }, { "epoch": 0.737693792284363, "grad_norm": 0.3658885955810547, "learning_rate": 1.4024282706012646e-05, "loss": 0.4839, "step": 34783 }, { "epoch": 0.737715000742296, "grad_norm": 0.38988226652145386, "learning_rate": 1.4023977404973025e-05, "loss": 0.5376, "step": 34784 }, { "epoch": 0.7377362092002291, "grad_norm": 0.46222352981567383, "learning_rate": 1.4023672099457976e-05, "loss": 0.4996, "step": 34785 }, { "epoch": 0.737757417658162, "grad_norm": 0.32204675674438477, "learning_rate": 1.4023366789467833e-05, "loss": 0.5422, "step": 34786 }, { "epoch": 0.7377786261160951, "grad_norm": 0.4412987530231476, "learning_rate": 1.4023061475002932e-05, "loss": 0.5107, "step": 34787 }, { "epoch": 0.7377998345740281, "grad_norm": 0.6254909634590149, "learning_rate": 1.4022756156063624e-05, "loss": 0.5817, "step": 34788 }, { "epoch": 0.7378210430319612, "grad_norm": 0.321740061044693, "learning_rate": 1.4022450832650236e-05, "loss": 0.4252, "step": 34789 }, { "epoch": 0.7378422514898941, "grad_norm": 0.395591676235199, "learning_rate": 1.4022145504763114e-05, "loss": 0.4804, "step": 34790 }, { "epoch": 0.7378634599478272, "grad_norm": 0.4372749328613281, "learning_rate": 1.4021840172402599e-05, "loss": 0.3931, "step": 34791 }, { "epoch": 0.7378846684057602, "grad_norm": 0.3843262791633606, "learning_rate": 1.4021534835569026e-05, "loss": 0.5003, "step": 34792 }, { "epoch": 0.7379058768636932, "grad_norm": 0.3569771647453308, "learning_rate": 1.4021229494262739e-05, "loss": 0.5666, "step": 34793 }, { "epoch": 0.7379270853216263, "grad_norm": 0.3786850869655609, "learning_rate": 1.4020924148484075e-05, "loss": 0.5498, "step": 34794 }, { "epoch": 0.7379482937795593, "grad_norm": 0.4681362807750702, "learning_rate": 1.4020618798233372e-05, "loss": 0.57, "step": 34795 }, { "epoch": 0.7379695022374924, "grad_norm": 0.43971753120422363, "learning_rate": 1.4020313443510972e-05, "loss": 0.4409, "step": 34796 }, { "epoch": 0.7379907106954253, "grad_norm": 0.38511815667152405, "learning_rate": 1.4020008084317215e-05, "loss": 0.5576, "step": 34797 }, { "epoch": 0.7380119191533584, "grad_norm": 0.4160618185997009, "learning_rate": 1.4019702720652441e-05, "loss": 0.5261, "step": 34798 }, { "epoch": 0.7380331276112914, "grad_norm": 0.3528926372528076, "learning_rate": 1.4019397352516988e-05, "loss": 0.3497, "step": 34799 }, { "epoch": 0.7380543360692244, "grad_norm": 0.35377344489097595, "learning_rate": 1.40190919799112e-05, "loss": 0.4557, "step": 34800 }, { "epoch": 0.7380755445271574, "grad_norm": 0.372207909822464, "learning_rate": 1.4018786602835406e-05, "loss": 0.5477, "step": 34801 }, { "epoch": 0.7380967529850905, "grad_norm": 0.406609445810318, "learning_rate": 1.4018481221289954e-05, "loss": 0.5031, "step": 34802 }, { "epoch": 0.7381179614430234, "grad_norm": 0.4051165282726288, "learning_rate": 1.4018175835275185e-05, "loss": 0.4742, "step": 34803 }, { "epoch": 0.7381391699009565, "grad_norm": 0.37622836232185364, "learning_rate": 1.4017870444791432e-05, "loss": 0.506, "step": 34804 }, { "epoch": 0.7381603783588895, "grad_norm": 0.378640741109848, "learning_rate": 1.4017565049839043e-05, "loss": 0.5305, "step": 34805 }, { "epoch": 0.7381815868168226, "grad_norm": 0.4181254506111145, "learning_rate": 1.4017259650418348e-05, "loss": 0.4182, "step": 34806 }, { "epoch": 0.7382027952747555, "grad_norm": 0.3740736246109009, "learning_rate": 1.4016954246529697e-05, "loss": 0.4668, "step": 34807 }, { "epoch": 0.7382240037326886, "grad_norm": 0.4325668215751648, "learning_rate": 1.4016648838173422e-05, "loss": 0.5256, "step": 34808 }, { "epoch": 0.7382452121906217, "grad_norm": 0.34036654233932495, "learning_rate": 1.4016343425349864e-05, "loss": 0.5283, "step": 34809 }, { "epoch": 0.7382664206485546, "grad_norm": 0.36651429533958435, "learning_rate": 1.4016038008059364e-05, "loss": 0.5091, "step": 34810 }, { "epoch": 0.7382876291064877, "grad_norm": 0.39921388030052185, "learning_rate": 1.4015732586302262e-05, "loss": 0.5433, "step": 34811 }, { "epoch": 0.7383088375644207, "grad_norm": 0.4031105041503906, "learning_rate": 1.4015427160078897e-05, "loss": 0.4928, "step": 34812 }, { "epoch": 0.7383300460223537, "grad_norm": 0.35340002179145813, "learning_rate": 1.401512172938961e-05, "loss": 0.4929, "step": 34813 }, { "epoch": 0.7383512544802867, "grad_norm": 0.37219855189323425, "learning_rate": 1.4014816294234738e-05, "loss": 0.5452, "step": 34814 }, { "epoch": 0.7383724629382198, "grad_norm": 0.36987239122390747, "learning_rate": 1.401451085461462e-05, "loss": 0.4296, "step": 34815 }, { "epoch": 0.7383936713961528, "grad_norm": 0.35511043667793274, "learning_rate": 1.4014205410529603e-05, "loss": 0.5073, "step": 34816 }, { "epoch": 0.7384148798540858, "grad_norm": 0.468758761882782, "learning_rate": 1.401389996198002e-05, "loss": 0.5023, "step": 34817 }, { "epoch": 0.7384360883120188, "grad_norm": 0.4889855682849884, "learning_rate": 1.4013594508966209e-05, "loss": 0.5409, "step": 34818 }, { "epoch": 0.7384572967699519, "grad_norm": 0.38937094807624817, "learning_rate": 1.4013289051488517e-05, "loss": 0.4441, "step": 34819 }, { "epoch": 0.7384785052278848, "grad_norm": 0.362491637468338, "learning_rate": 1.4012983589547277e-05, "loss": 0.5502, "step": 34820 }, { "epoch": 0.7384997136858179, "grad_norm": 0.3165563642978668, "learning_rate": 1.4012678123142831e-05, "loss": 0.4291, "step": 34821 }, { "epoch": 0.738520922143751, "grad_norm": 0.328357458114624, "learning_rate": 1.4012372652275524e-05, "loss": 0.4058, "step": 34822 }, { "epoch": 0.7385421306016839, "grad_norm": 0.3697815239429474, "learning_rate": 1.4012067176945685e-05, "loss": 0.5113, "step": 34823 }, { "epoch": 0.738563339059617, "grad_norm": 0.3917013108730316, "learning_rate": 1.4011761697153666e-05, "loss": 0.5313, "step": 34824 }, { "epoch": 0.73858454751755, "grad_norm": 0.4301658868789673, "learning_rate": 1.4011456212899796e-05, "loss": 0.5004, "step": 34825 }, { "epoch": 0.7386057559754831, "grad_norm": 0.35328200459480286, "learning_rate": 1.4011150724184419e-05, "loss": 0.5152, "step": 34826 }, { "epoch": 0.738626964433416, "grad_norm": 0.3578380048274994, "learning_rate": 1.4010845231007878e-05, "loss": 0.4532, "step": 34827 }, { "epoch": 0.7386481728913491, "grad_norm": 0.3553611934185028, "learning_rate": 1.4010539733370507e-05, "loss": 0.5489, "step": 34828 }, { "epoch": 0.7386693813492821, "grad_norm": 0.3941359519958496, "learning_rate": 1.4010234231272651e-05, "loss": 0.5322, "step": 34829 }, { "epoch": 0.7386905898072151, "grad_norm": 0.3953114449977875, "learning_rate": 1.4009928724714645e-05, "loss": 0.4557, "step": 34830 }, { "epoch": 0.7387117982651481, "grad_norm": 0.35009855031967163, "learning_rate": 1.4009623213696834e-05, "loss": 0.532, "step": 34831 }, { "epoch": 0.7387330067230812, "grad_norm": 0.3707500100135803, "learning_rate": 1.4009317698219551e-05, "loss": 0.4755, "step": 34832 }, { "epoch": 0.7387542151810141, "grad_norm": 0.3914932906627655, "learning_rate": 1.4009012178283143e-05, "loss": 0.435, "step": 34833 }, { "epoch": 0.7387754236389472, "grad_norm": 0.3575553894042969, "learning_rate": 1.4008706653887944e-05, "loss": 0.4983, "step": 34834 }, { "epoch": 0.7387966320968803, "grad_norm": 0.4064912497997284, "learning_rate": 1.4008401125034298e-05, "loss": 0.4888, "step": 34835 }, { "epoch": 0.7388178405548133, "grad_norm": 0.38256731629371643, "learning_rate": 1.4008095591722543e-05, "loss": 0.5352, "step": 34836 }, { "epoch": 0.7388390490127463, "grad_norm": 0.34105196595191956, "learning_rate": 1.4007790053953016e-05, "loss": 0.4273, "step": 34837 }, { "epoch": 0.7388602574706793, "grad_norm": 0.36768338084220886, "learning_rate": 1.4007484511726067e-05, "loss": 0.421, "step": 34838 }, { "epoch": 0.7388814659286124, "grad_norm": 0.3617526590824127, "learning_rate": 1.4007178965042023e-05, "loss": 0.4956, "step": 34839 }, { "epoch": 0.7389026743865453, "grad_norm": 0.3712870180606842, "learning_rate": 1.400687341390123e-05, "loss": 0.5551, "step": 34840 }, { "epoch": 0.7389238828444784, "grad_norm": 0.43274980783462524, "learning_rate": 1.4006567858304025e-05, "loss": 0.4413, "step": 34841 }, { "epoch": 0.7389450913024114, "grad_norm": 0.3432694971561432, "learning_rate": 1.4006262298250754e-05, "loss": 0.5279, "step": 34842 }, { "epoch": 0.7389662997603444, "grad_norm": 0.3579644560813904, "learning_rate": 1.4005956733741751e-05, "loss": 0.544, "step": 34843 }, { "epoch": 0.7389875082182774, "grad_norm": 0.3406251072883606, "learning_rate": 1.400565116477736e-05, "loss": 0.4858, "step": 34844 }, { "epoch": 0.7390087166762105, "grad_norm": 0.352643221616745, "learning_rate": 1.4005345591357915e-05, "loss": 0.5605, "step": 34845 }, { "epoch": 0.7390299251341435, "grad_norm": 0.4112756550312042, "learning_rate": 1.400504001348376e-05, "loss": 0.4892, "step": 34846 }, { "epoch": 0.7390511335920765, "grad_norm": 0.3839629888534546, "learning_rate": 1.4004734431155237e-05, "loss": 0.4799, "step": 34847 }, { "epoch": 0.7390723420500095, "grad_norm": 0.545259416103363, "learning_rate": 1.400442884437268e-05, "loss": 0.5333, "step": 34848 }, { "epoch": 0.7390935505079426, "grad_norm": 0.35123634338378906, "learning_rate": 1.4004123253136433e-05, "loss": 0.5014, "step": 34849 }, { "epoch": 0.7391147589658756, "grad_norm": 0.591808557510376, "learning_rate": 1.4003817657446836e-05, "loss": 0.4424, "step": 34850 }, { "epoch": 0.7391359674238086, "grad_norm": 0.42258110642433167, "learning_rate": 1.4003512057304227e-05, "loss": 0.4273, "step": 34851 }, { "epoch": 0.7391571758817417, "grad_norm": 0.4160146713256836, "learning_rate": 1.4003206452708949e-05, "loss": 0.5097, "step": 34852 }, { "epoch": 0.7391783843396746, "grad_norm": 0.3613356649875641, "learning_rate": 1.4002900843661334e-05, "loss": 0.5111, "step": 34853 }, { "epoch": 0.7391995927976077, "grad_norm": 0.4541689455509186, "learning_rate": 1.4002595230161734e-05, "loss": 0.477, "step": 34854 }, { "epoch": 0.7392208012555407, "grad_norm": 0.3773832619190216, "learning_rate": 1.4002289612210476e-05, "loss": 0.4697, "step": 34855 }, { "epoch": 0.7392420097134738, "grad_norm": 0.37077030539512634, "learning_rate": 1.4001983989807912e-05, "loss": 0.4673, "step": 34856 }, { "epoch": 0.7392632181714067, "grad_norm": 0.5610377192497253, "learning_rate": 1.4001678362954372e-05, "loss": 0.4945, "step": 34857 }, { "epoch": 0.7392844266293398, "grad_norm": 0.3527187705039978, "learning_rate": 1.4001372731650202e-05, "loss": 0.456, "step": 34858 }, { "epoch": 0.7393056350872728, "grad_norm": 0.5408867597579956, "learning_rate": 1.4001067095895741e-05, "loss": 0.476, "step": 34859 }, { "epoch": 0.7393268435452058, "grad_norm": 0.4620635509490967, "learning_rate": 1.4000761455691323e-05, "loss": 0.5146, "step": 34860 }, { "epoch": 0.7393480520031388, "grad_norm": 0.38073763251304626, "learning_rate": 1.4000455811037299e-05, "loss": 0.5211, "step": 34861 }, { "epoch": 0.7393692604610719, "grad_norm": 0.40506085753440857, "learning_rate": 1.4000150161934e-05, "loss": 0.5179, "step": 34862 }, { "epoch": 0.739390468919005, "grad_norm": 0.34086278080940247, "learning_rate": 1.3999844508381768e-05, "loss": 0.4887, "step": 34863 }, { "epoch": 0.7394116773769379, "grad_norm": 0.3450150489807129, "learning_rate": 1.3999538850380948e-05, "loss": 0.4823, "step": 34864 }, { "epoch": 0.739432885834871, "grad_norm": 0.322517067193985, "learning_rate": 1.399923318793187e-05, "loss": 0.4478, "step": 34865 }, { "epoch": 0.739454094292804, "grad_norm": 0.3710668981075287, "learning_rate": 1.399892752103488e-05, "loss": 0.4497, "step": 34866 }, { "epoch": 0.739475302750737, "grad_norm": 0.3441089689731598, "learning_rate": 1.3998621849690321e-05, "loss": 0.4817, "step": 34867 }, { "epoch": 0.73949651120867, "grad_norm": 0.3895321190357208, "learning_rate": 1.399831617389853e-05, "loss": 0.5094, "step": 34868 }, { "epoch": 0.7395177196666031, "grad_norm": 0.3346256911754608, "learning_rate": 1.3998010493659844e-05, "loss": 0.4909, "step": 34869 }, { "epoch": 0.739538928124536, "grad_norm": 0.3715237081050873, "learning_rate": 1.3997704808974606e-05, "loss": 0.4878, "step": 34870 }, { "epoch": 0.7395601365824691, "grad_norm": 0.3663061857223511, "learning_rate": 1.3997399119843154e-05, "loss": 0.4469, "step": 34871 }, { "epoch": 0.7395813450404021, "grad_norm": 0.5337201356887817, "learning_rate": 1.399709342626583e-05, "loss": 0.4961, "step": 34872 }, { "epoch": 0.7396025534983351, "grad_norm": 0.3997448682785034, "learning_rate": 1.3996787728242976e-05, "loss": 0.5491, "step": 34873 }, { "epoch": 0.7396237619562681, "grad_norm": 0.4231852889060974, "learning_rate": 1.3996482025774926e-05, "loss": 0.4526, "step": 34874 }, { "epoch": 0.7396449704142012, "grad_norm": 0.341065376996994, "learning_rate": 1.3996176318862027e-05, "loss": 0.5508, "step": 34875 }, { "epoch": 0.7396661788721343, "grad_norm": 0.3402392864227295, "learning_rate": 1.3995870607504614e-05, "loss": 0.4651, "step": 34876 }, { "epoch": 0.7396873873300672, "grad_norm": 0.4573996067047119, "learning_rate": 1.3995564891703026e-05, "loss": 0.4517, "step": 34877 }, { "epoch": 0.7397085957880003, "grad_norm": 0.37743306159973145, "learning_rate": 1.399525917145761e-05, "loss": 0.542, "step": 34878 }, { "epoch": 0.7397298042459333, "grad_norm": 0.47134310007095337, "learning_rate": 1.3994953446768697e-05, "loss": 0.5414, "step": 34879 }, { "epoch": 0.7397510127038663, "grad_norm": 0.34723329544067383, "learning_rate": 1.3994647717636632e-05, "loss": 0.4585, "step": 34880 }, { "epoch": 0.7397722211617993, "grad_norm": 0.3289080560207367, "learning_rate": 1.3994341984061756e-05, "loss": 0.4171, "step": 34881 }, { "epoch": 0.7397934296197324, "grad_norm": 0.3659304082393646, "learning_rate": 1.399403624604441e-05, "loss": 0.4894, "step": 34882 }, { "epoch": 0.7398146380776653, "grad_norm": 0.4168131947517395, "learning_rate": 1.3993730503584928e-05, "loss": 0.5025, "step": 34883 }, { "epoch": 0.7398358465355984, "grad_norm": 0.4004130959510803, "learning_rate": 1.3993424756683656e-05, "loss": 0.5861, "step": 34884 }, { "epoch": 0.7398570549935314, "grad_norm": 0.3696117699146271, "learning_rate": 1.3993119005340928e-05, "loss": 0.4927, "step": 34885 }, { "epoch": 0.7398782634514645, "grad_norm": 0.38682985305786133, "learning_rate": 1.3992813249557089e-05, "loss": 0.5378, "step": 34886 }, { "epoch": 0.7398994719093974, "grad_norm": 0.33531078696250916, "learning_rate": 1.3992507489332478e-05, "loss": 0.4882, "step": 34887 }, { "epoch": 0.7399206803673305, "grad_norm": 0.33219775557518005, "learning_rate": 1.3992201724667436e-05, "loss": 0.4752, "step": 34888 }, { "epoch": 0.7399418888252635, "grad_norm": 0.34570056200027466, "learning_rate": 1.3991895955562302e-05, "loss": 0.4793, "step": 34889 }, { "epoch": 0.7399630972831965, "grad_norm": 0.36044013500213623, "learning_rate": 1.3991590182017417e-05, "loss": 0.439, "step": 34890 }, { "epoch": 0.7399843057411296, "grad_norm": 0.37708714604377747, "learning_rate": 1.3991284404033119e-05, "loss": 0.4136, "step": 34891 }, { "epoch": 0.7400055141990626, "grad_norm": 0.37730535864830017, "learning_rate": 1.3990978621609745e-05, "loss": 0.4555, "step": 34892 }, { "epoch": 0.7400267226569957, "grad_norm": 0.38570570945739746, "learning_rate": 1.3990672834747646e-05, "loss": 0.5513, "step": 34893 }, { "epoch": 0.7400479311149286, "grad_norm": 0.42797672748565674, "learning_rate": 1.3990367043447151e-05, "loss": 0.4988, "step": 34894 }, { "epoch": 0.7400691395728617, "grad_norm": 0.3773120939731598, "learning_rate": 1.3990061247708606e-05, "loss": 0.4415, "step": 34895 }, { "epoch": 0.7400903480307947, "grad_norm": 0.32650044560432434, "learning_rate": 1.3989755447532351e-05, "loss": 0.4036, "step": 34896 }, { "epoch": 0.7401115564887277, "grad_norm": 0.380748450756073, "learning_rate": 1.3989449642918721e-05, "loss": 0.4238, "step": 34897 }, { "epoch": 0.7401327649466607, "grad_norm": 0.3169323801994324, "learning_rate": 1.3989143833868062e-05, "loss": 0.488, "step": 34898 }, { "epoch": 0.7401539734045938, "grad_norm": 0.3949575126171112, "learning_rate": 1.3988838020380713e-05, "loss": 0.4277, "step": 34899 }, { "epoch": 0.7401751818625267, "grad_norm": 0.29740023612976074, "learning_rate": 1.398853220245701e-05, "loss": 0.3837, "step": 34900 }, { "epoch": 0.7401963903204598, "grad_norm": 0.3778633773326874, "learning_rate": 1.3988226380097298e-05, "loss": 0.4749, "step": 34901 }, { "epoch": 0.7402175987783928, "grad_norm": 0.3058358132839203, "learning_rate": 1.3987920553301915e-05, "loss": 0.458, "step": 34902 }, { "epoch": 0.7402388072363258, "grad_norm": 0.3666459321975708, "learning_rate": 1.3987614722071201e-05, "loss": 0.5061, "step": 34903 }, { "epoch": 0.7402600156942589, "grad_norm": 0.3365098237991333, "learning_rate": 1.39873088864055e-05, "loss": 0.5195, "step": 34904 }, { "epoch": 0.7402812241521919, "grad_norm": 0.3783363997936249, "learning_rate": 1.3987003046305141e-05, "loss": 0.5616, "step": 34905 }, { "epoch": 0.740302432610125, "grad_norm": 0.37018781900405884, "learning_rate": 1.3986697201770478e-05, "loss": 0.5015, "step": 34906 }, { "epoch": 0.7403236410680579, "grad_norm": 0.38585659861564636, "learning_rate": 1.3986391352801844e-05, "loss": 0.5218, "step": 34907 }, { "epoch": 0.740344849525991, "grad_norm": 0.3801368474960327, "learning_rate": 1.3986085499399579e-05, "loss": 0.5106, "step": 34908 }, { "epoch": 0.740366057983924, "grad_norm": 0.34386342763900757, "learning_rate": 1.3985779641564028e-05, "loss": 0.5395, "step": 34909 }, { "epoch": 0.740387266441857, "grad_norm": 0.3828119933605194, "learning_rate": 1.3985473779295524e-05, "loss": 0.5193, "step": 34910 }, { "epoch": 0.74040847489979, "grad_norm": 0.3669573962688446, "learning_rate": 1.3985167912594408e-05, "loss": 0.4437, "step": 34911 }, { "epoch": 0.7404296833577231, "grad_norm": 0.3408028781414032, "learning_rate": 1.3984862041461028e-05, "loss": 0.475, "step": 34912 }, { "epoch": 0.740450891815656, "grad_norm": 0.35642120242118835, "learning_rate": 1.3984556165895717e-05, "loss": 0.4557, "step": 34913 }, { "epoch": 0.7404721002735891, "grad_norm": 0.38181161880493164, "learning_rate": 1.3984250285898816e-05, "loss": 0.444, "step": 34914 }, { "epoch": 0.7404933087315221, "grad_norm": 0.3163664937019348, "learning_rate": 1.398394440147067e-05, "loss": 0.4714, "step": 34915 }, { "epoch": 0.7405145171894552, "grad_norm": 0.32759740948677063, "learning_rate": 1.3983638512611612e-05, "loss": 0.4472, "step": 34916 }, { "epoch": 0.7405357256473882, "grad_norm": 0.39542293548583984, "learning_rate": 1.3983332619321988e-05, "loss": 0.497, "step": 34917 }, { "epoch": 0.7405569341053212, "grad_norm": 0.40227243304252625, "learning_rate": 1.3983026721602136e-05, "loss": 0.475, "step": 34918 }, { "epoch": 0.7405781425632543, "grad_norm": 0.3473189175128937, "learning_rate": 1.3982720819452393e-05, "loss": 0.5353, "step": 34919 }, { "epoch": 0.7405993510211872, "grad_norm": 0.3848680555820465, "learning_rate": 1.3982414912873106e-05, "loss": 0.5153, "step": 34920 }, { "epoch": 0.7406205594791203, "grad_norm": 0.3899039328098297, "learning_rate": 1.3982109001864612e-05, "loss": 0.584, "step": 34921 }, { "epoch": 0.7406417679370533, "grad_norm": 0.3735140562057495, "learning_rate": 1.398180308642725e-05, "loss": 0.4481, "step": 34922 }, { "epoch": 0.7406629763949863, "grad_norm": 0.4218929708003998, "learning_rate": 1.3981497166561362e-05, "loss": 0.5554, "step": 34923 }, { "epoch": 0.7406841848529193, "grad_norm": 0.38289108872413635, "learning_rate": 1.3981191242267284e-05, "loss": 0.5392, "step": 34924 }, { "epoch": 0.7407053933108524, "grad_norm": 0.3636258542537689, "learning_rate": 1.3980885313545362e-05, "loss": 0.4552, "step": 34925 }, { "epoch": 0.7407266017687854, "grad_norm": 0.4046769142150879, "learning_rate": 1.3980579380395936e-05, "loss": 0.478, "step": 34926 }, { "epoch": 0.7407478102267184, "grad_norm": 0.37262409925460815, "learning_rate": 1.3980273442819343e-05, "loss": 0.5909, "step": 34927 }, { "epoch": 0.7407690186846514, "grad_norm": 0.35129106044769287, "learning_rate": 1.3979967500815923e-05, "loss": 0.5105, "step": 34928 }, { "epoch": 0.7407902271425845, "grad_norm": 0.3529896140098572, "learning_rate": 1.397966155438602e-05, "loss": 0.5702, "step": 34929 }, { "epoch": 0.7408114356005174, "grad_norm": 0.345352441072464, "learning_rate": 1.397935560352997e-05, "loss": 0.3827, "step": 34930 }, { "epoch": 0.7408326440584505, "grad_norm": 0.35854610800743103, "learning_rate": 1.3979049648248113e-05, "loss": 0.4591, "step": 34931 }, { "epoch": 0.7408538525163836, "grad_norm": 0.3811711370944977, "learning_rate": 1.3978743688540797e-05, "loss": 0.5118, "step": 34932 }, { "epoch": 0.7408750609743165, "grad_norm": 0.3576175570487976, "learning_rate": 1.3978437724408351e-05, "loss": 0.4352, "step": 34933 }, { "epoch": 0.7408962694322496, "grad_norm": 0.3780546486377716, "learning_rate": 1.3978131755851125e-05, "loss": 0.5311, "step": 34934 }, { "epoch": 0.7409174778901826, "grad_norm": 0.3373585343360901, "learning_rate": 1.3977825782869456e-05, "loss": 0.5067, "step": 34935 }, { "epoch": 0.7409386863481157, "grad_norm": 0.33180201053619385, "learning_rate": 1.3977519805463682e-05, "loss": 0.42, "step": 34936 }, { "epoch": 0.7409598948060486, "grad_norm": 0.3860187828540802, "learning_rate": 1.3977213823634146e-05, "loss": 0.474, "step": 34937 }, { "epoch": 0.7409811032639817, "grad_norm": 0.3831497132778168, "learning_rate": 1.3976907837381185e-05, "loss": 0.5136, "step": 34938 }, { "epoch": 0.7410023117219147, "grad_norm": 0.357406884431839, "learning_rate": 1.3976601846705144e-05, "loss": 0.4613, "step": 34939 }, { "epoch": 0.7410235201798477, "grad_norm": 0.3674199879169464, "learning_rate": 1.3976295851606361e-05, "loss": 0.5459, "step": 34940 }, { "epoch": 0.7410447286377807, "grad_norm": 0.34598085284233093, "learning_rate": 1.3975989852085177e-05, "loss": 0.4599, "step": 34941 }, { "epoch": 0.7410659370957138, "grad_norm": 0.3279975354671478, "learning_rate": 1.397568384814193e-05, "loss": 0.4815, "step": 34942 }, { "epoch": 0.7410871455536467, "grad_norm": 0.3731966018676758, "learning_rate": 1.397537783977696e-05, "loss": 0.4709, "step": 34943 }, { "epoch": 0.7411083540115798, "grad_norm": 0.3667598366737366, "learning_rate": 1.3975071826990613e-05, "loss": 0.4618, "step": 34944 }, { "epoch": 0.7411295624695129, "grad_norm": 0.3165246844291687, "learning_rate": 1.3974765809783225e-05, "loss": 0.4744, "step": 34945 }, { "epoch": 0.7411507709274459, "grad_norm": 0.3866044580936432, "learning_rate": 1.3974459788155134e-05, "loss": 0.5298, "step": 34946 }, { "epoch": 0.7411719793853789, "grad_norm": 0.3519427180290222, "learning_rate": 1.3974153762106685e-05, "loss": 0.5591, "step": 34947 }, { "epoch": 0.7411931878433119, "grad_norm": 0.3804462254047394, "learning_rate": 1.3973847731638219e-05, "loss": 0.5877, "step": 34948 }, { "epoch": 0.741214396301245, "grad_norm": 0.3541795015335083, "learning_rate": 1.3973541696750071e-05, "loss": 0.4396, "step": 34949 }, { "epoch": 0.7412356047591779, "grad_norm": 0.4614613950252533, "learning_rate": 1.3973235657442587e-05, "loss": 0.4984, "step": 34950 }, { "epoch": 0.741256813217111, "grad_norm": 0.359482079744339, "learning_rate": 1.3972929613716101e-05, "loss": 0.5242, "step": 34951 }, { "epoch": 0.741278021675044, "grad_norm": 0.34750163555145264, "learning_rate": 1.3972623565570962e-05, "loss": 0.4967, "step": 34952 }, { "epoch": 0.741299230132977, "grad_norm": 0.3948589563369751, "learning_rate": 1.3972317513007503e-05, "loss": 0.5764, "step": 34953 }, { "epoch": 0.74132043859091, "grad_norm": 0.4116993844509125, "learning_rate": 1.3972011456026069e-05, "loss": 0.5083, "step": 34954 }, { "epoch": 0.7413416470488431, "grad_norm": 0.3626292049884796, "learning_rate": 1.3971705394626998e-05, "loss": 0.4671, "step": 34955 }, { "epoch": 0.741362855506776, "grad_norm": 0.3685148358345032, "learning_rate": 1.3971399328810625e-05, "loss": 0.4558, "step": 34956 }, { "epoch": 0.7413840639647091, "grad_norm": 0.41504260897636414, "learning_rate": 1.3971093258577302e-05, "loss": 0.4179, "step": 34957 }, { "epoch": 0.7414052724226422, "grad_norm": 0.34410232305526733, "learning_rate": 1.3970787183927363e-05, "loss": 0.5126, "step": 34958 }, { "epoch": 0.7414264808805752, "grad_norm": 0.36707359552383423, "learning_rate": 1.3970481104861148e-05, "loss": 0.4454, "step": 34959 }, { "epoch": 0.7414476893385082, "grad_norm": 0.3746277093887329, "learning_rate": 1.3970175021379e-05, "loss": 0.5422, "step": 34960 }, { "epoch": 0.7414688977964412, "grad_norm": 0.3961661756038666, "learning_rate": 1.3969868933481254e-05, "loss": 0.419, "step": 34961 }, { "epoch": 0.7414901062543743, "grad_norm": 0.415463387966156, "learning_rate": 1.3969562841168256e-05, "loss": 0.5852, "step": 34962 }, { "epoch": 0.7415113147123072, "grad_norm": 0.33049601316452026, "learning_rate": 1.3969256744440347e-05, "loss": 0.4787, "step": 34963 }, { "epoch": 0.7415325231702403, "grad_norm": 0.3019131124019623, "learning_rate": 1.3968950643297865e-05, "loss": 0.5484, "step": 34964 }, { "epoch": 0.7415537316281733, "grad_norm": 0.36141446232795715, "learning_rate": 1.3968644537741148e-05, "loss": 0.4768, "step": 34965 }, { "epoch": 0.7415749400861064, "grad_norm": 0.4467940330505371, "learning_rate": 1.3968338427770541e-05, "loss": 0.6296, "step": 34966 }, { "epoch": 0.7415961485440393, "grad_norm": 0.37421125173568726, "learning_rate": 1.396803231338638e-05, "loss": 0.6018, "step": 34967 }, { "epoch": 0.7416173570019724, "grad_norm": 0.37458285689353943, "learning_rate": 1.396772619458901e-05, "loss": 0.4413, "step": 34968 }, { "epoch": 0.7416385654599054, "grad_norm": 0.3700082004070282, "learning_rate": 1.3967420071378768e-05, "loss": 0.4941, "step": 34969 }, { "epoch": 0.7416597739178384, "grad_norm": 0.3579109013080597, "learning_rate": 1.3967113943755996e-05, "loss": 0.5795, "step": 34970 }, { "epoch": 0.7416809823757714, "grad_norm": 0.3453441262245178, "learning_rate": 1.3966807811721037e-05, "loss": 0.4586, "step": 34971 }, { "epoch": 0.7417021908337045, "grad_norm": 0.44207262992858887, "learning_rate": 1.3966501675274227e-05, "loss": 0.4561, "step": 34972 }, { "epoch": 0.7417233992916376, "grad_norm": 0.41004103422164917, "learning_rate": 1.3966195534415907e-05, "loss": 0.4998, "step": 34973 }, { "epoch": 0.7417446077495705, "grad_norm": 0.36677852272987366, "learning_rate": 1.396588938914642e-05, "loss": 0.4719, "step": 34974 }, { "epoch": 0.7417658162075036, "grad_norm": 0.3542494773864746, "learning_rate": 1.3965583239466105e-05, "loss": 0.5152, "step": 34975 }, { "epoch": 0.7417870246654366, "grad_norm": 0.34303945302963257, "learning_rate": 1.3965277085375304e-05, "loss": 0.5016, "step": 34976 }, { "epoch": 0.7418082331233696, "grad_norm": 0.3363052308559418, "learning_rate": 1.3964970926874355e-05, "loss": 0.4913, "step": 34977 }, { "epoch": 0.7418294415813026, "grad_norm": 0.3546839952468872, "learning_rate": 1.39646647639636e-05, "loss": 0.5097, "step": 34978 }, { "epoch": 0.7418506500392357, "grad_norm": 0.32579365372657776, "learning_rate": 1.3964358596643379e-05, "loss": 0.4173, "step": 34979 }, { "epoch": 0.7418718584971686, "grad_norm": 0.3379080295562744, "learning_rate": 1.3964052424914033e-05, "loss": 0.4311, "step": 34980 }, { "epoch": 0.7418930669551017, "grad_norm": 0.34314873814582825, "learning_rate": 1.3963746248775905e-05, "loss": 0.5209, "step": 34981 }, { "epoch": 0.7419142754130347, "grad_norm": 0.3864184617996216, "learning_rate": 1.3963440068229327e-05, "loss": 0.5575, "step": 34982 }, { "epoch": 0.7419354838709677, "grad_norm": 0.39254486560821533, "learning_rate": 1.3963133883274652e-05, "loss": 0.5877, "step": 34983 }, { "epoch": 0.7419566923289007, "grad_norm": 0.5749714970588684, "learning_rate": 1.396282769391221e-05, "loss": 0.5228, "step": 34984 }, { "epoch": 0.7419779007868338, "grad_norm": 0.46052366495132446, "learning_rate": 1.3962521500142345e-05, "loss": 0.5475, "step": 34985 }, { "epoch": 0.7419991092447669, "grad_norm": 0.35226255655288696, "learning_rate": 1.39622153019654e-05, "loss": 0.5332, "step": 34986 }, { "epoch": 0.7420203177026998, "grad_norm": 0.3740386366844177, "learning_rate": 1.3961909099381712e-05, "loss": 0.5142, "step": 34987 }, { "epoch": 0.7420415261606329, "grad_norm": 0.39354920387268066, "learning_rate": 1.3961602892391623e-05, "loss": 0.4974, "step": 34988 }, { "epoch": 0.7420627346185659, "grad_norm": 0.3479127287864685, "learning_rate": 1.3961296680995476e-05, "loss": 0.5015, "step": 34989 }, { "epoch": 0.7420839430764989, "grad_norm": 0.3653956651687622, "learning_rate": 1.3960990465193607e-05, "loss": 0.5862, "step": 34990 }, { "epoch": 0.7421051515344319, "grad_norm": 0.457631915807724, "learning_rate": 1.396068424498636e-05, "loss": 0.4649, "step": 34991 }, { "epoch": 0.742126359992365, "grad_norm": 0.34951111674308777, "learning_rate": 1.3960378020374074e-05, "loss": 0.4988, "step": 34992 }, { "epoch": 0.742147568450298, "grad_norm": 0.350449800491333, "learning_rate": 1.396007179135709e-05, "loss": 0.4518, "step": 34993 }, { "epoch": 0.742168776908231, "grad_norm": 0.33858054876327515, "learning_rate": 1.3959765557935748e-05, "loss": 0.5516, "step": 34994 }, { "epoch": 0.742189985366164, "grad_norm": 0.3730103671550751, "learning_rate": 1.3959459320110388e-05, "loss": 0.43, "step": 34995 }, { "epoch": 0.7422111938240971, "grad_norm": 0.33196064829826355, "learning_rate": 1.3959153077881356e-05, "loss": 0.4064, "step": 34996 }, { "epoch": 0.74223240228203, "grad_norm": 0.37528929114341736, "learning_rate": 1.3958846831248985e-05, "loss": 0.4788, "step": 34997 }, { "epoch": 0.7422536107399631, "grad_norm": 0.42593082785606384, "learning_rate": 1.395854058021362e-05, "loss": 0.5267, "step": 34998 }, { "epoch": 0.7422748191978962, "grad_norm": 0.3778643310070038, "learning_rate": 1.39582343247756e-05, "loss": 0.4809, "step": 34999 }, { "epoch": 0.7422960276558291, "grad_norm": 0.399030476808548, "learning_rate": 1.3957928064935267e-05, "loss": 0.3659, "step": 35000 }, { "epoch": 0.7423172361137622, "grad_norm": 0.4975418746471405, "learning_rate": 1.3957621800692958e-05, "loss": 0.484, "step": 35001 }, { "epoch": 0.7423384445716952, "grad_norm": 0.5631726384162903, "learning_rate": 1.3957315532049018e-05, "loss": 0.4585, "step": 35002 }, { "epoch": 0.7423596530296283, "grad_norm": 0.38238996267318726, "learning_rate": 1.3957009259003788e-05, "loss": 0.4755, "step": 35003 }, { "epoch": 0.7423808614875612, "grad_norm": 0.4235522449016571, "learning_rate": 1.3956702981557605e-05, "loss": 0.53, "step": 35004 }, { "epoch": 0.7424020699454943, "grad_norm": 0.33276891708374023, "learning_rate": 1.3956396699710812e-05, "loss": 0.4671, "step": 35005 }, { "epoch": 0.7424232784034273, "grad_norm": 0.32400646805763245, "learning_rate": 1.395609041346375e-05, "loss": 0.4298, "step": 35006 }, { "epoch": 0.7424444868613603, "grad_norm": 0.39050936698913574, "learning_rate": 1.3955784122816754e-05, "loss": 0.6001, "step": 35007 }, { "epoch": 0.7424656953192933, "grad_norm": 0.3623979389667511, "learning_rate": 1.3955477827770174e-05, "loss": 0.4944, "step": 35008 }, { "epoch": 0.7424869037772264, "grad_norm": 0.3257406949996948, "learning_rate": 1.3955171528324345e-05, "loss": 0.4711, "step": 35009 }, { "epoch": 0.7425081122351593, "grad_norm": 0.3850175440311432, "learning_rate": 1.3954865224479606e-05, "loss": 0.4926, "step": 35010 }, { "epoch": 0.7425293206930924, "grad_norm": 0.36570626497268677, "learning_rate": 1.3954558916236305e-05, "loss": 0.4678, "step": 35011 }, { "epoch": 0.7425505291510254, "grad_norm": 0.3696676790714264, "learning_rate": 1.3954252603594773e-05, "loss": 0.5387, "step": 35012 }, { "epoch": 0.7425717376089584, "grad_norm": 0.3819751739501953, "learning_rate": 1.395394628655536e-05, "loss": 0.5497, "step": 35013 }, { "epoch": 0.7425929460668915, "grad_norm": 0.37897008657455444, "learning_rate": 1.3953639965118398e-05, "loss": 0.543, "step": 35014 }, { "epoch": 0.7426141545248245, "grad_norm": 0.34019365906715393, "learning_rate": 1.3953333639284235e-05, "loss": 0.508, "step": 35015 }, { "epoch": 0.7426353629827576, "grad_norm": 0.3306371569633484, "learning_rate": 1.3953027309053207e-05, "loss": 0.5055, "step": 35016 }, { "epoch": 0.7426565714406905, "grad_norm": 0.4067372679710388, "learning_rate": 1.3952720974425658e-05, "loss": 0.481, "step": 35017 }, { "epoch": 0.7426777798986236, "grad_norm": 0.37469154596328735, "learning_rate": 1.3952414635401925e-05, "loss": 0.4759, "step": 35018 }, { "epoch": 0.7426989883565566, "grad_norm": 0.388776570558548, "learning_rate": 1.3952108291982354e-05, "loss": 0.4924, "step": 35019 }, { "epoch": 0.7427201968144896, "grad_norm": 0.3676849901676178, "learning_rate": 1.3951801944167277e-05, "loss": 0.4826, "step": 35020 }, { "epoch": 0.7427414052724226, "grad_norm": 0.42406728863716125, "learning_rate": 1.3951495591957046e-05, "loss": 0.4822, "step": 35021 }, { "epoch": 0.7427626137303557, "grad_norm": 0.35694214701652527, "learning_rate": 1.3951189235351992e-05, "loss": 0.4144, "step": 35022 }, { "epoch": 0.7427838221882886, "grad_norm": 0.41221797466278076, "learning_rate": 1.3950882874352463e-05, "loss": 0.5776, "step": 35023 }, { "epoch": 0.7428050306462217, "grad_norm": 0.33953657746315, "learning_rate": 1.3950576508958794e-05, "loss": 0.3975, "step": 35024 }, { "epoch": 0.7428262391041547, "grad_norm": 0.414181649684906, "learning_rate": 1.3950270139171329e-05, "loss": 0.5152, "step": 35025 }, { "epoch": 0.7428474475620878, "grad_norm": 0.3199436068534851, "learning_rate": 1.3949963764990406e-05, "loss": 0.4444, "step": 35026 }, { "epoch": 0.7428686560200208, "grad_norm": 0.39356541633605957, "learning_rate": 1.3949657386416368e-05, "loss": 0.4981, "step": 35027 }, { "epoch": 0.7428898644779538, "grad_norm": 0.36380478739738464, "learning_rate": 1.3949351003449559e-05, "loss": 0.5634, "step": 35028 }, { "epoch": 0.7429110729358869, "grad_norm": 0.37931784987449646, "learning_rate": 1.394904461609031e-05, "loss": 0.5465, "step": 35029 }, { "epoch": 0.7429322813938198, "grad_norm": 0.33529600501060486, "learning_rate": 1.3948738224338973e-05, "loss": 0.4874, "step": 35030 }, { "epoch": 0.7429534898517529, "grad_norm": 0.37537017464637756, "learning_rate": 1.3948431828195883e-05, "loss": 0.4841, "step": 35031 }, { "epoch": 0.7429746983096859, "grad_norm": 0.3373916447162628, "learning_rate": 1.3948125427661381e-05, "loss": 0.4598, "step": 35032 }, { "epoch": 0.742995906767619, "grad_norm": 0.4875537157058716, "learning_rate": 1.3947819022735807e-05, "loss": 0.4752, "step": 35033 }, { "epoch": 0.7430171152255519, "grad_norm": 0.32891154289245605, "learning_rate": 1.3947512613419505e-05, "loss": 0.4673, "step": 35034 }, { "epoch": 0.743038323683485, "grad_norm": 0.3326847553253174, "learning_rate": 1.394720619971281e-05, "loss": 0.5254, "step": 35035 }, { "epoch": 0.743059532141418, "grad_norm": 0.3741990923881531, "learning_rate": 1.394689978161607e-05, "loss": 0.5268, "step": 35036 }, { "epoch": 0.743080740599351, "grad_norm": 0.3649928867816925, "learning_rate": 1.3946593359129624e-05, "loss": 0.4389, "step": 35037 }, { "epoch": 0.743101949057284, "grad_norm": 0.37892794609069824, "learning_rate": 1.3946286932253807e-05, "loss": 0.4579, "step": 35038 }, { "epoch": 0.7431231575152171, "grad_norm": 0.32504379749298096, "learning_rate": 1.3945980500988965e-05, "loss": 0.4799, "step": 35039 }, { "epoch": 0.7431443659731501, "grad_norm": 0.3546594977378845, "learning_rate": 1.3945674065335439e-05, "loss": 0.4744, "step": 35040 }, { "epoch": 0.7431655744310831, "grad_norm": 0.3926084041595459, "learning_rate": 1.3945367625293567e-05, "loss": 0.3896, "step": 35041 }, { "epoch": 0.7431867828890162, "grad_norm": 0.49276331067085266, "learning_rate": 1.3945061180863692e-05, "loss": 0.537, "step": 35042 }, { "epoch": 0.7432079913469491, "grad_norm": 0.3552379012107849, "learning_rate": 1.3944754732046154e-05, "loss": 0.542, "step": 35043 }, { "epoch": 0.7432291998048822, "grad_norm": 0.34750646352767944, "learning_rate": 1.3944448278841296e-05, "loss": 0.552, "step": 35044 }, { "epoch": 0.7432504082628152, "grad_norm": 0.37359777092933655, "learning_rate": 1.3944141821249457e-05, "loss": 0.5419, "step": 35045 }, { "epoch": 0.7432716167207483, "grad_norm": 0.35383665561676025, "learning_rate": 1.3943835359270974e-05, "loss": 0.5823, "step": 35046 }, { "epoch": 0.7432928251786812, "grad_norm": 0.4718032479286194, "learning_rate": 1.3943528892906195e-05, "loss": 0.4391, "step": 35047 }, { "epoch": 0.7433140336366143, "grad_norm": 0.4603855609893799, "learning_rate": 1.3943222422155455e-05, "loss": 0.4852, "step": 35048 }, { "epoch": 0.7433352420945473, "grad_norm": 0.3555455505847931, "learning_rate": 1.39429159470191e-05, "loss": 0.466, "step": 35049 }, { "epoch": 0.7433564505524803, "grad_norm": 0.34146612882614136, "learning_rate": 1.3942609467497467e-05, "loss": 0.5564, "step": 35050 }, { "epoch": 0.7433776590104133, "grad_norm": 0.3529197871685028, "learning_rate": 1.3942302983590898e-05, "loss": 0.4498, "step": 35051 }, { "epoch": 0.7433988674683464, "grad_norm": 0.36677348613739014, "learning_rate": 1.394199649529973e-05, "loss": 0.5015, "step": 35052 }, { "epoch": 0.7434200759262795, "grad_norm": 0.37498706579208374, "learning_rate": 1.3941690002624313e-05, "loss": 0.5596, "step": 35053 }, { "epoch": 0.7434412843842124, "grad_norm": 0.36582857370376587, "learning_rate": 1.3941383505564981e-05, "loss": 0.5531, "step": 35054 }, { "epoch": 0.7434624928421455, "grad_norm": 0.3578279912471771, "learning_rate": 1.3941077004122077e-05, "loss": 0.4377, "step": 35055 }, { "epoch": 0.7434837013000785, "grad_norm": 0.37310972809791565, "learning_rate": 1.394077049829594e-05, "loss": 0.5359, "step": 35056 }, { "epoch": 0.7435049097580115, "grad_norm": 0.374744713306427, "learning_rate": 1.3940463988086912e-05, "loss": 0.5143, "step": 35057 }, { "epoch": 0.7435261182159445, "grad_norm": 0.37489524483680725, "learning_rate": 1.3940157473495336e-05, "loss": 0.5151, "step": 35058 }, { "epoch": 0.7435473266738776, "grad_norm": 0.3452411890029907, "learning_rate": 1.393985095452155e-05, "loss": 0.4738, "step": 35059 }, { "epoch": 0.7435685351318105, "grad_norm": 0.4294784665107727, "learning_rate": 1.3939544431165897e-05, "loss": 0.5075, "step": 35060 }, { "epoch": 0.7435897435897436, "grad_norm": 0.38591450452804565, "learning_rate": 1.3939237903428716e-05, "loss": 0.5302, "step": 35061 }, { "epoch": 0.7436109520476766, "grad_norm": 1.2687040567398071, "learning_rate": 1.393893137131035e-05, "loss": 0.469, "step": 35062 }, { "epoch": 0.7436321605056097, "grad_norm": 0.34793218970298767, "learning_rate": 1.3938624834811136e-05, "loss": 0.4745, "step": 35063 }, { "epoch": 0.7436533689635426, "grad_norm": 0.371004581451416, "learning_rate": 1.3938318293931421e-05, "loss": 0.4741, "step": 35064 }, { "epoch": 0.7436745774214757, "grad_norm": 0.4233222007751465, "learning_rate": 1.393801174867154e-05, "loss": 0.4205, "step": 35065 }, { "epoch": 0.7436957858794087, "grad_norm": 0.39235907793045044, "learning_rate": 1.3937705199031837e-05, "loss": 0.509, "step": 35066 }, { "epoch": 0.7437169943373417, "grad_norm": 10.334996223449707, "learning_rate": 1.3937398645012654e-05, "loss": 0.5683, "step": 35067 }, { "epoch": 0.7437382027952748, "grad_norm": 0.39821621775627136, "learning_rate": 1.393709208661433e-05, "loss": 0.5156, "step": 35068 }, { "epoch": 0.7437594112532078, "grad_norm": 0.3694358170032501, "learning_rate": 1.3936785523837207e-05, "loss": 0.4423, "step": 35069 }, { "epoch": 0.7437806197111408, "grad_norm": 0.3826883137226105, "learning_rate": 1.3936478956681622e-05, "loss": 0.5401, "step": 35070 }, { "epoch": 0.7438018281690738, "grad_norm": 0.4208701550960541, "learning_rate": 1.3936172385147921e-05, "loss": 0.5208, "step": 35071 }, { "epoch": 0.7438230366270069, "grad_norm": 0.4270303249359131, "learning_rate": 1.3935865809236444e-05, "loss": 0.4867, "step": 35072 }, { "epoch": 0.7438442450849398, "grad_norm": 0.3875039219856262, "learning_rate": 1.3935559228947533e-05, "loss": 0.5608, "step": 35073 }, { "epoch": 0.7438654535428729, "grad_norm": 0.3434375524520874, "learning_rate": 1.3935252644281522e-05, "loss": 0.4719, "step": 35074 }, { "epoch": 0.7438866620008059, "grad_norm": 0.32087916135787964, "learning_rate": 1.3934946055238763e-05, "loss": 0.4512, "step": 35075 }, { "epoch": 0.743907870458739, "grad_norm": 0.39116334915161133, "learning_rate": 1.3934639461819589e-05, "loss": 0.5717, "step": 35076 }, { "epoch": 0.7439290789166719, "grad_norm": 0.37174883484840393, "learning_rate": 1.3934332864024342e-05, "loss": 0.5432, "step": 35077 }, { "epoch": 0.743950287374605, "grad_norm": 0.34945955872535706, "learning_rate": 1.3934026261853365e-05, "loss": 0.4846, "step": 35078 }, { "epoch": 0.743971495832538, "grad_norm": 0.35524675250053406, "learning_rate": 1.3933719655307e-05, "loss": 0.4981, "step": 35079 }, { "epoch": 0.743992704290471, "grad_norm": 0.44279730319976807, "learning_rate": 1.3933413044385582e-05, "loss": 0.5376, "step": 35080 }, { "epoch": 0.7440139127484041, "grad_norm": 0.3930031657218933, "learning_rate": 1.3933106429089458e-05, "loss": 0.4966, "step": 35081 }, { "epoch": 0.7440351212063371, "grad_norm": 0.40435758233070374, "learning_rate": 1.3932799809418969e-05, "loss": 0.5226, "step": 35082 }, { "epoch": 0.7440563296642702, "grad_norm": 0.5637887120246887, "learning_rate": 1.3932493185374451e-05, "loss": 0.474, "step": 35083 }, { "epoch": 0.7440775381222031, "grad_norm": 0.38391757011413574, "learning_rate": 1.393218655695625e-05, "loss": 0.5041, "step": 35084 }, { "epoch": 0.7440987465801362, "grad_norm": 0.3948117792606354, "learning_rate": 1.3931879924164706e-05, "loss": 0.5979, "step": 35085 }, { "epoch": 0.7441199550380692, "grad_norm": 0.352622926235199, "learning_rate": 1.3931573287000157e-05, "loss": 0.4491, "step": 35086 }, { "epoch": 0.7441411634960022, "grad_norm": 0.4141007661819458, "learning_rate": 1.393126664546295e-05, "loss": 0.4298, "step": 35087 }, { "epoch": 0.7441623719539352, "grad_norm": 0.3301657438278198, "learning_rate": 1.3930959999553419e-05, "loss": 0.465, "step": 35088 }, { "epoch": 0.7441835804118683, "grad_norm": 0.33422014117240906, "learning_rate": 1.3930653349271912e-05, "loss": 0.4401, "step": 35089 }, { "epoch": 0.7442047888698012, "grad_norm": 0.3584706783294678, "learning_rate": 1.3930346694618766e-05, "loss": 0.5393, "step": 35090 }, { "epoch": 0.7442259973277343, "grad_norm": 0.33678507804870605, "learning_rate": 1.3930040035594318e-05, "loss": 0.4393, "step": 35091 }, { "epoch": 0.7442472057856673, "grad_norm": 0.3873836398124695, "learning_rate": 1.3929733372198917e-05, "loss": 0.4967, "step": 35092 }, { "epoch": 0.7442684142436004, "grad_norm": 0.46414995193481445, "learning_rate": 1.39294267044329e-05, "loss": 0.541, "step": 35093 }, { "epoch": 0.7442896227015334, "grad_norm": 0.3694676160812378, "learning_rate": 1.392912003229661e-05, "loss": 0.474, "step": 35094 }, { "epoch": 0.7443108311594664, "grad_norm": 0.38585710525512695, "learning_rate": 1.3928813355790384e-05, "loss": 0.5403, "step": 35095 }, { "epoch": 0.7443320396173995, "grad_norm": 0.3614960014820099, "learning_rate": 1.392850667491457e-05, "loss": 0.485, "step": 35096 }, { "epoch": 0.7443532480753324, "grad_norm": 0.3870515823364258, "learning_rate": 1.39281999896695e-05, "loss": 0.4854, "step": 35097 }, { "epoch": 0.7443744565332655, "grad_norm": 0.3619407117366791, "learning_rate": 1.3927893300055525e-05, "loss": 0.5361, "step": 35098 }, { "epoch": 0.7443956649911985, "grad_norm": 0.3867931365966797, "learning_rate": 1.392758660607298e-05, "loss": 0.5632, "step": 35099 }, { "epoch": 0.7444168734491315, "grad_norm": 0.3620566427707672, "learning_rate": 1.3927279907722205e-05, "loss": 0.5187, "step": 35100 }, { "epoch": 0.7444380819070645, "grad_norm": 0.36524859070777893, "learning_rate": 1.3926973205003544e-05, "loss": 0.5157, "step": 35101 }, { "epoch": 0.7444592903649976, "grad_norm": 0.41008585691452026, "learning_rate": 1.3926666497917337e-05, "loss": 0.4957, "step": 35102 }, { "epoch": 0.7444804988229305, "grad_norm": 0.3861030042171478, "learning_rate": 1.3926359786463927e-05, "loss": 0.4886, "step": 35103 }, { "epoch": 0.7445017072808636, "grad_norm": 0.40596479177474976, "learning_rate": 1.3926053070643654e-05, "loss": 0.4684, "step": 35104 }, { "epoch": 0.7445229157387966, "grad_norm": 0.3449234962463379, "learning_rate": 1.3925746350456862e-05, "loss": 0.4457, "step": 35105 }, { "epoch": 0.7445441241967297, "grad_norm": 0.5518572926521301, "learning_rate": 1.3925439625903882e-05, "loss": 0.4976, "step": 35106 }, { "epoch": 0.7445653326546626, "grad_norm": 0.3561382591724396, "learning_rate": 1.3925132896985068e-05, "loss": 0.4944, "step": 35107 }, { "epoch": 0.7445865411125957, "grad_norm": 0.3500763475894928, "learning_rate": 1.392482616370075e-05, "loss": 0.4534, "step": 35108 }, { "epoch": 0.7446077495705288, "grad_norm": 0.47553250193595886, "learning_rate": 1.392451942605128e-05, "loss": 0.5569, "step": 35109 }, { "epoch": 0.7446289580284617, "grad_norm": 0.3933822810649872, "learning_rate": 1.392421268403699e-05, "loss": 0.5057, "step": 35110 }, { "epoch": 0.7446501664863948, "grad_norm": 0.36798569560050964, "learning_rate": 1.3923905937658225e-05, "loss": 0.4224, "step": 35111 }, { "epoch": 0.7446713749443278, "grad_norm": 0.3657337725162506, "learning_rate": 1.3923599186915327e-05, "loss": 0.5303, "step": 35112 }, { "epoch": 0.7446925834022609, "grad_norm": 0.37497010827064514, "learning_rate": 1.3923292431808637e-05, "loss": 0.4399, "step": 35113 }, { "epoch": 0.7447137918601938, "grad_norm": 0.35712456703186035, "learning_rate": 1.3922985672338493e-05, "loss": 0.5242, "step": 35114 }, { "epoch": 0.7447350003181269, "grad_norm": 0.3696644902229309, "learning_rate": 1.392267890850524e-05, "loss": 0.5897, "step": 35115 }, { "epoch": 0.7447562087760599, "grad_norm": 0.38916683197021484, "learning_rate": 1.3922372140309218e-05, "loss": 0.5021, "step": 35116 }, { "epoch": 0.7447774172339929, "grad_norm": 0.38160431385040283, "learning_rate": 1.3922065367750763e-05, "loss": 0.5214, "step": 35117 }, { "epoch": 0.7447986256919259, "grad_norm": 0.5121830105781555, "learning_rate": 1.3921758590830225e-05, "loss": 0.4714, "step": 35118 }, { "epoch": 0.744819834149859, "grad_norm": 0.3580506145954132, "learning_rate": 1.3921451809547943e-05, "loss": 0.4936, "step": 35119 }, { "epoch": 0.7448410426077919, "grad_norm": 0.7575734853744507, "learning_rate": 1.3921145023904255e-05, "loss": 0.491, "step": 35120 }, { "epoch": 0.744862251065725, "grad_norm": 0.37955355644226074, "learning_rate": 1.3920838233899503e-05, "loss": 0.5532, "step": 35121 }, { "epoch": 0.7448834595236581, "grad_norm": 0.3415173590183258, "learning_rate": 1.3920531439534026e-05, "loss": 0.465, "step": 35122 }, { "epoch": 0.744904667981591, "grad_norm": 0.7343942523002625, "learning_rate": 1.3920224640808172e-05, "loss": 0.5669, "step": 35123 }, { "epoch": 0.7449258764395241, "grad_norm": 0.381229043006897, "learning_rate": 1.3919917837722277e-05, "loss": 0.4978, "step": 35124 }, { "epoch": 0.7449470848974571, "grad_norm": 0.48054367303848267, "learning_rate": 1.3919611030276681e-05, "loss": 0.4941, "step": 35125 }, { "epoch": 0.7449682933553902, "grad_norm": 0.3732556402683258, "learning_rate": 1.3919304218471732e-05, "loss": 0.429, "step": 35126 }, { "epoch": 0.7449895018133231, "grad_norm": 0.3074715733528137, "learning_rate": 1.3918997402307765e-05, "loss": 0.4908, "step": 35127 }, { "epoch": 0.7450107102712562, "grad_norm": 0.3897840678691864, "learning_rate": 1.391869058178512e-05, "loss": 0.5016, "step": 35128 }, { "epoch": 0.7450319187291892, "grad_norm": 0.39581260085105896, "learning_rate": 1.3918383756904146e-05, "loss": 0.5559, "step": 35129 }, { "epoch": 0.7450531271871222, "grad_norm": 0.5190843939781189, "learning_rate": 1.3918076927665179e-05, "loss": 0.5451, "step": 35130 }, { "epoch": 0.7450743356450552, "grad_norm": 0.3918907344341278, "learning_rate": 1.3917770094068557e-05, "loss": 0.5492, "step": 35131 }, { "epoch": 0.7450955441029883, "grad_norm": 0.40531080961227417, "learning_rate": 1.3917463256114629e-05, "loss": 0.5688, "step": 35132 }, { "epoch": 0.7451167525609212, "grad_norm": 0.35413575172424316, "learning_rate": 1.3917156413803734e-05, "loss": 0.5275, "step": 35133 }, { "epoch": 0.7451379610188543, "grad_norm": 0.3632827699184418, "learning_rate": 1.3916849567136207e-05, "loss": 0.469, "step": 35134 }, { "epoch": 0.7451591694767874, "grad_norm": 0.48630598187446594, "learning_rate": 1.3916542716112395e-05, "loss": 0.5726, "step": 35135 }, { "epoch": 0.7451803779347204, "grad_norm": 0.33103933930397034, "learning_rate": 1.3916235860732641e-05, "loss": 0.4468, "step": 35136 }, { "epoch": 0.7452015863926534, "grad_norm": 0.3382328450679779, "learning_rate": 1.3915929000997279e-05, "loss": 0.4635, "step": 35137 }, { "epoch": 0.7452227948505864, "grad_norm": 0.29857176542282104, "learning_rate": 1.391562213690666e-05, "loss": 0.4218, "step": 35138 }, { "epoch": 0.7452440033085195, "grad_norm": 0.392448753118515, "learning_rate": 1.3915315268461116e-05, "loss": 0.5011, "step": 35139 }, { "epoch": 0.7452652117664524, "grad_norm": 0.3898560106754303, "learning_rate": 1.3915008395660994e-05, "loss": 0.5035, "step": 35140 }, { "epoch": 0.7452864202243855, "grad_norm": 0.34330910444259644, "learning_rate": 1.3914701518506635e-05, "loss": 0.4983, "step": 35141 }, { "epoch": 0.7453076286823185, "grad_norm": 0.38538771867752075, "learning_rate": 1.3914394636998374e-05, "loss": 0.5702, "step": 35142 }, { "epoch": 0.7453288371402516, "grad_norm": 0.3414422273635864, "learning_rate": 1.3914087751136562e-05, "loss": 0.3705, "step": 35143 }, { "epoch": 0.7453500455981845, "grad_norm": 0.3322327733039856, "learning_rate": 1.3913780860921536e-05, "loss": 0.443, "step": 35144 }, { "epoch": 0.7453712540561176, "grad_norm": 0.42829906940460205, "learning_rate": 1.3913473966353634e-05, "loss": 0.5242, "step": 35145 }, { "epoch": 0.7453924625140506, "grad_norm": 0.4198961555957794, "learning_rate": 1.3913167067433202e-05, "loss": 0.5343, "step": 35146 }, { "epoch": 0.7454136709719836, "grad_norm": 0.4023022949695587, "learning_rate": 1.391286016416058e-05, "loss": 0.5141, "step": 35147 }, { "epoch": 0.7454348794299166, "grad_norm": 0.3771210312843323, "learning_rate": 1.3912553256536106e-05, "loss": 0.5432, "step": 35148 }, { "epoch": 0.7454560878878497, "grad_norm": 0.3757251501083374, "learning_rate": 1.3912246344560128e-05, "loss": 0.5839, "step": 35149 }, { "epoch": 0.7454772963457827, "grad_norm": 0.4349721074104309, "learning_rate": 1.3911939428232982e-05, "loss": 0.5921, "step": 35150 }, { "epoch": 0.7454985048037157, "grad_norm": 0.3856896162033081, "learning_rate": 1.391163250755501e-05, "loss": 0.4839, "step": 35151 }, { "epoch": 0.7455197132616488, "grad_norm": 0.33839115500450134, "learning_rate": 1.3911325582526555e-05, "loss": 0.417, "step": 35152 }, { "epoch": 0.7455409217195818, "grad_norm": 0.34396713972091675, "learning_rate": 1.3911018653147958e-05, "loss": 0.55, "step": 35153 }, { "epoch": 0.7455621301775148, "grad_norm": 0.38182172179222107, "learning_rate": 1.391071171941956e-05, "loss": 0.5446, "step": 35154 }, { "epoch": 0.7455833386354478, "grad_norm": 0.3859192728996277, "learning_rate": 1.3910404781341703e-05, "loss": 0.4837, "step": 35155 }, { "epoch": 0.7456045470933809, "grad_norm": 0.37166085839271545, "learning_rate": 1.3910097838914725e-05, "loss": 0.445, "step": 35156 }, { "epoch": 0.7456257555513138, "grad_norm": 0.36455368995666504, "learning_rate": 1.3909790892138972e-05, "loss": 0.5153, "step": 35157 }, { "epoch": 0.7456469640092469, "grad_norm": 0.32470211386680603, "learning_rate": 1.3909483941014785e-05, "loss": 0.4261, "step": 35158 }, { "epoch": 0.7456681724671799, "grad_norm": 0.354833722114563, "learning_rate": 1.3909176985542502e-05, "loss": 0.5456, "step": 35159 }, { "epoch": 0.7456893809251129, "grad_norm": 0.3467792570590973, "learning_rate": 1.3908870025722467e-05, "loss": 0.5176, "step": 35160 }, { "epoch": 0.7457105893830459, "grad_norm": 0.3822358250617981, "learning_rate": 1.390856306155502e-05, "loss": 0.5069, "step": 35161 }, { "epoch": 0.745731797840979, "grad_norm": 0.49795833230018616, "learning_rate": 1.3908256093040504e-05, "loss": 0.4563, "step": 35162 }, { "epoch": 0.7457530062989121, "grad_norm": 0.3342623710632324, "learning_rate": 1.390794912017926e-05, "loss": 0.4756, "step": 35163 }, { "epoch": 0.745774214756845, "grad_norm": 0.3620379865169525, "learning_rate": 1.3907642142971628e-05, "loss": 0.4784, "step": 35164 }, { "epoch": 0.7457954232147781, "grad_norm": 0.3485216796398163, "learning_rate": 1.3907335161417951e-05, "loss": 0.5059, "step": 35165 }, { "epoch": 0.7458166316727111, "grad_norm": 0.3702715337276459, "learning_rate": 1.3907028175518572e-05, "loss": 0.4781, "step": 35166 }, { "epoch": 0.7458378401306441, "grad_norm": 0.4178546369075775, "learning_rate": 1.3906721185273827e-05, "loss": 0.563, "step": 35167 }, { "epoch": 0.7458590485885771, "grad_norm": 0.37755563855171204, "learning_rate": 1.3906414190684058e-05, "loss": 0.4987, "step": 35168 }, { "epoch": 0.7458802570465102, "grad_norm": 0.3541509807109833, "learning_rate": 1.3906107191749614e-05, "loss": 0.5168, "step": 35169 }, { "epoch": 0.7459014655044431, "grad_norm": 0.3579816520214081, "learning_rate": 1.3905800188470828e-05, "loss": 0.5103, "step": 35170 }, { "epoch": 0.7459226739623762, "grad_norm": 0.3643799126148224, "learning_rate": 1.3905493180848048e-05, "loss": 0.5529, "step": 35171 }, { "epoch": 0.7459438824203092, "grad_norm": 0.3206057846546173, "learning_rate": 1.3905186168881612e-05, "loss": 0.531, "step": 35172 }, { "epoch": 0.7459650908782423, "grad_norm": 0.40030813217163086, "learning_rate": 1.390487915257186e-05, "loss": 0.4942, "step": 35173 }, { "epoch": 0.7459862993361752, "grad_norm": 0.44237422943115234, "learning_rate": 1.3904572131919136e-05, "loss": 0.4495, "step": 35174 }, { "epoch": 0.7460075077941083, "grad_norm": 0.37784719467163086, "learning_rate": 1.3904265106923782e-05, "loss": 0.4885, "step": 35175 }, { "epoch": 0.7460287162520414, "grad_norm": 0.3641698360443115, "learning_rate": 1.3903958077586137e-05, "loss": 0.4381, "step": 35176 }, { "epoch": 0.7460499247099743, "grad_norm": 0.44202983379364014, "learning_rate": 1.3903651043906545e-05, "loss": 0.5947, "step": 35177 }, { "epoch": 0.7460711331679074, "grad_norm": 0.36012428998947144, "learning_rate": 1.3903344005885346e-05, "loss": 0.5933, "step": 35178 }, { "epoch": 0.7460923416258404, "grad_norm": 0.5159271359443665, "learning_rate": 1.390303696352288e-05, "loss": 0.5184, "step": 35179 }, { "epoch": 0.7461135500837734, "grad_norm": 0.3948126435279846, "learning_rate": 1.3902729916819491e-05, "loss": 0.536, "step": 35180 }, { "epoch": 0.7461347585417064, "grad_norm": 0.361925333738327, "learning_rate": 1.390242286577552e-05, "loss": 0.4117, "step": 35181 }, { "epoch": 0.7461559669996395, "grad_norm": 0.33719301223754883, "learning_rate": 1.3902115810391308e-05, "loss": 0.4514, "step": 35182 }, { "epoch": 0.7461771754575725, "grad_norm": 0.6199622750282288, "learning_rate": 1.3901808750667198e-05, "loss": 0.4734, "step": 35183 }, { "epoch": 0.7461983839155055, "grad_norm": 0.3146817684173584, "learning_rate": 1.3901501686603526e-05, "loss": 0.4686, "step": 35184 }, { "epoch": 0.7462195923734385, "grad_norm": 0.762837290763855, "learning_rate": 1.3901194618200644e-05, "loss": 0.5113, "step": 35185 }, { "epoch": 0.7462408008313716, "grad_norm": 0.32243791222572327, "learning_rate": 1.3900887545458886e-05, "loss": 0.3849, "step": 35186 }, { "epoch": 0.7462620092893045, "grad_norm": 0.4080852270126343, "learning_rate": 1.390058046837859e-05, "loss": 0.4444, "step": 35187 }, { "epoch": 0.7462832177472376, "grad_norm": 0.38824591040611267, "learning_rate": 1.3900273386960103e-05, "loss": 0.5493, "step": 35188 }, { "epoch": 0.7463044262051706, "grad_norm": 0.3736759424209595, "learning_rate": 1.3899966301203769e-05, "loss": 0.4802, "step": 35189 }, { "epoch": 0.7463256346631036, "grad_norm": 0.39639559388160706, "learning_rate": 1.3899659211109926e-05, "loss": 0.5499, "step": 35190 }, { "epoch": 0.7463468431210367, "grad_norm": 0.37087202072143555, "learning_rate": 1.3899352116678917e-05, "loss": 0.5155, "step": 35191 }, { "epoch": 0.7463680515789697, "grad_norm": 0.40624210238456726, "learning_rate": 1.389904501791108e-05, "loss": 0.6121, "step": 35192 }, { "epoch": 0.7463892600369028, "grad_norm": 0.32260647416114807, "learning_rate": 1.3898737914806756e-05, "loss": 0.4697, "step": 35193 }, { "epoch": 0.7464104684948357, "grad_norm": 0.3869878947734833, "learning_rate": 1.3898430807366294e-05, "loss": 0.501, "step": 35194 }, { "epoch": 0.7464316769527688, "grad_norm": 0.3352501690387726, "learning_rate": 1.389812369559003e-05, "loss": 0.4526, "step": 35195 }, { "epoch": 0.7464528854107018, "grad_norm": 0.4523007869720459, "learning_rate": 1.3897816579478307e-05, "loss": 0.519, "step": 35196 }, { "epoch": 0.7464740938686348, "grad_norm": 0.6174303293228149, "learning_rate": 1.3897509459031469e-05, "loss": 0.4781, "step": 35197 }, { "epoch": 0.7464953023265678, "grad_norm": 0.33021748065948486, "learning_rate": 1.389720233424985e-05, "loss": 0.4632, "step": 35198 }, { "epoch": 0.7465165107845009, "grad_norm": 0.34325140714645386, "learning_rate": 1.3896895205133801e-05, "loss": 0.4547, "step": 35199 }, { "epoch": 0.7465377192424338, "grad_norm": 0.35752636194229126, "learning_rate": 1.3896588071683654e-05, "loss": 0.488, "step": 35200 }, { "epoch": 0.7465589277003669, "grad_norm": 0.374489426612854, "learning_rate": 1.389628093389976e-05, "loss": 0.4877, "step": 35201 }, { "epoch": 0.7465801361582999, "grad_norm": 0.31757718324661255, "learning_rate": 1.3895973791782453e-05, "loss": 0.4133, "step": 35202 }, { "epoch": 0.746601344616233, "grad_norm": 0.40280503034591675, "learning_rate": 1.389566664533208e-05, "loss": 0.5002, "step": 35203 }, { "epoch": 0.746622553074166, "grad_norm": 0.3207535743713379, "learning_rate": 1.3895359494548979e-05, "loss": 0.3898, "step": 35204 }, { "epoch": 0.746643761532099, "grad_norm": 0.33437949419021606, "learning_rate": 1.3895052339433494e-05, "loss": 0.4939, "step": 35205 }, { "epoch": 0.7466649699900321, "grad_norm": 0.364955335855484, "learning_rate": 1.3894745179985964e-05, "loss": 0.5321, "step": 35206 }, { "epoch": 0.746686178447965, "grad_norm": 0.3468170464038849, "learning_rate": 1.3894438016206733e-05, "loss": 0.4508, "step": 35207 }, { "epoch": 0.7467073869058981, "grad_norm": 0.3963801860809326, "learning_rate": 1.3894130848096143e-05, "loss": 0.5011, "step": 35208 }, { "epoch": 0.7467285953638311, "grad_norm": 0.3294752240180969, "learning_rate": 1.3893823675654535e-05, "loss": 0.4989, "step": 35209 }, { "epoch": 0.7467498038217641, "grad_norm": 0.4563034772872925, "learning_rate": 1.3893516498882247e-05, "loss": 0.5479, "step": 35210 }, { "epoch": 0.7467710122796971, "grad_norm": 0.36614295840263367, "learning_rate": 1.3893209317779629e-05, "loss": 0.5833, "step": 35211 }, { "epoch": 0.7467922207376302, "grad_norm": 0.402760773897171, "learning_rate": 1.389290213234701e-05, "loss": 0.4421, "step": 35212 }, { "epoch": 0.7468134291955631, "grad_norm": 0.3474763035774231, "learning_rate": 1.3892594942584744e-05, "loss": 0.4817, "step": 35213 }, { "epoch": 0.7468346376534962, "grad_norm": 0.5134021639823914, "learning_rate": 1.3892287748493168e-05, "loss": 0.443, "step": 35214 }, { "epoch": 0.7468558461114292, "grad_norm": 0.40374433994293213, "learning_rate": 1.3891980550072624e-05, "loss": 0.527, "step": 35215 }, { "epoch": 0.7468770545693623, "grad_norm": 0.613211452960968, "learning_rate": 1.3891673347323448e-05, "loss": 0.5204, "step": 35216 }, { "epoch": 0.7468982630272953, "grad_norm": 0.4641599655151367, "learning_rate": 1.3891366140245992e-05, "loss": 0.4518, "step": 35217 }, { "epoch": 0.7469194714852283, "grad_norm": 0.35880953073501587, "learning_rate": 1.3891058928840589e-05, "loss": 0.4785, "step": 35218 }, { "epoch": 0.7469406799431614, "grad_norm": 0.35496070981025696, "learning_rate": 1.3890751713107584e-05, "loss": 0.5279, "step": 35219 }, { "epoch": 0.7469618884010943, "grad_norm": 0.4137406349182129, "learning_rate": 1.3890444493047322e-05, "loss": 0.5887, "step": 35220 }, { "epoch": 0.7469830968590274, "grad_norm": 0.40451958775520325, "learning_rate": 1.3890137268660137e-05, "loss": 0.5672, "step": 35221 }, { "epoch": 0.7470043053169604, "grad_norm": 0.3723790645599365, "learning_rate": 1.388983003994638e-05, "loss": 0.4736, "step": 35222 }, { "epoch": 0.7470255137748935, "grad_norm": 0.3836022913455963, "learning_rate": 1.3889522806906386e-05, "loss": 0.4627, "step": 35223 }, { "epoch": 0.7470467222328264, "grad_norm": 0.38234373927116394, "learning_rate": 1.3889215569540497e-05, "loss": 0.503, "step": 35224 }, { "epoch": 0.7470679306907595, "grad_norm": 0.4091949164867401, "learning_rate": 1.3888908327849056e-05, "loss": 0.4454, "step": 35225 }, { "epoch": 0.7470891391486925, "grad_norm": 0.37688976526260376, "learning_rate": 1.388860108183241e-05, "loss": 0.4455, "step": 35226 }, { "epoch": 0.7471103476066255, "grad_norm": 0.36631762981414795, "learning_rate": 1.3888293831490888e-05, "loss": 0.509, "step": 35227 }, { "epoch": 0.7471315560645585, "grad_norm": 0.34707099199295044, "learning_rate": 1.3887986576824846e-05, "loss": 0.453, "step": 35228 }, { "epoch": 0.7471527645224916, "grad_norm": 0.3348081111907959, "learning_rate": 1.3887679317834617e-05, "loss": 0.4897, "step": 35229 }, { "epoch": 0.7471739729804245, "grad_norm": 0.34432944655418396, "learning_rate": 1.3887372054520543e-05, "loss": 0.5357, "step": 35230 }, { "epoch": 0.7471951814383576, "grad_norm": 0.38008129596710205, "learning_rate": 1.3887064786882969e-05, "loss": 0.5661, "step": 35231 }, { "epoch": 0.7472163898962907, "grad_norm": 0.3615801930427551, "learning_rate": 1.3886757514922233e-05, "loss": 0.6119, "step": 35232 }, { "epoch": 0.7472375983542237, "grad_norm": 0.34706124663352966, "learning_rate": 1.3886450238638682e-05, "loss": 0.5271, "step": 35233 }, { "epoch": 0.7472588068121567, "grad_norm": 0.4312940537929535, "learning_rate": 1.3886142958032655e-05, "loss": 0.516, "step": 35234 }, { "epoch": 0.7472800152700897, "grad_norm": 0.3301319479942322, "learning_rate": 1.388583567310449e-05, "loss": 0.4447, "step": 35235 }, { "epoch": 0.7473012237280228, "grad_norm": 0.3509875535964966, "learning_rate": 1.3885528383854536e-05, "loss": 0.5271, "step": 35236 }, { "epoch": 0.7473224321859557, "grad_norm": 0.3581100106239319, "learning_rate": 1.3885221090283132e-05, "loss": 0.4556, "step": 35237 }, { "epoch": 0.7473436406438888, "grad_norm": 0.3613569140434265, "learning_rate": 1.3884913792390613e-05, "loss": 0.5948, "step": 35238 }, { "epoch": 0.7473648491018218, "grad_norm": 0.32694438099861145, "learning_rate": 1.3884606490177332e-05, "loss": 0.4758, "step": 35239 }, { "epoch": 0.7473860575597548, "grad_norm": 3.7604243755340576, "learning_rate": 1.3884299183643623e-05, "loss": 0.4069, "step": 35240 }, { "epoch": 0.7474072660176878, "grad_norm": 0.38560664653778076, "learning_rate": 1.3883991872789833e-05, "loss": 0.5389, "step": 35241 }, { "epoch": 0.7474284744756209, "grad_norm": 0.3297480344772339, "learning_rate": 1.3883684557616299e-05, "loss": 0.4144, "step": 35242 }, { "epoch": 0.7474496829335538, "grad_norm": 0.3534907102584839, "learning_rate": 1.3883377238123366e-05, "loss": 0.5026, "step": 35243 }, { "epoch": 0.7474708913914869, "grad_norm": 0.34989622235298157, "learning_rate": 1.388306991431137e-05, "loss": 0.4186, "step": 35244 }, { "epoch": 0.74749209984942, "grad_norm": 0.3930860757827759, "learning_rate": 1.3882762586180662e-05, "loss": 0.526, "step": 35245 }, { "epoch": 0.747513308307353, "grad_norm": 0.3972775340080261, "learning_rate": 1.3882455253731579e-05, "loss": 0.5224, "step": 35246 }, { "epoch": 0.747534516765286, "grad_norm": 0.34461867809295654, "learning_rate": 1.388214791696446e-05, "loss": 0.4165, "step": 35247 }, { "epoch": 0.747555725223219, "grad_norm": 0.34619665145874023, "learning_rate": 1.3881840575879653e-05, "loss": 0.5344, "step": 35248 }, { "epoch": 0.7475769336811521, "grad_norm": 0.3528672158718109, "learning_rate": 1.3881533230477495e-05, "loss": 0.5459, "step": 35249 }, { "epoch": 0.747598142139085, "grad_norm": 0.35170623660087585, "learning_rate": 1.3881225880758332e-05, "loss": 0.4197, "step": 35250 }, { "epoch": 0.7476193505970181, "grad_norm": 0.3385973870754242, "learning_rate": 1.3880918526722497e-05, "loss": 0.4613, "step": 35251 }, { "epoch": 0.7476405590549511, "grad_norm": 0.42115309834480286, "learning_rate": 1.3880611168370344e-05, "loss": 0.6751, "step": 35252 }, { "epoch": 0.7476617675128842, "grad_norm": 0.37116438150405884, "learning_rate": 1.3880303805702206e-05, "loss": 0.3751, "step": 35253 }, { "epoch": 0.7476829759708171, "grad_norm": 0.3706701695919037, "learning_rate": 1.387999643871843e-05, "loss": 0.4546, "step": 35254 }, { "epoch": 0.7477041844287502, "grad_norm": 0.502082347869873, "learning_rate": 1.3879689067419356e-05, "loss": 0.5079, "step": 35255 }, { "epoch": 0.7477253928866832, "grad_norm": 0.43003183603286743, "learning_rate": 1.3879381691805324e-05, "loss": 0.5164, "step": 35256 }, { "epoch": 0.7477466013446162, "grad_norm": 0.3936046361923218, "learning_rate": 1.3879074311876677e-05, "loss": 0.5611, "step": 35257 }, { "epoch": 0.7477678098025493, "grad_norm": 0.3522292673587799, "learning_rate": 1.3878766927633755e-05, "loss": 0.502, "step": 35258 }, { "epoch": 0.7477890182604823, "grad_norm": 0.35183587670326233, "learning_rate": 1.3878459539076906e-05, "loss": 0.4677, "step": 35259 }, { "epoch": 0.7478102267184153, "grad_norm": 0.3446827232837677, "learning_rate": 1.387815214620647e-05, "loss": 0.4732, "step": 35260 }, { "epoch": 0.7478314351763483, "grad_norm": 0.6682929992675781, "learning_rate": 1.3877844749022782e-05, "loss": 0.55, "step": 35261 }, { "epoch": 0.7478526436342814, "grad_norm": 0.35289785265922546, "learning_rate": 1.387753734752619e-05, "loss": 0.4637, "step": 35262 }, { "epoch": 0.7478738520922144, "grad_norm": 0.3155863583087921, "learning_rate": 1.3877229941717034e-05, "loss": 0.452, "step": 35263 }, { "epoch": 0.7478950605501474, "grad_norm": 0.35101962089538574, "learning_rate": 1.3876922531595658e-05, "loss": 0.4927, "step": 35264 }, { "epoch": 0.7479162690080804, "grad_norm": 0.3885616064071655, "learning_rate": 1.3876615117162402e-05, "loss": 0.4536, "step": 35265 }, { "epoch": 0.7479374774660135, "grad_norm": 0.36502155661582947, "learning_rate": 1.3876307698417609e-05, "loss": 0.5095, "step": 35266 }, { "epoch": 0.7479586859239464, "grad_norm": 0.3925788402557373, "learning_rate": 1.3876000275361621e-05, "loss": 0.5169, "step": 35267 }, { "epoch": 0.7479798943818795, "grad_norm": 0.37456342577934265, "learning_rate": 1.387569284799478e-05, "loss": 0.469, "step": 35268 }, { "epoch": 0.7480011028398125, "grad_norm": 0.3460007607936859, "learning_rate": 1.3875385416317422e-05, "loss": 0.4997, "step": 35269 }, { "epoch": 0.7480223112977455, "grad_norm": 0.3739606738090515, "learning_rate": 1.3875077980329896e-05, "loss": 0.4647, "step": 35270 }, { "epoch": 0.7480435197556785, "grad_norm": 0.45061540603637695, "learning_rate": 1.3874770540032545e-05, "loss": 0.5254, "step": 35271 }, { "epoch": 0.7480647282136116, "grad_norm": 0.33336755633354187, "learning_rate": 1.3874463095425705e-05, "loss": 0.4839, "step": 35272 }, { "epoch": 0.7480859366715447, "grad_norm": 0.3645511567592621, "learning_rate": 1.3874155646509722e-05, "loss": 0.3998, "step": 35273 }, { "epoch": 0.7481071451294776, "grad_norm": 0.3632062077522278, "learning_rate": 1.3873848193284938e-05, "loss": 0.455, "step": 35274 }, { "epoch": 0.7481283535874107, "grad_norm": 0.36809423565864563, "learning_rate": 1.387354073575169e-05, "loss": 0.4952, "step": 35275 }, { "epoch": 0.7481495620453437, "grad_norm": 0.46961697936058044, "learning_rate": 1.3873233273910328e-05, "loss": 0.4269, "step": 35276 }, { "epoch": 0.7481707705032767, "grad_norm": 0.42507192492485046, "learning_rate": 1.3872925807761187e-05, "loss": 0.4924, "step": 35277 }, { "epoch": 0.7481919789612097, "grad_norm": 0.3838112950325012, "learning_rate": 1.3872618337304609e-05, "loss": 0.4845, "step": 35278 }, { "epoch": 0.7482131874191428, "grad_norm": 0.38522839546203613, "learning_rate": 1.3872310862540943e-05, "loss": 0.4861, "step": 35279 }, { "epoch": 0.7482343958770757, "grad_norm": 0.34715452790260315, "learning_rate": 1.3872003383470523e-05, "loss": 0.4564, "step": 35280 }, { "epoch": 0.7482556043350088, "grad_norm": 0.4188947081565857, "learning_rate": 1.3871695900093698e-05, "loss": 0.5559, "step": 35281 }, { "epoch": 0.7482768127929418, "grad_norm": 0.3744046688079834, "learning_rate": 1.3871388412410807e-05, "loss": 0.5401, "step": 35282 }, { "epoch": 0.7482980212508749, "grad_norm": 0.3183549642562866, "learning_rate": 1.3871080920422186e-05, "loss": 0.4682, "step": 35283 }, { "epoch": 0.7483192297088078, "grad_norm": 0.3288305103778839, "learning_rate": 1.3870773424128186e-05, "loss": 0.4716, "step": 35284 }, { "epoch": 0.7483404381667409, "grad_norm": 0.36124229431152344, "learning_rate": 1.3870465923529146e-05, "loss": 0.539, "step": 35285 }, { "epoch": 0.748361646624674, "grad_norm": 0.3602582812309265, "learning_rate": 1.3870158418625405e-05, "loss": 0.4878, "step": 35286 }, { "epoch": 0.7483828550826069, "grad_norm": 0.3541339933872223, "learning_rate": 1.386985090941731e-05, "loss": 0.4961, "step": 35287 }, { "epoch": 0.74840406354054, "grad_norm": 0.3726169466972351, "learning_rate": 1.3869543395905201e-05, "loss": 0.5416, "step": 35288 }, { "epoch": 0.748425271998473, "grad_norm": 0.43464943766593933, "learning_rate": 1.3869235878089415e-05, "loss": 0.5469, "step": 35289 }, { "epoch": 0.748446480456406, "grad_norm": 0.36409756541252136, "learning_rate": 1.38689283559703e-05, "loss": 0.4586, "step": 35290 }, { "epoch": 0.748467688914339, "grad_norm": 0.7981342077255249, "learning_rate": 1.3868620829548201e-05, "loss": 0.514, "step": 35291 }, { "epoch": 0.7484888973722721, "grad_norm": 0.37107717990875244, "learning_rate": 1.386831329882345e-05, "loss": 0.486, "step": 35292 }, { "epoch": 0.748510105830205, "grad_norm": 0.3343164026737213, "learning_rate": 1.3868005763796399e-05, "loss": 0.4279, "step": 35293 }, { "epoch": 0.7485313142881381, "grad_norm": 0.34672605991363525, "learning_rate": 1.3867698224467382e-05, "loss": 0.5194, "step": 35294 }, { "epoch": 0.7485525227460711, "grad_norm": 0.4193006753921509, "learning_rate": 1.3867390680836748e-05, "loss": 0.506, "step": 35295 }, { "epoch": 0.7485737312040042, "grad_norm": 0.4129004180431366, "learning_rate": 1.3867083132904833e-05, "loss": 0.5517, "step": 35296 }, { "epoch": 0.7485949396619371, "grad_norm": 0.36850279569625854, "learning_rate": 1.3866775580671982e-05, "loss": 0.493, "step": 35297 }, { "epoch": 0.7486161481198702, "grad_norm": 0.3891385495662689, "learning_rate": 1.3866468024138538e-05, "loss": 0.4673, "step": 35298 }, { "epoch": 0.7486373565778033, "grad_norm": 0.9772437810897827, "learning_rate": 1.386616046330484e-05, "loss": 0.5962, "step": 35299 }, { "epoch": 0.7486585650357362, "grad_norm": 0.3183872401714325, "learning_rate": 1.3865852898171232e-05, "loss": 0.4867, "step": 35300 }, { "epoch": 0.7486797734936693, "grad_norm": 0.3617621958255768, "learning_rate": 1.3865545328738059e-05, "loss": 0.518, "step": 35301 }, { "epoch": 0.7487009819516023, "grad_norm": 0.3391402065753937, "learning_rate": 1.3865237755005659e-05, "loss": 0.4842, "step": 35302 }, { "epoch": 0.7487221904095354, "grad_norm": 0.3749304413795471, "learning_rate": 1.3864930176974372e-05, "loss": 0.55, "step": 35303 }, { "epoch": 0.7487433988674683, "grad_norm": 0.3125457465648651, "learning_rate": 1.3864622594644545e-05, "loss": 0.4245, "step": 35304 }, { "epoch": 0.7487646073254014, "grad_norm": 0.3420906662940979, "learning_rate": 1.3864315008016523e-05, "loss": 0.4117, "step": 35305 }, { "epoch": 0.7487858157833344, "grad_norm": 0.3476547598838806, "learning_rate": 1.3864007417090637e-05, "loss": 0.4325, "step": 35306 }, { "epoch": 0.7488070242412674, "grad_norm": 0.3544531762599945, "learning_rate": 1.3863699821867237e-05, "loss": 0.5732, "step": 35307 }, { "epoch": 0.7488282326992004, "grad_norm": 0.40875712037086487, "learning_rate": 1.3863392222346664e-05, "loss": 0.521, "step": 35308 }, { "epoch": 0.7488494411571335, "grad_norm": 0.3524901568889618, "learning_rate": 1.386308461852926e-05, "loss": 0.4404, "step": 35309 }, { "epoch": 0.7488706496150664, "grad_norm": 0.3901533782482147, "learning_rate": 1.3862777010415367e-05, "loss": 0.551, "step": 35310 }, { "epoch": 0.7488918580729995, "grad_norm": 0.37857121229171753, "learning_rate": 1.3862469398005329e-05, "loss": 0.6035, "step": 35311 }, { "epoch": 0.7489130665309325, "grad_norm": 0.36240386962890625, "learning_rate": 1.3862161781299483e-05, "loss": 0.5548, "step": 35312 }, { "epoch": 0.7489342749888656, "grad_norm": 0.33484646677970886, "learning_rate": 1.3861854160298176e-05, "loss": 0.4193, "step": 35313 }, { "epoch": 0.7489554834467986, "grad_norm": 0.3324196934700012, "learning_rate": 1.3861546535001746e-05, "loss": 0.401, "step": 35314 }, { "epoch": 0.7489766919047316, "grad_norm": 0.34687158465385437, "learning_rate": 1.3861238905410539e-05, "loss": 0.4745, "step": 35315 }, { "epoch": 0.7489979003626647, "grad_norm": 0.7833259701728821, "learning_rate": 1.3860931271524896e-05, "loss": 0.5375, "step": 35316 }, { "epoch": 0.7490191088205976, "grad_norm": 0.3770945072174072, "learning_rate": 1.3860623633345158e-05, "loss": 0.5177, "step": 35317 }, { "epoch": 0.7490403172785307, "grad_norm": 0.38649943470954895, "learning_rate": 1.3860315990871669e-05, "loss": 0.4285, "step": 35318 }, { "epoch": 0.7490615257364637, "grad_norm": 0.3403710424900055, "learning_rate": 1.386000834410477e-05, "loss": 0.4494, "step": 35319 }, { "epoch": 0.7490827341943967, "grad_norm": 0.3996237814426422, "learning_rate": 1.3859700693044802e-05, "loss": 0.5072, "step": 35320 }, { "epoch": 0.7491039426523297, "grad_norm": 0.3573148548603058, "learning_rate": 1.3859393037692108e-05, "loss": 0.5265, "step": 35321 }, { "epoch": 0.7491251511102628, "grad_norm": 0.39062803983688354, "learning_rate": 1.3859085378047033e-05, "loss": 0.5493, "step": 35322 }, { "epoch": 0.7491463595681958, "grad_norm": 0.3460042178630829, "learning_rate": 1.3858777714109914e-05, "loss": 0.4858, "step": 35323 }, { "epoch": 0.7491675680261288, "grad_norm": 0.36305922269821167, "learning_rate": 1.38584700458811e-05, "loss": 0.433, "step": 35324 }, { "epoch": 0.7491887764840618, "grad_norm": 0.3812880516052246, "learning_rate": 1.3858162373360929e-05, "loss": 0.5411, "step": 35325 }, { "epoch": 0.7492099849419949, "grad_norm": 0.37892869114875793, "learning_rate": 1.3857854696549737e-05, "loss": 0.5055, "step": 35326 }, { "epoch": 0.7492311933999279, "grad_norm": 0.4008323550224304, "learning_rate": 1.3857547015447878e-05, "loss": 0.5928, "step": 35327 }, { "epoch": 0.7492524018578609, "grad_norm": 0.3714306056499481, "learning_rate": 1.3857239330055687e-05, "loss": 0.5349, "step": 35328 }, { "epoch": 0.749273610315794, "grad_norm": 0.4123426675796509, "learning_rate": 1.3856931640373507e-05, "loss": 0.4648, "step": 35329 }, { "epoch": 0.749294818773727, "grad_norm": 0.3556625545024872, "learning_rate": 1.3856623946401684e-05, "loss": 0.4913, "step": 35330 }, { "epoch": 0.74931602723166, "grad_norm": 0.35877999663352966, "learning_rate": 1.3856316248140554e-05, "loss": 0.5029, "step": 35331 }, { "epoch": 0.749337235689593, "grad_norm": 0.5724379420280457, "learning_rate": 1.3856008545590465e-05, "loss": 0.4037, "step": 35332 }, { "epoch": 0.7493584441475261, "grad_norm": 0.3600965738296509, "learning_rate": 1.3855700838751757e-05, "loss": 0.5361, "step": 35333 }, { "epoch": 0.749379652605459, "grad_norm": 0.3539259135723114, "learning_rate": 1.3855393127624767e-05, "loss": 0.529, "step": 35334 }, { "epoch": 0.7494008610633921, "grad_norm": 0.3720151484012604, "learning_rate": 1.3855085412209847e-05, "loss": 0.4924, "step": 35335 }, { "epoch": 0.7494220695213251, "grad_norm": 0.49033817648887634, "learning_rate": 1.3854777692507334e-05, "loss": 0.5607, "step": 35336 }, { "epoch": 0.7494432779792581, "grad_norm": 0.32314223051071167, "learning_rate": 1.3854469968517571e-05, "loss": 0.4722, "step": 35337 }, { "epoch": 0.7494644864371911, "grad_norm": 0.3594953119754791, "learning_rate": 1.38541622402409e-05, "loss": 0.4885, "step": 35338 }, { "epoch": 0.7494856948951242, "grad_norm": 0.35253235697746277, "learning_rate": 1.3853854507677664e-05, "loss": 0.4596, "step": 35339 }, { "epoch": 0.7495069033530573, "grad_norm": 0.5921490788459778, "learning_rate": 1.3853546770828203e-05, "loss": 0.4313, "step": 35340 }, { "epoch": 0.7495281118109902, "grad_norm": 0.36944225430488586, "learning_rate": 1.385323902969286e-05, "loss": 0.4816, "step": 35341 }, { "epoch": 0.7495493202689233, "grad_norm": 0.37363749742507935, "learning_rate": 1.385293128427198e-05, "loss": 0.5003, "step": 35342 }, { "epoch": 0.7495705287268563, "grad_norm": 0.3480132222175598, "learning_rate": 1.3852623534565901e-05, "loss": 0.4872, "step": 35343 }, { "epoch": 0.7495917371847893, "grad_norm": 0.36974629759788513, "learning_rate": 1.385231578057497e-05, "loss": 0.5761, "step": 35344 }, { "epoch": 0.7496129456427223, "grad_norm": 0.3224252462387085, "learning_rate": 1.3852008022299525e-05, "loss": 0.4754, "step": 35345 }, { "epoch": 0.7496341541006554, "grad_norm": 0.3495519161224365, "learning_rate": 1.385170025973991e-05, "loss": 0.4355, "step": 35346 }, { "epoch": 0.7496553625585883, "grad_norm": 0.4136042296886444, "learning_rate": 1.3851392492896469e-05, "loss": 0.4869, "step": 35347 }, { "epoch": 0.7496765710165214, "grad_norm": 0.34539228677749634, "learning_rate": 1.385108472176954e-05, "loss": 0.4801, "step": 35348 }, { "epoch": 0.7496977794744544, "grad_norm": 0.3001537322998047, "learning_rate": 1.3850776946359472e-05, "loss": 0.4353, "step": 35349 }, { "epoch": 0.7497189879323874, "grad_norm": 0.38693767786026, "learning_rate": 1.3850469166666603e-05, "loss": 0.4748, "step": 35350 }, { "epoch": 0.7497401963903204, "grad_norm": 0.3612171411514282, "learning_rate": 1.3850161382691271e-05, "loss": 0.5144, "step": 35351 }, { "epoch": 0.7497614048482535, "grad_norm": 0.36585599184036255, "learning_rate": 1.3849853594433828e-05, "loss": 0.4327, "step": 35352 }, { "epoch": 0.7497826133061865, "grad_norm": 0.3703692555427551, "learning_rate": 1.3849545801894609e-05, "loss": 0.4769, "step": 35353 }, { "epoch": 0.7498038217641195, "grad_norm": 0.37877705693244934, "learning_rate": 1.3849238005073958e-05, "loss": 0.5072, "step": 35354 }, { "epoch": 0.7498250302220526, "grad_norm": 0.36875665187835693, "learning_rate": 1.3848930203972218e-05, "loss": 0.4547, "step": 35355 }, { "epoch": 0.7498462386799856, "grad_norm": 0.5055211186408997, "learning_rate": 1.3848622398589732e-05, "loss": 0.4946, "step": 35356 }, { "epoch": 0.7498674471379186, "grad_norm": 0.3687591254711151, "learning_rate": 1.384831458892684e-05, "loss": 0.412, "step": 35357 }, { "epoch": 0.7498886555958516, "grad_norm": 0.40722256898880005, "learning_rate": 1.3848006774983891e-05, "loss": 0.4824, "step": 35358 }, { "epoch": 0.7499098640537847, "grad_norm": 0.4657824635505676, "learning_rate": 1.3847698956761218e-05, "loss": 0.5041, "step": 35359 }, { "epoch": 0.7499310725117176, "grad_norm": 0.3719736933708191, "learning_rate": 1.3847391134259167e-05, "loss": 0.602, "step": 35360 }, { "epoch": 0.7499522809696507, "grad_norm": 0.3213033080101013, "learning_rate": 1.3847083307478085e-05, "loss": 0.4649, "step": 35361 }, { "epoch": 0.7499734894275837, "grad_norm": 0.36945804953575134, "learning_rate": 1.3846775476418305e-05, "loss": 0.477, "step": 35362 }, { "epoch": 0.7499946978855168, "grad_norm": 0.3524702787399292, "learning_rate": 1.384646764108018e-05, "loss": 0.4676, "step": 35363 }, { "epoch": 0.7500159063434497, "grad_norm": 0.33290743827819824, "learning_rate": 1.3846159801464046e-05, "loss": 0.4575, "step": 35364 }, { "epoch": 0.7500371148013828, "grad_norm": 0.40020132064819336, "learning_rate": 1.3845851957570244e-05, "loss": 0.4906, "step": 35365 }, { "epoch": 0.7500583232593158, "grad_norm": 0.37351638078689575, "learning_rate": 1.3845544109399118e-05, "loss": 0.4319, "step": 35366 }, { "epoch": 0.7500795317172488, "grad_norm": 0.3596031367778778, "learning_rate": 1.3845236256951013e-05, "loss": 0.467, "step": 35367 }, { "epoch": 0.7501007401751819, "grad_norm": 0.3803996741771698, "learning_rate": 1.384492840022627e-05, "loss": 0.5243, "step": 35368 }, { "epoch": 0.7501219486331149, "grad_norm": 0.3882741630077362, "learning_rate": 1.3844620539225232e-05, "loss": 0.5086, "step": 35369 }, { "epoch": 0.750143157091048, "grad_norm": 0.37065428495407104, "learning_rate": 1.3844312673948241e-05, "loss": 0.5308, "step": 35370 }, { "epoch": 0.7501643655489809, "grad_norm": 0.4111865758895874, "learning_rate": 1.3844004804395637e-05, "loss": 0.5394, "step": 35371 }, { "epoch": 0.750185574006914, "grad_norm": 0.35229673981666565, "learning_rate": 1.3843696930567765e-05, "loss": 0.4655, "step": 35372 }, { "epoch": 0.750206782464847, "grad_norm": 0.31978464126586914, "learning_rate": 1.3843389052464963e-05, "loss": 0.4875, "step": 35373 }, { "epoch": 0.75022799092278, "grad_norm": 0.32579800486564636, "learning_rate": 1.3843081170087583e-05, "loss": 0.4048, "step": 35374 }, { "epoch": 0.750249199380713, "grad_norm": 0.4176226854324341, "learning_rate": 1.3842773283435957e-05, "loss": 0.479, "step": 35375 }, { "epoch": 0.7502704078386461, "grad_norm": 0.36106958985328674, "learning_rate": 1.3842465392510434e-05, "loss": 0.5336, "step": 35376 }, { "epoch": 0.750291616296579, "grad_norm": 0.32364436984062195, "learning_rate": 1.3842157497311355e-05, "loss": 0.4763, "step": 35377 }, { "epoch": 0.7503128247545121, "grad_norm": 0.39901307225227356, "learning_rate": 1.384184959783906e-05, "loss": 0.4643, "step": 35378 }, { "epoch": 0.7503340332124451, "grad_norm": 0.34020644426345825, "learning_rate": 1.3841541694093895e-05, "loss": 0.4955, "step": 35379 }, { "epoch": 0.7503552416703781, "grad_norm": 0.38426437973976135, "learning_rate": 1.3841233786076198e-05, "loss": 0.5342, "step": 35380 }, { "epoch": 0.7503764501283112, "grad_norm": 0.352744996547699, "learning_rate": 1.3840925873786317e-05, "loss": 0.548, "step": 35381 }, { "epoch": 0.7503976585862442, "grad_norm": 0.3917267620563507, "learning_rate": 1.3840617957224589e-05, "loss": 0.4092, "step": 35382 }, { "epoch": 0.7504188670441773, "grad_norm": 0.35589924454689026, "learning_rate": 1.3840310036391362e-05, "loss": 0.5039, "step": 35383 }, { "epoch": 0.7504400755021102, "grad_norm": 0.6085072159767151, "learning_rate": 1.3840002111286977e-05, "loss": 0.3868, "step": 35384 }, { "epoch": 0.7504612839600433, "grad_norm": 0.5064948201179504, "learning_rate": 1.383969418191177e-05, "loss": 0.4529, "step": 35385 }, { "epoch": 0.7504824924179763, "grad_norm": 0.37180742621421814, "learning_rate": 1.3839386248266089e-05, "loss": 0.5213, "step": 35386 }, { "epoch": 0.7505037008759093, "grad_norm": 0.408193975687027, "learning_rate": 1.3839078310350277e-05, "loss": 0.4912, "step": 35387 }, { "epoch": 0.7505249093338423, "grad_norm": 0.34778404235839844, "learning_rate": 1.3838770368164678e-05, "loss": 0.4208, "step": 35388 }, { "epoch": 0.7505461177917754, "grad_norm": 0.38528040051460266, "learning_rate": 1.3838462421709631e-05, "loss": 0.4907, "step": 35389 }, { "epoch": 0.7505673262497083, "grad_norm": 0.3625968396663666, "learning_rate": 1.3838154470985477e-05, "loss": 0.5238, "step": 35390 }, { "epoch": 0.7505885347076414, "grad_norm": 0.46447697281837463, "learning_rate": 1.3837846515992564e-05, "loss": 0.4707, "step": 35391 }, { "epoch": 0.7506097431655744, "grad_norm": 0.4896736741065979, "learning_rate": 1.3837538556731227e-05, "loss": 0.4987, "step": 35392 }, { "epoch": 0.7506309516235075, "grad_norm": 0.3545701801776886, "learning_rate": 1.3837230593201817e-05, "loss": 0.496, "step": 35393 }, { "epoch": 0.7506521600814405, "grad_norm": 0.32459887862205505, "learning_rate": 1.3836922625404672e-05, "loss": 0.4252, "step": 35394 }, { "epoch": 0.7506733685393735, "grad_norm": 0.3276880383491516, "learning_rate": 1.3836614653340135e-05, "loss": 0.4888, "step": 35395 }, { "epoch": 0.7506945769973066, "grad_norm": 0.39089131355285645, "learning_rate": 1.3836306677008547e-05, "loss": 0.5412, "step": 35396 }, { "epoch": 0.7507157854552395, "grad_norm": 0.36122140288352966, "learning_rate": 1.3835998696410252e-05, "loss": 0.5185, "step": 35397 }, { "epoch": 0.7507369939131726, "grad_norm": 0.4141555428504944, "learning_rate": 1.3835690711545594e-05, "loss": 0.5536, "step": 35398 }, { "epoch": 0.7507582023711056, "grad_norm": 0.33743488788604736, "learning_rate": 1.3835382722414911e-05, "loss": 0.5064, "step": 35399 }, { "epoch": 0.7507794108290387, "grad_norm": 0.35599836707115173, "learning_rate": 1.3835074729018553e-05, "loss": 0.4772, "step": 35400 }, { "epoch": 0.7508006192869716, "grad_norm": 0.3828333914279938, "learning_rate": 1.3834766731356858e-05, "loss": 0.4588, "step": 35401 }, { "epoch": 0.7508218277449047, "grad_norm": 0.33875131607055664, "learning_rate": 1.3834458729430167e-05, "loss": 0.473, "step": 35402 }, { "epoch": 0.7508430362028377, "grad_norm": 0.39487212896347046, "learning_rate": 1.3834150723238823e-05, "loss": 0.5091, "step": 35403 }, { "epoch": 0.7508642446607707, "grad_norm": 0.3424469232559204, "learning_rate": 1.3833842712783171e-05, "loss": 0.5744, "step": 35404 }, { "epoch": 0.7508854531187037, "grad_norm": 0.3313791751861572, "learning_rate": 1.3833534698063554e-05, "loss": 0.5002, "step": 35405 }, { "epoch": 0.7509066615766368, "grad_norm": 0.38492798805236816, "learning_rate": 1.3833226679080311e-05, "loss": 0.4967, "step": 35406 }, { "epoch": 0.7509278700345697, "grad_norm": 0.32061728835105896, "learning_rate": 1.383291865583379e-05, "loss": 0.4719, "step": 35407 }, { "epoch": 0.7509490784925028, "grad_norm": 0.39635854959487915, "learning_rate": 1.3832610628324325e-05, "loss": 0.5001, "step": 35408 }, { "epoch": 0.7509702869504359, "grad_norm": 0.3795933127403259, "learning_rate": 1.3832302596552267e-05, "loss": 0.5088, "step": 35409 }, { "epoch": 0.7509914954083688, "grad_norm": 0.4097418487071991, "learning_rate": 1.3831994560517953e-05, "loss": 0.4825, "step": 35410 }, { "epoch": 0.7510127038663019, "grad_norm": 0.34698978066444397, "learning_rate": 1.383168652022173e-05, "loss": 0.4354, "step": 35411 }, { "epoch": 0.7510339123242349, "grad_norm": 0.3507001996040344, "learning_rate": 1.383137847566394e-05, "loss": 0.4712, "step": 35412 }, { "epoch": 0.751055120782168, "grad_norm": 0.3550935387611389, "learning_rate": 1.3831070426844918e-05, "loss": 0.4523, "step": 35413 }, { "epoch": 0.7510763292401009, "grad_norm": 0.4103112816810608, "learning_rate": 1.383076237376502e-05, "loss": 0.5206, "step": 35414 }, { "epoch": 0.751097537698034, "grad_norm": 0.38361403346061707, "learning_rate": 1.383045431642458e-05, "loss": 0.4121, "step": 35415 }, { "epoch": 0.751118746155967, "grad_norm": 0.36244818568229675, "learning_rate": 1.3830146254823937e-05, "loss": 0.4435, "step": 35416 }, { "epoch": 0.7511399546139, "grad_norm": 0.3794757127761841, "learning_rate": 1.3829838188963445e-05, "loss": 0.5397, "step": 35417 }, { "epoch": 0.751161163071833, "grad_norm": 0.35451993346214294, "learning_rate": 1.3829530118843434e-05, "loss": 0.4932, "step": 35418 }, { "epoch": 0.7511823715297661, "grad_norm": 0.3762790560722351, "learning_rate": 1.3829222044464256e-05, "loss": 0.4491, "step": 35419 }, { "epoch": 0.751203579987699, "grad_norm": 0.344186931848526, "learning_rate": 1.3828913965826251e-05, "loss": 0.5155, "step": 35420 }, { "epoch": 0.7512247884456321, "grad_norm": 0.32365643978118896, "learning_rate": 1.3828605882929762e-05, "loss": 0.4337, "step": 35421 }, { "epoch": 0.7512459969035652, "grad_norm": 0.3751038610935211, "learning_rate": 1.3828297795775129e-05, "loss": 0.5152, "step": 35422 }, { "epoch": 0.7512672053614982, "grad_norm": 0.3965093195438385, "learning_rate": 1.3827989704362699e-05, "loss": 0.5686, "step": 35423 }, { "epoch": 0.7512884138194312, "grad_norm": 0.3751049339771271, "learning_rate": 1.3827681608692809e-05, "loss": 0.5048, "step": 35424 }, { "epoch": 0.7513096222773642, "grad_norm": 0.35622528195381165, "learning_rate": 1.3827373508765805e-05, "loss": 0.4544, "step": 35425 }, { "epoch": 0.7513308307352973, "grad_norm": 0.38339680433273315, "learning_rate": 1.3827065404582029e-05, "loss": 0.4792, "step": 35426 }, { "epoch": 0.7513520391932302, "grad_norm": 0.3592066466808319, "learning_rate": 1.3826757296141826e-05, "loss": 0.5061, "step": 35427 }, { "epoch": 0.7513732476511633, "grad_norm": 0.3656657040119171, "learning_rate": 1.3826449183445538e-05, "loss": 0.4902, "step": 35428 }, { "epoch": 0.7513944561090963, "grad_norm": 1.0787088871002197, "learning_rate": 1.3826141066493506e-05, "loss": 0.5003, "step": 35429 }, { "epoch": 0.7514156645670294, "grad_norm": 0.34903350472450256, "learning_rate": 1.3825832945286069e-05, "loss": 0.5557, "step": 35430 }, { "epoch": 0.7514368730249623, "grad_norm": 0.3357844352722168, "learning_rate": 1.3825524819823576e-05, "loss": 0.4754, "step": 35431 }, { "epoch": 0.7514580814828954, "grad_norm": 0.4176185131072998, "learning_rate": 1.3825216690106368e-05, "loss": 0.4515, "step": 35432 }, { "epoch": 0.7514792899408284, "grad_norm": 0.3434167504310608, "learning_rate": 1.3824908556134788e-05, "loss": 0.5196, "step": 35433 }, { "epoch": 0.7515004983987614, "grad_norm": 0.35776636004447937, "learning_rate": 1.3824600417909178e-05, "loss": 0.4599, "step": 35434 }, { "epoch": 0.7515217068566945, "grad_norm": 0.388426810503006, "learning_rate": 1.382429227542988e-05, "loss": 0.4809, "step": 35435 }, { "epoch": 0.7515429153146275, "grad_norm": 0.3686278164386749, "learning_rate": 1.3823984128697237e-05, "loss": 0.5674, "step": 35436 }, { "epoch": 0.7515641237725605, "grad_norm": 0.4274235963821411, "learning_rate": 1.3823675977711592e-05, "loss": 0.4814, "step": 35437 }, { "epoch": 0.7515853322304935, "grad_norm": 0.3689921796321869, "learning_rate": 1.3823367822473288e-05, "loss": 0.3989, "step": 35438 }, { "epoch": 0.7516065406884266, "grad_norm": 0.3295125663280487, "learning_rate": 1.3823059662982667e-05, "loss": 0.5442, "step": 35439 }, { "epoch": 0.7516277491463595, "grad_norm": 0.33251920342445374, "learning_rate": 1.3822751499240074e-05, "loss": 0.4717, "step": 35440 }, { "epoch": 0.7516489576042926, "grad_norm": 0.35347980260849, "learning_rate": 1.3822443331245847e-05, "loss": 0.548, "step": 35441 }, { "epoch": 0.7516701660622256, "grad_norm": 0.36778968572616577, "learning_rate": 1.3822135159000336e-05, "loss": 0.4824, "step": 35442 }, { "epoch": 0.7516913745201587, "grad_norm": 0.6114968061447144, "learning_rate": 1.3821826982503876e-05, "loss": 0.404, "step": 35443 }, { "epoch": 0.7517125829780916, "grad_norm": 0.40552645921707153, "learning_rate": 1.3821518801756813e-05, "loss": 0.5187, "step": 35444 }, { "epoch": 0.7517337914360247, "grad_norm": 0.3762286901473999, "learning_rate": 1.3821210616759493e-05, "loss": 0.5068, "step": 35445 }, { "epoch": 0.7517549998939577, "grad_norm": 0.4598654508590698, "learning_rate": 1.3820902427512256e-05, "loss": 0.4622, "step": 35446 }, { "epoch": 0.7517762083518907, "grad_norm": 0.38279467821121216, "learning_rate": 1.382059423401544e-05, "loss": 0.4803, "step": 35447 }, { "epoch": 0.7517974168098237, "grad_norm": 0.38596367835998535, "learning_rate": 1.3820286036269397e-05, "loss": 0.5064, "step": 35448 }, { "epoch": 0.7518186252677568, "grad_norm": 0.3934788703918457, "learning_rate": 1.3819977834274463e-05, "loss": 0.5295, "step": 35449 }, { "epoch": 0.7518398337256899, "grad_norm": 0.3616545796394348, "learning_rate": 1.381966962803098e-05, "loss": 0.5155, "step": 35450 }, { "epoch": 0.7518610421836228, "grad_norm": 0.4919523298740387, "learning_rate": 1.3819361417539296e-05, "loss": 0.5587, "step": 35451 }, { "epoch": 0.7518822506415559, "grad_norm": 0.41905325651168823, "learning_rate": 1.3819053202799753e-05, "loss": 0.4302, "step": 35452 }, { "epoch": 0.7519034590994889, "grad_norm": 0.35973402857780457, "learning_rate": 1.381874498381269e-05, "loss": 0.4955, "step": 35453 }, { "epoch": 0.7519246675574219, "grad_norm": 0.35145944356918335, "learning_rate": 1.3818436760578456e-05, "loss": 0.5116, "step": 35454 }, { "epoch": 0.7519458760153549, "grad_norm": 0.37636929750442505, "learning_rate": 1.3818128533097386e-05, "loss": 0.5162, "step": 35455 }, { "epoch": 0.751967084473288, "grad_norm": 0.40005943179130554, "learning_rate": 1.3817820301369825e-05, "loss": 0.4772, "step": 35456 }, { "epoch": 0.7519882929312209, "grad_norm": 0.37897729873657227, "learning_rate": 1.3817512065396121e-05, "loss": 0.4493, "step": 35457 }, { "epoch": 0.752009501389154, "grad_norm": 0.3130808174610138, "learning_rate": 1.3817203825176611e-05, "loss": 0.4699, "step": 35458 }, { "epoch": 0.752030709847087, "grad_norm": 0.35140886902809143, "learning_rate": 1.3816895580711643e-05, "loss": 0.4939, "step": 35459 }, { "epoch": 0.75205191830502, "grad_norm": 0.3703022599220276, "learning_rate": 1.3816587332001557e-05, "loss": 0.4316, "step": 35460 }, { "epoch": 0.752073126762953, "grad_norm": 0.3673821687698364, "learning_rate": 1.3816279079046691e-05, "loss": 0.5087, "step": 35461 }, { "epoch": 0.7520943352208861, "grad_norm": 0.3701944649219513, "learning_rate": 1.3815970821847397e-05, "loss": 0.4971, "step": 35462 }, { "epoch": 0.7521155436788192, "grad_norm": 0.3653307557106018, "learning_rate": 1.3815662560404011e-05, "loss": 0.5162, "step": 35463 }, { "epoch": 0.7521367521367521, "grad_norm": 0.3516823947429657, "learning_rate": 1.3815354294716876e-05, "loss": 0.5313, "step": 35464 }, { "epoch": 0.7521579605946852, "grad_norm": 0.4219591021537781, "learning_rate": 1.3815046024786342e-05, "loss": 0.4356, "step": 35465 }, { "epoch": 0.7521791690526182, "grad_norm": 0.35172900557518005, "learning_rate": 1.3814737750612745e-05, "loss": 0.4727, "step": 35466 }, { "epoch": 0.7522003775105512, "grad_norm": 0.33256709575653076, "learning_rate": 1.3814429472196432e-05, "loss": 0.4747, "step": 35467 }, { "epoch": 0.7522215859684842, "grad_norm": 0.34882989525794983, "learning_rate": 1.3814121189537742e-05, "loss": 0.4544, "step": 35468 }, { "epoch": 0.7522427944264173, "grad_norm": 0.34806662797927856, "learning_rate": 1.3813812902637016e-05, "loss": 0.4178, "step": 35469 }, { "epoch": 0.7522640028843502, "grad_norm": 0.3670608103275299, "learning_rate": 1.3813504611494604e-05, "loss": 0.4855, "step": 35470 }, { "epoch": 0.7522852113422833, "grad_norm": 0.507713794708252, "learning_rate": 1.3813196316110843e-05, "loss": 0.498, "step": 35471 }, { "epoch": 0.7523064198002163, "grad_norm": 0.3425883650779724, "learning_rate": 1.3812888016486082e-05, "loss": 0.4933, "step": 35472 }, { "epoch": 0.7523276282581494, "grad_norm": 0.33236682415008545, "learning_rate": 1.3812579712620658e-05, "loss": 0.4584, "step": 35473 }, { "epoch": 0.7523488367160823, "grad_norm": 0.35587430000305176, "learning_rate": 1.3812271404514916e-05, "loss": 0.4372, "step": 35474 }, { "epoch": 0.7523700451740154, "grad_norm": 0.3298661708831787, "learning_rate": 1.3811963092169199e-05, "loss": 0.441, "step": 35475 }, { "epoch": 0.7523912536319485, "grad_norm": 0.47656992077827454, "learning_rate": 1.3811654775583847e-05, "loss": 0.6077, "step": 35476 }, { "epoch": 0.7524124620898814, "grad_norm": 0.36336299777030945, "learning_rate": 1.3811346454759211e-05, "loss": 0.4337, "step": 35477 }, { "epoch": 0.7524336705478145, "grad_norm": 0.3443472683429718, "learning_rate": 1.3811038129695624e-05, "loss": 0.4555, "step": 35478 }, { "epoch": 0.7524548790057475, "grad_norm": 0.3829694986343384, "learning_rate": 1.3810729800393437e-05, "loss": 0.5868, "step": 35479 }, { "epoch": 0.7524760874636806, "grad_norm": 0.36676278710365295, "learning_rate": 1.381042146685299e-05, "loss": 0.5742, "step": 35480 }, { "epoch": 0.7524972959216135, "grad_norm": 0.3588390052318573, "learning_rate": 1.381011312907462e-05, "loss": 0.5265, "step": 35481 }, { "epoch": 0.7525185043795466, "grad_norm": 0.3368280827999115, "learning_rate": 1.3809804787058678e-05, "loss": 0.4734, "step": 35482 }, { "epoch": 0.7525397128374796, "grad_norm": 0.42093905806541443, "learning_rate": 1.3809496440805505e-05, "loss": 0.5195, "step": 35483 }, { "epoch": 0.7525609212954126, "grad_norm": 0.35052353143692017, "learning_rate": 1.3809188090315444e-05, "loss": 0.5718, "step": 35484 }, { "epoch": 0.7525821297533456, "grad_norm": 0.40040215849876404, "learning_rate": 1.3808879735588836e-05, "loss": 0.5135, "step": 35485 }, { "epoch": 0.7526033382112787, "grad_norm": 0.3660010099411011, "learning_rate": 1.3808571376626023e-05, "loss": 0.4795, "step": 35486 }, { "epoch": 0.7526245466692116, "grad_norm": 0.36521434783935547, "learning_rate": 1.3808263013427354e-05, "loss": 0.5086, "step": 35487 }, { "epoch": 0.7526457551271447, "grad_norm": 0.3607030510902405, "learning_rate": 1.3807954645993165e-05, "loss": 0.5346, "step": 35488 }, { "epoch": 0.7526669635850777, "grad_norm": 0.334828644990921, "learning_rate": 1.3807646274323803e-05, "loss": 0.5271, "step": 35489 }, { "epoch": 0.7526881720430108, "grad_norm": 1.0933531522750854, "learning_rate": 1.3807337898419609e-05, "loss": 0.5734, "step": 35490 }, { "epoch": 0.7527093805009438, "grad_norm": 0.35189729928970337, "learning_rate": 1.3807029518280928e-05, "loss": 0.4608, "step": 35491 }, { "epoch": 0.7527305889588768, "grad_norm": 0.4209493398666382, "learning_rate": 1.3806721133908101e-05, "loss": 0.4702, "step": 35492 }, { "epoch": 0.7527517974168099, "grad_norm": 0.37348324060440063, "learning_rate": 1.3806412745301475e-05, "loss": 0.5487, "step": 35493 }, { "epoch": 0.7527730058747428, "grad_norm": 0.3655551075935364, "learning_rate": 1.3806104352461388e-05, "loss": 0.4986, "step": 35494 }, { "epoch": 0.7527942143326759, "grad_norm": 0.7176699042320251, "learning_rate": 1.3805795955388179e-05, "loss": 0.471, "step": 35495 }, { "epoch": 0.7528154227906089, "grad_norm": 0.3988417983055115, "learning_rate": 1.3805487554082203e-05, "loss": 0.475, "step": 35496 }, { "epoch": 0.7528366312485419, "grad_norm": 0.3926446735858917, "learning_rate": 1.3805179148543797e-05, "loss": 0.4728, "step": 35497 }, { "epoch": 0.7528578397064749, "grad_norm": 0.39419013261795044, "learning_rate": 1.3804870738773302e-05, "loss": 0.4471, "step": 35498 }, { "epoch": 0.752879048164408, "grad_norm": 0.40130969882011414, "learning_rate": 1.3804562324771063e-05, "loss": 0.5832, "step": 35499 }, { "epoch": 0.752900256622341, "grad_norm": 0.3599711060523987, "learning_rate": 1.380425390653742e-05, "loss": 0.5123, "step": 35500 }, { "epoch": 0.752921465080274, "grad_norm": 0.3906344473361969, "learning_rate": 1.3803945484072722e-05, "loss": 0.5299, "step": 35501 }, { "epoch": 0.752942673538207, "grad_norm": 0.3782561421394348, "learning_rate": 1.3803637057377311e-05, "loss": 0.4932, "step": 35502 }, { "epoch": 0.7529638819961401, "grad_norm": 0.4209597408771515, "learning_rate": 1.3803328626451524e-05, "loss": 0.5521, "step": 35503 }, { "epoch": 0.7529850904540731, "grad_norm": 0.40910863876342773, "learning_rate": 1.380302019129571e-05, "loss": 0.4433, "step": 35504 }, { "epoch": 0.7530062989120061, "grad_norm": 0.39517444372177124, "learning_rate": 1.380271175191021e-05, "loss": 0.3899, "step": 35505 }, { "epoch": 0.7530275073699392, "grad_norm": 0.38856613636016846, "learning_rate": 1.3802403308295365e-05, "loss": 0.5172, "step": 35506 }, { "epoch": 0.7530487158278721, "grad_norm": 0.3578849732875824, "learning_rate": 1.3802094860451521e-05, "loss": 0.531, "step": 35507 }, { "epoch": 0.7530699242858052, "grad_norm": 0.3498615324497223, "learning_rate": 1.3801786408379023e-05, "loss": 0.486, "step": 35508 }, { "epoch": 0.7530911327437382, "grad_norm": 0.49096840620040894, "learning_rate": 1.3801477952078206e-05, "loss": 0.5587, "step": 35509 }, { "epoch": 0.7531123412016713, "grad_norm": 0.3565394878387451, "learning_rate": 1.3801169491549424e-05, "loss": 0.4244, "step": 35510 }, { "epoch": 0.7531335496596042, "grad_norm": 0.41003215312957764, "learning_rate": 1.3800861026793011e-05, "loss": 0.5158, "step": 35511 }, { "epoch": 0.7531547581175373, "grad_norm": 0.3496242165565491, "learning_rate": 1.3800552557809312e-05, "loss": 0.4625, "step": 35512 }, { "epoch": 0.7531759665754703, "grad_norm": 0.3158007264137268, "learning_rate": 1.3800244084598675e-05, "loss": 0.5182, "step": 35513 }, { "epoch": 0.7531971750334033, "grad_norm": 0.40028834342956543, "learning_rate": 1.3799935607161437e-05, "loss": 0.3682, "step": 35514 }, { "epoch": 0.7532183834913363, "grad_norm": 0.33432212471961975, "learning_rate": 1.3799627125497943e-05, "loss": 0.5285, "step": 35515 }, { "epoch": 0.7532395919492694, "grad_norm": 0.3406277894973755, "learning_rate": 1.379931863960854e-05, "loss": 0.4796, "step": 35516 }, { "epoch": 0.7532608004072024, "grad_norm": 0.33974525332450867, "learning_rate": 1.3799010149493568e-05, "loss": 0.5795, "step": 35517 }, { "epoch": 0.7532820088651354, "grad_norm": 0.3517063856124878, "learning_rate": 1.3798701655153366e-05, "loss": 0.5173, "step": 35518 }, { "epoch": 0.7533032173230685, "grad_norm": 0.35206928849220276, "learning_rate": 1.3798393156588283e-05, "loss": 0.4552, "step": 35519 }, { "epoch": 0.7533244257810014, "grad_norm": 0.38797199726104736, "learning_rate": 1.379808465379866e-05, "loss": 0.5089, "step": 35520 }, { "epoch": 0.7533456342389345, "grad_norm": 0.3593449294567108, "learning_rate": 1.3797776146784841e-05, "loss": 0.4967, "step": 35521 }, { "epoch": 0.7533668426968675, "grad_norm": 0.32593628764152527, "learning_rate": 1.3797467635547166e-05, "loss": 0.4346, "step": 35522 }, { "epoch": 0.7533880511548006, "grad_norm": 0.3764182925224304, "learning_rate": 1.379715912008598e-05, "loss": 0.5176, "step": 35523 }, { "epoch": 0.7534092596127335, "grad_norm": 0.3692891299724579, "learning_rate": 1.3796850600401632e-05, "loss": 0.4214, "step": 35524 }, { "epoch": 0.7534304680706666, "grad_norm": 0.3946950137615204, "learning_rate": 1.3796542076494457e-05, "loss": 0.5056, "step": 35525 }, { "epoch": 0.7534516765285996, "grad_norm": 0.3561626970767975, "learning_rate": 1.3796233548364799e-05, "loss": 0.4016, "step": 35526 }, { "epoch": 0.7534728849865326, "grad_norm": 0.3633468449115753, "learning_rate": 1.3795925016013003e-05, "loss": 0.5063, "step": 35527 }, { "epoch": 0.7534940934444656, "grad_norm": 0.35458728671073914, "learning_rate": 1.3795616479439416e-05, "loss": 0.5012, "step": 35528 }, { "epoch": 0.7535153019023987, "grad_norm": 0.3476210832595825, "learning_rate": 1.3795307938644374e-05, "loss": 0.4556, "step": 35529 }, { "epoch": 0.7535365103603316, "grad_norm": 0.3676840662956238, "learning_rate": 1.3794999393628223e-05, "loss": 0.4803, "step": 35530 }, { "epoch": 0.7535577188182647, "grad_norm": 0.3881908655166626, "learning_rate": 1.3794690844391312e-05, "loss": 0.4608, "step": 35531 }, { "epoch": 0.7535789272761978, "grad_norm": 0.41798681020736694, "learning_rate": 1.3794382290933973e-05, "loss": 0.5375, "step": 35532 }, { "epoch": 0.7536001357341308, "grad_norm": 0.38127580285072327, "learning_rate": 1.3794073733256555e-05, "loss": 0.5203, "step": 35533 }, { "epoch": 0.7536213441920638, "grad_norm": 0.361963152885437, "learning_rate": 1.3793765171359406e-05, "loss": 0.4479, "step": 35534 }, { "epoch": 0.7536425526499968, "grad_norm": 0.3692281246185303, "learning_rate": 1.3793456605242858e-05, "loss": 0.4208, "step": 35535 }, { "epoch": 0.7536637611079299, "grad_norm": 0.3942428231239319, "learning_rate": 1.3793148034907264e-05, "loss": 0.3884, "step": 35536 }, { "epoch": 0.7536849695658628, "grad_norm": 0.4252549409866333, "learning_rate": 1.3792839460352965e-05, "loss": 0.4973, "step": 35537 }, { "epoch": 0.7537061780237959, "grad_norm": 0.40346911549568176, "learning_rate": 1.3792530881580302e-05, "loss": 0.471, "step": 35538 }, { "epoch": 0.7537273864817289, "grad_norm": 0.35068315267562866, "learning_rate": 1.3792222298589617e-05, "loss": 0.5677, "step": 35539 }, { "epoch": 0.753748594939662, "grad_norm": 0.3903205692768097, "learning_rate": 1.3791913711381256e-05, "loss": 0.4799, "step": 35540 }, { "epoch": 0.7537698033975949, "grad_norm": 0.4870021641254425, "learning_rate": 1.3791605119955563e-05, "loss": 0.5445, "step": 35541 }, { "epoch": 0.753791011855528, "grad_norm": 0.36016008257865906, "learning_rate": 1.379129652431288e-05, "loss": 0.4534, "step": 35542 }, { "epoch": 0.753812220313461, "grad_norm": 0.38261356949806213, "learning_rate": 1.3790987924453548e-05, "loss": 0.5153, "step": 35543 }, { "epoch": 0.753833428771394, "grad_norm": 0.4286046326160431, "learning_rate": 1.3790679320377916e-05, "loss": 0.5178, "step": 35544 }, { "epoch": 0.7538546372293271, "grad_norm": 0.38107815384864807, "learning_rate": 1.3790370712086318e-05, "loss": 0.4908, "step": 35545 }, { "epoch": 0.7538758456872601, "grad_norm": 0.35677069425582886, "learning_rate": 1.3790062099579104e-05, "loss": 0.5021, "step": 35546 }, { "epoch": 0.7538970541451931, "grad_norm": 0.3423047959804535, "learning_rate": 1.3789753482856618e-05, "loss": 0.4816, "step": 35547 }, { "epoch": 0.7539182626031261, "grad_norm": 0.4485122561454773, "learning_rate": 1.3789444861919201e-05, "loss": 0.6613, "step": 35548 }, { "epoch": 0.7539394710610592, "grad_norm": 0.3586199879646301, "learning_rate": 1.3789136236767196e-05, "loss": 0.4979, "step": 35549 }, { "epoch": 0.7539606795189921, "grad_norm": 0.35591140389442444, "learning_rate": 1.3788827607400945e-05, "loss": 0.5264, "step": 35550 }, { "epoch": 0.7539818879769252, "grad_norm": 0.36624467372894287, "learning_rate": 1.3788518973820796e-05, "loss": 0.4256, "step": 35551 }, { "epoch": 0.7540030964348582, "grad_norm": 0.3505503535270691, "learning_rate": 1.3788210336027083e-05, "loss": 0.4244, "step": 35552 }, { "epoch": 0.7540243048927913, "grad_norm": 0.3470989167690277, "learning_rate": 1.378790169402016e-05, "loss": 0.512, "step": 35553 }, { "epoch": 0.7540455133507242, "grad_norm": 0.344419002532959, "learning_rate": 1.3787593047800365e-05, "loss": 0.5091, "step": 35554 }, { "epoch": 0.7540667218086573, "grad_norm": 0.42450547218322754, "learning_rate": 1.3787284397368042e-05, "loss": 0.5087, "step": 35555 }, { "epoch": 0.7540879302665903, "grad_norm": 0.3912225067615509, "learning_rate": 1.3786975742723537e-05, "loss": 0.4725, "step": 35556 }, { "epoch": 0.7541091387245233, "grad_norm": 0.38898077607154846, "learning_rate": 1.3786667083867186e-05, "loss": 0.4744, "step": 35557 }, { "epoch": 0.7541303471824564, "grad_norm": 0.3368305563926697, "learning_rate": 1.378635842079934e-05, "loss": 0.457, "step": 35558 }, { "epoch": 0.7541515556403894, "grad_norm": 0.4681236445903778, "learning_rate": 1.3786049753520336e-05, "loss": 0.5117, "step": 35559 }, { "epoch": 0.7541727640983225, "grad_norm": 0.37092694640159607, "learning_rate": 1.378574108203052e-05, "loss": 0.4202, "step": 35560 }, { "epoch": 0.7541939725562554, "grad_norm": 0.3537190556526184, "learning_rate": 1.3785432406330239e-05, "loss": 0.462, "step": 35561 }, { "epoch": 0.7542151810141885, "grad_norm": 0.38147830963134766, "learning_rate": 1.3785123726419834e-05, "loss": 0.4942, "step": 35562 }, { "epoch": 0.7542363894721215, "grad_norm": 0.3441227674484253, "learning_rate": 1.3784815042299641e-05, "loss": 0.4937, "step": 35563 }, { "epoch": 0.7542575979300545, "grad_norm": 0.5658659338951111, "learning_rate": 1.3784506353970015e-05, "loss": 0.4757, "step": 35564 }, { "epoch": 0.7542788063879875, "grad_norm": 0.3817967474460602, "learning_rate": 1.378419766143129e-05, "loss": 0.4365, "step": 35565 }, { "epoch": 0.7543000148459206, "grad_norm": 0.4051051735877991, "learning_rate": 1.3783888964683815e-05, "loss": 0.5182, "step": 35566 }, { "epoch": 0.7543212233038535, "grad_norm": 0.4070185124874115, "learning_rate": 1.3783580263727933e-05, "loss": 0.5863, "step": 35567 }, { "epoch": 0.7543424317617866, "grad_norm": 0.5367431640625, "learning_rate": 1.3783271558563984e-05, "loss": 0.4335, "step": 35568 }, { "epoch": 0.7543636402197196, "grad_norm": 0.3517952561378479, "learning_rate": 1.3782962849192317e-05, "loss": 0.519, "step": 35569 }, { "epoch": 0.7543848486776527, "grad_norm": 0.366024374961853, "learning_rate": 1.378265413561327e-05, "loss": 0.4448, "step": 35570 }, { "epoch": 0.7544060571355856, "grad_norm": 0.4491345286369324, "learning_rate": 1.3782345417827184e-05, "loss": 0.4956, "step": 35571 }, { "epoch": 0.7544272655935187, "grad_norm": 0.35859012603759766, "learning_rate": 1.3782036695834408e-05, "loss": 0.5116, "step": 35572 }, { "epoch": 0.7544484740514518, "grad_norm": 0.6110413670539856, "learning_rate": 1.3781727969635286e-05, "loss": 0.4692, "step": 35573 }, { "epoch": 0.7544696825093847, "grad_norm": 0.33476462960243225, "learning_rate": 1.3781419239230157e-05, "loss": 0.5216, "step": 35574 }, { "epoch": 0.7544908909673178, "grad_norm": 0.3472113609313965, "learning_rate": 1.3781110504619367e-05, "loss": 0.4271, "step": 35575 }, { "epoch": 0.7545120994252508, "grad_norm": 0.3265780806541443, "learning_rate": 1.378080176580326e-05, "loss": 0.4563, "step": 35576 }, { "epoch": 0.7545333078831838, "grad_norm": 0.3574213981628418, "learning_rate": 1.3780493022782175e-05, "loss": 0.4906, "step": 35577 }, { "epoch": 0.7545545163411168, "grad_norm": 0.35140275955200195, "learning_rate": 1.378018427555646e-05, "loss": 0.4169, "step": 35578 }, { "epoch": 0.7545757247990499, "grad_norm": 0.3847143054008484, "learning_rate": 1.3779875524126457e-05, "loss": 0.4935, "step": 35579 }, { "epoch": 0.7545969332569828, "grad_norm": 0.3355076014995575, "learning_rate": 1.377956676849251e-05, "loss": 0.4609, "step": 35580 }, { "epoch": 0.7546181417149159, "grad_norm": 0.38320547342300415, "learning_rate": 1.3779258008654961e-05, "loss": 0.5696, "step": 35581 }, { "epoch": 0.7546393501728489, "grad_norm": 0.4944421648979187, "learning_rate": 1.3778949244614155e-05, "loss": 0.5701, "step": 35582 }, { "epoch": 0.754660558630782, "grad_norm": 0.4171666204929352, "learning_rate": 1.3778640476370433e-05, "loss": 0.5735, "step": 35583 }, { "epoch": 0.7546817670887149, "grad_norm": 0.3673193156719208, "learning_rate": 1.3778331703924139e-05, "loss": 0.4737, "step": 35584 }, { "epoch": 0.754702975546648, "grad_norm": 0.3886094093322754, "learning_rate": 1.3778022927275618e-05, "loss": 0.5023, "step": 35585 }, { "epoch": 0.7547241840045811, "grad_norm": 0.39929211139678955, "learning_rate": 1.3777714146425214e-05, "loss": 0.6122, "step": 35586 }, { "epoch": 0.754745392462514, "grad_norm": 0.4121638238430023, "learning_rate": 1.3777405361373271e-05, "loss": 0.5677, "step": 35587 }, { "epoch": 0.7547666009204471, "grad_norm": 0.36349692940711975, "learning_rate": 1.3777096572120128e-05, "loss": 0.5381, "step": 35588 }, { "epoch": 0.7547878093783801, "grad_norm": 0.3239296078681946, "learning_rate": 1.3776787778666132e-05, "loss": 0.5093, "step": 35589 }, { "epoch": 0.7548090178363132, "grad_norm": 0.36502575874328613, "learning_rate": 1.3776478981011626e-05, "loss": 0.5461, "step": 35590 }, { "epoch": 0.7548302262942461, "grad_norm": 0.3618076741695404, "learning_rate": 1.3776170179156947e-05, "loss": 0.4327, "step": 35591 }, { "epoch": 0.7548514347521792, "grad_norm": 0.4094943106174469, "learning_rate": 1.3775861373102451e-05, "loss": 0.5057, "step": 35592 }, { "epoch": 0.7548726432101122, "grad_norm": 0.33778852224349976, "learning_rate": 1.3775552562848473e-05, "loss": 0.5243, "step": 35593 }, { "epoch": 0.7548938516680452, "grad_norm": 0.3720458149909973, "learning_rate": 1.3775243748395357e-05, "loss": 0.4873, "step": 35594 }, { "epoch": 0.7549150601259782, "grad_norm": 0.36115995049476624, "learning_rate": 1.377493492974345e-05, "loss": 0.4527, "step": 35595 }, { "epoch": 0.7549362685839113, "grad_norm": 0.3372347950935364, "learning_rate": 1.3774626106893093e-05, "loss": 0.474, "step": 35596 }, { "epoch": 0.7549574770418442, "grad_norm": 0.40318796038627625, "learning_rate": 1.3774317279844627e-05, "loss": 0.5135, "step": 35597 }, { "epoch": 0.7549786854997773, "grad_norm": 0.34308695793151855, "learning_rate": 1.3774008448598399e-05, "loss": 0.4915, "step": 35598 }, { "epoch": 0.7549998939577104, "grad_norm": 0.37185385823249817, "learning_rate": 1.3773699613154752e-05, "loss": 0.5242, "step": 35599 }, { "epoch": 0.7550211024156434, "grad_norm": 0.442376047372818, "learning_rate": 1.377339077351403e-05, "loss": 0.6124, "step": 35600 }, { "epoch": 0.7550423108735764, "grad_norm": 0.33760321140289307, "learning_rate": 1.3773081929676575e-05, "loss": 0.4446, "step": 35601 }, { "epoch": 0.7550635193315094, "grad_norm": 0.3926815688610077, "learning_rate": 1.3772773081642732e-05, "loss": 0.4789, "step": 35602 }, { "epoch": 0.7550847277894425, "grad_norm": 0.35209745168685913, "learning_rate": 1.3772464229412843e-05, "loss": 0.5089, "step": 35603 }, { "epoch": 0.7551059362473754, "grad_norm": 0.3434253931045532, "learning_rate": 1.3772155372987249e-05, "loss": 0.4696, "step": 35604 }, { "epoch": 0.7551271447053085, "grad_norm": 0.33623117208480835, "learning_rate": 1.3771846512366298e-05, "loss": 0.3879, "step": 35605 }, { "epoch": 0.7551483531632415, "grad_norm": 0.34150412678718567, "learning_rate": 1.3771537647550335e-05, "loss": 0.4976, "step": 35606 }, { "epoch": 0.7551695616211745, "grad_norm": 0.3643529713153839, "learning_rate": 1.3771228778539701e-05, "loss": 0.3909, "step": 35607 }, { "epoch": 0.7551907700791075, "grad_norm": 0.3489795923233032, "learning_rate": 1.3770919905334734e-05, "loss": 0.4953, "step": 35608 }, { "epoch": 0.7552119785370406, "grad_norm": 0.47622546553611755, "learning_rate": 1.3770611027935787e-05, "loss": 0.41, "step": 35609 }, { "epoch": 0.7552331869949735, "grad_norm": 0.363927960395813, "learning_rate": 1.3770302146343197e-05, "loss": 0.4988, "step": 35610 }, { "epoch": 0.7552543954529066, "grad_norm": 0.5487468242645264, "learning_rate": 1.3769993260557309e-05, "loss": 0.5195, "step": 35611 }, { "epoch": 0.7552756039108396, "grad_norm": 0.3693543076515198, "learning_rate": 1.3769684370578469e-05, "loss": 0.4838, "step": 35612 }, { "epoch": 0.7552968123687727, "grad_norm": 0.3846442997455597, "learning_rate": 1.3769375476407016e-05, "loss": 0.4348, "step": 35613 }, { "epoch": 0.7553180208267057, "grad_norm": 0.35665661096572876, "learning_rate": 1.3769066578043302e-05, "loss": 0.5275, "step": 35614 }, { "epoch": 0.7553392292846387, "grad_norm": 0.3567695617675781, "learning_rate": 1.376875767548766e-05, "loss": 0.5407, "step": 35615 }, { "epoch": 0.7553604377425718, "grad_norm": 0.367245078086853, "learning_rate": 1.3768448768740442e-05, "loss": 0.534, "step": 35616 }, { "epoch": 0.7553816462005047, "grad_norm": 0.3330330550670624, "learning_rate": 1.3768139857801983e-05, "loss": 0.4972, "step": 35617 }, { "epoch": 0.7554028546584378, "grad_norm": 0.353242427110672, "learning_rate": 1.3767830942672638e-05, "loss": 0.4661, "step": 35618 }, { "epoch": 0.7554240631163708, "grad_norm": 0.34888792037963867, "learning_rate": 1.3767522023352737e-05, "loss": 0.4723, "step": 35619 }, { "epoch": 0.7554452715743039, "grad_norm": 0.35587212443351746, "learning_rate": 1.3767213099842638e-05, "loss": 0.4528, "step": 35620 }, { "epoch": 0.7554664800322368, "grad_norm": 0.3449985384941101, "learning_rate": 1.3766904172142675e-05, "loss": 0.4615, "step": 35621 }, { "epoch": 0.7554876884901699, "grad_norm": 0.5762926936149597, "learning_rate": 1.3766595240253192e-05, "loss": 0.4843, "step": 35622 }, { "epoch": 0.7555088969481029, "grad_norm": 0.35851290822029114, "learning_rate": 1.3766286304174531e-05, "loss": 0.4758, "step": 35623 }, { "epoch": 0.7555301054060359, "grad_norm": 0.45205235481262207, "learning_rate": 1.3765977363907045e-05, "loss": 0.5112, "step": 35624 }, { "epoch": 0.7555513138639689, "grad_norm": 0.3692224323749542, "learning_rate": 1.376566841945107e-05, "loss": 0.4088, "step": 35625 }, { "epoch": 0.755572522321902, "grad_norm": 0.39939063787460327, "learning_rate": 1.3765359470806953e-05, "loss": 0.557, "step": 35626 }, { "epoch": 0.755593730779835, "grad_norm": 0.38748449087142944, "learning_rate": 1.3765050517975035e-05, "loss": 0.5057, "step": 35627 }, { "epoch": 0.755614939237768, "grad_norm": 0.3688766658306122, "learning_rate": 1.376474156095566e-05, "loss": 0.5109, "step": 35628 }, { "epoch": 0.7556361476957011, "grad_norm": 0.4053579568862915, "learning_rate": 1.3764432599749169e-05, "loss": 0.4408, "step": 35629 }, { "epoch": 0.755657356153634, "grad_norm": 0.3729434013366699, "learning_rate": 1.3764123634355912e-05, "loss": 0.4876, "step": 35630 }, { "epoch": 0.7556785646115671, "grad_norm": 0.3720313310623169, "learning_rate": 1.376381466477623e-05, "loss": 0.5384, "step": 35631 }, { "epoch": 0.7556997730695001, "grad_norm": 0.37276941537857056, "learning_rate": 1.3763505691010466e-05, "loss": 0.5462, "step": 35632 }, { "epoch": 0.7557209815274332, "grad_norm": 0.344171404838562, "learning_rate": 1.3763196713058962e-05, "loss": 0.591, "step": 35633 }, { "epoch": 0.7557421899853661, "grad_norm": 0.3340390920639038, "learning_rate": 1.3762887730922065e-05, "loss": 0.5095, "step": 35634 }, { "epoch": 0.7557633984432992, "grad_norm": 0.3785504102706909, "learning_rate": 1.3762578744600117e-05, "loss": 0.5322, "step": 35635 }, { "epoch": 0.7557846069012322, "grad_norm": 0.33860307931900024, "learning_rate": 1.3762269754093459e-05, "loss": 0.512, "step": 35636 }, { "epoch": 0.7558058153591652, "grad_norm": 0.36184820532798767, "learning_rate": 1.3761960759402437e-05, "loss": 0.4728, "step": 35637 }, { "epoch": 0.7558270238170982, "grad_norm": 0.3268619477748871, "learning_rate": 1.3761651760527399e-05, "loss": 0.4644, "step": 35638 }, { "epoch": 0.7558482322750313, "grad_norm": 0.3639022409915924, "learning_rate": 1.3761342757468681e-05, "loss": 0.472, "step": 35639 }, { "epoch": 0.7558694407329644, "grad_norm": 0.39994773268699646, "learning_rate": 1.3761033750226634e-05, "loss": 0.4632, "step": 35640 }, { "epoch": 0.7558906491908973, "grad_norm": 0.3980013132095337, "learning_rate": 1.3760724738801594e-05, "loss": 0.5001, "step": 35641 }, { "epoch": 0.7559118576488304, "grad_norm": 0.3277849555015564, "learning_rate": 1.3760415723193909e-05, "loss": 0.4642, "step": 35642 }, { "epoch": 0.7559330661067634, "grad_norm": 0.38699871301651, "learning_rate": 1.3760106703403924e-05, "loss": 0.4343, "step": 35643 }, { "epoch": 0.7559542745646964, "grad_norm": 0.3878548741340637, "learning_rate": 1.3759797679431981e-05, "loss": 0.6208, "step": 35644 }, { "epoch": 0.7559754830226294, "grad_norm": 0.37200313806533813, "learning_rate": 1.3759488651278424e-05, "loss": 0.5262, "step": 35645 }, { "epoch": 0.7559966914805625, "grad_norm": 0.3412075340747833, "learning_rate": 1.3759179618943596e-05, "loss": 0.3984, "step": 35646 }, { "epoch": 0.7560178999384954, "grad_norm": 0.4024847149848938, "learning_rate": 1.375887058242784e-05, "loss": 0.5229, "step": 35647 }, { "epoch": 0.7560391083964285, "grad_norm": 0.38981547951698303, "learning_rate": 1.3758561541731501e-05, "loss": 0.4519, "step": 35648 }, { "epoch": 0.7560603168543615, "grad_norm": 0.3519769012928009, "learning_rate": 1.3758252496854923e-05, "loss": 0.5359, "step": 35649 }, { "epoch": 0.7560815253122946, "grad_norm": 0.3830288052558899, "learning_rate": 1.3757943447798447e-05, "loss": 0.5164, "step": 35650 }, { "epoch": 0.7561027337702275, "grad_norm": 0.3644936978816986, "learning_rate": 1.3757634394562423e-05, "loss": 0.5372, "step": 35651 }, { "epoch": 0.7561239422281606, "grad_norm": 0.4129370450973511, "learning_rate": 1.375732533714719e-05, "loss": 0.499, "step": 35652 }, { "epoch": 0.7561451506860936, "grad_norm": 0.3418812155723572, "learning_rate": 1.375701627555309e-05, "loss": 0.5099, "step": 35653 }, { "epoch": 0.7561663591440266, "grad_norm": 0.35626062750816345, "learning_rate": 1.3756707209780472e-05, "loss": 0.5308, "step": 35654 }, { "epoch": 0.7561875676019597, "grad_norm": 0.4022563397884369, "learning_rate": 1.3756398139829675e-05, "loss": 0.4522, "step": 35655 }, { "epoch": 0.7562087760598927, "grad_norm": 0.37847116589546204, "learning_rate": 1.375608906570104e-05, "loss": 0.5061, "step": 35656 }, { "epoch": 0.7562299845178257, "grad_norm": 0.43846526741981506, "learning_rate": 1.3755779987394923e-05, "loss": 0.5349, "step": 35657 }, { "epoch": 0.7562511929757587, "grad_norm": 0.35940659046173096, "learning_rate": 1.375547090491166e-05, "loss": 0.454, "step": 35658 }, { "epoch": 0.7562724014336918, "grad_norm": 0.33579641580581665, "learning_rate": 1.375516181825159e-05, "loss": 0.4365, "step": 35659 }, { "epoch": 0.7562936098916248, "grad_norm": 0.40653353929519653, "learning_rate": 1.3754852727415064e-05, "loss": 0.5145, "step": 35660 }, { "epoch": 0.7563148183495578, "grad_norm": 0.36160463094711304, "learning_rate": 1.3754543632402425e-05, "loss": 0.5027, "step": 35661 }, { "epoch": 0.7563360268074908, "grad_norm": 0.37197911739349365, "learning_rate": 1.375423453321401e-05, "loss": 0.4995, "step": 35662 }, { "epoch": 0.7563572352654239, "grad_norm": 0.3452659547328949, "learning_rate": 1.3753925429850172e-05, "loss": 0.4226, "step": 35663 }, { "epoch": 0.7563784437233568, "grad_norm": 0.39974895119667053, "learning_rate": 1.375361632231125e-05, "loss": 0.4715, "step": 35664 }, { "epoch": 0.7563996521812899, "grad_norm": 0.3327670395374298, "learning_rate": 1.375330721059759e-05, "loss": 0.5294, "step": 35665 }, { "epoch": 0.7564208606392229, "grad_norm": 0.32474833726882935, "learning_rate": 1.3752998094709534e-05, "loss": 0.4675, "step": 35666 }, { "epoch": 0.756442069097156, "grad_norm": 1.1266372203826904, "learning_rate": 1.3752688974647424e-05, "loss": 0.4284, "step": 35667 }, { "epoch": 0.756463277555089, "grad_norm": 0.35271158814430237, "learning_rate": 1.3752379850411606e-05, "loss": 0.5071, "step": 35668 }, { "epoch": 0.756484486013022, "grad_norm": 0.38104167580604553, "learning_rate": 1.3752070722002427e-05, "loss": 0.5016, "step": 35669 }, { "epoch": 0.7565056944709551, "grad_norm": 0.4451436698436737, "learning_rate": 1.3751761589420222e-05, "loss": 0.495, "step": 35670 }, { "epoch": 0.756526902928888, "grad_norm": 0.5145995020866394, "learning_rate": 1.3751452452665346e-05, "loss": 0.4499, "step": 35671 }, { "epoch": 0.7565481113868211, "grad_norm": 0.3942887485027313, "learning_rate": 1.3751143311738135e-05, "loss": 0.5291, "step": 35672 }, { "epoch": 0.7565693198447541, "grad_norm": 0.348820298910141, "learning_rate": 1.3750834166638933e-05, "loss": 0.4534, "step": 35673 }, { "epoch": 0.7565905283026871, "grad_norm": 0.3637813627719879, "learning_rate": 1.3750525017368089e-05, "loss": 0.5242, "step": 35674 }, { "epoch": 0.7566117367606201, "grad_norm": 0.3913623094558716, "learning_rate": 1.3750215863925942e-05, "loss": 0.5413, "step": 35675 }, { "epoch": 0.7566329452185532, "grad_norm": 0.39577516913414, "learning_rate": 1.3749906706312836e-05, "loss": 0.5854, "step": 35676 }, { "epoch": 0.7566541536764861, "grad_norm": 0.40451717376708984, "learning_rate": 1.3749597544529121e-05, "loss": 0.5053, "step": 35677 }, { "epoch": 0.7566753621344192, "grad_norm": 0.3478371202945709, "learning_rate": 1.3749288378575133e-05, "loss": 0.5495, "step": 35678 }, { "epoch": 0.7566965705923522, "grad_norm": 0.3464919626712799, "learning_rate": 1.3748979208451218e-05, "loss": 0.429, "step": 35679 }, { "epoch": 0.7567177790502853, "grad_norm": 0.3662320077419281, "learning_rate": 1.3748670034157726e-05, "loss": 0.5226, "step": 35680 }, { "epoch": 0.7567389875082183, "grad_norm": 0.3561994433403015, "learning_rate": 1.3748360855694988e-05, "loss": 0.484, "step": 35681 }, { "epoch": 0.7567601959661513, "grad_norm": 0.4779391288757324, "learning_rate": 1.374805167306336e-05, "loss": 0.5092, "step": 35682 }, { "epoch": 0.7567814044240844, "grad_norm": 0.33754196763038635, "learning_rate": 1.3747742486263182e-05, "loss": 0.4665, "step": 35683 }, { "epoch": 0.7568026128820173, "grad_norm": 0.3742210268974304, "learning_rate": 1.3747433295294796e-05, "loss": 0.556, "step": 35684 }, { "epoch": 0.7568238213399504, "grad_norm": 0.3564107120037079, "learning_rate": 1.3747124100158549e-05, "loss": 0.518, "step": 35685 }, { "epoch": 0.7568450297978834, "grad_norm": 0.32975488901138306, "learning_rate": 1.3746814900854783e-05, "loss": 0.5116, "step": 35686 }, { "epoch": 0.7568662382558164, "grad_norm": 0.3503290116786957, "learning_rate": 1.3746505697383837e-05, "loss": 0.4605, "step": 35687 }, { "epoch": 0.7568874467137494, "grad_norm": 0.5083074569702148, "learning_rate": 1.3746196489746064e-05, "loss": 0.4827, "step": 35688 }, { "epoch": 0.7569086551716825, "grad_norm": 0.3781835734844208, "learning_rate": 1.3745887277941804e-05, "loss": 0.4585, "step": 35689 }, { "epoch": 0.7569298636296155, "grad_norm": 0.3485759198665619, "learning_rate": 1.3745578061971401e-05, "loss": 0.5368, "step": 35690 }, { "epoch": 0.7569510720875485, "grad_norm": 0.34786349534988403, "learning_rate": 1.3745268841835197e-05, "loss": 0.5024, "step": 35691 }, { "epoch": 0.7569722805454815, "grad_norm": 0.4043615758419037, "learning_rate": 1.374495961753354e-05, "loss": 0.5234, "step": 35692 }, { "epoch": 0.7569934890034146, "grad_norm": 0.364547461271286, "learning_rate": 1.374465038906677e-05, "loss": 0.5342, "step": 35693 }, { "epoch": 0.7570146974613475, "grad_norm": 0.3494906425476074, "learning_rate": 1.3744341156435232e-05, "loss": 0.5383, "step": 35694 }, { "epoch": 0.7570359059192806, "grad_norm": 0.38328638672828674, "learning_rate": 1.3744031919639269e-05, "loss": 0.4581, "step": 35695 }, { "epoch": 0.7570571143772137, "grad_norm": 0.3495907485485077, "learning_rate": 1.3743722678679227e-05, "loss": 0.5176, "step": 35696 }, { "epoch": 0.7570783228351466, "grad_norm": 0.48009976744651794, "learning_rate": 1.3743413433555452e-05, "loss": 0.5204, "step": 35697 }, { "epoch": 0.7570995312930797, "grad_norm": 0.3814569413661957, "learning_rate": 1.3743104184268284e-05, "loss": 0.4519, "step": 35698 }, { "epoch": 0.7571207397510127, "grad_norm": 0.38741520047187805, "learning_rate": 1.3742794930818067e-05, "loss": 0.4248, "step": 35699 }, { "epoch": 0.7571419482089458, "grad_norm": 0.3734370172023773, "learning_rate": 1.3742485673205144e-05, "loss": 0.5125, "step": 35700 }, { "epoch": 0.7571631566668787, "grad_norm": 0.41603216528892517, "learning_rate": 1.3742176411429862e-05, "loss": 0.537, "step": 35701 }, { "epoch": 0.7571843651248118, "grad_norm": 0.4295312464237213, "learning_rate": 1.3741867145492567e-05, "loss": 0.4849, "step": 35702 }, { "epoch": 0.7572055735827448, "grad_norm": 0.3748757839202881, "learning_rate": 1.3741557875393599e-05, "loss": 0.5145, "step": 35703 }, { "epoch": 0.7572267820406778, "grad_norm": 0.41635292768478394, "learning_rate": 1.3741248601133302e-05, "loss": 0.5223, "step": 35704 }, { "epoch": 0.7572479904986108, "grad_norm": 0.33950480818748474, "learning_rate": 1.3740939322712019e-05, "loss": 0.4645, "step": 35705 }, { "epoch": 0.7572691989565439, "grad_norm": 0.416348934173584, "learning_rate": 1.3740630040130096e-05, "loss": 0.525, "step": 35706 }, { "epoch": 0.7572904074144768, "grad_norm": 0.3884860873222351, "learning_rate": 1.3740320753387879e-05, "loss": 0.4678, "step": 35707 }, { "epoch": 0.7573116158724099, "grad_norm": 0.41460832953453064, "learning_rate": 1.3740011462485709e-05, "loss": 0.5244, "step": 35708 }, { "epoch": 0.757332824330343, "grad_norm": 0.38407158851623535, "learning_rate": 1.373970216742393e-05, "loss": 0.5749, "step": 35709 }, { "epoch": 0.757354032788276, "grad_norm": 0.3828622102737427, "learning_rate": 1.3739392868202888e-05, "loss": 0.5522, "step": 35710 }, { "epoch": 0.757375241246209, "grad_norm": 0.34652385115623474, "learning_rate": 1.3739083564822925e-05, "loss": 0.4619, "step": 35711 }, { "epoch": 0.757396449704142, "grad_norm": 0.4733017086982727, "learning_rate": 1.3738774257284386e-05, "loss": 0.5038, "step": 35712 }, { "epoch": 0.7574176581620751, "grad_norm": 0.3203776478767395, "learning_rate": 1.3738464945587613e-05, "loss": 0.4566, "step": 35713 }, { "epoch": 0.757438866620008, "grad_norm": 0.38344475626945496, "learning_rate": 1.3738155629732954e-05, "loss": 0.5081, "step": 35714 }, { "epoch": 0.7574600750779411, "grad_norm": 0.39663833379745483, "learning_rate": 1.3737846309720748e-05, "loss": 0.5173, "step": 35715 }, { "epoch": 0.7574812835358741, "grad_norm": 0.33867624402046204, "learning_rate": 1.3737536985551346e-05, "loss": 0.4761, "step": 35716 }, { "epoch": 0.7575024919938071, "grad_norm": 0.37686777114868164, "learning_rate": 1.3737227657225085e-05, "loss": 0.4651, "step": 35717 }, { "epoch": 0.7575237004517401, "grad_norm": 0.33757835626602173, "learning_rate": 1.3736918324742311e-05, "loss": 0.4436, "step": 35718 }, { "epoch": 0.7575449089096732, "grad_norm": 0.36832526326179504, "learning_rate": 1.3736608988103371e-05, "loss": 0.5046, "step": 35719 }, { "epoch": 0.7575661173676062, "grad_norm": 0.4637281596660614, "learning_rate": 1.3736299647308609e-05, "loss": 0.4663, "step": 35720 }, { "epoch": 0.7575873258255392, "grad_norm": 0.5542876720428467, "learning_rate": 1.3735990302358361e-05, "loss": 0.5037, "step": 35721 }, { "epoch": 0.7576085342834723, "grad_norm": 0.5110278129577637, "learning_rate": 1.3735680953252981e-05, "loss": 0.5616, "step": 35722 }, { "epoch": 0.7576297427414053, "grad_norm": 0.33696120977401733, "learning_rate": 1.3735371599992808e-05, "loss": 0.4105, "step": 35723 }, { "epoch": 0.7576509511993383, "grad_norm": 0.3453454375267029, "learning_rate": 1.373506224257819e-05, "loss": 0.5229, "step": 35724 }, { "epoch": 0.7576721596572713, "grad_norm": 0.48894819617271423, "learning_rate": 1.3734752881009463e-05, "loss": 0.4394, "step": 35725 }, { "epoch": 0.7576933681152044, "grad_norm": 0.33309853076934814, "learning_rate": 1.3734443515286982e-05, "loss": 0.5154, "step": 35726 }, { "epoch": 0.7577145765731373, "grad_norm": 0.49031105637550354, "learning_rate": 1.373413414541108e-05, "loss": 0.4856, "step": 35727 }, { "epoch": 0.7577357850310704, "grad_norm": 0.3371525704860687, "learning_rate": 1.3733824771382112e-05, "loss": 0.4201, "step": 35728 }, { "epoch": 0.7577569934890034, "grad_norm": 0.35845696926116943, "learning_rate": 1.3733515393200412e-05, "loss": 0.535, "step": 35729 }, { "epoch": 0.7577782019469365, "grad_norm": 0.3374863564968109, "learning_rate": 1.373320601086633e-05, "loss": 0.5356, "step": 35730 }, { "epoch": 0.7577994104048694, "grad_norm": 0.31837189197540283, "learning_rate": 1.373289662438021e-05, "loss": 0.4551, "step": 35731 }, { "epoch": 0.7578206188628025, "grad_norm": 0.3927244246006012, "learning_rate": 1.3732587233742392e-05, "loss": 0.5911, "step": 35732 }, { "epoch": 0.7578418273207355, "grad_norm": 0.39555060863494873, "learning_rate": 1.3732277838953225e-05, "loss": 0.5421, "step": 35733 }, { "epoch": 0.7578630357786685, "grad_norm": 0.36398985981941223, "learning_rate": 1.3731968440013051e-05, "loss": 0.5081, "step": 35734 }, { "epoch": 0.7578842442366016, "grad_norm": 0.314724326133728, "learning_rate": 1.373165903692221e-05, "loss": 0.4609, "step": 35735 }, { "epoch": 0.7579054526945346, "grad_norm": 0.3991685211658478, "learning_rate": 1.3731349629681056e-05, "loss": 0.4942, "step": 35736 }, { "epoch": 0.7579266611524677, "grad_norm": 0.3921107351779938, "learning_rate": 1.3731040218289925e-05, "loss": 0.4929, "step": 35737 }, { "epoch": 0.7579478696104006, "grad_norm": 0.5063672065734863, "learning_rate": 1.3730730802749164e-05, "loss": 0.7314, "step": 35738 }, { "epoch": 0.7579690780683337, "grad_norm": 0.38088351488113403, "learning_rate": 1.3730421383059116e-05, "loss": 0.6117, "step": 35739 }, { "epoch": 0.7579902865262667, "grad_norm": 0.3584584593772888, "learning_rate": 1.3730111959220128e-05, "loss": 0.5378, "step": 35740 }, { "epoch": 0.7580114949841997, "grad_norm": 0.3535519242286682, "learning_rate": 1.3729802531232536e-05, "loss": 0.4905, "step": 35741 }, { "epoch": 0.7580327034421327, "grad_norm": 0.4650380313396454, "learning_rate": 1.3729493099096697e-05, "loss": 0.5305, "step": 35742 }, { "epoch": 0.7580539119000658, "grad_norm": 0.3448982536792755, "learning_rate": 1.3729183662812943e-05, "loss": 0.4902, "step": 35743 }, { "epoch": 0.7580751203579987, "grad_norm": 0.388536274433136, "learning_rate": 1.3728874222381628e-05, "loss": 0.5927, "step": 35744 }, { "epoch": 0.7580963288159318, "grad_norm": 0.3578687310218811, "learning_rate": 1.3728564777803089e-05, "loss": 0.4778, "step": 35745 }, { "epoch": 0.7581175372738648, "grad_norm": 0.40420645475387573, "learning_rate": 1.372825532907767e-05, "loss": 0.5595, "step": 35746 }, { "epoch": 0.7581387457317978, "grad_norm": 0.33796361088752747, "learning_rate": 1.3727945876205722e-05, "loss": 0.4706, "step": 35747 }, { "epoch": 0.7581599541897308, "grad_norm": 0.36416497826576233, "learning_rate": 1.3727636419187584e-05, "loss": 0.5748, "step": 35748 }, { "epoch": 0.7581811626476639, "grad_norm": 0.39397409558296204, "learning_rate": 1.3727326958023599e-05, "loss": 0.5609, "step": 35749 }, { "epoch": 0.758202371105597, "grad_norm": 0.37061625719070435, "learning_rate": 1.3727017492714116e-05, "loss": 0.4781, "step": 35750 }, { "epoch": 0.7582235795635299, "grad_norm": 0.3967639207839966, "learning_rate": 1.3726708023259474e-05, "loss": 0.4628, "step": 35751 }, { "epoch": 0.758244788021463, "grad_norm": 0.3296924829483032, "learning_rate": 1.3726398549660021e-05, "loss": 0.5234, "step": 35752 }, { "epoch": 0.758265996479396, "grad_norm": 0.4442453682422638, "learning_rate": 1.37260890719161e-05, "loss": 0.4857, "step": 35753 }, { "epoch": 0.758287204937329, "grad_norm": 0.3399457633495331, "learning_rate": 1.3725779590028056e-05, "loss": 0.4469, "step": 35754 }, { "epoch": 0.758308413395262, "grad_norm": 0.334526002407074, "learning_rate": 1.372547010399623e-05, "loss": 0.3975, "step": 35755 }, { "epoch": 0.7583296218531951, "grad_norm": 0.3500606119632721, "learning_rate": 1.3725160613820971e-05, "loss": 0.4495, "step": 35756 }, { "epoch": 0.758350830311128, "grad_norm": 0.3317764103412628, "learning_rate": 1.372485111950262e-05, "loss": 0.4583, "step": 35757 }, { "epoch": 0.7583720387690611, "grad_norm": 0.3391433358192444, "learning_rate": 1.372454162104152e-05, "loss": 0.5237, "step": 35758 }, { "epoch": 0.7583932472269941, "grad_norm": 0.34928640723228455, "learning_rate": 1.372423211843802e-05, "loss": 0.4634, "step": 35759 }, { "epoch": 0.7584144556849272, "grad_norm": 0.3809434175491333, "learning_rate": 1.3723922611692459e-05, "loss": 0.4103, "step": 35760 }, { "epoch": 0.7584356641428601, "grad_norm": 0.3384251296520233, "learning_rate": 1.3723613100805187e-05, "loss": 0.5404, "step": 35761 }, { "epoch": 0.7584568726007932, "grad_norm": 0.3505192697048187, "learning_rate": 1.3723303585776542e-05, "loss": 0.5063, "step": 35762 }, { "epoch": 0.7584780810587263, "grad_norm": 0.3539592921733856, "learning_rate": 1.3722994066606872e-05, "loss": 0.5022, "step": 35763 }, { "epoch": 0.7584992895166592, "grad_norm": 0.37384340167045593, "learning_rate": 1.3722684543296519e-05, "loss": 0.5288, "step": 35764 }, { "epoch": 0.7585204979745923, "grad_norm": 0.3587919771671295, "learning_rate": 1.372237501584583e-05, "loss": 0.5118, "step": 35765 }, { "epoch": 0.7585417064325253, "grad_norm": 0.36958980560302734, "learning_rate": 1.3722065484255148e-05, "loss": 0.4287, "step": 35766 }, { "epoch": 0.7585629148904584, "grad_norm": 0.3868926167488098, "learning_rate": 1.3721755948524816e-05, "loss": 0.4279, "step": 35767 }, { "epoch": 0.7585841233483913, "grad_norm": 0.35442161560058594, "learning_rate": 1.3721446408655182e-05, "loss": 0.437, "step": 35768 }, { "epoch": 0.7586053318063244, "grad_norm": 0.3386183977127075, "learning_rate": 1.3721136864646582e-05, "loss": 0.4753, "step": 35769 }, { "epoch": 0.7586265402642574, "grad_norm": 0.35510748624801636, "learning_rate": 1.372082731649937e-05, "loss": 0.4962, "step": 35770 }, { "epoch": 0.7586477487221904, "grad_norm": 0.3703792989253998, "learning_rate": 1.3720517764213887e-05, "loss": 0.5037, "step": 35771 }, { "epoch": 0.7586689571801234, "grad_norm": 0.33991339802742004, "learning_rate": 1.3720208207790475e-05, "loss": 0.4895, "step": 35772 }, { "epoch": 0.7586901656380565, "grad_norm": 0.3654153645038605, "learning_rate": 1.371989864722948e-05, "loss": 0.5344, "step": 35773 }, { "epoch": 0.7587113740959894, "grad_norm": 0.40411826968193054, "learning_rate": 1.3719589082531246e-05, "loss": 0.5205, "step": 35774 }, { "epoch": 0.7587325825539225, "grad_norm": 0.4040612280368805, "learning_rate": 1.3719279513696118e-05, "loss": 0.535, "step": 35775 }, { "epoch": 0.7587537910118556, "grad_norm": 0.37109920382499695, "learning_rate": 1.3718969940724441e-05, "loss": 0.5135, "step": 35776 }, { "epoch": 0.7587749994697885, "grad_norm": 0.38254472613334656, "learning_rate": 1.3718660363616552e-05, "loss": 0.5285, "step": 35777 }, { "epoch": 0.7587962079277216, "grad_norm": 0.34907716512680054, "learning_rate": 1.3718350782372804e-05, "loss": 0.4731, "step": 35778 }, { "epoch": 0.7588174163856546, "grad_norm": 0.41267287731170654, "learning_rate": 1.3718041196993541e-05, "loss": 0.5065, "step": 35779 }, { "epoch": 0.7588386248435877, "grad_norm": 0.3559351861476898, "learning_rate": 1.3717731607479103e-05, "loss": 0.5295, "step": 35780 }, { "epoch": 0.7588598333015206, "grad_norm": 0.40885305404663086, "learning_rate": 1.3717422013829838e-05, "loss": 0.4812, "step": 35781 }, { "epoch": 0.7588810417594537, "grad_norm": 0.36567404866218567, "learning_rate": 1.3717112416046088e-05, "loss": 0.4773, "step": 35782 }, { "epoch": 0.7589022502173867, "grad_norm": 0.41007474064826965, "learning_rate": 1.3716802814128193e-05, "loss": 0.5968, "step": 35783 }, { "epoch": 0.7589234586753197, "grad_norm": 0.3258209526538849, "learning_rate": 1.3716493208076509e-05, "loss": 0.5357, "step": 35784 }, { "epoch": 0.7589446671332527, "grad_norm": 0.32834765315055847, "learning_rate": 1.371618359789137e-05, "loss": 0.3836, "step": 35785 }, { "epoch": 0.7589658755911858, "grad_norm": 0.37608617544174194, "learning_rate": 1.3715873983573125e-05, "loss": 0.5239, "step": 35786 }, { "epoch": 0.7589870840491187, "grad_norm": 0.37793490290641785, "learning_rate": 1.3715564365122118e-05, "loss": 0.4739, "step": 35787 }, { "epoch": 0.7590082925070518, "grad_norm": 0.35001251101493835, "learning_rate": 1.371525474253869e-05, "loss": 0.4906, "step": 35788 }, { "epoch": 0.7590295009649848, "grad_norm": 0.3343747556209564, "learning_rate": 1.371494511582319e-05, "loss": 0.5265, "step": 35789 }, { "epoch": 0.7590507094229179, "grad_norm": 0.3748050332069397, "learning_rate": 1.371463548497596e-05, "loss": 0.5492, "step": 35790 }, { "epoch": 0.7590719178808509, "grad_norm": 0.40541312098503113, "learning_rate": 1.3714325849997343e-05, "loss": 0.4793, "step": 35791 }, { "epoch": 0.7590931263387839, "grad_norm": 0.3426569700241089, "learning_rate": 1.3714016210887688e-05, "loss": 0.4868, "step": 35792 }, { "epoch": 0.759114334796717, "grad_norm": 0.35674819350242615, "learning_rate": 1.3713706567647336e-05, "loss": 0.4532, "step": 35793 }, { "epoch": 0.7591355432546499, "grad_norm": 0.3474118113517761, "learning_rate": 1.3713396920276629e-05, "loss": 0.4863, "step": 35794 }, { "epoch": 0.759156751712583, "grad_norm": 0.37137866020202637, "learning_rate": 1.3713087268775918e-05, "loss": 0.4811, "step": 35795 }, { "epoch": 0.759177960170516, "grad_norm": 0.36418694257736206, "learning_rate": 1.371277761314554e-05, "loss": 0.4631, "step": 35796 }, { "epoch": 0.759199168628449, "grad_norm": 0.41653552651405334, "learning_rate": 1.3712467953385844e-05, "loss": 0.5319, "step": 35797 }, { "epoch": 0.759220377086382, "grad_norm": 0.35512012243270874, "learning_rate": 1.3712158289497175e-05, "loss": 0.4602, "step": 35798 }, { "epoch": 0.7592415855443151, "grad_norm": 0.34033268690109253, "learning_rate": 1.3711848621479877e-05, "loss": 0.4755, "step": 35799 }, { "epoch": 0.759262794002248, "grad_norm": 0.3421320915222168, "learning_rate": 1.3711538949334288e-05, "loss": 0.4842, "step": 35800 }, { "epoch": 0.7592840024601811, "grad_norm": 0.32879438996315, "learning_rate": 1.3711229273060762e-05, "loss": 0.4178, "step": 35801 }, { "epoch": 0.7593052109181141, "grad_norm": 0.32182952761650085, "learning_rate": 1.3710919592659637e-05, "loss": 0.4851, "step": 35802 }, { "epoch": 0.7593264193760472, "grad_norm": 0.34097936749458313, "learning_rate": 1.371060990813126e-05, "loss": 0.392, "step": 35803 }, { "epoch": 0.7593476278339802, "grad_norm": 0.3677809536457062, "learning_rate": 1.3710300219475976e-05, "loss": 0.5432, "step": 35804 }, { "epoch": 0.7593688362919132, "grad_norm": 0.3980933427810669, "learning_rate": 1.3709990526694126e-05, "loss": 0.5387, "step": 35805 }, { "epoch": 0.7593900447498463, "grad_norm": 0.3692397475242615, "learning_rate": 1.3709680829786058e-05, "loss": 0.5072, "step": 35806 }, { "epoch": 0.7594112532077792, "grad_norm": 0.34586018323898315, "learning_rate": 1.3709371128752119e-05, "loss": 0.4359, "step": 35807 }, { "epoch": 0.7594324616657123, "grad_norm": 0.32844969630241394, "learning_rate": 1.3709061423592643e-05, "loss": 0.5139, "step": 35808 }, { "epoch": 0.7594536701236453, "grad_norm": 0.3944661319255829, "learning_rate": 1.3708751714307984e-05, "loss": 0.4949, "step": 35809 }, { "epoch": 0.7594748785815784, "grad_norm": 0.4427088499069214, "learning_rate": 1.3708442000898482e-05, "loss": 0.4991, "step": 35810 }, { "epoch": 0.7594960870395113, "grad_norm": 0.6065211892127991, "learning_rate": 1.3708132283364485e-05, "loss": 0.4718, "step": 35811 }, { "epoch": 0.7595172954974444, "grad_norm": 0.340276300907135, "learning_rate": 1.3707822561706336e-05, "loss": 0.4746, "step": 35812 }, { "epoch": 0.7595385039553774, "grad_norm": 0.401995450258255, "learning_rate": 1.370751283592438e-05, "loss": 0.5806, "step": 35813 }, { "epoch": 0.7595597124133104, "grad_norm": 0.382643461227417, "learning_rate": 1.3707203106018958e-05, "loss": 0.4989, "step": 35814 }, { "epoch": 0.7595809208712434, "grad_norm": 0.37380221486091614, "learning_rate": 1.3706893371990418e-05, "loss": 0.4763, "step": 35815 }, { "epoch": 0.7596021293291765, "grad_norm": 0.3692013621330261, "learning_rate": 1.3706583633839103e-05, "loss": 0.5013, "step": 35816 }, { "epoch": 0.7596233377871096, "grad_norm": 0.3734721839427948, "learning_rate": 1.3706273891565356e-05, "loss": 0.4361, "step": 35817 }, { "epoch": 0.7596445462450425, "grad_norm": 0.3387952148914337, "learning_rate": 1.3705964145169526e-05, "loss": 0.4512, "step": 35818 }, { "epoch": 0.7596657547029756, "grad_norm": 0.40084490180015564, "learning_rate": 1.3705654394651953e-05, "loss": 0.4847, "step": 35819 }, { "epoch": 0.7596869631609086, "grad_norm": 0.46007972955703735, "learning_rate": 1.3705344640012985e-05, "loss": 0.5792, "step": 35820 }, { "epoch": 0.7597081716188416, "grad_norm": 0.3714563846588135, "learning_rate": 1.3705034881252966e-05, "loss": 0.4554, "step": 35821 }, { "epoch": 0.7597293800767746, "grad_norm": 0.3774525225162506, "learning_rate": 1.3704725118372237e-05, "loss": 0.5151, "step": 35822 }, { "epoch": 0.7597505885347077, "grad_norm": 0.40174663066864014, "learning_rate": 1.3704415351371143e-05, "loss": 0.5351, "step": 35823 }, { "epoch": 0.7597717969926406, "grad_norm": 0.38852083683013916, "learning_rate": 1.3704105580250036e-05, "loss": 0.5537, "step": 35824 }, { "epoch": 0.7597930054505737, "grad_norm": 0.3271079659461975, "learning_rate": 1.370379580500925e-05, "loss": 0.4159, "step": 35825 }, { "epoch": 0.7598142139085067, "grad_norm": 0.3434467613697052, "learning_rate": 1.3703486025649137e-05, "loss": 0.5503, "step": 35826 }, { "epoch": 0.7598354223664397, "grad_norm": 0.3646126687526703, "learning_rate": 1.370317624217004e-05, "loss": 0.5252, "step": 35827 }, { "epoch": 0.7598566308243727, "grad_norm": 0.6831393241882324, "learning_rate": 1.3702866454572297e-05, "loss": 0.5369, "step": 35828 }, { "epoch": 0.7598778392823058, "grad_norm": 0.3382895290851593, "learning_rate": 1.3702556662856263e-05, "loss": 0.4756, "step": 35829 }, { "epoch": 0.7598990477402388, "grad_norm": 0.3985656201839447, "learning_rate": 1.370224686702228e-05, "loss": 0.5668, "step": 35830 }, { "epoch": 0.7599202561981718, "grad_norm": 0.3624967634677887, "learning_rate": 1.3701937067070686e-05, "loss": 0.5671, "step": 35831 }, { "epoch": 0.7599414646561049, "grad_norm": 0.3498622477054596, "learning_rate": 1.370162726300183e-05, "loss": 0.5437, "step": 35832 }, { "epoch": 0.7599626731140379, "grad_norm": 0.3497101366519928, "learning_rate": 1.3701317454816057e-05, "loss": 0.4932, "step": 35833 }, { "epoch": 0.7599838815719709, "grad_norm": 0.37765786051750183, "learning_rate": 1.370100764251371e-05, "loss": 0.5728, "step": 35834 }, { "epoch": 0.7600050900299039, "grad_norm": 0.34811216592788696, "learning_rate": 1.3700697826095136e-05, "loss": 0.5668, "step": 35835 }, { "epoch": 0.760026298487837, "grad_norm": 0.3263298273086548, "learning_rate": 1.3700388005560679e-05, "loss": 0.4182, "step": 35836 }, { "epoch": 0.76004750694577, "grad_norm": 0.34739556908607483, "learning_rate": 1.3700078180910679e-05, "loss": 0.4443, "step": 35837 }, { "epoch": 0.760068715403703, "grad_norm": 0.3623919188976288, "learning_rate": 1.3699768352145486e-05, "loss": 0.4262, "step": 35838 }, { "epoch": 0.760089923861636, "grad_norm": 0.3377787172794342, "learning_rate": 1.3699458519265442e-05, "loss": 0.452, "step": 35839 }, { "epoch": 0.7601111323195691, "grad_norm": 0.405383437871933, "learning_rate": 1.3699148682270896e-05, "loss": 0.5251, "step": 35840 }, { "epoch": 0.760132340777502, "grad_norm": 0.3756042718887329, "learning_rate": 1.3698838841162183e-05, "loss": 0.5847, "step": 35841 }, { "epoch": 0.7601535492354351, "grad_norm": 0.4142295718193054, "learning_rate": 1.3698528995939656e-05, "loss": 0.4958, "step": 35842 }, { "epoch": 0.7601747576933681, "grad_norm": 0.3592814803123474, "learning_rate": 1.369821914660366e-05, "loss": 0.4368, "step": 35843 }, { "epoch": 0.7601959661513011, "grad_norm": 0.37440189719200134, "learning_rate": 1.3697909293154536e-05, "loss": 0.5152, "step": 35844 }, { "epoch": 0.7602171746092342, "grad_norm": 0.3751639425754547, "learning_rate": 1.3697599435592625e-05, "loss": 0.4696, "step": 35845 }, { "epoch": 0.7602383830671672, "grad_norm": 0.39939045906066895, "learning_rate": 1.3697289573918281e-05, "loss": 0.4834, "step": 35846 }, { "epoch": 0.7602595915251003, "grad_norm": 0.3407515585422516, "learning_rate": 1.3696979708131842e-05, "loss": 0.489, "step": 35847 }, { "epoch": 0.7602807999830332, "grad_norm": 0.35225585103034973, "learning_rate": 1.3696669838233655e-05, "loss": 0.5111, "step": 35848 }, { "epoch": 0.7603020084409663, "grad_norm": 0.3329739272594452, "learning_rate": 1.3696359964224061e-05, "loss": 0.4909, "step": 35849 }, { "epoch": 0.7603232168988993, "grad_norm": 0.5055163502693176, "learning_rate": 1.3696050086103412e-05, "loss": 0.4095, "step": 35850 }, { "epoch": 0.7603444253568323, "grad_norm": 0.3758288621902466, "learning_rate": 1.3695740203872046e-05, "loss": 0.4807, "step": 35851 }, { "epoch": 0.7603656338147653, "grad_norm": 0.36615481972694397, "learning_rate": 1.3695430317530309e-05, "loss": 0.482, "step": 35852 }, { "epoch": 0.7603868422726984, "grad_norm": 0.33096304535865784, "learning_rate": 1.3695120427078545e-05, "loss": 0.3963, "step": 35853 }, { "epoch": 0.7604080507306313, "grad_norm": 0.34466758370399475, "learning_rate": 1.3694810532517102e-05, "loss": 0.4731, "step": 35854 }, { "epoch": 0.7604292591885644, "grad_norm": 0.3596235513687134, "learning_rate": 1.3694500633846326e-05, "loss": 0.5124, "step": 35855 }, { "epoch": 0.7604504676464974, "grad_norm": 0.3338256776332855, "learning_rate": 1.3694190731066555e-05, "loss": 0.4855, "step": 35856 }, { "epoch": 0.7604716761044304, "grad_norm": 0.39753779768943787, "learning_rate": 1.3693880824178138e-05, "loss": 0.4811, "step": 35857 }, { "epoch": 0.7604928845623635, "grad_norm": 0.4417804479598999, "learning_rate": 1.369357091318142e-05, "loss": 0.4856, "step": 35858 }, { "epoch": 0.7605140930202965, "grad_norm": 0.3767363429069519, "learning_rate": 1.3693260998076742e-05, "loss": 0.5307, "step": 35859 }, { "epoch": 0.7605353014782296, "grad_norm": 0.335460364818573, "learning_rate": 1.3692951078864451e-05, "loss": 0.5149, "step": 35860 }, { "epoch": 0.7605565099361625, "grad_norm": 0.3563694655895233, "learning_rate": 1.3692641155544894e-05, "loss": 0.485, "step": 35861 }, { "epoch": 0.7605777183940956, "grad_norm": 0.33553916215896606, "learning_rate": 1.3692331228118411e-05, "loss": 0.5131, "step": 35862 }, { "epoch": 0.7605989268520286, "grad_norm": 0.3013548254966736, "learning_rate": 1.3692021296585354e-05, "loss": 0.4074, "step": 35863 }, { "epoch": 0.7606201353099616, "grad_norm": 0.34966808557510376, "learning_rate": 1.369171136094606e-05, "loss": 0.4608, "step": 35864 }, { "epoch": 0.7606413437678946, "grad_norm": 0.46097880601882935, "learning_rate": 1.3691401421200875e-05, "loss": 0.5096, "step": 35865 }, { "epoch": 0.7606625522258277, "grad_norm": 0.41684219241142273, "learning_rate": 1.369109147735015e-05, "loss": 0.505, "step": 35866 }, { "epoch": 0.7606837606837606, "grad_norm": 0.33475765585899353, "learning_rate": 1.369078152939422e-05, "loss": 0.4484, "step": 35867 }, { "epoch": 0.7607049691416937, "grad_norm": 0.40353846549987793, "learning_rate": 1.3690471577333438e-05, "loss": 0.5126, "step": 35868 }, { "epoch": 0.7607261775996267, "grad_norm": 0.33556705713272095, "learning_rate": 1.3690161621168146e-05, "loss": 0.3923, "step": 35869 }, { "epoch": 0.7607473860575598, "grad_norm": 0.3964013159275055, "learning_rate": 1.3689851660898685e-05, "loss": 0.4817, "step": 35870 }, { "epoch": 0.7607685945154927, "grad_norm": 0.311941921710968, "learning_rate": 1.3689541696525407e-05, "loss": 0.4604, "step": 35871 }, { "epoch": 0.7607898029734258, "grad_norm": 0.3846954107284546, "learning_rate": 1.3689231728048651e-05, "loss": 0.4857, "step": 35872 }, { "epoch": 0.7608110114313589, "grad_norm": 0.3473277688026428, "learning_rate": 1.368892175546876e-05, "loss": 0.4736, "step": 35873 }, { "epoch": 0.7608322198892918, "grad_norm": 0.37391117215156555, "learning_rate": 1.3688611778786088e-05, "loss": 0.4656, "step": 35874 }, { "epoch": 0.7608534283472249, "grad_norm": 0.38539981842041016, "learning_rate": 1.3688301798000971e-05, "loss": 0.4362, "step": 35875 }, { "epoch": 0.7608746368051579, "grad_norm": 0.3756431043148041, "learning_rate": 1.3687991813113757e-05, "loss": 0.4562, "step": 35876 }, { "epoch": 0.760895845263091, "grad_norm": 0.3436475694179535, "learning_rate": 1.3687681824124793e-05, "loss": 0.4019, "step": 35877 }, { "epoch": 0.7609170537210239, "grad_norm": 0.38453125953674316, "learning_rate": 1.3687371831034419e-05, "loss": 0.5904, "step": 35878 }, { "epoch": 0.760938262178957, "grad_norm": 0.4177657961845398, "learning_rate": 1.368706183384298e-05, "loss": 0.5294, "step": 35879 }, { "epoch": 0.76095947063689, "grad_norm": 0.3224005699157715, "learning_rate": 1.3686751832550827e-05, "loss": 0.502, "step": 35880 }, { "epoch": 0.760980679094823, "grad_norm": 0.3464234471321106, "learning_rate": 1.3686441827158299e-05, "loss": 0.4181, "step": 35881 }, { "epoch": 0.761001887552756, "grad_norm": 0.4294028878211975, "learning_rate": 1.3686131817665741e-05, "loss": 0.5109, "step": 35882 }, { "epoch": 0.7610230960106891, "grad_norm": 0.37137287855148315, "learning_rate": 1.3685821804073502e-05, "loss": 0.4992, "step": 35883 }, { "epoch": 0.761044304468622, "grad_norm": 0.420146644115448, "learning_rate": 1.3685511786381922e-05, "loss": 0.5259, "step": 35884 }, { "epoch": 0.7610655129265551, "grad_norm": 0.35359418392181396, "learning_rate": 1.368520176459135e-05, "loss": 0.5917, "step": 35885 }, { "epoch": 0.7610867213844882, "grad_norm": 0.3423946797847748, "learning_rate": 1.3684891738702125e-05, "loss": 0.4653, "step": 35886 }, { "epoch": 0.7611079298424211, "grad_norm": 0.4289070963859558, "learning_rate": 1.3684581708714597e-05, "loss": 0.4625, "step": 35887 }, { "epoch": 0.7611291383003542, "grad_norm": 0.34419265389442444, "learning_rate": 1.368427167462911e-05, "loss": 0.4215, "step": 35888 }, { "epoch": 0.7611503467582872, "grad_norm": 0.3571343719959259, "learning_rate": 1.3683961636446008e-05, "loss": 0.5195, "step": 35889 }, { "epoch": 0.7611715552162203, "grad_norm": 0.3686206638813019, "learning_rate": 1.3683651594165635e-05, "loss": 0.5391, "step": 35890 }, { "epoch": 0.7611927636741532, "grad_norm": 0.3474476635456085, "learning_rate": 1.3683341547788339e-05, "loss": 0.4858, "step": 35891 }, { "epoch": 0.7612139721320863, "grad_norm": 0.3614954650402069, "learning_rate": 1.368303149731446e-05, "loss": 0.4627, "step": 35892 }, { "epoch": 0.7612351805900193, "grad_norm": 0.4067571759223938, "learning_rate": 1.3682721442744345e-05, "loss": 0.4586, "step": 35893 }, { "epoch": 0.7612563890479523, "grad_norm": 0.4444901645183563, "learning_rate": 1.368241138407834e-05, "loss": 0.5316, "step": 35894 }, { "epoch": 0.7612775975058853, "grad_norm": 0.3611246347427368, "learning_rate": 1.3682101321316792e-05, "loss": 0.519, "step": 35895 }, { "epoch": 0.7612988059638184, "grad_norm": 0.34489354491233826, "learning_rate": 1.368179125446004e-05, "loss": 0.5008, "step": 35896 }, { "epoch": 0.7613200144217513, "grad_norm": 0.9130419492721558, "learning_rate": 1.3681481183508432e-05, "loss": 0.4614, "step": 35897 }, { "epoch": 0.7613412228796844, "grad_norm": 0.3786322772502899, "learning_rate": 1.3681171108462312e-05, "loss": 0.4161, "step": 35898 }, { "epoch": 0.7613624313376175, "grad_norm": 0.3604426383972168, "learning_rate": 1.3680861029322024e-05, "loss": 0.4818, "step": 35899 }, { "epoch": 0.7613836397955505, "grad_norm": 0.40726059675216675, "learning_rate": 1.3680550946087918e-05, "loss": 0.4797, "step": 35900 }, { "epoch": 0.7614048482534835, "grad_norm": 0.34109896421432495, "learning_rate": 1.3680240858760331e-05, "loss": 0.3983, "step": 35901 }, { "epoch": 0.7614260567114165, "grad_norm": 0.3246840536594391, "learning_rate": 1.3679930767339618e-05, "loss": 0.4402, "step": 35902 }, { "epoch": 0.7614472651693496, "grad_norm": 0.51771479845047, "learning_rate": 1.3679620671826115e-05, "loss": 0.515, "step": 35903 }, { "epoch": 0.7614684736272825, "grad_norm": 0.3907970190048218, "learning_rate": 1.3679310572220169e-05, "loss": 0.5233, "step": 35904 }, { "epoch": 0.7614896820852156, "grad_norm": 0.33124616742134094, "learning_rate": 1.3679000468522125e-05, "loss": 0.4746, "step": 35905 }, { "epoch": 0.7615108905431486, "grad_norm": 0.43460145592689514, "learning_rate": 1.3678690360732333e-05, "loss": 0.4567, "step": 35906 }, { "epoch": 0.7615320990010817, "grad_norm": 0.35320547223091125, "learning_rate": 1.3678380248851127e-05, "loss": 0.5092, "step": 35907 }, { "epoch": 0.7615533074590146, "grad_norm": 0.3572427034378052, "learning_rate": 1.3678070132878864e-05, "loss": 0.4261, "step": 35908 }, { "epoch": 0.7615745159169477, "grad_norm": 0.4036444425582886, "learning_rate": 1.3677760012815882e-05, "loss": 0.4798, "step": 35909 }, { "epoch": 0.7615957243748807, "grad_norm": 0.3805276155471802, "learning_rate": 1.3677449888662526e-05, "loss": 0.4665, "step": 35910 }, { "epoch": 0.7616169328328137, "grad_norm": 0.33082231879234314, "learning_rate": 1.3677139760419143e-05, "loss": 0.4131, "step": 35911 }, { "epoch": 0.7616381412907467, "grad_norm": 0.37314683198928833, "learning_rate": 1.3676829628086078e-05, "loss": 0.456, "step": 35912 }, { "epoch": 0.7616593497486798, "grad_norm": 0.35949277877807617, "learning_rate": 1.3676519491663671e-05, "loss": 0.4703, "step": 35913 }, { "epoch": 0.7616805582066128, "grad_norm": 0.4779205024242401, "learning_rate": 1.3676209351152275e-05, "loss": 0.4944, "step": 35914 }, { "epoch": 0.7617017666645458, "grad_norm": 0.3727376461029053, "learning_rate": 1.3675899206552228e-05, "loss": 0.499, "step": 35915 }, { "epoch": 0.7617229751224789, "grad_norm": 0.3851185441017151, "learning_rate": 1.367558905786388e-05, "loss": 0.488, "step": 35916 }, { "epoch": 0.7617441835804118, "grad_norm": 0.360661119222641, "learning_rate": 1.3675278905087575e-05, "loss": 0.4612, "step": 35917 }, { "epoch": 0.7617653920383449, "grad_norm": 0.39500993490219116, "learning_rate": 1.3674968748223653e-05, "loss": 0.4658, "step": 35918 }, { "epoch": 0.7617866004962779, "grad_norm": 0.3711622953414917, "learning_rate": 1.3674658587272465e-05, "loss": 0.4736, "step": 35919 }, { "epoch": 0.761807808954211, "grad_norm": 0.41957059502601624, "learning_rate": 1.3674348422234354e-05, "loss": 0.5397, "step": 35920 }, { "epoch": 0.7618290174121439, "grad_norm": 0.37718114256858826, "learning_rate": 1.3674038253109662e-05, "loss": 0.6119, "step": 35921 }, { "epoch": 0.761850225870077, "grad_norm": 0.39312395453453064, "learning_rate": 1.367372807989874e-05, "loss": 0.474, "step": 35922 }, { "epoch": 0.76187143432801, "grad_norm": 0.3355499804019928, "learning_rate": 1.367341790260193e-05, "loss": 0.4929, "step": 35923 }, { "epoch": 0.761892642785943, "grad_norm": 0.3559788763523102, "learning_rate": 1.3673107721219571e-05, "loss": 0.5371, "step": 35924 }, { "epoch": 0.761913851243876, "grad_norm": 0.36978697776794434, "learning_rate": 1.3672797535752018e-05, "loss": 0.5095, "step": 35925 }, { "epoch": 0.7619350597018091, "grad_norm": 0.34404256939888, "learning_rate": 1.3672487346199612e-05, "loss": 0.4798, "step": 35926 }, { "epoch": 0.7619562681597422, "grad_norm": 0.41883575916290283, "learning_rate": 1.3672177152562695e-05, "loss": 0.5271, "step": 35927 }, { "epoch": 0.7619774766176751, "grad_norm": 0.43097200989723206, "learning_rate": 1.3671866954841618e-05, "loss": 0.5491, "step": 35928 }, { "epoch": 0.7619986850756082, "grad_norm": 0.3989621698856354, "learning_rate": 1.367155675303672e-05, "loss": 0.5163, "step": 35929 }, { "epoch": 0.7620198935335412, "grad_norm": 0.38740062713623047, "learning_rate": 1.3671246547148349e-05, "loss": 0.5339, "step": 35930 }, { "epoch": 0.7620411019914742, "grad_norm": 0.36498844623565674, "learning_rate": 1.3670936337176848e-05, "loss": 0.5183, "step": 35931 }, { "epoch": 0.7620623104494072, "grad_norm": 0.3259641230106354, "learning_rate": 1.3670626123122563e-05, "loss": 0.5044, "step": 35932 }, { "epoch": 0.7620835189073403, "grad_norm": 0.3769613802433014, "learning_rate": 1.3670315904985843e-05, "loss": 0.5213, "step": 35933 }, { "epoch": 0.7621047273652732, "grad_norm": 0.3803209066390991, "learning_rate": 1.3670005682767026e-05, "loss": 0.4773, "step": 35934 }, { "epoch": 0.7621259358232063, "grad_norm": 0.4439944922924042, "learning_rate": 1.3669695456466464e-05, "loss": 0.5498, "step": 35935 }, { "epoch": 0.7621471442811393, "grad_norm": 0.43056681752204895, "learning_rate": 1.3669385226084495e-05, "loss": 0.4364, "step": 35936 }, { "epoch": 0.7621683527390724, "grad_norm": 0.36583301424980164, "learning_rate": 1.366907499162147e-05, "loss": 0.5065, "step": 35937 }, { "epoch": 0.7621895611970053, "grad_norm": 0.35590535402297974, "learning_rate": 1.3668764753077729e-05, "loss": 0.5397, "step": 35938 }, { "epoch": 0.7622107696549384, "grad_norm": 0.320698618888855, "learning_rate": 1.3668454510453622e-05, "loss": 0.4715, "step": 35939 }, { "epoch": 0.7622319781128715, "grad_norm": 0.4181455969810486, "learning_rate": 1.3668144263749494e-05, "loss": 0.6165, "step": 35940 }, { "epoch": 0.7622531865708044, "grad_norm": 0.36370477080345154, "learning_rate": 1.3667834012965684e-05, "loss": 0.3676, "step": 35941 }, { "epoch": 0.7622743950287375, "grad_norm": 0.3496791422367096, "learning_rate": 1.3667523758102544e-05, "loss": 0.4017, "step": 35942 }, { "epoch": 0.7622956034866705, "grad_norm": 0.38324373960494995, "learning_rate": 1.3667213499160413e-05, "loss": 0.5609, "step": 35943 }, { "epoch": 0.7623168119446035, "grad_norm": 0.35298043489456177, "learning_rate": 1.366690323613964e-05, "loss": 0.5063, "step": 35944 }, { "epoch": 0.7623380204025365, "grad_norm": 0.3315850496292114, "learning_rate": 1.3666592969040568e-05, "loss": 0.4464, "step": 35945 }, { "epoch": 0.7623592288604696, "grad_norm": 0.34738799929618835, "learning_rate": 1.3666282697863546e-05, "loss": 0.4796, "step": 35946 }, { "epoch": 0.7623804373184025, "grad_norm": 0.32282090187072754, "learning_rate": 1.3665972422608913e-05, "loss": 0.5026, "step": 35947 }, { "epoch": 0.7624016457763356, "grad_norm": 0.3735273778438568, "learning_rate": 1.366566214327702e-05, "loss": 0.4836, "step": 35948 }, { "epoch": 0.7624228542342686, "grad_norm": 0.39353957772254944, "learning_rate": 1.3665351859868208e-05, "loss": 0.6099, "step": 35949 }, { "epoch": 0.7624440626922017, "grad_norm": 0.3902362883090973, "learning_rate": 1.3665041572382822e-05, "loss": 0.5077, "step": 35950 }, { "epoch": 0.7624652711501346, "grad_norm": 0.40565139055252075, "learning_rate": 1.366473128082121e-05, "loss": 0.4991, "step": 35951 }, { "epoch": 0.7624864796080677, "grad_norm": 0.4152074456214905, "learning_rate": 1.3664420985183715e-05, "loss": 0.54, "step": 35952 }, { "epoch": 0.7625076880660007, "grad_norm": 0.39831072092056274, "learning_rate": 1.3664110685470687e-05, "loss": 0.4897, "step": 35953 }, { "epoch": 0.7625288965239337, "grad_norm": 0.36651235818862915, "learning_rate": 1.3663800381682465e-05, "loss": 0.4736, "step": 35954 }, { "epoch": 0.7625501049818668, "grad_norm": 0.4079110324382782, "learning_rate": 1.3663490073819392e-05, "loss": 0.4833, "step": 35955 }, { "epoch": 0.7625713134397998, "grad_norm": 0.3598266839981079, "learning_rate": 1.366317976188182e-05, "loss": 0.4907, "step": 35956 }, { "epoch": 0.7625925218977329, "grad_norm": 0.36098289489746094, "learning_rate": 1.3662869445870093e-05, "loss": 0.4939, "step": 35957 }, { "epoch": 0.7626137303556658, "grad_norm": 0.7015882134437561, "learning_rate": 1.3662559125784552e-05, "loss": 0.5099, "step": 35958 }, { "epoch": 0.7626349388135989, "grad_norm": 0.37274739146232605, "learning_rate": 1.3662248801625546e-05, "loss": 0.5429, "step": 35959 }, { "epoch": 0.7626561472715319, "grad_norm": 0.3635421693325043, "learning_rate": 1.3661938473393419e-05, "loss": 0.5191, "step": 35960 }, { "epoch": 0.7626773557294649, "grad_norm": 0.4151209890842438, "learning_rate": 1.3661628141088513e-05, "loss": 0.5465, "step": 35961 }, { "epoch": 0.7626985641873979, "grad_norm": 0.4172651171684265, "learning_rate": 1.3661317804711179e-05, "loss": 0.5481, "step": 35962 }, { "epoch": 0.762719772645331, "grad_norm": 0.3502744138240814, "learning_rate": 1.3661007464261758e-05, "loss": 0.3856, "step": 35963 }, { "epoch": 0.7627409811032639, "grad_norm": 0.3581870198249817, "learning_rate": 1.3660697119740594e-05, "loss": 0.5035, "step": 35964 }, { "epoch": 0.762762189561197, "grad_norm": 0.378010630607605, "learning_rate": 1.3660386771148037e-05, "loss": 0.5123, "step": 35965 }, { "epoch": 0.76278339801913, "grad_norm": 0.4443001449108124, "learning_rate": 1.366007641848443e-05, "loss": 0.5144, "step": 35966 }, { "epoch": 0.762804606477063, "grad_norm": 0.377615362405777, "learning_rate": 1.3659766061750118e-05, "loss": 0.5095, "step": 35967 }, { "epoch": 0.7628258149349961, "grad_norm": 0.3550906181335449, "learning_rate": 1.3659455700945445e-05, "loss": 0.4402, "step": 35968 }, { "epoch": 0.7628470233929291, "grad_norm": 0.3896273374557495, "learning_rate": 1.3659145336070756e-05, "loss": 0.4649, "step": 35969 }, { "epoch": 0.7628682318508622, "grad_norm": 0.36323845386505127, "learning_rate": 1.3658834967126398e-05, "loss": 0.4658, "step": 35970 }, { "epoch": 0.7628894403087951, "grad_norm": 0.41768044233322144, "learning_rate": 1.3658524594112718e-05, "loss": 0.427, "step": 35971 }, { "epoch": 0.7629106487667282, "grad_norm": 0.3421406149864197, "learning_rate": 1.3658214217030055e-05, "loss": 0.4938, "step": 35972 }, { "epoch": 0.7629318572246612, "grad_norm": 0.39409342408180237, "learning_rate": 1.3657903835878763e-05, "loss": 0.5792, "step": 35973 }, { "epoch": 0.7629530656825942, "grad_norm": 0.38287460803985596, "learning_rate": 1.3657593450659179e-05, "loss": 0.5684, "step": 35974 }, { "epoch": 0.7629742741405272, "grad_norm": 0.37055519223213196, "learning_rate": 1.365728306137165e-05, "loss": 0.5611, "step": 35975 }, { "epoch": 0.7629954825984603, "grad_norm": 0.5140107870101929, "learning_rate": 1.3656972668016523e-05, "loss": 0.5296, "step": 35976 }, { "epoch": 0.7630166910563932, "grad_norm": 0.3629691004753113, "learning_rate": 1.3656662270594145e-05, "loss": 0.5055, "step": 35977 }, { "epoch": 0.7630378995143263, "grad_norm": 0.3823399543762207, "learning_rate": 1.3656351869104855e-05, "loss": 0.4516, "step": 35978 }, { "epoch": 0.7630591079722593, "grad_norm": 0.41298994421958923, "learning_rate": 1.3656041463549006e-05, "loss": 0.4834, "step": 35979 }, { "epoch": 0.7630803164301924, "grad_norm": 0.35450369119644165, "learning_rate": 1.3655731053926937e-05, "loss": 0.4812, "step": 35980 }, { "epoch": 0.7631015248881254, "grad_norm": 0.4032197892665863, "learning_rate": 1.3655420640238999e-05, "loss": 0.5007, "step": 35981 }, { "epoch": 0.7631227333460584, "grad_norm": 0.37871140241622925, "learning_rate": 1.3655110222485531e-05, "loss": 0.4648, "step": 35982 }, { "epoch": 0.7631439418039915, "grad_norm": 0.35119420289993286, "learning_rate": 1.365479980066688e-05, "loss": 0.5211, "step": 35983 }, { "epoch": 0.7631651502619244, "grad_norm": 0.3601761758327484, "learning_rate": 1.3654489374783398e-05, "loss": 0.4572, "step": 35984 }, { "epoch": 0.7631863587198575, "grad_norm": 0.3530995547771454, "learning_rate": 1.365417894483542e-05, "loss": 0.5276, "step": 35985 }, { "epoch": 0.7632075671777905, "grad_norm": 0.3828011453151703, "learning_rate": 1.3653868510823296e-05, "loss": 0.5387, "step": 35986 }, { "epoch": 0.7632287756357236, "grad_norm": 0.36549100279808044, "learning_rate": 1.3653558072747375e-05, "loss": 0.4691, "step": 35987 }, { "epoch": 0.7632499840936565, "grad_norm": 0.6397485136985779, "learning_rate": 1.3653247630607993e-05, "loss": 0.4433, "step": 35988 }, { "epoch": 0.7632711925515896, "grad_norm": 0.42876362800598145, "learning_rate": 1.3652937184405504e-05, "loss": 0.5226, "step": 35989 }, { "epoch": 0.7632924010095226, "grad_norm": 0.44373586773872375, "learning_rate": 1.365262673414025e-05, "loss": 0.5565, "step": 35990 }, { "epoch": 0.7633136094674556, "grad_norm": 0.4138711094856262, "learning_rate": 1.3652316279812576e-05, "loss": 0.5289, "step": 35991 }, { "epoch": 0.7633348179253886, "grad_norm": 0.3855245113372803, "learning_rate": 1.3652005821422828e-05, "loss": 0.5488, "step": 35992 }, { "epoch": 0.7633560263833217, "grad_norm": 0.4067692458629608, "learning_rate": 1.3651695358971352e-05, "loss": 0.4891, "step": 35993 }, { "epoch": 0.7633772348412546, "grad_norm": 0.3932282626628876, "learning_rate": 1.3651384892458488e-05, "loss": 0.552, "step": 35994 }, { "epoch": 0.7633984432991877, "grad_norm": 0.35857850313186646, "learning_rate": 1.3651074421884584e-05, "loss": 0.517, "step": 35995 }, { "epoch": 0.7634196517571208, "grad_norm": 0.6878137588500977, "learning_rate": 1.3650763947249991e-05, "loss": 0.4472, "step": 35996 }, { "epoch": 0.7634408602150538, "grad_norm": 0.3580847978591919, "learning_rate": 1.3650453468555052e-05, "loss": 0.5032, "step": 35997 }, { "epoch": 0.7634620686729868, "grad_norm": 0.32606056332588196, "learning_rate": 1.3650142985800109e-05, "loss": 0.4686, "step": 35998 }, { "epoch": 0.7634832771309198, "grad_norm": 0.36751723289489746, "learning_rate": 1.3649832498985507e-05, "loss": 0.5727, "step": 35999 }, { "epoch": 0.7635044855888529, "grad_norm": 0.3600068986415863, "learning_rate": 1.3649522008111591e-05, "loss": 0.4643, "step": 36000 }, { "epoch": 0.7635256940467858, "grad_norm": 0.3282678425312042, "learning_rate": 1.364921151317871e-05, "loss": 0.5533, "step": 36001 }, { "epoch": 0.7635469025047189, "grad_norm": 0.6446000933647156, "learning_rate": 1.364890101418721e-05, "loss": 0.5378, "step": 36002 }, { "epoch": 0.7635681109626519, "grad_norm": 0.3797273635864258, "learning_rate": 1.3648590511137432e-05, "loss": 0.4669, "step": 36003 }, { "epoch": 0.7635893194205849, "grad_norm": 0.3242231607437134, "learning_rate": 1.3648280004029725e-05, "loss": 0.5019, "step": 36004 }, { "epoch": 0.7636105278785179, "grad_norm": 0.33817002177238464, "learning_rate": 1.3647969492864432e-05, "loss": 0.4362, "step": 36005 }, { "epoch": 0.763631736336451, "grad_norm": 0.36946478486061096, "learning_rate": 1.3647658977641897e-05, "loss": 0.4983, "step": 36006 }, { "epoch": 0.763652944794384, "grad_norm": 0.34676483273506165, "learning_rate": 1.364734845836247e-05, "loss": 0.4716, "step": 36007 }, { "epoch": 0.763674153252317, "grad_norm": 0.34889355301856995, "learning_rate": 1.3647037935026492e-05, "loss": 0.5056, "step": 36008 }, { "epoch": 0.7636953617102501, "grad_norm": 0.3419325053691864, "learning_rate": 1.3646727407634311e-05, "loss": 0.4433, "step": 36009 }, { "epoch": 0.7637165701681831, "grad_norm": 0.3427906632423401, "learning_rate": 1.3646416876186272e-05, "loss": 0.4658, "step": 36010 }, { "epoch": 0.7637377786261161, "grad_norm": 0.36759835481643677, "learning_rate": 1.3646106340682717e-05, "loss": 0.5396, "step": 36011 }, { "epoch": 0.7637589870840491, "grad_norm": 0.37232664227485657, "learning_rate": 1.3645795801123997e-05, "loss": 0.5453, "step": 36012 }, { "epoch": 0.7637801955419822, "grad_norm": 0.3509182333946228, "learning_rate": 1.3645485257510456e-05, "loss": 0.4736, "step": 36013 }, { "epoch": 0.7638014039999151, "grad_norm": 0.37753647565841675, "learning_rate": 1.3645174709842434e-05, "loss": 0.5999, "step": 36014 }, { "epoch": 0.7638226124578482, "grad_norm": 0.4380934238433838, "learning_rate": 1.364486415812028e-05, "loss": 0.4968, "step": 36015 }, { "epoch": 0.7638438209157812, "grad_norm": 0.3814659118652344, "learning_rate": 1.3644553602344344e-05, "loss": 0.4773, "step": 36016 }, { "epoch": 0.7638650293737143, "grad_norm": 0.35437554121017456, "learning_rate": 1.3644243042514964e-05, "loss": 0.4616, "step": 36017 }, { "epoch": 0.7638862378316472, "grad_norm": 0.3441358208656311, "learning_rate": 1.3643932478632488e-05, "loss": 0.4422, "step": 36018 }, { "epoch": 0.7639074462895803, "grad_norm": 0.45027029514312744, "learning_rate": 1.3643621910697267e-05, "loss": 0.459, "step": 36019 }, { "epoch": 0.7639286547475133, "grad_norm": 0.38628101348876953, "learning_rate": 1.3643311338709638e-05, "loss": 0.4481, "step": 36020 }, { "epoch": 0.7639498632054463, "grad_norm": 0.3298865556716919, "learning_rate": 1.3643000762669946e-05, "loss": 0.4279, "step": 36021 }, { "epoch": 0.7639710716633794, "grad_norm": 0.38323482871055603, "learning_rate": 1.3642690182578546e-05, "loss": 0.542, "step": 36022 }, { "epoch": 0.7639922801213124, "grad_norm": 0.37831100821495056, "learning_rate": 1.3642379598435773e-05, "loss": 0.4625, "step": 36023 }, { "epoch": 0.7640134885792454, "grad_norm": 0.4011398255825043, "learning_rate": 1.3642069010241983e-05, "loss": 0.4997, "step": 36024 }, { "epoch": 0.7640346970371784, "grad_norm": 0.37444832921028137, "learning_rate": 1.364175841799751e-05, "loss": 0.4526, "step": 36025 }, { "epoch": 0.7640559054951115, "grad_norm": 0.3921706974506378, "learning_rate": 1.364144782170271e-05, "loss": 0.4776, "step": 36026 }, { "epoch": 0.7640771139530445, "grad_norm": 0.39667627215385437, "learning_rate": 1.3641137221357917e-05, "loss": 0.518, "step": 36027 }, { "epoch": 0.7640983224109775, "grad_norm": 0.36464592814445496, "learning_rate": 1.3640826616963487e-05, "loss": 0.5214, "step": 36028 }, { "epoch": 0.7641195308689105, "grad_norm": 0.4472014904022217, "learning_rate": 1.364051600851976e-05, "loss": 0.5229, "step": 36029 }, { "epoch": 0.7641407393268436, "grad_norm": 0.3748890161514282, "learning_rate": 1.3640205396027084e-05, "loss": 0.5642, "step": 36030 }, { "epoch": 0.7641619477847765, "grad_norm": 0.3360840082168579, "learning_rate": 1.36398947794858e-05, "loss": 0.5215, "step": 36031 }, { "epoch": 0.7641831562427096, "grad_norm": 0.35754790902137756, "learning_rate": 1.3639584158896263e-05, "loss": 0.5347, "step": 36032 }, { "epoch": 0.7642043647006426, "grad_norm": 0.3918883502483368, "learning_rate": 1.3639273534258806e-05, "loss": 0.5281, "step": 36033 }, { "epoch": 0.7642255731585756, "grad_norm": 0.3665611147880554, "learning_rate": 1.363896290557378e-05, "loss": 0.5042, "step": 36034 }, { "epoch": 0.7642467816165087, "grad_norm": 0.37445592880249023, "learning_rate": 1.3638652272841536e-05, "loss": 0.4925, "step": 36035 }, { "epoch": 0.7642679900744417, "grad_norm": 0.34267085790634155, "learning_rate": 1.3638341636062414e-05, "loss": 0.5139, "step": 36036 }, { "epoch": 0.7642891985323748, "grad_norm": 0.35442617535591125, "learning_rate": 1.3638030995236755e-05, "loss": 0.5108, "step": 36037 }, { "epoch": 0.7643104069903077, "grad_norm": 0.37159454822540283, "learning_rate": 1.3637720350364914e-05, "loss": 0.4784, "step": 36038 }, { "epoch": 0.7643316154482408, "grad_norm": 0.3957757353782654, "learning_rate": 1.363740970144723e-05, "loss": 0.4677, "step": 36039 }, { "epoch": 0.7643528239061738, "grad_norm": 0.5083510279655457, "learning_rate": 1.363709904848405e-05, "loss": 0.4972, "step": 36040 }, { "epoch": 0.7643740323641068, "grad_norm": 0.38324183225631714, "learning_rate": 1.3636788391475721e-05, "loss": 0.5123, "step": 36041 }, { "epoch": 0.7643952408220398, "grad_norm": 0.3964184820652008, "learning_rate": 1.363647773042259e-05, "loss": 0.558, "step": 36042 }, { "epoch": 0.7644164492799729, "grad_norm": 0.3892974555492401, "learning_rate": 1.3636167065324997e-05, "loss": 0.4982, "step": 36043 }, { "epoch": 0.7644376577379058, "grad_norm": 0.3422665596008301, "learning_rate": 1.3635856396183294e-05, "loss": 0.4451, "step": 36044 }, { "epoch": 0.7644588661958389, "grad_norm": 0.4255959093570709, "learning_rate": 1.3635545722997818e-05, "loss": 0.4467, "step": 36045 }, { "epoch": 0.7644800746537719, "grad_norm": 0.3942660093307495, "learning_rate": 1.3635235045768922e-05, "loss": 0.5927, "step": 36046 }, { "epoch": 0.764501283111705, "grad_norm": 0.47577255964279175, "learning_rate": 1.363492436449695e-05, "loss": 0.4582, "step": 36047 }, { "epoch": 0.7645224915696379, "grad_norm": 0.35267508029937744, "learning_rate": 1.3634613679182244e-05, "loss": 0.525, "step": 36048 }, { "epoch": 0.764543700027571, "grad_norm": 0.5315905809402466, "learning_rate": 1.3634302989825156e-05, "loss": 0.5525, "step": 36049 }, { "epoch": 0.7645649084855041, "grad_norm": 0.4103894829750061, "learning_rate": 1.3633992296426029e-05, "loss": 0.5309, "step": 36050 }, { "epoch": 0.764586116943437, "grad_norm": 0.33687660098075867, "learning_rate": 1.3633681598985203e-05, "loss": 0.52, "step": 36051 }, { "epoch": 0.7646073254013701, "grad_norm": 0.37017300724983215, "learning_rate": 1.3633370897503031e-05, "loss": 0.4982, "step": 36052 }, { "epoch": 0.7646285338593031, "grad_norm": 0.3796713948249817, "learning_rate": 1.3633060191979852e-05, "loss": 0.5466, "step": 36053 }, { "epoch": 0.7646497423172361, "grad_norm": 0.39304807782173157, "learning_rate": 1.3632749482416017e-05, "loss": 0.5473, "step": 36054 }, { "epoch": 0.7646709507751691, "grad_norm": 0.3378836214542389, "learning_rate": 1.3632438768811871e-05, "loss": 0.5095, "step": 36055 }, { "epoch": 0.7646921592331022, "grad_norm": 0.40028199553489685, "learning_rate": 1.3632128051167757e-05, "loss": 0.5271, "step": 36056 }, { "epoch": 0.7647133676910352, "grad_norm": 0.35909244418144226, "learning_rate": 1.3631817329484021e-05, "loss": 0.4854, "step": 36057 }, { "epoch": 0.7647345761489682, "grad_norm": 0.8489618897438049, "learning_rate": 1.3631506603761008e-05, "loss": 0.5441, "step": 36058 }, { "epoch": 0.7647557846069012, "grad_norm": 0.32602280378341675, "learning_rate": 1.3631195873999068e-05, "loss": 0.4521, "step": 36059 }, { "epoch": 0.7647769930648343, "grad_norm": 0.354236364364624, "learning_rate": 1.363088514019854e-05, "loss": 0.4678, "step": 36060 }, { "epoch": 0.7647982015227672, "grad_norm": 0.3836483955383301, "learning_rate": 1.3630574402359776e-05, "loss": 0.5151, "step": 36061 }, { "epoch": 0.7648194099807003, "grad_norm": 0.34062308073043823, "learning_rate": 1.3630263660483117e-05, "loss": 0.462, "step": 36062 }, { "epoch": 0.7648406184386334, "grad_norm": 0.3454934358596802, "learning_rate": 1.362995291456891e-05, "loss": 0.4409, "step": 36063 }, { "epoch": 0.7648618268965663, "grad_norm": 0.34008562564849854, "learning_rate": 1.3629642164617505e-05, "loss": 0.4916, "step": 36064 }, { "epoch": 0.7648830353544994, "grad_norm": 0.3646796941757202, "learning_rate": 1.3629331410629237e-05, "loss": 0.5256, "step": 36065 }, { "epoch": 0.7649042438124324, "grad_norm": 0.35147765278816223, "learning_rate": 1.3629020652604463e-05, "loss": 0.472, "step": 36066 }, { "epoch": 0.7649254522703655, "grad_norm": 0.33250680565834045, "learning_rate": 1.3628709890543524e-05, "loss": 0.448, "step": 36067 }, { "epoch": 0.7649466607282984, "grad_norm": 0.36112847924232483, "learning_rate": 1.3628399124446762e-05, "loss": 0.5024, "step": 36068 }, { "epoch": 0.7649678691862315, "grad_norm": 0.44701993465423584, "learning_rate": 1.3628088354314529e-05, "loss": 0.4434, "step": 36069 }, { "epoch": 0.7649890776441645, "grad_norm": 0.34448713064193726, "learning_rate": 1.3627777580147168e-05, "loss": 0.4245, "step": 36070 }, { "epoch": 0.7650102861020975, "grad_norm": 0.37633979320526123, "learning_rate": 1.3627466801945023e-05, "loss": 0.5208, "step": 36071 }, { "epoch": 0.7650314945600305, "grad_norm": 0.3790009915828705, "learning_rate": 1.362715601970844e-05, "loss": 0.5293, "step": 36072 }, { "epoch": 0.7650527030179636, "grad_norm": 0.3933117687702179, "learning_rate": 1.3626845233437767e-05, "loss": 0.4763, "step": 36073 }, { "epoch": 0.7650739114758965, "grad_norm": 0.33613666892051697, "learning_rate": 1.3626534443133345e-05, "loss": 0.5085, "step": 36074 }, { "epoch": 0.7650951199338296, "grad_norm": 0.39538732171058655, "learning_rate": 1.3626223648795526e-05, "loss": 0.4229, "step": 36075 }, { "epoch": 0.7651163283917627, "grad_norm": 0.34984102845191956, "learning_rate": 1.3625912850424652e-05, "loss": 0.4552, "step": 36076 }, { "epoch": 0.7651375368496957, "grad_norm": 0.35734128952026367, "learning_rate": 1.3625602048021069e-05, "loss": 0.48, "step": 36077 }, { "epoch": 0.7651587453076287, "grad_norm": 0.3667718172073364, "learning_rate": 1.3625291241585123e-05, "loss": 0.5133, "step": 36078 }, { "epoch": 0.7651799537655617, "grad_norm": 0.34559252858161926, "learning_rate": 1.3624980431117159e-05, "loss": 0.5713, "step": 36079 }, { "epoch": 0.7652011622234948, "grad_norm": 0.5330314636230469, "learning_rate": 1.3624669616617524e-05, "loss": 0.475, "step": 36080 }, { "epoch": 0.7652223706814277, "grad_norm": 0.3537856340408325, "learning_rate": 1.3624358798086564e-05, "loss": 0.491, "step": 36081 }, { "epoch": 0.7652435791393608, "grad_norm": 0.39353498816490173, "learning_rate": 1.3624047975524622e-05, "loss": 0.525, "step": 36082 }, { "epoch": 0.7652647875972938, "grad_norm": 0.3840198218822479, "learning_rate": 1.3623737148932046e-05, "loss": 0.5108, "step": 36083 }, { "epoch": 0.7652859960552268, "grad_norm": 0.4305713474750519, "learning_rate": 1.3623426318309183e-05, "loss": 0.5396, "step": 36084 }, { "epoch": 0.7653072045131598, "grad_norm": 0.36418256163597107, "learning_rate": 1.3623115483656371e-05, "loss": 0.4305, "step": 36085 }, { "epoch": 0.7653284129710929, "grad_norm": 0.3999965190887451, "learning_rate": 1.3622804644973966e-05, "loss": 0.47, "step": 36086 }, { "epoch": 0.7653496214290259, "grad_norm": 0.34618544578552246, "learning_rate": 1.3622493802262309e-05, "loss": 0.4867, "step": 36087 }, { "epoch": 0.7653708298869589, "grad_norm": 0.4004196524620056, "learning_rate": 1.3622182955521743e-05, "loss": 0.549, "step": 36088 }, { "epoch": 0.7653920383448919, "grad_norm": 0.6165447235107422, "learning_rate": 1.362187210475262e-05, "loss": 0.5368, "step": 36089 }, { "epoch": 0.765413246802825, "grad_norm": 0.37152227759361267, "learning_rate": 1.362156124995528e-05, "loss": 0.4434, "step": 36090 }, { "epoch": 0.765434455260758, "grad_norm": 0.3430456519126892, "learning_rate": 1.3621250391130072e-05, "loss": 0.4645, "step": 36091 }, { "epoch": 0.765455663718691, "grad_norm": 0.35323846340179443, "learning_rate": 1.3620939528277341e-05, "loss": 0.5392, "step": 36092 }, { "epoch": 0.7654768721766241, "grad_norm": 0.38438284397125244, "learning_rate": 1.362062866139743e-05, "loss": 0.4238, "step": 36093 }, { "epoch": 0.765498080634557, "grad_norm": 0.41985735297203064, "learning_rate": 1.3620317790490691e-05, "loss": 0.4816, "step": 36094 }, { "epoch": 0.7655192890924901, "grad_norm": 0.38212305307388306, "learning_rate": 1.3620006915557467e-05, "loss": 0.5643, "step": 36095 }, { "epoch": 0.7655404975504231, "grad_norm": 0.37305116653442383, "learning_rate": 1.36196960365981e-05, "loss": 0.4848, "step": 36096 }, { "epoch": 0.7655617060083562, "grad_norm": 0.33158278465270996, "learning_rate": 1.3619385153612936e-05, "loss": 0.4465, "step": 36097 }, { "epoch": 0.7655829144662891, "grad_norm": 0.4086403250694275, "learning_rate": 1.3619074266602328e-05, "loss": 0.573, "step": 36098 }, { "epoch": 0.7656041229242222, "grad_norm": 0.3845960199832916, "learning_rate": 1.3618763375566616e-05, "loss": 0.4532, "step": 36099 }, { "epoch": 0.7656253313821552, "grad_norm": 0.36576464772224426, "learning_rate": 1.3618452480506145e-05, "loss": 0.5995, "step": 36100 }, { "epoch": 0.7656465398400882, "grad_norm": 0.36534470319747925, "learning_rate": 1.3618141581421265e-05, "loss": 0.5495, "step": 36101 }, { "epoch": 0.7656677482980212, "grad_norm": 0.36497563123703003, "learning_rate": 1.3617830678312316e-05, "loss": 0.4973, "step": 36102 }, { "epoch": 0.7656889567559543, "grad_norm": 0.37474727630615234, "learning_rate": 1.3617519771179651e-05, "loss": 0.4439, "step": 36103 }, { "epoch": 0.7657101652138874, "grad_norm": 0.4599142074584961, "learning_rate": 1.3617208860023608e-05, "loss": 0.4919, "step": 36104 }, { "epoch": 0.7657313736718203, "grad_norm": 0.3864120543003082, "learning_rate": 1.3616897944844538e-05, "loss": 0.5023, "step": 36105 }, { "epoch": 0.7657525821297534, "grad_norm": 0.3038043975830078, "learning_rate": 1.3616587025642786e-05, "loss": 0.4593, "step": 36106 }, { "epoch": 0.7657737905876864, "grad_norm": 0.3551115393638611, "learning_rate": 1.3616276102418697e-05, "loss": 0.4557, "step": 36107 }, { "epoch": 0.7657949990456194, "grad_norm": 0.3889073431491852, "learning_rate": 1.3615965175172618e-05, "loss": 0.5147, "step": 36108 }, { "epoch": 0.7658162075035524, "grad_norm": 0.383840411901474, "learning_rate": 1.3615654243904897e-05, "loss": 0.5191, "step": 36109 }, { "epoch": 0.7658374159614855, "grad_norm": 0.35656413435935974, "learning_rate": 1.3615343308615869e-05, "loss": 0.4659, "step": 36110 }, { "epoch": 0.7658586244194184, "grad_norm": 0.40556344389915466, "learning_rate": 1.3615032369305894e-05, "loss": 0.5405, "step": 36111 }, { "epoch": 0.7658798328773515, "grad_norm": 0.36088645458221436, "learning_rate": 1.3614721425975308e-05, "loss": 0.5479, "step": 36112 }, { "epoch": 0.7659010413352845, "grad_norm": 0.36382371187210083, "learning_rate": 1.3614410478624463e-05, "loss": 0.4893, "step": 36113 }, { "epoch": 0.7659222497932175, "grad_norm": 0.3991937041282654, "learning_rate": 1.36140995272537e-05, "loss": 0.5178, "step": 36114 }, { "epoch": 0.7659434582511505, "grad_norm": 0.367061048746109, "learning_rate": 1.3613788571863368e-05, "loss": 0.5854, "step": 36115 }, { "epoch": 0.7659646667090836, "grad_norm": 0.33912110328674316, "learning_rate": 1.3613477612453811e-05, "loss": 0.43, "step": 36116 }, { "epoch": 0.7659858751670167, "grad_norm": 0.4265934228897095, "learning_rate": 1.3613166649025376e-05, "loss": 0.4331, "step": 36117 }, { "epoch": 0.7660070836249496, "grad_norm": 1.36172354221344, "learning_rate": 1.3612855681578407e-05, "loss": 0.5257, "step": 36118 }, { "epoch": 0.7660282920828827, "grad_norm": 0.4204491972923279, "learning_rate": 1.3612544710113254e-05, "loss": 0.5126, "step": 36119 }, { "epoch": 0.7660495005408157, "grad_norm": 0.36591556668281555, "learning_rate": 1.3612233734630257e-05, "loss": 0.5468, "step": 36120 }, { "epoch": 0.7660707089987487, "grad_norm": 0.47365742921829224, "learning_rate": 1.3611922755129767e-05, "loss": 0.4936, "step": 36121 }, { "epoch": 0.7660919174566817, "grad_norm": 0.36314037442207336, "learning_rate": 1.361161177161213e-05, "loss": 0.5203, "step": 36122 }, { "epoch": 0.7661131259146148, "grad_norm": 0.34881725907325745, "learning_rate": 1.3611300784077686e-05, "loss": 0.4357, "step": 36123 }, { "epoch": 0.7661343343725477, "grad_norm": 0.3245016932487488, "learning_rate": 1.3610989792526783e-05, "loss": 0.5383, "step": 36124 }, { "epoch": 0.7661555428304808, "grad_norm": 0.38685470819473267, "learning_rate": 1.3610678796959773e-05, "loss": 0.4857, "step": 36125 }, { "epoch": 0.7661767512884138, "grad_norm": 0.45050227642059326, "learning_rate": 1.3610367797376998e-05, "loss": 0.4853, "step": 36126 }, { "epoch": 0.7661979597463469, "grad_norm": 0.355752557516098, "learning_rate": 1.36100567937788e-05, "loss": 0.4449, "step": 36127 }, { "epoch": 0.7662191682042798, "grad_norm": 0.3922770023345947, "learning_rate": 1.3609745786165532e-05, "loss": 0.453, "step": 36128 }, { "epoch": 0.7662403766622129, "grad_norm": 0.35612621903419495, "learning_rate": 1.3609434774537534e-05, "loss": 0.4622, "step": 36129 }, { "epoch": 0.7662615851201459, "grad_norm": 0.37759673595428467, "learning_rate": 1.360912375889515e-05, "loss": 0.5661, "step": 36130 }, { "epoch": 0.7662827935780789, "grad_norm": 0.3439073860645294, "learning_rate": 1.3608812739238735e-05, "loss": 0.4529, "step": 36131 }, { "epoch": 0.766304002036012, "grad_norm": 0.3553159534931183, "learning_rate": 1.3608501715568629e-05, "loss": 0.4839, "step": 36132 }, { "epoch": 0.766325210493945, "grad_norm": 0.3976261019706726, "learning_rate": 1.3608190687885176e-05, "loss": 0.5446, "step": 36133 }, { "epoch": 0.766346418951878, "grad_norm": 0.3487129509449005, "learning_rate": 1.3607879656188728e-05, "loss": 0.4753, "step": 36134 }, { "epoch": 0.766367627409811, "grad_norm": 0.3709303140640259, "learning_rate": 1.3607568620479626e-05, "loss": 0.4671, "step": 36135 }, { "epoch": 0.7663888358677441, "grad_norm": 0.43264853954315186, "learning_rate": 1.3607257580758218e-05, "loss": 0.4634, "step": 36136 }, { "epoch": 0.766410044325677, "grad_norm": 0.3428017497062683, "learning_rate": 1.3606946537024848e-05, "loss": 0.5589, "step": 36137 }, { "epoch": 0.7664312527836101, "grad_norm": 0.3562951683998108, "learning_rate": 1.3606635489279865e-05, "loss": 0.5489, "step": 36138 }, { "epoch": 0.7664524612415431, "grad_norm": 0.3692125082015991, "learning_rate": 1.3606324437523613e-05, "loss": 0.5111, "step": 36139 }, { "epoch": 0.7664736696994762, "grad_norm": 0.359368234872818, "learning_rate": 1.360601338175644e-05, "loss": 0.4784, "step": 36140 }, { "epoch": 0.7664948781574091, "grad_norm": 0.3504898250102997, "learning_rate": 1.3605702321978687e-05, "loss": 0.4267, "step": 36141 }, { "epoch": 0.7665160866153422, "grad_norm": 0.35006704926490784, "learning_rate": 1.3605391258190703e-05, "loss": 0.4946, "step": 36142 }, { "epoch": 0.7665372950732752, "grad_norm": 0.39803776144981384, "learning_rate": 1.3605080190392839e-05, "loss": 0.517, "step": 36143 }, { "epoch": 0.7665585035312082, "grad_norm": 0.39467671513557434, "learning_rate": 1.360476911858543e-05, "loss": 0.4633, "step": 36144 }, { "epoch": 0.7665797119891413, "grad_norm": 0.394707053899765, "learning_rate": 1.3604458042768833e-05, "loss": 0.3957, "step": 36145 }, { "epoch": 0.7666009204470743, "grad_norm": 0.31703081727027893, "learning_rate": 1.3604146962943387e-05, "loss": 0.4799, "step": 36146 }, { "epoch": 0.7666221289050074, "grad_norm": 0.3801397681236267, "learning_rate": 1.3603835879109442e-05, "loss": 0.4869, "step": 36147 }, { "epoch": 0.7666433373629403, "grad_norm": 0.5086683630943298, "learning_rate": 1.3603524791267339e-05, "loss": 0.5051, "step": 36148 }, { "epoch": 0.7666645458208734, "grad_norm": 0.4017055332660675, "learning_rate": 1.3603213699417428e-05, "loss": 0.5453, "step": 36149 }, { "epoch": 0.7666857542788064, "grad_norm": 0.3757389187812805, "learning_rate": 1.3602902603560052e-05, "loss": 0.4445, "step": 36150 }, { "epoch": 0.7667069627367394, "grad_norm": 0.42876869440078735, "learning_rate": 1.3602591503695563e-05, "loss": 0.4905, "step": 36151 }, { "epoch": 0.7667281711946724, "grad_norm": 0.38839027285575867, "learning_rate": 1.3602280399824302e-05, "loss": 0.5126, "step": 36152 }, { "epoch": 0.7667493796526055, "grad_norm": 0.3453732430934906, "learning_rate": 1.3601969291946615e-05, "loss": 0.4239, "step": 36153 }, { "epoch": 0.7667705881105384, "grad_norm": 0.3627623915672302, "learning_rate": 1.360165818006285e-05, "loss": 0.4824, "step": 36154 }, { "epoch": 0.7667917965684715, "grad_norm": 0.36487865447998047, "learning_rate": 1.360134706417335e-05, "loss": 0.4448, "step": 36155 }, { "epoch": 0.7668130050264045, "grad_norm": 0.3417549431324005, "learning_rate": 1.3601035944278465e-05, "loss": 0.474, "step": 36156 }, { "epoch": 0.7668342134843376, "grad_norm": 0.3527590334415436, "learning_rate": 1.3600724820378539e-05, "loss": 0.4374, "step": 36157 }, { "epoch": 0.7668554219422706, "grad_norm": 0.38336679339408875, "learning_rate": 1.3600413692473917e-05, "loss": 0.5354, "step": 36158 }, { "epoch": 0.7668766304002036, "grad_norm": 0.34433814883232117, "learning_rate": 1.360010256056495e-05, "loss": 0.4538, "step": 36159 }, { "epoch": 0.7668978388581367, "grad_norm": 0.3437569737434387, "learning_rate": 1.3599791424651977e-05, "loss": 0.3651, "step": 36160 }, { "epoch": 0.7669190473160696, "grad_norm": 0.41295671463012695, "learning_rate": 1.3599480284735347e-05, "loss": 0.4573, "step": 36161 }, { "epoch": 0.7669402557740027, "grad_norm": 0.3546094000339508, "learning_rate": 1.3599169140815404e-05, "loss": 0.5155, "step": 36162 }, { "epoch": 0.7669614642319357, "grad_norm": 0.4702901542186737, "learning_rate": 1.3598857992892502e-05, "loss": 0.5192, "step": 36163 }, { "epoch": 0.7669826726898687, "grad_norm": 0.37374845147132874, "learning_rate": 1.3598546840966978e-05, "loss": 0.5026, "step": 36164 }, { "epoch": 0.7670038811478017, "grad_norm": 0.355734646320343, "learning_rate": 1.3598235685039182e-05, "loss": 0.515, "step": 36165 }, { "epoch": 0.7670250896057348, "grad_norm": 0.36354297399520874, "learning_rate": 1.3597924525109458e-05, "loss": 0.4859, "step": 36166 }, { "epoch": 0.7670462980636678, "grad_norm": 0.3517613410949707, "learning_rate": 1.3597613361178156e-05, "loss": 0.4538, "step": 36167 }, { "epoch": 0.7670675065216008, "grad_norm": 0.4308542311191559, "learning_rate": 1.3597302193245618e-05, "loss": 0.4271, "step": 36168 }, { "epoch": 0.7670887149795338, "grad_norm": 0.3160020709037781, "learning_rate": 1.3596991021312193e-05, "loss": 0.4862, "step": 36169 }, { "epoch": 0.7671099234374669, "grad_norm": 0.3819419741630554, "learning_rate": 1.3596679845378225e-05, "loss": 0.5432, "step": 36170 }, { "epoch": 0.7671311318953998, "grad_norm": 0.3245457112789154, "learning_rate": 1.3596368665444062e-05, "loss": 0.4286, "step": 36171 }, { "epoch": 0.7671523403533329, "grad_norm": 0.42009755969047546, "learning_rate": 1.3596057481510047e-05, "loss": 0.5138, "step": 36172 }, { "epoch": 0.767173548811266, "grad_norm": 0.3437950313091278, "learning_rate": 1.359574629357653e-05, "loss": 0.4982, "step": 36173 }, { "epoch": 0.767194757269199, "grad_norm": 0.35268107056617737, "learning_rate": 1.3595435101643855e-05, "loss": 0.5172, "step": 36174 }, { "epoch": 0.767215965727132, "grad_norm": 0.3523503243923187, "learning_rate": 1.3595123905712369e-05, "loss": 0.4979, "step": 36175 }, { "epoch": 0.767237174185065, "grad_norm": 0.35951051115989685, "learning_rate": 1.3594812705782416e-05, "loss": 0.4723, "step": 36176 }, { "epoch": 0.7672583826429981, "grad_norm": 0.42154183983802795, "learning_rate": 1.3594501501854344e-05, "loss": 0.3752, "step": 36177 }, { "epoch": 0.767279591100931, "grad_norm": 0.3694591522216797, "learning_rate": 1.3594190293928497e-05, "loss": 0.4924, "step": 36178 }, { "epoch": 0.7673007995588641, "grad_norm": 0.37050506472587585, "learning_rate": 1.3593879082005225e-05, "loss": 0.4869, "step": 36179 }, { "epoch": 0.7673220080167971, "grad_norm": 0.4076024293899536, "learning_rate": 1.3593567866084871e-05, "loss": 0.4937, "step": 36180 }, { "epoch": 0.7673432164747301, "grad_norm": 0.3718511164188385, "learning_rate": 1.3593256646167783e-05, "loss": 0.4278, "step": 36181 }, { "epoch": 0.7673644249326631, "grad_norm": 0.32609906792640686, "learning_rate": 1.3592945422254306e-05, "loss": 0.4579, "step": 36182 }, { "epoch": 0.7673856333905962, "grad_norm": 0.3294060230255127, "learning_rate": 1.3592634194344787e-05, "loss": 0.4638, "step": 36183 }, { "epoch": 0.7674068418485291, "grad_norm": 0.4498421847820282, "learning_rate": 1.3592322962439567e-05, "loss": 0.512, "step": 36184 }, { "epoch": 0.7674280503064622, "grad_norm": 0.3388805389404297, "learning_rate": 1.3592011726539004e-05, "loss": 0.5146, "step": 36185 }, { "epoch": 0.7674492587643953, "grad_norm": 0.3443532884120941, "learning_rate": 1.3591700486643431e-05, "loss": 0.532, "step": 36186 }, { "epoch": 0.7674704672223283, "grad_norm": 0.40994083881378174, "learning_rate": 1.3591389242753198e-05, "loss": 0.4438, "step": 36187 }, { "epoch": 0.7674916756802613, "grad_norm": 0.3706657290458679, "learning_rate": 1.359107799486866e-05, "loss": 0.5773, "step": 36188 }, { "epoch": 0.7675128841381943, "grad_norm": 0.6493487358093262, "learning_rate": 1.3590766742990151e-05, "loss": 0.5166, "step": 36189 }, { "epoch": 0.7675340925961274, "grad_norm": 0.38031524419784546, "learning_rate": 1.3590455487118024e-05, "loss": 0.5371, "step": 36190 }, { "epoch": 0.7675553010540603, "grad_norm": 0.3433578908443451, "learning_rate": 1.3590144227252627e-05, "loss": 0.5244, "step": 36191 }, { "epoch": 0.7675765095119934, "grad_norm": 0.36784887313842773, "learning_rate": 1.3589832963394299e-05, "loss": 0.5377, "step": 36192 }, { "epoch": 0.7675977179699264, "grad_norm": 0.40623363852500916, "learning_rate": 1.358952169554339e-05, "loss": 0.497, "step": 36193 }, { "epoch": 0.7676189264278594, "grad_norm": 0.378866583108902, "learning_rate": 1.3589210423700245e-05, "loss": 0.4527, "step": 36194 }, { "epoch": 0.7676401348857924, "grad_norm": 0.3932681977748871, "learning_rate": 1.3588899147865214e-05, "loss": 0.4921, "step": 36195 }, { "epoch": 0.7676613433437255, "grad_norm": 0.37128952145576477, "learning_rate": 1.3588587868038641e-05, "loss": 0.4899, "step": 36196 }, { "epoch": 0.7676825518016585, "grad_norm": 0.5228662490844727, "learning_rate": 1.358827658422087e-05, "loss": 0.3805, "step": 36197 }, { "epoch": 0.7677037602595915, "grad_norm": 0.4897037744522095, "learning_rate": 1.3587965296412248e-05, "loss": 0.5009, "step": 36198 }, { "epoch": 0.7677249687175246, "grad_norm": 0.3809519410133362, "learning_rate": 1.3587654004613124e-05, "loss": 0.4445, "step": 36199 }, { "epoch": 0.7677461771754576, "grad_norm": 0.36607635021209717, "learning_rate": 1.3587342708823843e-05, "loss": 0.545, "step": 36200 }, { "epoch": 0.7677673856333906, "grad_norm": 0.3653610050678253, "learning_rate": 1.3587031409044747e-05, "loss": 0.5916, "step": 36201 }, { "epoch": 0.7677885940913236, "grad_norm": 0.3111395835876465, "learning_rate": 1.358672010527619e-05, "loss": 0.4812, "step": 36202 }, { "epoch": 0.7678098025492567, "grad_norm": 0.3542890250682831, "learning_rate": 1.3586408797518511e-05, "loss": 0.5254, "step": 36203 }, { "epoch": 0.7678310110071896, "grad_norm": 0.34089499711990356, "learning_rate": 1.358609748577206e-05, "loss": 0.5301, "step": 36204 }, { "epoch": 0.7678522194651227, "grad_norm": 0.3579622805118561, "learning_rate": 1.3585786170037185e-05, "loss": 0.4758, "step": 36205 }, { "epoch": 0.7678734279230557, "grad_norm": 0.35277828574180603, "learning_rate": 1.3585474850314227e-05, "loss": 0.4582, "step": 36206 }, { "epoch": 0.7678946363809888, "grad_norm": 0.3702145516872406, "learning_rate": 1.3585163526603535e-05, "loss": 0.5093, "step": 36207 }, { "epoch": 0.7679158448389217, "grad_norm": 0.34083664417266846, "learning_rate": 1.3584852198905458e-05, "loss": 0.4238, "step": 36208 }, { "epoch": 0.7679370532968548, "grad_norm": 0.3774508535861969, "learning_rate": 1.3584540867220337e-05, "loss": 0.4664, "step": 36209 }, { "epoch": 0.7679582617547878, "grad_norm": 0.3384435474872589, "learning_rate": 1.3584229531548523e-05, "loss": 0.5325, "step": 36210 }, { "epoch": 0.7679794702127208, "grad_norm": 0.3914148509502411, "learning_rate": 1.3583918191890359e-05, "loss": 0.5293, "step": 36211 }, { "epoch": 0.7680006786706538, "grad_norm": 0.35531988739967346, "learning_rate": 1.3583606848246191e-05, "loss": 0.5331, "step": 36212 }, { "epoch": 0.7680218871285869, "grad_norm": 0.3202584385871887, "learning_rate": 1.3583295500616367e-05, "loss": 0.4124, "step": 36213 }, { "epoch": 0.76804309558652, "grad_norm": 0.4009227156639099, "learning_rate": 1.3582984149001235e-05, "loss": 0.4674, "step": 36214 }, { "epoch": 0.7680643040444529, "grad_norm": 0.34207072854042053, "learning_rate": 1.3582672793401135e-05, "loss": 0.5146, "step": 36215 }, { "epoch": 0.768085512502386, "grad_norm": 0.43080079555511475, "learning_rate": 1.3582361433816422e-05, "loss": 0.4836, "step": 36216 }, { "epoch": 0.768106720960319, "grad_norm": 2.2048327922821045, "learning_rate": 1.3582050070247434e-05, "loss": 0.4757, "step": 36217 }, { "epoch": 0.768127929418252, "grad_norm": 0.38051146268844604, "learning_rate": 1.3581738702694525e-05, "loss": 0.5525, "step": 36218 }, { "epoch": 0.768149137876185, "grad_norm": 0.3596082031726837, "learning_rate": 1.3581427331158033e-05, "loss": 0.532, "step": 36219 }, { "epoch": 0.7681703463341181, "grad_norm": 0.3889699876308441, "learning_rate": 1.3581115955638311e-05, "loss": 0.456, "step": 36220 }, { "epoch": 0.768191554792051, "grad_norm": 0.45398470759391785, "learning_rate": 1.3580804576135705e-05, "loss": 0.4699, "step": 36221 }, { "epoch": 0.7682127632499841, "grad_norm": 0.4031373858451843, "learning_rate": 1.3580493192650557e-05, "loss": 0.5249, "step": 36222 }, { "epoch": 0.7682339717079171, "grad_norm": 0.3476024568080902, "learning_rate": 1.3580181805183213e-05, "loss": 0.5229, "step": 36223 }, { "epoch": 0.7682551801658501, "grad_norm": 0.3774263560771942, "learning_rate": 1.3579870413734027e-05, "loss": 0.4386, "step": 36224 }, { "epoch": 0.7682763886237831, "grad_norm": 0.4872116446495056, "learning_rate": 1.3579559018303336e-05, "loss": 0.49, "step": 36225 }, { "epoch": 0.7682975970817162, "grad_norm": 0.3995848298072815, "learning_rate": 1.3579247618891491e-05, "loss": 0.5099, "step": 36226 }, { "epoch": 0.7683188055396493, "grad_norm": 0.41460901498794556, "learning_rate": 1.357893621549884e-05, "loss": 0.5287, "step": 36227 }, { "epoch": 0.7683400139975822, "grad_norm": 0.5338839888572693, "learning_rate": 1.3578624808125727e-05, "loss": 0.4175, "step": 36228 }, { "epoch": 0.7683612224555153, "grad_norm": 0.49371394515037537, "learning_rate": 1.3578313396772498e-05, "loss": 0.5147, "step": 36229 }, { "epoch": 0.7683824309134483, "grad_norm": 0.36409395933151245, "learning_rate": 1.3578001981439499e-05, "loss": 0.4417, "step": 36230 }, { "epoch": 0.7684036393713813, "grad_norm": 0.3645048439502716, "learning_rate": 1.3577690562127078e-05, "loss": 0.4548, "step": 36231 }, { "epoch": 0.7684248478293143, "grad_norm": 0.3530806303024292, "learning_rate": 1.357737913883558e-05, "loss": 0.4128, "step": 36232 }, { "epoch": 0.7684460562872474, "grad_norm": 0.3405815660953522, "learning_rate": 1.3577067711565354e-05, "loss": 0.5303, "step": 36233 }, { "epoch": 0.7684672647451803, "grad_norm": 0.3830893635749817, "learning_rate": 1.3576756280316743e-05, "loss": 0.4503, "step": 36234 }, { "epoch": 0.7684884732031134, "grad_norm": 0.4301949739456177, "learning_rate": 1.3576444845090096e-05, "loss": 0.5024, "step": 36235 }, { "epoch": 0.7685096816610464, "grad_norm": 0.41135573387145996, "learning_rate": 1.3576133405885756e-05, "loss": 0.5991, "step": 36236 }, { "epoch": 0.7685308901189795, "grad_norm": 0.4580850303173065, "learning_rate": 1.3575821962704072e-05, "loss": 0.4708, "step": 36237 }, { "epoch": 0.7685520985769124, "grad_norm": 0.3297983407974243, "learning_rate": 1.3575510515545392e-05, "loss": 0.4712, "step": 36238 }, { "epoch": 0.7685733070348455, "grad_norm": 0.3443954288959503, "learning_rate": 1.3575199064410058e-05, "loss": 0.4683, "step": 36239 }, { "epoch": 0.7685945154927786, "grad_norm": 0.3528781831264496, "learning_rate": 1.3574887609298418e-05, "loss": 0.5596, "step": 36240 }, { "epoch": 0.7686157239507115, "grad_norm": 0.36814066767692566, "learning_rate": 1.357457615021082e-05, "loss": 0.4878, "step": 36241 }, { "epoch": 0.7686369324086446, "grad_norm": 0.3399953544139862, "learning_rate": 1.3574264687147611e-05, "loss": 0.4184, "step": 36242 }, { "epoch": 0.7686581408665776, "grad_norm": 0.39907366037368774, "learning_rate": 1.3573953220109135e-05, "loss": 0.4677, "step": 36243 }, { "epoch": 0.7686793493245107, "grad_norm": 0.3551928400993347, "learning_rate": 1.3573641749095739e-05, "loss": 0.3964, "step": 36244 }, { "epoch": 0.7687005577824436, "grad_norm": 0.358786016702652, "learning_rate": 1.3573330274107768e-05, "loss": 0.4364, "step": 36245 }, { "epoch": 0.7687217662403767, "grad_norm": 0.35770583152770996, "learning_rate": 1.3573018795145572e-05, "loss": 0.4735, "step": 36246 }, { "epoch": 0.7687429746983097, "grad_norm": 0.354887992143631, "learning_rate": 1.3572707312209494e-05, "loss": 0.3912, "step": 36247 }, { "epoch": 0.7687641831562427, "grad_norm": 0.5396183729171753, "learning_rate": 1.3572395825299885e-05, "loss": 0.4752, "step": 36248 }, { "epoch": 0.7687853916141757, "grad_norm": 0.3459533154964447, "learning_rate": 1.3572084334417086e-05, "loss": 0.3734, "step": 36249 }, { "epoch": 0.7688066000721088, "grad_norm": 0.3328929543495178, "learning_rate": 1.3571772839561449e-05, "loss": 0.4362, "step": 36250 }, { "epoch": 0.7688278085300417, "grad_norm": 0.4784180819988251, "learning_rate": 1.3571461340733312e-05, "loss": 0.524, "step": 36251 }, { "epoch": 0.7688490169879748, "grad_norm": 0.35627904534339905, "learning_rate": 1.3571149837933029e-05, "loss": 0.4393, "step": 36252 }, { "epoch": 0.7688702254459078, "grad_norm": 0.3819911777973175, "learning_rate": 1.3570838331160946e-05, "loss": 0.4615, "step": 36253 }, { "epoch": 0.7688914339038408, "grad_norm": 0.3467920422554016, "learning_rate": 1.3570526820417405e-05, "loss": 0.4426, "step": 36254 }, { "epoch": 0.7689126423617739, "grad_norm": 0.3270214796066284, "learning_rate": 1.3570215305702756e-05, "loss": 0.5344, "step": 36255 }, { "epoch": 0.7689338508197069, "grad_norm": 0.3571954071521759, "learning_rate": 1.3569903787017346e-05, "loss": 0.5091, "step": 36256 }, { "epoch": 0.76895505927764, "grad_norm": 0.36539342999458313, "learning_rate": 1.356959226436152e-05, "loss": 0.5137, "step": 36257 }, { "epoch": 0.7689762677355729, "grad_norm": 0.40822362899780273, "learning_rate": 1.3569280737735622e-05, "loss": 0.4729, "step": 36258 }, { "epoch": 0.768997476193506, "grad_norm": 0.3552066683769226, "learning_rate": 1.3568969207140002e-05, "loss": 0.4777, "step": 36259 }, { "epoch": 0.769018684651439, "grad_norm": 0.3539561629295349, "learning_rate": 1.3568657672575006e-05, "loss": 0.4782, "step": 36260 }, { "epoch": 0.769039893109372, "grad_norm": 0.32448917627334595, "learning_rate": 1.356834613404098e-05, "loss": 0.4091, "step": 36261 }, { "epoch": 0.769061101567305, "grad_norm": 0.3699314296245575, "learning_rate": 1.3568034591538268e-05, "loss": 0.558, "step": 36262 }, { "epoch": 0.7690823100252381, "grad_norm": 0.3366822600364685, "learning_rate": 1.3567723045067225e-05, "loss": 0.4524, "step": 36263 }, { "epoch": 0.769103518483171, "grad_norm": 0.35393771529197693, "learning_rate": 1.3567411494628184e-05, "loss": 0.4943, "step": 36264 }, { "epoch": 0.7691247269411041, "grad_norm": 0.3846742808818817, "learning_rate": 1.3567099940221504e-05, "loss": 0.5236, "step": 36265 }, { "epoch": 0.7691459353990371, "grad_norm": 0.3254758417606354, "learning_rate": 1.3566788381847524e-05, "loss": 0.5229, "step": 36266 }, { "epoch": 0.7691671438569702, "grad_norm": 0.36806973814964294, "learning_rate": 1.3566476819506595e-05, "loss": 0.4375, "step": 36267 }, { "epoch": 0.7691883523149032, "grad_norm": 0.5814807415008545, "learning_rate": 1.356616525319906e-05, "loss": 0.5076, "step": 36268 }, { "epoch": 0.7692095607728362, "grad_norm": 0.37394222617149353, "learning_rate": 1.3565853682925267e-05, "loss": 0.4987, "step": 36269 }, { "epoch": 0.7692307692307693, "grad_norm": 0.3349263668060303, "learning_rate": 1.3565542108685566e-05, "loss": 0.4219, "step": 36270 }, { "epoch": 0.7692519776887022, "grad_norm": 0.41862061619758606, "learning_rate": 1.3565230530480293e-05, "loss": 0.5102, "step": 36271 }, { "epoch": 0.7692731861466353, "grad_norm": 0.4629501700401306, "learning_rate": 1.3564918948309806e-05, "loss": 0.6134, "step": 36272 }, { "epoch": 0.7692943946045683, "grad_norm": 0.4015461802482605, "learning_rate": 1.3564607362174447e-05, "loss": 0.4558, "step": 36273 }, { "epoch": 0.7693156030625014, "grad_norm": 0.34623822569847107, "learning_rate": 1.3564295772074561e-05, "loss": 0.5232, "step": 36274 }, { "epoch": 0.7693368115204343, "grad_norm": 0.3662130832672119, "learning_rate": 1.3563984178010498e-05, "loss": 0.5316, "step": 36275 }, { "epoch": 0.7693580199783674, "grad_norm": 0.43978673219680786, "learning_rate": 1.3563672579982602e-05, "loss": 0.6457, "step": 36276 }, { "epoch": 0.7693792284363004, "grad_norm": 0.33335837721824646, "learning_rate": 1.356336097799122e-05, "loss": 0.4938, "step": 36277 }, { "epoch": 0.7694004368942334, "grad_norm": 0.3688890039920807, "learning_rate": 1.35630493720367e-05, "loss": 0.5028, "step": 36278 }, { "epoch": 0.7694216453521664, "grad_norm": 0.34887877106666565, "learning_rate": 1.3562737762119386e-05, "loss": 0.4799, "step": 36279 }, { "epoch": 0.7694428538100995, "grad_norm": 0.5180715918540955, "learning_rate": 1.3562426148239628e-05, "loss": 0.5509, "step": 36280 }, { "epoch": 0.7694640622680325, "grad_norm": 0.5144663453102112, "learning_rate": 1.356211453039777e-05, "loss": 0.5266, "step": 36281 }, { "epoch": 0.7694852707259655, "grad_norm": 0.3613969385623932, "learning_rate": 1.3561802908594157e-05, "loss": 0.4707, "step": 36282 }, { "epoch": 0.7695064791838986, "grad_norm": 0.3456076979637146, "learning_rate": 1.3561491282829139e-05, "loss": 0.5108, "step": 36283 }, { "epoch": 0.7695276876418315, "grad_norm": 0.3806309700012207, "learning_rate": 1.3561179653103062e-05, "loss": 0.4738, "step": 36284 }, { "epoch": 0.7695488960997646, "grad_norm": 0.33273959159851074, "learning_rate": 1.3560868019416272e-05, "loss": 0.5207, "step": 36285 }, { "epoch": 0.7695701045576976, "grad_norm": 0.323383092880249, "learning_rate": 1.3560556381769115e-05, "loss": 0.4096, "step": 36286 }, { "epoch": 0.7695913130156307, "grad_norm": 0.37782636284828186, "learning_rate": 1.3560244740161938e-05, "loss": 0.4903, "step": 36287 }, { "epoch": 0.7696125214735636, "grad_norm": 0.31575825810432434, "learning_rate": 1.3559933094595088e-05, "loss": 0.3796, "step": 36288 }, { "epoch": 0.7696337299314967, "grad_norm": 0.38101863861083984, "learning_rate": 1.3559621445068914e-05, "loss": 0.5152, "step": 36289 }, { "epoch": 0.7696549383894297, "grad_norm": 0.3488471210002899, "learning_rate": 1.3559309791583755e-05, "loss": 0.4392, "step": 36290 }, { "epoch": 0.7696761468473627, "grad_norm": 0.3846355974674225, "learning_rate": 1.3558998134139962e-05, "loss": 0.5371, "step": 36291 }, { "epoch": 0.7696973553052957, "grad_norm": 0.3712822198867798, "learning_rate": 1.3558686472737888e-05, "loss": 0.5229, "step": 36292 }, { "epoch": 0.7697185637632288, "grad_norm": 0.35302379727363586, "learning_rate": 1.3558374807377873e-05, "loss": 0.4302, "step": 36293 }, { "epoch": 0.7697397722211617, "grad_norm": 0.3469180762767792, "learning_rate": 1.355806313806026e-05, "loss": 0.4591, "step": 36294 }, { "epoch": 0.7697609806790948, "grad_norm": 0.3356613516807556, "learning_rate": 1.3557751464785403e-05, "loss": 0.4353, "step": 36295 }, { "epoch": 0.7697821891370279, "grad_norm": 0.33407992124557495, "learning_rate": 1.3557439787553643e-05, "loss": 0.4725, "step": 36296 }, { "epoch": 0.7698033975949609, "grad_norm": 0.3730597496032715, "learning_rate": 1.3557128106365332e-05, "loss": 0.453, "step": 36297 }, { "epoch": 0.7698246060528939, "grad_norm": 0.4166352152824402, "learning_rate": 1.3556816421220814e-05, "loss": 0.5878, "step": 36298 }, { "epoch": 0.7698458145108269, "grad_norm": 0.3412776589393616, "learning_rate": 1.3556504732120433e-05, "loss": 0.4508, "step": 36299 }, { "epoch": 0.76986702296876, "grad_norm": 0.39893385767936707, "learning_rate": 1.3556193039064543e-05, "loss": 0.5713, "step": 36300 }, { "epoch": 0.7698882314266929, "grad_norm": 0.3985818326473236, "learning_rate": 1.3555881342053483e-05, "loss": 0.4523, "step": 36301 }, { "epoch": 0.769909439884626, "grad_norm": 0.334052175283432, "learning_rate": 1.3555569641087602e-05, "loss": 0.4964, "step": 36302 }, { "epoch": 0.769930648342559, "grad_norm": 0.3573765754699707, "learning_rate": 1.3555257936167247e-05, "loss": 0.4831, "step": 36303 }, { "epoch": 0.769951856800492, "grad_norm": 0.38132262229919434, "learning_rate": 1.3554946227292769e-05, "loss": 0.4479, "step": 36304 }, { "epoch": 0.769973065258425, "grad_norm": 0.6006175875663757, "learning_rate": 1.3554634514464507e-05, "loss": 0.4492, "step": 36305 }, { "epoch": 0.7699942737163581, "grad_norm": 0.4349084496498108, "learning_rate": 1.3554322797682812e-05, "loss": 0.5508, "step": 36306 }, { "epoch": 0.770015482174291, "grad_norm": 0.3361324965953827, "learning_rate": 1.3554011076948033e-05, "loss": 0.4147, "step": 36307 }, { "epoch": 0.7700366906322241, "grad_norm": 0.3548842668533325, "learning_rate": 1.355369935226051e-05, "loss": 0.4606, "step": 36308 }, { "epoch": 0.7700578990901572, "grad_norm": 0.767117977142334, "learning_rate": 1.3553387623620593e-05, "loss": 0.5466, "step": 36309 }, { "epoch": 0.7700791075480902, "grad_norm": 0.3654242157936096, "learning_rate": 1.3553075891028634e-05, "loss": 0.4888, "step": 36310 }, { "epoch": 0.7701003160060232, "grad_norm": 0.3872087895870209, "learning_rate": 1.3552764154484969e-05, "loss": 0.5719, "step": 36311 }, { "epoch": 0.7701215244639562, "grad_norm": 0.3338611125946045, "learning_rate": 1.3552452413989957e-05, "loss": 0.405, "step": 36312 }, { "epoch": 0.7701427329218893, "grad_norm": 0.38140353560447693, "learning_rate": 1.3552140669543932e-05, "loss": 0.4758, "step": 36313 }, { "epoch": 0.7701639413798222, "grad_norm": 0.46730339527130127, "learning_rate": 1.355182892114725e-05, "loss": 0.556, "step": 36314 }, { "epoch": 0.7701851498377553, "grad_norm": 0.36814895272254944, "learning_rate": 1.3551517168800255e-05, "loss": 0.4612, "step": 36315 }, { "epoch": 0.7702063582956883, "grad_norm": 0.3721761703491211, "learning_rate": 1.355120541250329e-05, "loss": 0.4974, "step": 36316 }, { "epoch": 0.7702275667536214, "grad_norm": 0.39208337664604187, "learning_rate": 1.355089365225671e-05, "loss": 0.4947, "step": 36317 }, { "epoch": 0.7702487752115543, "grad_norm": 0.5279006958007812, "learning_rate": 1.3550581888060857e-05, "loss": 0.4684, "step": 36318 }, { "epoch": 0.7702699836694874, "grad_norm": 0.31031328439712524, "learning_rate": 1.3550270119916073e-05, "loss": 0.4181, "step": 36319 }, { "epoch": 0.7702911921274204, "grad_norm": 0.3326338827610016, "learning_rate": 1.3549958347822713e-05, "loss": 0.518, "step": 36320 }, { "epoch": 0.7703124005853534, "grad_norm": 0.41682687401771545, "learning_rate": 1.3549646571781118e-05, "loss": 0.4765, "step": 36321 }, { "epoch": 0.7703336090432865, "grad_norm": 0.348471075296402, "learning_rate": 1.3549334791791638e-05, "loss": 0.4739, "step": 36322 }, { "epoch": 0.7703548175012195, "grad_norm": 4.542934417724609, "learning_rate": 1.3549023007854621e-05, "loss": 0.4763, "step": 36323 }, { "epoch": 0.7703760259591526, "grad_norm": 0.4168328046798706, "learning_rate": 1.3548711219970407e-05, "loss": 0.4985, "step": 36324 }, { "epoch": 0.7703972344170855, "grad_norm": 0.3868321478366852, "learning_rate": 1.3548399428139349e-05, "loss": 0.4971, "step": 36325 }, { "epoch": 0.7704184428750186, "grad_norm": 0.37204548716545105, "learning_rate": 1.3548087632361796e-05, "loss": 0.4018, "step": 36326 }, { "epoch": 0.7704396513329516, "grad_norm": 0.32942184805870056, "learning_rate": 1.3547775832638084e-05, "loss": 0.4272, "step": 36327 }, { "epoch": 0.7704608597908846, "grad_norm": 0.42684122920036316, "learning_rate": 1.354746402896857e-05, "loss": 0.4408, "step": 36328 }, { "epoch": 0.7704820682488176, "grad_norm": 0.3621273338794708, "learning_rate": 1.3547152221353597e-05, "loss": 0.4761, "step": 36329 }, { "epoch": 0.7705032767067507, "grad_norm": 0.3507118225097656, "learning_rate": 1.3546840409793511e-05, "loss": 0.4822, "step": 36330 }, { "epoch": 0.7705244851646836, "grad_norm": 0.39729395508766174, "learning_rate": 1.3546528594288661e-05, "loss": 0.4855, "step": 36331 }, { "epoch": 0.7705456936226167, "grad_norm": 0.40348732471466064, "learning_rate": 1.3546216774839395e-05, "loss": 0.5195, "step": 36332 }, { "epoch": 0.7705669020805497, "grad_norm": 0.3610319495201111, "learning_rate": 1.3545904951446054e-05, "loss": 0.4708, "step": 36333 }, { "epoch": 0.7705881105384828, "grad_norm": 0.33547985553741455, "learning_rate": 1.354559312410899e-05, "loss": 0.4699, "step": 36334 }, { "epoch": 0.7706093189964157, "grad_norm": 0.3502376675605774, "learning_rate": 1.3545281292828546e-05, "loss": 0.4731, "step": 36335 }, { "epoch": 0.7706305274543488, "grad_norm": 0.3774721920490265, "learning_rate": 1.3544969457605072e-05, "loss": 0.5221, "step": 36336 }, { "epoch": 0.7706517359122819, "grad_norm": 0.33206745982170105, "learning_rate": 1.3544657618438918e-05, "loss": 0.4433, "step": 36337 }, { "epoch": 0.7706729443702148, "grad_norm": 0.33751946687698364, "learning_rate": 1.3544345775330422e-05, "loss": 0.4113, "step": 36338 }, { "epoch": 0.7706941528281479, "grad_norm": 0.33114510774612427, "learning_rate": 1.3544033928279936e-05, "loss": 0.4694, "step": 36339 }, { "epoch": 0.7707153612860809, "grad_norm": 0.4185616970062256, "learning_rate": 1.3543722077287808e-05, "loss": 0.5644, "step": 36340 }, { "epoch": 0.7707365697440139, "grad_norm": 0.37226682901382446, "learning_rate": 1.354341022235438e-05, "loss": 0.5657, "step": 36341 }, { "epoch": 0.7707577782019469, "grad_norm": 0.3274692893028259, "learning_rate": 1.3543098363480003e-05, "loss": 0.4775, "step": 36342 }, { "epoch": 0.77077898665988, "grad_norm": 0.7219835519790649, "learning_rate": 1.3542786500665024e-05, "loss": 0.5207, "step": 36343 }, { "epoch": 0.770800195117813, "grad_norm": 0.3355920910835266, "learning_rate": 1.3542474633909788e-05, "loss": 0.3973, "step": 36344 }, { "epoch": 0.770821403575746, "grad_norm": 0.35879796743392944, "learning_rate": 1.3542162763214644e-05, "loss": 0.5974, "step": 36345 }, { "epoch": 0.770842612033679, "grad_norm": 0.3842642307281494, "learning_rate": 1.3541850888579937e-05, "loss": 0.5474, "step": 36346 }, { "epoch": 0.7708638204916121, "grad_norm": 0.34435200691223145, "learning_rate": 1.3541539010006011e-05, "loss": 0.5079, "step": 36347 }, { "epoch": 0.770885028949545, "grad_norm": 0.39991915225982666, "learning_rate": 1.3541227127493218e-05, "loss": 0.5729, "step": 36348 }, { "epoch": 0.7709062374074781, "grad_norm": 0.3679470717906952, "learning_rate": 1.3540915241041903e-05, "loss": 0.4285, "step": 36349 }, { "epoch": 0.7709274458654112, "grad_norm": 0.354512095451355, "learning_rate": 1.3540603350652413e-05, "loss": 0.4575, "step": 36350 }, { "epoch": 0.7709486543233441, "grad_norm": 0.39360296726226807, "learning_rate": 1.3540291456325097e-05, "loss": 0.4425, "step": 36351 }, { "epoch": 0.7709698627812772, "grad_norm": 0.38662299513816833, "learning_rate": 1.3539979558060297e-05, "loss": 0.4802, "step": 36352 }, { "epoch": 0.7709910712392102, "grad_norm": 0.38190755248069763, "learning_rate": 1.3539667655858361e-05, "loss": 0.462, "step": 36353 }, { "epoch": 0.7710122796971433, "grad_norm": 0.3771299719810486, "learning_rate": 1.3539355749719639e-05, "loss": 0.5107, "step": 36354 }, { "epoch": 0.7710334881550762, "grad_norm": 0.3209502100944519, "learning_rate": 1.3539043839644478e-05, "loss": 0.4674, "step": 36355 }, { "epoch": 0.7710546966130093, "grad_norm": 0.35574761033058167, "learning_rate": 1.353873192563322e-05, "loss": 0.4979, "step": 36356 }, { "epoch": 0.7710759050709423, "grad_norm": 0.3674769103527069, "learning_rate": 1.353842000768622e-05, "loss": 0.4636, "step": 36357 }, { "epoch": 0.7710971135288753, "grad_norm": 0.3665633201599121, "learning_rate": 1.3538108085803814e-05, "loss": 0.4805, "step": 36358 }, { "epoch": 0.7711183219868083, "grad_norm": 0.3668103516101837, "learning_rate": 1.3537796159986358e-05, "loss": 0.446, "step": 36359 }, { "epoch": 0.7711395304447414, "grad_norm": 0.3823799192905426, "learning_rate": 1.3537484230234195e-05, "loss": 0.5542, "step": 36360 }, { "epoch": 0.7711607389026743, "grad_norm": 0.358400821685791, "learning_rate": 1.3537172296547673e-05, "loss": 0.486, "step": 36361 }, { "epoch": 0.7711819473606074, "grad_norm": 0.34949737787246704, "learning_rate": 1.353686035892714e-05, "loss": 0.5441, "step": 36362 }, { "epoch": 0.7712031558185405, "grad_norm": 0.3752390444278717, "learning_rate": 1.3536548417372942e-05, "loss": 0.4829, "step": 36363 }, { "epoch": 0.7712243642764735, "grad_norm": 0.35199177265167236, "learning_rate": 1.3536236471885422e-05, "loss": 0.5053, "step": 36364 }, { "epoch": 0.7712455727344065, "grad_norm": 0.3695524334907532, "learning_rate": 1.3535924522464934e-05, "loss": 0.4687, "step": 36365 }, { "epoch": 0.7712667811923395, "grad_norm": 0.43377816677093506, "learning_rate": 1.3535612569111823e-05, "loss": 0.654, "step": 36366 }, { "epoch": 0.7712879896502726, "grad_norm": 0.3469281792640686, "learning_rate": 1.3535300611826427e-05, "loss": 0.5423, "step": 36367 }, { "epoch": 0.7713091981082055, "grad_norm": 0.3643597364425659, "learning_rate": 1.3534988650609107e-05, "loss": 0.5646, "step": 36368 }, { "epoch": 0.7713304065661386, "grad_norm": 0.3725118041038513, "learning_rate": 1.3534676685460202e-05, "loss": 0.5332, "step": 36369 }, { "epoch": 0.7713516150240716, "grad_norm": 0.33802661299705505, "learning_rate": 1.3534364716380057e-05, "loss": 0.4285, "step": 36370 }, { "epoch": 0.7713728234820046, "grad_norm": 0.39784446358680725, "learning_rate": 1.3534052743369027e-05, "loss": 0.5245, "step": 36371 }, { "epoch": 0.7713940319399376, "grad_norm": 0.3469882607460022, "learning_rate": 1.3533740766427452e-05, "loss": 0.4928, "step": 36372 }, { "epoch": 0.7714152403978707, "grad_norm": 0.35709723830223083, "learning_rate": 1.353342878555568e-05, "loss": 0.5515, "step": 36373 }, { "epoch": 0.7714364488558036, "grad_norm": 0.4070293605327606, "learning_rate": 1.353311680075406e-05, "loss": 0.4524, "step": 36374 }, { "epoch": 0.7714576573137367, "grad_norm": 0.4032968282699585, "learning_rate": 1.3532804812022939e-05, "loss": 0.521, "step": 36375 }, { "epoch": 0.7714788657716698, "grad_norm": 0.37376824021339417, "learning_rate": 1.3532492819362664e-05, "loss": 0.3626, "step": 36376 }, { "epoch": 0.7715000742296028, "grad_norm": 0.39600369334220886, "learning_rate": 1.3532180822773579e-05, "loss": 0.5333, "step": 36377 }, { "epoch": 0.7715212826875358, "grad_norm": 0.3736647963523865, "learning_rate": 1.3531868822256035e-05, "loss": 0.5647, "step": 36378 }, { "epoch": 0.7715424911454688, "grad_norm": 0.3908082842826843, "learning_rate": 1.3531556817810376e-05, "loss": 0.4867, "step": 36379 }, { "epoch": 0.7715636996034019, "grad_norm": 0.342928022146225, "learning_rate": 1.353124480943695e-05, "loss": 0.4836, "step": 36380 }, { "epoch": 0.7715849080613348, "grad_norm": 0.3713604211807251, "learning_rate": 1.3530932797136105e-05, "loss": 0.538, "step": 36381 }, { "epoch": 0.7716061165192679, "grad_norm": 0.3878925144672394, "learning_rate": 1.3530620780908189e-05, "loss": 0.5552, "step": 36382 }, { "epoch": 0.7716273249772009, "grad_norm": 0.363413006067276, "learning_rate": 1.3530308760753546e-05, "loss": 0.4991, "step": 36383 }, { "epoch": 0.771648533435134, "grad_norm": 0.34864604473114014, "learning_rate": 1.3529996736672521e-05, "loss": 0.4573, "step": 36384 }, { "epoch": 0.7716697418930669, "grad_norm": 0.4031716585159302, "learning_rate": 1.352968470866547e-05, "loss": 0.6032, "step": 36385 }, { "epoch": 0.771690950351, "grad_norm": 0.3396894037723541, "learning_rate": 1.3529372676732728e-05, "loss": 0.452, "step": 36386 }, { "epoch": 0.771712158808933, "grad_norm": 0.3546295464038849, "learning_rate": 1.3529060640874651e-05, "loss": 0.542, "step": 36387 }, { "epoch": 0.771733367266866, "grad_norm": 0.3622244596481323, "learning_rate": 1.3528748601091586e-05, "loss": 0.4429, "step": 36388 }, { "epoch": 0.771754575724799, "grad_norm": 0.35207483172416687, "learning_rate": 1.3528436557383876e-05, "loss": 0.4698, "step": 36389 }, { "epoch": 0.7717757841827321, "grad_norm": 0.44622108340263367, "learning_rate": 1.3528124509751867e-05, "loss": 0.4477, "step": 36390 }, { "epoch": 0.7717969926406651, "grad_norm": 0.37030959129333496, "learning_rate": 1.3527812458195912e-05, "loss": 0.4679, "step": 36391 }, { "epoch": 0.7718182010985981, "grad_norm": 0.3419947624206543, "learning_rate": 1.3527500402716352e-05, "loss": 0.5194, "step": 36392 }, { "epoch": 0.7718394095565312, "grad_norm": 0.32681432366371155, "learning_rate": 1.3527188343313538e-05, "loss": 0.5287, "step": 36393 }, { "epoch": 0.7718606180144642, "grad_norm": 0.3518524765968323, "learning_rate": 1.3526876279987815e-05, "loss": 0.3885, "step": 36394 }, { "epoch": 0.7718818264723972, "grad_norm": 0.3516305088996887, "learning_rate": 1.352656421273953e-05, "loss": 0.4712, "step": 36395 }, { "epoch": 0.7719030349303302, "grad_norm": 0.40192437171936035, "learning_rate": 1.3526252141569035e-05, "loss": 0.5172, "step": 36396 }, { "epoch": 0.7719242433882633, "grad_norm": 0.34558847546577454, "learning_rate": 1.352594006647667e-05, "loss": 0.4652, "step": 36397 }, { "epoch": 0.7719454518461962, "grad_norm": 0.425481379032135, "learning_rate": 1.3525627987462784e-05, "loss": 0.6137, "step": 36398 }, { "epoch": 0.7719666603041293, "grad_norm": 0.35284340381622314, "learning_rate": 1.3525315904527724e-05, "loss": 0.4878, "step": 36399 }, { "epoch": 0.7719878687620623, "grad_norm": 0.3821714520454407, "learning_rate": 1.3525003817671844e-05, "loss": 0.5449, "step": 36400 }, { "epoch": 0.7720090772199953, "grad_norm": 0.35060107707977295, "learning_rate": 1.3524691726895483e-05, "loss": 0.4515, "step": 36401 }, { "epoch": 0.7720302856779283, "grad_norm": 0.4153219163417816, "learning_rate": 1.352437963219899e-05, "loss": 0.5459, "step": 36402 }, { "epoch": 0.7720514941358614, "grad_norm": 0.3688921630382538, "learning_rate": 1.3524067533582713e-05, "loss": 0.5125, "step": 36403 }, { "epoch": 0.7720727025937945, "grad_norm": 0.3880571722984314, "learning_rate": 1.3523755431046996e-05, "loss": 0.5324, "step": 36404 }, { "epoch": 0.7720939110517274, "grad_norm": 0.36607012152671814, "learning_rate": 1.352344332459219e-05, "loss": 0.4841, "step": 36405 }, { "epoch": 0.7721151195096605, "grad_norm": 0.3152083158493042, "learning_rate": 1.3523131214218643e-05, "loss": 0.5073, "step": 36406 }, { "epoch": 0.7721363279675935, "grad_norm": 0.38464513421058655, "learning_rate": 1.3522819099926698e-05, "loss": 0.5572, "step": 36407 }, { "epoch": 0.7721575364255265, "grad_norm": 0.34985390305519104, "learning_rate": 1.3522506981716705e-05, "loss": 0.4638, "step": 36408 }, { "epoch": 0.7721787448834595, "grad_norm": 0.40519413352012634, "learning_rate": 1.352219485958901e-05, "loss": 0.4297, "step": 36409 }, { "epoch": 0.7721999533413926, "grad_norm": 0.4831506311893463, "learning_rate": 1.3521882733543962e-05, "loss": 0.5659, "step": 36410 }, { "epoch": 0.7722211617993255, "grad_norm": 0.3967800438404083, "learning_rate": 1.3521570603581906e-05, "loss": 0.5688, "step": 36411 }, { "epoch": 0.7722423702572586, "grad_norm": 0.7517001032829285, "learning_rate": 1.3521258469703187e-05, "loss": 0.471, "step": 36412 }, { "epoch": 0.7722635787151916, "grad_norm": 0.33262893557548523, "learning_rate": 1.3520946331908158e-05, "loss": 0.445, "step": 36413 }, { "epoch": 0.7722847871731247, "grad_norm": 0.3361985385417938, "learning_rate": 1.3520634190197164e-05, "loss": 0.3899, "step": 36414 }, { "epoch": 0.7723059956310576, "grad_norm": 0.34081003069877625, "learning_rate": 1.3520322044570549e-05, "loss": 0.5517, "step": 36415 }, { "epoch": 0.7723272040889907, "grad_norm": 0.3915172815322876, "learning_rate": 1.3520009895028664e-05, "loss": 0.4867, "step": 36416 }, { "epoch": 0.7723484125469238, "grad_norm": 0.336104154586792, "learning_rate": 1.3519697741571854e-05, "loss": 0.4452, "step": 36417 }, { "epoch": 0.7723696210048567, "grad_norm": 0.3851979970932007, "learning_rate": 1.3519385584200462e-05, "loss": 0.5566, "step": 36418 }, { "epoch": 0.7723908294627898, "grad_norm": 0.3514757752418518, "learning_rate": 1.3519073422914845e-05, "loss": 0.607, "step": 36419 }, { "epoch": 0.7724120379207228, "grad_norm": 0.3266296684741974, "learning_rate": 1.3518761257715344e-05, "loss": 0.451, "step": 36420 }, { "epoch": 0.7724332463786558, "grad_norm": 0.4628666341304779, "learning_rate": 1.3518449088602308e-05, "loss": 0.4913, "step": 36421 }, { "epoch": 0.7724544548365888, "grad_norm": 0.3621463477611542, "learning_rate": 1.3518136915576085e-05, "loss": 0.5382, "step": 36422 }, { "epoch": 0.7724756632945219, "grad_norm": 0.32885339856147766, "learning_rate": 1.3517824738637018e-05, "loss": 0.4394, "step": 36423 }, { "epoch": 0.7724968717524549, "grad_norm": 0.3921147882938385, "learning_rate": 1.3517512557785456e-05, "loss": 0.5264, "step": 36424 }, { "epoch": 0.7725180802103879, "grad_norm": 0.47992703318595886, "learning_rate": 1.3517200373021751e-05, "loss": 0.432, "step": 36425 }, { "epoch": 0.7725392886683209, "grad_norm": 0.37389829754829407, "learning_rate": 1.3516888184346241e-05, "loss": 0.4742, "step": 36426 }, { "epoch": 0.772560497126254, "grad_norm": 0.35933634638786316, "learning_rate": 1.3516575991759282e-05, "loss": 0.5289, "step": 36427 }, { "epoch": 0.7725817055841869, "grad_norm": 0.36930781602859497, "learning_rate": 1.351626379526122e-05, "loss": 0.4998, "step": 36428 }, { "epoch": 0.77260291404212, "grad_norm": 0.40386995673179626, "learning_rate": 1.3515951594852394e-05, "loss": 0.5061, "step": 36429 }, { "epoch": 0.772624122500053, "grad_norm": 0.36635592579841614, "learning_rate": 1.3515639390533161e-05, "loss": 0.5143, "step": 36430 }, { "epoch": 0.772645330957986, "grad_norm": 0.3584553599357605, "learning_rate": 1.3515327182303865e-05, "loss": 0.5318, "step": 36431 }, { "epoch": 0.7726665394159191, "grad_norm": 0.3552720248699188, "learning_rate": 1.351501497016485e-05, "loss": 0.4622, "step": 36432 }, { "epoch": 0.7726877478738521, "grad_norm": 0.3700396716594696, "learning_rate": 1.3514702754116469e-05, "loss": 0.5074, "step": 36433 }, { "epoch": 0.7727089563317852, "grad_norm": 0.3788425624370575, "learning_rate": 1.3514390534159065e-05, "loss": 0.5082, "step": 36434 }, { "epoch": 0.7727301647897181, "grad_norm": 0.32049664855003357, "learning_rate": 1.3514078310292985e-05, "loss": 0.5326, "step": 36435 }, { "epoch": 0.7727513732476512, "grad_norm": 0.3921588361263275, "learning_rate": 1.351376608251858e-05, "loss": 0.4765, "step": 36436 }, { "epoch": 0.7727725817055842, "grad_norm": 0.42066529393196106, "learning_rate": 1.3513453850836191e-05, "loss": 0.4875, "step": 36437 }, { "epoch": 0.7727937901635172, "grad_norm": 0.37429532408714294, "learning_rate": 1.351314161524617e-05, "loss": 0.4679, "step": 36438 }, { "epoch": 0.7728149986214502, "grad_norm": 0.3355948328971863, "learning_rate": 1.3512829375748865e-05, "loss": 0.4347, "step": 36439 }, { "epoch": 0.7728362070793833, "grad_norm": 0.4030834138393402, "learning_rate": 1.3512517132344622e-05, "loss": 0.4596, "step": 36440 }, { "epoch": 0.7728574155373162, "grad_norm": 0.3529621660709381, "learning_rate": 1.3512204885033788e-05, "loss": 0.4744, "step": 36441 }, { "epoch": 0.7728786239952493, "grad_norm": 0.35653337836265564, "learning_rate": 1.351189263381671e-05, "loss": 0.4647, "step": 36442 }, { "epoch": 0.7728998324531823, "grad_norm": 0.35485321283340454, "learning_rate": 1.3511580378693736e-05, "loss": 0.4736, "step": 36443 }, { "epoch": 0.7729210409111154, "grad_norm": 0.42503517866134644, "learning_rate": 1.3511268119665211e-05, "loss": 0.4847, "step": 36444 }, { "epoch": 0.7729422493690484, "grad_norm": 0.34987252950668335, "learning_rate": 1.3510955856731486e-05, "loss": 0.5376, "step": 36445 }, { "epoch": 0.7729634578269814, "grad_norm": 0.38639429211616516, "learning_rate": 1.3510643589892904e-05, "loss": 0.5052, "step": 36446 }, { "epoch": 0.7729846662849145, "grad_norm": 0.37654125690460205, "learning_rate": 1.3510331319149819e-05, "loss": 0.5042, "step": 36447 }, { "epoch": 0.7730058747428474, "grad_norm": 0.3395710289478302, "learning_rate": 1.3510019044502572e-05, "loss": 0.5263, "step": 36448 }, { "epoch": 0.7730270832007805, "grad_norm": 0.40233832597732544, "learning_rate": 1.350970676595151e-05, "loss": 0.5343, "step": 36449 }, { "epoch": 0.7730482916587135, "grad_norm": 0.3626053035259247, "learning_rate": 1.3509394483496982e-05, "loss": 0.4792, "step": 36450 }, { "epoch": 0.7730695001166465, "grad_norm": 0.36238527297973633, "learning_rate": 1.350908219713934e-05, "loss": 0.3808, "step": 36451 }, { "epoch": 0.7730907085745795, "grad_norm": 0.34713006019592285, "learning_rate": 1.3508769906878925e-05, "loss": 0.405, "step": 36452 }, { "epoch": 0.7731119170325126, "grad_norm": 0.40309199690818787, "learning_rate": 1.3508457612716089e-05, "loss": 0.4634, "step": 36453 }, { "epoch": 0.7731331254904455, "grad_norm": 0.43231430649757385, "learning_rate": 1.3508145314651173e-05, "loss": 0.4828, "step": 36454 }, { "epoch": 0.7731543339483786, "grad_norm": 0.31737419962882996, "learning_rate": 1.350783301268453e-05, "loss": 0.4644, "step": 36455 }, { "epoch": 0.7731755424063116, "grad_norm": 0.47818636894226074, "learning_rate": 1.3507520706816508e-05, "loss": 0.4759, "step": 36456 }, { "epoch": 0.7731967508642447, "grad_norm": 0.35167956352233887, "learning_rate": 1.3507208397047448e-05, "loss": 0.4663, "step": 36457 }, { "epoch": 0.7732179593221777, "grad_norm": 0.624004065990448, "learning_rate": 1.3506896083377704e-05, "loss": 0.5048, "step": 36458 }, { "epoch": 0.7732391677801107, "grad_norm": 0.34242939949035645, "learning_rate": 1.350658376580762e-05, "loss": 0.4397, "step": 36459 }, { "epoch": 0.7732603762380438, "grad_norm": 0.3611057996749878, "learning_rate": 1.3506271444337542e-05, "loss": 0.536, "step": 36460 }, { "epoch": 0.7732815846959767, "grad_norm": 0.3850767910480499, "learning_rate": 1.3505959118967822e-05, "loss": 0.4852, "step": 36461 }, { "epoch": 0.7733027931539098, "grad_norm": 0.42324039340019226, "learning_rate": 1.3505646789698806e-05, "loss": 0.4876, "step": 36462 }, { "epoch": 0.7733240016118428, "grad_norm": 0.34439000487327576, "learning_rate": 1.3505334456530834e-05, "loss": 0.4518, "step": 36463 }, { "epoch": 0.7733452100697759, "grad_norm": 0.3828426003456116, "learning_rate": 1.3505022119464264e-05, "loss": 0.5609, "step": 36464 }, { "epoch": 0.7733664185277088, "grad_norm": 0.411706805229187, "learning_rate": 1.3504709778499438e-05, "loss": 0.5162, "step": 36465 }, { "epoch": 0.7733876269856419, "grad_norm": 0.36724549531936646, "learning_rate": 1.3504397433636704e-05, "loss": 0.4258, "step": 36466 }, { "epoch": 0.7734088354435749, "grad_norm": 0.37137436866760254, "learning_rate": 1.350408508487641e-05, "loss": 0.5151, "step": 36467 }, { "epoch": 0.7734300439015079, "grad_norm": 0.3390338718891144, "learning_rate": 1.3503772732218903e-05, "loss": 0.5151, "step": 36468 }, { "epoch": 0.7734512523594409, "grad_norm": 0.38355767726898193, "learning_rate": 1.350346037566453e-05, "loss": 0.4914, "step": 36469 }, { "epoch": 0.773472460817374, "grad_norm": 0.4316822290420532, "learning_rate": 1.3503148015213641e-05, "loss": 0.5006, "step": 36470 }, { "epoch": 0.7734936692753069, "grad_norm": 0.36863404512405396, "learning_rate": 1.3502835650866579e-05, "loss": 0.4952, "step": 36471 }, { "epoch": 0.77351487773324, "grad_norm": 0.3869999051094055, "learning_rate": 1.3502523282623694e-05, "loss": 0.5095, "step": 36472 }, { "epoch": 0.7735360861911731, "grad_norm": 0.6913701295852661, "learning_rate": 1.3502210910485334e-05, "loss": 0.4766, "step": 36473 }, { "epoch": 0.773557294649106, "grad_norm": 0.39978644251823425, "learning_rate": 1.3501898534451843e-05, "loss": 0.5016, "step": 36474 }, { "epoch": 0.7735785031070391, "grad_norm": 0.405965656042099, "learning_rate": 1.3501586154523573e-05, "loss": 0.5312, "step": 36475 }, { "epoch": 0.7735997115649721, "grad_norm": 0.35887324810028076, "learning_rate": 1.3501273770700869e-05, "loss": 0.454, "step": 36476 }, { "epoch": 0.7736209200229052, "grad_norm": 0.366597056388855, "learning_rate": 1.3500961382984077e-05, "loss": 0.4678, "step": 36477 }, { "epoch": 0.7736421284808381, "grad_norm": 0.37226027250289917, "learning_rate": 1.3500648991373549e-05, "loss": 0.5488, "step": 36478 }, { "epoch": 0.7736633369387712, "grad_norm": 0.4010479748249054, "learning_rate": 1.3500336595869627e-05, "loss": 0.5751, "step": 36479 }, { "epoch": 0.7736845453967042, "grad_norm": 0.42220425605773926, "learning_rate": 1.3500024196472663e-05, "loss": 0.5283, "step": 36480 }, { "epoch": 0.7737057538546372, "grad_norm": 0.48783665895462036, "learning_rate": 1.3499711793183003e-05, "loss": 0.4287, "step": 36481 }, { "epoch": 0.7737269623125702, "grad_norm": 0.3494216203689575, "learning_rate": 1.349939938600099e-05, "loss": 0.4956, "step": 36482 }, { "epoch": 0.7737481707705033, "grad_norm": 0.44872570037841797, "learning_rate": 1.3499086974926978e-05, "loss": 0.5373, "step": 36483 }, { "epoch": 0.7737693792284362, "grad_norm": 0.33807244896888733, "learning_rate": 1.3498774559961314e-05, "loss": 0.4886, "step": 36484 }, { "epoch": 0.7737905876863693, "grad_norm": 0.3408204913139343, "learning_rate": 1.3498462141104343e-05, "loss": 0.4315, "step": 36485 }, { "epoch": 0.7738117961443024, "grad_norm": 0.31516504287719727, "learning_rate": 1.3498149718356407e-05, "loss": 0.4344, "step": 36486 }, { "epoch": 0.7738330046022354, "grad_norm": 0.35366523265838623, "learning_rate": 1.3497837291717865e-05, "loss": 0.4873, "step": 36487 }, { "epoch": 0.7738542130601684, "grad_norm": 0.3479940593242645, "learning_rate": 1.3497524861189058e-05, "loss": 0.4975, "step": 36488 }, { "epoch": 0.7738754215181014, "grad_norm": 0.49001824855804443, "learning_rate": 1.349721242677033e-05, "loss": 0.4949, "step": 36489 }, { "epoch": 0.7738966299760345, "grad_norm": 0.39892324805259705, "learning_rate": 1.3496899988462039e-05, "loss": 0.6184, "step": 36490 }, { "epoch": 0.7739178384339674, "grad_norm": 0.36415600776672363, "learning_rate": 1.349658754626452e-05, "loss": 0.4762, "step": 36491 }, { "epoch": 0.7739390468919005, "grad_norm": 0.3677019476890564, "learning_rate": 1.3496275100178134e-05, "loss": 0.5395, "step": 36492 }, { "epoch": 0.7739602553498335, "grad_norm": 0.4276650547981262, "learning_rate": 1.3495962650203216e-05, "loss": 0.4688, "step": 36493 }, { "epoch": 0.7739814638077666, "grad_norm": 0.3519534766674042, "learning_rate": 1.349565019634012e-05, "loss": 0.5065, "step": 36494 }, { "epoch": 0.7740026722656995, "grad_norm": 0.34657958149909973, "learning_rate": 1.349533773858919e-05, "loss": 0.4796, "step": 36495 }, { "epoch": 0.7740238807236326, "grad_norm": 0.5168577432632446, "learning_rate": 1.349502527695078e-05, "loss": 0.4643, "step": 36496 }, { "epoch": 0.7740450891815656, "grad_norm": 0.47377753257751465, "learning_rate": 1.349471281142523e-05, "loss": 0.7001, "step": 36497 }, { "epoch": 0.7740662976394986, "grad_norm": 0.4019966423511505, "learning_rate": 1.3494400342012892e-05, "loss": 0.5536, "step": 36498 }, { "epoch": 0.7740875060974317, "grad_norm": 1.0247111320495605, "learning_rate": 1.3494087868714114e-05, "loss": 0.5444, "step": 36499 }, { "epoch": 0.7741087145553647, "grad_norm": 0.3873896300792694, "learning_rate": 1.3493775391529238e-05, "loss": 0.5087, "step": 36500 }, { "epoch": 0.7741299230132977, "grad_norm": 0.3616114854812622, "learning_rate": 1.3493462910458618e-05, "loss": 0.492, "step": 36501 }, { "epoch": 0.7741511314712307, "grad_norm": 0.3964017331600189, "learning_rate": 1.3493150425502596e-05, "loss": 0.523, "step": 36502 }, { "epoch": 0.7741723399291638, "grad_norm": 0.36970850825309753, "learning_rate": 1.3492837936661525e-05, "loss": 0.5037, "step": 36503 }, { "epoch": 0.7741935483870968, "grad_norm": 0.3709365725517273, "learning_rate": 1.3492525443935751e-05, "loss": 0.4896, "step": 36504 }, { "epoch": 0.7742147568450298, "grad_norm": 0.3463556170463562, "learning_rate": 1.3492212947325618e-05, "loss": 0.4835, "step": 36505 }, { "epoch": 0.7742359653029628, "grad_norm": 0.3562617301940918, "learning_rate": 1.3491900446831478e-05, "loss": 0.5517, "step": 36506 }, { "epoch": 0.7742571737608959, "grad_norm": 0.3984438180923462, "learning_rate": 1.349158794245368e-05, "loss": 0.5206, "step": 36507 }, { "epoch": 0.7742783822188288, "grad_norm": 0.3687876760959625, "learning_rate": 1.3491275434192559e-05, "loss": 0.5942, "step": 36508 }, { "epoch": 0.7742995906767619, "grad_norm": 0.37130898237228394, "learning_rate": 1.3490962922048478e-05, "loss": 0.5145, "step": 36509 }, { "epoch": 0.7743207991346949, "grad_norm": 0.3741886615753174, "learning_rate": 1.3490650406021777e-05, "loss": 0.4794, "step": 36510 }, { "epoch": 0.774342007592628, "grad_norm": 0.38827839493751526, "learning_rate": 1.3490337886112806e-05, "loss": 0.5881, "step": 36511 }, { "epoch": 0.7743632160505609, "grad_norm": 0.43760567903518677, "learning_rate": 1.3490025362321912e-05, "loss": 0.5115, "step": 36512 }, { "epoch": 0.774384424508494, "grad_norm": 0.34931203722953796, "learning_rate": 1.3489712834649443e-05, "loss": 0.4947, "step": 36513 }, { "epoch": 0.7744056329664271, "grad_norm": 0.3244415521621704, "learning_rate": 1.348940030309574e-05, "loss": 0.4668, "step": 36514 }, { "epoch": 0.77442684142436, "grad_norm": 0.3589154779911041, "learning_rate": 1.348908776766116e-05, "loss": 0.4692, "step": 36515 }, { "epoch": 0.7744480498822931, "grad_norm": 0.356254518032074, "learning_rate": 1.3488775228346048e-05, "loss": 0.6274, "step": 36516 }, { "epoch": 0.7744692583402261, "grad_norm": 0.38656094670295715, "learning_rate": 1.3488462685150747e-05, "loss": 0.4922, "step": 36517 }, { "epoch": 0.7744904667981591, "grad_norm": 0.37317782640457153, "learning_rate": 1.3488150138075614e-05, "loss": 0.5725, "step": 36518 }, { "epoch": 0.7745116752560921, "grad_norm": 0.3861778974533081, "learning_rate": 1.3487837587120985e-05, "loss": 0.5233, "step": 36519 }, { "epoch": 0.7745328837140252, "grad_norm": 0.3637617826461792, "learning_rate": 1.3487525032287216e-05, "loss": 0.569, "step": 36520 }, { "epoch": 0.7745540921719581, "grad_norm": 0.3756803870201111, "learning_rate": 1.3487212473574652e-05, "loss": 0.4914, "step": 36521 }, { "epoch": 0.7745753006298912, "grad_norm": 0.36278069019317627, "learning_rate": 1.3486899910983637e-05, "loss": 0.5021, "step": 36522 }, { "epoch": 0.7745965090878242, "grad_norm": 0.40098196268081665, "learning_rate": 1.3486587344514526e-05, "loss": 0.4592, "step": 36523 }, { "epoch": 0.7746177175457573, "grad_norm": 0.3526620864868164, "learning_rate": 1.3486274774167662e-05, "loss": 0.499, "step": 36524 }, { "epoch": 0.7746389260036902, "grad_norm": 0.3789065182209015, "learning_rate": 1.3485962199943395e-05, "loss": 0.5428, "step": 36525 }, { "epoch": 0.7746601344616233, "grad_norm": 0.34009596705436707, "learning_rate": 1.348564962184207e-05, "loss": 0.5524, "step": 36526 }, { "epoch": 0.7746813429195564, "grad_norm": 0.43972501158714294, "learning_rate": 1.3485337039864034e-05, "loss": 0.5551, "step": 36527 }, { "epoch": 0.7747025513774893, "grad_norm": 0.4004093110561371, "learning_rate": 1.3485024454009637e-05, "loss": 0.4886, "step": 36528 }, { "epoch": 0.7747237598354224, "grad_norm": 0.37259551882743835, "learning_rate": 1.3484711864279227e-05, "loss": 0.5303, "step": 36529 }, { "epoch": 0.7747449682933554, "grad_norm": 0.3855806887149811, "learning_rate": 1.3484399270673152e-05, "loss": 0.4967, "step": 36530 }, { "epoch": 0.7747661767512884, "grad_norm": 0.38349154591560364, "learning_rate": 1.3484086673191756e-05, "loss": 0.5069, "step": 36531 }, { "epoch": 0.7747873852092214, "grad_norm": 0.37825527787208557, "learning_rate": 1.3483774071835392e-05, "loss": 0.4717, "step": 36532 }, { "epoch": 0.7748085936671545, "grad_norm": 0.38279685378074646, "learning_rate": 1.3483461466604402e-05, "loss": 0.4848, "step": 36533 }, { "epoch": 0.7748298021250875, "grad_norm": 0.3867312967777252, "learning_rate": 1.3483148857499135e-05, "loss": 0.4722, "step": 36534 }, { "epoch": 0.7748510105830205, "grad_norm": 0.3968927562236786, "learning_rate": 1.3482836244519944e-05, "loss": 0.5042, "step": 36535 }, { "epoch": 0.7748722190409535, "grad_norm": 0.35022974014282227, "learning_rate": 1.348252362766717e-05, "loss": 0.3943, "step": 36536 }, { "epoch": 0.7748934274988866, "grad_norm": 0.3605116605758667, "learning_rate": 1.3482211006941167e-05, "loss": 0.4595, "step": 36537 }, { "epoch": 0.7749146359568195, "grad_norm": 0.35692793130874634, "learning_rate": 1.3481898382342275e-05, "loss": 0.5023, "step": 36538 }, { "epoch": 0.7749358444147526, "grad_norm": 0.3817891478538513, "learning_rate": 1.348158575387085e-05, "loss": 0.4929, "step": 36539 }, { "epoch": 0.7749570528726857, "grad_norm": 0.3177429139614105, "learning_rate": 1.3481273121527232e-05, "loss": 0.4838, "step": 36540 }, { "epoch": 0.7749782613306186, "grad_norm": 0.38571128249168396, "learning_rate": 1.3480960485311773e-05, "loss": 0.5122, "step": 36541 }, { "epoch": 0.7749994697885517, "grad_norm": 0.38842153549194336, "learning_rate": 1.348064784522482e-05, "loss": 0.5608, "step": 36542 }, { "epoch": 0.7750206782464847, "grad_norm": 0.41704899072647095, "learning_rate": 1.3480335201266722e-05, "loss": 0.5686, "step": 36543 }, { "epoch": 0.7750418867044178, "grad_norm": 0.3495884835720062, "learning_rate": 1.3480022553437826e-05, "loss": 0.543, "step": 36544 }, { "epoch": 0.7750630951623507, "grad_norm": 0.3598972260951996, "learning_rate": 1.3479709901738476e-05, "loss": 0.4379, "step": 36545 }, { "epoch": 0.7750843036202838, "grad_norm": 0.3367466330528259, "learning_rate": 1.3479397246169023e-05, "loss": 0.4755, "step": 36546 }, { "epoch": 0.7751055120782168, "grad_norm": 0.3878566324710846, "learning_rate": 1.3479084586729818e-05, "loss": 0.5058, "step": 36547 }, { "epoch": 0.7751267205361498, "grad_norm": 0.4035295844078064, "learning_rate": 1.3478771923421204e-05, "loss": 0.4021, "step": 36548 }, { "epoch": 0.7751479289940828, "grad_norm": 0.34959521889686584, "learning_rate": 1.347845925624353e-05, "loss": 0.4351, "step": 36549 }, { "epoch": 0.7751691374520159, "grad_norm": 0.33986708521842957, "learning_rate": 1.3478146585197143e-05, "loss": 0.491, "step": 36550 }, { "epoch": 0.7751903459099488, "grad_norm": 0.3509523868560791, "learning_rate": 1.3477833910282392e-05, "loss": 0.4653, "step": 36551 }, { "epoch": 0.7752115543678819, "grad_norm": 0.350990891456604, "learning_rate": 1.3477521231499626e-05, "loss": 0.4376, "step": 36552 }, { "epoch": 0.7752327628258149, "grad_norm": 0.35689032077789307, "learning_rate": 1.3477208548849188e-05, "loss": 0.4988, "step": 36553 }, { "epoch": 0.775253971283748, "grad_norm": 0.38933899998664856, "learning_rate": 1.3476895862331429e-05, "loss": 0.485, "step": 36554 }, { "epoch": 0.775275179741681, "grad_norm": 0.3366830348968506, "learning_rate": 1.34765831719467e-05, "loss": 0.4441, "step": 36555 }, { "epoch": 0.775296388199614, "grad_norm": 0.45230284333229065, "learning_rate": 1.3476270477695343e-05, "loss": 0.5192, "step": 36556 }, { "epoch": 0.7753175966575471, "grad_norm": 0.34827619791030884, "learning_rate": 1.3475957779577708e-05, "loss": 0.4578, "step": 36557 }, { "epoch": 0.77533880511548, "grad_norm": 0.5090379118919373, "learning_rate": 1.3475645077594145e-05, "loss": 0.5077, "step": 36558 }, { "epoch": 0.7753600135734131, "grad_norm": 0.36293408274650574, "learning_rate": 1.3475332371744995e-05, "loss": 0.5821, "step": 36559 }, { "epoch": 0.7753812220313461, "grad_norm": 0.9165655970573425, "learning_rate": 1.3475019662030615e-05, "loss": 0.4618, "step": 36560 }, { "epoch": 0.7754024304892791, "grad_norm": 0.37522900104522705, "learning_rate": 1.3474706948451348e-05, "loss": 0.4994, "step": 36561 }, { "epoch": 0.7754236389472121, "grad_norm": 0.4542776644229889, "learning_rate": 1.3474394231007541e-05, "loss": 0.5746, "step": 36562 }, { "epoch": 0.7754448474051452, "grad_norm": 0.4255320429801941, "learning_rate": 1.3474081509699543e-05, "loss": 0.5727, "step": 36563 }, { "epoch": 0.7754660558630782, "grad_norm": 0.38257136940956116, "learning_rate": 1.3473768784527703e-05, "loss": 0.5174, "step": 36564 }, { "epoch": 0.7754872643210112, "grad_norm": 0.3654351532459259, "learning_rate": 1.3473456055492366e-05, "loss": 0.417, "step": 36565 }, { "epoch": 0.7755084727789442, "grad_norm": 0.34893739223480225, "learning_rate": 1.347314332259388e-05, "loss": 0.4545, "step": 36566 }, { "epoch": 0.7755296812368773, "grad_norm": 0.3656957745552063, "learning_rate": 1.3472830585832597e-05, "loss": 0.5232, "step": 36567 }, { "epoch": 0.7755508896948103, "grad_norm": 0.41768330335617065, "learning_rate": 1.3472517845208862e-05, "loss": 0.4336, "step": 36568 }, { "epoch": 0.7755720981527433, "grad_norm": 0.3411901891231537, "learning_rate": 1.3472205100723022e-05, "loss": 0.4284, "step": 36569 }, { "epoch": 0.7755933066106764, "grad_norm": 0.4406127631664276, "learning_rate": 1.3471892352375424e-05, "loss": 0.521, "step": 36570 }, { "epoch": 0.7756145150686093, "grad_norm": 0.5113945007324219, "learning_rate": 1.347157960016642e-05, "loss": 0.4412, "step": 36571 }, { "epoch": 0.7756357235265424, "grad_norm": 0.42631852626800537, "learning_rate": 1.3471266844096353e-05, "loss": 0.5012, "step": 36572 }, { "epoch": 0.7756569319844754, "grad_norm": 0.3556079566478729, "learning_rate": 1.3470954084165576e-05, "loss": 0.5146, "step": 36573 }, { "epoch": 0.7756781404424085, "grad_norm": 0.33827996253967285, "learning_rate": 1.3470641320374432e-05, "loss": 0.5009, "step": 36574 }, { "epoch": 0.7756993489003414, "grad_norm": 0.3816331624984741, "learning_rate": 1.3470328552723273e-05, "loss": 0.5059, "step": 36575 }, { "epoch": 0.7757205573582745, "grad_norm": 0.5707471370697021, "learning_rate": 1.347001578121244e-05, "loss": 0.4658, "step": 36576 }, { "epoch": 0.7757417658162075, "grad_norm": 0.343901664018631, "learning_rate": 1.346970300584229e-05, "loss": 0.4831, "step": 36577 }, { "epoch": 0.7757629742741405, "grad_norm": 0.3697490990161896, "learning_rate": 1.3469390226613163e-05, "loss": 0.4763, "step": 36578 }, { "epoch": 0.7757841827320735, "grad_norm": 0.6321348547935486, "learning_rate": 1.3469077443525413e-05, "loss": 0.6122, "step": 36579 }, { "epoch": 0.7758053911900066, "grad_norm": 0.3928252160549164, "learning_rate": 1.3468764656579385e-05, "loss": 0.4496, "step": 36580 }, { "epoch": 0.7758265996479397, "grad_norm": 0.32650166749954224, "learning_rate": 1.3468451865775429e-05, "loss": 0.4596, "step": 36581 }, { "epoch": 0.7758478081058726, "grad_norm": 0.3760359585285187, "learning_rate": 1.3468139071113887e-05, "loss": 0.5057, "step": 36582 }, { "epoch": 0.7758690165638057, "grad_norm": 0.3359931409358978, "learning_rate": 1.3467826272595112e-05, "loss": 0.4192, "step": 36583 }, { "epoch": 0.7758902250217387, "grad_norm": 0.3661193251609802, "learning_rate": 1.3467513470219451e-05, "loss": 0.537, "step": 36584 }, { "epoch": 0.7759114334796717, "grad_norm": 0.3834758698940277, "learning_rate": 1.3467200663987252e-05, "loss": 0.5006, "step": 36585 }, { "epoch": 0.7759326419376047, "grad_norm": 0.34966495633125305, "learning_rate": 1.3466887853898862e-05, "loss": 0.4887, "step": 36586 }, { "epoch": 0.7759538503955378, "grad_norm": 0.35666191577911377, "learning_rate": 1.346657503995463e-05, "loss": 0.5582, "step": 36587 }, { "epoch": 0.7759750588534707, "grad_norm": 0.3618167042732239, "learning_rate": 1.3466262222154905e-05, "loss": 0.5147, "step": 36588 }, { "epoch": 0.7759962673114038, "grad_norm": 0.32068923115730286, "learning_rate": 1.3465949400500032e-05, "loss": 0.5056, "step": 36589 }, { "epoch": 0.7760174757693368, "grad_norm": 0.3584231734275818, "learning_rate": 1.346563657499036e-05, "loss": 0.5413, "step": 36590 }, { "epoch": 0.7760386842272698, "grad_norm": 0.37402209639549255, "learning_rate": 1.3465323745626235e-05, "loss": 0.5931, "step": 36591 }, { "epoch": 0.7760598926852028, "grad_norm": 0.3479591906070709, "learning_rate": 1.3465010912408007e-05, "loss": 0.5231, "step": 36592 }, { "epoch": 0.7760811011431359, "grad_norm": 0.40637046098709106, "learning_rate": 1.3464698075336026e-05, "loss": 0.5106, "step": 36593 }, { "epoch": 0.7761023096010689, "grad_norm": 0.3636813163757324, "learning_rate": 1.3464385234410639e-05, "loss": 0.4231, "step": 36594 }, { "epoch": 0.7761235180590019, "grad_norm": 0.36078500747680664, "learning_rate": 1.3464072389632192e-05, "loss": 0.4901, "step": 36595 }, { "epoch": 0.776144726516935, "grad_norm": 0.6080401539802551, "learning_rate": 1.3463759541001034e-05, "loss": 0.5331, "step": 36596 }, { "epoch": 0.776165934974868, "grad_norm": 0.3286246061325073, "learning_rate": 1.3463446688517512e-05, "loss": 0.4833, "step": 36597 }, { "epoch": 0.776187143432801, "grad_norm": 0.38454389572143555, "learning_rate": 1.3463133832181973e-05, "loss": 0.5178, "step": 36598 }, { "epoch": 0.776208351890734, "grad_norm": 0.3915906846523285, "learning_rate": 1.3462820971994768e-05, "loss": 0.5501, "step": 36599 }, { "epoch": 0.7762295603486671, "grad_norm": 0.44123196601867676, "learning_rate": 1.3462508107956245e-05, "loss": 0.5204, "step": 36600 }, { "epoch": 0.7762507688066, "grad_norm": 0.33214306831359863, "learning_rate": 1.3462195240066748e-05, "loss": 0.4525, "step": 36601 }, { "epoch": 0.7762719772645331, "grad_norm": 0.3856807053089142, "learning_rate": 1.346188236832663e-05, "loss": 0.5039, "step": 36602 }, { "epoch": 0.7762931857224661, "grad_norm": 0.35566991567611694, "learning_rate": 1.3461569492736237e-05, "loss": 0.4397, "step": 36603 }, { "epoch": 0.7763143941803992, "grad_norm": 0.34265536069869995, "learning_rate": 1.3461256613295912e-05, "loss": 0.4607, "step": 36604 }, { "epoch": 0.7763356026383321, "grad_norm": 0.4127318859100342, "learning_rate": 1.346094373000601e-05, "loss": 0.5148, "step": 36605 }, { "epoch": 0.7763568110962652, "grad_norm": 0.35945260524749756, "learning_rate": 1.346063084286688e-05, "loss": 0.5041, "step": 36606 }, { "epoch": 0.7763780195541982, "grad_norm": 0.44889914989471436, "learning_rate": 1.3460317951878864e-05, "loss": 0.5, "step": 36607 }, { "epoch": 0.7763992280121312, "grad_norm": 0.5142038464546204, "learning_rate": 1.3460005057042311e-05, "loss": 0.5464, "step": 36608 }, { "epoch": 0.7764204364700643, "grad_norm": 0.566896915435791, "learning_rate": 1.345969215835757e-05, "loss": 0.5027, "step": 36609 }, { "epoch": 0.7764416449279973, "grad_norm": 0.399336576461792, "learning_rate": 1.3459379255824992e-05, "loss": 0.5113, "step": 36610 }, { "epoch": 0.7764628533859304, "grad_norm": 0.3723425567150116, "learning_rate": 1.345906634944492e-05, "loss": 0.5537, "step": 36611 }, { "epoch": 0.7764840618438633, "grad_norm": 0.354043573141098, "learning_rate": 1.3458753439217707e-05, "loss": 0.4466, "step": 36612 }, { "epoch": 0.7765052703017964, "grad_norm": 0.39057472348213196, "learning_rate": 1.3458440525143694e-05, "loss": 0.4662, "step": 36613 }, { "epoch": 0.7765264787597294, "grad_norm": 0.36901795864105225, "learning_rate": 1.3458127607223239e-05, "loss": 0.5588, "step": 36614 }, { "epoch": 0.7765476872176624, "grad_norm": 0.3698473572731018, "learning_rate": 1.3457814685456683e-05, "loss": 0.5048, "step": 36615 }, { "epoch": 0.7765688956755954, "grad_norm": 0.42340341210365295, "learning_rate": 1.3457501759844374e-05, "loss": 0.4384, "step": 36616 }, { "epoch": 0.7765901041335285, "grad_norm": 0.37009984254837036, "learning_rate": 1.345718883038666e-05, "loss": 0.4118, "step": 36617 }, { "epoch": 0.7766113125914614, "grad_norm": 0.4187591075897217, "learning_rate": 1.3456875897083892e-05, "loss": 0.5557, "step": 36618 }, { "epoch": 0.7766325210493945, "grad_norm": 0.3943142592906952, "learning_rate": 1.3456562959936419e-05, "loss": 0.5156, "step": 36619 }, { "epoch": 0.7766537295073275, "grad_norm": 0.3340057134628296, "learning_rate": 1.3456250018944585e-05, "loss": 0.4862, "step": 36620 }, { "epoch": 0.7766749379652605, "grad_norm": 0.3351244628429413, "learning_rate": 1.3455937074108741e-05, "loss": 0.5093, "step": 36621 }, { "epoch": 0.7766961464231936, "grad_norm": 0.37542539834976196, "learning_rate": 1.3455624125429235e-05, "loss": 0.5495, "step": 36622 }, { "epoch": 0.7767173548811266, "grad_norm": 0.35110917687416077, "learning_rate": 1.3455311172906409e-05, "loss": 0.5108, "step": 36623 }, { "epoch": 0.7767385633390597, "grad_norm": 0.3879281282424927, "learning_rate": 1.3454998216540617e-05, "loss": 0.4706, "step": 36624 }, { "epoch": 0.7767597717969926, "grad_norm": 0.3443601429462433, "learning_rate": 1.345468525633221e-05, "loss": 0.4458, "step": 36625 }, { "epoch": 0.7767809802549257, "grad_norm": 0.3553590476512909, "learning_rate": 1.345437229228153e-05, "loss": 0.5182, "step": 36626 }, { "epoch": 0.7768021887128587, "grad_norm": 0.39134854078292847, "learning_rate": 1.3454059324388924e-05, "loss": 0.4829, "step": 36627 }, { "epoch": 0.7768233971707917, "grad_norm": 0.31693127751350403, "learning_rate": 1.3453746352654748e-05, "loss": 0.4809, "step": 36628 }, { "epoch": 0.7768446056287247, "grad_norm": 0.35053449869155884, "learning_rate": 1.3453433377079343e-05, "loss": 0.4906, "step": 36629 }, { "epoch": 0.7768658140866578, "grad_norm": 0.32324859499931335, "learning_rate": 1.3453120397663058e-05, "loss": 0.4861, "step": 36630 }, { "epoch": 0.7768870225445907, "grad_norm": 0.3519299030303955, "learning_rate": 1.3452807414406247e-05, "loss": 0.5106, "step": 36631 }, { "epoch": 0.7769082310025238, "grad_norm": 0.33526721596717834, "learning_rate": 1.3452494427309249e-05, "loss": 0.51, "step": 36632 }, { "epoch": 0.7769294394604568, "grad_norm": 0.37620097398757935, "learning_rate": 1.3452181436372417e-05, "loss": 0.4285, "step": 36633 }, { "epoch": 0.7769506479183899, "grad_norm": 0.3692165017127991, "learning_rate": 1.3451868441596104e-05, "loss": 0.503, "step": 36634 }, { "epoch": 0.7769718563763228, "grad_norm": 0.3398154079914093, "learning_rate": 1.3451555442980646e-05, "loss": 0.3943, "step": 36635 }, { "epoch": 0.7769930648342559, "grad_norm": 0.3642028272151947, "learning_rate": 1.3451242440526402e-05, "loss": 0.5502, "step": 36636 }, { "epoch": 0.777014273292189, "grad_norm": 0.35223037004470825, "learning_rate": 1.3450929434233715e-05, "loss": 0.5443, "step": 36637 }, { "epoch": 0.7770354817501219, "grad_norm": 0.40460753440856934, "learning_rate": 1.3450616424102934e-05, "loss": 0.4256, "step": 36638 }, { "epoch": 0.777056690208055, "grad_norm": 0.3713170289993286, "learning_rate": 1.3450303410134409e-05, "loss": 0.5155, "step": 36639 }, { "epoch": 0.777077898665988, "grad_norm": 0.6055802702903748, "learning_rate": 1.3449990392328488e-05, "loss": 0.4926, "step": 36640 }, { "epoch": 0.777099107123921, "grad_norm": 0.4004831314086914, "learning_rate": 1.3449677370685512e-05, "loss": 0.4591, "step": 36641 }, { "epoch": 0.777120315581854, "grad_norm": 0.3453199863433838, "learning_rate": 1.344936434520584e-05, "loss": 0.4887, "step": 36642 }, { "epoch": 0.7771415240397871, "grad_norm": 0.3410692512989044, "learning_rate": 1.3449051315889812e-05, "loss": 0.4907, "step": 36643 }, { "epoch": 0.77716273249772, "grad_norm": 0.32362839579582214, "learning_rate": 1.344873828273778e-05, "loss": 0.4598, "step": 36644 }, { "epoch": 0.7771839409556531, "grad_norm": 0.3869338929653168, "learning_rate": 1.3448425245750089e-05, "loss": 0.4446, "step": 36645 }, { "epoch": 0.7772051494135861, "grad_norm": 0.39825278520584106, "learning_rate": 1.3448112204927091e-05, "loss": 0.5306, "step": 36646 }, { "epoch": 0.7772263578715192, "grad_norm": 0.37417542934417725, "learning_rate": 1.3447799160269134e-05, "loss": 0.509, "step": 36647 }, { "epoch": 0.7772475663294521, "grad_norm": 0.3237341642379761, "learning_rate": 1.3447486111776566e-05, "loss": 0.4755, "step": 36648 }, { "epoch": 0.7772687747873852, "grad_norm": 0.3769162595272064, "learning_rate": 1.3447173059449727e-05, "loss": 0.5169, "step": 36649 }, { "epoch": 0.7772899832453183, "grad_norm": 0.3740783929824829, "learning_rate": 1.344686000328898e-05, "loss": 0.4882, "step": 36650 }, { "epoch": 0.7773111917032512, "grad_norm": 0.4101797640323639, "learning_rate": 1.3446546943294662e-05, "loss": 0.6284, "step": 36651 }, { "epoch": 0.7773324001611843, "grad_norm": 0.3654944896697998, "learning_rate": 1.3446233879467125e-05, "loss": 0.4952, "step": 36652 }, { "epoch": 0.7773536086191173, "grad_norm": 0.3571687340736389, "learning_rate": 1.3445920811806716e-05, "loss": 0.4263, "step": 36653 }, { "epoch": 0.7773748170770504, "grad_norm": 0.35829490423202515, "learning_rate": 1.3445607740313787e-05, "loss": 0.4631, "step": 36654 }, { "epoch": 0.7773960255349833, "grad_norm": 0.3709993064403534, "learning_rate": 1.3445294664988675e-05, "loss": 0.5155, "step": 36655 }, { "epoch": 0.7774172339929164, "grad_norm": 0.3718739449977875, "learning_rate": 1.3444981585831743e-05, "loss": 0.4239, "step": 36656 }, { "epoch": 0.7774384424508494, "grad_norm": 0.3286885917186737, "learning_rate": 1.344466850284333e-05, "loss": 0.4836, "step": 36657 }, { "epoch": 0.7774596509087824, "grad_norm": 0.3626621961593628, "learning_rate": 1.3444355416023786e-05, "loss": 0.5362, "step": 36658 }, { "epoch": 0.7774808593667154, "grad_norm": 0.3666183352470398, "learning_rate": 1.3444042325373463e-05, "loss": 0.5031, "step": 36659 }, { "epoch": 0.7775020678246485, "grad_norm": 0.40385597944259644, "learning_rate": 1.3443729230892703e-05, "loss": 0.4538, "step": 36660 }, { "epoch": 0.7775232762825814, "grad_norm": 0.34048330783843994, "learning_rate": 1.3443416132581859e-05, "loss": 0.3729, "step": 36661 }, { "epoch": 0.7775444847405145, "grad_norm": 0.3481951057910919, "learning_rate": 1.3443103030441276e-05, "loss": 0.4837, "step": 36662 }, { "epoch": 0.7775656931984476, "grad_norm": 0.4136093556880951, "learning_rate": 1.3442789924471304e-05, "loss": 0.5244, "step": 36663 }, { "epoch": 0.7775869016563806, "grad_norm": 0.3782610595226288, "learning_rate": 1.3442476814672292e-05, "loss": 0.5379, "step": 36664 }, { "epoch": 0.7776081101143136, "grad_norm": 0.3743284344673157, "learning_rate": 1.3442163701044588e-05, "loss": 0.4797, "step": 36665 }, { "epoch": 0.7776293185722466, "grad_norm": 0.3746299147605896, "learning_rate": 1.3441850583588539e-05, "loss": 0.4179, "step": 36666 }, { "epoch": 0.7776505270301797, "grad_norm": 0.3759208023548126, "learning_rate": 1.3441537462304492e-05, "loss": 0.4877, "step": 36667 }, { "epoch": 0.7776717354881126, "grad_norm": 0.3442087769508362, "learning_rate": 1.3441224337192798e-05, "loss": 0.3734, "step": 36668 }, { "epoch": 0.7776929439460457, "grad_norm": 0.41813960671424866, "learning_rate": 1.3440911208253801e-05, "loss": 0.4265, "step": 36669 }, { "epoch": 0.7777141524039787, "grad_norm": 0.3866405189037323, "learning_rate": 1.3440598075487857e-05, "loss": 0.4616, "step": 36670 }, { "epoch": 0.7777353608619118, "grad_norm": 0.354870080947876, "learning_rate": 1.3440284938895309e-05, "loss": 0.5272, "step": 36671 }, { "epoch": 0.7777565693198447, "grad_norm": 0.32369980216026306, "learning_rate": 1.3439971798476503e-05, "loss": 0.4355, "step": 36672 }, { "epoch": 0.7777777777777778, "grad_norm": 0.36834070086479187, "learning_rate": 1.3439658654231794e-05, "loss": 0.5777, "step": 36673 }, { "epoch": 0.7777989862357108, "grad_norm": 0.37609249353408813, "learning_rate": 1.3439345506161522e-05, "loss": 0.4471, "step": 36674 }, { "epoch": 0.7778201946936438, "grad_norm": 0.3849092423915863, "learning_rate": 1.3439032354266044e-05, "loss": 0.4814, "step": 36675 }, { "epoch": 0.7778414031515768, "grad_norm": 0.3972834646701813, "learning_rate": 1.3438719198545705e-05, "loss": 0.4804, "step": 36676 }, { "epoch": 0.7778626116095099, "grad_norm": 0.35660454630851746, "learning_rate": 1.3438406039000849e-05, "loss": 0.436, "step": 36677 }, { "epoch": 0.7778838200674429, "grad_norm": 0.38039445877075195, "learning_rate": 1.3438092875631828e-05, "loss": 0.5042, "step": 36678 }, { "epoch": 0.7779050285253759, "grad_norm": 0.39886710047721863, "learning_rate": 1.3437779708438992e-05, "loss": 0.522, "step": 36679 }, { "epoch": 0.777926236983309, "grad_norm": 0.45102399587631226, "learning_rate": 1.3437466537422686e-05, "loss": 0.5452, "step": 36680 }, { "epoch": 0.777947445441242, "grad_norm": 0.40986186265945435, "learning_rate": 1.3437153362583257e-05, "loss": 0.4892, "step": 36681 }, { "epoch": 0.777968653899175, "grad_norm": 0.34877753257751465, "learning_rate": 1.343684018392106e-05, "loss": 0.5526, "step": 36682 }, { "epoch": 0.777989862357108, "grad_norm": 0.38823026418685913, "learning_rate": 1.3436527001436437e-05, "loss": 0.4206, "step": 36683 }, { "epoch": 0.7780110708150411, "grad_norm": 0.4132840633392334, "learning_rate": 1.343621381512974e-05, "loss": 0.4867, "step": 36684 }, { "epoch": 0.778032279272974, "grad_norm": 0.3653031289577484, "learning_rate": 1.3435900625001317e-05, "loss": 0.4322, "step": 36685 }, { "epoch": 0.7780534877309071, "grad_norm": 0.4895476698875427, "learning_rate": 1.3435587431051511e-05, "loss": 0.513, "step": 36686 }, { "epoch": 0.7780746961888401, "grad_norm": 0.5758644342422485, "learning_rate": 1.3435274233280677e-05, "loss": 0.4313, "step": 36687 }, { "epoch": 0.7780959046467731, "grad_norm": 0.3829924762248993, "learning_rate": 1.343496103168916e-05, "loss": 0.5483, "step": 36688 }, { "epoch": 0.7781171131047061, "grad_norm": 0.36099356412887573, "learning_rate": 1.343464782627731e-05, "loss": 0.5409, "step": 36689 }, { "epoch": 0.7781383215626392, "grad_norm": 0.36931857466697693, "learning_rate": 1.3434334617045475e-05, "loss": 0.5104, "step": 36690 }, { "epoch": 0.7781595300205723, "grad_norm": 0.3704158067703247, "learning_rate": 1.3434021403994e-05, "loss": 0.6189, "step": 36691 }, { "epoch": 0.7781807384785052, "grad_norm": 0.42335522174835205, "learning_rate": 1.343370818712324e-05, "loss": 0.4351, "step": 36692 }, { "epoch": 0.7782019469364383, "grad_norm": 0.3337576687335968, "learning_rate": 1.343339496643354e-05, "loss": 0.5561, "step": 36693 }, { "epoch": 0.7782231553943713, "grad_norm": 0.3503884971141815, "learning_rate": 1.3433081741925245e-05, "loss": 0.486, "step": 36694 }, { "epoch": 0.7782443638523043, "grad_norm": 0.4226480722427368, "learning_rate": 1.3432768513598704e-05, "loss": 0.5829, "step": 36695 }, { "epoch": 0.7782655723102373, "grad_norm": 0.3956000804901123, "learning_rate": 1.3432455281454274e-05, "loss": 0.4549, "step": 36696 }, { "epoch": 0.7782867807681704, "grad_norm": 0.3343712091445923, "learning_rate": 1.3432142045492291e-05, "loss": 0.4678, "step": 36697 }, { "epoch": 0.7783079892261033, "grad_norm": 0.3766941726207733, "learning_rate": 1.3431828805713114e-05, "loss": 0.4285, "step": 36698 }, { "epoch": 0.7783291976840364, "grad_norm": 0.34675082564353943, "learning_rate": 1.3431515562117086e-05, "loss": 0.4815, "step": 36699 }, { "epoch": 0.7783504061419694, "grad_norm": 0.3573678433895111, "learning_rate": 1.3431202314704553e-05, "loss": 0.4174, "step": 36700 }, { "epoch": 0.7783716145999025, "grad_norm": 0.39389756321907043, "learning_rate": 1.3430889063475868e-05, "loss": 0.5157, "step": 36701 }, { "epoch": 0.7783928230578354, "grad_norm": 0.3521462678909302, "learning_rate": 1.343057580843138e-05, "loss": 0.525, "step": 36702 }, { "epoch": 0.7784140315157685, "grad_norm": 0.35726338624954224, "learning_rate": 1.3430262549571436e-05, "loss": 0.4943, "step": 36703 }, { "epoch": 0.7784352399737016, "grad_norm": 0.37993988394737244, "learning_rate": 1.3429949286896382e-05, "loss": 0.5151, "step": 36704 }, { "epoch": 0.7784564484316345, "grad_norm": 0.34805500507354736, "learning_rate": 1.3429636020406566e-05, "loss": 0.4672, "step": 36705 }, { "epoch": 0.7784776568895676, "grad_norm": 0.3644105792045593, "learning_rate": 1.3429322750102343e-05, "loss": 0.5414, "step": 36706 }, { "epoch": 0.7784988653475006, "grad_norm": 0.3431795835494995, "learning_rate": 1.3429009475984053e-05, "loss": 0.4748, "step": 36707 }, { "epoch": 0.7785200738054336, "grad_norm": 0.3666776418685913, "learning_rate": 1.3428696198052054e-05, "loss": 0.4965, "step": 36708 }, { "epoch": 0.7785412822633666, "grad_norm": 0.3540116548538208, "learning_rate": 1.3428382916306682e-05, "loss": 0.502, "step": 36709 }, { "epoch": 0.7785624907212997, "grad_norm": 0.3699757754802704, "learning_rate": 1.3428069630748296e-05, "loss": 0.5741, "step": 36710 }, { "epoch": 0.7785836991792326, "grad_norm": 0.33873459696769714, "learning_rate": 1.3427756341377241e-05, "loss": 0.4774, "step": 36711 }, { "epoch": 0.7786049076371657, "grad_norm": 0.33504071831703186, "learning_rate": 1.3427443048193865e-05, "loss": 0.4671, "step": 36712 }, { "epoch": 0.7786261160950987, "grad_norm": 0.5541760325431824, "learning_rate": 1.3427129751198515e-05, "loss": 0.5696, "step": 36713 }, { "epoch": 0.7786473245530318, "grad_norm": 0.34790724515914917, "learning_rate": 1.3426816450391542e-05, "loss": 0.5045, "step": 36714 }, { "epoch": 0.7786685330109647, "grad_norm": 0.3165592849254608, "learning_rate": 1.3426503145773295e-05, "loss": 0.5103, "step": 36715 }, { "epoch": 0.7786897414688978, "grad_norm": 0.3429219424724579, "learning_rate": 1.3426189837344122e-05, "loss": 0.5008, "step": 36716 }, { "epoch": 0.7787109499268309, "grad_norm": 0.34820377826690674, "learning_rate": 1.3425876525104365e-05, "loss": 0.4685, "step": 36717 }, { "epoch": 0.7787321583847638, "grad_norm": 0.3790334463119507, "learning_rate": 1.3425563209054382e-05, "loss": 0.5691, "step": 36718 }, { "epoch": 0.7787533668426969, "grad_norm": 0.35111525654792786, "learning_rate": 1.3425249889194516e-05, "loss": 0.4694, "step": 36719 }, { "epoch": 0.7787745753006299, "grad_norm": 0.3891501724720001, "learning_rate": 1.3424936565525116e-05, "loss": 0.5131, "step": 36720 }, { "epoch": 0.778795783758563, "grad_norm": 0.38224512338638306, "learning_rate": 1.3424623238046533e-05, "loss": 0.535, "step": 36721 }, { "epoch": 0.7788169922164959, "grad_norm": 0.3955267667770386, "learning_rate": 1.3424309906759115e-05, "loss": 0.4627, "step": 36722 }, { "epoch": 0.778838200674429, "grad_norm": 0.5035564303398132, "learning_rate": 1.3423996571663205e-05, "loss": 0.56, "step": 36723 }, { "epoch": 0.778859409132362, "grad_norm": 0.43084120750427246, "learning_rate": 1.342368323275916e-05, "loss": 0.5204, "step": 36724 }, { "epoch": 0.778880617590295, "grad_norm": 0.4175855219364166, "learning_rate": 1.3423369890047321e-05, "loss": 0.4625, "step": 36725 }, { "epoch": 0.778901826048228, "grad_norm": 0.35700738430023193, "learning_rate": 1.3423056543528042e-05, "loss": 0.4857, "step": 36726 }, { "epoch": 0.7789230345061611, "grad_norm": 0.36773791909217834, "learning_rate": 1.3422743193201668e-05, "loss": 0.4932, "step": 36727 }, { "epoch": 0.778944242964094, "grad_norm": 0.3273827135562897, "learning_rate": 1.3422429839068549e-05, "loss": 0.4813, "step": 36728 }, { "epoch": 0.7789654514220271, "grad_norm": 0.4386965334415436, "learning_rate": 1.3422116481129034e-05, "loss": 0.5006, "step": 36729 }, { "epoch": 0.7789866598799601, "grad_norm": 0.3317979574203491, "learning_rate": 1.3421803119383472e-05, "loss": 0.4718, "step": 36730 }, { "epoch": 0.7790078683378932, "grad_norm": 0.3669195771217346, "learning_rate": 1.3421489753832206e-05, "loss": 0.4513, "step": 36731 }, { "epoch": 0.7790290767958262, "grad_norm": 0.46832475066185, "learning_rate": 1.3421176384475592e-05, "loss": 0.4796, "step": 36732 }, { "epoch": 0.7790502852537592, "grad_norm": 0.36781978607177734, "learning_rate": 1.3420863011313976e-05, "loss": 0.4763, "step": 36733 }, { "epoch": 0.7790714937116923, "grad_norm": 0.3619948625564575, "learning_rate": 1.3420549634347704e-05, "loss": 0.4771, "step": 36734 }, { "epoch": 0.7790927021696252, "grad_norm": 0.6231457591056824, "learning_rate": 1.3420236253577128e-05, "loss": 0.5274, "step": 36735 }, { "epoch": 0.7791139106275583, "grad_norm": 0.5687523484230042, "learning_rate": 1.3419922869002595e-05, "loss": 0.4571, "step": 36736 }, { "epoch": 0.7791351190854913, "grad_norm": 0.36117228865623474, "learning_rate": 1.3419609480624452e-05, "loss": 0.5725, "step": 36737 }, { "epoch": 0.7791563275434243, "grad_norm": 0.39859694242477417, "learning_rate": 1.341929608844305e-05, "loss": 0.4956, "step": 36738 }, { "epoch": 0.7791775360013573, "grad_norm": 0.37398645281791687, "learning_rate": 1.3418982692458735e-05, "loss": 0.5891, "step": 36739 }, { "epoch": 0.7791987444592904, "grad_norm": 0.3911178708076477, "learning_rate": 1.3418669292671855e-05, "loss": 0.4782, "step": 36740 }, { "epoch": 0.7792199529172233, "grad_norm": 0.402778685092926, "learning_rate": 1.3418355889082767e-05, "loss": 0.5425, "step": 36741 }, { "epoch": 0.7792411613751564, "grad_norm": 0.4419819712638855, "learning_rate": 1.3418042481691809e-05, "loss": 0.514, "step": 36742 }, { "epoch": 0.7792623698330894, "grad_norm": 0.39495649933815, "learning_rate": 1.3417729070499336e-05, "loss": 0.4801, "step": 36743 }, { "epoch": 0.7792835782910225, "grad_norm": 0.47759440541267395, "learning_rate": 1.3417415655505695e-05, "loss": 0.6529, "step": 36744 }, { "epoch": 0.7793047867489555, "grad_norm": 0.35116106271743774, "learning_rate": 1.341710223671123e-05, "loss": 0.4932, "step": 36745 }, { "epoch": 0.7793259952068885, "grad_norm": 0.44136425852775574, "learning_rate": 1.3416788814116297e-05, "loss": 0.5102, "step": 36746 }, { "epoch": 0.7793472036648216, "grad_norm": 0.3803746998310089, "learning_rate": 1.341647538772124e-05, "loss": 0.5196, "step": 36747 }, { "epoch": 0.7793684121227545, "grad_norm": 0.3834289312362671, "learning_rate": 1.341616195752641e-05, "loss": 0.506, "step": 36748 }, { "epoch": 0.7793896205806876, "grad_norm": 0.3927372992038727, "learning_rate": 1.3415848523532153e-05, "loss": 0.3901, "step": 36749 }, { "epoch": 0.7794108290386206, "grad_norm": 0.35689738392829895, "learning_rate": 1.341553508573882e-05, "loss": 0.4635, "step": 36750 }, { "epoch": 0.7794320374965537, "grad_norm": 0.33692193031311035, "learning_rate": 1.3415221644146756e-05, "loss": 0.486, "step": 36751 }, { "epoch": 0.7794532459544866, "grad_norm": 0.3603801429271698, "learning_rate": 1.3414908198756313e-05, "loss": 0.4647, "step": 36752 }, { "epoch": 0.7794744544124197, "grad_norm": 0.38605210185050964, "learning_rate": 1.3414594749567841e-05, "loss": 0.5182, "step": 36753 }, { "epoch": 0.7794956628703527, "grad_norm": 0.3607282340526581, "learning_rate": 1.3414281296581686e-05, "loss": 0.4829, "step": 36754 }, { "epoch": 0.7795168713282857, "grad_norm": 0.376808226108551, "learning_rate": 1.3413967839798197e-05, "loss": 0.4869, "step": 36755 }, { "epoch": 0.7795380797862187, "grad_norm": 0.32525014877319336, "learning_rate": 1.341365437921772e-05, "loss": 0.4589, "step": 36756 }, { "epoch": 0.7795592882441518, "grad_norm": 0.3499499261379242, "learning_rate": 1.341334091484061e-05, "loss": 0.4632, "step": 36757 }, { "epoch": 0.7795804967020848, "grad_norm": 0.35540977120399475, "learning_rate": 1.3413027446667208e-05, "loss": 0.5015, "step": 36758 }, { "epoch": 0.7796017051600178, "grad_norm": 0.4490174949169159, "learning_rate": 1.3412713974697868e-05, "loss": 0.4318, "step": 36759 }, { "epoch": 0.7796229136179509, "grad_norm": 0.3716842830181122, "learning_rate": 1.3412400498932939e-05, "loss": 0.4597, "step": 36760 }, { "epoch": 0.7796441220758838, "grad_norm": 0.3517615497112274, "learning_rate": 1.3412087019372768e-05, "loss": 0.5686, "step": 36761 }, { "epoch": 0.7796653305338169, "grad_norm": 0.416548490524292, "learning_rate": 1.3411773536017701e-05, "loss": 0.4839, "step": 36762 }, { "epoch": 0.7796865389917499, "grad_norm": 0.37426069378852844, "learning_rate": 1.341146004886809e-05, "loss": 0.6128, "step": 36763 }, { "epoch": 0.779707747449683, "grad_norm": 0.3948383033275604, "learning_rate": 1.3411146557924282e-05, "loss": 0.4639, "step": 36764 }, { "epoch": 0.7797289559076159, "grad_norm": 0.41572028398513794, "learning_rate": 1.3410833063186626e-05, "loss": 0.536, "step": 36765 }, { "epoch": 0.779750164365549, "grad_norm": 0.4090016186237335, "learning_rate": 1.3410519564655473e-05, "loss": 0.4815, "step": 36766 }, { "epoch": 0.779771372823482, "grad_norm": 0.3315553367137909, "learning_rate": 1.341020606233117e-05, "loss": 0.4489, "step": 36767 }, { "epoch": 0.779792581281415, "grad_norm": 0.32189154624938965, "learning_rate": 1.3409892556214063e-05, "loss": 0.4282, "step": 36768 }, { "epoch": 0.779813789739348, "grad_norm": 0.37066954374313354, "learning_rate": 1.3409579046304507e-05, "loss": 0.5545, "step": 36769 }, { "epoch": 0.7798349981972811, "grad_norm": 0.4303666949272156, "learning_rate": 1.3409265532602845e-05, "loss": 0.4987, "step": 36770 }, { "epoch": 0.779856206655214, "grad_norm": 0.48973548412323, "learning_rate": 1.3408952015109425e-05, "loss": 0.471, "step": 36771 }, { "epoch": 0.7798774151131471, "grad_norm": 0.35634151101112366, "learning_rate": 1.3408638493824599e-05, "loss": 0.5012, "step": 36772 }, { "epoch": 0.7798986235710802, "grad_norm": 0.36331838369369507, "learning_rate": 1.3408324968748716e-05, "loss": 0.5088, "step": 36773 }, { "epoch": 0.7799198320290132, "grad_norm": 0.3435247540473938, "learning_rate": 1.3408011439882125e-05, "loss": 0.4604, "step": 36774 }, { "epoch": 0.7799410404869462, "grad_norm": 0.35041412711143494, "learning_rate": 1.3407697907225172e-05, "loss": 0.4874, "step": 36775 }, { "epoch": 0.7799622489448792, "grad_norm": 0.4341718554496765, "learning_rate": 1.3407384370778208e-05, "loss": 0.5299, "step": 36776 }, { "epoch": 0.7799834574028123, "grad_norm": 0.3595338463783264, "learning_rate": 1.3407070830541577e-05, "loss": 0.4987, "step": 36777 }, { "epoch": 0.7800046658607452, "grad_norm": 0.43428391218185425, "learning_rate": 1.3406757286515634e-05, "loss": 0.5345, "step": 36778 }, { "epoch": 0.7800258743186783, "grad_norm": 0.3832826018333435, "learning_rate": 1.3406443738700725e-05, "loss": 0.5371, "step": 36779 }, { "epoch": 0.7800470827766113, "grad_norm": 0.5031601786613464, "learning_rate": 1.34061301870972e-05, "loss": 0.4957, "step": 36780 }, { "epoch": 0.7800682912345444, "grad_norm": 0.3245565891265869, "learning_rate": 1.3405816631705407e-05, "loss": 0.4231, "step": 36781 }, { "epoch": 0.7800894996924773, "grad_norm": 0.31190264225006104, "learning_rate": 1.3405503072525692e-05, "loss": 0.472, "step": 36782 }, { "epoch": 0.7801107081504104, "grad_norm": 0.4033787250518799, "learning_rate": 1.3405189509558407e-05, "loss": 0.5254, "step": 36783 }, { "epoch": 0.7801319166083434, "grad_norm": 0.3607158362865448, "learning_rate": 1.3404875942803899e-05, "loss": 0.4662, "step": 36784 }, { "epoch": 0.7801531250662764, "grad_norm": 0.30967891216278076, "learning_rate": 1.3404562372262517e-05, "loss": 0.542, "step": 36785 }, { "epoch": 0.7801743335242095, "grad_norm": 0.3521602153778076, "learning_rate": 1.3404248797934614e-05, "loss": 0.4911, "step": 36786 }, { "epoch": 0.7801955419821425, "grad_norm": 0.3354438543319702, "learning_rate": 1.3403935219820531e-05, "loss": 0.4669, "step": 36787 }, { "epoch": 0.7802167504400755, "grad_norm": 0.3463050425052643, "learning_rate": 1.3403621637920624e-05, "loss": 0.5082, "step": 36788 }, { "epoch": 0.7802379588980085, "grad_norm": 0.38879191875457764, "learning_rate": 1.3403308052235237e-05, "loss": 0.5791, "step": 36789 }, { "epoch": 0.7802591673559416, "grad_norm": 0.3385665714740753, "learning_rate": 1.340299446276472e-05, "loss": 0.4994, "step": 36790 }, { "epoch": 0.7802803758138745, "grad_norm": 0.4293522536754608, "learning_rate": 1.3402680869509422e-05, "loss": 0.4983, "step": 36791 }, { "epoch": 0.7803015842718076, "grad_norm": 0.3576371371746063, "learning_rate": 1.3402367272469695e-05, "loss": 0.4839, "step": 36792 }, { "epoch": 0.7803227927297406, "grad_norm": 0.3467964231967926, "learning_rate": 1.3402053671645878e-05, "loss": 0.4743, "step": 36793 }, { "epoch": 0.7803440011876737, "grad_norm": 0.38156139850616455, "learning_rate": 1.3401740067038333e-05, "loss": 0.5395, "step": 36794 }, { "epoch": 0.7803652096456066, "grad_norm": 0.3310549259185791, "learning_rate": 1.3401426458647402e-05, "loss": 0.4283, "step": 36795 }, { "epoch": 0.7803864181035397, "grad_norm": 0.4021952748298645, "learning_rate": 1.340111284647343e-05, "loss": 0.4711, "step": 36796 }, { "epoch": 0.7804076265614727, "grad_norm": 0.3342845141887665, "learning_rate": 1.340079923051677e-05, "loss": 0.4118, "step": 36797 }, { "epoch": 0.7804288350194057, "grad_norm": 0.4871848225593567, "learning_rate": 1.3400485610777774e-05, "loss": 0.4011, "step": 36798 }, { "epoch": 0.7804500434773388, "grad_norm": 0.3309932053089142, "learning_rate": 1.3400171987256785e-05, "loss": 0.4935, "step": 36799 }, { "epoch": 0.7804712519352718, "grad_norm": 0.3811372220516205, "learning_rate": 1.3399858359954156e-05, "loss": 0.5487, "step": 36800 }, { "epoch": 0.7804924603932049, "grad_norm": 0.37298938632011414, "learning_rate": 1.3399544728870231e-05, "loss": 0.4904, "step": 36801 }, { "epoch": 0.7805136688511378, "grad_norm": 0.3619687259197235, "learning_rate": 1.3399231094005365e-05, "loss": 0.5551, "step": 36802 }, { "epoch": 0.7805348773090709, "grad_norm": 0.36485105752944946, "learning_rate": 1.3398917455359903e-05, "loss": 0.5665, "step": 36803 }, { "epoch": 0.7805560857670039, "grad_norm": 0.3394120931625366, "learning_rate": 1.3398603812934195e-05, "loss": 0.43, "step": 36804 }, { "epoch": 0.7805772942249369, "grad_norm": 0.32586705684661865, "learning_rate": 1.3398290166728587e-05, "loss": 0.4074, "step": 36805 }, { "epoch": 0.7805985026828699, "grad_norm": 0.3680173456668854, "learning_rate": 1.3397976516743434e-05, "loss": 0.4865, "step": 36806 }, { "epoch": 0.780619711140803, "grad_norm": 0.44758397340774536, "learning_rate": 1.3397662862979078e-05, "loss": 0.5091, "step": 36807 }, { "epoch": 0.7806409195987359, "grad_norm": 0.32287558913230896, "learning_rate": 1.3397349205435871e-05, "loss": 0.4074, "step": 36808 }, { "epoch": 0.780662128056669, "grad_norm": 0.33728843927383423, "learning_rate": 1.3397035544114163e-05, "loss": 0.4624, "step": 36809 }, { "epoch": 0.780683336514602, "grad_norm": 1.521655797958374, "learning_rate": 1.3396721879014298e-05, "loss": 0.4761, "step": 36810 }, { "epoch": 0.780704544972535, "grad_norm": 0.3124537467956543, "learning_rate": 1.3396408210136636e-05, "loss": 0.4032, "step": 36811 }, { "epoch": 0.780725753430468, "grad_norm": 0.3533584475517273, "learning_rate": 1.3396094537481515e-05, "loss": 0.4657, "step": 36812 }, { "epoch": 0.7807469618884011, "grad_norm": 0.35365065932273865, "learning_rate": 1.3395780861049284e-05, "loss": 0.438, "step": 36813 }, { "epoch": 0.7807681703463342, "grad_norm": 0.3687933683395386, "learning_rate": 1.3395467180840296e-05, "loss": 0.451, "step": 36814 }, { "epoch": 0.7807893788042671, "grad_norm": 0.37784379720687866, "learning_rate": 1.33951534968549e-05, "loss": 0.4978, "step": 36815 }, { "epoch": 0.7808105872622002, "grad_norm": 0.4427775740623474, "learning_rate": 1.3394839809093443e-05, "loss": 0.4692, "step": 36816 }, { "epoch": 0.7808317957201332, "grad_norm": 0.4538133144378662, "learning_rate": 1.3394526117556277e-05, "loss": 0.5153, "step": 36817 }, { "epoch": 0.7808530041780662, "grad_norm": 0.3935014307498932, "learning_rate": 1.3394212422243746e-05, "loss": 0.5277, "step": 36818 }, { "epoch": 0.7808742126359992, "grad_norm": 0.41987088322639465, "learning_rate": 1.3393898723156202e-05, "loss": 0.3916, "step": 36819 }, { "epoch": 0.7808954210939323, "grad_norm": 0.39098045229911804, "learning_rate": 1.3393585020293994e-05, "loss": 0.4276, "step": 36820 }, { "epoch": 0.7809166295518652, "grad_norm": 0.5782681703567505, "learning_rate": 1.339327131365747e-05, "loss": 0.4656, "step": 36821 }, { "epoch": 0.7809378380097983, "grad_norm": 0.32584863901138306, "learning_rate": 1.3392957603246976e-05, "loss": 0.4918, "step": 36822 }, { "epoch": 0.7809590464677313, "grad_norm": 0.4034155011177063, "learning_rate": 1.3392643889062868e-05, "loss": 0.5171, "step": 36823 }, { "epoch": 0.7809802549256644, "grad_norm": 0.3690396845340729, "learning_rate": 1.339233017110549e-05, "loss": 0.5191, "step": 36824 }, { "epoch": 0.7810014633835973, "grad_norm": 0.48770564794540405, "learning_rate": 1.3392016449375192e-05, "loss": 0.5176, "step": 36825 }, { "epoch": 0.7810226718415304, "grad_norm": 0.3643956780433655, "learning_rate": 1.3391702723872324e-05, "loss": 0.5695, "step": 36826 }, { "epoch": 0.7810438802994635, "grad_norm": 0.4645809233188629, "learning_rate": 1.3391388994597233e-05, "loss": 0.5031, "step": 36827 }, { "epoch": 0.7810650887573964, "grad_norm": 0.3212297558784485, "learning_rate": 1.3391075261550265e-05, "loss": 0.5147, "step": 36828 }, { "epoch": 0.7810862972153295, "grad_norm": 0.40470463037490845, "learning_rate": 1.3390761524731777e-05, "loss": 0.5411, "step": 36829 }, { "epoch": 0.7811075056732625, "grad_norm": 0.34545114636421204, "learning_rate": 1.3390447784142111e-05, "loss": 0.4091, "step": 36830 }, { "epoch": 0.7811287141311956, "grad_norm": 0.4112236201763153, "learning_rate": 1.339013403978162e-05, "loss": 0.5183, "step": 36831 }, { "epoch": 0.7811499225891285, "grad_norm": 0.36564090847969055, "learning_rate": 1.3389820291650652e-05, "loss": 0.5221, "step": 36832 }, { "epoch": 0.7811711310470616, "grad_norm": 0.3331565260887146, "learning_rate": 1.3389506539749553e-05, "loss": 0.4417, "step": 36833 }, { "epoch": 0.7811923395049946, "grad_norm": 0.4038558006286621, "learning_rate": 1.3389192784078676e-05, "loss": 0.4845, "step": 36834 }, { "epoch": 0.7812135479629276, "grad_norm": 0.3689582943916321, "learning_rate": 1.3388879024638366e-05, "loss": 0.5228, "step": 36835 }, { "epoch": 0.7812347564208606, "grad_norm": 0.32164570689201355, "learning_rate": 1.3388565261428974e-05, "loss": 0.4253, "step": 36836 }, { "epoch": 0.7812559648787937, "grad_norm": 0.3271166682243347, "learning_rate": 1.3388251494450853e-05, "loss": 0.5237, "step": 36837 }, { "epoch": 0.7812771733367266, "grad_norm": 0.34632349014282227, "learning_rate": 1.3387937723704346e-05, "loss": 0.501, "step": 36838 }, { "epoch": 0.7812983817946597, "grad_norm": 0.38633450865745544, "learning_rate": 1.3387623949189803e-05, "loss": 0.5073, "step": 36839 }, { "epoch": 0.7813195902525928, "grad_norm": 0.37628787755966187, "learning_rate": 1.3387310170907576e-05, "loss": 0.528, "step": 36840 }, { "epoch": 0.7813407987105258, "grad_norm": 0.32590559124946594, "learning_rate": 1.338699638885801e-05, "loss": 0.4815, "step": 36841 }, { "epoch": 0.7813620071684588, "grad_norm": 0.3892533779144287, "learning_rate": 1.3386682603041457e-05, "loss": 0.4823, "step": 36842 }, { "epoch": 0.7813832156263918, "grad_norm": 0.45308494567871094, "learning_rate": 1.3386368813458266e-05, "loss": 0.6351, "step": 36843 }, { "epoch": 0.7814044240843249, "grad_norm": 0.4010372757911682, "learning_rate": 1.3386055020108783e-05, "loss": 0.4959, "step": 36844 }, { "epoch": 0.7814256325422578, "grad_norm": 0.37769806385040283, "learning_rate": 1.3385741222993363e-05, "loss": 0.5572, "step": 36845 }, { "epoch": 0.7814468410001909, "grad_norm": 0.33808761835098267, "learning_rate": 1.3385427422112347e-05, "loss": 0.528, "step": 36846 }, { "epoch": 0.7814680494581239, "grad_norm": 0.35971373319625854, "learning_rate": 1.338511361746609e-05, "loss": 0.4973, "step": 36847 }, { "epoch": 0.781489257916057, "grad_norm": 0.3324936032295227, "learning_rate": 1.3384799809054937e-05, "loss": 0.4414, "step": 36848 }, { "epoch": 0.7815104663739899, "grad_norm": 0.4000537693500519, "learning_rate": 1.3384485996879241e-05, "loss": 0.4534, "step": 36849 }, { "epoch": 0.781531674831923, "grad_norm": 0.36054903268814087, "learning_rate": 1.3384172180939348e-05, "loss": 0.4183, "step": 36850 }, { "epoch": 0.781552883289856, "grad_norm": 0.37904131412506104, "learning_rate": 1.3383858361235609e-05, "loss": 0.4083, "step": 36851 }, { "epoch": 0.781574091747789, "grad_norm": 0.361752450466156, "learning_rate": 1.3383544537768372e-05, "loss": 0.4526, "step": 36852 }, { "epoch": 0.781595300205722, "grad_norm": 0.3430555462837219, "learning_rate": 1.3383230710537987e-05, "loss": 0.5285, "step": 36853 }, { "epoch": 0.7816165086636551, "grad_norm": 0.32202428579330444, "learning_rate": 1.33829168795448e-05, "loss": 0.461, "step": 36854 }, { "epoch": 0.7816377171215881, "grad_norm": 0.34210488200187683, "learning_rate": 1.3382603044789162e-05, "loss": 0.542, "step": 36855 }, { "epoch": 0.7816589255795211, "grad_norm": 0.4112038016319275, "learning_rate": 1.3382289206271424e-05, "loss": 0.5454, "step": 36856 }, { "epoch": 0.7816801340374542, "grad_norm": 0.3360079824924469, "learning_rate": 1.3381975363991934e-05, "loss": 0.4628, "step": 36857 }, { "epoch": 0.7817013424953871, "grad_norm": 0.3792082667350769, "learning_rate": 1.338166151795104e-05, "loss": 0.5548, "step": 36858 }, { "epoch": 0.7817225509533202, "grad_norm": 0.45803043246269226, "learning_rate": 1.338134766814909e-05, "loss": 0.536, "step": 36859 }, { "epoch": 0.7817437594112532, "grad_norm": 0.3842640519142151, "learning_rate": 1.3381033814586437e-05, "loss": 0.4401, "step": 36860 }, { "epoch": 0.7817649678691863, "grad_norm": 0.404441237449646, "learning_rate": 1.3380719957263422e-05, "loss": 0.4933, "step": 36861 }, { "epoch": 0.7817861763271192, "grad_norm": 0.3642195761203766, "learning_rate": 1.3380406096180404e-05, "loss": 0.5441, "step": 36862 }, { "epoch": 0.7818073847850523, "grad_norm": 0.3306584358215332, "learning_rate": 1.338009223133773e-05, "loss": 0.4806, "step": 36863 }, { "epoch": 0.7818285932429853, "grad_norm": 0.3604968786239624, "learning_rate": 1.3379778362735742e-05, "loss": 0.4427, "step": 36864 }, { "epoch": 0.7818498017009183, "grad_norm": 0.3797883987426758, "learning_rate": 1.3379464490374796e-05, "loss": 0.5017, "step": 36865 }, { "epoch": 0.7818710101588513, "grad_norm": 0.34137287735939026, "learning_rate": 1.337915061425524e-05, "loss": 0.4997, "step": 36866 }, { "epoch": 0.7818922186167844, "grad_norm": 0.6729121804237366, "learning_rate": 1.3378836734377418e-05, "loss": 0.5037, "step": 36867 }, { "epoch": 0.7819134270747174, "grad_norm": 0.44195330142974854, "learning_rate": 1.3378522850741687e-05, "loss": 0.5573, "step": 36868 }, { "epoch": 0.7819346355326504, "grad_norm": 0.3565727472305298, "learning_rate": 1.3378208963348391e-05, "loss": 0.545, "step": 36869 }, { "epoch": 0.7819558439905835, "grad_norm": 0.370881587266922, "learning_rate": 1.3377895072197879e-05, "loss": 0.4597, "step": 36870 }, { "epoch": 0.7819770524485165, "grad_norm": 0.3787948489189148, "learning_rate": 1.3377581177290506e-05, "loss": 0.5846, "step": 36871 }, { "epoch": 0.7819982609064495, "grad_norm": 0.3914567828178406, "learning_rate": 1.3377267278626614e-05, "loss": 0.4605, "step": 36872 }, { "epoch": 0.7820194693643825, "grad_norm": 0.37333524227142334, "learning_rate": 1.3376953376206553e-05, "loss": 0.4578, "step": 36873 }, { "epoch": 0.7820406778223156, "grad_norm": 0.3413902819156647, "learning_rate": 1.3376639470030675e-05, "loss": 0.5321, "step": 36874 }, { "epoch": 0.7820618862802485, "grad_norm": 0.3699204921722412, "learning_rate": 1.3376325560099326e-05, "loss": 0.4286, "step": 36875 }, { "epoch": 0.7820830947381816, "grad_norm": 0.3532646894454956, "learning_rate": 1.337601164641286e-05, "loss": 0.5196, "step": 36876 }, { "epoch": 0.7821043031961146, "grad_norm": 0.3968150317668915, "learning_rate": 1.3375697728971625e-05, "loss": 0.5036, "step": 36877 }, { "epoch": 0.7821255116540476, "grad_norm": 0.3563971519470215, "learning_rate": 1.3375383807775963e-05, "loss": 0.4866, "step": 36878 }, { "epoch": 0.7821467201119806, "grad_norm": 0.4550471901893616, "learning_rate": 1.3375069882826232e-05, "loss": 0.5066, "step": 36879 }, { "epoch": 0.7821679285699137, "grad_norm": 0.3263821005821228, "learning_rate": 1.3374755954122775e-05, "loss": 0.4129, "step": 36880 }, { "epoch": 0.7821891370278468, "grad_norm": 0.33305758237838745, "learning_rate": 1.3374442021665942e-05, "loss": 0.5347, "step": 36881 }, { "epoch": 0.7822103454857797, "grad_norm": 0.40578705072402954, "learning_rate": 1.3374128085456091e-05, "loss": 0.5614, "step": 36882 }, { "epoch": 0.7822315539437128, "grad_norm": 0.34496384859085083, "learning_rate": 1.3373814145493557e-05, "loss": 0.5265, "step": 36883 }, { "epoch": 0.7822527624016458, "grad_norm": 0.3725566267967224, "learning_rate": 1.3373500201778701e-05, "loss": 0.511, "step": 36884 }, { "epoch": 0.7822739708595788, "grad_norm": 0.3392026126384735, "learning_rate": 1.3373186254311867e-05, "loss": 0.4797, "step": 36885 }, { "epoch": 0.7822951793175118, "grad_norm": 0.37522703409194946, "learning_rate": 1.3372872303093404e-05, "loss": 0.5261, "step": 36886 }, { "epoch": 0.7823163877754449, "grad_norm": 0.3493022322654724, "learning_rate": 1.3372558348123658e-05, "loss": 0.5548, "step": 36887 }, { "epoch": 0.7823375962333778, "grad_norm": 0.3996219336986542, "learning_rate": 1.3372244389402987e-05, "loss": 0.4826, "step": 36888 }, { "epoch": 0.7823588046913109, "grad_norm": 0.3064785897731781, "learning_rate": 1.3371930426931731e-05, "loss": 0.4831, "step": 36889 }, { "epoch": 0.7823800131492439, "grad_norm": 0.405274361371994, "learning_rate": 1.3371616460710249e-05, "loss": 0.4542, "step": 36890 }, { "epoch": 0.782401221607177, "grad_norm": 0.3955746293067932, "learning_rate": 1.337130249073888e-05, "loss": 0.3678, "step": 36891 }, { "epoch": 0.7824224300651099, "grad_norm": 0.36126986145973206, "learning_rate": 1.3370988517017978e-05, "loss": 0.4766, "step": 36892 }, { "epoch": 0.782443638523043, "grad_norm": 0.32876405119895935, "learning_rate": 1.337067453954789e-05, "loss": 0.4607, "step": 36893 }, { "epoch": 0.782464846980976, "grad_norm": 0.34738102555274963, "learning_rate": 1.337036055832897e-05, "loss": 0.5241, "step": 36894 }, { "epoch": 0.782486055438909, "grad_norm": 0.45681068301200867, "learning_rate": 1.3370046573361564e-05, "loss": 0.4628, "step": 36895 }, { "epoch": 0.7825072638968421, "grad_norm": 0.5187800526618958, "learning_rate": 1.3369732584646022e-05, "loss": 0.4165, "step": 36896 }, { "epoch": 0.7825284723547751, "grad_norm": 0.383949339389801, "learning_rate": 1.336941859218269e-05, "loss": 0.5321, "step": 36897 }, { "epoch": 0.7825496808127081, "grad_norm": 0.3914070427417755, "learning_rate": 1.3369104595971922e-05, "loss": 0.5097, "step": 36898 }, { "epoch": 0.7825708892706411, "grad_norm": 0.36621400713920593, "learning_rate": 1.3368790596014065e-05, "loss": 0.5106, "step": 36899 }, { "epoch": 0.7825920977285742, "grad_norm": 0.34167200326919556, "learning_rate": 1.336847659230947e-05, "loss": 0.3969, "step": 36900 }, { "epoch": 0.7826133061865072, "grad_norm": 0.36733391880989075, "learning_rate": 1.3368162584858482e-05, "loss": 0.5596, "step": 36901 }, { "epoch": 0.7826345146444402, "grad_norm": 0.3411475121974945, "learning_rate": 1.3367848573661453e-05, "loss": 0.3203, "step": 36902 }, { "epoch": 0.7826557231023732, "grad_norm": 0.3757762908935547, "learning_rate": 1.3367534558718731e-05, "loss": 0.4922, "step": 36903 }, { "epoch": 0.7826769315603063, "grad_norm": 0.9489438533782959, "learning_rate": 1.336722054003067e-05, "loss": 0.4316, "step": 36904 }, { "epoch": 0.7826981400182392, "grad_norm": 0.3412937521934509, "learning_rate": 1.3366906517597613e-05, "loss": 0.4726, "step": 36905 }, { "epoch": 0.7827193484761723, "grad_norm": 0.3711864650249481, "learning_rate": 1.3366592491419912e-05, "loss": 0.5064, "step": 36906 }, { "epoch": 0.7827405569341053, "grad_norm": 0.4893273711204529, "learning_rate": 1.3366278461497916e-05, "loss": 0.5292, "step": 36907 }, { "epoch": 0.7827617653920383, "grad_norm": 0.36621060967445374, "learning_rate": 1.3365964427831976e-05, "loss": 0.5252, "step": 36908 }, { "epoch": 0.7827829738499714, "grad_norm": 0.39502424001693726, "learning_rate": 1.3365650390422437e-05, "loss": 0.5045, "step": 36909 }, { "epoch": 0.7828041823079044, "grad_norm": 0.3568789064884186, "learning_rate": 1.3365336349269652e-05, "loss": 0.4734, "step": 36910 }, { "epoch": 0.7828253907658375, "grad_norm": 0.3660382032394409, "learning_rate": 1.336502230437397e-05, "loss": 0.5153, "step": 36911 }, { "epoch": 0.7828465992237704, "grad_norm": 0.5527909398078918, "learning_rate": 1.3364708255735738e-05, "loss": 0.4059, "step": 36912 }, { "epoch": 0.7828678076817035, "grad_norm": 0.36320966482162476, "learning_rate": 1.3364394203355308e-05, "loss": 0.5016, "step": 36913 }, { "epoch": 0.7828890161396365, "grad_norm": 0.38158118724823, "learning_rate": 1.3364080147233029e-05, "loss": 0.5503, "step": 36914 }, { "epoch": 0.7829102245975695, "grad_norm": 0.3233150839805603, "learning_rate": 1.3363766087369246e-05, "loss": 0.4234, "step": 36915 }, { "epoch": 0.7829314330555025, "grad_norm": 0.46630656719207764, "learning_rate": 1.3363452023764314e-05, "loss": 0.5014, "step": 36916 }, { "epoch": 0.7829526415134356, "grad_norm": 0.43322357535362244, "learning_rate": 1.336313795641858e-05, "loss": 0.523, "step": 36917 }, { "epoch": 0.7829738499713685, "grad_norm": 0.41696739196777344, "learning_rate": 1.336282388533239e-05, "loss": 0.5258, "step": 36918 }, { "epoch": 0.7829950584293016, "grad_norm": 0.4328001141548157, "learning_rate": 1.33625098105061e-05, "loss": 0.5029, "step": 36919 }, { "epoch": 0.7830162668872346, "grad_norm": 0.38420483469963074, "learning_rate": 1.3362195731940055e-05, "loss": 0.4667, "step": 36920 }, { "epoch": 0.7830374753451677, "grad_norm": 0.3504655063152313, "learning_rate": 1.3361881649634605e-05, "loss": 0.4498, "step": 36921 }, { "epoch": 0.7830586838031007, "grad_norm": 0.34386083483695984, "learning_rate": 1.3361567563590101e-05, "loss": 0.4512, "step": 36922 }, { "epoch": 0.7830798922610337, "grad_norm": 0.3605523407459259, "learning_rate": 1.3361253473806888e-05, "loss": 0.5098, "step": 36923 }, { "epoch": 0.7831011007189668, "grad_norm": 0.36012449860572815, "learning_rate": 1.3360939380285321e-05, "loss": 0.4502, "step": 36924 }, { "epoch": 0.7831223091768997, "grad_norm": 0.3848925828933716, "learning_rate": 1.3360625283025743e-05, "loss": 0.5014, "step": 36925 }, { "epoch": 0.7831435176348328, "grad_norm": 0.3542388081550598, "learning_rate": 1.3360311182028507e-05, "loss": 0.4845, "step": 36926 }, { "epoch": 0.7831647260927658, "grad_norm": 0.39849793910980225, "learning_rate": 1.3359997077293965e-05, "loss": 0.4344, "step": 36927 }, { "epoch": 0.7831859345506988, "grad_norm": 0.3810202181339264, "learning_rate": 1.3359682968822462e-05, "loss": 0.5066, "step": 36928 }, { "epoch": 0.7832071430086318, "grad_norm": 0.34564733505249023, "learning_rate": 1.3359368856614347e-05, "loss": 0.5544, "step": 36929 }, { "epoch": 0.7832283514665649, "grad_norm": 0.3658192753791809, "learning_rate": 1.3359054740669975e-05, "loss": 0.5566, "step": 36930 }, { "epoch": 0.7832495599244979, "grad_norm": 0.39352503418922424, "learning_rate": 1.3358740620989687e-05, "loss": 0.5141, "step": 36931 }, { "epoch": 0.7832707683824309, "grad_norm": 0.3354770839214325, "learning_rate": 1.335842649757384e-05, "loss": 0.3828, "step": 36932 }, { "epoch": 0.7832919768403639, "grad_norm": 0.38113895058631897, "learning_rate": 1.335811237042278e-05, "loss": 0.5388, "step": 36933 }, { "epoch": 0.783313185298297, "grad_norm": 0.35920917987823486, "learning_rate": 1.3357798239536854e-05, "loss": 0.5556, "step": 36934 }, { "epoch": 0.7833343937562299, "grad_norm": 0.3735351860523224, "learning_rate": 1.3357484104916416e-05, "loss": 0.4549, "step": 36935 }, { "epoch": 0.783355602214163, "grad_norm": 0.44209200143814087, "learning_rate": 1.3357169966561815e-05, "loss": 0.5076, "step": 36936 }, { "epoch": 0.7833768106720961, "grad_norm": 0.3700017035007477, "learning_rate": 1.3356855824473396e-05, "loss": 0.4843, "step": 36937 }, { "epoch": 0.783398019130029, "grad_norm": 0.33973824977874756, "learning_rate": 1.335654167865151e-05, "loss": 0.437, "step": 36938 }, { "epoch": 0.7834192275879621, "grad_norm": 0.37529268860816956, "learning_rate": 1.335622752909651e-05, "loss": 0.4671, "step": 36939 }, { "epoch": 0.7834404360458951, "grad_norm": 0.375472754240036, "learning_rate": 1.3355913375808745e-05, "loss": 0.5134, "step": 36940 }, { "epoch": 0.7834616445038282, "grad_norm": 0.4328767657279968, "learning_rate": 1.335559921878856e-05, "loss": 0.4653, "step": 36941 }, { "epoch": 0.7834828529617611, "grad_norm": 0.42300403118133545, "learning_rate": 1.3355285058036307e-05, "loss": 0.5454, "step": 36942 }, { "epoch": 0.7835040614196942, "grad_norm": 0.4198521077632904, "learning_rate": 1.3354970893552332e-05, "loss": 0.4315, "step": 36943 }, { "epoch": 0.7835252698776272, "grad_norm": 0.39757609367370605, "learning_rate": 1.335465672533699e-05, "loss": 0.4219, "step": 36944 }, { "epoch": 0.7835464783355602, "grad_norm": 0.3335437476634979, "learning_rate": 1.335434255339063e-05, "loss": 0.4915, "step": 36945 }, { "epoch": 0.7835676867934932, "grad_norm": 0.33630359172821045, "learning_rate": 1.3354028377713597e-05, "loss": 0.5289, "step": 36946 }, { "epoch": 0.7835888952514263, "grad_norm": 0.3246324956417084, "learning_rate": 1.3353714198306243e-05, "loss": 0.4894, "step": 36947 }, { "epoch": 0.7836101037093592, "grad_norm": 0.4085098206996918, "learning_rate": 1.3353400015168917e-05, "loss": 0.5779, "step": 36948 }, { "epoch": 0.7836313121672923, "grad_norm": 0.33506157994270325, "learning_rate": 1.335308582830197e-05, "loss": 0.5018, "step": 36949 }, { "epoch": 0.7836525206252254, "grad_norm": 0.3691645562648773, "learning_rate": 1.3352771637705748e-05, "loss": 0.3864, "step": 36950 }, { "epoch": 0.7836737290831584, "grad_norm": 0.4096446931362152, "learning_rate": 1.3352457443380603e-05, "loss": 0.4308, "step": 36951 }, { "epoch": 0.7836949375410914, "grad_norm": 0.3199005722999573, "learning_rate": 1.3352143245326886e-05, "loss": 0.4598, "step": 36952 }, { "epoch": 0.7837161459990244, "grad_norm": 0.4358769953250885, "learning_rate": 1.3351829043544941e-05, "loss": 0.4637, "step": 36953 }, { "epoch": 0.7837373544569575, "grad_norm": 0.34992295503616333, "learning_rate": 1.3351514838035122e-05, "loss": 0.4124, "step": 36954 }, { "epoch": 0.7837585629148904, "grad_norm": 0.3666328489780426, "learning_rate": 1.335120062879778e-05, "loss": 0.545, "step": 36955 }, { "epoch": 0.7837797713728235, "grad_norm": 0.38905373215675354, "learning_rate": 1.3350886415833263e-05, "loss": 0.5044, "step": 36956 }, { "epoch": 0.7838009798307565, "grad_norm": 0.37314119935035706, "learning_rate": 1.3350572199141912e-05, "loss": 0.4455, "step": 36957 }, { "epoch": 0.7838221882886895, "grad_norm": 0.4139624536037445, "learning_rate": 1.3350257978724088e-05, "loss": 0.5177, "step": 36958 }, { "epoch": 0.7838433967466225, "grad_norm": 0.3704236149787903, "learning_rate": 1.3349943754580136e-05, "loss": 0.5374, "step": 36959 }, { "epoch": 0.7838646052045556, "grad_norm": 0.39966660737991333, "learning_rate": 1.3349629526710405e-05, "loss": 0.4935, "step": 36960 }, { "epoch": 0.7838858136624886, "grad_norm": 0.35496893525123596, "learning_rate": 1.3349315295115248e-05, "loss": 0.5072, "step": 36961 }, { "epoch": 0.7839070221204216, "grad_norm": 0.3498871624469757, "learning_rate": 1.3349001059795008e-05, "loss": 0.4923, "step": 36962 }, { "epoch": 0.7839282305783547, "grad_norm": 0.35972845554351807, "learning_rate": 1.3348686820750038e-05, "loss": 0.4918, "step": 36963 }, { "epoch": 0.7839494390362877, "grad_norm": 0.3123909533023834, "learning_rate": 1.3348372577980691e-05, "loss": 0.4085, "step": 36964 }, { "epoch": 0.7839706474942207, "grad_norm": 0.37140247225761414, "learning_rate": 1.334805833148731e-05, "loss": 0.5256, "step": 36965 }, { "epoch": 0.7839918559521537, "grad_norm": 0.3632371425628662, "learning_rate": 1.3347744081270251e-05, "loss": 0.5246, "step": 36966 }, { "epoch": 0.7840130644100868, "grad_norm": 0.3580131530761719, "learning_rate": 1.334742982732986e-05, "loss": 0.3914, "step": 36967 }, { "epoch": 0.7840342728680197, "grad_norm": 0.3299817144870758, "learning_rate": 1.3347115569666482e-05, "loss": 0.47, "step": 36968 }, { "epoch": 0.7840554813259528, "grad_norm": 0.39583662152290344, "learning_rate": 1.3346801308280475e-05, "loss": 0.4981, "step": 36969 }, { "epoch": 0.7840766897838858, "grad_norm": 0.32401469349861145, "learning_rate": 1.3346487043172181e-05, "loss": 0.4582, "step": 36970 }, { "epoch": 0.7840978982418189, "grad_norm": 0.360988587141037, "learning_rate": 1.3346172774341955e-05, "loss": 0.5266, "step": 36971 }, { "epoch": 0.7841191066997518, "grad_norm": 0.37678438425064087, "learning_rate": 1.3345858501790145e-05, "loss": 0.4818, "step": 36972 }, { "epoch": 0.7841403151576849, "grad_norm": 0.34094348549842834, "learning_rate": 1.3345544225517104e-05, "loss": 0.4913, "step": 36973 }, { "epoch": 0.7841615236156179, "grad_norm": 0.38124048709869385, "learning_rate": 1.3345229945523172e-05, "loss": 0.501, "step": 36974 }, { "epoch": 0.7841827320735509, "grad_norm": 0.31653469800949097, "learning_rate": 1.3344915661808708e-05, "loss": 0.4433, "step": 36975 }, { "epoch": 0.7842039405314839, "grad_norm": 0.36539575457572937, "learning_rate": 1.3344601374374055e-05, "loss": 0.5829, "step": 36976 }, { "epoch": 0.784225148989417, "grad_norm": 0.34697625041007996, "learning_rate": 1.3344287083219567e-05, "loss": 0.5205, "step": 36977 }, { "epoch": 0.78424635744735, "grad_norm": 0.39997801184654236, "learning_rate": 1.3343972788345592e-05, "loss": 0.4542, "step": 36978 }, { "epoch": 0.784267565905283, "grad_norm": 0.47882720828056335, "learning_rate": 1.3343658489752477e-05, "loss": 0.555, "step": 36979 }, { "epoch": 0.7842887743632161, "grad_norm": 0.34925469756126404, "learning_rate": 1.3343344187440578e-05, "loss": 0.5536, "step": 36980 }, { "epoch": 0.784309982821149, "grad_norm": 0.35809794068336487, "learning_rate": 1.334302988141024e-05, "loss": 0.4056, "step": 36981 }, { "epoch": 0.7843311912790821, "grad_norm": 0.37184783816337585, "learning_rate": 1.3342715571661812e-05, "loss": 0.51, "step": 36982 }, { "epoch": 0.7843523997370151, "grad_norm": 0.4049329459667206, "learning_rate": 1.3342401258195645e-05, "loss": 0.5885, "step": 36983 }, { "epoch": 0.7843736081949482, "grad_norm": 0.33640363812446594, "learning_rate": 1.3342086941012089e-05, "loss": 0.4892, "step": 36984 }, { "epoch": 0.7843948166528811, "grad_norm": 0.3855166733264923, "learning_rate": 1.3341772620111493e-05, "loss": 0.4418, "step": 36985 }, { "epoch": 0.7844160251108142, "grad_norm": 0.41542860865592957, "learning_rate": 1.3341458295494206e-05, "loss": 0.4536, "step": 36986 }, { "epoch": 0.7844372335687472, "grad_norm": 0.32033708691596985, "learning_rate": 1.3341143967160579e-05, "loss": 0.4151, "step": 36987 }, { "epoch": 0.7844584420266802, "grad_norm": 0.3767203092575073, "learning_rate": 1.3340829635110959e-05, "loss": 0.4766, "step": 36988 }, { "epoch": 0.7844796504846132, "grad_norm": 0.3689724802970886, "learning_rate": 1.3340515299345695e-05, "loss": 0.5301, "step": 36989 }, { "epoch": 0.7845008589425463, "grad_norm": 0.3615916669368744, "learning_rate": 1.3340200959865144e-05, "loss": 0.4437, "step": 36990 }, { "epoch": 0.7845220674004794, "grad_norm": 0.42254704236984253, "learning_rate": 1.3339886616669646e-05, "loss": 0.4564, "step": 36991 }, { "epoch": 0.7845432758584123, "grad_norm": 0.3962056040763855, "learning_rate": 1.333957226975956e-05, "loss": 0.5079, "step": 36992 }, { "epoch": 0.7845644843163454, "grad_norm": 0.3311513662338257, "learning_rate": 1.3339257919135226e-05, "loss": 0.3903, "step": 36993 }, { "epoch": 0.7845856927742784, "grad_norm": 0.4109528958797455, "learning_rate": 1.3338943564797002e-05, "loss": 0.448, "step": 36994 }, { "epoch": 0.7846069012322114, "grad_norm": 0.3300624489784241, "learning_rate": 1.3338629206745232e-05, "loss": 0.4733, "step": 36995 }, { "epoch": 0.7846281096901444, "grad_norm": 0.4373115599155426, "learning_rate": 1.3338314844980269e-05, "loss": 0.4479, "step": 36996 }, { "epoch": 0.7846493181480775, "grad_norm": 0.4286525845527649, "learning_rate": 1.333800047950246e-05, "loss": 0.469, "step": 36997 }, { "epoch": 0.7846705266060104, "grad_norm": 0.3762301206588745, "learning_rate": 1.3337686110312158e-05, "loss": 0.4517, "step": 36998 }, { "epoch": 0.7846917350639435, "grad_norm": 0.3458288908004761, "learning_rate": 1.3337371737409707e-05, "loss": 0.4199, "step": 36999 }, { "epoch": 0.7847129435218765, "grad_norm": 0.3681432604789734, "learning_rate": 1.3337057360795463e-05, "loss": 0.4536, "step": 37000 }, { "epoch": 0.7847341519798096, "grad_norm": 0.33013594150543213, "learning_rate": 1.3336742980469773e-05, "loss": 0.472, "step": 37001 }, { "epoch": 0.7847553604377425, "grad_norm": 0.3792532682418823, "learning_rate": 1.3336428596432983e-05, "loss": 0.4736, "step": 37002 }, { "epoch": 0.7847765688956756, "grad_norm": 0.431048721075058, "learning_rate": 1.333611420868545e-05, "loss": 0.4725, "step": 37003 }, { "epoch": 0.7847977773536087, "grad_norm": 0.348746657371521, "learning_rate": 1.3335799817227517e-05, "loss": 0.4308, "step": 37004 }, { "epoch": 0.7848189858115416, "grad_norm": 0.3775157928466797, "learning_rate": 1.3335485422059535e-05, "loss": 0.4947, "step": 37005 }, { "epoch": 0.7848401942694747, "grad_norm": 0.35745730996131897, "learning_rate": 1.3335171023181862e-05, "loss": 0.435, "step": 37006 }, { "epoch": 0.7848614027274077, "grad_norm": 0.35764846205711365, "learning_rate": 1.3334856620594835e-05, "loss": 0.4651, "step": 37007 }, { "epoch": 0.7848826111853408, "grad_norm": 0.35597366094589233, "learning_rate": 1.333454221429881e-05, "loss": 0.4672, "step": 37008 }, { "epoch": 0.7849038196432737, "grad_norm": 0.3585661053657532, "learning_rate": 1.3334227804294135e-05, "loss": 0.4834, "step": 37009 }, { "epoch": 0.7849250281012068, "grad_norm": 0.35557088255882263, "learning_rate": 1.3333913390581166e-05, "loss": 0.4792, "step": 37010 }, { "epoch": 0.7849462365591398, "grad_norm": 0.3299165368080139, "learning_rate": 1.3333598973160243e-05, "loss": 0.511, "step": 37011 }, { "epoch": 0.7849674450170728, "grad_norm": 0.37250280380249023, "learning_rate": 1.333328455203172e-05, "loss": 0.4911, "step": 37012 }, { "epoch": 0.7849886534750058, "grad_norm": 0.3606410026550293, "learning_rate": 1.3332970127195948e-05, "loss": 0.5054, "step": 37013 }, { "epoch": 0.7850098619329389, "grad_norm": 0.36397120356559753, "learning_rate": 1.3332655698653279e-05, "loss": 0.5041, "step": 37014 }, { "epoch": 0.7850310703908718, "grad_norm": 0.3597233295440674, "learning_rate": 1.3332341266404054e-05, "loss": 0.5074, "step": 37015 }, { "epoch": 0.7850522788488049, "grad_norm": 0.32356271147727966, "learning_rate": 1.3332026830448628e-05, "loss": 0.4569, "step": 37016 }, { "epoch": 0.7850734873067379, "grad_norm": 0.42591142654418945, "learning_rate": 1.3331712390787355e-05, "loss": 0.5313, "step": 37017 }, { "epoch": 0.785094695764671, "grad_norm": 0.33453771471977234, "learning_rate": 1.3331397947420578e-05, "loss": 0.4875, "step": 37018 }, { "epoch": 0.785115904222604, "grad_norm": 0.3725135624408722, "learning_rate": 1.3331083500348649e-05, "loss": 0.4601, "step": 37019 }, { "epoch": 0.785137112680537, "grad_norm": 0.3724689483642578, "learning_rate": 1.3330769049571919e-05, "loss": 0.5065, "step": 37020 }, { "epoch": 0.7851583211384701, "grad_norm": 0.48935428261756897, "learning_rate": 1.3330454595090734e-05, "loss": 0.5072, "step": 37021 }, { "epoch": 0.785179529596403, "grad_norm": 0.412124365568161, "learning_rate": 1.3330140136905448e-05, "loss": 0.4426, "step": 37022 }, { "epoch": 0.7852007380543361, "grad_norm": 0.3388564884662628, "learning_rate": 1.332982567501641e-05, "loss": 0.4418, "step": 37023 }, { "epoch": 0.7852219465122691, "grad_norm": 0.37762391567230225, "learning_rate": 1.3329511209423968e-05, "loss": 0.4763, "step": 37024 }, { "epoch": 0.7852431549702021, "grad_norm": 0.40560102462768555, "learning_rate": 1.3329196740128472e-05, "loss": 0.4886, "step": 37025 }, { "epoch": 0.7852643634281351, "grad_norm": 0.39301764965057373, "learning_rate": 1.3328882267130274e-05, "loss": 0.5299, "step": 37026 }, { "epoch": 0.7852855718860682, "grad_norm": 0.38149309158325195, "learning_rate": 1.3328567790429721e-05, "loss": 0.4857, "step": 37027 }, { "epoch": 0.7853067803440011, "grad_norm": 0.38032066822052, "learning_rate": 1.3328253310027161e-05, "loss": 0.5973, "step": 37028 }, { "epoch": 0.7853279888019342, "grad_norm": 0.3398791253566742, "learning_rate": 1.332793882592295e-05, "loss": 0.4647, "step": 37029 }, { "epoch": 0.7853491972598672, "grad_norm": 0.33085736632347107, "learning_rate": 1.3327624338117433e-05, "loss": 0.4183, "step": 37030 }, { "epoch": 0.7853704057178003, "grad_norm": 0.4385634958744049, "learning_rate": 1.3327309846610962e-05, "loss": 0.5188, "step": 37031 }, { "epoch": 0.7853916141757333, "grad_norm": 0.36385267972946167, "learning_rate": 1.332699535140389e-05, "loss": 0.536, "step": 37032 }, { "epoch": 0.7854128226336663, "grad_norm": 0.41438817977905273, "learning_rate": 1.3326680852496556e-05, "loss": 0.4161, "step": 37033 }, { "epoch": 0.7854340310915994, "grad_norm": 0.36728402972221375, "learning_rate": 1.3326366349889317e-05, "loss": 0.4844, "step": 37034 }, { "epoch": 0.7854552395495323, "grad_norm": 0.37657463550567627, "learning_rate": 1.3326051843582527e-05, "loss": 0.5605, "step": 37035 }, { "epoch": 0.7854764480074654, "grad_norm": 0.3419804573059082, "learning_rate": 1.3325737333576524e-05, "loss": 0.4455, "step": 37036 }, { "epoch": 0.7854976564653984, "grad_norm": 0.3283027410507202, "learning_rate": 1.3325422819871672e-05, "loss": 0.4335, "step": 37037 }, { "epoch": 0.7855188649233315, "grad_norm": 0.33021172881126404, "learning_rate": 1.332510830246831e-05, "loss": 0.3923, "step": 37038 }, { "epoch": 0.7855400733812644, "grad_norm": 0.41782596707344055, "learning_rate": 1.3324793781366792e-05, "loss": 0.5424, "step": 37039 }, { "epoch": 0.7855612818391975, "grad_norm": 0.38751909136772156, "learning_rate": 1.3324479256567466e-05, "loss": 0.4262, "step": 37040 }, { "epoch": 0.7855824902971305, "grad_norm": 0.3987360894680023, "learning_rate": 1.3324164728070685e-05, "loss": 0.5385, "step": 37041 }, { "epoch": 0.7856036987550635, "grad_norm": 0.3586617410182953, "learning_rate": 1.3323850195876795e-05, "loss": 0.4881, "step": 37042 }, { "epoch": 0.7856249072129965, "grad_norm": 0.3943251371383667, "learning_rate": 1.332353565998615e-05, "loss": 0.5414, "step": 37043 }, { "epoch": 0.7856461156709296, "grad_norm": 0.36545053124427795, "learning_rate": 1.3323221120399095e-05, "loss": 0.423, "step": 37044 }, { "epoch": 0.7856673241288626, "grad_norm": 0.3782114088535309, "learning_rate": 1.3322906577115985e-05, "loss": 0.5452, "step": 37045 }, { "epoch": 0.7856885325867956, "grad_norm": 0.4118250906467438, "learning_rate": 1.3322592030137166e-05, "loss": 0.4829, "step": 37046 }, { "epoch": 0.7857097410447287, "grad_norm": 0.3784923553466797, "learning_rate": 1.3322277479462988e-05, "loss": 0.4892, "step": 37047 }, { "epoch": 0.7857309495026616, "grad_norm": 0.33482539653778076, "learning_rate": 1.3321962925093803e-05, "loss": 0.4143, "step": 37048 }, { "epoch": 0.7857521579605947, "grad_norm": 0.3583829998970032, "learning_rate": 1.332164836702996e-05, "loss": 0.4465, "step": 37049 }, { "epoch": 0.7857733664185277, "grad_norm": 0.4376647174358368, "learning_rate": 1.3321333805271808e-05, "loss": 0.6132, "step": 37050 }, { "epoch": 0.7857945748764608, "grad_norm": 0.38149750232696533, "learning_rate": 1.3321019239819697e-05, "loss": 0.546, "step": 37051 }, { "epoch": 0.7858157833343937, "grad_norm": 0.3451332151889801, "learning_rate": 1.3320704670673978e-05, "loss": 0.4813, "step": 37052 }, { "epoch": 0.7858369917923268, "grad_norm": 0.35955145955085754, "learning_rate": 1.3320390097834999e-05, "loss": 0.4751, "step": 37053 }, { "epoch": 0.7858582002502598, "grad_norm": 0.36324286460876465, "learning_rate": 1.332007552130311e-05, "loss": 0.45, "step": 37054 }, { "epoch": 0.7858794087081928, "grad_norm": 0.3602272570133209, "learning_rate": 1.3319760941078666e-05, "loss": 0.47, "step": 37055 }, { "epoch": 0.7859006171661258, "grad_norm": 0.3271905183792114, "learning_rate": 1.3319446357162009e-05, "loss": 0.4063, "step": 37056 }, { "epoch": 0.7859218256240589, "grad_norm": 0.3394748866558075, "learning_rate": 1.3319131769553494e-05, "loss": 0.4318, "step": 37057 }, { "epoch": 0.785943034081992, "grad_norm": 0.32619383931159973, "learning_rate": 1.331881717825347e-05, "loss": 0.3883, "step": 37058 }, { "epoch": 0.7859642425399249, "grad_norm": 0.519561767578125, "learning_rate": 1.3318502583262283e-05, "loss": 0.4138, "step": 37059 }, { "epoch": 0.785985450997858, "grad_norm": 0.3903108239173889, "learning_rate": 1.3318187984580292e-05, "loss": 0.5208, "step": 37060 }, { "epoch": 0.786006659455791, "grad_norm": 0.36252444982528687, "learning_rate": 1.3317873382207835e-05, "loss": 0.5476, "step": 37061 }, { "epoch": 0.786027867913724, "grad_norm": 0.3430638611316681, "learning_rate": 1.3317558776145273e-05, "loss": 0.479, "step": 37062 }, { "epoch": 0.786049076371657, "grad_norm": 0.3426109850406647, "learning_rate": 1.331724416639295e-05, "loss": 0.4803, "step": 37063 }, { "epoch": 0.7860702848295901, "grad_norm": 0.34300827980041504, "learning_rate": 1.3316929552951215e-05, "loss": 0.5128, "step": 37064 }, { "epoch": 0.786091493287523, "grad_norm": 0.34059038758277893, "learning_rate": 1.3316614935820422e-05, "loss": 0.4323, "step": 37065 }, { "epoch": 0.7861127017454561, "grad_norm": 0.5538220405578613, "learning_rate": 1.3316300315000916e-05, "loss": 0.4944, "step": 37066 }, { "epoch": 0.7861339102033891, "grad_norm": 0.3745616376399994, "learning_rate": 1.3315985690493049e-05, "loss": 0.5593, "step": 37067 }, { "epoch": 0.7861551186613221, "grad_norm": 0.36570703983306885, "learning_rate": 1.3315671062297175e-05, "loss": 0.5207, "step": 37068 }, { "epoch": 0.7861763271192551, "grad_norm": 0.37490442395210266, "learning_rate": 1.331535643041364e-05, "loss": 0.4116, "step": 37069 }, { "epoch": 0.7861975355771882, "grad_norm": 0.38744404911994934, "learning_rate": 1.3315041794842793e-05, "loss": 0.5029, "step": 37070 }, { "epoch": 0.7862187440351212, "grad_norm": 0.36361217498779297, "learning_rate": 1.3314727155584986e-05, "loss": 0.4815, "step": 37071 }, { "epoch": 0.7862399524930542, "grad_norm": 0.37947213649749756, "learning_rate": 1.3314412512640566e-05, "loss": 0.4882, "step": 37072 }, { "epoch": 0.7862611609509873, "grad_norm": 0.38231799006462097, "learning_rate": 1.3314097866009887e-05, "loss": 0.4702, "step": 37073 }, { "epoch": 0.7862823694089203, "grad_norm": 0.3300519287586212, "learning_rate": 1.3313783215693299e-05, "loss": 0.4385, "step": 37074 }, { "epoch": 0.7863035778668533, "grad_norm": 0.34637126326560974, "learning_rate": 1.3313468561691147e-05, "loss": 0.535, "step": 37075 }, { "epoch": 0.7863247863247863, "grad_norm": 0.3306473195552826, "learning_rate": 1.3313153904003787e-05, "loss": 0.4694, "step": 37076 }, { "epoch": 0.7863459947827194, "grad_norm": 0.32715001702308655, "learning_rate": 1.3312839242631564e-05, "loss": 0.5103, "step": 37077 }, { "epoch": 0.7863672032406523, "grad_norm": 0.39686447381973267, "learning_rate": 1.331252457757483e-05, "loss": 0.5444, "step": 37078 }, { "epoch": 0.7863884116985854, "grad_norm": 0.4742825925350189, "learning_rate": 1.3312209908833934e-05, "loss": 0.4896, "step": 37079 }, { "epoch": 0.7864096201565184, "grad_norm": 0.3344835340976715, "learning_rate": 1.3311895236409228e-05, "loss": 0.4204, "step": 37080 }, { "epoch": 0.7864308286144515, "grad_norm": 0.37317368388175964, "learning_rate": 1.3311580560301063e-05, "loss": 0.4882, "step": 37081 }, { "epoch": 0.7864520370723844, "grad_norm": 0.6271417140960693, "learning_rate": 1.3311265880509785e-05, "loss": 0.51, "step": 37082 }, { "epoch": 0.7864732455303175, "grad_norm": 0.3916500508785248, "learning_rate": 1.3310951197035746e-05, "loss": 0.532, "step": 37083 }, { "epoch": 0.7864944539882505, "grad_norm": 0.387419193983078, "learning_rate": 1.3310636509879295e-05, "loss": 0.4343, "step": 37084 }, { "epoch": 0.7865156624461835, "grad_norm": 0.37484630942344666, "learning_rate": 1.3310321819040781e-05, "loss": 0.5178, "step": 37085 }, { "epoch": 0.7865368709041166, "grad_norm": 0.3856549561023712, "learning_rate": 1.331000712452056e-05, "loss": 0.4599, "step": 37086 }, { "epoch": 0.7865580793620496, "grad_norm": 0.3920011818408966, "learning_rate": 1.3309692426318976e-05, "loss": 0.5669, "step": 37087 }, { "epoch": 0.7865792878199827, "grad_norm": 0.3647317886352539, "learning_rate": 1.330937772443638e-05, "loss": 0.5101, "step": 37088 }, { "epoch": 0.7866004962779156, "grad_norm": 0.3808150887489319, "learning_rate": 1.3309063018873125e-05, "loss": 0.5261, "step": 37089 }, { "epoch": 0.7866217047358487, "grad_norm": 0.43511754274368286, "learning_rate": 1.3308748309629558e-05, "loss": 0.4794, "step": 37090 }, { "epoch": 0.7866429131937817, "grad_norm": 0.35048308968544006, "learning_rate": 1.3308433596706032e-05, "loss": 0.531, "step": 37091 }, { "epoch": 0.7866641216517147, "grad_norm": 0.5102968811988831, "learning_rate": 1.3308118880102892e-05, "loss": 0.4998, "step": 37092 }, { "epoch": 0.7866853301096477, "grad_norm": 0.3646363914012909, "learning_rate": 1.3307804159820488e-05, "loss": 0.4715, "step": 37093 }, { "epoch": 0.7867065385675808, "grad_norm": 0.3795863091945648, "learning_rate": 1.3307489435859178e-05, "loss": 0.468, "step": 37094 }, { "epoch": 0.7867277470255137, "grad_norm": 0.4666381776332855, "learning_rate": 1.3307174708219303e-05, "loss": 0.4497, "step": 37095 }, { "epoch": 0.7867489554834468, "grad_norm": 0.3308603763580322, "learning_rate": 1.330685997690122e-05, "loss": 0.4546, "step": 37096 }, { "epoch": 0.7867701639413798, "grad_norm": 0.40027329325675964, "learning_rate": 1.3306545241905278e-05, "loss": 0.5546, "step": 37097 }, { "epoch": 0.7867913723993128, "grad_norm": 0.39042428135871887, "learning_rate": 1.3306230503231818e-05, "loss": 0.4754, "step": 37098 }, { "epoch": 0.7868125808572459, "grad_norm": 0.3878394067287445, "learning_rate": 1.3305915760881204e-05, "loss": 0.501, "step": 37099 }, { "epoch": 0.7868337893151789, "grad_norm": 0.41342398524284363, "learning_rate": 1.3305601014853778e-05, "loss": 0.4012, "step": 37100 }, { "epoch": 0.786854997773112, "grad_norm": 0.360086053609848, "learning_rate": 1.3305286265149891e-05, "loss": 0.4394, "step": 37101 }, { "epoch": 0.7868762062310449, "grad_norm": 0.42046162486076355, "learning_rate": 1.3304971511769891e-05, "loss": 0.493, "step": 37102 }, { "epoch": 0.786897414688978, "grad_norm": 0.3684708774089813, "learning_rate": 1.3304656754714131e-05, "loss": 0.5143, "step": 37103 }, { "epoch": 0.786918623146911, "grad_norm": 0.37097230553627014, "learning_rate": 1.3304341993982961e-05, "loss": 0.4107, "step": 37104 }, { "epoch": 0.786939831604844, "grad_norm": 0.36793166399002075, "learning_rate": 1.3304027229576732e-05, "loss": 0.4984, "step": 37105 }, { "epoch": 0.786961040062777, "grad_norm": 0.34366878867149353, "learning_rate": 1.3303712461495792e-05, "loss": 0.47, "step": 37106 }, { "epoch": 0.7869822485207101, "grad_norm": 0.37550538778305054, "learning_rate": 1.330339768974049e-05, "loss": 0.4689, "step": 37107 }, { "epoch": 0.787003456978643, "grad_norm": 0.4625295400619507, "learning_rate": 1.330308291431118e-05, "loss": 0.4889, "step": 37108 }, { "epoch": 0.7870246654365761, "grad_norm": 0.3497580885887146, "learning_rate": 1.3302768135208208e-05, "loss": 0.4301, "step": 37109 }, { "epoch": 0.7870458738945091, "grad_norm": 0.39028921723365784, "learning_rate": 1.3302453352431927e-05, "loss": 0.5327, "step": 37110 }, { "epoch": 0.7870670823524422, "grad_norm": 0.3467576801776886, "learning_rate": 1.3302138565982685e-05, "loss": 0.4898, "step": 37111 }, { "epoch": 0.7870882908103751, "grad_norm": 0.36400192975997925, "learning_rate": 1.3301823775860834e-05, "loss": 0.4614, "step": 37112 }, { "epoch": 0.7871094992683082, "grad_norm": 0.3446563184261322, "learning_rate": 1.3301508982066724e-05, "loss": 0.458, "step": 37113 }, { "epoch": 0.7871307077262413, "grad_norm": 0.5564420223236084, "learning_rate": 1.3301194184600705e-05, "loss": 0.4648, "step": 37114 }, { "epoch": 0.7871519161841742, "grad_norm": 0.3071438670158386, "learning_rate": 1.3300879383463126e-05, "loss": 0.3845, "step": 37115 }, { "epoch": 0.7871731246421073, "grad_norm": 0.3442266285419464, "learning_rate": 1.3300564578654337e-05, "loss": 0.4682, "step": 37116 }, { "epoch": 0.7871943331000403, "grad_norm": 0.3567771911621094, "learning_rate": 1.3300249770174686e-05, "loss": 0.502, "step": 37117 }, { "epoch": 0.7872155415579734, "grad_norm": 0.34924694895744324, "learning_rate": 1.3299934958024527e-05, "loss": 0.5025, "step": 37118 }, { "epoch": 0.7872367500159063, "grad_norm": 0.35661110281944275, "learning_rate": 1.3299620142204213e-05, "loss": 0.472, "step": 37119 }, { "epoch": 0.7872579584738394, "grad_norm": 0.36952516436576843, "learning_rate": 1.3299305322714088e-05, "loss": 0.6004, "step": 37120 }, { "epoch": 0.7872791669317724, "grad_norm": 0.3711235821247101, "learning_rate": 1.3298990499554503e-05, "loss": 0.5311, "step": 37121 }, { "epoch": 0.7873003753897054, "grad_norm": 0.34889620542526245, "learning_rate": 1.3298675672725812e-05, "loss": 0.453, "step": 37122 }, { "epoch": 0.7873215838476384, "grad_norm": 0.5837450623512268, "learning_rate": 1.329836084222836e-05, "loss": 0.4433, "step": 37123 }, { "epoch": 0.7873427923055715, "grad_norm": 0.36246007680892944, "learning_rate": 1.32980460080625e-05, "loss": 0.4591, "step": 37124 }, { "epoch": 0.7873640007635044, "grad_norm": 0.34800851345062256, "learning_rate": 1.3297731170228582e-05, "loss": 0.4722, "step": 37125 }, { "epoch": 0.7873852092214375, "grad_norm": 0.38223570585250854, "learning_rate": 1.3297416328726956e-05, "loss": 0.4875, "step": 37126 }, { "epoch": 0.7874064176793706, "grad_norm": 0.4145348072052002, "learning_rate": 1.3297101483557975e-05, "loss": 0.5008, "step": 37127 }, { "epoch": 0.7874276261373035, "grad_norm": 0.35247287154197693, "learning_rate": 1.3296786634721984e-05, "loss": 0.4868, "step": 37128 }, { "epoch": 0.7874488345952366, "grad_norm": 0.4328968822956085, "learning_rate": 1.3296471782219334e-05, "loss": 0.5428, "step": 37129 }, { "epoch": 0.7874700430531696, "grad_norm": 0.5147634744644165, "learning_rate": 1.3296156926050379e-05, "loss": 0.4459, "step": 37130 }, { "epoch": 0.7874912515111027, "grad_norm": 0.39022397994995117, "learning_rate": 1.3295842066215468e-05, "loss": 0.3976, "step": 37131 }, { "epoch": 0.7875124599690356, "grad_norm": 0.4090001881122589, "learning_rate": 1.3295527202714946e-05, "loss": 0.5239, "step": 37132 }, { "epoch": 0.7875336684269687, "grad_norm": 0.3298332393169403, "learning_rate": 1.329521233554917e-05, "loss": 0.5279, "step": 37133 }, { "epoch": 0.7875548768849017, "grad_norm": 0.37997907400131226, "learning_rate": 1.3294897464718488e-05, "loss": 0.3603, "step": 37134 }, { "epoch": 0.7875760853428347, "grad_norm": 0.3759419322013855, "learning_rate": 1.3294582590223248e-05, "loss": 0.4634, "step": 37135 }, { "epoch": 0.7875972938007677, "grad_norm": 0.4379948675632477, "learning_rate": 1.3294267712063802e-05, "loss": 0.5461, "step": 37136 }, { "epoch": 0.7876185022587008, "grad_norm": 0.4530336558818817, "learning_rate": 1.3293952830240503e-05, "loss": 0.4724, "step": 37137 }, { "epoch": 0.7876397107166337, "grad_norm": 0.3474203646183014, "learning_rate": 1.3293637944753694e-05, "loss": 0.4761, "step": 37138 }, { "epoch": 0.7876609191745668, "grad_norm": 0.4434424638748169, "learning_rate": 1.329332305560373e-05, "loss": 0.4908, "step": 37139 }, { "epoch": 0.7876821276324999, "grad_norm": 0.4085039496421814, "learning_rate": 1.3293008162790964e-05, "loss": 0.4462, "step": 37140 }, { "epoch": 0.7877033360904329, "grad_norm": 0.3820437788963318, "learning_rate": 1.3292693266315742e-05, "loss": 0.5365, "step": 37141 }, { "epoch": 0.7877245445483659, "grad_norm": 0.36998650431632996, "learning_rate": 1.3292378366178413e-05, "loss": 0.4732, "step": 37142 }, { "epoch": 0.7877457530062989, "grad_norm": 0.36197352409362793, "learning_rate": 1.3292063462379328e-05, "loss": 0.4897, "step": 37143 }, { "epoch": 0.787766961464232, "grad_norm": 0.36580905318260193, "learning_rate": 1.3291748554918844e-05, "loss": 0.4958, "step": 37144 }, { "epoch": 0.7877881699221649, "grad_norm": 0.38501718640327454, "learning_rate": 1.3291433643797302e-05, "loss": 0.5366, "step": 37145 }, { "epoch": 0.787809378380098, "grad_norm": 0.5676653981208801, "learning_rate": 1.3291118729015057e-05, "loss": 0.5027, "step": 37146 }, { "epoch": 0.787830586838031, "grad_norm": 0.3583624064922333, "learning_rate": 1.3290803810572461e-05, "loss": 0.4595, "step": 37147 }, { "epoch": 0.787851795295964, "grad_norm": 0.40881282091140747, "learning_rate": 1.329048888846986e-05, "loss": 0.5772, "step": 37148 }, { "epoch": 0.787873003753897, "grad_norm": 0.33547908067703247, "learning_rate": 1.3290173962707602e-05, "loss": 0.4838, "step": 37149 }, { "epoch": 0.7878942122118301, "grad_norm": 0.48614028096199036, "learning_rate": 1.3289859033286043e-05, "loss": 0.469, "step": 37150 }, { "epoch": 0.7879154206697631, "grad_norm": 0.3728896677494049, "learning_rate": 1.3289544100205537e-05, "loss": 0.4722, "step": 37151 }, { "epoch": 0.7879366291276961, "grad_norm": 0.35768890380859375, "learning_rate": 1.3289229163466421e-05, "loss": 0.5376, "step": 37152 }, { "epoch": 0.7879578375856291, "grad_norm": 0.3337467312812805, "learning_rate": 1.3288914223069056e-05, "loss": 0.4681, "step": 37153 }, { "epoch": 0.7879790460435622, "grad_norm": 0.38293904066085815, "learning_rate": 1.328859927901379e-05, "loss": 0.5424, "step": 37154 }, { "epoch": 0.7880002545014952, "grad_norm": 0.364092618227005, "learning_rate": 1.3288284331300971e-05, "loss": 0.5268, "step": 37155 }, { "epoch": 0.7880214629594282, "grad_norm": 0.3888421654701233, "learning_rate": 1.3287969379930952e-05, "loss": 0.4428, "step": 37156 }, { "epoch": 0.7880426714173613, "grad_norm": 0.34986943006515503, "learning_rate": 1.328765442490408e-05, "loss": 0.4381, "step": 37157 }, { "epoch": 0.7880638798752942, "grad_norm": 0.3733953535556793, "learning_rate": 1.3287339466220708e-05, "loss": 0.4864, "step": 37158 }, { "epoch": 0.7880850883332273, "grad_norm": 0.4448925256729126, "learning_rate": 1.3287024503881187e-05, "loss": 0.421, "step": 37159 }, { "epoch": 0.7881062967911603, "grad_norm": 0.408200204372406, "learning_rate": 1.3286709537885865e-05, "loss": 0.5259, "step": 37160 }, { "epoch": 0.7881275052490934, "grad_norm": 0.39047670364379883, "learning_rate": 1.3286394568235094e-05, "loss": 0.486, "step": 37161 }, { "epoch": 0.7881487137070263, "grad_norm": 0.3636328876018524, "learning_rate": 1.3286079594929222e-05, "loss": 0.5415, "step": 37162 }, { "epoch": 0.7881699221649594, "grad_norm": 0.37686121463775635, "learning_rate": 1.3285764617968601e-05, "loss": 0.4498, "step": 37163 }, { "epoch": 0.7881911306228924, "grad_norm": 0.41599705815315247, "learning_rate": 1.3285449637353582e-05, "loss": 0.5652, "step": 37164 }, { "epoch": 0.7882123390808254, "grad_norm": 0.31672540307044983, "learning_rate": 1.3285134653084517e-05, "loss": 0.4554, "step": 37165 }, { "epoch": 0.7882335475387584, "grad_norm": 0.36726242303848267, "learning_rate": 1.328481966516175e-05, "loss": 0.4958, "step": 37166 }, { "epoch": 0.7882547559966915, "grad_norm": 0.3452977240085602, "learning_rate": 1.3284504673585637e-05, "loss": 0.4305, "step": 37167 }, { "epoch": 0.7882759644546246, "grad_norm": 0.4363369643688202, "learning_rate": 1.3284189678356523e-05, "loss": 0.5049, "step": 37168 }, { "epoch": 0.7882971729125575, "grad_norm": 0.36269891262054443, "learning_rate": 1.3283874679474764e-05, "loss": 0.5258, "step": 37169 }, { "epoch": 0.7883183813704906, "grad_norm": 0.367753803730011, "learning_rate": 1.3283559676940709e-05, "loss": 0.4516, "step": 37170 }, { "epoch": 0.7883395898284236, "grad_norm": 0.34122076630592346, "learning_rate": 1.3283244670754707e-05, "loss": 0.467, "step": 37171 }, { "epoch": 0.7883607982863566, "grad_norm": 0.4998075067996979, "learning_rate": 1.328292966091711e-05, "loss": 0.4704, "step": 37172 }, { "epoch": 0.7883820067442896, "grad_norm": 0.41534337401390076, "learning_rate": 1.3282614647428267e-05, "loss": 0.5609, "step": 37173 }, { "epoch": 0.7884032152022227, "grad_norm": 0.4158851206302643, "learning_rate": 1.3282299630288526e-05, "loss": 0.5435, "step": 37174 }, { "epoch": 0.7884244236601556, "grad_norm": 0.33599817752838135, "learning_rate": 1.328198460949824e-05, "loss": 0.4796, "step": 37175 }, { "epoch": 0.7884456321180887, "grad_norm": 0.33158183097839355, "learning_rate": 1.3281669585057761e-05, "loss": 0.551, "step": 37176 }, { "epoch": 0.7884668405760217, "grad_norm": 0.42767298221588135, "learning_rate": 1.3281354556967436e-05, "loss": 0.4841, "step": 37177 }, { "epoch": 0.7884880490339548, "grad_norm": 0.35692960023880005, "learning_rate": 1.328103952522762e-05, "loss": 0.4884, "step": 37178 }, { "epoch": 0.7885092574918877, "grad_norm": 0.3693429231643677, "learning_rate": 1.3280724489838659e-05, "loss": 0.5039, "step": 37179 }, { "epoch": 0.7885304659498208, "grad_norm": 0.40863507986068726, "learning_rate": 1.3280409450800905e-05, "loss": 0.4749, "step": 37180 }, { "epoch": 0.7885516744077539, "grad_norm": 0.36629587411880493, "learning_rate": 1.3280094408114703e-05, "loss": 0.491, "step": 37181 }, { "epoch": 0.7885728828656868, "grad_norm": 0.36443495750427246, "learning_rate": 1.3279779361780417e-05, "loss": 0.5268, "step": 37182 }, { "epoch": 0.7885940913236199, "grad_norm": 0.3447358012199402, "learning_rate": 1.3279464311798383e-05, "loss": 0.4552, "step": 37183 }, { "epoch": 0.7886152997815529, "grad_norm": 0.40571674704551697, "learning_rate": 1.327914925816896e-05, "loss": 0.5526, "step": 37184 }, { "epoch": 0.788636508239486, "grad_norm": 0.30612730979919434, "learning_rate": 1.3278834200892492e-05, "loss": 0.387, "step": 37185 }, { "epoch": 0.7886577166974189, "grad_norm": 0.35678547620773315, "learning_rate": 1.3278519139969337e-05, "loss": 0.5127, "step": 37186 }, { "epoch": 0.788678925155352, "grad_norm": 0.33992379903793335, "learning_rate": 1.3278204075399841e-05, "loss": 0.4701, "step": 37187 }, { "epoch": 0.788700133613285, "grad_norm": 0.35871949791908264, "learning_rate": 1.327788900718435e-05, "loss": 0.4483, "step": 37188 }, { "epoch": 0.788721342071218, "grad_norm": 0.3283891975879669, "learning_rate": 1.3277573935323224e-05, "loss": 0.5191, "step": 37189 }, { "epoch": 0.788742550529151, "grad_norm": 0.4021121859550476, "learning_rate": 1.327725885981681e-05, "loss": 0.5517, "step": 37190 }, { "epoch": 0.7887637589870841, "grad_norm": 0.3842545449733734, "learning_rate": 1.3276943780665455e-05, "loss": 0.526, "step": 37191 }, { "epoch": 0.788784967445017, "grad_norm": 0.3714475631713867, "learning_rate": 1.3276628697869511e-05, "loss": 0.5417, "step": 37192 }, { "epoch": 0.7888061759029501, "grad_norm": 0.7111781239509583, "learning_rate": 1.3276313611429333e-05, "loss": 0.463, "step": 37193 }, { "epoch": 0.7888273843608831, "grad_norm": 0.4264369010925293, "learning_rate": 1.327599852134526e-05, "loss": 0.4705, "step": 37194 }, { "epoch": 0.7888485928188161, "grad_norm": 0.37368249893188477, "learning_rate": 1.3275683427617656e-05, "loss": 0.5064, "step": 37195 }, { "epoch": 0.7888698012767492, "grad_norm": 0.3437490463256836, "learning_rate": 1.3275368330246864e-05, "loss": 0.4894, "step": 37196 }, { "epoch": 0.7888910097346822, "grad_norm": 0.3469877541065216, "learning_rate": 1.3275053229233235e-05, "loss": 0.4897, "step": 37197 }, { "epoch": 0.7889122181926153, "grad_norm": 0.3640378415584564, "learning_rate": 1.3274738124577122e-05, "loss": 0.5319, "step": 37198 }, { "epoch": 0.7889334266505482, "grad_norm": 0.3260873854160309, "learning_rate": 1.327442301627887e-05, "loss": 0.5059, "step": 37199 }, { "epoch": 0.7889546351084813, "grad_norm": 0.3412115275859833, "learning_rate": 1.3274107904338837e-05, "loss": 0.4714, "step": 37200 }, { "epoch": 0.7889758435664143, "grad_norm": 0.5833460688591003, "learning_rate": 1.3273792788757366e-05, "loss": 0.4857, "step": 37201 }, { "epoch": 0.7889970520243473, "grad_norm": 0.3236943781375885, "learning_rate": 1.3273477669534813e-05, "loss": 0.4435, "step": 37202 }, { "epoch": 0.7890182604822803, "grad_norm": 0.41086193919181824, "learning_rate": 1.327316254667153e-05, "loss": 0.5569, "step": 37203 }, { "epoch": 0.7890394689402134, "grad_norm": 0.3222896158695221, "learning_rate": 1.327284742016786e-05, "loss": 0.4982, "step": 37204 }, { "epoch": 0.7890606773981463, "grad_norm": 0.3426424562931061, "learning_rate": 1.3272532290024159e-05, "loss": 0.4817, "step": 37205 }, { "epoch": 0.7890818858560794, "grad_norm": 0.3501019775867462, "learning_rate": 1.3272217156240776e-05, "loss": 0.4633, "step": 37206 }, { "epoch": 0.7891030943140124, "grad_norm": 0.35060998797416687, "learning_rate": 1.327190201881806e-05, "loss": 0.4662, "step": 37207 }, { "epoch": 0.7891243027719455, "grad_norm": 0.38558465242385864, "learning_rate": 1.3271586877756362e-05, "loss": 0.5215, "step": 37208 }, { "epoch": 0.7891455112298785, "grad_norm": 0.3813609480857849, "learning_rate": 1.3271271733056036e-05, "loss": 0.5199, "step": 37209 }, { "epoch": 0.7891667196878115, "grad_norm": 0.3448812663555145, "learning_rate": 1.3270956584717432e-05, "loss": 0.503, "step": 37210 }, { "epoch": 0.7891879281457446, "grad_norm": 0.38118892908096313, "learning_rate": 1.3270641432740895e-05, "loss": 0.4888, "step": 37211 }, { "epoch": 0.7892091366036775, "grad_norm": 0.37778258323669434, "learning_rate": 1.327032627712678e-05, "loss": 0.4489, "step": 37212 }, { "epoch": 0.7892303450616106, "grad_norm": 0.40871119499206543, "learning_rate": 1.3270011117875434e-05, "loss": 0.5125, "step": 37213 }, { "epoch": 0.7892515535195436, "grad_norm": 0.4059247076511383, "learning_rate": 1.3269695954987212e-05, "loss": 0.5913, "step": 37214 }, { "epoch": 0.7892727619774766, "grad_norm": 0.3196835517883301, "learning_rate": 1.3269380788462464e-05, "loss": 0.3992, "step": 37215 }, { "epoch": 0.7892939704354096, "grad_norm": 0.4112609028816223, "learning_rate": 1.3269065618301538e-05, "loss": 0.4706, "step": 37216 }, { "epoch": 0.7893151788933427, "grad_norm": 0.3334905207157135, "learning_rate": 1.3268750444504787e-05, "loss": 0.5207, "step": 37217 }, { "epoch": 0.7893363873512756, "grad_norm": 0.3910633623600006, "learning_rate": 1.3268435267072559e-05, "loss": 0.4549, "step": 37218 }, { "epoch": 0.7893575958092087, "grad_norm": 0.3921438753604889, "learning_rate": 1.3268120086005205e-05, "loss": 0.4867, "step": 37219 }, { "epoch": 0.7893788042671417, "grad_norm": 0.3386157751083374, "learning_rate": 1.3267804901303076e-05, "loss": 0.4143, "step": 37220 }, { "epoch": 0.7894000127250748, "grad_norm": 0.3801589012145996, "learning_rate": 1.3267489712966523e-05, "loss": 0.4796, "step": 37221 }, { "epoch": 0.7894212211830078, "grad_norm": 0.36128681898117065, "learning_rate": 1.3267174520995898e-05, "loss": 0.4756, "step": 37222 }, { "epoch": 0.7894424296409408, "grad_norm": 0.3692528009414673, "learning_rate": 1.3266859325391548e-05, "loss": 0.4322, "step": 37223 }, { "epoch": 0.7894636380988739, "grad_norm": 0.34528928995132446, "learning_rate": 1.3266544126153829e-05, "loss": 0.4435, "step": 37224 }, { "epoch": 0.7894848465568068, "grad_norm": 0.3369123637676239, "learning_rate": 1.3266228923283084e-05, "loss": 0.4767, "step": 37225 }, { "epoch": 0.7895060550147399, "grad_norm": 0.5242788195610046, "learning_rate": 1.3265913716779668e-05, "loss": 0.5517, "step": 37226 }, { "epoch": 0.7895272634726729, "grad_norm": 0.4381345212459564, "learning_rate": 1.3265598506643933e-05, "loss": 0.497, "step": 37227 }, { "epoch": 0.789548471930606, "grad_norm": 0.3540444076061249, "learning_rate": 1.3265283292876226e-05, "loss": 0.4653, "step": 37228 }, { "epoch": 0.7895696803885389, "grad_norm": 0.3684904873371124, "learning_rate": 1.32649680754769e-05, "loss": 0.4635, "step": 37229 }, { "epoch": 0.789590888846472, "grad_norm": 0.5280359387397766, "learning_rate": 1.3264652854446306e-05, "loss": 0.511, "step": 37230 }, { "epoch": 0.789612097304405, "grad_norm": 0.427018404006958, "learning_rate": 1.326433762978479e-05, "loss": 0.5479, "step": 37231 }, { "epoch": 0.789633305762338, "grad_norm": 0.5027798414230347, "learning_rate": 1.3264022401492711e-05, "loss": 0.4634, "step": 37232 }, { "epoch": 0.789654514220271, "grad_norm": 0.35258644819259644, "learning_rate": 1.326370716957041e-05, "loss": 0.4028, "step": 37233 }, { "epoch": 0.7896757226782041, "grad_norm": 0.346024751663208, "learning_rate": 1.3263391934018243e-05, "loss": 0.5519, "step": 37234 }, { "epoch": 0.789696931136137, "grad_norm": 0.3906082510948181, "learning_rate": 1.3263076694836562e-05, "loss": 0.4564, "step": 37235 }, { "epoch": 0.7897181395940701, "grad_norm": 0.4055075943470001, "learning_rate": 1.3262761452025715e-05, "loss": 0.5488, "step": 37236 }, { "epoch": 0.7897393480520032, "grad_norm": 0.3410720229148865, "learning_rate": 1.3262446205586053e-05, "loss": 0.4348, "step": 37237 }, { "epoch": 0.7897605565099362, "grad_norm": 0.34707021713256836, "learning_rate": 1.3262130955517927e-05, "loss": 0.4254, "step": 37238 }, { "epoch": 0.7897817649678692, "grad_norm": 0.39781445264816284, "learning_rate": 1.3261815701821683e-05, "loss": 0.5447, "step": 37239 }, { "epoch": 0.7898029734258022, "grad_norm": 0.36391502618789673, "learning_rate": 1.3261500444497681e-05, "loss": 0.4733, "step": 37240 }, { "epoch": 0.7898241818837353, "grad_norm": 0.42107415199279785, "learning_rate": 1.3261185183546266e-05, "loss": 0.5063, "step": 37241 }, { "epoch": 0.7898453903416682, "grad_norm": 0.35684868693351746, "learning_rate": 1.3260869918967787e-05, "loss": 0.4735, "step": 37242 }, { "epoch": 0.7898665987996013, "grad_norm": 0.4298490881919861, "learning_rate": 1.3260554650762598e-05, "loss": 0.5922, "step": 37243 }, { "epoch": 0.7898878072575343, "grad_norm": 0.4118455946445465, "learning_rate": 1.3260239378931048e-05, "loss": 0.5719, "step": 37244 }, { "epoch": 0.7899090157154673, "grad_norm": 0.4130342900753021, "learning_rate": 1.3259924103473488e-05, "loss": 0.4855, "step": 37245 }, { "epoch": 0.7899302241734003, "grad_norm": 0.36690908670425415, "learning_rate": 1.3259608824390269e-05, "loss": 0.4914, "step": 37246 }, { "epoch": 0.7899514326313334, "grad_norm": 0.37788811326026917, "learning_rate": 1.3259293541681743e-05, "loss": 0.4752, "step": 37247 }, { "epoch": 0.7899726410892663, "grad_norm": 0.3687320947647095, "learning_rate": 1.3258978255348256e-05, "loss": 0.4977, "step": 37248 }, { "epoch": 0.7899938495471994, "grad_norm": 0.3298614025115967, "learning_rate": 1.3258662965390165e-05, "loss": 0.468, "step": 37249 }, { "epoch": 0.7900150580051325, "grad_norm": 0.4016258418560028, "learning_rate": 1.3258347671807814e-05, "loss": 0.5789, "step": 37250 }, { "epoch": 0.7900362664630655, "grad_norm": 0.32964617013931274, "learning_rate": 1.3258032374601561e-05, "loss": 0.4264, "step": 37251 }, { "epoch": 0.7900574749209985, "grad_norm": 0.3606370985507965, "learning_rate": 1.3257717073771748e-05, "loss": 0.495, "step": 37252 }, { "epoch": 0.7900786833789315, "grad_norm": 0.36140838265419006, "learning_rate": 1.3257401769318732e-05, "loss": 0.4648, "step": 37253 }, { "epoch": 0.7900998918368646, "grad_norm": 0.3441483974456787, "learning_rate": 1.3257086461242863e-05, "loss": 0.4566, "step": 37254 }, { "epoch": 0.7901211002947975, "grad_norm": 0.3281177282333374, "learning_rate": 1.3256771149544494e-05, "loss": 0.4398, "step": 37255 }, { "epoch": 0.7901423087527306, "grad_norm": 0.8263603448867798, "learning_rate": 1.3256455834223967e-05, "loss": 0.5644, "step": 37256 }, { "epoch": 0.7901635172106636, "grad_norm": 0.4032524824142456, "learning_rate": 1.325614051528164e-05, "loss": 0.4997, "step": 37257 }, { "epoch": 0.7901847256685967, "grad_norm": 0.3798641264438629, "learning_rate": 1.3255825192717862e-05, "loss": 0.4852, "step": 37258 }, { "epoch": 0.7902059341265296, "grad_norm": 0.35682737827301025, "learning_rate": 1.325550986653298e-05, "loss": 0.4507, "step": 37259 }, { "epoch": 0.7902271425844627, "grad_norm": 0.35055574774742126, "learning_rate": 1.3255194536727353e-05, "loss": 0.4892, "step": 37260 }, { "epoch": 0.7902483510423957, "grad_norm": 0.3538915514945984, "learning_rate": 1.3254879203301328e-05, "loss": 0.4051, "step": 37261 }, { "epoch": 0.7902695595003287, "grad_norm": 0.38209760189056396, "learning_rate": 1.325456386625525e-05, "loss": 0.456, "step": 37262 }, { "epoch": 0.7902907679582618, "grad_norm": 0.4661884605884552, "learning_rate": 1.3254248525589479e-05, "loss": 0.5653, "step": 37263 }, { "epoch": 0.7903119764161948, "grad_norm": 0.3338101804256439, "learning_rate": 1.3253933181304357e-05, "loss": 0.4116, "step": 37264 }, { "epoch": 0.7903331848741278, "grad_norm": 0.37903255224227905, "learning_rate": 1.3253617833400238e-05, "loss": 0.5336, "step": 37265 }, { "epoch": 0.7903543933320608, "grad_norm": 0.3900778889656067, "learning_rate": 1.3253302481877479e-05, "loss": 0.5028, "step": 37266 }, { "epoch": 0.7903756017899939, "grad_norm": 0.34622013568878174, "learning_rate": 1.3252987126736422e-05, "loss": 0.5096, "step": 37267 }, { "epoch": 0.7903968102479269, "grad_norm": 0.4128398001194, "learning_rate": 1.3252671767977421e-05, "loss": 0.468, "step": 37268 }, { "epoch": 0.7904180187058599, "grad_norm": 0.4440547525882721, "learning_rate": 1.3252356405600826e-05, "loss": 0.5413, "step": 37269 }, { "epoch": 0.7904392271637929, "grad_norm": 0.33100786805152893, "learning_rate": 1.325204103960699e-05, "loss": 0.5195, "step": 37270 }, { "epoch": 0.790460435621726, "grad_norm": 0.33417361974716187, "learning_rate": 1.3251725669996258e-05, "loss": 0.3849, "step": 37271 }, { "epoch": 0.7904816440796589, "grad_norm": 0.47588998079299927, "learning_rate": 1.3251410296768992e-05, "loss": 0.3731, "step": 37272 }, { "epoch": 0.790502852537592, "grad_norm": 0.33956167101860046, "learning_rate": 1.3251094919925529e-05, "loss": 0.5074, "step": 37273 }, { "epoch": 0.790524060995525, "grad_norm": 0.3738135099411011, "learning_rate": 1.3250779539466232e-05, "loss": 0.4831, "step": 37274 }, { "epoch": 0.790545269453458, "grad_norm": 0.340243399143219, "learning_rate": 1.3250464155391445e-05, "loss": 0.5086, "step": 37275 }, { "epoch": 0.790566477911391, "grad_norm": 0.3739962577819824, "learning_rate": 1.3250148767701517e-05, "loss": 0.495, "step": 37276 }, { "epoch": 0.7905876863693241, "grad_norm": 0.32688581943511963, "learning_rate": 1.3249833376396803e-05, "loss": 0.4011, "step": 37277 }, { "epoch": 0.7906088948272572, "grad_norm": 0.38067787885665894, "learning_rate": 1.3249517981477653e-05, "loss": 0.4822, "step": 37278 }, { "epoch": 0.7906301032851901, "grad_norm": 0.37789109349250793, "learning_rate": 1.3249202582944416e-05, "loss": 0.4659, "step": 37279 }, { "epoch": 0.7906513117431232, "grad_norm": 0.39271023869514465, "learning_rate": 1.3248887180797447e-05, "loss": 0.4112, "step": 37280 }, { "epoch": 0.7906725202010562, "grad_norm": 0.37448111176490784, "learning_rate": 1.3248571775037093e-05, "loss": 0.5161, "step": 37281 }, { "epoch": 0.7906937286589892, "grad_norm": 0.3694884181022644, "learning_rate": 1.3248256365663706e-05, "loss": 0.5601, "step": 37282 }, { "epoch": 0.7907149371169222, "grad_norm": 0.33512550592422485, "learning_rate": 1.3247940952677636e-05, "loss": 0.4642, "step": 37283 }, { "epoch": 0.7907361455748553, "grad_norm": 0.3344220817089081, "learning_rate": 1.3247625536079231e-05, "loss": 0.5098, "step": 37284 }, { "epoch": 0.7907573540327882, "grad_norm": 0.34891754388809204, "learning_rate": 1.3247310115868847e-05, "loss": 0.4889, "step": 37285 }, { "epoch": 0.7907785624907213, "grad_norm": 0.35140910744667053, "learning_rate": 1.3246994692046837e-05, "loss": 0.4236, "step": 37286 }, { "epoch": 0.7907997709486543, "grad_norm": 0.37243983149528503, "learning_rate": 1.3246679264613542e-05, "loss": 0.4839, "step": 37287 }, { "epoch": 0.7908209794065874, "grad_norm": 0.35223978757858276, "learning_rate": 1.3246363833569324e-05, "loss": 0.4609, "step": 37288 }, { "epoch": 0.7908421878645203, "grad_norm": 0.35534948110580444, "learning_rate": 1.3246048398914527e-05, "loss": 0.4134, "step": 37289 }, { "epoch": 0.7908633963224534, "grad_norm": 0.46898016333580017, "learning_rate": 1.3245732960649498e-05, "loss": 0.4339, "step": 37290 }, { "epoch": 0.7908846047803865, "grad_norm": 0.3230934739112854, "learning_rate": 1.3245417518774597e-05, "loss": 0.4671, "step": 37291 }, { "epoch": 0.7909058132383194, "grad_norm": 0.35119518637657166, "learning_rate": 1.3245102073290173e-05, "loss": 0.4308, "step": 37292 }, { "epoch": 0.7909270216962525, "grad_norm": 0.3878597021102905, "learning_rate": 1.324478662419657e-05, "loss": 0.4997, "step": 37293 }, { "epoch": 0.7909482301541855, "grad_norm": 0.36669179797172546, "learning_rate": 1.3244471171494147e-05, "loss": 0.5681, "step": 37294 }, { "epoch": 0.7909694386121185, "grad_norm": 0.3575586676597595, "learning_rate": 1.324415571518325e-05, "loss": 0.4585, "step": 37295 }, { "epoch": 0.7909906470700515, "grad_norm": 0.3776986598968506, "learning_rate": 1.324384025526423e-05, "loss": 0.52, "step": 37296 }, { "epoch": 0.7910118555279846, "grad_norm": 0.42841294407844543, "learning_rate": 1.324352479173744e-05, "loss": 0.6146, "step": 37297 }, { "epoch": 0.7910330639859176, "grad_norm": 0.33683881163597107, "learning_rate": 1.324320932460323e-05, "loss": 0.485, "step": 37298 }, { "epoch": 0.7910542724438506, "grad_norm": 0.3637215495109558, "learning_rate": 1.3242893853861951e-05, "loss": 0.4485, "step": 37299 }, { "epoch": 0.7910754809017836, "grad_norm": 0.44083741307258606, "learning_rate": 1.3242578379513955e-05, "loss": 0.4789, "step": 37300 }, { "epoch": 0.7910966893597167, "grad_norm": 0.34866777062416077, "learning_rate": 1.3242262901559588e-05, "loss": 0.4621, "step": 37301 }, { "epoch": 0.7911178978176496, "grad_norm": 0.40093907713890076, "learning_rate": 1.3241947419999208e-05, "loss": 0.5486, "step": 37302 }, { "epoch": 0.7911391062755827, "grad_norm": 0.4707852602005005, "learning_rate": 1.324163193483316e-05, "loss": 0.4961, "step": 37303 }, { "epoch": 0.7911603147335158, "grad_norm": 0.4143620431423187, "learning_rate": 1.3241316446061796e-05, "loss": 0.5003, "step": 37304 }, { "epoch": 0.7911815231914487, "grad_norm": 0.42190787196159363, "learning_rate": 1.324100095368547e-05, "loss": 0.5072, "step": 37305 }, { "epoch": 0.7912027316493818, "grad_norm": 0.3520248830318451, "learning_rate": 1.3240685457704533e-05, "loss": 0.5038, "step": 37306 }, { "epoch": 0.7912239401073148, "grad_norm": 0.38661816716194153, "learning_rate": 1.324036995811933e-05, "loss": 0.5616, "step": 37307 }, { "epoch": 0.7912451485652479, "grad_norm": 0.35302436351776123, "learning_rate": 1.3240054454930218e-05, "loss": 0.5583, "step": 37308 }, { "epoch": 0.7912663570231808, "grad_norm": 0.39866065979003906, "learning_rate": 1.3239738948137544e-05, "loss": 0.5223, "step": 37309 }, { "epoch": 0.7912875654811139, "grad_norm": 0.3096366822719574, "learning_rate": 1.323942343774166e-05, "loss": 0.4773, "step": 37310 }, { "epoch": 0.7913087739390469, "grad_norm": 0.3243841528892517, "learning_rate": 1.323910792374292e-05, "loss": 0.5149, "step": 37311 }, { "epoch": 0.7913299823969799, "grad_norm": 0.3460746705532074, "learning_rate": 1.3238792406141669e-05, "loss": 0.4978, "step": 37312 }, { "epoch": 0.7913511908549129, "grad_norm": 0.35321855545043945, "learning_rate": 1.3238476884938264e-05, "loss": 0.4392, "step": 37313 }, { "epoch": 0.791372399312846, "grad_norm": 0.3051077723503113, "learning_rate": 1.3238161360133055e-05, "loss": 0.422, "step": 37314 }, { "epoch": 0.7913936077707789, "grad_norm": 0.3838498294353485, "learning_rate": 1.3237845831726386e-05, "loss": 0.4876, "step": 37315 }, { "epoch": 0.791414816228712, "grad_norm": 0.3693481683731079, "learning_rate": 1.3237530299718616e-05, "loss": 0.5502, "step": 37316 }, { "epoch": 0.791436024686645, "grad_norm": 0.41245096921920776, "learning_rate": 1.3237214764110093e-05, "loss": 0.5482, "step": 37317 }, { "epoch": 0.791457233144578, "grad_norm": 0.41426825523376465, "learning_rate": 1.3236899224901165e-05, "loss": 0.4626, "step": 37318 }, { "epoch": 0.7914784416025111, "grad_norm": 0.3622020184993744, "learning_rate": 1.323658368209219e-05, "loss": 0.5132, "step": 37319 }, { "epoch": 0.7914996500604441, "grad_norm": 0.40129542350769043, "learning_rate": 1.3236268135683514e-05, "loss": 0.525, "step": 37320 }, { "epoch": 0.7915208585183772, "grad_norm": 0.4041687846183777, "learning_rate": 1.3235952585675484e-05, "loss": 0.5234, "step": 37321 }, { "epoch": 0.7915420669763101, "grad_norm": 0.373712956905365, "learning_rate": 1.323563703206846e-05, "loss": 0.4415, "step": 37322 }, { "epoch": 0.7915632754342432, "grad_norm": 0.34926193952560425, "learning_rate": 1.3235321474862789e-05, "loss": 0.4915, "step": 37323 }, { "epoch": 0.7915844838921762, "grad_norm": 0.39258965849876404, "learning_rate": 1.3235005914058818e-05, "loss": 0.5095, "step": 37324 }, { "epoch": 0.7916056923501092, "grad_norm": 0.3506458103656769, "learning_rate": 1.3234690349656905e-05, "loss": 0.5094, "step": 37325 }, { "epoch": 0.7916269008080422, "grad_norm": 0.35171326994895935, "learning_rate": 1.3234374781657395e-05, "loss": 0.4229, "step": 37326 }, { "epoch": 0.7916481092659753, "grad_norm": 0.44384220242500305, "learning_rate": 1.3234059210060644e-05, "loss": 0.5538, "step": 37327 }, { "epoch": 0.7916693177239083, "grad_norm": 0.4128173291683197, "learning_rate": 1.3233743634867e-05, "loss": 0.4893, "step": 37328 }, { "epoch": 0.7916905261818413, "grad_norm": 0.40176376700401306, "learning_rate": 1.3233428056076814e-05, "loss": 0.556, "step": 37329 }, { "epoch": 0.7917117346397743, "grad_norm": 0.36854174733161926, "learning_rate": 1.3233112473690435e-05, "loss": 0.5503, "step": 37330 }, { "epoch": 0.7917329430977074, "grad_norm": 0.42523401975631714, "learning_rate": 1.3232796887708219e-05, "loss": 0.3882, "step": 37331 }, { "epoch": 0.7917541515556404, "grad_norm": 0.3762456476688385, "learning_rate": 1.3232481298130512e-05, "loss": 0.496, "step": 37332 }, { "epoch": 0.7917753600135734, "grad_norm": 0.3800816833972931, "learning_rate": 1.3232165704957671e-05, "loss": 0.5674, "step": 37333 }, { "epoch": 0.7917965684715065, "grad_norm": 0.356137216091156, "learning_rate": 1.323185010819004e-05, "loss": 0.4456, "step": 37334 }, { "epoch": 0.7918177769294394, "grad_norm": 0.37101224064826965, "learning_rate": 1.3231534507827975e-05, "loss": 0.4972, "step": 37335 }, { "epoch": 0.7918389853873725, "grad_norm": 0.3255996108055115, "learning_rate": 1.3231218903871825e-05, "loss": 0.4898, "step": 37336 }, { "epoch": 0.7918601938453055, "grad_norm": 0.3194688558578491, "learning_rate": 1.3230903296321943e-05, "loss": 0.4471, "step": 37337 }, { "epoch": 0.7918814023032386, "grad_norm": 0.4064897298812866, "learning_rate": 1.3230587685178676e-05, "loss": 0.5014, "step": 37338 }, { "epoch": 0.7919026107611715, "grad_norm": 0.35162967443466187, "learning_rate": 1.3230272070442378e-05, "loss": 0.4737, "step": 37339 }, { "epoch": 0.7919238192191046, "grad_norm": 0.32956868410110474, "learning_rate": 1.3229956452113399e-05, "loss": 0.473, "step": 37340 }, { "epoch": 0.7919450276770376, "grad_norm": 0.3469986617565155, "learning_rate": 1.3229640830192091e-05, "loss": 0.4491, "step": 37341 }, { "epoch": 0.7919662361349706, "grad_norm": 0.32808107137680054, "learning_rate": 1.3229325204678807e-05, "loss": 0.4777, "step": 37342 }, { "epoch": 0.7919874445929036, "grad_norm": 0.3386330008506775, "learning_rate": 1.3229009575573895e-05, "loss": 0.4964, "step": 37343 }, { "epoch": 0.7920086530508367, "grad_norm": 0.3512415587902069, "learning_rate": 1.3228693942877703e-05, "loss": 0.4684, "step": 37344 }, { "epoch": 0.7920298615087698, "grad_norm": 0.43907099962234497, "learning_rate": 1.3228378306590589e-05, "loss": 0.4621, "step": 37345 }, { "epoch": 0.7920510699667027, "grad_norm": 0.3814152777194977, "learning_rate": 1.3228062666712898e-05, "loss": 0.45, "step": 37346 }, { "epoch": 0.7920722784246358, "grad_norm": 0.40418145060539246, "learning_rate": 1.3227747023244985e-05, "loss": 0.4697, "step": 37347 }, { "epoch": 0.7920934868825688, "grad_norm": 0.3876458704471588, "learning_rate": 1.3227431376187201e-05, "loss": 0.5388, "step": 37348 }, { "epoch": 0.7921146953405018, "grad_norm": 0.3237810432910919, "learning_rate": 1.3227115725539891e-05, "loss": 0.4654, "step": 37349 }, { "epoch": 0.7921359037984348, "grad_norm": 0.34090644121170044, "learning_rate": 1.3226800071303418e-05, "loss": 0.5251, "step": 37350 }, { "epoch": 0.7921571122563679, "grad_norm": 0.3865273594856262, "learning_rate": 1.3226484413478124e-05, "loss": 0.5618, "step": 37351 }, { "epoch": 0.7921783207143008, "grad_norm": 0.591898500919342, "learning_rate": 1.3226168752064358e-05, "loss": 0.4984, "step": 37352 }, { "epoch": 0.7921995291722339, "grad_norm": 0.5190033912658691, "learning_rate": 1.3225853087062481e-05, "loss": 0.4875, "step": 37353 }, { "epoch": 0.7922207376301669, "grad_norm": 0.34751832485198975, "learning_rate": 1.3225537418472833e-05, "loss": 0.52, "step": 37354 }, { "epoch": 0.7922419460881, "grad_norm": 0.33401137590408325, "learning_rate": 1.3225221746295774e-05, "loss": 0.4831, "step": 37355 }, { "epoch": 0.7922631545460329, "grad_norm": 0.36112192273139954, "learning_rate": 1.3224906070531648e-05, "loss": 0.5, "step": 37356 }, { "epoch": 0.792284363003966, "grad_norm": 0.33483824133872986, "learning_rate": 1.3224590391180814e-05, "loss": 0.4603, "step": 37357 }, { "epoch": 0.7923055714618991, "grad_norm": 0.3214891254901886, "learning_rate": 1.3224274708243614e-05, "loss": 0.4812, "step": 37358 }, { "epoch": 0.792326779919832, "grad_norm": 0.3346565067768097, "learning_rate": 1.3223959021720407e-05, "loss": 0.5022, "step": 37359 }, { "epoch": 0.7923479883777651, "grad_norm": 0.3390060365200043, "learning_rate": 1.3223643331611538e-05, "loss": 0.4556, "step": 37360 }, { "epoch": 0.7923691968356981, "grad_norm": 0.44441089034080505, "learning_rate": 1.3223327637917363e-05, "loss": 0.553, "step": 37361 }, { "epoch": 0.7923904052936311, "grad_norm": 0.34488338232040405, "learning_rate": 1.322301194063823e-05, "loss": 0.4374, "step": 37362 }, { "epoch": 0.7924116137515641, "grad_norm": 0.518606960773468, "learning_rate": 1.322269623977449e-05, "loss": 0.5031, "step": 37363 }, { "epoch": 0.7924328222094972, "grad_norm": 0.34505605697631836, "learning_rate": 1.32223805353265e-05, "loss": 0.4742, "step": 37364 }, { "epoch": 0.7924540306674301, "grad_norm": 0.35227227210998535, "learning_rate": 1.3222064827294603e-05, "loss": 0.4575, "step": 37365 }, { "epoch": 0.7924752391253632, "grad_norm": 0.33830389380455017, "learning_rate": 1.3221749115679152e-05, "loss": 0.4902, "step": 37366 }, { "epoch": 0.7924964475832962, "grad_norm": 0.35408276319503784, "learning_rate": 1.3221433400480502e-05, "loss": 0.5078, "step": 37367 }, { "epoch": 0.7925176560412293, "grad_norm": 0.3948865532875061, "learning_rate": 1.3221117681699e-05, "loss": 0.4993, "step": 37368 }, { "epoch": 0.7925388644991622, "grad_norm": 0.32255807518959045, "learning_rate": 1.3220801959335e-05, "loss": 0.4541, "step": 37369 }, { "epoch": 0.7925600729570953, "grad_norm": 0.4427562952041626, "learning_rate": 1.3220486233388851e-05, "loss": 0.3855, "step": 37370 }, { "epoch": 0.7925812814150283, "grad_norm": 0.3782566785812378, "learning_rate": 1.3220170503860907e-05, "loss": 0.5628, "step": 37371 }, { "epoch": 0.7926024898729613, "grad_norm": 0.3716428577899933, "learning_rate": 1.3219854770751518e-05, "loss": 0.5469, "step": 37372 }, { "epoch": 0.7926236983308944, "grad_norm": 0.2971125543117523, "learning_rate": 1.3219539034061032e-05, "loss": 0.4266, "step": 37373 }, { "epoch": 0.7926449067888274, "grad_norm": 0.3677466809749603, "learning_rate": 1.3219223293789802e-05, "loss": 0.4274, "step": 37374 }, { "epoch": 0.7926661152467604, "grad_norm": 0.4887990653514862, "learning_rate": 1.321890754993818e-05, "loss": 0.5481, "step": 37375 }, { "epoch": 0.7926873237046934, "grad_norm": 0.5012055039405823, "learning_rate": 1.3218591802506522e-05, "loss": 0.4367, "step": 37376 }, { "epoch": 0.7927085321626265, "grad_norm": 0.3403097689151764, "learning_rate": 1.3218276051495168e-05, "loss": 0.4227, "step": 37377 }, { "epoch": 0.7927297406205595, "grad_norm": 0.3336412310600281, "learning_rate": 1.3217960296904479e-05, "loss": 0.5256, "step": 37378 }, { "epoch": 0.7927509490784925, "grad_norm": 0.6257212162017822, "learning_rate": 1.3217644538734802e-05, "loss": 0.484, "step": 37379 }, { "epoch": 0.7927721575364255, "grad_norm": 0.41783249378204346, "learning_rate": 1.3217328776986485e-05, "loss": 0.438, "step": 37380 }, { "epoch": 0.7927933659943586, "grad_norm": 0.3996695280075073, "learning_rate": 1.3217013011659887e-05, "loss": 0.4781, "step": 37381 }, { "epoch": 0.7928145744522915, "grad_norm": 0.34461989998817444, "learning_rate": 1.3216697242755357e-05, "loss": 0.4921, "step": 37382 }, { "epoch": 0.7928357829102246, "grad_norm": 0.3679133951663971, "learning_rate": 1.3216381470273239e-05, "loss": 0.5758, "step": 37383 }, { "epoch": 0.7928569913681576, "grad_norm": 0.40324723720550537, "learning_rate": 1.3216065694213893e-05, "loss": 0.421, "step": 37384 }, { "epoch": 0.7928781998260906, "grad_norm": 0.3338370621204376, "learning_rate": 1.3215749914577667e-05, "loss": 0.4469, "step": 37385 }, { "epoch": 0.7928994082840237, "grad_norm": 0.4204292893409729, "learning_rate": 1.3215434131364909e-05, "loss": 0.5108, "step": 37386 }, { "epoch": 0.7929206167419567, "grad_norm": 0.3410796821117401, "learning_rate": 1.3215118344575972e-05, "loss": 0.4742, "step": 37387 }, { "epoch": 0.7929418251998898, "grad_norm": 0.46485838294029236, "learning_rate": 1.3214802554211213e-05, "loss": 0.4884, "step": 37388 }, { "epoch": 0.7929630336578227, "grad_norm": 0.4166203439235687, "learning_rate": 1.3214486760270975e-05, "loss": 0.4784, "step": 37389 }, { "epoch": 0.7929842421157558, "grad_norm": 0.3670971393585205, "learning_rate": 1.3214170962755614e-05, "loss": 0.5352, "step": 37390 }, { "epoch": 0.7930054505736888, "grad_norm": 0.3815860152244568, "learning_rate": 1.3213855161665478e-05, "loss": 0.5151, "step": 37391 }, { "epoch": 0.7930266590316218, "grad_norm": 0.3937769830226898, "learning_rate": 1.3213539357000924e-05, "loss": 0.4446, "step": 37392 }, { "epoch": 0.7930478674895548, "grad_norm": 0.3874635696411133, "learning_rate": 1.3213223548762298e-05, "loss": 0.5627, "step": 37393 }, { "epoch": 0.7930690759474879, "grad_norm": 0.3672753572463989, "learning_rate": 1.321290773694995e-05, "loss": 0.5021, "step": 37394 }, { "epoch": 0.7930902844054208, "grad_norm": 0.3507483899593353, "learning_rate": 1.3212591921564237e-05, "loss": 0.5154, "step": 37395 }, { "epoch": 0.7931114928633539, "grad_norm": 0.36768069863319397, "learning_rate": 1.3212276102605509e-05, "loss": 0.5367, "step": 37396 }, { "epoch": 0.7931327013212869, "grad_norm": 0.3128311038017273, "learning_rate": 1.321196028007411e-05, "loss": 0.4602, "step": 37397 }, { "epoch": 0.79315390977922, "grad_norm": 0.3599722981452942, "learning_rate": 1.32116444539704e-05, "loss": 0.5997, "step": 37398 }, { "epoch": 0.793175118237153, "grad_norm": 0.35364148020744324, "learning_rate": 1.3211328624294726e-05, "loss": 0.453, "step": 37399 }, { "epoch": 0.793196326695086, "grad_norm": 0.4753817319869995, "learning_rate": 1.321101279104744e-05, "loss": 0.4997, "step": 37400 }, { "epoch": 0.7932175351530191, "grad_norm": 0.3284596800804138, "learning_rate": 1.3210696954228895e-05, "loss": 0.5089, "step": 37401 }, { "epoch": 0.793238743610952, "grad_norm": 0.60880446434021, "learning_rate": 1.321038111383944e-05, "loss": 0.5519, "step": 37402 }, { "epoch": 0.7932599520688851, "grad_norm": 0.44652634859085083, "learning_rate": 1.3210065269879427e-05, "loss": 0.4492, "step": 37403 }, { "epoch": 0.7932811605268181, "grad_norm": 0.36169400811195374, "learning_rate": 1.3209749422349207e-05, "loss": 0.527, "step": 37404 }, { "epoch": 0.7933023689847511, "grad_norm": 0.8959938883781433, "learning_rate": 1.3209433571249131e-05, "loss": 0.4895, "step": 37405 }, { "epoch": 0.7933235774426841, "grad_norm": 0.3527081608772278, "learning_rate": 1.3209117716579552e-05, "loss": 0.4886, "step": 37406 }, { "epoch": 0.7933447859006172, "grad_norm": 0.44761738181114197, "learning_rate": 1.320880185834082e-05, "loss": 0.4679, "step": 37407 }, { "epoch": 0.7933659943585502, "grad_norm": 0.36237847805023193, "learning_rate": 1.3208485996533285e-05, "loss": 0.5737, "step": 37408 }, { "epoch": 0.7933872028164832, "grad_norm": 0.39188462495803833, "learning_rate": 1.3208170131157304e-05, "loss": 0.518, "step": 37409 }, { "epoch": 0.7934084112744162, "grad_norm": 0.36910900473594666, "learning_rate": 1.320785426221322e-05, "loss": 0.4985, "step": 37410 }, { "epoch": 0.7934296197323493, "grad_norm": 0.3359670341014862, "learning_rate": 1.320753838970139e-05, "loss": 0.4466, "step": 37411 }, { "epoch": 0.7934508281902822, "grad_norm": 0.40575289726257324, "learning_rate": 1.320722251362216e-05, "loss": 0.4977, "step": 37412 }, { "epoch": 0.7934720366482153, "grad_norm": 0.3562758266925812, "learning_rate": 1.3206906633975889e-05, "loss": 0.4702, "step": 37413 }, { "epoch": 0.7934932451061484, "grad_norm": 0.39138680696487427, "learning_rate": 1.3206590750762922e-05, "loss": 0.4897, "step": 37414 }, { "epoch": 0.7935144535640813, "grad_norm": 0.3568960130214691, "learning_rate": 1.3206274863983615e-05, "loss": 0.5264, "step": 37415 }, { "epoch": 0.7935356620220144, "grad_norm": 0.34640273451805115, "learning_rate": 1.3205958973638317e-05, "loss": 0.4556, "step": 37416 }, { "epoch": 0.7935568704799474, "grad_norm": 0.3774208426475525, "learning_rate": 1.3205643079727376e-05, "loss": 0.4675, "step": 37417 }, { "epoch": 0.7935780789378805, "grad_norm": 0.3289423882961273, "learning_rate": 1.320532718225115e-05, "loss": 0.5017, "step": 37418 }, { "epoch": 0.7935992873958134, "grad_norm": 0.37549132108688354, "learning_rate": 1.3205011281209985e-05, "loss": 0.4889, "step": 37419 }, { "epoch": 0.7936204958537465, "grad_norm": 0.3256600499153137, "learning_rate": 1.3204695376604235e-05, "loss": 0.5426, "step": 37420 }, { "epoch": 0.7936417043116795, "grad_norm": 0.35860997438430786, "learning_rate": 1.3204379468434252e-05, "loss": 0.5093, "step": 37421 }, { "epoch": 0.7936629127696125, "grad_norm": 0.39824599027633667, "learning_rate": 1.3204063556700382e-05, "loss": 0.419, "step": 37422 }, { "epoch": 0.7936841212275455, "grad_norm": 0.43033018708229065, "learning_rate": 1.3203747641402984e-05, "loss": 0.5697, "step": 37423 }, { "epoch": 0.7937053296854786, "grad_norm": 0.46585938334465027, "learning_rate": 1.3203431722542405e-05, "loss": 0.5026, "step": 37424 }, { "epoch": 0.7937265381434115, "grad_norm": 0.3655299246311188, "learning_rate": 1.3203115800118998e-05, "loss": 0.4585, "step": 37425 }, { "epoch": 0.7937477466013446, "grad_norm": 0.3567817807197571, "learning_rate": 1.320279987413311e-05, "loss": 0.465, "step": 37426 }, { "epoch": 0.7937689550592777, "grad_norm": 0.38089293241500854, "learning_rate": 1.3202483944585098e-05, "loss": 0.4886, "step": 37427 }, { "epoch": 0.7937901635172107, "grad_norm": 0.3577229678630829, "learning_rate": 1.3202168011475312e-05, "loss": 0.4729, "step": 37428 }, { "epoch": 0.7938113719751437, "grad_norm": 0.3958054780960083, "learning_rate": 1.3201852074804102e-05, "loss": 0.4666, "step": 37429 }, { "epoch": 0.7938325804330767, "grad_norm": 0.42447271943092346, "learning_rate": 1.320153613457182e-05, "loss": 0.5632, "step": 37430 }, { "epoch": 0.7938537888910098, "grad_norm": 0.4523901045322418, "learning_rate": 1.3201220190778814e-05, "loss": 0.489, "step": 37431 }, { "epoch": 0.7938749973489427, "grad_norm": 0.3919692635536194, "learning_rate": 1.3200904243425443e-05, "loss": 0.5124, "step": 37432 }, { "epoch": 0.7938962058068758, "grad_norm": 0.36145883798599243, "learning_rate": 1.3200588292512055e-05, "loss": 0.5009, "step": 37433 }, { "epoch": 0.7939174142648088, "grad_norm": 0.3649985194206238, "learning_rate": 1.3200272338038996e-05, "loss": 0.5523, "step": 37434 }, { "epoch": 0.7939386227227418, "grad_norm": 0.361256867647171, "learning_rate": 1.3199956380006626e-05, "loss": 0.4958, "step": 37435 }, { "epoch": 0.7939598311806748, "grad_norm": 0.32550716400146484, "learning_rate": 1.319964041841529e-05, "loss": 0.4726, "step": 37436 }, { "epoch": 0.7939810396386079, "grad_norm": 0.36540600657463074, "learning_rate": 1.3199324453265343e-05, "loss": 0.4853, "step": 37437 }, { "epoch": 0.7940022480965409, "grad_norm": 0.4225311875343323, "learning_rate": 1.3199008484557133e-05, "loss": 0.5411, "step": 37438 }, { "epoch": 0.7940234565544739, "grad_norm": 0.31812649965286255, "learning_rate": 1.3198692512291016e-05, "loss": 0.499, "step": 37439 }, { "epoch": 0.794044665012407, "grad_norm": 0.40903908014297485, "learning_rate": 1.319837653646734e-05, "loss": 0.4788, "step": 37440 }, { "epoch": 0.79406587347034, "grad_norm": 0.38625749945640564, "learning_rate": 1.3198060557086457e-05, "loss": 0.4198, "step": 37441 }, { "epoch": 0.794087081928273, "grad_norm": 0.3416106700897217, "learning_rate": 1.319774457414872e-05, "loss": 0.4243, "step": 37442 }, { "epoch": 0.794108290386206, "grad_norm": 0.3507673144340515, "learning_rate": 1.3197428587654477e-05, "loss": 0.5249, "step": 37443 }, { "epoch": 0.7941294988441391, "grad_norm": 0.4425047039985657, "learning_rate": 1.3197112597604083e-05, "loss": 0.5054, "step": 37444 }, { "epoch": 0.794150707302072, "grad_norm": 0.3411465585231781, "learning_rate": 1.3196796603997886e-05, "loss": 0.5188, "step": 37445 }, { "epoch": 0.7941719157600051, "grad_norm": 0.4535848796367645, "learning_rate": 1.3196480606836243e-05, "loss": 0.4745, "step": 37446 }, { "epoch": 0.7941931242179381, "grad_norm": 0.3606967031955719, "learning_rate": 1.3196164606119503e-05, "loss": 0.4832, "step": 37447 }, { "epoch": 0.7942143326758712, "grad_norm": 0.3681331276893616, "learning_rate": 1.3195848601848012e-05, "loss": 0.4809, "step": 37448 }, { "epoch": 0.7942355411338041, "grad_norm": 0.45408135652542114, "learning_rate": 1.319553259402213e-05, "loss": 0.4932, "step": 37449 }, { "epoch": 0.7942567495917372, "grad_norm": 0.3398692309856415, "learning_rate": 1.3195216582642201e-05, "loss": 0.481, "step": 37450 }, { "epoch": 0.7942779580496702, "grad_norm": 0.4060521423816681, "learning_rate": 1.3194900567708583e-05, "loss": 0.5322, "step": 37451 }, { "epoch": 0.7942991665076032, "grad_norm": 0.3222121000289917, "learning_rate": 1.3194584549221623e-05, "loss": 0.4996, "step": 37452 }, { "epoch": 0.7943203749655362, "grad_norm": 0.35317331552505493, "learning_rate": 1.3194268527181672e-05, "loss": 0.5038, "step": 37453 }, { "epoch": 0.7943415834234693, "grad_norm": 0.5676889419555664, "learning_rate": 1.3193952501589087e-05, "loss": 0.5235, "step": 37454 }, { "epoch": 0.7943627918814024, "grad_norm": 0.44523581862449646, "learning_rate": 1.3193636472444214e-05, "loss": 0.4232, "step": 37455 }, { "epoch": 0.7943840003393353, "grad_norm": 0.37644413113594055, "learning_rate": 1.3193320439747406e-05, "loss": 0.4653, "step": 37456 }, { "epoch": 0.7944052087972684, "grad_norm": 0.3252218961715698, "learning_rate": 1.3193004403499013e-05, "loss": 0.454, "step": 37457 }, { "epoch": 0.7944264172552014, "grad_norm": 0.39364132285118103, "learning_rate": 1.319268836369939e-05, "loss": 0.5181, "step": 37458 }, { "epoch": 0.7944476257131344, "grad_norm": 0.3764578700065613, "learning_rate": 1.3192372320348885e-05, "loss": 0.4345, "step": 37459 }, { "epoch": 0.7944688341710674, "grad_norm": 0.33083418011665344, "learning_rate": 1.3192056273447855e-05, "loss": 0.4876, "step": 37460 }, { "epoch": 0.7944900426290005, "grad_norm": 0.33728548884391785, "learning_rate": 1.3191740222996647e-05, "loss": 0.4826, "step": 37461 }, { "epoch": 0.7945112510869334, "grad_norm": 0.37035486102104187, "learning_rate": 1.319142416899561e-05, "loss": 0.4671, "step": 37462 }, { "epoch": 0.7945324595448665, "grad_norm": 0.3548295199871063, "learning_rate": 1.3191108111445104e-05, "loss": 0.5207, "step": 37463 }, { "epoch": 0.7945536680027995, "grad_norm": 0.33639463782310486, "learning_rate": 1.319079205034547e-05, "loss": 0.4961, "step": 37464 }, { "epoch": 0.7945748764607325, "grad_norm": 0.33560773730278015, "learning_rate": 1.3190475985697067e-05, "loss": 0.497, "step": 37465 }, { "epoch": 0.7945960849186655, "grad_norm": 0.34696754813194275, "learning_rate": 1.3190159917500245e-05, "loss": 0.506, "step": 37466 }, { "epoch": 0.7946172933765986, "grad_norm": 0.3450857996940613, "learning_rate": 1.3189843845755355e-05, "loss": 0.4306, "step": 37467 }, { "epoch": 0.7946385018345317, "grad_norm": 0.3278609812259674, "learning_rate": 1.3189527770462745e-05, "loss": 0.464, "step": 37468 }, { "epoch": 0.7946597102924646, "grad_norm": 0.31798484921455383, "learning_rate": 1.3189211691622773e-05, "loss": 0.4652, "step": 37469 }, { "epoch": 0.7946809187503977, "grad_norm": 0.3255375921726227, "learning_rate": 1.3188895609235787e-05, "loss": 0.4534, "step": 37470 }, { "epoch": 0.7947021272083307, "grad_norm": 0.3584042489528656, "learning_rate": 1.3188579523302139e-05, "loss": 0.552, "step": 37471 }, { "epoch": 0.7947233356662637, "grad_norm": 0.4948504567146301, "learning_rate": 1.3188263433822179e-05, "loss": 0.4584, "step": 37472 }, { "epoch": 0.7947445441241967, "grad_norm": 0.4032096862792969, "learning_rate": 1.318794734079626e-05, "loss": 0.4729, "step": 37473 }, { "epoch": 0.7947657525821298, "grad_norm": 0.370165079832077, "learning_rate": 1.3187631244224739e-05, "loss": 0.497, "step": 37474 }, { "epoch": 0.7947869610400627, "grad_norm": 0.3255585730075836, "learning_rate": 1.318731514410796e-05, "loss": 0.4071, "step": 37475 }, { "epoch": 0.7948081694979958, "grad_norm": 0.6942317485809326, "learning_rate": 1.3186999040446269e-05, "loss": 0.5615, "step": 37476 }, { "epoch": 0.7948293779559288, "grad_norm": 0.36417147517204285, "learning_rate": 1.3186682933240033e-05, "loss": 0.4481, "step": 37477 }, { "epoch": 0.7948505864138619, "grad_norm": 0.3311498761177063, "learning_rate": 1.3186366822489595e-05, "loss": 0.5363, "step": 37478 }, { "epoch": 0.7948717948717948, "grad_norm": 0.4674607217311859, "learning_rate": 1.3186050708195306e-05, "loss": 0.5174, "step": 37479 }, { "epoch": 0.7948930033297279, "grad_norm": 0.348354309797287, "learning_rate": 1.3185734590357522e-05, "loss": 0.4804, "step": 37480 }, { "epoch": 0.794914211787661, "grad_norm": 0.4531162977218628, "learning_rate": 1.318541846897659e-05, "loss": 0.4843, "step": 37481 }, { "epoch": 0.7949354202455939, "grad_norm": 0.4466601014137268, "learning_rate": 1.3185102344052863e-05, "loss": 0.4697, "step": 37482 }, { "epoch": 0.794956628703527, "grad_norm": 0.36112305521965027, "learning_rate": 1.3184786215586691e-05, "loss": 0.502, "step": 37483 }, { "epoch": 0.79497783716146, "grad_norm": 0.3388241231441498, "learning_rate": 1.3184470083578432e-05, "loss": 0.4318, "step": 37484 }, { "epoch": 0.794999045619393, "grad_norm": 0.3918542265892029, "learning_rate": 1.3184153948028429e-05, "loss": 0.4748, "step": 37485 }, { "epoch": 0.795020254077326, "grad_norm": 0.3576587736606598, "learning_rate": 1.318383780893704e-05, "loss": 0.5027, "step": 37486 }, { "epoch": 0.7950414625352591, "grad_norm": 0.6501715183258057, "learning_rate": 1.3183521666304611e-05, "loss": 0.5396, "step": 37487 }, { "epoch": 0.795062670993192, "grad_norm": 0.3479434847831726, "learning_rate": 1.31832055201315e-05, "loss": 0.573, "step": 37488 }, { "epoch": 0.7950838794511251, "grad_norm": 0.36029812693595886, "learning_rate": 1.3182889370418052e-05, "loss": 0.5207, "step": 37489 }, { "epoch": 0.7951050879090581, "grad_norm": 0.35085076093673706, "learning_rate": 1.3182573217164625e-05, "loss": 0.4602, "step": 37490 }, { "epoch": 0.7951262963669912, "grad_norm": 0.35413989424705505, "learning_rate": 1.3182257060371568e-05, "loss": 0.4433, "step": 37491 }, { "epoch": 0.7951475048249241, "grad_norm": 0.37666139006614685, "learning_rate": 1.3181940900039232e-05, "loss": 0.5307, "step": 37492 }, { "epoch": 0.7951687132828572, "grad_norm": 0.35522976517677307, "learning_rate": 1.3181624736167968e-05, "loss": 0.4373, "step": 37493 }, { "epoch": 0.7951899217407902, "grad_norm": 0.37355145812034607, "learning_rate": 1.3181308568758132e-05, "loss": 0.5146, "step": 37494 }, { "epoch": 0.7952111301987232, "grad_norm": 0.3859359323978424, "learning_rate": 1.3180992397810068e-05, "loss": 0.5055, "step": 37495 }, { "epoch": 0.7952323386566563, "grad_norm": 0.3598953187465668, "learning_rate": 1.3180676223324131e-05, "loss": 0.5576, "step": 37496 }, { "epoch": 0.7952535471145893, "grad_norm": 0.30946046113967896, "learning_rate": 1.3180360045300678e-05, "loss": 0.4425, "step": 37497 }, { "epoch": 0.7952747555725224, "grad_norm": 0.37425297498703003, "learning_rate": 1.3180043863740056e-05, "loss": 0.4996, "step": 37498 }, { "epoch": 0.7952959640304553, "grad_norm": 0.3406672477722168, "learning_rate": 1.3179727678642612e-05, "loss": 0.5203, "step": 37499 }, { "epoch": 0.7953171724883884, "grad_norm": 0.3315069377422333, "learning_rate": 1.3179411490008707e-05, "loss": 0.5044, "step": 37500 }, { "epoch": 0.7953383809463214, "grad_norm": 0.35427188873291016, "learning_rate": 1.3179095297838684e-05, "loss": 0.4649, "step": 37501 }, { "epoch": 0.7953595894042544, "grad_norm": 0.3805358111858368, "learning_rate": 1.3178779102132901e-05, "loss": 0.4947, "step": 37502 }, { "epoch": 0.7953807978621874, "grad_norm": 0.3329623341560364, "learning_rate": 1.317846290289171e-05, "loss": 0.5115, "step": 37503 }, { "epoch": 0.7954020063201205, "grad_norm": 0.37657541036605835, "learning_rate": 1.3178146700115454e-05, "loss": 0.5668, "step": 37504 }, { "epoch": 0.7954232147780534, "grad_norm": 0.3525729477405548, "learning_rate": 1.3177830493804497e-05, "loss": 0.4824, "step": 37505 }, { "epoch": 0.7954444232359865, "grad_norm": 0.3749791085720062, "learning_rate": 1.3177514283959183e-05, "loss": 0.4353, "step": 37506 }, { "epoch": 0.7954656316939195, "grad_norm": 0.36557838320732117, "learning_rate": 1.3177198070579863e-05, "loss": 0.513, "step": 37507 }, { "epoch": 0.7954868401518526, "grad_norm": 0.3972680866718292, "learning_rate": 1.3176881853666891e-05, "loss": 0.5312, "step": 37508 }, { "epoch": 0.7955080486097856, "grad_norm": 0.3251816928386688, "learning_rate": 1.3176565633220624e-05, "loss": 0.455, "step": 37509 }, { "epoch": 0.7955292570677186, "grad_norm": 0.37144699692726135, "learning_rate": 1.3176249409241404e-05, "loss": 0.4649, "step": 37510 }, { "epoch": 0.7955504655256517, "grad_norm": 0.36510172486305237, "learning_rate": 1.3175933181729588e-05, "loss": 0.6059, "step": 37511 }, { "epoch": 0.7955716739835846, "grad_norm": 0.3633107841014862, "learning_rate": 1.3175616950685526e-05, "loss": 0.5522, "step": 37512 }, { "epoch": 0.7955928824415177, "grad_norm": 0.37605398893356323, "learning_rate": 1.3175300716109569e-05, "loss": 0.5357, "step": 37513 }, { "epoch": 0.7956140908994507, "grad_norm": 0.37000203132629395, "learning_rate": 1.3174984478002073e-05, "loss": 0.466, "step": 37514 }, { "epoch": 0.7956352993573838, "grad_norm": 0.33747732639312744, "learning_rate": 1.3174668236363384e-05, "loss": 0.4526, "step": 37515 }, { "epoch": 0.7956565078153167, "grad_norm": 0.38897624611854553, "learning_rate": 1.3174351991193858e-05, "loss": 0.5845, "step": 37516 }, { "epoch": 0.7956777162732498, "grad_norm": 0.38422536849975586, "learning_rate": 1.3174035742493847e-05, "loss": 0.3762, "step": 37517 }, { "epoch": 0.7956989247311828, "grad_norm": 0.45159435272216797, "learning_rate": 1.3173719490263697e-05, "loss": 0.4079, "step": 37518 }, { "epoch": 0.7957201331891158, "grad_norm": 0.3650556206703186, "learning_rate": 1.3173403234503768e-05, "loss": 0.5711, "step": 37519 }, { "epoch": 0.7957413416470488, "grad_norm": 0.3591436445713043, "learning_rate": 1.3173086975214407e-05, "loss": 0.4858, "step": 37520 }, { "epoch": 0.7957625501049819, "grad_norm": 0.36372727155685425, "learning_rate": 1.3172770712395964e-05, "loss": 0.4624, "step": 37521 }, { "epoch": 0.795783758562915, "grad_norm": 0.3767372667789459, "learning_rate": 1.3172454446048792e-05, "loss": 0.5534, "step": 37522 }, { "epoch": 0.7958049670208479, "grad_norm": 0.3614508807659149, "learning_rate": 1.3172138176173249e-05, "loss": 0.4779, "step": 37523 }, { "epoch": 0.795826175478781, "grad_norm": 0.3381332457065582, "learning_rate": 1.3171821902769674e-05, "loss": 0.4822, "step": 37524 }, { "epoch": 0.795847383936714, "grad_norm": 0.39325612783432007, "learning_rate": 1.3171505625838433e-05, "loss": 0.5456, "step": 37525 }, { "epoch": 0.795868592394647, "grad_norm": 0.3160625696182251, "learning_rate": 1.317118934537987e-05, "loss": 0.4312, "step": 37526 }, { "epoch": 0.79588980085258, "grad_norm": 0.4036553204059601, "learning_rate": 1.3170873061394334e-05, "loss": 0.5478, "step": 37527 }, { "epoch": 0.7959110093105131, "grad_norm": 0.4748482406139374, "learning_rate": 1.3170556773882181e-05, "loss": 0.478, "step": 37528 }, { "epoch": 0.795932217768446, "grad_norm": 0.40469610691070557, "learning_rate": 1.3170240482843767e-05, "loss": 0.4631, "step": 37529 }, { "epoch": 0.7959534262263791, "grad_norm": 0.3641913831233978, "learning_rate": 1.3169924188279432e-05, "loss": 0.4636, "step": 37530 }, { "epoch": 0.7959746346843121, "grad_norm": 0.38306763768196106, "learning_rate": 1.3169607890189541e-05, "loss": 0.4777, "step": 37531 }, { "epoch": 0.7959958431422451, "grad_norm": 0.4139218032360077, "learning_rate": 1.3169291588574435e-05, "loss": 0.4806, "step": 37532 }, { "epoch": 0.7960170516001781, "grad_norm": 0.33229246735572815, "learning_rate": 1.3168975283434475e-05, "loss": 0.5173, "step": 37533 }, { "epoch": 0.7960382600581112, "grad_norm": 0.3734566271305084, "learning_rate": 1.3168658974770005e-05, "loss": 0.4856, "step": 37534 }, { "epoch": 0.7960594685160441, "grad_norm": 0.32911771535873413, "learning_rate": 1.3168342662581381e-05, "loss": 0.5848, "step": 37535 }, { "epoch": 0.7960806769739772, "grad_norm": 0.3574211597442627, "learning_rate": 1.3168026346868955e-05, "loss": 0.4399, "step": 37536 }, { "epoch": 0.7961018854319103, "grad_norm": 0.3524329662322998, "learning_rate": 1.3167710027633076e-05, "loss": 0.5348, "step": 37537 }, { "epoch": 0.7961230938898433, "grad_norm": 0.37831321358680725, "learning_rate": 1.3167393704874098e-05, "loss": 0.4887, "step": 37538 }, { "epoch": 0.7961443023477763, "grad_norm": 0.3823915719985962, "learning_rate": 1.3167077378592372e-05, "loss": 0.5393, "step": 37539 }, { "epoch": 0.7961655108057093, "grad_norm": 0.4858555495738983, "learning_rate": 1.3166761048788253e-05, "loss": 0.5828, "step": 37540 }, { "epoch": 0.7961867192636424, "grad_norm": 0.411508172750473, "learning_rate": 1.3166444715462083e-05, "loss": 0.5613, "step": 37541 }, { "epoch": 0.7962079277215753, "grad_norm": 0.32795751094818115, "learning_rate": 1.3166128378614226e-05, "loss": 0.4533, "step": 37542 }, { "epoch": 0.7962291361795084, "grad_norm": 0.3462724983692169, "learning_rate": 1.316581203824503e-05, "loss": 0.5371, "step": 37543 }, { "epoch": 0.7962503446374414, "grad_norm": 0.3839223086833954, "learning_rate": 1.316549569435484e-05, "loss": 0.4842, "step": 37544 }, { "epoch": 0.7962715530953745, "grad_norm": 0.3388268053531647, "learning_rate": 1.3165179346944017e-05, "loss": 0.4675, "step": 37545 }, { "epoch": 0.7962927615533074, "grad_norm": 0.37876591086387634, "learning_rate": 1.3164862996012906e-05, "loss": 0.5117, "step": 37546 }, { "epoch": 0.7963139700112405, "grad_norm": 0.3535449802875519, "learning_rate": 1.3164546641561863e-05, "loss": 0.4924, "step": 37547 }, { "epoch": 0.7963351784691735, "grad_norm": 0.41953375935554504, "learning_rate": 1.3164230283591239e-05, "loss": 0.4555, "step": 37548 }, { "epoch": 0.7963563869271065, "grad_norm": 1.4514461755752563, "learning_rate": 1.316391392210139e-05, "loss": 0.5687, "step": 37549 }, { "epoch": 0.7963775953850396, "grad_norm": 0.4819967448711395, "learning_rate": 1.3163597557092657e-05, "loss": 0.5941, "step": 37550 }, { "epoch": 0.7963988038429726, "grad_norm": 0.3431636095046997, "learning_rate": 1.3163281188565402e-05, "loss": 0.4833, "step": 37551 }, { "epoch": 0.7964200123009056, "grad_norm": 0.570395290851593, "learning_rate": 1.316296481651997e-05, "loss": 0.5004, "step": 37552 }, { "epoch": 0.7964412207588386, "grad_norm": 0.36045533418655396, "learning_rate": 1.3162648440956717e-05, "loss": 0.5748, "step": 37553 }, { "epoch": 0.7964624292167717, "grad_norm": 0.32200154662132263, "learning_rate": 1.3162332061875996e-05, "loss": 0.4849, "step": 37554 }, { "epoch": 0.7964836376747046, "grad_norm": 0.39263448119163513, "learning_rate": 1.3162015679278155e-05, "loss": 0.5483, "step": 37555 }, { "epoch": 0.7965048461326377, "grad_norm": 0.425984263420105, "learning_rate": 1.316169929316355e-05, "loss": 0.524, "step": 37556 }, { "epoch": 0.7965260545905707, "grad_norm": 0.3282952308654785, "learning_rate": 1.316138290353253e-05, "loss": 0.4543, "step": 37557 }, { "epoch": 0.7965472630485038, "grad_norm": 0.4525461792945862, "learning_rate": 1.3161066510385446e-05, "loss": 0.5237, "step": 37558 }, { "epoch": 0.7965684715064367, "grad_norm": 0.35190361738204956, "learning_rate": 1.3160750113722651e-05, "loss": 0.463, "step": 37559 }, { "epoch": 0.7965896799643698, "grad_norm": 0.3898656666278839, "learning_rate": 1.3160433713544496e-05, "loss": 0.5302, "step": 37560 }, { "epoch": 0.7966108884223028, "grad_norm": 0.3567310869693756, "learning_rate": 1.3160117309851335e-05, "loss": 0.5172, "step": 37561 }, { "epoch": 0.7966320968802358, "grad_norm": 0.3321284353733063, "learning_rate": 1.315980090264352e-05, "loss": 0.4617, "step": 37562 }, { "epoch": 0.7966533053381689, "grad_norm": 0.3574533462524414, "learning_rate": 1.3159484491921402e-05, "loss": 0.5413, "step": 37563 }, { "epoch": 0.7966745137961019, "grad_norm": 0.34470996260643005, "learning_rate": 1.3159168077685332e-05, "loss": 0.4115, "step": 37564 }, { "epoch": 0.796695722254035, "grad_norm": 0.3623206615447998, "learning_rate": 1.3158851659935663e-05, "loss": 0.4639, "step": 37565 }, { "epoch": 0.7967169307119679, "grad_norm": 0.4049392342567444, "learning_rate": 1.3158535238672748e-05, "loss": 0.5003, "step": 37566 }, { "epoch": 0.796738139169901, "grad_norm": 0.41070255637168884, "learning_rate": 1.3158218813896935e-05, "loss": 0.5286, "step": 37567 }, { "epoch": 0.796759347627834, "grad_norm": 0.3878541886806488, "learning_rate": 1.3157902385608578e-05, "loss": 0.4642, "step": 37568 }, { "epoch": 0.796780556085767, "grad_norm": 0.3576016128063202, "learning_rate": 1.3157585953808032e-05, "loss": 0.4664, "step": 37569 }, { "epoch": 0.7968017645437, "grad_norm": 0.3753116726875305, "learning_rate": 1.3157269518495646e-05, "loss": 0.4873, "step": 37570 }, { "epoch": 0.7968229730016331, "grad_norm": 0.3336648941040039, "learning_rate": 1.3156953079671773e-05, "loss": 0.472, "step": 37571 }, { "epoch": 0.796844181459566, "grad_norm": 0.3540757894515991, "learning_rate": 1.3156636637336762e-05, "loss": 0.4462, "step": 37572 }, { "epoch": 0.7968653899174991, "grad_norm": 0.3989535868167877, "learning_rate": 1.3156320191490966e-05, "loss": 0.475, "step": 37573 }, { "epoch": 0.7968865983754321, "grad_norm": 0.3791462779045105, "learning_rate": 1.315600374213474e-05, "loss": 0.5324, "step": 37574 }, { "epoch": 0.7969078068333652, "grad_norm": 0.3355043828487396, "learning_rate": 1.3155687289268432e-05, "loss": 0.4766, "step": 37575 }, { "epoch": 0.7969290152912981, "grad_norm": 0.4090883731842041, "learning_rate": 1.31553708328924e-05, "loss": 0.5871, "step": 37576 }, { "epoch": 0.7969502237492312, "grad_norm": 0.3422173857688904, "learning_rate": 1.3155054373006992e-05, "loss": 0.3927, "step": 37577 }, { "epoch": 0.7969714322071643, "grad_norm": 0.4432794153690338, "learning_rate": 1.3154737909612559e-05, "loss": 0.466, "step": 37578 }, { "epoch": 0.7969926406650972, "grad_norm": 0.34184572100639343, "learning_rate": 1.315442144270945e-05, "loss": 0.4916, "step": 37579 }, { "epoch": 0.7970138491230303, "grad_norm": 0.3540396988391876, "learning_rate": 1.3154104972298025e-05, "loss": 0.4711, "step": 37580 }, { "epoch": 0.7970350575809633, "grad_norm": 0.4061756730079651, "learning_rate": 1.315378849837863e-05, "loss": 0.4949, "step": 37581 }, { "epoch": 0.7970562660388963, "grad_norm": 0.37906360626220703, "learning_rate": 1.3153472020951621e-05, "loss": 0.5287, "step": 37582 }, { "epoch": 0.7970774744968293, "grad_norm": 0.4028787612915039, "learning_rate": 1.3153155540017344e-05, "loss": 0.4999, "step": 37583 }, { "epoch": 0.7970986829547624, "grad_norm": 0.396186500787735, "learning_rate": 1.3152839055576158e-05, "loss": 0.5577, "step": 37584 }, { "epoch": 0.7971198914126953, "grad_norm": 0.3500712215900421, "learning_rate": 1.315252256762841e-05, "loss": 0.5319, "step": 37585 }, { "epoch": 0.7971410998706284, "grad_norm": 0.35213953256607056, "learning_rate": 1.3152206076174455e-05, "loss": 0.4869, "step": 37586 }, { "epoch": 0.7971623083285614, "grad_norm": 0.36679530143737793, "learning_rate": 1.3151889581214645e-05, "loss": 0.5694, "step": 37587 }, { "epoch": 0.7971835167864945, "grad_norm": 0.4169679582118988, "learning_rate": 1.315157308274933e-05, "loss": 0.5639, "step": 37588 }, { "epoch": 0.7972047252444274, "grad_norm": 0.36656978726387024, "learning_rate": 1.3151256580778862e-05, "loss": 0.4707, "step": 37589 }, { "epoch": 0.7972259337023605, "grad_norm": 0.32580506801605225, "learning_rate": 1.3150940075303593e-05, "loss": 0.5211, "step": 37590 }, { "epoch": 0.7972471421602936, "grad_norm": 0.3314770460128784, "learning_rate": 1.315062356632388e-05, "loss": 0.5298, "step": 37591 }, { "epoch": 0.7972683506182265, "grad_norm": 0.36320510506629944, "learning_rate": 1.3150307053840061e-05, "loss": 0.4311, "step": 37592 }, { "epoch": 0.7972895590761596, "grad_norm": 0.3468848764896393, "learning_rate": 1.3149990537852507e-05, "loss": 0.4626, "step": 37593 }, { "epoch": 0.7973107675340926, "grad_norm": 0.41074344515800476, "learning_rate": 1.3149674018361558e-05, "loss": 0.5283, "step": 37594 }, { "epoch": 0.7973319759920257, "grad_norm": 0.338125079870224, "learning_rate": 1.314935749536757e-05, "loss": 0.4892, "step": 37595 }, { "epoch": 0.7973531844499586, "grad_norm": 0.3422383964061737, "learning_rate": 1.3149040968870895e-05, "loss": 0.513, "step": 37596 }, { "epoch": 0.7973743929078917, "grad_norm": 0.32159683108329773, "learning_rate": 1.314872443887188e-05, "loss": 0.4968, "step": 37597 }, { "epoch": 0.7973956013658247, "grad_norm": 0.4011926054954529, "learning_rate": 1.3148407905370881e-05, "loss": 0.5493, "step": 37598 }, { "epoch": 0.7974168098237577, "grad_norm": 0.39697569608688354, "learning_rate": 1.3148091368368255e-05, "loss": 0.4884, "step": 37599 }, { "epoch": 0.7974380182816907, "grad_norm": 0.3458000123500824, "learning_rate": 1.3147774827864343e-05, "loss": 0.4663, "step": 37600 }, { "epoch": 0.7974592267396238, "grad_norm": 0.3519253432750702, "learning_rate": 1.3147458283859508e-05, "loss": 0.5061, "step": 37601 }, { "epoch": 0.7974804351975567, "grad_norm": 0.3379178047180176, "learning_rate": 1.3147141736354097e-05, "loss": 0.4381, "step": 37602 }, { "epoch": 0.7975016436554898, "grad_norm": 0.3596651256084442, "learning_rate": 1.3146825185348462e-05, "loss": 0.4373, "step": 37603 }, { "epoch": 0.7975228521134229, "grad_norm": 0.3856910467147827, "learning_rate": 1.3146508630842955e-05, "loss": 0.4667, "step": 37604 }, { "epoch": 0.7975440605713559, "grad_norm": 0.4015483558177948, "learning_rate": 1.3146192072837926e-05, "loss": 0.4529, "step": 37605 }, { "epoch": 0.7975652690292889, "grad_norm": 0.5978503227233887, "learning_rate": 1.314587551133373e-05, "loss": 0.5188, "step": 37606 }, { "epoch": 0.7975864774872219, "grad_norm": 0.40048080682754517, "learning_rate": 1.3145558946330721e-05, "loss": 0.4305, "step": 37607 }, { "epoch": 0.797607685945155, "grad_norm": 0.4299449622631073, "learning_rate": 1.314524237782925e-05, "loss": 0.4784, "step": 37608 }, { "epoch": 0.7976288944030879, "grad_norm": 0.3923063576221466, "learning_rate": 1.3144925805829663e-05, "loss": 0.5643, "step": 37609 }, { "epoch": 0.797650102861021, "grad_norm": 0.3826128840446472, "learning_rate": 1.3144609230332322e-05, "loss": 0.5288, "step": 37610 }, { "epoch": 0.797671311318954, "grad_norm": 0.3509368300437927, "learning_rate": 1.3144292651337569e-05, "loss": 0.541, "step": 37611 }, { "epoch": 0.797692519776887, "grad_norm": 0.35192546248435974, "learning_rate": 1.3143976068845763e-05, "loss": 0.4825, "step": 37612 }, { "epoch": 0.79771372823482, "grad_norm": 0.37310799956321716, "learning_rate": 1.3143659482857256e-05, "loss": 0.497, "step": 37613 }, { "epoch": 0.7977349366927531, "grad_norm": 0.3333151638507843, "learning_rate": 1.3143342893372395e-05, "loss": 0.4471, "step": 37614 }, { "epoch": 0.797756145150686, "grad_norm": 0.4501952528953552, "learning_rate": 1.3143026300391536e-05, "loss": 0.4973, "step": 37615 }, { "epoch": 0.7977773536086191, "grad_norm": 0.3855310082435608, "learning_rate": 1.3142709703915033e-05, "loss": 0.5254, "step": 37616 }, { "epoch": 0.7977985620665521, "grad_norm": 0.5087560415267944, "learning_rate": 1.3142393103943234e-05, "loss": 0.4431, "step": 37617 }, { "epoch": 0.7978197705244852, "grad_norm": 0.3579455018043518, "learning_rate": 1.314207650047649e-05, "loss": 0.4945, "step": 37618 }, { "epoch": 0.7978409789824182, "grad_norm": 0.3618873655796051, "learning_rate": 1.314175989351516e-05, "loss": 0.5298, "step": 37619 }, { "epoch": 0.7978621874403512, "grad_norm": 0.40287718176841736, "learning_rate": 1.314144328305959e-05, "loss": 0.4675, "step": 37620 }, { "epoch": 0.7978833958982843, "grad_norm": 0.4285109341144562, "learning_rate": 1.3141126669110136e-05, "loss": 0.535, "step": 37621 }, { "epoch": 0.7979046043562172, "grad_norm": 0.3436415493488312, "learning_rate": 1.3140810051667148e-05, "loss": 0.514, "step": 37622 }, { "epoch": 0.7979258128141503, "grad_norm": 0.3717092275619507, "learning_rate": 1.3140493430730973e-05, "loss": 0.4288, "step": 37623 }, { "epoch": 0.7979470212720833, "grad_norm": 0.40533965826034546, "learning_rate": 1.3140176806301975e-05, "loss": 0.528, "step": 37624 }, { "epoch": 0.7979682297300164, "grad_norm": 0.35902637243270874, "learning_rate": 1.3139860178380497e-05, "loss": 0.4379, "step": 37625 }, { "epoch": 0.7979894381879493, "grad_norm": 0.36589476466178894, "learning_rate": 1.3139543546966894e-05, "loss": 0.5626, "step": 37626 }, { "epoch": 0.7980106466458824, "grad_norm": 0.33342599868774414, "learning_rate": 1.3139226912061517e-05, "loss": 0.4794, "step": 37627 }, { "epoch": 0.7980318551038154, "grad_norm": 0.3592786192893982, "learning_rate": 1.313891027366472e-05, "loss": 0.4627, "step": 37628 }, { "epoch": 0.7980530635617484, "grad_norm": 0.4068584740161896, "learning_rate": 1.3138593631776855e-05, "loss": 0.5489, "step": 37629 }, { "epoch": 0.7980742720196814, "grad_norm": 0.38197532296180725, "learning_rate": 1.313827698639827e-05, "loss": 0.5586, "step": 37630 }, { "epoch": 0.7980954804776145, "grad_norm": 0.3665958046913147, "learning_rate": 1.3137960337529325e-05, "loss": 0.5249, "step": 37631 }, { "epoch": 0.7981166889355475, "grad_norm": 0.41998085379600525, "learning_rate": 1.3137643685170366e-05, "loss": 0.5536, "step": 37632 }, { "epoch": 0.7981378973934805, "grad_norm": 0.4255952835083008, "learning_rate": 1.3137327029321747e-05, "loss": 0.475, "step": 37633 }, { "epoch": 0.7981591058514136, "grad_norm": 0.35269445180892944, "learning_rate": 1.313701036998382e-05, "loss": 0.5051, "step": 37634 }, { "epoch": 0.7981803143093466, "grad_norm": 0.3982347548007965, "learning_rate": 1.3136693707156938e-05, "loss": 0.4986, "step": 37635 }, { "epoch": 0.7982015227672796, "grad_norm": 0.36167675256729126, "learning_rate": 1.3136377040841453e-05, "loss": 0.4555, "step": 37636 }, { "epoch": 0.7982227312252126, "grad_norm": 0.33334022760391235, "learning_rate": 1.3136060371037714e-05, "loss": 0.4529, "step": 37637 }, { "epoch": 0.7982439396831457, "grad_norm": 0.3784019351005554, "learning_rate": 1.3135743697746077e-05, "loss": 0.4947, "step": 37638 }, { "epoch": 0.7982651481410786, "grad_norm": 0.4341280162334442, "learning_rate": 1.3135427020966893e-05, "loss": 0.4837, "step": 37639 }, { "epoch": 0.7982863565990117, "grad_norm": 0.4768025875091553, "learning_rate": 1.3135110340700514e-05, "loss": 0.5092, "step": 37640 }, { "epoch": 0.7983075650569447, "grad_norm": 0.38629230856895447, "learning_rate": 1.3134793656947296e-05, "loss": 0.5268, "step": 37641 }, { "epoch": 0.7983287735148777, "grad_norm": 0.34545671939849854, "learning_rate": 1.3134476969707584e-05, "loss": 0.4384, "step": 37642 }, { "epoch": 0.7983499819728107, "grad_norm": 0.3435325026512146, "learning_rate": 1.3134160278981734e-05, "loss": 0.466, "step": 37643 }, { "epoch": 0.7983711904307438, "grad_norm": 0.3820311725139618, "learning_rate": 1.31338435847701e-05, "loss": 0.5072, "step": 37644 }, { "epoch": 0.7983923988886769, "grad_norm": 0.3301542401313782, "learning_rate": 1.3133526887073033e-05, "loss": 0.4321, "step": 37645 }, { "epoch": 0.7984136073466098, "grad_norm": 0.3423747718334198, "learning_rate": 1.3133210185890883e-05, "loss": 0.4224, "step": 37646 }, { "epoch": 0.7984348158045429, "grad_norm": 0.3919724225997925, "learning_rate": 1.3132893481224005e-05, "loss": 0.4307, "step": 37647 }, { "epoch": 0.7984560242624759, "grad_norm": 0.33804646134376526, "learning_rate": 1.3132576773072747e-05, "loss": 0.5016, "step": 37648 }, { "epoch": 0.7984772327204089, "grad_norm": 0.34979695081710815, "learning_rate": 1.3132260061437467e-05, "loss": 0.5604, "step": 37649 }, { "epoch": 0.7984984411783419, "grad_norm": 0.34647735953330994, "learning_rate": 1.3131943346318514e-05, "loss": 0.4346, "step": 37650 }, { "epoch": 0.798519649636275, "grad_norm": 0.42615780234336853, "learning_rate": 1.313162662771624e-05, "loss": 0.5638, "step": 37651 }, { "epoch": 0.7985408580942079, "grad_norm": 0.38921305537223816, "learning_rate": 1.3131309905631e-05, "loss": 0.4914, "step": 37652 }, { "epoch": 0.798562066552141, "grad_norm": 0.36292827129364014, "learning_rate": 1.3130993180063146e-05, "loss": 0.4778, "step": 37653 }, { "epoch": 0.798583275010074, "grad_norm": 0.3678237795829773, "learning_rate": 1.3130676451013025e-05, "loss": 0.5456, "step": 37654 }, { "epoch": 0.798604483468007, "grad_norm": 0.36221083998680115, "learning_rate": 1.3130359718480993e-05, "loss": 0.5436, "step": 37655 }, { "epoch": 0.79862569192594, "grad_norm": 0.3525674641132355, "learning_rate": 1.3130042982467401e-05, "loss": 0.511, "step": 37656 }, { "epoch": 0.7986469003838731, "grad_norm": 0.3452592194080353, "learning_rate": 1.3129726242972604e-05, "loss": 0.5131, "step": 37657 }, { "epoch": 0.7986681088418061, "grad_norm": 0.36114946007728577, "learning_rate": 1.3129409499996955e-05, "loss": 0.4859, "step": 37658 }, { "epoch": 0.7986893172997391, "grad_norm": 0.4026571810245514, "learning_rate": 1.3129092753540802e-05, "loss": 0.4124, "step": 37659 }, { "epoch": 0.7987105257576722, "grad_norm": 0.3893982172012329, "learning_rate": 1.3128776003604498e-05, "loss": 0.5139, "step": 37660 }, { "epoch": 0.7987317342156052, "grad_norm": 0.37324076890945435, "learning_rate": 1.3128459250188399e-05, "loss": 0.5168, "step": 37661 }, { "epoch": 0.7987529426735382, "grad_norm": 0.33121925592422485, "learning_rate": 1.3128142493292853e-05, "loss": 0.4559, "step": 37662 }, { "epoch": 0.7987741511314712, "grad_norm": 0.4968341290950775, "learning_rate": 1.312782573291821e-05, "loss": 0.472, "step": 37663 }, { "epoch": 0.7987953595894043, "grad_norm": 0.400168240070343, "learning_rate": 1.3127508969064834e-05, "loss": 0.6698, "step": 37664 }, { "epoch": 0.7988165680473372, "grad_norm": 0.3962550759315491, "learning_rate": 1.3127192201733065e-05, "loss": 0.5325, "step": 37665 }, { "epoch": 0.7988377765052703, "grad_norm": 0.3504987955093384, "learning_rate": 1.3126875430923263e-05, "loss": 0.5681, "step": 37666 }, { "epoch": 0.7988589849632033, "grad_norm": 0.3316740393638611, "learning_rate": 1.3126558656635778e-05, "loss": 0.454, "step": 37667 }, { "epoch": 0.7988801934211364, "grad_norm": 0.36418694257736206, "learning_rate": 1.3126241878870958e-05, "loss": 0.4731, "step": 37668 }, { "epoch": 0.7989014018790693, "grad_norm": 0.3287619352340698, "learning_rate": 1.3125925097629159e-05, "loss": 0.5018, "step": 37669 }, { "epoch": 0.7989226103370024, "grad_norm": 0.3585715889930725, "learning_rate": 1.3125608312910736e-05, "loss": 0.5386, "step": 37670 }, { "epoch": 0.7989438187949354, "grad_norm": 0.35722893476486206, "learning_rate": 1.3125291524716035e-05, "loss": 0.4613, "step": 37671 }, { "epoch": 0.7989650272528684, "grad_norm": 0.3516750931739807, "learning_rate": 1.3124974733045417e-05, "loss": 0.5059, "step": 37672 }, { "epoch": 0.7989862357108015, "grad_norm": 0.35927721858024597, "learning_rate": 1.3124657937899227e-05, "loss": 0.526, "step": 37673 }, { "epoch": 0.7990074441687345, "grad_norm": 0.36370834708213806, "learning_rate": 1.3124341139277818e-05, "loss": 0.5472, "step": 37674 }, { "epoch": 0.7990286526266676, "grad_norm": 0.34155434370040894, "learning_rate": 1.3124024337181545e-05, "loss": 0.458, "step": 37675 }, { "epoch": 0.7990498610846005, "grad_norm": 0.3593848943710327, "learning_rate": 1.3123707531610758e-05, "loss": 0.5352, "step": 37676 }, { "epoch": 0.7990710695425336, "grad_norm": 0.38853219151496887, "learning_rate": 1.3123390722565813e-05, "loss": 0.5121, "step": 37677 }, { "epoch": 0.7990922780004666, "grad_norm": 0.3320707082748413, "learning_rate": 1.312307391004706e-05, "loss": 0.4487, "step": 37678 }, { "epoch": 0.7991134864583996, "grad_norm": 0.4191743731498718, "learning_rate": 1.3122757094054848e-05, "loss": 0.4618, "step": 37679 }, { "epoch": 0.7991346949163326, "grad_norm": 0.3322303593158722, "learning_rate": 1.3122440274589536e-05, "loss": 0.4688, "step": 37680 }, { "epoch": 0.7991559033742657, "grad_norm": 0.3771437406539917, "learning_rate": 1.3122123451651473e-05, "loss": 0.5411, "step": 37681 }, { "epoch": 0.7991771118321986, "grad_norm": 0.6577334403991699, "learning_rate": 1.3121806625241007e-05, "loss": 0.5222, "step": 37682 }, { "epoch": 0.7991983202901317, "grad_norm": 0.34049323201179504, "learning_rate": 1.3121489795358499e-05, "loss": 0.5607, "step": 37683 }, { "epoch": 0.7992195287480647, "grad_norm": 0.4130668044090271, "learning_rate": 1.3121172962004298e-05, "loss": 0.5159, "step": 37684 }, { "epoch": 0.7992407372059978, "grad_norm": 0.387151837348938, "learning_rate": 1.3120856125178752e-05, "loss": 0.5082, "step": 37685 }, { "epoch": 0.7992619456639308, "grad_norm": 0.3465171754360199, "learning_rate": 1.3120539284882221e-05, "loss": 0.4648, "step": 37686 }, { "epoch": 0.7992831541218638, "grad_norm": 0.3587052822113037, "learning_rate": 1.3120222441115052e-05, "loss": 0.4635, "step": 37687 }, { "epoch": 0.7993043625797969, "grad_norm": 0.4121861159801483, "learning_rate": 1.3119905593877593e-05, "loss": 0.476, "step": 37688 }, { "epoch": 0.7993255710377298, "grad_norm": 0.3725782632827759, "learning_rate": 1.3119588743170209e-05, "loss": 0.5242, "step": 37689 }, { "epoch": 0.7993467794956629, "grad_norm": 0.375999391078949, "learning_rate": 1.3119271888993244e-05, "loss": 0.4977, "step": 37690 }, { "epoch": 0.7993679879535959, "grad_norm": 0.39378008246421814, "learning_rate": 1.3118955031347049e-05, "loss": 0.4936, "step": 37691 }, { "epoch": 0.799389196411529, "grad_norm": 0.46429598331451416, "learning_rate": 1.3118638170231983e-05, "loss": 0.5435, "step": 37692 }, { "epoch": 0.7994104048694619, "grad_norm": 0.3554321825504303, "learning_rate": 1.3118321305648393e-05, "loss": 0.482, "step": 37693 }, { "epoch": 0.799431613327395, "grad_norm": 0.34048691391944885, "learning_rate": 1.3118004437596633e-05, "loss": 0.6243, "step": 37694 }, { "epoch": 0.799452821785328, "grad_norm": 0.3649875819683075, "learning_rate": 1.3117687566077058e-05, "loss": 0.4982, "step": 37695 }, { "epoch": 0.799474030243261, "grad_norm": 0.45629334449768066, "learning_rate": 1.3117370691090013e-05, "loss": 0.4923, "step": 37696 }, { "epoch": 0.799495238701194, "grad_norm": 0.36052393913269043, "learning_rate": 1.311705381263586e-05, "loss": 0.4774, "step": 37697 }, { "epoch": 0.7995164471591271, "grad_norm": 0.3632010519504547, "learning_rate": 1.3116736930714947e-05, "loss": 0.4856, "step": 37698 }, { "epoch": 0.7995376556170601, "grad_norm": 0.36291250586509705, "learning_rate": 1.3116420045327623e-05, "loss": 0.4577, "step": 37699 }, { "epoch": 0.7995588640749931, "grad_norm": 0.38995862007141113, "learning_rate": 1.3116103156474244e-05, "loss": 0.4628, "step": 37700 }, { "epoch": 0.7995800725329262, "grad_norm": 0.36990898847579956, "learning_rate": 1.3115786264155164e-05, "loss": 0.5394, "step": 37701 }, { "epoch": 0.7996012809908591, "grad_norm": 0.43732333183288574, "learning_rate": 1.3115469368370733e-05, "loss": 0.5797, "step": 37702 }, { "epoch": 0.7996224894487922, "grad_norm": 0.3721069097518921, "learning_rate": 1.3115152469121303e-05, "loss": 0.5351, "step": 37703 }, { "epoch": 0.7996436979067252, "grad_norm": 0.3945489525794983, "learning_rate": 1.311483556640723e-05, "loss": 0.4162, "step": 37704 }, { "epoch": 0.7996649063646583, "grad_norm": 0.3429008722305298, "learning_rate": 1.311451866022886e-05, "loss": 0.4252, "step": 37705 }, { "epoch": 0.7996861148225912, "grad_norm": 0.4641575217247009, "learning_rate": 1.3114201750586552e-05, "loss": 0.4278, "step": 37706 }, { "epoch": 0.7997073232805243, "grad_norm": 0.38769999146461487, "learning_rate": 1.3113884837480656e-05, "loss": 0.5765, "step": 37707 }, { "epoch": 0.7997285317384573, "grad_norm": 0.34366270899772644, "learning_rate": 1.3113567920911522e-05, "loss": 0.4444, "step": 37708 }, { "epoch": 0.7997497401963903, "grad_norm": 0.39099937677383423, "learning_rate": 1.311325100087951e-05, "loss": 0.4328, "step": 37709 }, { "epoch": 0.7997709486543233, "grad_norm": 0.41367867588996887, "learning_rate": 1.3112934077384963e-05, "loss": 0.5016, "step": 37710 }, { "epoch": 0.7997921571122564, "grad_norm": 0.3308964669704437, "learning_rate": 1.3112617150428237e-05, "loss": 0.4595, "step": 37711 }, { "epoch": 0.7998133655701893, "grad_norm": 0.40632393956184387, "learning_rate": 1.3112300220009688e-05, "loss": 0.5163, "step": 37712 }, { "epoch": 0.7998345740281224, "grad_norm": 0.40451523661613464, "learning_rate": 1.3111983286129665e-05, "loss": 0.4975, "step": 37713 }, { "epoch": 0.7998557824860555, "grad_norm": 0.33091166615486145, "learning_rate": 1.311166634878852e-05, "loss": 0.4788, "step": 37714 }, { "epoch": 0.7998769909439885, "grad_norm": 0.35261040925979614, "learning_rate": 1.3111349407986609e-05, "loss": 0.4764, "step": 37715 }, { "epoch": 0.7998981994019215, "grad_norm": 0.36450710892677307, "learning_rate": 1.311103246372428e-05, "loss": 0.4838, "step": 37716 }, { "epoch": 0.7999194078598545, "grad_norm": 0.36937475204467773, "learning_rate": 1.3110715516001888e-05, "loss": 0.474, "step": 37717 }, { "epoch": 0.7999406163177876, "grad_norm": 0.36310428380966187, "learning_rate": 1.3110398564819789e-05, "loss": 0.5245, "step": 37718 }, { "epoch": 0.7999618247757205, "grad_norm": 0.4207688271999359, "learning_rate": 1.3110081610178327e-05, "loss": 0.5524, "step": 37719 }, { "epoch": 0.7999830332336536, "grad_norm": 0.3582226037979126, "learning_rate": 1.3109764652077858e-05, "loss": 0.4436, "step": 37720 }, { "epoch": 0.8000042416915866, "grad_norm": 0.4587731659412384, "learning_rate": 1.3109447690518742e-05, "loss": 0.493, "step": 37721 }, { "epoch": 0.8000254501495196, "grad_norm": 0.41218292713165283, "learning_rate": 1.310913072550132e-05, "loss": 0.4732, "step": 37722 }, { "epoch": 0.8000466586074526, "grad_norm": 0.3522239923477173, "learning_rate": 1.3108813757025954e-05, "loss": 0.514, "step": 37723 }, { "epoch": 0.8000678670653857, "grad_norm": 0.38170886039733887, "learning_rate": 1.3108496785092988e-05, "loss": 0.5332, "step": 37724 }, { "epoch": 0.8000890755233186, "grad_norm": 0.36988356709480286, "learning_rate": 1.310817980970278e-05, "loss": 0.5843, "step": 37725 }, { "epoch": 0.8001102839812517, "grad_norm": 0.3432096540927887, "learning_rate": 1.3107862830855687e-05, "loss": 0.4568, "step": 37726 }, { "epoch": 0.8001314924391848, "grad_norm": 0.35915252566337585, "learning_rate": 1.3107545848552046e-05, "loss": 0.4345, "step": 37727 }, { "epoch": 0.8001527008971178, "grad_norm": 0.3555143475532532, "learning_rate": 1.3107228862792228e-05, "loss": 0.5405, "step": 37728 }, { "epoch": 0.8001739093550508, "grad_norm": 0.35814255475997925, "learning_rate": 1.3106911873576573e-05, "loss": 0.4659, "step": 37729 }, { "epoch": 0.8001951178129838, "grad_norm": 0.41545429825782776, "learning_rate": 1.3106594880905438e-05, "loss": 0.3876, "step": 37730 }, { "epoch": 0.8002163262709169, "grad_norm": 0.3933734595775604, "learning_rate": 1.3106277884779175e-05, "loss": 0.5094, "step": 37731 }, { "epoch": 0.8002375347288498, "grad_norm": 0.4102429747581482, "learning_rate": 1.310596088519814e-05, "loss": 0.5343, "step": 37732 }, { "epoch": 0.8002587431867829, "grad_norm": 0.431000292301178, "learning_rate": 1.3105643882162674e-05, "loss": 0.4842, "step": 37733 }, { "epoch": 0.8002799516447159, "grad_norm": 0.38093042373657227, "learning_rate": 1.3105326875673146e-05, "loss": 0.4842, "step": 37734 }, { "epoch": 0.800301160102649, "grad_norm": 0.4259743094444275, "learning_rate": 1.3105009865729899e-05, "loss": 0.4568, "step": 37735 }, { "epoch": 0.8003223685605819, "grad_norm": 0.3993842601776123, "learning_rate": 1.3104692852333285e-05, "loss": 0.5322, "step": 37736 }, { "epoch": 0.800343577018515, "grad_norm": 0.5025430917739868, "learning_rate": 1.3104375835483661e-05, "loss": 0.4926, "step": 37737 }, { "epoch": 0.800364785476448, "grad_norm": 0.5217945575714111, "learning_rate": 1.3104058815181375e-05, "loss": 0.4146, "step": 37738 }, { "epoch": 0.800385993934381, "grad_norm": 0.3257468342781067, "learning_rate": 1.3103741791426781e-05, "loss": 0.3908, "step": 37739 }, { "epoch": 0.8004072023923141, "grad_norm": 0.3979334235191345, "learning_rate": 1.3103424764220235e-05, "loss": 0.5217, "step": 37740 }, { "epoch": 0.8004284108502471, "grad_norm": 0.3581464886665344, "learning_rate": 1.3103107733562083e-05, "loss": 0.4739, "step": 37741 }, { "epoch": 0.8004496193081801, "grad_norm": 0.4524018466472626, "learning_rate": 1.3102790699452688e-05, "loss": 0.4691, "step": 37742 }, { "epoch": 0.8004708277661131, "grad_norm": 0.3798842430114746, "learning_rate": 1.3102473661892392e-05, "loss": 0.5393, "step": 37743 }, { "epoch": 0.8004920362240462, "grad_norm": 0.3706759512424469, "learning_rate": 1.310215662088155e-05, "loss": 0.5092, "step": 37744 }, { "epoch": 0.8005132446819792, "grad_norm": 0.370077520608902, "learning_rate": 1.310183957642052e-05, "loss": 0.4713, "step": 37745 }, { "epoch": 0.8005344531399122, "grad_norm": 0.37649551033973694, "learning_rate": 1.310152252850965e-05, "loss": 0.4787, "step": 37746 }, { "epoch": 0.8005556615978452, "grad_norm": 0.41038545966148376, "learning_rate": 1.310120547714929e-05, "loss": 0.4987, "step": 37747 }, { "epoch": 0.8005768700557783, "grad_norm": 0.3262564539909363, "learning_rate": 1.3100888422339802e-05, "loss": 0.4958, "step": 37748 }, { "epoch": 0.8005980785137112, "grad_norm": 0.3966975510120392, "learning_rate": 1.3100571364081529e-05, "loss": 0.457, "step": 37749 }, { "epoch": 0.8006192869716443, "grad_norm": 0.36591431498527527, "learning_rate": 1.310025430237483e-05, "loss": 0.433, "step": 37750 }, { "epoch": 0.8006404954295773, "grad_norm": 0.3663930892944336, "learning_rate": 1.3099937237220053e-05, "loss": 0.4682, "step": 37751 }, { "epoch": 0.8006617038875103, "grad_norm": 0.3728850185871124, "learning_rate": 1.3099620168617553e-05, "loss": 0.5064, "step": 37752 }, { "epoch": 0.8006829123454433, "grad_norm": 0.45815038681030273, "learning_rate": 1.3099303096567682e-05, "loss": 0.4074, "step": 37753 }, { "epoch": 0.8007041208033764, "grad_norm": 0.3949156403541565, "learning_rate": 1.3098986021070794e-05, "loss": 0.5482, "step": 37754 }, { "epoch": 0.8007253292613095, "grad_norm": 0.32740432024002075, "learning_rate": 1.309866894212724e-05, "loss": 0.4295, "step": 37755 }, { "epoch": 0.8007465377192424, "grad_norm": 0.3808777928352356, "learning_rate": 1.3098351859737375e-05, "loss": 0.4768, "step": 37756 }, { "epoch": 0.8007677461771755, "grad_norm": 0.34488850831985474, "learning_rate": 1.309803477390155e-05, "loss": 0.5092, "step": 37757 }, { "epoch": 0.8007889546351085, "grad_norm": 0.33833691477775574, "learning_rate": 1.3097717684620116e-05, "loss": 0.4096, "step": 37758 }, { "epoch": 0.8008101630930415, "grad_norm": 0.3642581105232239, "learning_rate": 1.3097400591893426e-05, "loss": 0.544, "step": 37759 }, { "epoch": 0.8008313715509745, "grad_norm": 0.34161001443862915, "learning_rate": 1.3097083495721837e-05, "loss": 0.4599, "step": 37760 }, { "epoch": 0.8008525800089076, "grad_norm": 0.37075504660606384, "learning_rate": 1.3096766396105696e-05, "loss": 0.5007, "step": 37761 }, { "epoch": 0.8008737884668405, "grad_norm": 0.3893992602825165, "learning_rate": 1.3096449293045359e-05, "loss": 0.4998, "step": 37762 }, { "epoch": 0.8008949969247736, "grad_norm": 0.4486120045185089, "learning_rate": 1.3096132186541179e-05, "loss": 0.4791, "step": 37763 }, { "epoch": 0.8009162053827066, "grad_norm": 0.3597952425479889, "learning_rate": 1.3095815076593508e-05, "loss": 0.4521, "step": 37764 }, { "epoch": 0.8009374138406397, "grad_norm": 0.3778820037841797, "learning_rate": 1.3095497963202695e-05, "loss": 0.4785, "step": 37765 }, { "epoch": 0.8009586222985726, "grad_norm": 0.3515837490558624, "learning_rate": 1.30951808463691e-05, "loss": 0.5502, "step": 37766 }, { "epoch": 0.8009798307565057, "grad_norm": 0.3666330575942993, "learning_rate": 1.309486372609307e-05, "loss": 0.4992, "step": 37767 }, { "epoch": 0.8010010392144388, "grad_norm": 0.4325815737247467, "learning_rate": 1.309454660237496e-05, "loss": 0.4629, "step": 37768 }, { "epoch": 0.8010222476723717, "grad_norm": 0.34104034304618835, "learning_rate": 1.3094229475215121e-05, "loss": 0.4629, "step": 37769 }, { "epoch": 0.8010434561303048, "grad_norm": 0.3631097674369812, "learning_rate": 1.309391234461391e-05, "loss": 0.4892, "step": 37770 }, { "epoch": 0.8010646645882378, "grad_norm": 0.3634696900844574, "learning_rate": 1.3093595210571673e-05, "loss": 0.4614, "step": 37771 }, { "epoch": 0.8010858730461708, "grad_norm": 0.413299560546875, "learning_rate": 1.3093278073088769e-05, "loss": 0.4847, "step": 37772 }, { "epoch": 0.8011070815041038, "grad_norm": 0.3771931231021881, "learning_rate": 1.3092960932165546e-05, "loss": 0.4153, "step": 37773 }, { "epoch": 0.8011282899620369, "grad_norm": 0.3706775903701782, "learning_rate": 1.3092643787802359e-05, "loss": 0.3984, "step": 37774 }, { "epoch": 0.8011494984199699, "grad_norm": 0.29216960072517395, "learning_rate": 1.3092326639999561e-05, "loss": 0.5047, "step": 37775 }, { "epoch": 0.8011707068779029, "grad_norm": 0.3444582521915436, "learning_rate": 1.3092009488757505e-05, "loss": 0.4214, "step": 37776 }, { "epoch": 0.8011919153358359, "grad_norm": 0.35120469331741333, "learning_rate": 1.3091692334076544e-05, "loss": 0.4791, "step": 37777 }, { "epoch": 0.801213123793769, "grad_norm": 0.4322456419467926, "learning_rate": 1.3091375175957024e-05, "loss": 0.501, "step": 37778 }, { "epoch": 0.8012343322517019, "grad_norm": 0.3373275697231293, "learning_rate": 1.3091058014399307e-05, "loss": 0.402, "step": 37779 }, { "epoch": 0.801255540709635, "grad_norm": 0.35250377655029297, "learning_rate": 1.3090740849403743e-05, "loss": 0.3953, "step": 37780 }, { "epoch": 0.8012767491675681, "grad_norm": 0.3558311462402344, "learning_rate": 1.3090423680970683e-05, "loss": 0.545, "step": 37781 }, { "epoch": 0.801297957625501, "grad_norm": 0.3561097979545593, "learning_rate": 1.3090106509100483e-05, "loss": 0.5155, "step": 37782 }, { "epoch": 0.8013191660834341, "grad_norm": 0.3291743993759155, "learning_rate": 1.3089789333793488e-05, "loss": 0.4228, "step": 37783 }, { "epoch": 0.8013403745413671, "grad_norm": 0.3658716380596161, "learning_rate": 1.3089472155050058e-05, "loss": 0.5124, "step": 37784 }, { "epoch": 0.8013615829993002, "grad_norm": 0.3812408745288849, "learning_rate": 1.3089154972870546e-05, "loss": 0.5702, "step": 37785 }, { "epoch": 0.8013827914572331, "grad_norm": 0.4189110994338989, "learning_rate": 1.3088837787255303e-05, "loss": 0.555, "step": 37786 }, { "epoch": 0.8014039999151662, "grad_norm": 0.34677502512931824, "learning_rate": 1.308852059820468e-05, "loss": 0.4537, "step": 37787 }, { "epoch": 0.8014252083730992, "grad_norm": 0.3750954568386078, "learning_rate": 1.3088203405719032e-05, "loss": 0.4646, "step": 37788 }, { "epoch": 0.8014464168310322, "grad_norm": 0.49574318528175354, "learning_rate": 1.308788620979871e-05, "loss": 0.5634, "step": 37789 }, { "epoch": 0.8014676252889652, "grad_norm": 0.33858242630958557, "learning_rate": 1.308756901044407e-05, "loss": 0.4648, "step": 37790 }, { "epoch": 0.8014888337468983, "grad_norm": 0.38579627871513367, "learning_rate": 1.3087251807655458e-05, "loss": 0.402, "step": 37791 }, { "epoch": 0.8015100422048312, "grad_norm": 0.378429651260376, "learning_rate": 1.3086934601433235e-05, "loss": 0.4912, "step": 37792 }, { "epoch": 0.8015312506627643, "grad_norm": 0.3939991295337677, "learning_rate": 1.308661739177775e-05, "loss": 0.4204, "step": 37793 }, { "epoch": 0.8015524591206973, "grad_norm": 0.3638206124305725, "learning_rate": 1.3086300178689354e-05, "loss": 0.4422, "step": 37794 }, { "epoch": 0.8015736675786304, "grad_norm": 0.37050917744636536, "learning_rate": 1.3085982962168405e-05, "loss": 0.479, "step": 37795 }, { "epoch": 0.8015948760365634, "grad_norm": 0.3824926018714905, "learning_rate": 1.308566574221525e-05, "loss": 0.4977, "step": 37796 }, { "epoch": 0.8016160844944964, "grad_norm": 0.44329628348350525, "learning_rate": 1.3085348518830243e-05, "loss": 0.4841, "step": 37797 }, { "epoch": 0.8016372929524295, "grad_norm": 0.3872934877872467, "learning_rate": 1.3085031292013739e-05, "loss": 0.6005, "step": 37798 }, { "epoch": 0.8016585014103624, "grad_norm": 0.3866669833660126, "learning_rate": 1.3084714061766093e-05, "loss": 0.4352, "step": 37799 }, { "epoch": 0.8016797098682955, "grad_norm": 0.33256199955940247, "learning_rate": 1.3084396828087653e-05, "loss": 0.4415, "step": 37800 }, { "epoch": 0.8017009183262285, "grad_norm": 0.36017969250679016, "learning_rate": 1.3084079590978773e-05, "loss": 0.4248, "step": 37801 }, { "epoch": 0.8017221267841615, "grad_norm": 0.3846758306026459, "learning_rate": 1.3083762350439808e-05, "loss": 0.6001, "step": 37802 }, { "epoch": 0.8017433352420945, "grad_norm": 0.3519532084465027, "learning_rate": 1.3083445106471107e-05, "loss": 0.472, "step": 37803 }, { "epoch": 0.8017645437000276, "grad_norm": 0.35703545808792114, "learning_rate": 1.3083127859073027e-05, "loss": 0.4765, "step": 37804 }, { "epoch": 0.8017857521579606, "grad_norm": 0.3894684910774231, "learning_rate": 1.3082810608245916e-05, "loss": 0.5327, "step": 37805 }, { "epoch": 0.8018069606158936, "grad_norm": 0.379599392414093, "learning_rate": 1.3082493353990134e-05, "loss": 0.4644, "step": 37806 }, { "epoch": 0.8018281690738266, "grad_norm": 0.3618386387825012, "learning_rate": 1.308217609630603e-05, "loss": 0.4746, "step": 37807 }, { "epoch": 0.8018493775317597, "grad_norm": 0.3761121928691864, "learning_rate": 1.3081858835193955e-05, "loss": 0.4858, "step": 37808 }, { "epoch": 0.8018705859896927, "grad_norm": 0.33502131700515747, "learning_rate": 1.308154157065426e-05, "loss": 0.4965, "step": 37809 }, { "epoch": 0.8018917944476257, "grad_norm": 0.3318833112716675, "learning_rate": 1.3081224302687303e-05, "loss": 0.451, "step": 37810 }, { "epoch": 0.8019130029055588, "grad_norm": 0.38255491852760315, "learning_rate": 1.3080907031293437e-05, "loss": 0.514, "step": 37811 }, { "epoch": 0.8019342113634917, "grad_norm": 0.3524913489818573, "learning_rate": 1.3080589756473012e-05, "loss": 0.5124, "step": 37812 }, { "epoch": 0.8019554198214248, "grad_norm": 0.5186400413513184, "learning_rate": 1.3080272478226383e-05, "loss": 0.5064, "step": 37813 }, { "epoch": 0.8019766282793578, "grad_norm": 0.35851073265075684, "learning_rate": 1.3079955196553903e-05, "loss": 0.5616, "step": 37814 }, { "epoch": 0.8019978367372909, "grad_norm": 0.3740232586860657, "learning_rate": 1.3079637911455917e-05, "loss": 0.4413, "step": 37815 }, { "epoch": 0.8020190451952238, "grad_norm": 0.3532061278820038, "learning_rate": 1.307932062293279e-05, "loss": 0.5981, "step": 37816 }, { "epoch": 0.8020402536531569, "grad_norm": 0.3448304831981659, "learning_rate": 1.3079003330984868e-05, "loss": 0.4431, "step": 37817 }, { "epoch": 0.8020614621110899, "grad_norm": 0.34024712443351746, "learning_rate": 1.3078686035612505e-05, "loss": 0.4733, "step": 37818 }, { "epoch": 0.8020826705690229, "grad_norm": 0.3534889221191406, "learning_rate": 1.3078368736816055e-05, "loss": 0.4345, "step": 37819 }, { "epoch": 0.8021038790269559, "grad_norm": 0.38920772075653076, "learning_rate": 1.3078051434595868e-05, "loss": 0.5123, "step": 37820 }, { "epoch": 0.802125087484889, "grad_norm": 0.3360680639743805, "learning_rate": 1.3077734128952303e-05, "loss": 0.4713, "step": 37821 }, { "epoch": 0.802146295942822, "grad_norm": 0.36919569969177246, "learning_rate": 1.3077416819885707e-05, "loss": 0.5081, "step": 37822 }, { "epoch": 0.802167504400755, "grad_norm": 0.35670652985572815, "learning_rate": 1.3077099507396431e-05, "loss": 0.5226, "step": 37823 }, { "epoch": 0.8021887128586881, "grad_norm": 0.3208361268043518, "learning_rate": 1.3076782191484836e-05, "loss": 0.4701, "step": 37824 }, { "epoch": 0.802209921316621, "grad_norm": 0.3484332263469696, "learning_rate": 1.307646487215127e-05, "loss": 0.4736, "step": 37825 }, { "epoch": 0.8022311297745541, "grad_norm": 0.34967663884162903, "learning_rate": 1.3076147549396083e-05, "loss": 0.4713, "step": 37826 }, { "epoch": 0.8022523382324871, "grad_norm": 0.3760203421115875, "learning_rate": 1.3075830223219636e-05, "loss": 0.5103, "step": 37827 }, { "epoch": 0.8022735466904202, "grad_norm": 0.48260441422462463, "learning_rate": 1.3075512893622278e-05, "loss": 0.5092, "step": 37828 }, { "epoch": 0.8022947551483531, "grad_norm": 0.4225064516067505, "learning_rate": 1.3075195560604354e-05, "loss": 0.527, "step": 37829 }, { "epoch": 0.8023159636062862, "grad_norm": 0.45189130306243896, "learning_rate": 1.307487822416623e-05, "loss": 0.4527, "step": 37830 }, { "epoch": 0.8023371720642192, "grad_norm": 0.9634535312652588, "learning_rate": 1.3074560884308254e-05, "loss": 0.4895, "step": 37831 }, { "epoch": 0.8023583805221522, "grad_norm": 0.3720308840274811, "learning_rate": 1.3074243541030776e-05, "loss": 0.4358, "step": 37832 }, { "epoch": 0.8023795889800852, "grad_norm": 0.3299192488193512, "learning_rate": 1.307392619433415e-05, "loss": 0.5104, "step": 37833 }, { "epoch": 0.8024007974380183, "grad_norm": 0.363669753074646, "learning_rate": 1.3073608844218733e-05, "loss": 0.4696, "step": 37834 }, { "epoch": 0.8024220058959513, "grad_norm": 0.35216522216796875, "learning_rate": 1.3073291490684871e-05, "loss": 0.4103, "step": 37835 }, { "epoch": 0.8024432143538843, "grad_norm": 0.4353484511375427, "learning_rate": 1.3072974133732923e-05, "loss": 0.5019, "step": 37836 }, { "epoch": 0.8024644228118174, "grad_norm": 0.37149909138679504, "learning_rate": 1.307265677336324e-05, "loss": 0.5512, "step": 37837 }, { "epoch": 0.8024856312697504, "grad_norm": 0.34360504150390625, "learning_rate": 1.3072339409576175e-05, "loss": 0.4765, "step": 37838 }, { "epoch": 0.8025068397276834, "grad_norm": 0.3533223271369934, "learning_rate": 1.3072022042372081e-05, "loss": 0.4314, "step": 37839 }, { "epoch": 0.8025280481856164, "grad_norm": 0.3693099319934845, "learning_rate": 1.307170467175131e-05, "loss": 0.4627, "step": 37840 }, { "epoch": 0.8025492566435495, "grad_norm": 0.38410866260528564, "learning_rate": 1.3071387297714218e-05, "loss": 0.4748, "step": 37841 }, { "epoch": 0.8025704651014824, "grad_norm": 0.39591681957244873, "learning_rate": 1.3071069920261152e-05, "loss": 0.4329, "step": 37842 }, { "epoch": 0.8025916735594155, "grad_norm": 0.39829301834106445, "learning_rate": 1.3070752539392469e-05, "loss": 0.5395, "step": 37843 }, { "epoch": 0.8026128820173485, "grad_norm": 0.38686618208885193, "learning_rate": 1.3070435155108524e-05, "loss": 0.4551, "step": 37844 }, { "epoch": 0.8026340904752816, "grad_norm": 0.43113234639167786, "learning_rate": 1.3070117767409669e-05, "loss": 0.4733, "step": 37845 }, { "epoch": 0.8026552989332145, "grad_norm": 0.33743423223495483, "learning_rate": 1.3069800376296253e-05, "loss": 0.4399, "step": 37846 }, { "epoch": 0.8026765073911476, "grad_norm": 0.3697505295276642, "learning_rate": 1.3069482981768633e-05, "loss": 0.4788, "step": 37847 }, { "epoch": 0.8026977158490806, "grad_norm": 0.3235563635826111, "learning_rate": 1.3069165583827159e-05, "loss": 0.5121, "step": 37848 }, { "epoch": 0.8027189243070136, "grad_norm": 0.3519798517227173, "learning_rate": 1.3068848182472187e-05, "loss": 0.4744, "step": 37849 }, { "epoch": 0.8027401327649467, "grad_norm": 0.3295847475528717, "learning_rate": 1.3068530777704069e-05, "loss": 0.4474, "step": 37850 }, { "epoch": 0.8027613412228797, "grad_norm": 0.3941883444786072, "learning_rate": 1.3068213369523155e-05, "loss": 0.4782, "step": 37851 }, { "epoch": 0.8027825496808128, "grad_norm": 0.4912370443344116, "learning_rate": 1.3067895957929804e-05, "loss": 0.6244, "step": 37852 }, { "epoch": 0.8028037581387457, "grad_norm": 0.4349597096443176, "learning_rate": 1.3067578542924367e-05, "loss": 0.5064, "step": 37853 }, { "epoch": 0.8028249665966788, "grad_norm": 0.3818187415599823, "learning_rate": 1.3067261124507192e-05, "loss": 0.4342, "step": 37854 }, { "epoch": 0.8028461750546118, "grad_norm": 0.3903442621231079, "learning_rate": 1.3066943702678636e-05, "loss": 0.4846, "step": 37855 }, { "epoch": 0.8028673835125448, "grad_norm": 0.39826637506484985, "learning_rate": 1.3066626277439054e-05, "loss": 0.4696, "step": 37856 }, { "epoch": 0.8028885919704778, "grad_norm": 0.34914353489875793, "learning_rate": 1.3066308848788793e-05, "loss": 0.4334, "step": 37857 }, { "epoch": 0.8029098004284109, "grad_norm": 0.44198715686798096, "learning_rate": 1.3065991416728215e-05, "loss": 0.417, "step": 37858 }, { "epoch": 0.8029310088863438, "grad_norm": 0.3747788965702057, "learning_rate": 1.3065673981257668e-05, "loss": 0.5264, "step": 37859 }, { "epoch": 0.8029522173442769, "grad_norm": 0.3592051863670349, "learning_rate": 1.30653565423775e-05, "loss": 0.4803, "step": 37860 }, { "epoch": 0.8029734258022099, "grad_norm": 0.3255749046802521, "learning_rate": 1.3065039100088072e-05, "loss": 0.515, "step": 37861 }, { "epoch": 0.802994634260143, "grad_norm": 0.3478797972202301, "learning_rate": 1.3064721654389736e-05, "loss": 0.4645, "step": 37862 }, { "epoch": 0.803015842718076, "grad_norm": 0.36536550521850586, "learning_rate": 1.3064404205282842e-05, "loss": 0.552, "step": 37863 }, { "epoch": 0.803037051176009, "grad_norm": 2.2379021644592285, "learning_rate": 1.3064086752767744e-05, "loss": 0.5533, "step": 37864 }, { "epoch": 0.8030582596339421, "grad_norm": 0.38799262046813965, "learning_rate": 1.3063769296844792e-05, "loss": 0.4659, "step": 37865 }, { "epoch": 0.803079468091875, "grad_norm": 0.3898756206035614, "learning_rate": 1.3063451837514347e-05, "loss": 0.5398, "step": 37866 }, { "epoch": 0.8031006765498081, "grad_norm": 0.3636973798274994, "learning_rate": 1.3063134374776758e-05, "loss": 0.5398, "step": 37867 }, { "epoch": 0.8031218850077411, "grad_norm": 0.36347389221191406, "learning_rate": 1.3062816908632375e-05, "loss": 0.4592, "step": 37868 }, { "epoch": 0.8031430934656741, "grad_norm": 0.624235987663269, "learning_rate": 1.306249943908155e-05, "loss": 0.4633, "step": 37869 }, { "epoch": 0.8031643019236071, "grad_norm": 0.3833083212375641, "learning_rate": 1.3062181966124644e-05, "loss": 0.491, "step": 37870 }, { "epoch": 0.8031855103815402, "grad_norm": 0.4050569534301758, "learning_rate": 1.3061864489762007e-05, "loss": 0.4849, "step": 37871 }, { "epoch": 0.8032067188394731, "grad_norm": 0.4156023859977722, "learning_rate": 1.3061547009993991e-05, "loss": 0.5454, "step": 37872 }, { "epoch": 0.8032279272974062, "grad_norm": 0.3351912796497345, "learning_rate": 1.306122952682095e-05, "loss": 0.4726, "step": 37873 }, { "epoch": 0.8032491357553392, "grad_norm": 0.3675525188446045, "learning_rate": 1.3060912040243228e-05, "loss": 0.4467, "step": 37874 }, { "epoch": 0.8032703442132723, "grad_norm": 0.36595070362091064, "learning_rate": 1.3060594550261193e-05, "loss": 0.4917, "step": 37875 }, { "epoch": 0.8032915526712052, "grad_norm": 0.4313582181930542, "learning_rate": 1.3060277056875192e-05, "loss": 0.4986, "step": 37876 }, { "epoch": 0.8033127611291383, "grad_norm": 0.3260704576969147, "learning_rate": 1.3059959560085576e-05, "loss": 0.4413, "step": 37877 }, { "epoch": 0.8033339695870714, "grad_norm": 0.4248066246509552, "learning_rate": 1.30596420598927e-05, "loss": 0.5843, "step": 37878 }, { "epoch": 0.8033551780450043, "grad_norm": 0.3178723156452179, "learning_rate": 1.3059324556296912e-05, "loss": 0.4599, "step": 37879 }, { "epoch": 0.8033763865029374, "grad_norm": 1.659218430519104, "learning_rate": 1.3059007049298575e-05, "loss": 0.4873, "step": 37880 }, { "epoch": 0.8033975949608704, "grad_norm": 0.3464720547199249, "learning_rate": 1.3058689538898035e-05, "loss": 0.484, "step": 37881 }, { "epoch": 0.8034188034188035, "grad_norm": 0.35946667194366455, "learning_rate": 1.3058372025095648e-05, "loss": 0.5016, "step": 37882 }, { "epoch": 0.8034400118767364, "grad_norm": 0.36415430903434753, "learning_rate": 1.3058054507891767e-05, "loss": 0.4715, "step": 37883 }, { "epoch": 0.8034612203346695, "grad_norm": 0.3571162521839142, "learning_rate": 1.3057736987286743e-05, "loss": 0.4957, "step": 37884 }, { "epoch": 0.8034824287926025, "grad_norm": 0.3315846621990204, "learning_rate": 1.3057419463280932e-05, "loss": 0.468, "step": 37885 }, { "epoch": 0.8035036372505355, "grad_norm": 0.325537770986557, "learning_rate": 1.3057101935874682e-05, "loss": 0.4902, "step": 37886 }, { "epoch": 0.8035248457084685, "grad_norm": 0.3767782747745514, "learning_rate": 1.3056784405068351e-05, "loss": 0.5067, "step": 37887 }, { "epoch": 0.8035460541664016, "grad_norm": 0.38491252064704895, "learning_rate": 1.3056466870862293e-05, "loss": 0.448, "step": 37888 }, { "epoch": 0.8035672626243345, "grad_norm": 0.349997341632843, "learning_rate": 1.305614933325686e-05, "loss": 0.5079, "step": 37889 }, { "epoch": 0.8035884710822676, "grad_norm": 0.3342423439025879, "learning_rate": 1.3055831792252402e-05, "loss": 0.443, "step": 37890 }, { "epoch": 0.8036096795402007, "grad_norm": 0.439176082611084, "learning_rate": 1.3055514247849275e-05, "loss": 0.5171, "step": 37891 }, { "epoch": 0.8036308879981336, "grad_norm": 0.5672287344932556, "learning_rate": 1.3055196700047832e-05, "loss": 0.5344, "step": 37892 }, { "epoch": 0.8036520964560667, "grad_norm": 0.3846755027770996, "learning_rate": 1.3054879148848426e-05, "loss": 0.5462, "step": 37893 }, { "epoch": 0.8036733049139997, "grad_norm": 0.38490042090415955, "learning_rate": 1.3054561594251408e-05, "loss": 0.5035, "step": 37894 }, { "epoch": 0.8036945133719328, "grad_norm": 0.3685188591480255, "learning_rate": 1.3054244036257133e-05, "loss": 0.5636, "step": 37895 }, { "epoch": 0.8037157218298657, "grad_norm": 0.3723483681678772, "learning_rate": 1.3053926474865957e-05, "loss": 0.4951, "step": 37896 }, { "epoch": 0.8037369302877988, "grad_norm": 0.39832016825675964, "learning_rate": 1.3053608910078228e-05, "loss": 0.4837, "step": 37897 }, { "epoch": 0.8037581387457318, "grad_norm": 0.36724546551704407, "learning_rate": 1.3053291341894303e-05, "loss": 0.5065, "step": 37898 }, { "epoch": 0.8037793472036648, "grad_norm": 0.381552129983902, "learning_rate": 1.3052973770314534e-05, "loss": 0.4469, "step": 37899 }, { "epoch": 0.8038005556615978, "grad_norm": 0.38283175230026245, "learning_rate": 1.3052656195339273e-05, "loss": 0.4867, "step": 37900 }, { "epoch": 0.8038217641195309, "grad_norm": 0.3601139783859253, "learning_rate": 1.3052338616968876e-05, "loss": 0.4885, "step": 37901 }, { "epoch": 0.8038429725774638, "grad_norm": 0.3980937600135803, "learning_rate": 1.3052021035203693e-05, "loss": 0.4918, "step": 37902 }, { "epoch": 0.8038641810353969, "grad_norm": 0.4360485076904297, "learning_rate": 1.3051703450044079e-05, "loss": 0.5486, "step": 37903 }, { "epoch": 0.80388538949333, "grad_norm": 0.3451692461967468, "learning_rate": 1.305138586149039e-05, "loss": 0.4726, "step": 37904 }, { "epoch": 0.803906597951263, "grad_norm": 0.3336225748062134, "learning_rate": 1.305106826954297e-05, "loss": 0.514, "step": 37905 }, { "epoch": 0.803927806409196, "grad_norm": 0.47680604457855225, "learning_rate": 1.3050750674202182e-05, "loss": 0.4876, "step": 37906 }, { "epoch": 0.803949014867129, "grad_norm": 0.35337305068969727, "learning_rate": 1.3050433075468376e-05, "loss": 0.4906, "step": 37907 }, { "epoch": 0.8039702233250621, "grad_norm": 0.3270210921764374, "learning_rate": 1.3050115473341904e-05, "loss": 0.4651, "step": 37908 }, { "epoch": 0.803991431782995, "grad_norm": 0.35583415627479553, "learning_rate": 1.3049797867823122e-05, "loss": 0.4333, "step": 37909 }, { "epoch": 0.8040126402409281, "grad_norm": 0.334640234708786, "learning_rate": 1.304948025891238e-05, "loss": 0.4079, "step": 37910 }, { "epoch": 0.8040338486988611, "grad_norm": 0.36698436737060547, "learning_rate": 1.304916264661003e-05, "loss": 0.4593, "step": 37911 }, { "epoch": 0.8040550571567942, "grad_norm": 0.451229989528656, "learning_rate": 1.304884503091643e-05, "loss": 0.5776, "step": 37912 }, { "epoch": 0.8040762656147271, "grad_norm": 0.4372338354587555, "learning_rate": 1.3048527411831931e-05, "loss": 0.561, "step": 37913 }, { "epoch": 0.8040974740726602, "grad_norm": 0.3427974581718445, "learning_rate": 1.3048209789356884e-05, "loss": 0.4462, "step": 37914 }, { "epoch": 0.8041186825305932, "grad_norm": 0.3643137812614441, "learning_rate": 1.3047892163491649e-05, "loss": 0.4436, "step": 37915 }, { "epoch": 0.8041398909885262, "grad_norm": 0.35974767804145813, "learning_rate": 1.3047574534236574e-05, "loss": 0.4473, "step": 37916 }, { "epoch": 0.8041610994464592, "grad_norm": 0.3933725953102112, "learning_rate": 1.304725690159201e-05, "loss": 0.503, "step": 37917 }, { "epoch": 0.8041823079043923, "grad_norm": 0.3383038341999054, "learning_rate": 1.3046939265558318e-05, "loss": 0.5516, "step": 37918 }, { "epoch": 0.8042035163623253, "grad_norm": 0.35623735189437866, "learning_rate": 1.304662162613584e-05, "loss": 0.4742, "step": 37919 }, { "epoch": 0.8042247248202583, "grad_norm": 0.3275868594646454, "learning_rate": 1.3046303983324942e-05, "loss": 0.3981, "step": 37920 }, { "epoch": 0.8042459332781914, "grad_norm": 0.33264562487602234, "learning_rate": 1.3045986337125969e-05, "loss": 0.4375, "step": 37921 }, { "epoch": 0.8042671417361243, "grad_norm": 0.3503211438655853, "learning_rate": 1.3045668687539274e-05, "loss": 0.5199, "step": 37922 }, { "epoch": 0.8042883501940574, "grad_norm": 0.3794170022010803, "learning_rate": 1.3045351034565217e-05, "loss": 0.4987, "step": 37923 }, { "epoch": 0.8043095586519904, "grad_norm": 0.357454776763916, "learning_rate": 1.3045033378204146e-05, "loss": 0.5379, "step": 37924 }, { "epoch": 0.8043307671099235, "grad_norm": 0.39666327834129333, "learning_rate": 1.304471571845641e-05, "loss": 0.436, "step": 37925 }, { "epoch": 0.8043519755678564, "grad_norm": 0.3291880190372467, "learning_rate": 1.3044398055322374e-05, "loss": 0.4698, "step": 37926 }, { "epoch": 0.8043731840257895, "grad_norm": 0.4059693515300751, "learning_rate": 1.3044080388802383e-05, "loss": 0.3974, "step": 37927 }, { "epoch": 0.8043943924837225, "grad_norm": 0.4165763556957245, "learning_rate": 1.3043762718896793e-05, "loss": 0.5318, "step": 37928 }, { "epoch": 0.8044156009416555, "grad_norm": 0.38223856687545776, "learning_rate": 1.3043445045605956e-05, "loss": 0.5392, "step": 37929 }, { "epoch": 0.8044368093995885, "grad_norm": 0.32356390357017517, "learning_rate": 1.3043127368930224e-05, "loss": 0.499, "step": 37930 }, { "epoch": 0.8044580178575216, "grad_norm": 0.4221072494983673, "learning_rate": 1.3042809688869956e-05, "loss": 0.4724, "step": 37931 }, { "epoch": 0.8044792263154547, "grad_norm": 0.4177921712398529, "learning_rate": 1.3042492005425497e-05, "loss": 0.5075, "step": 37932 }, { "epoch": 0.8045004347733876, "grad_norm": 0.34853944182395935, "learning_rate": 1.3042174318597205e-05, "loss": 0.424, "step": 37933 }, { "epoch": 0.8045216432313207, "grad_norm": 0.417508989572525, "learning_rate": 1.3041856628385437e-05, "loss": 0.4774, "step": 37934 }, { "epoch": 0.8045428516892537, "grad_norm": 0.3443838357925415, "learning_rate": 1.3041538934790542e-05, "loss": 0.4295, "step": 37935 }, { "epoch": 0.8045640601471867, "grad_norm": 0.2962424159049988, "learning_rate": 1.3041221237812871e-05, "loss": 0.4038, "step": 37936 }, { "epoch": 0.8045852686051197, "grad_norm": 0.37965336441993713, "learning_rate": 1.3040903537452782e-05, "loss": 0.514, "step": 37937 }, { "epoch": 0.8046064770630528, "grad_norm": 0.42099836468696594, "learning_rate": 1.3040585833710626e-05, "loss": 0.5253, "step": 37938 }, { "epoch": 0.8046276855209857, "grad_norm": 0.3747555911540985, "learning_rate": 1.3040268126586755e-05, "loss": 0.473, "step": 37939 }, { "epoch": 0.8046488939789188, "grad_norm": 0.4013179838657379, "learning_rate": 1.3039950416081529e-05, "loss": 0.5189, "step": 37940 }, { "epoch": 0.8046701024368518, "grad_norm": 0.4377041757106781, "learning_rate": 1.3039632702195294e-05, "loss": 0.4613, "step": 37941 }, { "epoch": 0.8046913108947849, "grad_norm": 0.39832955598831177, "learning_rate": 1.3039314984928404e-05, "loss": 0.5217, "step": 37942 }, { "epoch": 0.8047125193527178, "grad_norm": 0.3371623456478119, "learning_rate": 1.3038997264281217e-05, "loss": 0.4913, "step": 37943 }, { "epoch": 0.8047337278106509, "grad_norm": 0.32680022716522217, "learning_rate": 1.3038679540254082e-05, "loss": 0.5294, "step": 37944 }, { "epoch": 0.804754936268584, "grad_norm": 0.36000126600265503, "learning_rate": 1.3038361812847353e-05, "loss": 0.5041, "step": 37945 }, { "epoch": 0.8047761447265169, "grad_norm": 0.33850905299186707, "learning_rate": 1.3038044082061387e-05, "loss": 0.4964, "step": 37946 }, { "epoch": 0.80479735318445, "grad_norm": 0.36590051651000977, "learning_rate": 1.3037726347896533e-05, "loss": 0.4561, "step": 37947 }, { "epoch": 0.804818561642383, "grad_norm": 0.3642056882381439, "learning_rate": 1.3037408610353148e-05, "loss": 0.415, "step": 37948 }, { "epoch": 0.804839770100316, "grad_norm": 0.35883232951164246, "learning_rate": 1.3037090869431584e-05, "loss": 0.5165, "step": 37949 }, { "epoch": 0.804860978558249, "grad_norm": 0.44787341356277466, "learning_rate": 1.3036773125132189e-05, "loss": 0.5429, "step": 37950 }, { "epoch": 0.8048821870161821, "grad_norm": 0.3677595853805542, "learning_rate": 1.3036455377455327e-05, "loss": 0.4429, "step": 37951 }, { "epoch": 0.804903395474115, "grad_norm": 0.36237621307373047, "learning_rate": 1.3036137626401342e-05, "loss": 0.4608, "step": 37952 }, { "epoch": 0.8049246039320481, "grad_norm": 0.37627649307250977, "learning_rate": 1.3035819871970592e-05, "loss": 0.5345, "step": 37953 }, { "epoch": 0.8049458123899811, "grad_norm": 0.36061426997184753, "learning_rate": 1.3035502114163432e-05, "loss": 0.4668, "step": 37954 }, { "epoch": 0.8049670208479142, "grad_norm": 0.4527563154697418, "learning_rate": 1.303518435298021e-05, "loss": 0.4934, "step": 37955 }, { "epoch": 0.8049882293058471, "grad_norm": 0.34971413016319275, "learning_rate": 1.3034866588421284e-05, "loss": 0.5016, "step": 37956 }, { "epoch": 0.8050094377637802, "grad_norm": 0.34117579460144043, "learning_rate": 1.3034548820487002e-05, "loss": 0.425, "step": 37957 }, { "epoch": 0.8050306462217132, "grad_norm": 0.3264201581478119, "learning_rate": 1.3034231049177726e-05, "loss": 0.402, "step": 37958 }, { "epoch": 0.8050518546796462, "grad_norm": 0.3624044954776764, "learning_rate": 1.3033913274493801e-05, "loss": 0.4607, "step": 37959 }, { "epoch": 0.8050730631375793, "grad_norm": 0.3900268077850342, "learning_rate": 1.3033595496435586e-05, "loss": 0.5037, "step": 37960 }, { "epoch": 0.8050942715955123, "grad_norm": 0.4547101855278015, "learning_rate": 1.3033277715003431e-05, "loss": 0.5298, "step": 37961 }, { "epoch": 0.8051154800534454, "grad_norm": 0.39481183886528015, "learning_rate": 1.3032959930197695e-05, "loss": 0.4772, "step": 37962 }, { "epoch": 0.8051366885113783, "grad_norm": 0.39295849204063416, "learning_rate": 1.3032642142018726e-05, "loss": 0.5506, "step": 37963 }, { "epoch": 0.8051578969693114, "grad_norm": 0.3889564275741577, "learning_rate": 1.3032324350466876e-05, "loss": 0.5773, "step": 37964 }, { "epoch": 0.8051791054272444, "grad_norm": 0.592240035533905, "learning_rate": 1.3032006555542501e-05, "loss": 0.5562, "step": 37965 }, { "epoch": 0.8052003138851774, "grad_norm": 0.35550281405448914, "learning_rate": 1.3031688757245957e-05, "loss": 0.5019, "step": 37966 }, { "epoch": 0.8052215223431104, "grad_norm": 0.3645423948764801, "learning_rate": 1.3031370955577592e-05, "loss": 0.4947, "step": 37967 }, { "epoch": 0.8052427308010435, "grad_norm": 0.3571324348449707, "learning_rate": 1.3031053150537766e-05, "loss": 0.5089, "step": 37968 }, { "epoch": 0.8052639392589764, "grad_norm": 0.37038958072662354, "learning_rate": 1.303073534212683e-05, "loss": 0.5031, "step": 37969 }, { "epoch": 0.8052851477169095, "grad_norm": 0.3794986307621002, "learning_rate": 1.3030417530345129e-05, "loss": 0.4932, "step": 37970 }, { "epoch": 0.8053063561748425, "grad_norm": 0.4276489317417145, "learning_rate": 1.3030099715193033e-05, "loss": 0.5198, "step": 37971 }, { "epoch": 0.8053275646327755, "grad_norm": 0.5026589632034302, "learning_rate": 1.302978189667088e-05, "loss": 0.4803, "step": 37972 }, { "epoch": 0.8053487730907086, "grad_norm": 1.370104432106018, "learning_rate": 1.3029464074779033e-05, "loss": 0.5427, "step": 37973 }, { "epoch": 0.8053699815486416, "grad_norm": 0.3483150005340576, "learning_rate": 1.3029146249517842e-05, "loss": 0.4195, "step": 37974 }, { "epoch": 0.8053911900065747, "grad_norm": 0.3819125294685364, "learning_rate": 1.302882842088766e-05, "loss": 0.4923, "step": 37975 }, { "epoch": 0.8054123984645076, "grad_norm": 0.36574694514274597, "learning_rate": 1.3028510588888843e-05, "loss": 0.481, "step": 37976 }, { "epoch": 0.8054336069224407, "grad_norm": 0.3531563878059387, "learning_rate": 1.302819275352174e-05, "loss": 0.4629, "step": 37977 }, { "epoch": 0.8054548153803737, "grad_norm": 0.3528658151626587, "learning_rate": 1.3027874914786711e-05, "loss": 0.3261, "step": 37978 }, { "epoch": 0.8054760238383067, "grad_norm": 0.38230758905410767, "learning_rate": 1.3027557072684103e-05, "loss": 0.5153, "step": 37979 }, { "epoch": 0.8054972322962397, "grad_norm": 0.32658523321151733, "learning_rate": 1.3027239227214275e-05, "loss": 0.3558, "step": 37980 }, { "epoch": 0.8055184407541728, "grad_norm": 0.3844428062438965, "learning_rate": 1.3026921378377573e-05, "loss": 0.4636, "step": 37981 }, { "epoch": 0.8055396492121057, "grad_norm": 0.4068009853363037, "learning_rate": 1.3026603526174361e-05, "loss": 0.5127, "step": 37982 }, { "epoch": 0.8055608576700388, "grad_norm": 0.3511069715023041, "learning_rate": 1.3026285670604983e-05, "loss": 0.4663, "step": 37983 }, { "epoch": 0.8055820661279718, "grad_norm": 0.3872513473033905, "learning_rate": 1.3025967811669794e-05, "loss": 0.4454, "step": 37984 }, { "epoch": 0.8056032745859049, "grad_norm": 0.3512641489505768, "learning_rate": 1.3025649949369157e-05, "loss": 0.4501, "step": 37985 }, { "epoch": 0.8056244830438379, "grad_norm": 0.36921462416648865, "learning_rate": 1.3025332083703414e-05, "loss": 0.5638, "step": 37986 }, { "epoch": 0.8056456915017709, "grad_norm": 0.4523867666721344, "learning_rate": 1.3025014214672924e-05, "loss": 0.524, "step": 37987 }, { "epoch": 0.805666899959704, "grad_norm": 0.362081378698349, "learning_rate": 1.302469634227804e-05, "loss": 0.4946, "step": 37988 }, { "epoch": 0.8056881084176369, "grad_norm": 0.347224622964859, "learning_rate": 1.3024378466519115e-05, "loss": 0.5136, "step": 37989 }, { "epoch": 0.80570931687557, "grad_norm": 0.4092068672180176, "learning_rate": 1.30240605873965e-05, "loss": 0.4354, "step": 37990 }, { "epoch": 0.805730525333503, "grad_norm": 0.4045353829860687, "learning_rate": 1.3023742704910553e-05, "loss": 0.4967, "step": 37991 }, { "epoch": 0.805751733791436, "grad_norm": 0.41696882247924805, "learning_rate": 1.3023424819061628e-05, "loss": 0.4699, "step": 37992 }, { "epoch": 0.805772942249369, "grad_norm": 0.3286718726158142, "learning_rate": 1.3023106929850071e-05, "loss": 0.4442, "step": 37993 }, { "epoch": 0.8057941507073021, "grad_norm": 0.3512807786464691, "learning_rate": 1.3022789037276246e-05, "loss": 0.4825, "step": 37994 }, { "epoch": 0.8058153591652351, "grad_norm": 0.3350631594657898, "learning_rate": 1.3022471141340498e-05, "loss": 0.5044, "step": 37995 }, { "epoch": 0.8058365676231681, "grad_norm": 0.363503634929657, "learning_rate": 1.3022153242043183e-05, "loss": 0.4082, "step": 37996 }, { "epoch": 0.8058577760811011, "grad_norm": 0.32135891914367676, "learning_rate": 1.3021835339384659e-05, "loss": 0.4683, "step": 37997 }, { "epoch": 0.8058789845390342, "grad_norm": 0.3696911334991455, "learning_rate": 1.3021517433365272e-05, "loss": 0.5164, "step": 37998 }, { "epoch": 0.8059001929969671, "grad_norm": 0.38011330366134644, "learning_rate": 1.3021199523985381e-05, "loss": 0.5024, "step": 37999 }, { "epoch": 0.8059214014549002, "grad_norm": 0.37152040004730225, "learning_rate": 1.3020881611245342e-05, "loss": 0.5487, "step": 38000 }, { "epoch": 0.8059426099128333, "grad_norm": 0.36495083570480347, "learning_rate": 1.30205636951455e-05, "loss": 0.4949, "step": 38001 }, { "epoch": 0.8059638183707662, "grad_norm": 1.0300986766815186, "learning_rate": 1.3020245775686213e-05, "loss": 0.5068, "step": 38002 }, { "epoch": 0.8059850268286993, "grad_norm": 1.3042100667953491, "learning_rate": 1.301992785286784e-05, "loss": 0.4457, "step": 38003 }, { "epoch": 0.8060062352866323, "grad_norm": 0.3379080593585968, "learning_rate": 1.3019609926690724e-05, "loss": 0.4441, "step": 38004 }, { "epoch": 0.8060274437445654, "grad_norm": 0.34400230646133423, "learning_rate": 1.3019291997155229e-05, "loss": 0.4755, "step": 38005 }, { "epoch": 0.8060486522024983, "grad_norm": 0.4326227903366089, "learning_rate": 1.30189740642617e-05, "loss": 0.4877, "step": 38006 }, { "epoch": 0.8060698606604314, "grad_norm": 0.37958237528800964, "learning_rate": 1.3018656128010494e-05, "loss": 0.5398, "step": 38007 }, { "epoch": 0.8060910691183644, "grad_norm": 0.3265835642814636, "learning_rate": 1.3018338188401967e-05, "loss": 0.4172, "step": 38008 }, { "epoch": 0.8061122775762974, "grad_norm": 0.3823329210281372, "learning_rate": 1.301802024543647e-05, "loss": 0.5078, "step": 38009 }, { "epoch": 0.8061334860342304, "grad_norm": 0.36043912172317505, "learning_rate": 1.3017702299114354e-05, "loss": 0.4878, "step": 38010 }, { "epoch": 0.8061546944921635, "grad_norm": 0.46119996905326843, "learning_rate": 1.301738434943598e-05, "loss": 0.4776, "step": 38011 }, { "epoch": 0.8061759029500964, "grad_norm": 0.45787134766578674, "learning_rate": 1.3017066396401693e-05, "loss": 0.4376, "step": 38012 }, { "epoch": 0.8061971114080295, "grad_norm": 0.39086177945137024, "learning_rate": 1.3016748440011856e-05, "loss": 0.504, "step": 38013 }, { "epoch": 0.8062183198659626, "grad_norm": 0.6393349170684814, "learning_rate": 1.3016430480266815e-05, "loss": 0.476, "step": 38014 }, { "epoch": 0.8062395283238956, "grad_norm": 0.6659348011016846, "learning_rate": 1.3016112517166923e-05, "loss": 0.4752, "step": 38015 }, { "epoch": 0.8062607367818286, "grad_norm": 0.5172303318977356, "learning_rate": 1.3015794550712542e-05, "loss": 0.507, "step": 38016 }, { "epoch": 0.8062819452397616, "grad_norm": 0.3831368386745453, "learning_rate": 1.301547658090402e-05, "loss": 0.4033, "step": 38017 }, { "epoch": 0.8063031536976947, "grad_norm": 0.3629009425640106, "learning_rate": 1.3015158607741708e-05, "loss": 0.5194, "step": 38018 }, { "epoch": 0.8063243621556276, "grad_norm": 0.33935654163360596, "learning_rate": 1.3014840631225966e-05, "loss": 0.4711, "step": 38019 }, { "epoch": 0.8063455706135607, "grad_norm": 0.35523852705955505, "learning_rate": 1.3014522651357144e-05, "loss": 0.4682, "step": 38020 }, { "epoch": 0.8063667790714937, "grad_norm": 0.34010055661201477, "learning_rate": 1.3014204668135595e-05, "loss": 0.4799, "step": 38021 }, { "epoch": 0.8063879875294268, "grad_norm": 0.4630371630191803, "learning_rate": 1.3013886681561674e-05, "loss": 0.5006, "step": 38022 }, { "epoch": 0.8064091959873597, "grad_norm": 0.36147817969322205, "learning_rate": 1.3013568691635733e-05, "loss": 0.4478, "step": 38023 }, { "epoch": 0.8064304044452928, "grad_norm": 0.36766517162323, "learning_rate": 1.3013250698358129e-05, "loss": 0.5071, "step": 38024 }, { "epoch": 0.8064516129032258, "grad_norm": 0.37306174635887146, "learning_rate": 1.3012932701729215e-05, "loss": 0.5233, "step": 38025 }, { "epoch": 0.8064728213611588, "grad_norm": 0.35014891624450684, "learning_rate": 1.3012614701749339e-05, "loss": 0.5055, "step": 38026 }, { "epoch": 0.8064940298190919, "grad_norm": 0.3706260025501251, "learning_rate": 1.3012296698418863e-05, "loss": 0.4672, "step": 38027 }, { "epoch": 0.8065152382770249, "grad_norm": 0.3508732318878174, "learning_rate": 1.3011978691738137e-05, "loss": 0.4966, "step": 38028 }, { "epoch": 0.806536446734958, "grad_norm": 0.3265976309776306, "learning_rate": 1.301166068170751e-05, "loss": 0.4747, "step": 38029 }, { "epoch": 0.8065576551928909, "grad_norm": 0.41364917159080505, "learning_rate": 1.3011342668327344e-05, "loss": 0.4914, "step": 38030 }, { "epoch": 0.806578863650824, "grad_norm": 0.6333304643630981, "learning_rate": 1.301102465159799e-05, "loss": 0.4795, "step": 38031 }, { "epoch": 0.806600072108757, "grad_norm": 0.3525509238243103, "learning_rate": 1.3010706631519799e-05, "loss": 0.5025, "step": 38032 }, { "epoch": 0.80662128056669, "grad_norm": 0.33682551980018616, "learning_rate": 1.3010388608093126e-05, "loss": 0.504, "step": 38033 }, { "epoch": 0.806642489024623, "grad_norm": 0.32595565915107727, "learning_rate": 1.3010070581318321e-05, "loss": 0.5544, "step": 38034 }, { "epoch": 0.8066636974825561, "grad_norm": 0.368316113948822, "learning_rate": 1.3009752551195747e-05, "loss": 0.5008, "step": 38035 }, { "epoch": 0.806684905940489, "grad_norm": 0.3628639876842499, "learning_rate": 1.3009434517725752e-05, "loss": 0.4623, "step": 38036 }, { "epoch": 0.8067061143984221, "grad_norm": 0.34952425956726074, "learning_rate": 1.3009116480908688e-05, "loss": 0.4803, "step": 38037 }, { "epoch": 0.8067273228563551, "grad_norm": 0.35766640305519104, "learning_rate": 1.3008798440744911e-05, "loss": 0.54, "step": 38038 }, { "epoch": 0.8067485313142881, "grad_norm": 0.6439785957336426, "learning_rate": 1.3008480397234777e-05, "loss": 0.3581, "step": 38039 }, { "epoch": 0.8067697397722212, "grad_norm": 0.4768042266368866, "learning_rate": 1.3008162350378632e-05, "loss": 0.5275, "step": 38040 }, { "epoch": 0.8067909482301542, "grad_norm": 0.41210901737213135, "learning_rate": 1.300784430017684e-05, "loss": 0.4347, "step": 38041 }, { "epoch": 0.8068121566880873, "grad_norm": 0.3840935528278351, "learning_rate": 1.300752624662975e-05, "loss": 0.4736, "step": 38042 }, { "epoch": 0.8068333651460202, "grad_norm": 0.3902431130409241, "learning_rate": 1.300720818973771e-05, "loss": 0.4746, "step": 38043 }, { "epoch": 0.8068545736039533, "grad_norm": 0.3784884810447693, "learning_rate": 1.3006890129501084e-05, "loss": 0.5446, "step": 38044 }, { "epoch": 0.8068757820618863, "grad_norm": 0.3596588671207428, "learning_rate": 1.300657206592022e-05, "loss": 0.4454, "step": 38045 }, { "epoch": 0.8068969905198193, "grad_norm": 0.3290010690689087, "learning_rate": 1.300625399899547e-05, "loss": 0.4941, "step": 38046 }, { "epoch": 0.8069181989777523, "grad_norm": 0.414582222700119, "learning_rate": 1.3005935928727194e-05, "loss": 0.4609, "step": 38047 }, { "epoch": 0.8069394074356854, "grad_norm": 0.3581564128398895, "learning_rate": 1.3005617855115742e-05, "loss": 0.5067, "step": 38048 }, { "epoch": 0.8069606158936183, "grad_norm": 0.35685816407203674, "learning_rate": 1.3005299778161464e-05, "loss": 0.4133, "step": 38049 }, { "epoch": 0.8069818243515514, "grad_norm": 0.41978710889816284, "learning_rate": 1.3004981697864722e-05, "loss": 0.5736, "step": 38050 }, { "epoch": 0.8070030328094844, "grad_norm": 0.33289170265197754, "learning_rate": 1.3004663614225865e-05, "loss": 0.5378, "step": 38051 }, { "epoch": 0.8070242412674175, "grad_norm": 0.41828101873397827, "learning_rate": 1.3004345527245246e-05, "loss": 0.5079, "step": 38052 }, { "epoch": 0.8070454497253504, "grad_norm": 0.37763580679893494, "learning_rate": 1.3004027436923221e-05, "loss": 0.4682, "step": 38053 }, { "epoch": 0.8070666581832835, "grad_norm": 0.6647118926048279, "learning_rate": 1.3003709343260141e-05, "loss": 0.4665, "step": 38054 }, { "epoch": 0.8070878666412166, "grad_norm": 0.3708103597164154, "learning_rate": 1.3003391246256361e-05, "loss": 0.4879, "step": 38055 }, { "epoch": 0.8071090750991495, "grad_norm": 0.35461336374282837, "learning_rate": 1.3003073145912237e-05, "loss": 0.5087, "step": 38056 }, { "epoch": 0.8071302835570826, "grad_norm": 0.3607899248600006, "learning_rate": 1.3002755042228122e-05, "loss": 0.4569, "step": 38057 }, { "epoch": 0.8071514920150156, "grad_norm": 0.34414562582969666, "learning_rate": 1.3002436935204367e-05, "loss": 0.4439, "step": 38058 }, { "epoch": 0.8071727004729486, "grad_norm": 0.37639668583869934, "learning_rate": 1.3002118824841329e-05, "loss": 0.5238, "step": 38059 }, { "epoch": 0.8071939089308816, "grad_norm": 0.34795185923576355, "learning_rate": 1.300180071113936e-05, "loss": 0.4911, "step": 38060 }, { "epoch": 0.8072151173888147, "grad_norm": 0.36283576488494873, "learning_rate": 1.3001482594098812e-05, "loss": 0.4304, "step": 38061 }, { "epoch": 0.8072363258467476, "grad_norm": 0.3785777688026428, "learning_rate": 1.3001164473720047e-05, "loss": 0.4767, "step": 38062 }, { "epoch": 0.8072575343046807, "grad_norm": 0.3627745807170868, "learning_rate": 1.300084635000341e-05, "loss": 0.4921, "step": 38063 }, { "epoch": 0.8072787427626137, "grad_norm": 0.3499663174152374, "learning_rate": 1.3000528222949255e-05, "loss": 0.5224, "step": 38064 }, { "epoch": 0.8072999512205468, "grad_norm": 0.36593809723854065, "learning_rate": 1.3000210092557943e-05, "loss": 0.5253, "step": 38065 }, { "epoch": 0.8073211596784797, "grad_norm": 0.3742753565311432, "learning_rate": 1.2999891958829818e-05, "loss": 0.4528, "step": 38066 }, { "epoch": 0.8073423681364128, "grad_norm": 0.5524768829345703, "learning_rate": 1.2999573821765244e-05, "loss": 0.5847, "step": 38067 }, { "epoch": 0.8073635765943459, "grad_norm": 0.34415215253829956, "learning_rate": 1.2999255681364568e-05, "loss": 0.4878, "step": 38068 }, { "epoch": 0.8073847850522788, "grad_norm": 0.3458070456981659, "learning_rate": 1.2998937537628147e-05, "loss": 0.4788, "step": 38069 }, { "epoch": 0.8074059935102119, "grad_norm": 0.3323156535625458, "learning_rate": 1.2998619390556333e-05, "loss": 0.4276, "step": 38070 }, { "epoch": 0.8074272019681449, "grad_norm": 0.3515549600124359, "learning_rate": 1.299830124014948e-05, "loss": 0.4735, "step": 38071 }, { "epoch": 0.807448410426078, "grad_norm": 0.4206693768501282, "learning_rate": 1.2997983086407944e-05, "loss": 0.5792, "step": 38072 }, { "epoch": 0.8074696188840109, "grad_norm": 0.3604518175125122, "learning_rate": 1.2997664929332077e-05, "loss": 0.4405, "step": 38073 }, { "epoch": 0.807490827341944, "grad_norm": 0.36820194125175476, "learning_rate": 1.2997346768922233e-05, "loss": 0.6011, "step": 38074 }, { "epoch": 0.807512035799877, "grad_norm": 0.3772488236427307, "learning_rate": 1.2997028605178765e-05, "loss": 0.5108, "step": 38075 }, { "epoch": 0.80753324425781, "grad_norm": 0.42793965339660645, "learning_rate": 1.2996710438102026e-05, "loss": 0.4352, "step": 38076 }, { "epoch": 0.807554452715743, "grad_norm": 0.35713520646095276, "learning_rate": 1.2996392267692376e-05, "loss": 0.5154, "step": 38077 }, { "epoch": 0.8075756611736761, "grad_norm": 0.3468163311481476, "learning_rate": 1.2996074093950162e-05, "loss": 0.5129, "step": 38078 }, { "epoch": 0.807596869631609, "grad_norm": 0.3553891181945801, "learning_rate": 1.299575591687574e-05, "loss": 0.4909, "step": 38079 }, { "epoch": 0.8076180780895421, "grad_norm": 0.3587448298931122, "learning_rate": 1.2995437736469464e-05, "loss": 0.4728, "step": 38080 }, { "epoch": 0.8076392865474752, "grad_norm": 0.3951170742511749, "learning_rate": 1.2995119552731693e-05, "loss": 0.5289, "step": 38081 }, { "epoch": 0.8076604950054082, "grad_norm": 0.3512963354587555, "learning_rate": 1.299480136566277e-05, "loss": 0.5106, "step": 38082 }, { "epoch": 0.8076817034633412, "grad_norm": 0.49614256620407104, "learning_rate": 1.2994483175263057e-05, "loss": 0.5804, "step": 38083 }, { "epoch": 0.8077029119212742, "grad_norm": 0.3438517451286316, "learning_rate": 1.2994164981532909e-05, "loss": 0.5176, "step": 38084 }, { "epoch": 0.8077241203792073, "grad_norm": 0.3307555615901947, "learning_rate": 1.2993846784472673e-05, "loss": 0.4722, "step": 38085 }, { "epoch": 0.8077453288371402, "grad_norm": 0.3549267053604126, "learning_rate": 1.2993528584082706e-05, "loss": 0.5008, "step": 38086 }, { "epoch": 0.8077665372950733, "grad_norm": 0.339217871427536, "learning_rate": 1.2993210380363364e-05, "loss": 0.4633, "step": 38087 }, { "epoch": 0.8077877457530063, "grad_norm": 0.3522883951663971, "learning_rate": 1.2992892173315001e-05, "loss": 0.4479, "step": 38088 }, { "epoch": 0.8078089542109393, "grad_norm": 0.36549246311187744, "learning_rate": 1.2992573962937967e-05, "loss": 0.4951, "step": 38089 }, { "epoch": 0.8078301626688723, "grad_norm": 0.3360956311225891, "learning_rate": 1.299225574923262e-05, "loss": 0.4493, "step": 38090 }, { "epoch": 0.8078513711268054, "grad_norm": 0.41489893198013306, "learning_rate": 1.299193753219931e-05, "loss": 0.5256, "step": 38091 }, { "epoch": 0.8078725795847383, "grad_norm": 0.37779802083969116, "learning_rate": 1.2991619311838393e-05, "loss": 0.4569, "step": 38092 }, { "epoch": 0.8078937880426714, "grad_norm": 0.3501052260398865, "learning_rate": 1.2991301088150226e-05, "loss": 0.51, "step": 38093 }, { "epoch": 0.8079149965006044, "grad_norm": 0.3578716516494751, "learning_rate": 1.2990982861135157e-05, "loss": 0.4503, "step": 38094 }, { "epoch": 0.8079362049585375, "grad_norm": 0.3967767357826233, "learning_rate": 1.2990664630793544e-05, "loss": 0.4702, "step": 38095 }, { "epoch": 0.8079574134164705, "grad_norm": 0.5122667551040649, "learning_rate": 1.2990346397125742e-05, "loss": 0.4831, "step": 38096 }, { "epoch": 0.8079786218744035, "grad_norm": 0.3692333400249481, "learning_rate": 1.2990028160132098e-05, "loss": 0.4719, "step": 38097 }, { "epoch": 0.8079998303323366, "grad_norm": 0.34263870120048523, "learning_rate": 1.2989709919812974e-05, "loss": 0.4639, "step": 38098 }, { "epoch": 0.8080210387902695, "grad_norm": 0.37036997079849243, "learning_rate": 1.298939167616872e-05, "loss": 0.5776, "step": 38099 }, { "epoch": 0.8080422472482026, "grad_norm": 0.3264576494693756, "learning_rate": 1.2989073429199687e-05, "loss": 0.5008, "step": 38100 }, { "epoch": 0.8080634557061356, "grad_norm": 0.3797154128551483, "learning_rate": 1.2988755178906239e-05, "loss": 0.5186, "step": 38101 }, { "epoch": 0.8080846641640687, "grad_norm": 0.3689556419849396, "learning_rate": 1.2988436925288721e-05, "loss": 0.3884, "step": 38102 }, { "epoch": 0.8081058726220016, "grad_norm": 0.8803364038467407, "learning_rate": 1.2988118668347487e-05, "loss": 0.5085, "step": 38103 }, { "epoch": 0.8081270810799347, "grad_norm": 0.3539908528327942, "learning_rate": 1.2987800408082896e-05, "loss": 0.4398, "step": 38104 }, { "epoch": 0.8081482895378677, "grad_norm": 0.37552323937416077, "learning_rate": 1.2987482144495298e-05, "loss": 0.4804, "step": 38105 }, { "epoch": 0.8081694979958007, "grad_norm": 0.3408568501472473, "learning_rate": 1.2987163877585047e-05, "loss": 0.5401, "step": 38106 }, { "epoch": 0.8081907064537337, "grad_norm": 0.5093522071838379, "learning_rate": 1.29868456073525e-05, "loss": 0.489, "step": 38107 }, { "epoch": 0.8082119149116668, "grad_norm": 0.3739274740219116, "learning_rate": 1.298652733379801e-05, "loss": 0.5181, "step": 38108 }, { "epoch": 0.8082331233695998, "grad_norm": 0.35456034541130066, "learning_rate": 1.298620905692193e-05, "loss": 0.4248, "step": 38109 }, { "epoch": 0.8082543318275328, "grad_norm": 0.3772942125797272, "learning_rate": 1.2985890776724615e-05, "loss": 0.5124, "step": 38110 }, { "epoch": 0.8082755402854659, "grad_norm": 0.354747474193573, "learning_rate": 1.2985572493206412e-05, "loss": 0.4765, "step": 38111 }, { "epoch": 0.8082967487433989, "grad_norm": 0.3552319407463074, "learning_rate": 1.2985254206367689e-05, "loss": 0.4522, "step": 38112 }, { "epoch": 0.8083179572013319, "grad_norm": 0.39244142174720764, "learning_rate": 1.2984935916208789e-05, "loss": 0.5113, "step": 38113 }, { "epoch": 0.8083391656592649, "grad_norm": 0.35285577178001404, "learning_rate": 1.2984617622730069e-05, "loss": 0.465, "step": 38114 }, { "epoch": 0.808360374117198, "grad_norm": 0.3616235852241516, "learning_rate": 1.2984299325931886e-05, "loss": 0.5047, "step": 38115 }, { "epoch": 0.8083815825751309, "grad_norm": 0.46461692452430725, "learning_rate": 1.2983981025814589e-05, "loss": 0.492, "step": 38116 }, { "epoch": 0.808402791033064, "grad_norm": 0.3559115529060364, "learning_rate": 1.2983662722378534e-05, "loss": 0.5246, "step": 38117 }, { "epoch": 0.808423999490997, "grad_norm": 0.3518151342868805, "learning_rate": 1.2983344415624073e-05, "loss": 0.4759, "step": 38118 }, { "epoch": 0.80844520794893, "grad_norm": 0.3620350956916809, "learning_rate": 1.2983026105551566e-05, "loss": 0.5234, "step": 38119 }, { "epoch": 0.808466416406863, "grad_norm": 0.41884809732437134, "learning_rate": 1.2982707792161361e-05, "loss": 0.4918, "step": 38120 }, { "epoch": 0.8084876248647961, "grad_norm": 0.36129891872406006, "learning_rate": 1.2982389475453818e-05, "loss": 0.5562, "step": 38121 }, { "epoch": 0.8085088333227292, "grad_norm": 1.0240180492401123, "learning_rate": 1.2982071155429283e-05, "loss": 0.3842, "step": 38122 }, { "epoch": 0.8085300417806621, "grad_norm": 0.3681126832962036, "learning_rate": 1.2981752832088117e-05, "loss": 0.4904, "step": 38123 }, { "epoch": 0.8085512502385952, "grad_norm": 0.5235715508460999, "learning_rate": 1.2981434505430667e-05, "loss": 0.5112, "step": 38124 }, { "epoch": 0.8085724586965282, "grad_norm": 0.36651256680488586, "learning_rate": 1.2981116175457295e-05, "loss": 0.5237, "step": 38125 }, { "epoch": 0.8085936671544612, "grad_norm": 0.3480922281742096, "learning_rate": 1.2980797842168354e-05, "loss": 0.5132, "step": 38126 }, { "epoch": 0.8086148756123942, "grad_norm": 0.3394635319709778, "learning_rate": 1.2980479505564193e-05, "loss": 0.4941, "step": 38127 }, { "epoch": 0.8086360840703273, "grad_norm": 0.3740624487400055, "learning_rate": 1.2980161165645167e-05, "loss": 0.4535, "step": 38128 }, { "epoch": 0.8086572925282602, "grad_norm": 0.40388962626457214, "learning_rate": 1.2979842822411635e-05, "loss": 0.4942, "step": 38129 }, { "epoch": 0.8086785009861933, "grad_norm": 0.4331386089324951, "learning_rate": 1.2979524475863947e-05, "loss": 0.5298, "step": 38130 }, { "epoch": 0.8086997094441263, "grad_norm": 0.4377915561199188, "learning_rate": 1.2979206126002452e-05, "loss": 0.5543, "step": 38131 }, { "epoch": 0.8087209179020594, "grad_norm": 0.3554058372974396, "learning_rate": 1.2978887772827516e-05, "loss": 0.4514, "step": 38132 }, { "epoch": 0.8087421263599923, "grad_norm": 0.3367899954319, "learning_rate": 1.2978569416339485e-05, "loss": 0.4333, "step": 38133 }, { "epoch": 0.8087633348179254, "grad_norm": 0.47870761156082153, "learning_rate": 1.2978251056538716e-05, "loss": 0.44, "step": 38134 }, { "epoch": 0.8087845432758584, "grad_norm": 0.37131014466285706, "learning_rate": 1.2977932693425561e-05, "loss": 0.5068, "step": 38135 }, { "epoch": 0.8088057517337914, "grad_norm": 0.45393916964530945, "learning_rate": 1.2977614327000374e-05, "loss": 0.5184, "step": 38136 }, { "epoch": 0.8088269601917245, "grad_norm": 0.4357212781906128, "learning_rate": 1.2977295957263511e-05, "loss": 0.4915, "step": 38137 }, { "epoch": 0.8088481686496575, "grad_norm": 0.3491400182247162, "learning_rate": 1.2976977584215327e-05, "loss": 0.4804, "step": 38138 }, { "epoch": 0.8088693771075905, "grad_norm": 0.39426189661026, "learning_rate": 1.2976659207856173e-05, "loss": 0.5383, "step": 38139 }, { "epoch": 0.8088905855655235, "grad_norm": 0.37145036458969116, "learning_rate": 1.2976340828186404e-05, "loss": 0.4861, "step": 38140 }, { "epoch": 0.8089117940234566, "grad_norm": 0.31957006454467773, "learning_rate": 1.2976022445206378e-05, "loss": 0.5072, "step": 38141 }, { "epoch": 0.8089330024813896, "grad_norm": 0.4021444022655487, "learning_rate": 1.297570405891644e-05, "loss": 0.469, "step": 38142 }, { "epoch": 0.8089542109393226, "grad_norm": 0.3589784801006317, "learning_rate": 1.2975385669316953e-05, "loss": 0.4464, "step": 38143 }, { "epoch": 0.8089754193972556, "grad_norm": 0.36239856481552124, "learning_rate": 1.2975067276408269e-05, "loss": 0.4764, "step": 38144 }, { "epoch": 0.8089966278551887, "grad_norm": 0.38511669635772705, "learning_rate": 1.2974748880190739e-05, "loss": 0.4815, "step": 38145 }, { "epoch": 0.8090178363131216, "grad_norm": 0.3555699586868286, "learning_rate": 1.2974430480664722e-05, "loss": 0.526, "step": 38146 }, { "epoch": 0.8090390447710547, "grad_norm": 0.37417665123939514, "learning_rate": 1.2974112077830567e-05, "loss": 0.518, "step": 38147 }, { "epoch": 0.8090602532289877, "grad_norm": 0.36342141032218933, "learning_rate": 1.2973793671688628e-05, "loss": 0.5204, "step": 38148 }, { "epoch": 0.8090814616869207, "grad_norm": 0.33811065554618835, "learning_rate": 1.2973475262239266e-05, "loss": 0.5408, "step": 38149 }, { "epoch": 0.8091026701448538, "grad_norm": 0.37839752435684204, "learning_rate": 1.2973156849482828e-05, "loss": 0.5399, "step": 38150 }, { "epoch": 0.8091238786027868, "grad_norm": 0.3414270579814911, "learning_rate": 1.297283843341967e-05, "loss": 0.4989, "step": 38151 }, { "epoch": 0.8091450870607199, "grad_norm": 0.4441486895084381, "learning_rate": 1.297252001405015e-05, "loss": 0.4543, "step": 38152 }, { "epoch": 0.8091662955186528, "grad_norm": 0.41832616925239563, "learning_rate": 1.2972201591374616e-05, "loss": 0.4543, "step": 38153 }, { "epoch": 0.8091875039765859, "grad_norm": 0.4254963994026184, "learning_rate": 1.2971883165393429e-05, "loss": 0.5049, "step": 38154 }, { "epoch": 0.8092087124345189, "grad_norm": 0.3795906901359558, "learning_rate": 1.2971564736106937e-05, "loss": 0.5205, "step": 38155 }, { "epoch": 0.8092299208924519, "grad_norm": 0.3905981481075287, "learning_rate": 1.2971246303515494e-05, "loss": 0.4373, "step": 38156 }, { "epoch": 0.8092511293503849, "grad_norm": 0.34250566363334656, "learning_rate": 1.297092786761946e-05, "loss": 0.4697, "step": 38157 }, { "epoch": 0.809272337808318, "grad_norm": 0.4199146628379822, "learning_rate": 1.2970609428419185e-05, "loss": 0.5056, "step": 38158 }, { "epoch": 0.8092935462662509, "grad_norm": 0.3542194068431854, "learning_rate": 1.2970290985915023e-05, "loss": 0.4717, "step": 38159 }, { "epoch": 0.809314754724184, "grad_norm": 0.3703230321407318, "learning_rate": 1.2969972540107331e-05, "loss": 0.431, "step": 38160 }, { "epoch": 0.809335963182117, "grad_norm": 0.4393540322780609, "learning_rate": 1.2969654090996465e-05, "loss": 0.4902, "step": 38161 }, { "epoch": 0.80935717164005, "grad_norm": 0.3571067452430725, "learning_rate": 1.2969335638582768e-05, "loss": 0.5319, "step": 38162 }, { "epoch": 0.8093783800979831, "grad_norm": 0.3802717924118042, "learning_rate": 1.2969017182866604e-05, "loss": 0.4525, "step": 38163 }, { "epoch": 0.8093995885559161, "grad_norm": 0.35330715775489807, "learning_rate": 1.2968698723848325e-05, "loss": 0.5172, "step": 38164 }, { "epoch": 0.8094207970138492, "grad_norm": 0.3886362910270691, "learning_rate": 1.2968380261528286e-05, "loss": 0.4808, "step": 38165 }, { "epoch": 0.8094420054717821, "grad_norm": 0.3528246581554413, "learning_rate": 1.2968061795906842e-05, "loss": 0.5037, "step": 38166 }, { "epoch": 0.8094632139297152, "grad_norm": 0.3116524815559387, "learning_rate": 1.296774332698434e-05, "loss": 0.4041, "step": 38167 }, { "epoch": 0.8094844223876482, "grad_norm": 0.3490556478500366, "learning_rate": 1.2967424854761147e-05, "loss": 0.4246, "step": 38168 }, { "epoch": 0.8095056308455812, "grad_norm": 0.36850765347480774, "learning_rate": 1.2967106379237604e-05, "loss": 0.4604, "step": 38169 }, { "epoch": 0.8095268393035142, "grad_norm": 0.322191059589386, "learning_rate": 1.2966787900414074e-05, "loss": 0.4719, "step": 38170 }, { "epoch": 0.8095480477614473, "grad_norm": 0.35839372873306274, "learning_rate": 1.2966469418290906e-05, "loss": 0.5444, "step": 38171 }, { "epoch": 0.8095692562193803, "grad_norm": 0.3302164077758789, "learning_rate": 1.296615093286846e-05, "loss": 0.415, "step": 38172 }, { "epoch": 0.8095904646773133, "grad_norm": 0.3533945083618164, "learning_rate": 1.2965832444147082e-05, "loss": 0.4673, "step": 38173 }, { "epoch": 0.8096116731352463, "grad_norm": 0.3711114227771759, "learning_rate": 1.2965513952127134e-05, "loss": 0.4592, "step": 38174 }, { "epoch": 0.8096328815931794, "grad_norm": 0.34290188550949097, "learning_rate": 1.2965195456808965e-05, "loss": 0.4799, "step": 38175 }, { "epoch": 0.8096540900511123, "grad_norm": 0.3575894832611084, "learning_rate": 1.2964876958192933e-05, "loss": 0.4675, "step": 38176 }, { "epoch": 0.8096752985090454, "grad_norm": 0.476947546005249, "learning_rate": 1.2964558456279393e-05, "loss": 0.4891, "step": 38177 }, { "epoch": 0.8096965069669785, "grad_norm": 0.3421124219894409, "learning_rate": 1.2964239951068694e-05, "loss": 0.4755, "step": 38178 }, { "epoch": 0.8097177154249114, "grad_norm": 0.40342873334884644, "learning_rate": 1.2963921442561192e-05, "loss": 0.4814, "step": 38179 }, { "epoch": 0.8097389238828445, "grad_norm": 0.33574795722961426, "learning_rate": 1.2963602930757244e-05, "loss": 0.4708, "step": 38180 }, { "epoch": 0.8097601323407775, "grad_norm": 0.4075985550880432, "learning_rate": 1.2963284415657202e-05, "loss": 0.4876, "step": 38181 }, { "epoch": 0.8097813407987106, "grad_norm": 0.4004073143005371, "learning_rate": 1.2962965897261422e-05, "loss": 0.4967, "step": 38182 }, { "epoch": 0.8098025492566435, "grad_norm": 0.3680410087108612, "learning_rate": 1.2962647375570257e-05, "loss": 0.5821, "step": 38183 }, { "epoch": 0.8098237577145766, "grad_norm": 0.3366951048374176, "learning_rate": 1.2962328850584061e-05, "loss": 0.4369, "step": 38184 }, { "epoch": 0.8098449661725096, "grad_norm": 0.3338223099708557, "learning_rate": 1.2962010322303185e-05, "loss": 0.4962, "step": 38185 }, { "epoch": 0.8098661746304426, "grad_norm": 0.3908587396144867, "learning_rate": 1.2961691790727993e-05, "loss": 0.5392, "step": 38186 }, { "epoch": 0.8098873830883756, "grad_norm": 0.4560275375843048, "learning_rate": 1.2961373255858826e-05, "loss": 0.5056, "step": 38187 }, { "epoch": 0.8099085915463087, "grad_norm": 0.3745807111263275, "learning_rate": 1.296105471769605e-05, "loss": 0.4583, "step": 38188 }, { "epoch": 0.8099298000042416, "grad_norm": 0.4140998125076294, "learning_rate": 1.2960736176240017e-05, "loss": 0.4851, "step": 38189 }, { "epoch": 0.8099510084621747, "grad_norm": 0.4177611470222473, "learning_rate": 1.2960417631491074e-05, "loss": 0.546, "step": 38190 }, { "epoch": 0.8099722169201078, "grad_norm": 1.2113761901855469, "learning_rate": 1.2960099083449581e-05, "loss": 0.5088, "step": 38191 }, { "epoch": 0.8099934253780408, "grad_norm": 0.4629533290863037, "learning_rate": 1.2959780532115895e-05, "loss": 0.5287, "step": 38192 }, { "epoch": 0.8100146338359738, "grad_norm": 0.3378204107284546, "learning_rate": 1.2959461977490363e-05, "loss": 0.4594, "step": 38193 }, { "epoch": 0.8100358422939068, "grad_norm": 0.3229367136955261, "learning_rate": 1.2959143419573347e-05, "loss": 0.3828, "step": 38194 }, { "epoch": 0.8100570507518399, "grad_norm": 0.4025723934173584, "learning_rate": 1.2958824858365194e-05, "loss": 0.4504, "step": 38195 }, { "epoch": 0.8100782592097728, "grad_norm": 0.3825252652168274, "learning_rate": 1.2958506293866263e-05, "loss": 0.5049, "step": 38196 }, { "epoch": 0.8100994676677059, "grad_norm": 0.3263610899448395, "learning_rate": 1.2958187726076907e-05, "loss": 0.3816, "step": 38197 }, { "epoch": 0.8101206761256389, "grad_norm": 0.34534838795661926, "learning_rate": 1.2957869154997482e-05, "loss": 0.4672, "step": 38198 }, { "epoch": 0.810141884583572, "grad_norm": 0.341674268245697, "learning_rate": 1.2957550580628335e-05, "loss": 0.4884, "step": 38199 }, { "epoch": 0.8101630930415049, "grad_norm": 0.3619106113910675, "learning_rate": 1.2957232002969832e-05, "loss": 0.4427, "step": 38200 }, { "epoch": 0.810184301499438, "grad_norm": 0.3560369312763214, "learning_rate": 1.2956913422022316e-05, "loss": 0.4173, "step": 38201 }, { "epoch": 0.810205509957371, "grad_norm": 0.39925700426101685, "learning_rate": 1.2956594837786152e-05, "loss": 0.5592, "step": 38202 }, { "epoch": 0.810226718415304, "grad_norm": 0.31745120882987976, "learning_rate": 1.2956276250261686e-05, "loss": 0.4655, "step": 38203 }, { "epoch": 0.8102479268732371, "grad_norm": 0.4328635334968567, "learning_rate": 1.2955957659449275e-05, "loss": 0.5887, "step": 38204 }, { "epoch": 0.8102691353311701, "grad_norm": 0.35099464654922485, "learning_rate": 1.2955639065349275e-05, "loss": 0.4565, "step": 38205 }, { "epoch": 0.8102903437891031, "grad_norm": 0.394281804561615, "learning_rate": 1.295532046796204e-05, "loss": 0.5322, "step": 38206 }, { "epoch": 0.8103115522470361, "grad_norm": 0.3656165599822998, "learning_rate": 1.2955001867287919e-05, "loss": 0.4303, "step": 38207 }, { "epoch": 0.8103327607049692, "grad_norm": 0.4141707122325897, "learning_rate": 1.2954683263327273e-05, "loss": 0.4543, "step": 38208 }, { "epoch": 0.8103539691629021, "grad_norm": 0.338809072971344, "learning_rate": 1.2954364656080454e-05, "loss": 0.4906, "step": 38209 }, { "epoch": 0.8103751776208352, "grad_norm": 0.38806965947151184, "learning_rate": 1.2954046045547815e-05, "loss": 0.4354, "step": 38210 }, { "epoch": 0.8103963860787682, "grad_norm": 0.38054317235946655, "learning_rate": 1.2953727431729715e-05, "loss": 0.5132, "step": 38211 }, { "epoch": 0.8104175945367013, "grad_norm": 0.37799927592277527, "learning_rate": 1.2953408814626502e-05, "loss": 0.4471, "step": 38212 }, { "epoch": 0.8104388029946342, "grad_norm": 0.37600335478782654, "learning_rate": 1.2953090194238533e-05, "loss": 0.5082, "step": 38213 }, { "epoch": 0.8104600114525673, "grad_norm": 0.41660913825035095, "learning_rate": 1.2952771570566165e-05, "loss": 0.5494, "step": 38214 }, { "epoch": 0.8104812199105003, "grad_norm": 0.3983609080314636, "learning_rate": 1.295245294360975e-05, "loss": 0.491, "step": 38215 }, { "epoch": 0.8105024283684333, "grad_norm": 0.32606106996536255, "learning_rate": 1.295213431336964e-05, "loss": 0.5429, "step": 38216 }, { "epoch": 0.8105236368263663, "grad_norm": 0.34103718400001526, "learning_rate": 1.2951815679846194e-05, "loss": 0.4758, "step": 38217 }, { "epoch": 0.8105448452842994, "grad_norm": 0.371660977602005, "learning_rate": 1.2951497043039763e-05, "loss": 0.5441, "step": 38218 }, { "epoch": 0.8105660537422325, "grad_norm": 0.4260866045951843, "learning_rate": 1.2951178402950704e-05, "loss": 0.565, "step": 38219 }, { "epoch": 0.8105872622001654, "grad_norm": 0.32947325706481934, "learning_rate": 1.2950859759579368e-05, "loss": 0.4602, "step": 38220 }, { "epoch": 0.8106084706580985, "grad_norm": 0.4103918671607971, "learning_rate": 1.2950541112926112e-05, "loss": 0.4001, "step": 38221 }, { "epoch": 0.8106296791160315, "grad_norm": 0.41869890689849854, "learning_rate": 1.295022246299129e-05, "loss": 0.4682, "step": 38222 }, { "epoch": 0.8106508875739645, "grad_norm": 0.3733627498149872, "learning_rate": 1.2949903809775259e-05, "loss": 0.4962, "step": 38223 }, { "epoch": 0.8106720960318975, "grad_norm": 0.3882821500301361, "learning_rate": 1.2949585153278368e-05, "loss": 0.5237, "step": 38224 }, { "epoch": 0.8106933044898306, "grad_norm": 0.3560410439968109, "learning_rate": 1.2949266493500975e-05, "loss": 0.4818, "step": 38225 }, { "epoch": 0.8107145129477635, "grad_norm": 0.5113629698753357, "learning_rate": 1.2948947830443435e-05, "loss": 0.4936, "step": 38226 }, { "epoch": 0.8107357214056966, "grad_norm": 0.35806018114089966, "learning_rate": 1.2948629164106095e-05, "loss": 0.4643, "step": 38227 }, { "epoch": 0.8107569298636296, "grad_norm": 0.4095699191093445, "learning_rate": 1.294831049448932e-05, "loss": 0.4904, "step": 38228 }, { "epoch": 0.8107781383215626, "grad_norm": 0.33947667479515076, "learning_rate": 1.294799182159346e-05, "loss": 0.4839, "step": 38229 }, { "epoch": 0.8107993467794956, "grad_norm": 0.38088586926460266, "learning_rate": 1.2947673145418866e-05, "loss": 0.4744, "step": 38230 }, { "epoch": 0.8108205552374287, "grad_norm": 0.3738921880722046, "learning_rate": 1.2947354465965897e-05, "loss": 0.5272, "step": 38231 }, { "epoch": 0.8108417636953618, "grad_norm": 0.40355923771858215, "learning_rate": 1.2947035783234907e-05, "loss": 0.5339, "step": 38232 }, { "epoch": 0.8108629721532947, "grad_norm": 0.37474748492240906, "learning_rate": 1.2946717097226247e-05, "loss": 0.4951, "step": 38233 }, { "epoch": 0.8108841806112278, "grad_norm": 0.3536263108253479, "learning_rate": 1.2946398407940274e-05, "loss": 0.3719, "step": 38234 }, { "epoch": 0.8109053890691608, "grad_norm": 0.36756983399391174, "learning_rate": 1.2946079715377345e-05, "loss": 0.5319, "step": 38235 }, { "epoch": 0.8109265975270938, "grad_norm": 0.3529464602470398, "learning_rate": 1.2945761019537812e-05, "loss": 0.4579, "step": 38236 }, { "epoch": 0.8109478059850268, "grad_norm": 0.3192797005176544, "learning_rate": 1.2945442320422028e-05, "loss": 0.4304, "step": 38237 }, { "epoch": 0.8109690144429599, "grad_norm": 0.412621408700943, "learning_rate": 1.2945123618030348e-05, "loss": 0.4542, "step": 38238 }, { "epoch": 0.8109902229008928, "grad_norm": 0.3438951373100281, "learning_rate": 1.2944804912363127e-05, "loss": 0.4169, "step": 38239 }, { "epoch": 0.8110114313588259, "grad_norm": 0.37240102887153625, "learning_rate": 1.2944486203420722e-05, "loss": 0.5134, "step": 38240 }, { "epoch": 0.8110326398167589, "grad_norm": 0.35950595140457153, "learning_rate": 1.2944167491203478e-05, "loss": 0.5147, "step": 38241 }, { "epoch": 0.811053848274692, "grad_norm": 0.3885093629360199, "learning_rate": 1.2943848775711763e-05, "loss": 0.4657, "step": 38242 }, { "epoch": 0.8110750567326249, "grad_norm": 0.38014936447143555, "learning_rate": 1.2943530056945926e-05, "loss": 0.4982, "step": 38243 }, { "epoch": 0.811096265190558, "grad_norm": 0.35110464692115784, "learning_rate": 1.2943211334906316e-05, "loss": 0.4816, "step": 38244 }, { "epoch": 0.8111174736484911, "grad_norm": 0.36892223358154297, "learning_rate": 1.2942892609593294e-05, "loss": 0.4638, "step": 38245 }, { "epoch": 0.811138682106424, "grad_norm": 0.38082587718963623, "learning_rate": 1.2942573881007212e-05, "loss": 0.4925, "step": 38246 }, { "epoch": 0.8111598905643571, "grad_norm": 0.3781503736972809, "learning_rate": 1.2942255149148424e-05, "loss": 0.5028, "step": 38247 }, { "epoch": 0.8111810990222901, "grad_norm": 0.3968050181865692, "learning_rate": 1.2941936414017286e-05, "loss": 0.5619, "step": 38248 }, { "epoch": 0.8112023074802232, "grad_norm": 0.35704755783081055, "learning_rate": 1.2941617675614153e-05, "loss": 0.4315, "step": 38249 }, { "epoch": 0.8112235159381561, "grad_norm": 0.39467692375183105, "learning_rate": 1.2941298933939379e-05, "loss": 0.5318, "step": 38250 }, { "epoch": 0.8112447243960892, "grad_norm": 0.36966705322265625, "learning_rate": 1.2940980188993317e-05, "loss": 0.455, "step": 38251 }, { "epoch": 0.8112659328540222, "grad_norm": 0.37056103348731995, "learning_rate": 1.2940661440776318e-05, "loss": 0.4394, "step": 38252 }, { "epoch": 0.8112871413119552, "grad_norm": 0.33790016174316406, "learning_rate": 1.2940342689288748e-05, "loss": 0.4524, "step": 38253 }, { "epoch": 0.8113083497698882, "grad_norm": 0.3419303297996521, "learning_rate": 1.2940023934530952e-05, "loss": 0.5807, "step": 38254 }, { "epoch": 0.8113295582278213, "grad_norm": 0.3473791778087616, "learning_rate": 1.2939705176503283e-05, "loss": 0.5276, "step": 38255 }, { "epoch": 0.8113507666857542, "grad_norm": 0.37685760855674744, "learning_rate": 1.2939386415206105e-05, "loss": 0.4655, "step": 38256 }, { "epoch": 0.8113719751436873, "grad_norm": 0.5993094444274902, "learning_rate": 1.2939067650639765e-05, "loss": 0.4535, "step": 38257 }, { "epoch": 0.8113931836016203, "grad_norm": 0.39668092131614685, "learning_rate": 1.2938748882804619e-05, "loss": 0.4918, "step": 38258 }, { "epoch": 0.8114143920595533, "grad_norm": 0.42653459310531616, "learning_rate": 1.2938430111701022e-05, "loss": 0.5627, "step": 38259 }, { "epoch": 0.8114356005174864, "grad_norm": 0.34535035490989685, "learning_rate": 1.2938111337329329e-05, "loss": 0.5167, "step": 38260 }, { "epoch": 0.8114568089754194, "grad_norm": 0.4268615245819092, "learning_rate": 1.2937792559689893e-05, "loss": 0.5338, "step": 38261 }, { "epoch": 0.8114780174333525, "grad_norm": 0.3450263440608978, "learning_rate": 1.2937473778783072e-05, "loss": 0.4802, "step": 38262 }, { "epoch": 0.8114992258912854, "grad_norm": 0.3591926693916321, "learning_rate": 1.2937154994609214e-05, "loss": 0.492, "step": 38263 }, { "epoch": 0.8115204343492185, "grad_norm": 0.359747976064682, "learning_rate": 1.2936836207168682e-05, "loss": 0.4359, "step": 38264 }, { "epoch": 0.8115416428071515, "grad_norm": 0.3511495590209961, "learning_rate": 1.2936517416461823e-05, "loss": 0.4478, "step": 38265 }, { "epoch": 0.8115628512650845, "grad_norm": 0.3702332675457001, "learning_rate": 1.2936198622488997e-05, "loss": 0.4058, "step": 38266 }, { "epoch": 0.8115840597230175, "grad_norm": 0.3530309200286865, "learning_rate": 1.2935879825250555e-05, "loss": 0.4063, "step": 38267 }, { "epoch": 0.8116052681809506, "grad_norm": 0.36776652932167053, "learning_rate": 1.2935561024746854e-05, "loss": 0.5105, "step": 38268 }, { "epoch": 0.8116264766388835, "grad_norm": 0.36237582564353943, "learning_rate": 1.2935242220978247e-05, "loss": 0.4445, "step": 38269 }, { "epoch": 0.8116476850968166, "grad_norm": 0.34462234377861023, "learning_rate": 1.2934923413945091e-05, "loss": 0.5166, "step": 38270 }, { "epoch": 0.8116688935547496, "grad_norm": 0.3822282552719116, "learning_rate": 1.2934604603647738e-05, "loss": 0.5167, "step": 38271 }, { "epoch": 0.8116901020126827, "grad_norm": 0.34780755639076233, "learning_rate": 1.2934285790086537e-05, "loss": 0.4811, "step": 38272 }, { "epoch": 0.8117113104706157, "grad_norm": 0.33507248759269714, "learning_rate": 1.2933966973261858e-05, "loss": 0.4991, "step": 38273 }, { "epoch": 0.8117325189285487, "grad_norm": 0.37823179364204407, "learning_rate": 1.2933648153174044e-05, "loss": 0.4755, "step": 38274 }, { "epoch": 0.8117537273864818, "grad_norm": 0.3811434507369995, "learning_rate": 1.2933329329823447e-05, "loss": 0.4599, "step": 38275 }, { "epoch": 0.8117749358444147, "grad_norm": 0.3358791470527649, "learning_rate": 1.2933010503210433e-05, "loss": 0.476, "step": 38276 }, { "epoch": 0.8117961443023478, "grad_norm": 0.35918834805488586, "learning_rate": 1.2932691673335348e-05, "loss": 0.4185, "step": 38277 }, { "epoch": 0.8118173527602808, "grad_norm": 0.37293919920921326, "learning_rate": 1.2932372840198543e-05, "loss": 0.5331, "step": 38278 }, { "epoch": 0.8118385612182138, "grad_norm": 0.33900153636932373, "learning_rate": 1.2932054003800388e-05, "loss": 0.4489, "step": 38279 }, { "epoch": 0.8118597696761468, "grad_norm": 0.35358646512031555, "learning_rate": 1.2931735164141222e-05, "loss": 0.473, "step": 38280 }, { "epoch": 0.8118809781340799, "grad_norm": 0.34168729186058044, "learning_rate": 1.2931416321221409e-05, "loss": 0.4868, "step": 38281 }, { "epoch": 0.8119021865920129, "grad_norm": 0.37265974283218384, "learning_rate": 1.2931097475041297e-05, "loss": 0.5259, "step": 38282 }, { "epoch": 0.8119233950499459, "grad_norm": 0.36405864357948303, "learning_rate": 1.2930778625601245e-05, "loss": 0.4489, "step": 38283 }, { "epoch": 0.8119446035078789, "grad_norm": 0.2998751401901245, "learning_rate": 1.2930459772901607e-05, "loss": 0.4488, "step": 38284 }, { "epoch": 0.811965811965812, "grad_norm": 0.30867472290992737, "learning_rate": 1.2930140916942737e-05, "loss": 0.3954, "step": 38285 }, { "epoch": 0.811987020423745, "grad_norm": 0.4058002531528473, "learning_rate": 1.2929822057724988e-05, "loss": 0.5035, "step": 38286 }, { "epoch": 0.812008228881678, "grad_norm": 0.37815433740615845, "learning_rate": 1.2929503195248718e-05, "loss": 0.4682, "step": 38287 }, { "epoch": 0.8120294373396111, "grad_norm": 0.43467533588409424, "learning_rate": 1.2929184329514281e-05, "loss": 0.4463, "step": 38288 }, { "epoch": 0.812050645797544, "grad_norm": 0.4398104250431061, "learning_rate": 1.2928865460522029e-05, "loss": 0.5352, "step": 38289 }, { "epoch": 0.8120718542554771, "grad_norm": 0.38002800941467285, "learning_rate": 1.2928546588272321e-05, "loss": 0.492, "step": 38290 }, { "epoch": 0.8120930627134101, "grad_norm": 0.3891356587409973, "learning_rate": 1.2928227712765504e-05, "loss": 0.4363, "step": 38291 }, { "epoch": 0.8121142711713432, "grad_norm": 0.35799112915992737, "learning_rate": 1.292790883400194e-05, "loss": 0.5441, "step": 38292 }, { "epoch": 0.8121354796292761, "grad_norm": 0.3467407822608948, "learning_rate": 1.2927589951981982e-05, "loss": 0.4345, "step": 38293 }, { "epoch": 0.8121566880872092, "grad_norm": 0.3495064675807953, "learning_rate": 1.2927271066705984e-05, "loss": 0.4613, "step": 38294 }, { "epoch": 0.8121778965451422, "grad_norm": 0.3608286380767822, "learning_rate": 1.29269521781743e-05, "loss": 0.4719, "step": 38295 }, { "epoch": 0.8121991050030752, "grad_norm": 0.3285532295703888, "learning_rate": 1.2926633286387287e-05, "loss": 0.5297, "step": 38296 }, { "epoch": 0.8122203134610082, "grad_norm": 0.3349325656890869, "learning_rate": 1.2926314391345297e-05, "loss": 0.4415, "step": 38297 }, { "epoch": 0.8122415219189413, "grad_norm": 0.38959094882011414, "learning_rate": 1.2925995493048682e-05, "loss": 0.5325, "step": 38298 }, { "epoch": 0.8122627303768742, "grad_norm": 0.40186744928359985, "learning_rate": 1.2925676591497805e-05, "loss": 0.4874, "step": 38299 }, { "epoch": 0.8122839388348073, "grad_norm": 0.34801891446113586, "learning_rate": 1.2925357686693013e-05, "loss": 0.4583, "step": 38300 }, { "epoch": 0.8123051472927404, "grad_norm": 0.3542621433734894, "learning_rate": 1.2925038778634664e-05, "loss": 0.4952, "step": 38301 }, { "epoch": 0.8123263557506734, "grad_norm": 0.40948471426963806, "learning_rate": 1.2924719867323114e-05, "loss": 0.4818, "step": 38302 }, { "epoch": 0.8123475642086064, "grad_norm": 0.7981572151184082, "learning_rate": 1.2924400952758713e-05, "loss": 0.474, "step": 38303 }, { "epoch": 0.8123687726665394, "grad_norm": 0.36029040813446045, "learning_rate": 1.2924082034941821e-05, "loss": 0.4556, "step": 38304 }, { "epoch": 0.8123899811244725, "grad_norm": 0.3950894773006439, "learning_rate": 1.2923763113872789e-05, "loss": 0.5339, "step": 38305 }, { "epoch": 0.8124111895824054, "grad_norm": 0.38526126742362976, "learning_rate": 1.2923444189551973e-05, "loss": 0.5517, "step": 38306 }, { "epoch": 0.8124323980403385, "grad_norm": 0.35403335094451904, "learning_rate": 1.292312526197973e-05, "loss": 0.4776, "step": 38307 }, { "epoch": 0.8124536064982715, "grad_norm": 0.3867606818675995, "learning_rate": 1.292280633115641e-05, "loss": 0.5609, "step": 38308 }, { "epoch": 0.8124748149562045, "grad_norm": 0.35786235332489014, "learning_rate": 1.2922487397082373e-05, "loss": 0.5023, "step": 38309 }, { "epoch": 0.8124960234141375, "grad_norm": 0.4113410413265228, "learning_rate": 1.2922168459757966e-05, "loss": 0.482, "step": 38310 }, { "epoch": 0.8125172318720706, "grad_norm": 0.37598225474357605, "learning_rate": 1.2921849519183554e-05, "loss": 0.4962, "step": 38311 }, { "epoch": 0.8125384403300036, "grad_norm": 0.3670933246612549, "learning_rate": 1.2921530575359483e-05, "loss": 0.4825, "step": 38312 }, { "epoch": 0.8125596487879366, "grad_norm": 0.3553207516670227, "learning_rate": 1.2921211628286112e-05, "loss": 0.4056, "step": 38313 }, { "epoch": 0.8125808572458697, "grad_norm": 0.3636661171913147, "learning_rate": 1.2920892677963794e-05, "loss": 0.5099, "step": 38314 }, { "epoch": 0.8126020657038027, "grad_norm": 0.3221547603607178, "learning_rate": 1.2920573724392886e-05, "loss": 0.4882, "step": 38315 }, { "epoch": 0.8126232741617357, "grad_norm": 0.32168900966644287, "learning_rate": 1.2920254767573743e-05, "loss": 0.4149, "step": 38316 }, { "epoch": 0.8126444826196687, "grad_norm": 0.36558249592781067, "learning_rate": 1.2919935807506714e-05, "loss": 0.4644, "step": 38317 }, { "epoch": 0.8126656910776018, "grad_norm": 0.3210872411727905, "learning_rate": 1.291961684419216e-05, "loss": 0.4391, "step": 38318 }, { "epoch": 0.8126868995355347, "grad_norm": 0.3824930191040039, "learning_rate": 1.2919297877630434e-05, "loss": 0.4614, "step": 38319 }, { "epoch": 0.8127081079934678, "grad_norm": 0.31978458166122437, "learning_rate": 1.2918978907821886e-05, "loss": 0.466, "step": 38320 }, { "epoch": 0.8127293164514008, "grad_norm": 0.4518474340438843, "learning_rate": 1.2918659934766882e-05, "loss": 0.5575, "step": 38321 }, { "epoch": 0.8127505249093339, "grad_norm": 0.37758180499076843, "learning_rate": 1.2918340958465765e-05, "loss": 0.4996, "step": 38322 }, { "epoch": 0.8127717333672668, "grad_norm": 0.4895283281803131, "learning_rate": 1.2918021978918892e-05, "loss": 0.4793, "step": 38323 }, { "epoch": 0.8127929418251999, "grad_norm": 0.3936831057071686, "learning_rate": 1.2917702996126627e-05, "loss": 0.5583, "step": 38324 }, { "epoch": 0.8128141502831329, "grad_norm": 0.33287426829338074, "learning_rate": 1.2917384010089315e-05, "loss": 0.5213, "step": 38325 }, { "epoch": 0.8128353587410659, "grad_norm": 0.4239482581615448, "learning_rate": 1.2917065020807313e-05, "loss": 0.4034, "step": 38326 }, { "epoch": 0.812856567198999, "grad_norm": 0.34717684984207153, "learning_rate": 1.291674602828098e-05, "loss": 0.5356, "step": 38327 }, { "epoch": 0.812877775656932, "grad_norm": 0.32053306698799133, "learning_rate": 1.2916427032510664e-05, "loss": 0.4305, "step": 38328 }, { "epoch": 0.812898984114865, "grad_norm": 0.3690173029899597, "learning_rate": 1.2916108033496721e-05, "loss": 0.4893, "step": 38329 }, { "epoch": 0.812920192572798, "grad_norm": 0.45794492959976196, "learning_rate": 1.2915789031239511e-05, "loss": 0.5112, "step": 38330 }, { "epoch": 0.8129414010307311, "grad_norm": 0.3870803713798523, "learning_rate": 1.2915470025739388e-05, "loss": 0.5209, "step": 38331 }, { "epoch": 0.8129626094886641, "grad_norm": 0.37216275930404663, "learning_rate": 1.2915151016996701e-05, "loss": 0.5424, "step": 38332 }, { "epoch": 0.8129838179465971, "grad_norm": 0.5405343770980835, "learning_rate": 1.2914832005011812e-05, "loss": 0.4157, "step": 38333 }, { "epoch": 0.8130050264045301, "grad_norm": 0.34657761454582214, "learning_rate": 1.291451298978507e-05, "loss": 0.4995, "step": 38334 }, { "epoch": 0.8130262348624632, "grad_norm": 0.3573814332485199, "learning_rate": 1.2914193971316831e-05, "loss": 0.5214, "step": 38335 }, { "epoch": 0.8130474433203961, "grad_norm": 0.3350193202495575, "learning_rate": 1.2913874949607452e-05, "loss": 0.4633, "step": 38336 }, { "epoch": 0.8130686517783292, "grad_norm": 0.3384174108505249, "learning_rate": 1.2913555924657284e-05, "loss": 0.4474, "step": 38337 }, { "epoch": 0.8130898602362622, "grad_norm": 0.370738685131073, "learning_rate": 1.2913236896466688e-05, "loss": 0.583, "step": 38338 }, { "epoch": 0.8131110686941952, "grad_norm": 0.35918891429901123, "learning_rate": 1.2912917865036017e-05, "loss": 0.484, "step": 38339 }, { "epoch": 0.8131322771521282, "grad_norm": 0.36310040950775146, "learning_rate": 1.291259883036562e-05, "loss": 0.4835, "step": 38340 }, { "epoch": 0.8131534856100613, "grad_norm": 0.3473297357559204, "learning_rate": 1.2912279792455857e-05, "loss": 0.4844, "step": 38341 }, { "epoch": 0.8131746940679944, "grad_norm": 0.38861602544784546, "learning_rate": 1.291196075130708e-05, "loss": 0.5482, "step": 38342 }, { "epoch": 0.8131959025259273, "grad_norm": 0.3813496530056, "learning_rate": 1.2911641706919644e-05, "loss": 0.5507, "step": 38343 }, { "epoch": 0.8132171109838604, "grad_norm": 0.3715096712112427, "learning_rate": 1.2911322659293912e-05, "loss": 0.4897, "step": 38344 }, { "epoch": 0.8132383194417934, "grad_norm": 0.36630377173423767, "learning_rate": 1.2911003608430228e-05, "loss": 0.4794, "step": 38345 }, { "epoch": 0.8132595278997264, "grad_norm": 0.3595460057258606, "learning_rate": 1.2910684554328952e-05, "loss": 0.4581, "step": 38346 }, { "epoch": 0.8132807363576594, "grad_norm": 0.4437762498855591, "learning_rate": 1.2910365496990438e-05, "loss": 0.5604, "step": 38347 }, { "epoch": 0.8133019448155925, "grad_norm": 0.3693900406360626, "learning_rate": 1.291004643641504e-05, "loss": 0.5269, "step": 38348 }, { "epoch": 0.8133231532735254, "grad_norm": 0.42027080059051514, "learning_rate": 1.2909727372603114e-05, "loss": 0.5178, "step": 38349 }, { "epoch": 0.8133443617314585, "grad_norm": 0.4018445909023285, "learning_rate": 1.2909408305555014e-05, "loss": 0.5457, "step": 38350 }, { "epoch": 0.8133655701893915, "grad_norm": 0.34849363565444946, "learning_rate": 1.2909089235271095e-05, "loss": 0.489, "step": 38351 }, { "epoch": 0.8133867786473246, "grad_norm": 0.4593261778354645, "learning_rate": 1.2908770161751716e-05, "loss": 0.4439, "step": 38352 }, { "epoch": 0.8134079871052575, "grad_norm": 0.37996363639831543, "learning_rate": 1.2908451084997225e-05, "loss": 0.4578, "step": 38353 }, { "epoch": 0.8134291955631906, "grad_norm": 0.3560642898082733, "learning_rate": 1.2908132005007979e-05, "loss": 0.5484, "step": 38354 }, { "epoch": 0.8134504040211237, "grad_norm": 0.42067575454711914, "learning_rate": 1.2907812921784335e-05, "loss": 0.4608, "step": 38355 }, { "epoch": 0.8134716124790566, "grad_norm": 0.3643544018268585, "learning_rate": 1.2907493835326648e-05, "loss": 0.4986, "step": 38356 }, { "epoch": 0.8134928209369897, "grad_norm": 0.3282128572463989, "learning_rate": 1.290717474563527e-05, "loss": 0.5102, "step": 38357 }, { "epoch": 0.8135140293949227, "grad_norm": 0.34465429186820984, "learning_rate": 1.2906855652710557e-05, "loss": 0.4222, "step": 38358 }, { "epoch": 0.8135352378528558, "grad_norm": 0.3532921075820923, "learning_rate": 1.2906536556552865e-05, "loss": 0.5083, "step": 38359 }, { "epoch": 0.8135564463107887, "grad_norm": 0.41521114110946655, "learning_rate": 1.2906217457162548e-05, "loss": 0.489, "step": 38360 }, { "epoch": 0.8135776547687218, "grad_norm": 0.41801920533180237, "learning_rate": 1.2905898354539962e-05, "loss": 0.4827, "step": 38361 }, { "epoch": 0.8135988632266548, "grad_norm": 0.3589514195919037, "learning_rate": 1.290557924868546e-05, "loss": 0.5135, "step": 38362 }, { "epoch": 0.8136200716845878, "grad_norm": 0.5266520380973816, "learning_rate": 1.29052601395994e-05, "loss": 0.436, "step": 38363 }, { "epoch": 0.8136412801425208, "grad_norm": 0.3582627475261688, "learning_rate": 1.2904941027282135e-05, "loss": 0.5502, "step": 38364 }, { "epoch": 0.8136624886004539, "grad_norm": 0.3399965763092041, "learning_rate": 1.2904621911734019e-05, "loss": 0.4812, "step": 38365 }, { "epoch": 0.8136836970583868, "grad_norm": 0.3867805600166321, "learning_rate": 1.2904302792955408e-05, "loss": 0.4995, "step": 38366 }, { "epoch": 0.8137049055163199, "grad_norm": 0.33749550580978394, "learning_rate": 1.2903983670946657e-05, "loss": 0.4805, "step": 38367 }, { "epoch": 0.813726113974253, "grad_norm": 0.3379350006580353, "learning_rate": 1.2903664545708116e-05, "loss": 0.4879, "step": 38368 }, { "epoch": 0.813747322432186, "grad_norm": 0.4548061192035675, "learning_rate": 1.290334541724015e-05, "loss": 0.5225, "step": 38369 }, { "epoch": 0.813768530890119, "grad_norm": 0.38304200768470764, "learning_rate": 1.2903026285543107e-05, "loss": 0.4289, "step": 38370 }, { "epoch": 0.813789739348052, "grad_norm": 0.5098377466201782, "learning_rate": 1.2902707150617342e-05, "loss": 0.4257, "step": 38371 }, { "epoch": 0.8138109478059851, "grad_norm": 0.5559868216514587, "learning_rate": 1.2902388012463213e-05, "loss": 0.5825, "step": 38372 }, { "epoch": 0.813832156263918, "grad_norm": 0.32404834032058716, "learning_rate": 1.2902068871081072e-05, "loss": 0.4273, "step": 38373 }, { "epoch": 0.8138533647218511, "grad_norm": 0.39045602083206177, "learning_rate": 1.2901749726471273e-05, "loss": 0.419, "step": 38374 }, { "epoch": 0.8138745731797841, "grad_norm": 0.3779434561729431, "learning_rate": 1.2901430578634178e-05, "loss": 0.3685, "step": 38375 }, { "epoch": 0.8138957816377171, "grad_norm": 0.5185024738311768, "learning_rate": 1.2901111427570134e-05, "loss": 0.4715, "step": 38376 }, { "epoch": 0.8139169900956501, "grad_norm": 0.38250985741615295, "learning_rate": 1.2900792273279504e-05, "loss": 0.5207, "step": 38377 }, { "epoch": 0.8139381985535832, "grad_norm": 0.4132172167301178, "learning_rate": 1.2900473115762633e-05, "loss": 0.576, "step": 38378 }, { "epoch": 0.8139594070115161, "grad_norm": 0.42668020725250244, "learning_rate": 1.290015395501988e-05, "loss": 0.4746, "step": 38379 }, { "epoch": 0.8139806154694492, "grad_norm": 0.3784700930118561, "learning_rate": 1.2899834791051604e-05, "loss": 0.5178, "step": 38380 }, { "epoch": 0.8140018239273823, "grad_norm": 0.4738105237483978, "learning_rate": 1.2899515623858157e-05, "loss": 0.5915, "step": 38381 }, { "epoch": 0.8140230323853153, "grad_norm": 0.32585906982421875, "learning_rate": 1.2899196453439892e-05, "loss": 0.485, "step": 38382 }, { "epoch": 0.8140442408432483, "grad_norm": 0.3442668616771698, "learning_rate": 1.2898877279797168e-05, "loss": 0.4165, "step": 38383 }, { "epoch": 0.8140654493011813, "grad_norm": 0.3799264430999756, "learning_rate": 1.2898558102930338e-05, "loss": 0.4754, "step": 38384 }, { "epoch": 0.8140866577591144, "grad_norm": 0.5378502011299133, "learning_rate": 1.2898238922839754e-05, "loss": 0.4681, "step": 38385 }, { "epoch": 0.8141078662170473, "grad_norm": 0.3575267493724823, "learning_rate": 1.2897919739525777e-05, "loss": 0.4652, "step": 38386 }, { "epoch": 0.8141290746749804, "grad_norm": 0.4245327413082123, "learning_rate": 1.2897600552988756e-05, "loss": 0.5095, "step": 38387 }, { "epoch": 0.8141502831329134, "grad_norm": 0.3559044599533081, "learning_rate": 1.2897281363229048e-05, "loss": 0.4853, "step": 38388 }, { "epoch": 0.8141714915908465, "grad_norm": 0.34265652298927307, "learning_rate": 1.2896962170247012e-05, "loss": 0.5047, "step": 38389 }, { "epoch": 0.8141927000487794, "grad_norm": 0.45690685510635376, "learning_rate": 1.2896642974043e-05, "loss": 0.5009, "step": 38390 }, { "epoch": 0.8142139085067125, "grad_norm": 0.36545881628990173, "learning_rate": 1.2896323774617365e-05, "loss": 0.4828, "step": 38391 }, { "epoch": 0.8142351169646455, "grad_norm": 0.39309629797935486, "learning_rate": 1.2896004571970466e-05, "loss": 0.4499, "step": 38392 }, { "epoch": 0.8142563254225785, "grad_norm": 0.35560688376426697, "learning_rate": 1.2895685366102652e-05, "loss": 0.4849, "step": 38393 }, { "epoch": 0.8142775338805115, "grad_norm": 0.35263127088546753, "learning_rate": 1.2895366157014285e-05, "loss": 0.473, "step": 38394 }, { "epoch": 0.8142987423384446, "grad_norm": 0.35590171813964844, "learning_rate": 1.2895046944705716e-05, "loss": 0.5369, "step": 38395 }, { "epoch": 0.8143199507963776, "grad_norm": 0.39587584137916565, "learning_rate": 1.28947277291773e-05, "loss": 0.5177, "step": 38396 }, { "epoch": 0.8143411592543106, "grad_norm": 0.3145376443862915, "learning_rate": 1.2894408510429398e-05, "loss": 0.4383, "step": 38397 }, { "epoch": 0.8143623677122437, "grad_norm": 0.35202452540397644, "learning_rate": 1.2894089288462355e-05, "loss": 0.4821, "step": 38398 }, { "epoch": 0.8143835761701766, "grad_norm": 0.3556015193462372, "learning_rate": 1.2893770063276532e-05, "loss": 0.4166, "step": 38399 }, { "epoch": 0.8144047846281097, "grad_norm": 0.3646508455276489, "learning_rate": 1.2893450834872281e-05, "loss": 0.4529, "step": 38400 }, { "epoch": 0.8144259930860427, "grad_norm": 0.3702307939529419, "learning_rate": 1.2893131603249962e-05, "loss": 0.4418, "step": 38401 }, { "epoch": 0.8144472015439758, "grad_norm": 0.3528575003147125, "learning_rate": 1.2892812368409925e-05, "loss": 0.453, "step": 38402 }, { "epoch": 0.8144684100019087, "grad_norm": 0.44078895449638367, "learning_rate": 1.2892493130352531e-05, "loss": 0.5576, "step": 38403 }, { "epoch": 0.8144896184598418, "grad_norm": 0.3999783992767334, "learning_rate": 1.2892173889078127e-05, "loss": 0.5026, "step": 38404 }, { "epoch": 0.8145108269177748, "grad_norm": 0.3732791841030121, "learning_rate": 1.2891854644587076e-05, "loss": 0.4813, "step": 38405 }, { "epoch": 0.8145320353757078, "grad_norm": 0.3469434380531311, "learning_rate": 1.2891535396879726e-05, "loss": 0.4466, "step": 38406 }, { "epoch": 0.8145532438336408, "grad_norm": 0.33928924798965454, "learning_rate": 1.289121614595644e-05, "loss": 0.4719, "step": 38407 }, { "epoch": 0.8145744522915739, "grad_norm": 0.35676994919776917, "learning_rate": 1.2890896891817564e-05, "loss": 0.4856, "step": 38408 }, { "epoch": 0.814595660749507, "grad_norm": 0.35941463708877563, "learning_rate": 1.289057763446346e-05, "loss": 0.4178, "step": 38409 }, { "epoch": 0.8146168692074399, "grad_norm": 0.3966028690338135, "learning_rate": 1.2890258373894478e-05, "loss": 0.5343, "step": 38410 }, { "epoch": 0.814638077665373, "grad_norm": 0.40652281045913696, "learning_rate": 1.2889939110110978e-05, "loss": 0.4298, "step": 38411 }, { "epoch": 0.814659286123306, "grad_norm": 0.37040436267852783, "learning_rate": 1.2889619843113314e-05, "loss": 0.5311, "step": 38412 }, { "epoch": 0.814680494581239, "grad_norm": 0.3194552958011627, "learning_rate": 1.2889300572901836e-05, "loss": 0.3899, "step": 38413 }, { "epoch": 0.814701703039172, "grad_norm": 0.3374978303909302, "learning_rate": 1.2888981299476906e-05, "loss": 0.3939, "step": 38414 }, { "epoch": 0.8147229114971051, "grad_norm": 0.450663298368454, "learning_rate": 1.2888662022838878e-05, "loss": 0.5042, "step": 38415 }, { "epoch": 0.814744119955038, "grad_norm": 0.353019654750824, "learning_rate": 1.28883427429881e-05, "loss": 0.5299, "step": 38416 }, { "epoch": 0.8147653284129711, "grad_norm": 0.3863629400730133, "learning_rate": 1.2888023459924937e-05, "loss": 0.4669, "step": 38417 }, { "epoch": 0.8147865368709041, "grad_norm": 0.3680197298526764, "learning_rate": 1.2887704173649738e-05, "loss": 0.4558, "step": 38418 }, { "epoch": 0.8148077453288372, "grad_norm": 0.3330523371696472, "learning_rate": 1.2887384884162858e-05, "loss": 0.4848, "step": 38419 }, { "epoch": 0.8148289537867701, "grad_norm": 0.3627892732620239, "learning_rate": 1.2887065591464654e-05, "loss": 0.538, "step": 38420 }, { "epoch": 0.8148501622447032, "grad_norm": 0.40480244159698486, "learning_rate": 1.2886746295555481e-05, "loss": 0.5775, "step": 38421 }, { "epoch": 0.8148713707026363, "grad_norm": 0.4593507647514343, "learning_rate": 1.2886426996435695e-05, "loss": 0.5112, "step": 38422 }, { "epoch": 0.8148925791605692, "grad_norm": 0.43251433968544006, "learning_rate": 1.2886107694105649e-05, "loss": 0.5158, "step": 38423 }, { "epoch": 0.8149137876185023, "grad_norm": 0.5983548760414124, "learning_rate": 1.2885788388565699e-05, "loss": 0.4633, "step": 38424 }, { "epoch": 0.8149349960764353, "grad_norm": 0.39431750774383545, "learning_rate": 1.28854690798162e-05, "loss": 0.4323, "step": 38425 }, { "epoch": 0.8149562045343683, "grad_norm": 0.34412309527397156, "learning_rate": 1.2885149767857509e-05, "loss": 0.5582, "step": 38426 }, { "epoch": 0.8149774129923013, "grad_norm": 0.3093065917491913, "learning_rate": 1.2884830452689977e-05, "loss": 0.4568, "step": 38427 }, { "epoch": 0.8149986214502344, "grad_norm": 0.34468066692352295, "learning_rate": 1.2884511134313962e-05, "loss": 0.4629, "step": 38428 }, { "epoch": 0.8150198299081673, "grad_norm": 0.32547906041145325, "learning_rate": 1.288419181272982e-05, "loss": 0.48, "step": 38429 }, { "epoch": 0.8150410383661004, "grad_norm": 0.40919095277786255, "learning_rate": 1.2883872487937905e-05, "loss": 0.5094, "step": 38430 }, { "epoch": 0.8150622468240334, "grad_norm": 0.3224072754383087, "learning_rate": 1.2883553159938572e-05, "loss": 0.422, "step": 38431 }, { "epoch": 0.8150834552819665, "grad_norm": 0.36695021390914917, "learning_rate": 1.2883233828732176e-05, "loss": 0.5124, "step": 38432 }, { "epoch": 0.8151046637398994, "grad_norm": 0.38181814551353455, "learning_rate": 1.288291449431907e-05, "loss": 0.5786, "step": 38433 }, { "epoch": 0.8151258721978325, "grad_norm": 0.3693525195121765, "learning_rate": 1.2882595156699615e-05, "loss": 0.5003, "step": 38434 }, { "epoch": 0.8151470806557655, "grad_norm": 0.4023784399032593, "learning_rate": 1.2882275815874161e-05, "loss": 0.5334, "step": 38435 }, { "epoch": 0.8151682891136985, "grad_norm": 0.3652831017971039, "learning_rate": 1.2881956471843067e-05, "loss": 0.5399, "step": 38436 }, { "epoch": 0.8151894975716316, "grad_norm": 0.4270765781402588, "learning_rate": 1.2881637124606683e-05, "loss": 0.4787, "step": 38437 }, { "epoch": 0.8152107060295646, "grad_norm": 0.423000693321228, "learning_rate": 1.2881317774165369e-05, "loss": 0.4718, "step": 38438 }, { "epoch": 0.8152319144874977, "grad_norm": 0.8439947962760925, "learning_rate": 1.2880998420519477e-05, "loss": 0.5189, "step": 38439 }, { "epoch": 0.8152531229454306, "grad_norm": 0.37323978543281555, "learning_rate": 1.2880679063669365e-05, "loss": 0.549, "step": 38440 }, { "epoch": 0.8152743314033637, "grad_norm": 0.47214069962501526, "learning_rate": 1.2880359703615386e-05, "loss": 0.5552, "step": 38441 }, { "epoch": 0.8152955398612967, "grad_norm": 0.3590145707130432, "learning_rate": 1.2880040340357897e-05, "loss": 0.5249, "step": 38442 }, { "epoch": 0.8153167483192297, "grad_norm": 0.3830726444721222, "learning_rate": 1.2879720973897252e-05, "loss": 0.5151, "step": 38443 }, { "epoch": 0.8153379567771627, "grad_norm": 0.35471272468566895, "learning_rate": 1.2879401604233806e-05, "loss": 0.5048, "step": 38444 }, { "epoch": 0.8153591652350958, "grad_norm": 0.38884228467941284, "learning_rate": 1.2879082231367913e-05, "loss": 0.5081, "step": 38445 }, { "epoch": 0.8153803736930287, "grad_norm": 0.34206870198249817, "learning_rate": 1.2878762855299932e-05, "loss": 0.4584, "step": 38446 }, { "epoch": 0.8154015821509618, "grad_norm": 0.3842817544937134, "learning_rate": 1.2878443476030214e-05, "loss": 0.5125, "step": 38447 }, { "epoch": 0.8154227906088948, "grad_norm": 0.34845206141471863, "learning_rate": 1.287812409355912e-05, "loss": 0.4829, "step": 38448 }, { "epoch": 0.8154439990668279, "grad_norm": 0.35394051671028137, "learning_rate": 1.2877804707886998e-05, "loss": 0.5957, "step": 38449 }, { "epoch": 0.8154652075247609, "grad_norm": 0.40614262223243713, "learning_rate": 1.2877485319014206e-05, "loss": 0.4707, "step": 38450 }, { "epoch": 0.8154864159826939, "grad_norm": 0.3513422906398773, "learning_rate": 1.28771659269411e-05, "loss": 0.4549, "step": 38451 }, { "epoch": 0.815507624440627, "grad_norm": 0.33638498187065125, "learning_rate": 1.2876846531668037e-05, "loss": 0.4658, "step": 38452 }, { "epoch": 0.8155288328985599, "grad_norm": 0.3545018434524536, "learning_rate": 1.2876527133195369e-05, "loss": 0.4833, "step": 38453 }, { "epoch": 0.815550041356493, "grad_norm": 0.34914520382881165, "learning_rate": 1.2876207731523453e-05, "loss": 0.5194, "step": 38454 }, { "epoch": 0.815571249814426, "grad_norm": 0.3928864300251007, "learning_rate": 1.2875888326652644e-05, "loss": 0.4842, "step": 38455 }, { "epoch": 0.815592458272359, "grad_norm": 0.35557007789611816, "learning_rate": 1.2875568918583296e-05, "loss": 0.4852, "step": 38456 }, { "epoch": 0.815613666730292, "grad_norm": 0.33738136291503906, "learning_rate": 1.2875249507315768e-05, "loss": 0.4945, "step": 38457 }, { "epoch": 0.8156348751882251, "grad_norm": 0.4395839273929596, "learning_rate": 1.2874930092850408e-05, "loss": 0.4774, "step": 38458 }, { "epoch": 0.815656083646158, "grad_norm": 0.39226803183555603, "learning_rate": 1.2874610675187578e-05, "loss": 0.5267, "step": 38459 }, { "epoch": 0.8156772921040911, "grad_norm": 0.36473697423934937, "learning_rate": 1.2874291254327633e-05, "loss": 0.472, "step": 38460 }, { "epoch": 0.8156985005620241, "grad_norm": 0.3521210551261902, "learning_rate": 1.2873971830270922e-05, "loss": 0.5202, "step": 38461 }, { "epoch": 0.8157197090199572, "grad_norm": 0.3611765503883362, "learning_rate": 1.287365240301781e-05, "loss": 0.5214, "step": 38462 }, { "epoch": 0.8157409174778902, "grad_norm": 0.43537333607673645, "learning_rate": 1.2873332972568644e-05, "loss": 0.6116, "step": 38463 }, { "epoch": 0.8157621259358232, "grad_norm": 0.37026122212409973, "learning_rate": 1.2873013538923778e-05, "loss": 0.5586, "step": 38464 }, { "epoch": 0.8157833343937563, "grad_norm": 0.3330938220024109, "learning_rate": 1.2872694102083575e-05, "loss": 0.4453, "step": 38465 }, { "epoch": 0.8158045428516892, "grad_norm": 0.34271422028541565, "learning_rate": 1.2872374662048388e-05, "loss": 0.5029, "step": 38466 }, { "epoch": 0.8158257513096223, "grad_norm": 0.35323506593704224, "learning_rate": 1.2872055218818568e-05, "loss": 0.4292, "step": 38467 }, { "epoch": 0.8158469597675553, "grad_norm": 0.3645167946815491, "learning_rate": 1.2871735772394476e-05, "loss": 0.5176, "step": 38468 }, { "epoch": 0.8158681682254884, "grad_norm": 0.3714263141155243, "learning_rate": 1.2871416322776462e-05, "loss": 0.4855, "step": 38469 }, { "epoch": 0.8158893766834213, "grad_norm": 0.3427806496620178, "learning_rate": 1.2871096869964884e-05, "loss": 0.5347, "step": 38470 }, { "epoch": 0.8159105851413544, "grad_norm": 0.4781394898891449, "learning_rate": 1.2870777413960098e-05, "loss": 0.4423, "step": 38471 }, { "epoch": 0.8159317935992874, "grad_norm": 0.3511278033256531, "learning_rate": 1.2870457954762457e-05, "loss": 0.4731, "step": 38472 }, { "epoch": 0.8159530020572204, "grad_norm": 0.3475733995437622, "learning_rate": 1.287013849237232e-05, "loss": 0.4982, "step": 38473 }, { "epoch": 0.8159742105151534, "grad_norm": 0.32921770215034485, "learning_rate": 1.2869819026790038e-05, "loss": 0.4026, "step": 38474 }, { "epoch": 0.8159954189730865, "grad_norm": 0.334288626909256, "learning_rate": 1.2869499558015968e-05, "loss": 0.4621, "step": 38475 }, { "epoch": 0.8160166274310194, "grad_norm": 0.3348613381385803, "learning_rate": 1.2869180086050467e-05, "loss": 0.4976, "step": 38476 }, { "epoch": 0.8160378358889525, "grad_norm": 0.39251619577407837, "learning_rate": 1.2868860610893887e-05, "loss": 0.4887, "step": 38477 }, { "epoch": 0.8160590443468856, "grad_norm": 0.5658801794052124, "learning_rate": 1.2868541132546586e-05, "loss": 0.5352, "step": 38478 }, { "epoch": 0.8160802528048186, "grad_norm": 0.3451732397079468, "learning_rate": 1.2868221651008919e-05, "loss": 0.4531, "step": 38479 }, { "epoch": 0.8161014612627516, "grad_norm": 0.3809661865234375, "learning_rate": 1.2867902166281241e-05, "loss": 0.4676, "step": 38480 }, { "epoch": 0.8161226697206846, "grad_norm": 0.47503724694252014, "learning_rate": 1.2867582678363906e-05, "loss": 0.5119, "step": 38481 }, { "epoch": 0.8161438781786177, "grad_norm": 0.30064332485198975, "learning_rate": 1.286726318725727e-05, "loss": 0.4103, "step": 38482 }, { "epoch": 0.8161650866365506, "grad_norm": 0.36492377519607544, "learning_rate": 1.286694369296169e-05, "loss": 0.509, "step": 38483 }, { "epoch": 0.8161862950944837, "grad_norm": 0.3724348545074463, "learning_rate": 1.286662419547752e-05, "loss": 0.5116, "step": 38484 }, { "epoch": 0.8162075035524167, "grad_norm": 0.40942108631134033, "learning_rate": 1.2866304694805116e-05, "loss": 0.5092, "step": 38485 }, { "epoch": 0.8162287120103497, "grad_norm": 0.35777518153190613, "learning_rate": 1.2865985190944832e-05, "loss": 0.4651, "step": 38486 }, { "epoch": 0.8162499204682827, "grad_norm": 0.4027353823184967, "learning_rate": 1.2865665683897024e-05, "loss": 0.4633, "step": 38487 }, { "epoch": 0.8162711289262158, "grad_norm": 0.36045417189598083, "learning_rate": 1.286534617366205e-05, "loss": 0.4892, "step": 38488 }, { "epoch": 0.8162923373841487, "grad_norm": 0.3215140700340271, "learning_rate": 1.2865026660240259e-05, "loss": 0.4991, "step": 38489 }, { "epoch": 0.8163135458420818, "grad_norm": 0.36425670981407166, "learning_rate": 1.2864707143632012e-05, "loss": 0.4723, "step": 38490 }, { "epoch": 0.8163347543000149, "grad_norm": 0.3778512179851532, "learning_rate": 1.2864387623837663e-05, "loss": 0.4685, "step": 38491 }, { "epoch": 0.8163559627579479, "grad_norm": 0.3469967842102051, "learning_rate": 1.2864068100857565e-05, "loss": 0.4284, "step": 38492 }, { "epoch": 0.8163771712158809, "grad_norm": 0.40408584475517273, "learning_rate": 1.2863748574692076e-05, "loss": 0.5384, "step": 38493 }, { "epoch": 0.8163983796738139, "grad_norm": 0.48648780584335327, "learning_rate": 1.2863429045341554e-05, "loss": 0.4599, "step": 38494 }, { "epoch": 0.816419588131747, "grad_norm": 0.4330149292945862, "learning_rate": 1.2863109512806347e-05, "loss": 0.6036, "step": 38495 }, { "epoch": 0.8164407965896799, "grad_norm": 0.3798505365848541, "learning_rate": 1.2862789977086816e-05, "loss": 0.4422, "step": 38496 }, { "epoch": 0.816462005047613, "grad_norm": 0.3622247278690338, "learning_rate": 1.2862470438183317e-05, "loss": 0.528, "step": 38497 }, { "epoch": 0.816483213505546, "grad_norm": 0.39552435278892517, "learning_rate": 1.28621508960962e-05, "loss": 0.4505, "step": 38498 }, { "epoch": 0.816504421963479, "grad_norm": 0.35308146476745605, "learning_rate": 1.2861831350825825e-05, "loss": 0.4944, "step": 38499 }, { "epoch": 0.816525630421412, "grad_norm": 0.5264652967453003, "learning_rate": 1.2861511802372544e-05, "loss": 0.4667, "step": 38500 }, { "epoch": 0.8165468388793451, "grad_norm": 0.36908644437789917, "learning_rate": 1.2861192250736719e-05, "loss": 0.5375, "step": 38501 }, { "epoch": 0.8165680473372781, "grad_norm": 0.35136646032333374, "learning_rate": 1.2860872695918698e-05, "loss": 0.4665, "step": 38502 }, { "epoch": 0.8165892557952111, "grad_norm": 0.3789398968219757, "learning_rate": 1.286055313791884e-05, "loss": 0.5804, "step": 38503 }, { "epoch": 0.8166104642531442, "grad_norm": 0.37032145261764526, "learning_rate": 1.2860233576737498e-05, "loss": 0.46, "step": 38504 }, { "epoch": 0.8166316727110772, "grad_norm": 0.35964298248291016, "learning_rate": 1.285991401237503e-05, "loss": 0.4864, "step": 38505 }, { "epoch": 0.8166528811690102, "grad_norm": 0.41393736004829407, "learning_rate": 1.2859594444831788e-05, "loss": 0.5011, "step": 38506 }, { "epoch": 0.8166740896269432, "grad_norm": 0.42729079723358154, "learning_rate": 1.2859274874108136e-05, "loss": 0.5112, "step": 38507 }, { "epoch": 0.8166952980848763, "grad_norm": 0.3521043062210083, "learning_rate": 1.2858955300204418e-05, "loss": 0.506, "step": 38508 }, { "epoch": 0.8167165065428093, "grad_norm": 0.36165332794189453, "learning_rate": 1.2858635723120995e-05, "loss": 0.4507, "step": 38509 }, { "epoch": 0.8167377150007423, "grad_norm": 0.37317919731140137, "learning_rate": 1.2858316142858224e-05, "loss": 0.4598, "step": 38510 }, { "epoch": 0.8167589234586753, "grad_norm": 0.3710120618343353, "learning_rate": 1.2857996559416461e-05, "loss": 0.4877, "step": 38511 }, { "epoch": 0.8167801319166084, "grad_norm": 0.45177242159843445, "learning_rate": 1.2857676972796056e-05, "loss": 0.4459, "step": 38512 }, { "epoch": 0.8168013403745413, "grad_norm": 0.7803024649620056, "learning_rate": 1.2857357382997369e-05, "loss": 0.4004, "step": 38513 }, { "epoch": 0.8168225488324744, "grad_norm": 0.42855218052864075, "learning_rate": 1.2857037790020752e-05, "loss": 0.5484, "step": 38514 }, { "epoch": 0.8168437572904074, "grad_norm": 0.36609479784965515, "learning_rate": 1.2856718193866562e-05, "loss": 0.5541, "step": 38515 }, { "epoch": 0.8168649657483404, "grad_norm": 0.4180350601673126, "learning_rate": 1.2856398594535155e-05, "loss": 0.4871, "step": 38516 }, { "epoch": 0.8168861742062734, "grad_norm": 0.3631305992603302, "learning_rate": 1.285607899202689e-05, "loss": 0.5166, "step": 38517 }, { "epoch": 0.8169073826642065, "grad_norm": 0.42194482684135437, "learning_rate": 1.2855759386342115e-05, "loss": 0.4959, "step": 38518 }, { "epoch": 0.8169285911221396, "grad_norm": 0.3533362150192261, "learning_rate": 1.285543977748119e-05, "loss": 0.5191, "step": 38519 }, { "epoch": 0.8169497995800725, "grad_norm": 0.3630407154560089, "learning_rate": 1.285512016544447e-05, "loss": 0.4633, "step": 38520 }, { "epoch": 0.8169710080380056, "grad_norm": 0.3490321636199951, "learning_rate": 1.2854800550232312e-05, "loss": 0.4461, "step": 38521 }, { "epoch": 0.8169922164959386, "grad_norm": 0.3663022816181183, "learning_rate": 1.2854480931845066e-05, "loss": 0.5415, "step": 38522 }, { "epoch": 0.8170134249538716, "grad_norm": 0.35366013646125793, "learning_rate": 1.2854161310283093e-05, "loss": 0.4361, "step": 38523 }, { "epoch": 0.8170346334118046, "grad_norm": 0.3408173620700836, "learning_rate": 1.2853841685546747e-05, "loss": 0.46, "step": 38524 }, { "epoch": 0.8170558418697377, "grad_norm": 0.3610880672931671, "learning_rate": 1.2853522057636382e-05, "loss": 0.4174, "step": 38525 }, { "epoch": 0.8170770503276706, "grad_norm": 0.36870232224464417, "learning_rate": 1.2853202426552356e-05, "loss": 0.4766, "step": 38526 }, { "epoch": 0.8170982587856037, "grad_norm": 0.3680402636528015, "learning_rate": 1.2852882792295023e-05, "loss": 0.4548, "step": 38527 }, { "epoch": 0.8171194672435367, "grad_norm": 0.36677515506744385, "learning_rate": 1.2852563154864736e-05, "loss": 0.5082, "step": 38528 }, { "epoch": 0.8171406757014698, "grad_norm": 0.3621933162212372, "learning_rate": 1.2852243514261856e-05, "loss": 0.4815, "step": 38529 }, { "epoch": 0.8171618841594027, "grad_norm": 0.3498745858669281, "learning_rate": 1.2851923870486733e-05, "loss": 0.495, "step": 38530 }, { "epoch": 0.8171830926173358, "grad_norm": 0.3542303442955017, "learning_rate": 1.2851604223539728e-05, "loss": 0.5546, "step": 38531 }, { "epoch": 0.8172043010752689, "grad_norm": 0.3363931179046631, "learning_rate": 1.2851284573421193e-05, "loss": 0.484, "step": 38532 }, { "epoch": 0.8172255095332018, "grad_norm": 0.35309845209121704, "learning_rate": 1.2850964920131482e-05, "loss": 0.4949, "step": 38533 }, { "epoch": 0.8172467179911349, "grad_norm": 0.3616110384464264, "learning_rate": 1.2850645263670952e-05, "loss": 0.4943, "step": 38534 }, { "epoch": 0.8172679264490679, "grad_norm": 0.38727492094039917, "learning_rate": 1.2850325604039959e-05, "loss": 0.4679, "step": 38535 }, { "epoch": 0.817289134907001, "grad_norm": 0.3380795121192932, "learning_rate": 1.2850005941238863e-05, "loss": 0.4231, "step": 38536 }, { "epoch": 0.8173103433649339, "grad_norm": 0.35575515031814575, "learning_rate": 1.284968627526801e-05, "loss": 0.4696, "step": 38537 }, { "epoch": 0.817331551822867, "grad_norm": 0.40007567405700684, "learning_rate": 1.2849366606127764e-05, "loss": 0.5134, "step": 38538 }, { "epoch": 0.8173527602808, "grad_norm": 0.37531778216362, "learning_rate": 1.2849046933818479e-05, "loss": 0.5118, "step": 38539 }, { "epoch": 0.817373968738733, "grad_norm": 0.4276372492313385, "learning_rate": 1.2848727258340504e-05, "loss": 0.485, "step": 38540 }, { "epoch": 0.817395177196666, "grad_norm": 0.3618724048137665, "learning_rate": 1.2848407579694202e-05, "loss": 0.5567, "step": 38541 }, { "epoch": 0.8174163856545991, "grad_norm": 0.3671576976776123, "learning_rate": 1.2848087897879924e-05, "loss": 0.481, "step": 38542 }, { "epoch": 0.817437594112532, "grad_norm": 0.3444981873035431, "learning_rate": 1.2847768212898028e-05, "loss": 0.4394, "step": 38543 }, { "epoch": 0.8174588025704651, "grad_norm": 0.31443750858306885, "learning_rate": 1.284744852474887e-05, "loss": 0.4204, "step": 38544 }, { "epoch": 0.8174800110283982, "grad_norm": 0.3809400498867035, "learning_rate": 1.2847128833432808e-05, "loss": 0.5041, "step": 38545 }, { "epoch": 0.8175012194863311, "grad_norm": 0.36617839336395264, "learning_rate": 1.2846809138950187e-05, "loss": 0.461, "step": 38546 }, { "epoch": 0.8175224279442642, "grad_norm": 0.3428979218006134, "learning_rate": 1.284648944130137e-05, "loss": 0.418, "step": 38547 }, { "epoch": 0.8175436364021972, "grad_norm": 0.342866986989975, "learning_rate": 1.2846169740486719e-05, "loss": 0.529, "step": 38548 }, { "epoch": 0.8175648448601303, "grad_norm": 0.38104361295700073, "learning_rate": 1.2845850036506575e-05, "loss": 0.4317, "step": 38549 }, { "epoch": 0.8175860533180632, "grad_norm": 0.6186994314193726, "learning_rate": 1.2845530329361307e-05, "loss": 0.5344, "step": 38550 }, { "epoch": 0.8176072617759963, "grad_norm": 0.5069591999053955, "learning_rate": 1.2845210619051262e-05, "loss": 0.4539, "step": 38551 }, { "epoch": 0.8176284702339293, "grad_norm": 0.40914201736450195, "learning_rate": 1.2844890905576799e-05, "loss": 0.4958, "step": 38552 }, { "epoch": 0.8176496786918623, "grad_norm": 0.39042577147483826, "learning_rate": 1.2844571188938274e-05, "loss": 0.5638, "step": 38553 }, { "epoch": 0.8176708871497953, "grad_norm": 0.33449649810791016, "learning_rate": 1.284425146913604e-05, "loss": 0.4294, "step": 38554 }, { "epoch": 0.8176920956077284, "grad_norm": 0.7443337440490723, "learning_rate": 1.2843931746170455e-05, "loss": 0.5455, "step": 38555 }, { "epoch": 0.8177133040656613, "grad_norm": 0.3342241942882538, "learning_rate": 1.2843612020041875e-05, "loss": 0.5122, "step": 38556 }, { "epoch": 0.8177345125235944, "grad_norm": 0.36942341923713684, "learning_rate": 1.2843292290750653e-05, "loss": 0.5281, "step": 38557 }, { "epoch": 0.8177557209815274, "grad_norm": 0.38356900215148926, "learning_rate": 1.2842972558297148e-05, "loss": 0.5052, "step": 38558 }, { "epoch": 0.8177769294394605, "grad_norm": 0.3981204628944397, "learning_rate": 1.284265282268171e-05, "loss": 0.4959, "step": 38559 }, { "epoch": 0.8177981378973935, "grad_norm": 0.386300265789032, "learning_rate": 1.28423330839047e-05, "loss": 0.4902, "step": 38560 }, { "epoch": 0.8178193463553265, "grad_norm": 0.42034921050071716, "learning_rate": 1.2842013341966473e-05, "loss": 0.5485, "step": 38561 }, { "epoch": 0.8178405548132596, "grad_norm": 0.6806470155715942, "learning_rate": 1.2841693596867384e-05, "loss": 0.5199, "step": 38562 }, { "epoch": 0.8178617632711925, "grad_norm": 0.35451045632362366, "learning_rate": 1.2841373848607785e-05, "loss": 0.512, "step": 38563 }, { "epoch": 0.8178829717291256, "grad_norm": 0.32677799463272095, "learning_rate": 1.2841054097188038e-05, "loss": 0.4781, "step": 38564 }, { "epoch": 0.8179041801870586, "grad_norm": 0.38393962383270264, "learning_rate": 1.2840734342608492e-05, "loss": 0.5624, "step": 38565 }, { "epoch": 0.8179253886449916, "grad_norm": 0.34825384616851807, "learning_rate": 1.284041458486951e-05, "loss": 0.4809, "step": 38566 }, { "epoch": 0.8179465971029246, "grad_norm": 0.3285365104675293, "learning_rate": 1.2840094823971438e-05, "loss": 0.5032, "step": 38567 }, { "epoch": 0.8179678055608577, "grad_norm": 0.35540154576301575, "learning_rate": 1.283977505991464e-05, "loss": 0.4319, "step": 38568 }, { "epoch": 0.8179890140187906, "grad_norm": 0.3346083462238312, "learning_rate": 1.283945529269947e-05, "loss": 0.4941, "step": 38569 }, { "epoch": 0.8180102224767237, "grad_norm": 0.33557766675949097, "learning_rate": 1.2839135522326283e-05, "loss": 0.3918, "step": 38570 }, { "epoch": 0.8180314309346567, "grad_norm": 0.392116516828537, "learning_rate": 1.2838815748795432e-05, "loss": 0.4662, "step": 38571 }, { "epoch": 0.8180526393925898, "grad_norm": 0.37724658846855164, "learning_rate": 1.2838495972107276e-05, "loss": 0.4687, "step": 38572 }, { "epoch": 0.8180738478505228, "grad_norm": 0.3568326532840729, "learning_rate": 1.2838176192262169e-05, "loss": 0.4838, "step": 38573 }, { "epoch": 0.8180950563084558, "grad_norm": 0.6592864394187927, "learning_rate": 1.2837856409260464e-05, "loss": 0.4722, "step": 38574 }, { "epoch": 0.8181162647663889, "grad_norm": 0.3883814513683319, "learning_rate": 1.2837536623102524e-05, "loss": 0.4858, "step": 38575 }, { "epoch": 0.8181374732243218, "grad_norm": 0.34699395298957825, "learning_rate": 1.2837216833788701e-05, "loss": 0.5004, "step": 38576 }, { "epoch": 0.8181586816822549, "grad_norm": 0.43868547677993774, "learning_rate": 1.2836897041319347e-05, "loss": 0.4808, "step": 38577 }, { "epoch": 0.8181798901401879, "grad_norm": 0.318636953830719, "learning_rate": 1.2836577245694824e-05, "loss": 0.3975, "step": 38578 }, { "epoch": 0.818201098598121, "grad_norm": 0.3692478537559509, "learning_rate": 1.2836257446915481e-05, "loss": 0.4994, "step": 38579 }, { "epoch": 0.8182223070560539, "grad_norm": 0.3737497329711914, "learning_rate": 1.2835937644981677e-05, "loss": 0.5422, "step": 38580 }, { "epoch": 0.818243515513987, "grad_norm": 0.38105514645576477, "learning_rate": 1.2835617839893772e-05, "loss": 0.584, "step": 38581 }, { "epoch": 0.81826472397192, "grad_norm": 0.3887817859649658, "learning_rate": 1.2835298031652112e-05, "loss": 0.4979, "step": 38582 }, { "epoch": 0.818285932429853, "grad_norm": 0.37479427456855774, "learning_rate": 1.2834978220257064e-05, "loss": 0.5067, "step": 38583 }, { "epoch": 0.818307140887786, "grad_norm": 0.4442628026008606, "learning_rate": 1.2834658405708976e-05, "loss": 0.4511, "step": 38584 }, { "epoch": 0.8183283493457191, "grad_norm": 0.36142465472221375, "learning_rate": 1.2834338588008204e-05, "loss": 0.4868, "step": 38585 }, { "epoch": 0.8183495578036522, "grad_norm": 0.39464840292930603, "learning_rate": 1.2834018767155103e-05, "loss": 0.5749, "step": 38586 }, { "epoch": 0.8183707662615851, "grad_norm": 0.39617204666137695, "learning_rate": 1.2833698943150035e-05, "loss": 0.572, "step": 38587 }, { "epoch": 0.8183919747195182, "grad_norm": 0.3688644766807556, "learning_rate": 1.283337911599335e-05, "loss": 0.4668, "step": 38588 }, { "epoch": 0.8184131831774512, "grad_norm": 0.375993549823761, "learning_rate": 1.2833059285685406e-05, "loss": 0.5067, "step": 38589 }, { "epoch": 0.8184343916353842, "grad_norm": 0.3472321927547455, "learning_rate": 1.2832739452226559e-05, "loss": 0.4936, "step": 38590 }, { "epoch": 0.8184556000933172, "grad_norm": 0.3586612641811371, "learning_rate": 1.2832419615617162e-05, "loss": 0.5509, "step": 38591 }, { "epoch": 0.8184768085512503, "grad_norm": 0.3618835508823395, "learning_rate": 1.2832099775857571e-05, "loss": 0.4721, "step": 38592 }, { "epoch": 0.8184980170091832, "grad_norm": 0.35247722268104553, "learning_rate": 1.2831779932948147e-05, "loss": 0.4701, "step": 38593 }, { "epoch": 0.8185192254671163, "grad_norm": 0.3443465530872345, "learning_rate": 1.2831460086889237e-05, "loss": 0.4622, "step": 38594 }, { "epoch": 0.8185404339250493, "grad_norm": 0.3427889347076416, "learning_rate": 1.2831140237681208e-05, "loss": 0.4817, "step": 38595 }, { "epoch": 0.8185616423829823, "grad_norm": 0.3357388973236084, "learning_rate": 1.2830820385324406e-05, "loss": 0.4352, "step": 38596 }, { "epoch": 0.8185828508409153, "grad_norm": 0.36897382140159607, "learning_rate": 1.2830500529819192e-05, "loss": 0.4764, "step": 38597 }, { "epoch": 0.8186040592988484, "grad_norm": 0.42943981289863586, "learning_rate": 1.2830180671165917e-05, "loss": 0.455, "step": 38598 }, { "epoch": 0.8186252677567813, "grad_norm": 0.6709458827972412, "learning_rate": 1.2829860809364943e-05, "loss": 0.4379, "step": 38599 }, { "epoch": 0.8186464762147144, "grad_norm": 0.35496824979782104, "learning_rate": 1.2829540944416619e-05, "loss": 0.4417, "step": 38600 }, { "epoch": 0.8186676846726475, "grad_norm": 0.571797251701355, "learning_rate": 1.2829221076321306e-05, "loss": 0.5178, "step": 38601 }, { "epoch": 0.8186888931305805, "grad_norm": 0.34836265444755554, "learning_rate": 1.2828901205079356e-05, "loss": 0.5762, "step": 38602 }, { "epoch": 0.8187101015885135, "grad_norm": 0.41106754541397095, "learning_rate": 1.2828581330691128e-05, "loss": 0.4862, "step": 38603 }, { "epoch": 0.8187313100464465, "grad_norm": 0.4013470709323883, "learning_rate": 1.2828261453156978e-05, "loss": 0.4881, "step": 38604 }, { "epoch": 0.8187525185043796, "grad_norm": 0.3927195370197296, "learning_rate": 1.2827941572477257e-05, "loss": 0.4822, "step": 38605 }, { "epoch": 0.8187737269623125, "grad_norm": 0.33426347374916077, "learning_rate": 1.2827621688652326e-05, "loss": 0.5076, "step": 38606 }, { "epoch": 0.8187949354202456, "grad_norm": 0.3278786242008209, "learning_rate": 1.2827301801682538e-05, "loss": 0.4129, "step": 38607 }, { "epoch": 0.8188161438781786, "grad_norm": 0.3306449055671692, "learning_rate": 1.2826981911568251e-05, "loss": 0.5201, "step": 38608 }, { "epoch": 0.8188373523361117, "grad_norm": 0.41646575927734375, "learning_rate": 1.282666201830982e-05, "loss": 0.5535, "step": 38609 }, { "epoch": 0.8188585607940446, "grad_norm": 0.34159398078918457, "learning_rate": 1.2826342121907595e-05, "loss": 0.4989, "step": 38610 }, { "epoch": 0.8188797692519777, "grad_norm": 0.39969781041145325, "learning_rate": 1.2826022222361943e-05, "loss": 0.4629, "step": 38611 }, { "epoch": 0.8189009777099107, "grad_norm": 0.3378147482872009, "learning_rate": 1.2825702319673208e-05, "loss": 0.443, "step": 38612 }, { "epoch": 0.8189221861678437, "grad_norm": 0.3531721830368042, "learning_rate": 1.2825382413841756e-05, "loss": 0.5297, "step": 38613 }, { "epoch": 0.8189433946257768, "grad_norm": 0.49369674921035767, "learning_rate": 1.2825062504867935e-05, "loss": 0.4726, "step": 38614 }, { "epoch": 0.8189646030837098, "grad_norm": 0.3562314212322235, "learning_rate": 1.2824742592752106e-05, "loss": 0.5386, "step": 38615 }, { "epoch": 0.8189858115416428, "grad_norm": 0.3267415165901184, "learning_rate": 1.2824422677494621e-05, "loss": 0.4814, "step": 38616 }, { "epoch": 0.8190070199995758, "grad_norm": 0.350858598947525, "learning_rate": 1.282410275909584e-05, "loss": 0.5031, "step": 38617 }, { "epoch": 0.8190282284575089, "grad_norm": 0.3763920068740845, "learning_rate": 1.2823782837556116e-05, "loss": 0.4033, "step": 38618 }, { "epoch": 0.8190494369154419, "grad_norm": 0.37494590878486633, "learning_rate": 1.2823462912875801e-05, "loss": 0.532, "step": 38619 }, { "epoch": 0.8190706453733749, "grad_norm": 0.35555076599121094, "learning_rate": 1.2823142985055261e-05, "loss": 0.4976, "step": 38620 }, { "epoch": 0.8190918538313079, "grad_norm": 0.37146031856536865, "learning_rate": 1.2822823054094844e-05, "loss": 0.5889, "step": 38621 }, { "epoch": 0.819113062289241, "grad_norm": 0.42246025800704956, "learning_rate": 1.2822503119994907e-05, "loss": 0.492, "step": 38622 }, { "epoch": 0.8191342707471739, "grad_norm": 0.329767644405365, "learning_rate": 1.2822183182755808e-05, "loss": 0.4009, "step": 38623 }, { "epoch": 0.819155479205107, "grad_norm": 0.3033207058906555, "learning_rate": 1.2821863242377899e-05, "loss": 0.4195, "step": 38624 }, { "epoch": 0.81917668766304, "grad_norm": 0.371348112821579, "learning_rate": 1.2821543298861539e-05, "loss": 0.4657, "step": 38625 }, { "epoch": 0.819197896120973, "grad_norm": 0.35623252391815186, "learning_rate": 1.2821223352207084e-05, "loss": 0.4641, "step": 38626 }, { "epoch": 0.8192191045789061, "grad_norm": 0.3794053792953491, "learning_rate": 1.2820903402414888e-05, "loss": 0.5134, "step": 38627 }, { "epoch": 0.8192403130368391, "grad_norm": 0.35055333375930786, "learning_rate": 1.2820583449485308e-05, "loss": 0.4835, "step": 38628 }, { "epoch": 0.8192615214947722, "grad_norm": 0.365940660238266, "learning_rate": 1.2820263493418701e-05, "loss": 0.504, "step": 38629 }, { "epoch": 0.8192827299527051, "grad_norm": 0.32181501388549805, "learning_rate": 1.281994353421542e-05, "loss": 0.4529, "step": 38630 }, { "epoch": 0.8193039384106382, "grad_norm": 0.40966251492500305, "learning_rate": 1.2819623571875819e-05, "loss": 0.4618, "step": 38631 }, { "epoch": 0.8193251468685712, "grad_norm": 0.3781510591506958, "learning_rate": 1.2819303606400261e-05, "loss": 0.5566, "step": 38632 }, { "epoch": 0.8193463553265042, "grad_norm": 0.3362267017364502, "learning_rate": 1.2818983637789098e-05, "loss": 0.4768, "step": 38633 }, { "epoch": 0.8193675637844372, "grad_norm": 0.36440300941467285, "learning_rate": 1.2818663666042688e-05, "loss": 0.4504, "step": 38634 }, { "epoch": 0.8193887722423703, "grad_norm": 0.35800835490226746, "learning_rate": 1.2818343691161382e-05, "loss": 0.4658, "step": 38635 }, { "epoch": 0.8194099807003032, "grad_norm": 0.35038912296295166, "learning_rate": 1.2818023713145537e-05, "loss": 0.4567, "step": 38636 }, { "epoch": 0.8194311891582363, "grad_norm": 0.34697356820106506, "learning_rate": 1.2817703731995513e-05, "loss": 0.4841, "step": 38637 }, { "epoch": 0.8194523976161693, "grad_norm": 0.3385719656944275, "learning_rate": 1.2817383747711662e-05, "loss": 0.5149, "step": 38638 }, { "epoch": 0.8194736060741024, "grad_norm": 0.31968992948532104, "learning_rate": 1.2817063760294341e-05, "loss": 0.4282, "step": 38639 }, { "epoch": 0.8194948145320353, "grad_norm": 0.3380144238471985, "learning_rate": 1.281674376974391e-05, "loss": 0.4393, "step": 38640 }, { "epoch": 0.8195160229899684, "grad_norm": 0.3658977746963501, "learning_rate": 1.2816423776060718e-05, "loss": 0.3954, "step": 38641 }, { "epoch": 0.8195372314479015, "grad_norm": 0.31966114044189453, "learning_rate": 1.2816103779245123e-05, "loss": 0.4798, "step": 38642 }, { "epoch": 0.8195584399058344, "grad_norm": 0.34358200430870056, "learning_rate": 1.2815783779297486e-05, "loss": 0.4386, "step": 38643 }, { "epoch": 0.8195796483637675, "grad_norm": 0.3389960825443268, "learning_rate": 1.2815463776218154e-05, "loss": 0.4202, "step": 38644 }, { "epoch": 0.8196008568217005, "grad_norm": 0.543093204498291, "learning_rate": 1.2815143770007487e-05, "loss": 0.4562, "step": 38645 }, { "epoch": 0.8196220652796335, "grad_norm": 0.38831520080566406, "learning_rate": 1.2814823760665847e-05, "loss": 0.4765, "step": 38646 }, { "epoch": 0.8196432737375665, "grad_norm": 0.3364941477775574, "learning_rate": 1.2814503748193578e-05, "loss": 0.4641, "step": 38647 }, { "epoch": 0.8196644821954996, "grad_norm": 0.36039888858795166, "learning_rate": 1.2814183732591048e-05, "loss": 0.4601, "step": 38648 }, { "epoch": 0.8196856906534326, "grad_norm": 0.4013007879257202, "learning_rate": 1.2813863713858607e-05, "loss": 0.469, "step": 38649 }, { "epoch": 0.8197068991113656, "grad_norm": 0.4325418174266815, "learning_rate": 1.2813543691996607e-05, "loss": 0.556, "step": 38650 }, { "epoch": 0.8197281075692986, "grad_norm": 0.37383943796157837, "learning_rate": 1.281322366700541e-05, "loss": 0.46, "step": 38651 }, { "epoch": 0.8197493160272317, "grad_norm": 0.3500712513923645, "learning_rate": 1.2812903638885372e-05, "loss": 0.4468, "step": 38652 }, { "epoch": 0.8197705244851646, "grad_norm": 0.40735507011413574, "learning_rate": 1.2812583607636844e-05, "loss": 0.4175, "step": 38653 }, { "epoch": 0.8197917329430977, "grad_norm": 0.37316006422042847, "learning_rate": 1.2812263573260189e-05, "loss": 0.4613, "step": 38654 }, { "epoch": 0.8198129414010308, "grad_norm": 0.37367933988571167, "learning_rate": 1.2811943535755755e-05, "loss": 0.4954, "step": 38655 }, { "epoch": 0.8198341498589637, "grad_norm": 0.4108055531978607, "learning_rate": 1.2811623495123901e-05, "loss": 0.4764, "step": 38656 }, { "epoch": 0.8198553583168968, "grad_norm": 0.396973192691803, "learning_rate": 1.2811303451364988e-05, "loss": 0.5099, "step": 38657 }, { "epoch": 0.8198765667748298, "grad_norm": 0.38678839802742004, "learning_rate": 1.2810983404479367e-05, "loss": 0.4651, "step": 38658 }, { "epoch": 0.8198977752327629, "grad_norm": 0.38437482714653015, "learning_rate": 1.2810663354467393e-05, "loss": 0.4813, "step": 38659 }, { "epoch": 0.8199189836906958, "grad_norm": 0.4007278084754944, "learning_rate": 1.2810343301329424e-05, "loss": 0.5267, "step": 38660 }, { "epoch": 0.8199401921486289, "grad_norm": 0.3843703866004944, "learning_rate": 1.2810023245065814e-05, "loss": 0.4965, "step": 38661 }, { "epoch": 0.8199614006065619, "grad_norm": 0.38540300726890564, "learning_rate": 1.2809703185676922e-05, "loss": 0.5127, "step": 38662 }, { "epoch": 0.8199826090644949, "grad_norm": 0.35335102677345276, "learning_rate": 1.2809383123163102e-05, "loss": 0.518, "step": 38663 }, { "epoch": 0.8200038175224279, "grad_norm": 0.35941630601882935, "learning_rate": 1.280906305752471e-05, "loss": 0.4404, "step": 38664 }, { "epoch": 0.820025025980361, "grad_norm": 0.3501231372356415, "learning_rate": 1.2808742988762106e-05, "loss": 0.5396, "step": 38665 }, { "epoch": 0.8200462344382939, "grad_norm": 0.36806124448776245, "learning_rate": 1.280842291687564e-05, "loss": 0.4432, "step": 38666 }, { "epoch": 0.820067442896227, "grad_norm": 0.32130131125450134, "learning_rate": 1.2808102841865671e-05, "loss": 0.4108, "step": 38667 }, { "epoch": 0.8200886513541601, "grad_norm": 0.38314899802207947, "learning_rate": 1.2807782763732555e-05, "loss": 0.5179, "step": 38668 }, { "epoch": 0.8201098598120931, "grad_norm": 0.3392219543457031, "learning_rate": 1.2807462682476643e-05, "loss": 0.4889, "step": 38669 }, { "epoch": 0.8201310682700261, "grad_norm": 0.35017526149749756, "learning_rate": 1.2807142598098298e-05, "loss": 0.498, "step": 38670 }, { "epoch": 0.8201522767279591, "grad_norm": 0.5050255656242371, "learning_rate": 1.2806822510597875e-05, "loss": 0.525, "step": 38671 }, { "epoch": 0.8201734851858922, "grad_norm": 0.345328152179718, "learning_rate": 1.280650241997573e-05, "loss": 0.4579, "step": 38672 }, { "epoch": 0.8201946936438251, "grad_norm": 0.4114818274974823, "learning_rate": 1.2806182326232212e-05, "loss": 0.4798, "step": 38673 }, { "epoch": 0.8202159021017582, "grad_norm": 0.3505312204360962, "learning_rate": 1.2805862229367687e-05, "loss": 0.4629, "step": 38674 }, { "epoch": 0.8202371105596912, "grad_norm": 0.3529585003852844, "learning_rate": 1.2805542129382505e-05, "loss": 0.5005, "step": 38675 }, { "epoch": 0.8202583190176242, "grad_norm": 0.3671175241470337, "learning_rate": 1.2805222026277023e-05, "loss": 0.4222, "step": 38676 }, { "epoch": 0.8202795274755572, "grad_norm": 0.3969738483428955, "learning_rate": 1.2804901920051598e-05, "loss": 0.5035, "step": 38677 }, { "epoch": 0.8203007359334903, "grad_norm": 0.4332376718521118, "learning_rate": 1.2804581810706585e-05, "loss": 0.4224, "step": 38678 }, { "epoch": 0.8203219443914233, "grad_norm": 0.490824818611145, "learning_rate": 1.2804261698242339e-05, "loss": 0.511, "step": 38679 }, { "epoch": 0.8203431528493563, "grad_norm": 0.365024596452713, "learning_rate": 1.2803941582659221e-05, "loss": 0.4973, "step": 38680 }, { "epoch": 0.8203643613072893, "grad_norm": 0.36505910754203796, "learning_rate": 1.280362146395758e-05, "loss": 0.5242, "step": 38681 }, { "epoch": 0.8203855697652224, "grad_norm": 0.3824632465839386, "learning_rate": 1.2803301342137777e-05, "loss": 0.5547, "step": 38682 }, { "epoch": 0.8204067782231554, "grad_norm": 0.35261106491088867, "learning_rate": 1.2802981217200168e-05, "loss": 0.5185, "step": 38683 }, { "epoch": 0.8204279866810884, "grad_norm": 0.353314071893692, "learning_rate": 1.2802661089145105e-05, "loss": 0.3977, "step": 38684 }, { "epoch": 0.8204491951390215, "grad_norm": 0.36486515402793884, "learning_rate": 1.2802340957972949e-05, "loss": 0.5352, "step": 38685 }, { "epoch": 0.8204704035969544, "grad_norm": 0.5844183564186096, "learning_rate": 1.2802020823684054e-05, "loss": 0.5294, "step": 38686 }, { "epoch": 0.8204916120548875, "grad_norm": 0.33831143379211426, "learning_rate": 1.2801700686278774e-05, "loss": 0.4141, "step": 38687 }, { "epoch": 0.8205128205128205, "grad_norm": 0.3884650468826294, "learning_rate": 1.2801380545757468e-05, "loss": 0.4605, "step": 38688 }, { "epoch": 0.8205340289707536, "grad_norm": 0.3273554742336273, "learning_rate": 1.2801060402120488e-05, "loss": 0.4755, "step": 38689 }, { "epoch": 0.8205552374286865, "grad_norm": 0.3572606146335602, "learning_rate": 1.2800740255368194e-05, "loss": 0.4697, "step": 38690 }, { "epoch": 0.8205764458866196, "grad_norm": 0.3861182630062103, "learning_rate": 1.2800420105500944e-05, "loss": 0.5009, "step": 38691 }, { "epoch": 0.8205976543445526, "grad_norm": 0.33838343620300293, "learning_rate": 1.2800099952519087e-05, "loss": 0.4715, "step": 38692 }, { "epoch": 0.8206188628024856, "grad_norm": 0.34739190340042114, "learning_rate": 1.279977979642299e-05, "loss": 0.432, "step": 38693 }, { "epoch": 0.8206400712604186, "grad_norm": 0.3111729025840759, "learning_rate": 1.2799459637212996e-05, "loss": 0.4192, "step": 38694 }, { "epoch": 0.8206612797183517, "grad_norm": 0.42288661003112793, "learning_rate": 1.2799139474889468e-05, "loss": 0.4182, "step": 38695 }, { "epoch": 0.8206824881762848, "grad_norm": 0.38781729340553284, "learning_rate": 1.2798819309452761e-05, "loss": 0.5973, "step": 38696 }, { "epoch": 0.8207036966342177, "grad_norm": 0.33860257267951965, "learning_rate": 1.2798499140903236e-05, "loss": 0.4442, "step": 38697 }, { "epoch": 0.8207249050921508, "grad_norm": 0.3675377070903778, "learning_rate": 1.279817896924124e-05, "loss": 0.5673, "step": 38698 }, { "epoch": 0.8207461135500838, "grad_norm": 0.3534846007823944, "learning_rate": 1.2797858794467135e-05, "loss": 0.4548, "step": 38699 }, { "epoch": 0.8207673220080168, "grad_norm": 0.3843572437763214, "learning_rate": 1.2797538616581277e-05, "loss": 0.4878, "step": 38700 }, { "epoch": 0.8207885304659498, "grad_norm": 0.40071213245391846, "learning_rate": 1.2797218435584017e-05, "loss": 0.549, "step": 38701 }, { "epoch": 0.8208097389238829, "grad_norm": 0.36528337001800537, "learning_rate": 1.2796898251475717e-05, "loss": 0.5224, "step": 38702 }, { "epoch": 0.8208309473818158, "grad_norm": 0.35352158546447754, "learning_rate": 1.2796578064256734e-05, "loss": 0.5386, "step": 38703 }, { "epoch": 0.8208521558397489, "grad_norm": 0.42105308175086975, "learning_rate": 1.2796257873927418e-05, "loss": 0.541, "step": 38704 }, { "epoch": 0.8208733642976819, "grad_norm": 0.4179310202598572, "learning_rate": 1.2795937680488132e-05, "loss": 0.5869, "step": 38705 }, { "epoch": 0.820894572755615, "grad_norm": 0.37713974714279175, "learning_rate": 1.2795617483939221e-05, "loss": 0.47, "step": 38706 }, { "epoch": 0.8209157812135479, "grad_norm": 0.34628063440322876, "learning_rate": 1.2795297284281057e-05, "loss": 0.4516, "step": 38707 }, { "epoch": 0.820936989671481, "grad_norm": 0.5047913789749146, "learning_rate": 1.2794977081513981e-05, "loss": 0.5161, "step": 38708 }, { "epoch": 0.8209581981294141, "grad_norm": 0.41460585594177246, "learning_rate": 1.279465687563836e-05, "loss": 0.5461, "step": 38709 }, { "epoch": 0.820979406587347, "grad_norm": 0.38507434725761414, "learning_rate": 1.2794336666654542e-05, "loss": 0.48, "step": 38710 }, { "epoch": 0.8210006150452801, "grad_norm": 0.38992780447006226, "learning_rate": 1.2794016454562893e-05, "loss": 0.5058, "step": 38711 }, { "epoch": 0.8210218235032131, "grad_norm": 0.38698017597198486, "learning_rate": 1.2793696239363757e-05, "loss": 0.5306, "step": 38712 }, { "epoch": 0.8210430319611461, "grad_norm": 0.35280725359916687, "learning_rate": 1.2793376021057501e-05, "loss": 0.445, "step": 38713 }, { "epoch": 0.8210642404190791, "grad_norm": 0.35638999938964844, "learning_rate": 1.2793055799644474e-05, "loss": 0.4589, "step": 38714 }, { "epoch": 0.8210854488770122, "grad_norm": 0.3781723380088806, "learning_rate": 1.2792735575125033e-05, "loss": 0.5402, "step": 38715 }, { "epoch": 0.8211066573349451, "grad_norm": 0.8182826042175293, "learning_rate": 1.2792415347499538e-05, "loss": 0.5039, "step": 38716 }, { "epoch": 0.8211278657928782, "grad_norm": 0.3343997001647949, "learning_rate": 1.2792095116768344e-05, "loss": 0.4168, "step": 38717 }, { "epoch": 0.8211490742508112, "grad_norm": 0.4981165826320648, "learning_rate": 1.2791774882931805e-05, "loss": 0.4466, "step": 38718 }, { "epoch": 0.8211702827087443, "grad_norm": 0.4147970378398895, "learning_rate": 1.2791454645990277e-05, "loss": 0.4608, "step": 38719 }, { "epoch": 0.8211914911666772, "grad_norm": 0.3940674662590027, "learning_rate": 1.279113440594412e-05, "loss": 0.5458, "step": 38720 }, { "epoch": 0.8212126996246103, "grad_norm": 0.39346426725387573, "learning_rate": 1.2790814162793682e-05, "loss": 0.5189, "step": 38721 }, { "epoch": 0.8212339080825434, "grad_norm": 0.3432096540927887, "learning_rate": 1.279049391653933e-05, "loss": 0.4433, "step": 38722 }, { "epoch": 0.8212551165404763, "grad_norm": 0.33414512872695923, "learning_rate": 1.2790173667181416e-05, "loss": 0.4109, "step": 38723 }, { "epoch": 0.8212763249984094, "grad_norm": 0.38405993580818176, "learning_rate": 1.278985341472029e-05, "loss": 0.5199, "step": 38724 }, { "epoch": 0.8212975334563424, "grad_norm": 0.3701052963733673, "learning_rate": 1.2789533159156317e-05, "loss": 0.4871, "step": 38725 }, { "epoch": 0.8213187419142755, "grad_norm": 0.3195364773273468, "learning_rate": 1.278921290048985e-05, "loss": 0.4196, "step": 38726 }, { "epoch": 0.8213399503722084, "grad_norm": 0.40063878893852234, "learning_rate": 1.278889263872124e-05, "loss": 0.5671, "step": 38727 }, { "epoch": 0.8213611588301415, "grad_norm": 0.3723316192626953, "learning_rate": 1.278857237385085e-05, "loss": 0.517, "step": 38728 }, { "epoch": 0.8213823672880745, "grad_norm": 0.42934656143188477, "learning_rate": 1.2788252105879035e-05, "loss": 0.4874, "step": 38729 }, { "epoch": 0.8214035757460075, "grad_norm": 0.362816721200943, "learning_rate": 1.2787931834806152e-05, "loss": 0.4589, "step": 38730 }, { "epoch": 0.8214247842039405, "grad_norm": 0.3895696997642517, "learning_rate": 1.2787611560632556e-05, "loss": 0.4561, "step": 38731 }, { "epoch": 0.8214459926618736, "grad_norm": 0.3620222806930542, "learning_rate": 1.2787291283358598e-05, "loss": 0.4146, "step": 38732 }, { "epoch": 0.8214672011198065, "grad_norm": 0.36731061339378357, "learning_rate": 1.2786971002984641e-05, "loss": 0.4988, "step": 38733 }, { "epoch": 0.8214884095777396, "grad_norm": 0.40007856488227844, "learning_rate": 1.278665071951104e-05, "loss": 0.4694, "step": 38734 }, { "epoch": 0.8215096180356726, "grad_norm": 0.36966007947921753, "learning_rate": 1.2786330432938148e-05, "loss": 0.5973, "step": 38735 }, { "epoch": 0.8215308264936056, "grad_norm": 0.3432927131652832, "learning_rate": 1.2786010143266326e-05, "loss": 0.5095, "step": 38736 }, { "epoch": 0.8215520349515387, "grad_norm": 0.38264337182044983, "learning_rate": 1.2785689850495926e-05, "loss": 0.4856, "step": 38737 }, { "epoch": 0.8215732434094717, "grad_norm": 0.7291140556335449, "learning_rate": 1.2785369554627307e-05, "loss": 0.4217, "step": 38738 }, { "epoch": 0.8215944518674048, "grad_norm": 0.362407386302948, "learning_rate": 1.2785049255660825e-05, "loss": 0.5349, "step": 38739 }, { "epoch": 0.8216156603253377, "grad_norm": 0.33792003989219666, "learning_rate": 1.2784728953596832e-05, "loss": 0.5303, "step": 38740 }, { "epoch": 0.8216368687832708, "grad_norm": 0.3272463083267212, "learning_rate": 1.278440864843569e-05, "loss": 0.4456, "step": 38741 }, { "epoch": 0.8216580772412038, "grad_norm": 0.3922710120677948, "learning_rate": 1.2784088340177751e-05, "loss": 0.5425, "step": 38742 }, { "epoch": 0.8216792856991368, "grad_norm": 0.3722062408924103, "learning_rate": 1.2783768028823373e-05, "loss": 0.4441, "step": 38743 }, { "epoch": 0.8217004941570698, "grad_norm": 0.34261512756347656, "learning_rate": 1.2783447714372916e-05, "loss": 0.4583, "step": 38744 }, { "epoch": 0.8217217026150029, "grad_norm": 0.36366787552833557, "learning_rate": 1.2783127396826733e-05, "loss": 0.4907, "step": 38745 }, { "epoch": 0.8217429110729358, "grad_norm": 0.46891769766807556, "learning_rate": 1.2782807076185174e-05, "loss": 0.5609, "step": 38746 }, { "epoch": 0.8217641195308689, "grad_norm": 0.446923166513443, "learning_rate": 1.2782486752448607e-05, "loss": 0.5469, "step": 38747 }, { "epoch": 0.8217853279888019, "grad_norm": 0.3234313428401947, "learning_rate": 1.2782166425617381e-05, "loss": 0.5655, "step": 38748 }, { "epoch": 0.821806536446735, "grad_norm": 0.3852202594280243, "learning_rate": 1.278184609569185e-05, "loss": 0.5828, "step": 38749 }, { "epoch": 0.821827744904668, "grad_norm": 0.36055970191955566, "learning_rate": 1.278152576267238e-05, "loss": 0.5023, "step": 38750 }, { "epoch": 0.821848953362601, "grad_norm": 0.377153605222702, "learning_rate": 1.2781205426559318e-05, "loss": 0.4183, "step": 38751 }, { "epoch": 0.8218701618205341, "grad_norm": 0.4679242670536041, "learning_rate": 1.2780885087353023e-05, "loss": 0.4774, "step": 38752 }, { "epoch": 0.821891370278467, "grad_norm": 0.409263014793396, "learning_rate": 1.2780564745053852e-05, "loss": 0.4735, "step": 38753 }, { "epoch": 0.8219125787364001, "grad_norm": 0.4069773852825165, "learning_rate": 1.2780244399662161e-05, "loss": 0.4513, "step": 38754 }, { "epoch": 0.8219337871943331, "grad_norm": 0.3840380012989044, "learning_rate": 1.2779924051178305e-05, "loss": 0.54, "step": 38755 }, { "epoch": 0.8219549956522662, "grad_norm": 0.3219592273235321, "learning_rate": 1.2779603699602645e-05, "loss": 0.4441, "step": 38756 }, { "epoch": 0.8219762041101991, "grad_norm": 0.35955360531806946, "learning_rate": 1.2779283344935531e-05, "loss": 0.5369, "step": 38757 }, { "epoch": 0.8219974125681322, "grad_norm": 0.3753091096878052, "learning_rate": 1.2778962987177325e-05, "loss": 0.5046, "step": 38758 }, { "epoch": 0.8220186210260652, "grad_norm": 0.3595693111419678, "learning_rate": 1.2778642626328377e-05, "loss": 0.4531, "step": 38759 }, { "epoch": 0.8220398294839982, "grad_norm": 0.36775681376457214, "learning_rate": 1.277832226238905e-05, "loss": 0.5287, "step": 38760 }, { "epoch": 0.8220610379419312, "grad_norm": 0.35972779989242554, "learning_rate": 1.2778001895359695e-05, "loss": 0.4087, "step": 38761 }, { "epoch": 0.8220822463998643, "grad_norm": 0.3530289828777313, "learning_rate": 1.2777681525240673e-05, "loss": 0.4508, "step": 38762 }, { "epoch": 0.8221034548577973, "grad_norm": 0.5560362339019775, "learning_rate": 1.2777361152032335e-05, "loss": 0.4494, "step": 38763 }, { "epoch": 0.8221246633157303, "grad_norm": 0.3603828251361847, "learning_rate": 1.277704077573504e-05, "loss": 0.4669, "step": 38764 }, { "epoch": 0.8221458717736634, "grad_norm": 0.38200443983078003, "learning_rate": 1.2776720396349144e-05, "loss": 0.5744, "step": 38765 }, { "epoch": 0.8221670802315963, "grad_norm": 0.34813547134399414, "learning_rate": 1.2776400013875006e-05, "loss": 0.5073, "step": 38766 }, { "epoch": 0.8221882886895294, "grad_norm": 0.32358938455581665, "learning_rate": 1.2776079628312981e-05, "loss": 0.3854, "step": 38767 }, { "epoch": 0.8222094971474624, "grad_norm": 0.33515554666519165, "learning_rate": 1.2775759239663424e-05, "loss": 0.4725, "step": 38768 }, { "epoch": 0.8222307056053955, "grad_norm": 0.4283331632614136, "learning_rate": 1.2775438847926686e-05, "loss": 0.4999, "step": 38769 }, { "epoch": 0.8222519140633284, "grad_norm": 0.41457077860832214, "learning_rate": 1.2775118453103136e-05, "loss": 0.3889, "step": 38770 }, { "epoch": 0.8222731225212615, "grad_norm": 0.3656286597251892, "learning_rate": 1.2774798055193119e-05, "loss": 0.551, "step": 38771 }, { "epoch": 0.8222943309791945, "grad_norm": 0.4058530926704407, "learning_rate": 1.2774477654196998e-05, "loss": 0.5258, "step": 38772 }, { "epoch": 0.8223155394371275, "grad_norm": 0.3497067093849182, "learning_rate": 1.2774157250115127e-05, "loss": 0.511, "step": 38773 }, { "epoch": 0.8223367478950605, "grad_norm": 0.3209187984466553, "learning_rate": 1.277383684294786e-05, "loss": 0.4919, "step": 38774 }, { "epoch": 0.8223579563529936, "grad_norm": 0.4544561505317688, "learning_rate": 1.2773516432695559e-05, "loss": 0.4049, "step": 38775 }, { "epoch": 0.8223791648109265, "grad_norm": 0.4918535649776459, "learning_rate": 1.2773196019358576e-05, "loss": 0.5143, "step": 38776 }, { "epoch": 0.8224003732688596, "grad_norm": 0.3247448205947876, "learning_rate": 1.2772875602937267e-05, "loss": 0.5027, "step": 38777 }, { "epoch": 0.8224215817267927, "grad_norm": 0.34645408391952515, "learning_rate": 1.277255518343199e-05, "loss": 0.4577, "step": 38778 }, { "epoch": 0.8224427901847257, "grad_norm": 0.37090227007865906, "learning_rate": 1.2772234760843104e-05, "loss": 0.6313, "step": 38779 }, { "epoch": 0.8224639986426587, "grad_norm": 0.41584789752960205, "learning_rate": 1.2771914335170958e-05, "loss": 0.557, "step": 38780 }, { "epoch": 0.8224852071005917, "grad_norm": 0.605909526348114, "learning_rate": 1.2771593906415917e-05, "loss": 0.5453, "step": 38781 }, { "epoch": 0.8225064155585248, "grad_norm": 0.37800681591033936, "learning_rate": 1.2771273474578334e-05, "loss": 0.5085, "step": 38782 }, { "epoch": 0.8225276240164577, "grad_norm": 0.3610745072364807, "learning_rate": 1.2770953039658562e-05, "loss": 0.4836, "step": 38783 }, { "epoch": 0.8225488324743908, "grad_norm": 0.30834513902664185, "learning_rate": 1.2770632601656962e-05, "loss": 0.4932, "step": 38784 }, { "epoch": 0.8225700409323238, "grad_norm": 0.323457270860672, "learning_rate": 1.2770312160573886e-05, "loss": 0.4538, "step": 38785 }, { "epoch": 0.8225912493902569, "grad_norm": 0.3425799608230591, "learning_rate": 1.2769991716409693e-05, "loss": 0.5003, "step": 38786 }, { "epoch": 0.8226124578481898, "grad_norm": 0.3645951449871063, "learning_rate": 1.2769671269164741e-05, "loss": 0.46, "step": 38787 }, { "epoch": 0.8226336663061229, "grad_norm": 0.3491220772266388, "learning_rate": 1.2769350818839383e-05, "loss": 0.4661, "step": 38788 }, { "epoch": 0.8226548747640559, "grad_norm": 0.34484705328941345, "learning_rate": 1.276903036543398e-05, "loss": 0.5443, "step": 38789 }, { "epoch": 0.8226760832219889, "grad_norm": 0.38202694058418274, "learning_rate": 1.2768709908948884e-05, "loss": 0.4685, "step": 38790 }, { "epoch": 0.822697291679922, "grad_norm": 0.4076501429080963, "learning_rate": 1.2768389449384451e-05, "loss": 0.5338, "step": 38791 }, { "epoch": 0.822718500137855, "grad_norm": 0.3519912362098694, "learning_rate": 1.276806898674104e-05, "loss": 0.4588, "step": 38792 }, { "epoch": 0.822739708595788, "grad_norm": 0.3767208456993103, "learning_rate": 1.276774852101901e-05, "loss": 0.5236, "step": 38793 }, { "epoch": 0.822760917053721, "grad_norm": 0.3807515501976013, "learning_rate": 1.2767428052218711e-05, "loss": 0.5477, "step": 38794 }, { "epoch": 0.8227821255116541, "grad_norm": 0.33692893385887146, "learning_rate": 1.2767107580340503e-05, "loss": 0.4618, "step": 38795 }, { "epoch": 0.822803333969587, "grad_norm": 0.37937960028648376, "learning_rate": 1.2766787105384743e-05, "loss": 0.4335, "step": 38796 }, { "epoch": 0.8228245424275201, "grad_norm": 0.34844332933425903, "learning_rate": 1.2766466627351785e-05, "loss": 0.4422, "step": 38797 }, { "epoch": 0.8228457508854531, "grad_norm": 0.3776904046535492, "learning_rate": 1.2766146146241987e-05, "loss": 0.5151, "step": 38798 }, { "epoch": 0.8228669593433862, "grad_norm": 0.35767504572868347, "learning_rate": 1.2765825662055706e-05, "loss": 0.5074, "step": 38799 }, { "epoch": 0.8228881678013191, "grad_norm": 0.32275015115737915, "learning_rate": 1.2765505174793298e-05, "loss": 0.4936, "step": 38800 }, { "epoch": 0.8229093762592522, "grad_norm": 0.3767460584640503, "learning_rate": 1.2765184684455121e-05, "loss": 0.5288, "step": 38801 }, { "epoch": 0.8229305847171852, "grad_norm": 0.36578118801116943, "learning_rate": 1.2764864191041526e-05, "loss": 0.4273, "step": 38802 }, { "epoch": 0.8229517931751182, "grad_norm": 0.35762542486190796, "learning_rate": 1.2764543694552875e-05, "loss": 0.5251, "step": 38803 }, { "epoch": 0.8229730016330513, "grad_norm": 0.3740167021751404, "learning_rate": 1.276422319498952e-05, "loss": 0.4247, "step": 38804 }, { "epoch": 0.8229942100909843, "grad_norm": 0.34761664271354675, "learning_rate": 1.2763902692351821e-05, "loss": 0.4609, "step": 38805 }, { "epoch": 0.8230154185489174, "grad_norm": 0.33308500051498413, "learning_rate": 1.2763582186640135e-05, "loss": 0.4793, "step": 38806 }, { "epoch": 0.8230366270068503, "grad_norm": 0.4669354557991028, "learning_rate": 1.2763261677854816e-05, "loss": 0.5637, "step": 38807 }, { "epoch": 0.8230578354647834, "grad_norm": 0.350696325302124, "learning_rate": 1.2762941165996221e-05, "loss": 0.4228, "step": 38808 }, { "epoch": 0.8230790439227164, "grad_norm": 0.34607282280921936, "learning_rate": 1.2762620651064709e-05, "loss": 0.4522, "step": 38809 }, { "epoch": 0.8231002523806494, "grad_norm": 0.33218812942504883, "learning_rate": 1.2762300133060632e-05, "loss": 0.417, "step": 38810 }, { "epoch": 0.8231214608385824, "grad_norm": 0.48782622814178467, "learning_rate": 1.276197961198435e-05, "loss": 0.4808, "step": 38811 }, { "epoch": 0.8231426692965155, "grad_norm": 0.4093116521835327, "learning_rate": 1.2761659087836219e-05, "loss": 0.5691, "step": 38812 }, { "epoch": 0.8231638777544484, "grad_norm": 0.3746848404407501, "learning_rate": 1.2761338560616594e-05, "loss": 0.463, "step": 38813 }, { "epoch": 0.8231850862123815, "grad_norm": 0.3670223653316498, "learning_rate": 1.2761018030325829e-05, "loss": 0.4796, "step": 38814 }, { "epoch": 0.8232062946703145, "grad_norm": 0.49343141913414, "learning_rate": 1.276069749696429e-05, "loss": 0.5032, "step": 38815 }, { "epoch": 0.8232275031282476, "grad_norm": 0.40580347180366516, "learning_rate": 1.2760376960532321e-05, "loss": 0.4813, "step": 38816 }, { "epoch": 0.8232487115861805, "grad_norm": 0.34309279918670654, "learning_rate": 1.2760056421030288e-05, "loss": 0.5556, "step": 38817 }, { "epoch": 0.8232699200441136, "grad_norm": 0.3878170847892761, "learning_rate": 1.2759735878458544e-05, "loss": 0.4743, "step": 38818 }, { "epoch": 0.8232911285020467, "grad_norm": 0.34105026721954346, "learning_rate": 1.2759415332817444e-05, "loss": 0.5091, "step": 38819 }, { "epoch": 0.8233123369599796, "grad_norm": 0.340573251247406, "learning_rate": 1.2759094784107349e-05, "loss": 0.5462, "step": 38820 }, { "epoch": 0.8233335454179127, "grad_norm": 0.4228523373603821, "learning_rate": 1.2758774232328613e-05, "loss": 0.4876, "step": 38821 }, { "epoch": 0.8233547538758457, "grad_norm": 0.3600577414035797, "learning_rate": 1.2758453677481591e-05, "loss": 0.4984, "step": 38822 }, { "epoch": 0.8233759623337787, "grad_norm": 0.34809431433677673, "learning_rate": 1.2758133119566638e-05, "loss": 0.4296, "step": 38823 }, { "epoch": 0.8233971707917117, "grad_norm": 0.37973350286483765, "learning_rate": 1.2757812558584117e-05, "loss": 0.506, "step": 38824 }, { "epoch": 0.8234183792496448, "grad_norm": 0.3772706687450409, "learning_rate": 1.2757491994534378e-05, "loss": 0.5382, "step": 38825 }, { "epoch": 0.8234395877075777, "grad_norm": 0.37295016646385193, "learning_rate": 1.2757171427417783e-05, "loss": 0.5414, "step": 38826 }, { "epoch": 0.8234607961655108, "grad_norm": 0.36167433857917786, "learning_rate": 1.2756850857234686e-05, "loss": 0.4618, "step": 38827 }, { "epoch": 0.8234820046234438, "grad_norm": 0.3213535249233246, "learning_rate": 1.2756530283985442e-05, "loss": 0.5142, "step": 38828 }, { "epoch": 0.8235032130813769, "grad_norm": 0.31845077872276306, "learning_rate": 1.2756209707670411e-05, "loss": 0.4491, "step": 38829 }, { "epoch": 0.8235244215393098, "grad_norm": 0.4615049958229065, "learning_rate": 1.2755889128289944e-05, "loss": 0.4951, "step": 38830 }, { "epoch": 0.8235456299972429, "grad_norm": 0.39700010418891907, "learning_rate": 1.27555685458444e-05, "loss": 0.5139, "step": 38831 }, { "epoch": 0.823566838455176, "grad_norm": 0.46192243695259094, "learning_rate": 1.2755247960334142e-05, "loss": 0.4218, "step": 38832 }, { "epoch": 0.8235880469131089, "grad_norm": 0.3497251272201538, "learning_rate": 1.2754927371759517e-05, "loss": 0.4759, "step": 38833 }, { "epoch": 0.823609255371042, "grad_norm": 0.36616209149360657, "learning_rate": 1.2754606780120888e-05, "loss": 0.4992, "step": 38834 }, { "epoch": 0.823630463828975, "grad_norm": 0.36563462018966675, "learning_rate": 1.2754286185418611e-05, "loss": 0.4697, "step": 38835 }, { "epoch": 0.823651672286908, "grad_norm": 0.4456784129142761, "learning_rate": 1.2753965587653037e-05, "loss": 0.5128, "step": 38836 }, { "epoch": 0.823672880744841, "grad_norm": 0.35056644678115845, "learning_rate": 1.2753644986824527e-05, "loss": 0.4547, "step": 38837 }, { "epoch": 0.8236940892027741, "grad_norm": 0.4160816967487335, "learning_rate": 1.2753324382933439e-05, "loss": 0.5398, "step": 38838 }, { "epoch": 0.8237152976607071, "grad_norm": 0.32461896538734436, "learning_rate": 1.2753003775980123e-05, "loss": 0.3863, "step": 38839 }, { "epoch": 0.8237365061186401, "grad_norm": 0.3425544798374176, "learning_rate": 1.2752683165964945e-05, "loss": 0.4767, "step": 38840 }, { "epoch": 0.8237577145765731, "grad_norm": 0.41392213106155396, "learning_rate": 1.2752362552888257e-05, "loss": 0.4967, "step": 38841 }, { "epoch": 0.8237789230345062, "grad_norm": 0.37436097860336304, "learning_rate": 1.275204193675041e-05, "loss": 0.4967, "step": 38842 }, { "epoch": 0.8238001314924391, "grad_norm": 0.42075857520103455, "learning_rate": 1.275172131755177e-05, "loss": 0.4545, "step": 38843 }, { "epoch": 0.8238213399503722, "grad_norm": 0.46251380443573, "learning_rate": 1.275140069529269e-05, "loss": 0.4409, "step": 38844 }, { "epoch": 0.8238425484083053, "grad_norm": 0.3770029842853546, "learning_rate": 1.2751080069973525e-05, "loss": 0.497, "step": 38845 }, { "epoch": 0.8238637568662383, "grad_norm": 0.43714797496795654, "learning_rate": 1.2750759441594633e-05, "loss": 0.4938, "step": 38846 }, { "epoch": 0.8238849653241713, "grad_norm": 0.37581726908683777, "learning_rate": 1.275043881015637e-05, "loss": 0.5376, "step": 38847 }, { "epoch": 0.8239061737821043, "grad_norm": 0.369209349155426, "learning_rate": 1.2750118175659091e-05, "loss": 0.5343, "step": 38848 }, { "epoch": 0.8239273822400374, "grad_norm": 0.3286208212375641, "learning_rate": 1.2749797538103156e-05, "loss": 0.4505, "step": 38849 }, { "epoch": 0.8239485906979703, "grad_norm": 0.3857506811618805, "learning_rate": 1.2749476897488921e-05, "loss": 0.4758, "step": 38850 }, { "epoch": 0.8239697991559034, "grad_norm": 0.31762462854385376, "learning_rate": 1.2749156253816739e-05, "loss": 0.4508, "step": 38851 }, { "epoch": 0.8239910076138364, "grad_norm": 0.3576434850692749, "learning_rate": 1.2748835607086973e-05, "loss": 0.4826, "step": 38852 }, { "epoch": 0.8240122160717694, "grad_norm": 0.3481867015361786, "learning_rate": 1.2748514957299974e-05, "loss": 0.4873, "step": 38853 }, { "epoch": 0.8240334245297024, "grad_norm": 0.3657025992870331, "learning_rate": 1.27481943044561e-05, "loss": 0.65, "step": 38854 }, { "epoch": 0.8240546329876355, "grad_norm": 0.40685582160949707, "learning_rate": 1.2747873648555706e-05, "loss": 0.5504, "step": 38855 }, { "epoch": 0.8240758414455684, "grad_norm": 0.41937223076820374, "learning_rate": 1.2747552989599155e-05, "loss": 0.4954, "step": 38856 }, { "epoch": 0.8240970499035015, "grad_norm": 0.4169568717479706, "learning_rate": 1.2747232327586799e-05, "loss": 0.5113, "step": 38857 }, { "epoch": 0.8241182583614345, "grad_norm": 0.4525581896305084, "learning_rate": 1.2746911662518993e-05, "loss": 0.4529, "step": 38858 }, { "epoch": 0.8241394668193676, "grad_norm": 0.42013347148895264, "learning_rate": 1.2746590994396097e-05, "loss": 0.5145, "step": 38859 }, { "epoch": 0.8241606752773006, "grad_norm": 0.39722880721092224, "learning_rate": 1.2746270323218465e-05, "loss": 0.4459, "step": 38860 }, { "epoch": 0.8241818837352336, "grad_norm": 0.3945464789867401, "learning_rate": 1.2745949648986456e-05, "loss": 0.4435, "step": 38861 }, { "epoch": 0.8242030921931667, "grad_norm": 0.48770037293434143, "learning_rate": 1.2745628971700426e-05, "loss": 0.4837, "step": 38862 }, { "epoch": 0.8242243006510996, "grad_norm": 0.3481595516204834, "learning_rate": 1.2745308291360731e-05, "loss": 0.5448, "step": 38863 }, { "epoch": 0.8242455091090327, "grad_norm": 0.4683493375778198, "learning_rate": 1.2744987607967729e-05, "loss": 0.5468, "step": 38864 }, { "epoch": 0.8242667175669657, "grad_norm": 0.39437487721443176, "learning_rate": 1.2744666921521774e-05, "loss": 0.5288, "step": 38865 }, { "epoch": 0.8242879260248988, "grad_norm": 0.6143232583999634, "learning_rate": 1.2744346232023226e-05, "loss": 0.4692, "step": 38866 }, { "epoch": 0.8243091344828317, "grad_norm": 0.38306352496147156, "learning_rate": 1.2744025539472438e-05, "loss": 0.5607, "step": 38867 }, { "epoch": 0.8243303429407648, "grad_norm": 0.34894484281539917, "learning_rate": 1.2743704843869766e-05, "loss": 0.4988, "step": 38868 }, { "epoch": 0.8243515513986978, "grad_norm": 0.34699946641921997, "learning_rate": 1.2743384145215576e-05, "loss": 0.547, "step": 38869 }, { "epoch": 0.8243727598566308, "grad_norm": 0.35053348541259766, "learning_rate": 1.2743063443510212e-05, "loss": 0.4523, "step": 38870 }, { "epoch": 0.8243939683145638, "grad_norm": 0.3964332342147827, "learning_rate": 1.2742742738754041e-05, "loss": 0.4611, "step": 38871 }, { "epoch": 0.8244151767724969, "grad_norm": 0.5262165665626526, "learning_rate": 1.2742422030947416e-05, "loss": 0.5196, "step": 38872 }, { "epoch": 0.82443638523043, "grad_norm": 0.43961775302886963, "learning_rate": 1.274210132009069e-05, "loss": 0.4094, "step": 38873 }, { "epoch": 0.8244575936883629, "grad_norm": 0.410918653011322, "learning_rate": 1.2741780606184224e-05, "loss": 0.5701, "step": 38874 }, { "epoch": 0.824478802146296, "grad_norm": 0.36735355854034424, "learning_rate": 1.274145988922837e-05, "loss": 0.4572, "step": 38875 }, { "epoch": 0.824500010604229, "grad_norm": 0.40237680077552795, "learning_rate": 1.274113916922349e-05, "loss": 0.5183, "step": 38876 }, { "epoch": 0.824521219062162, "grad_norm": 0.456000953912735, "learning_rate": 1.2740818446169942e-05, "loss": 0.4431, "step": 38877 }, { "epoch": 0.824542427520095, "grad_norm": 0.39573341608047485, "learning_rate": 1.274049772006808e-05, "loss": 0.4487, "step": 38878 }, { "epoch": 0.8245636359780281, "grad_norm": 0.3929363191127777, "learning_rate": 1.2740176990918256e-05, "loss": 0.5163, "step": 38879 }, { "epoch": 0.824584844435961, "grad_norm": 0.35838794708251953, "learning_rate": 1.2739856258720833e-05, "loss": 0.4781, "step": 38880 }, { "epoch": 0.8246060528938941, "grad_norm": 0.5491182804107666, "learning_rate": 1.2739535523476165e-05, "loss": 0.5461, "step": 38881 }, { "epoch": 0.8246272613518271, "grad_norm": 0.3166046440601349, "learning_rate": 1.2739214785184609e-05, "loss": 0.4802, "step": 38882 }, { "epoch": 0.8246484698097601, "grad_norm": 0.3620378077030182, "learning_rate": 1.2738894043846523e-05, "loss": 0.5545, "step": 38883 }, { "epoch": 0.8246696782676931, "grad_norm": 0.34221306443214417, "learning_rate": 1.2738573299462262e-05, "loss": 0.4583, "step": 38884 }, { "epoch": 0.8246908867256262, "grad_norm": 0.36767643690109253, "learning_rate": 1.2738252552032187e-05, "loss": 0.455, "step": 38885 }, { "epoch": 0.8247120951835593, "grad_norm": 0.9146367907524109, "learning_rate": 1.2737931801556646e-05, "loss": 0.4678, "step": 38886 }, { "epoch": 0.8247333036414922, "grad_norm": 0.38942423462867737, "learning_rate": 1.2737611048036002e-05, "loss": 0.4971, "step": 38887 }, { "epoch": 0.8247545120994253, "grad_norm": 0.37072476744651794, "learning_rate": 1.2737290291470614e-05, "loss": 0.5292, "step": 38888 }, { "epoch": 0.8247757205573583, "grad_norm": 0.3480997085571289, "learning_rate": 1.2736969531860834e-05, "loss": 0.4823, "step": 38889 }, { "epoch": 0.8247969290152913, "grad_norm": 0.38635414838790894, "learning_rate": 1.273664876920702e-05, "loss": 0.5534, "step": 38890 }, { "epoch": 0.8248181374732243, "grad_norm": 0.3432767689228058, "learning_rate": 1.273632800350953e-05, "loss": 0.4646, "step": 38891 }, { "epoch": 0.8248393459311574, "grad_norm": 0.41122645139694214, "learning_rate": 1.2736007234768719e-05, "loss": 0.511, "step": 38892 }, { "epoch": 0.8248605543890903, "grad_norm": 0.3736560046672821, "learning_rate": 1.2735686462984944e-05, "loss": 0.4719, "step": 38893 }, { "epoch": 0.8248817628470234, "grad_norm": 0.361197292804718, "learning_rate": 1.2735365688158563e-05, "loss": 0.6049, "step": 38894 }, { "epoch": 0.8249029713049564, "grad_norm": 0.3635263442993164, "learning_rate": 1.2735044910289931e-05, "loss": 0.4174, "step": 38895 }, { "epoch": 0.8249241797628895, "grad_norm": 0.32292476296424866, "learning_rate": 1.2734724129379405e-05, "loss": 0.3801, "step": 38896 }, { "epoch": 0.8249453882208224, "grad_norm": 0.44931095838546753, "learning_rate": 1.2734403345427347e-05, "loss": 0.5269, "step": 38897 }, { "epoch": 0.8249665966787555, "grad_norm": 0.34814882278442383, "learning_rate": 1.2734082558434104e-05, "loss": 0.5102, "step": 38898 }, { "epoch": 0.8249878051366885, "grad_norm": 0.32074686884880066, "learning_rate": 1.2733761768400043e-05, "loss": 0.4764, "step": 38899 }, { "epoch": 0.8250090135946215, "grad_norm": 0.3064746558666229, "learning_rate": 1.2733440975325513e-05, "loss": 0.4149, "step": 38900 }, { "epoch": 0.8250302220525546, "grad_norm": 0.3542942404747009, "learning_rate": 1.2733120179210872e-05, "loss": 0.4772, "step": 38901 }, { "epoch": 0.8250514305104876, "grad_norm": 0.35064613819122314, "learning_rate": 1.2732799380056482e-05, "loss": 0.4803, "step": 38902 }, { "epoch": 0.8250726389684206, "grad_norm": 0.43662381172180176, "learning_rate": 1.2732478577862697e-05, "loss": 0.4621, "step": 38903 }, { "epoch": 0.8250938474263536, "grad_norm": 0.38465502858161926, "learning_rate": 1.273215777262987e-05, "loss": 0.478, "step": 38904 }, { "epoch": 0.8251150558842867, "grad_norm": 0.36274442076683044, "learning_rate": 1.2731836964358361e-05, "loss": 0.4915, "step": 38905 }, { "epoch": 0.8251362643422196, "grad_norm": 0.36628231406211853, "learning_rate": 1.2731516153048529e-05, "loss": 0.4584, "step": 38906 }, { "epoch": 0.8251574728001527, "grad_norm": 0.3459409177303314, "learning_rate": 1.2731195338700724e-05, "loss": 0.383, "step": 38907 }, { "epoch": 0.8251786812580857, "grad_norm": 0.4245433807373047, "learning_rate": 1.273087452131531e-05, "loss": 0.4798, "step": 38908 }, { "epoch": 0.8251998897160188, "grad_norm": 0.33132725954055786, "learning_rate": 1.2730553700892645e-05, "loss": 0.4317, "step": 38909 }, { "epoch": 0.8252210981739517, "grad_norm": 0.37812018394470215, "learning_rate": 1.2730232877433075e-05, "loss": 0.4742, "step": 38910 }, { "epoch": 0.8252423066318848, "grad_norm": 0.47438669204711914, "learning_rate": 1.272991205093697e-05, "loss": 0.4651, "step": 38911 }, { "epoch": 0.8252635150898178, "grad_norm": 0.3788776695728302, "learning_rate": 1.2729591221404676e-05, "loss": 0.4949, "step": 38912 }, { "epoch": 0.8252847235477508, "grad_norm": 0.3432179093360901, "learning_rate": 1.2729270388836554e-05, "loss": 0.4176, "step": 38913 }, { "epoch": 0.8253059320056839, "grad_norm": 0.350085973739624, "learning_rate": 1.2728949553232966e-05, "loss": 0.4411, "step": 38914 }, { "epoch": 0.8253271404636169, "grad_norm": 0.3148127794265747, "learning_rate": 1.2728628714594257e-05, "loss": 0.4296, "step": 38915 }, { "epoch": 0.82534834892155, "grad_norm": 0.4205676317214966, "learning_rate": 1.2728307872920797e-05, "loss": 0.4885, "step": 38916 }, { "epoch": 0.8253695573794829, "grad_norm": 0.37657099962234497, "learning_rate": 1.2727987028212936e-05, "loss": 0.4436, "step": 38917 }, { "epoch": 0.825390765837416, "grad_norm": 0.377208411693573, "learning_rate": 1.2727666180471027e-05, "loss": 0.4308, "step": 38918 }, { "epoch": 0.825411974295349, "grad_norm": 0.3771553337574005, "learning_rate": 1.2727345329695436e-05, "loss": 0.5003, "step": 38919 }, { "epoch": 0.825433182753282, "grad_norm": 0.35136914253234863, "learning_rate": 1.2727024475886514e-05, "loss": 0.535, "step": 38920 }, { "epoch": 0.825454391211215, "grad_norm": 0.5053922533988953, "learning_rate": 1.2726703619044618e-05, "loss": 0.5151, "step": 38921 }, { "epoch": 0.8254755996691481, "grad_norm": 0.366379976272583, "learning_rate": 1.2726382759170108e-05, "loss": 0.483, "step": 38922 }, { "epoch": 0.825496808127081, "grad_norm": 0.4051613211631775, "learning_rate": 1.2726061896263337e-05, "loss": 0.4991, "step": 38923 }, { "epoch": 0.8255180165850141, "grad_norm": 0.382159948348999, "learning_rate": 1.2725741030324663e-05, "loss": 0.4598, "step": 38924 }, { "epoch": 0.8255392250429471, "grad_norm": 0.6654980182647705, "learning_rate": 1.2725420161354447e-05, "loss": 0.3674, "step": 38925 }, { "epoch": 0.8255604335008802, "grad_norm": 0.4276459813117981, "learning_rate": 1.2725099289353038e-05, "loss": 0.4608, "step": 38926 }, { "epoch": 0.8255816419588132, "grad_norm": 0.34072884917259216, "learning_rate": 1.27247784143208e-05, "loss": 0.4564, "step": 38927 }, { "epoch": 0.8256028504167462, "grad_norm": 0.43127328157424927, "learning_rate": 1.2724457536258086e-05, "loss": 0.5311, "step": 38928 }, { "epoch": 0.8256240588746793, "grad_norm": 0.3666711151599884, "learning_rate": 1.2724136655165254e-05, "loss": 0.4819, "step": 38929 }, { "epoch": 0.8256452673326122, "grad_norm": 0.35254383087158203, "learning_rate": 1.2723815771042663e-05, "loss": 0.4356, "step": 38930 }, { "epoch": 0.8256664757905453, "grad_norm": 0.3632989823818207, "learning_rate": 1.2723494883890666e-05, "loss": 0.5096, "step": 38931 }, { "epoch": 0.8256876842484783, "grad_norm": 0.5018755197525024, "learning_rate": 1.2723173993709621e-05, "loss": 0.4415, "step": 38932 }, { "epoch": 0.8257088927064113, "grad_norm": 0.34875473380088806, "learning_rate": 1.2722853100499886e-05, "loss": 0.5143, "step": 38933 }, { "epoch": 0.8257301011643443, "grad_norm": 0.3549356460571289, "learning_rate": 1.2722532204261817e-05, "loss": 0.5076, "step": 38934 }, { "epoch": 0.8257513096222774, "grad_norm": 0.4002847969532013, "learning_rate": 1.2722211304995771e-05, "loss": 0.4873, "step": 38935 }, { "epoch": 0.8257725180802103, "grad_norm": 0.42424002289772034, "learning_rate": 1.2721890402702108e-05, "loss": 0.496, "step": 38936 }, { "epoch": 0.8257937265381434, "grad_norm": 0.348321795463562, "learning_rate": 1.2721569497381181e-05, "loss": 0.4763, "step": 38937 }, { "epoch": 0.8258149349960764, "grad_norm": 0.4065071940422058, "learning_rate": 1.2721248589033346e-05, "loss": 0.4396, "step": 38938 }, { "epoch": 0.8258361434540095, "grad_norm": 0.346879243850708, "learning_rate": 1.2720927677658961e-05, "loss": 0.4337, "step": 38939 }, { "epoch": 0.8258573519119424, "grad_norm": 0.336143434047699, "learning_rate": 1.2720606763258388e-05, "loss": 0.5587, "step": 38940 }, { "epoch": 0.8258785603698755, "grad_norm": 0.3536379337310791, "learning_rate": 1.2720285845831976e-05, "loss": 0.4657, "step": 38941 }, { "epoch": 0.8258997688278086, "grad_norm": 0.3771921396255493, "learning_rate": 1.271996492538009e-05, "loss": 0.5178, "step": 38942 }, { "epoch": 0.8259209772857415, "grad_norm": 0.42259395122528076, "learning_rate": 1.271964400190308e-05, "loss": 0.4112, "step": 38943 }, { "epoch": 0.8259421857436746, "grad_norm": 0.3774535357952118, "learning_rate": 1.2719323075401306e-05, "loss": 0.4187, "step": 38944 }, { "epoch": 0.8259633942016076, "grad_norm": 0.34827232360839844, "learning_rate": 1.2719002145875123e-05, "loss": 0.4915, "step": 38945 }, { "epoch": 0.8259846026595407, "grad_norm": 0.34836050868034363, "learning_rate": 1.2718681213324892e-05, "loss": 0.5165, "step": 38946 }, { "epoch": 0.8260058111174736, "grad_norm": 0.3813292682170868, "learning_rate": 1.2718360277750966e-05, "loss": 0.4962, "step": 38947 }, { "epoch": 0.8260270195754067, "grad_norm": 0.3623758852481842, "learning_rate": 1.2718039339153706e-05, "loss": 0.5134, "step": 38948 }, { "epoch": 0.8260482280333397, "grad_norm": 0.3839679956436157, "learning_rate": 1.2717718397533461e-05, "loss": 0.4571, "step": 38949 }, { "epoch": 0.8260694364912727, "grad_norm": 0.3553483784198761, "learning_rate": 1.2717397452890597e-05, "loss": 0.4232, "step": 38950 }, { "epoch": 0.8260906449492057, "grad_norm": 0.3800404667854309, "learning_rate": 1.2717076505225464e-05, "loss": 0.4851, "step": 38951 }, { "epoch": 0.8261118534071388, "grad_norm": 0.34920334815979004, "learning_rate": 1.2716755554538425e-05, "loss": 0.4699, "step": 38952 }, { "epoch": 0.8261330618650717, "grad_norm": 0.3634053170681, "learning_rate": 1.2716434600829835e-05, "loss": 0.5429, "step": 38953 }, { "epoch": 0.8261542703230048, "grad_norm": 0.35216206312179565, "learning_rate": 1.2716113644100047e-05, "loss": 0.4398, "step": 38954 }, { "epoch": 0.8261754787809379, "grad_norm": 0.34489351511001587, "learning_rate": 1.2715792684349422e-05, "loss": 0.4217, "step": 38955 }, { "epoch": 0.8261966872388709, "grad_norm": 0.36039698123931885, "learning_rate": 1.271547172157832e-05, "loss": 0.4946, "step": 38956 }, { "epoch": 0.8262178956968039, "grad_norm": 0.39616596698760986, "learning_rate": 1.2715150755787087e-05, "loss": 0.4851, "step": 38957 }, { "epoch": 0.8262391041547369, "grad_norm": 0.3440682590007782, "learning_rate": 1.2714829786976089e-05, "loss": 0.4326, "step": 38958 }, { "epoch": 0.82626031261267, "grad_norm": 0.45627889037132263, "learning_rate": 1.2714508815145682e-05, "loss": 0.4958, "step": 38959 }, { "epoch": 0.8262815210706029, "grad_norm": 0.41195598244667053, "learning_rate": 1.2714187840296224e-05, "loss": 0.4584, "step": 38960 }, { "epoch": 0.826302729528536, "grad_norm": 0.35233429074287415, "learning_rate": 1.2713866862428068e-05, "loss": 0.5105, "step": 38961 }, { "epoch": 0.826323937986469, "grad_norm": 0.35742467641830444, "learning_rate": 1.2713545881541574e-05, "loss": 0.5192, "step": 38962 }, { "epoch": 0.826345146444402, "grad_norm": 0.4169563353061676, "learning_rate": 1.2713224897637094e-05, "loss": 0.4472, "step": 38963 }, { "epoch": 0.826366354902335, "grad_norm": 0.3766794800758362, "learning_rate": 1.2712903910714991e-05, "loss": 0.4357, "step": 38964 }, { "epoch": 0.8263875633602681, "grad_norm": 0.38163691759109497, "learning_rate": 1.271258292077562e-05, "loss": 0.4288, "step": 38965 }, { "epoch": 0.826408771818201, "grad_norm": 0.502453625202179, "learning_rate": 1.2712261927819338e-05, "loss": 0.4502, "step": 38966 }, { "epoch": 0.8264299802761341, "grad_norm": 0.6396665573120117, "learning_rate": 1.2711940931846502e-05, "loss": 0.5993, "step": 38967 }, { "epoch": 0.8264511887340672, "grad_norm": 0.4626162350177765, "learning_rate": 1.271161993285747e-05, "loss": 0.4042, "step": 38968 }, { "epoch": 0.8264723971920002, "grad_norm": 0.35649949312210083, "learning_rate": 1.2711298930852596e-05, "loss": 0.513, "step": 38969 }, { "epoch": 0.8264936056499332, "grad_norm": 0.3723372519016266, "learning_rate": 1.2710977925832242e-05, "loss": 0.5514, "step": 38970 }, { "epoch": 0.8265148141078662, "grad_norm": 0.37028834223747253, "learning_rate": 1.2710656917796758e-05, "loss": 0.3973, "step": 38971 }, { "epoch": 0.8265360225657993, "grad_norm": 0.3959507942199707, "learning_rate": 1.2710335906746505e-05, "loss": 0.4798, "step": 38972 }, { "epoch": 0.8265572310237322, "grad_norm": 0.3945106267929077, "learning_rate": 1.2710014892681842e-05, "loss": 0.5761, "step": 38973 }, { "epoch": 0.8265784394816653, "grad_norm": 0.3698956370353699, "learning_rate": 1.2709693875603123e-05, "loss": 0.5197, "step": 38974 }, { "epoch": 0.8265996479395983, "grad_norm": 0.3615354299545288, "learning_rate": 1.2709372855510706e-05, "loss": 0.5175, "step": 38975 }, { "epoch": 0.8266208563975314, "grad_norm": 0.35824596881866455, "learning_rate": 1.2709051832404948e-05, "loss": 0.5352, "step": 38976 }, { "epoch": 0.8266420648554643, "grad_norm": 0.35128599405288696, "learning_rate": 1.2708730806286207e-05, "loss": 0.5221, "step": 38977 }, { "epoch": 0.8266632733133974, "grad_norm": 0.4298425614833832, "learning_rate": 1.2708409777154838e-05, "loss": 0.4947, "step": 38978 }, { "epoch": 0.8266844817713304, "grad_norm": 0.4133168160915375, "learning_rate": 1.27080887450112e-05, "loss": 0.4842, "step": 38979 }, { "epoch": 0.8267056902292634, "grad_norm": 0.33560267090797424, "learning_rate": 1.2707767709855647e-05, "loss": 0.404, "step": 38980 }, { "epoch": 0.8267268986871964, "grad_norm": 0.3852193057537079, "learning_rate": 1.2707446671688542e-05, "loss": 0.5302, "step": 38981 }, { "epoch": 0.8267481071451295, "grad_norm": 0.37156012654304504, "learning_rate": 1.2707125630510237e-05, "loss": 0.4685, "step": 38982 }, { "epoch": 0.8267693156030625, "grad_norm": 0.4281538128852844, "learning_rate": 1.270680458632109e-05, "loss": 0.5516, "step": 38983 }, { "epoch": 0.8267905240609955, "grad_norm": 0.3653308153152466, "learning_rate": 1.2706483539121458e-05, "loss": 0.4843, "step": 38984 }, { "epoch": 0.8268117325189286, "grad_norm": 0.36900994181632996, "learning_rate": 1.27061624889117e-05, "loss": 0.4338, "step": 38985 }, { "epoch": 0.8268329409768616, "grad_norm": 0.34123435616493225, "learning_rate": 1.2705841435692167e-05, "loss": 0.4692, "step": 38986 }, { "epoch": 0.8268541494347946, "grad_norm": 0.35901981592178345, "learning_rate": 1.2705520379463225e-05, "loss": 0.4674, "step": 38987 }, { "epoch": 0.8268753578927276, "grad_norm": 0.609882116317749, "learning_rate": 1.2705199320225228e-05, "loss": 0.5274, "step": 38988 }, { "epoch": 0.8268965663506607, "grad_norm": 0.43836545944213867, "learning_rate": 1.2704878257978528e-05, "loss": 0.4731, "step": 38989 }, { "epoch": 0.8269177748085936, "grad_norm": 0.39691871404647827, "learning_rate": 1.2704557192723488e-05, "loss": 0.4756, "step": 38990 }, { "epoch": 0.8269389832665267, "grad_norm": 0.34943312406539917, "learning_rate": 1.2704236124460463e-05, "loss": 0.5094, "step": 38991 }, { "epoch": 0.8269601917244597, "grad_norm": 0.45806992053985596, "learning_rate": 1.2703915053189808e-05, "loss": 0.5174, "step": 38992 }, { "epoch": 0.8269814001823927, "grad_norm": 0.40306684374809265, "learning_rate": 1.2703593978911885e-05, "loss": 0.6271, "step": 38993 }, { "epoch": 0.8270026086403257, "grad_norm": 0.38563060760498047, "learning_rate": 1.2703272901627046e-05, "loss": 0.5954, "step": 38994 }, { "epoch": 0.8270238170982588, "grad_norm": 0.3396526277065277, "learning_rate": 1.2702951821335651e-05, "loss": 0.505, "step": 38995 }, { "epoch": 0.8270450255561919, "grad_norm": 0.4018148183822632, "learning_rate": 1.2702630738038057e-05, "loss": 0.3738, "step": 38996 }, { "epoch": 0.8270662340141248, "grad_norm": 0.34957677125930786, "learning_rate": 1.2702309651734619e-05, "loss": 0.5627, "step": 38997 }, { "epoch": 0.8270874424720579, "grad_norm": 0.3465738296508789, "learning_rate": 1.27019885624257e-05, "loss": 0.4261, "step": 38998 }, { "epoch": 0.8271086509299909, "grad_norm": 0.4487687349319458, "learning_rate": 1.270166747011165e-05, "loss": 0.5217, "step": 38999 }, { "epoch": 0.8271298593879239, "grad_norm": 0.34891214966773987, "learning_rate": 1.2701346374792828e-05, "loss": 0.4636, "step": 39000 }, { "epoch": 0.8271510678458569, "grad_norm": 0.34831634163856506, "learning_rate": 1.2701025276469595e-05, "loss": 0.5339, "step": 39001 }, { "epoch": 0.82717227630379, "grad_norm": 0.3296116888523102, "learning_rate": 1.2700704175142305e-05, "loss": 0.3982, "step": 39002 }, { "epoch": 0.8271934847617229, "grad_norm": 0.4640544354915619, "learning_rate": 1.2700383070811309e-05, "loss": 0.4578, "step": 39003 }, { "epoch": 0.827214693219656, "grad_norm": 0.4900774359703064, "learning_rate": 1.2700061963476974e-05, "loss": 0.5, "step": 39004 }, { "epoch": 0.827235901677589, "grad_norm": 0.3221457898616791, "learning_rate": 1.2699740853139658e-05, "loss": 0.4057, "step": 39005 }, { "epoch": 0.8272571101355221, "grad_norm": 0.3759382665157318, "learning_rate": 1.2699419739799707e-05, "loss": 0.4677, "step": 39006 }, { "epoch": 0.827278318593455, "grad_norm": 0.3634374141693115, "learning_rate": 1.2699098623457489e-05, "loss": 0.4758, "step": 39007 }, { "epoch": 0.8272995270513881, "grad_norm": 0.38180363178253174, "learning_rate": 1.2698777504113356e-05, "loss": 0.5601, "step": 39008 }, { "epoch": 0.8273207355093212, "grad_norm": 0.385616272687912, "learning_rate": 1.2698456381767663e-05, "loss": 0.5938, "step": 39009 }, { "epoch": 0.8273419439672541, "grad_norm": 0.3726092576980591, "learning_rate": 1.2698135256420773e-05, "loss": 0.4914, "step": 39010 }, { "epoch": 0.8273631524251872, "grad_norm": 0.3767947554588318, "learning_rate": 1.269781412807304e-05, "loss": 0.5876, "step": 39011 }, { "epoch": 0.8273843608831202, "grad_norm": 0.41870376467704773, "learning_rate": 1.2697492996724824e-05, "loss": 0.41, "step": 39012 }, { "epoch": 0.8274055693410532, "grad_norm": 0.394855260848999, "learning_rate": 1.2697171862376478e-05, "loss": 0.4601, "step": 39013 }, { "epoch": 0.8274267777989862, "grad_norm": 0.3922547996044159, "learning_rate": 1.269685072502836e-05, "loss": 0.4282, "step": 39014 }, { "epoch": 0.8274479862569193, "grad_norm": 0.5886216759681702, "learning_rate": 1.269652958468083e-05, "loss": 0.4281, "step": 39015 }, { "epoch": 0.8274691947148523, "grad_norm": 0.4020126163959503, "learning_rate": 1.2696208441334241e-05, "loss": 0.4283, "step": 39016 }, { "epoch": 0.8274904031727853, "grad_norm": 0.396779328584671, "learning_rate": 1.2695887294988954e-05, "loss": 0.5058, "step": 39017 }, { "epoch": 0.8275116116307183, "grad_norm": 0.34063705801963806, "learning_rate": 1.2695566145645324e-05, "loss": 0.4752, "step": 39018 }, { "epoch": 0.8275328200886514, "grad_norm": 0.3835085928440094, "learning_rate": 1.2695244993303708e-05, "loss": 0.5827, "step": 39019 }, { "epoch": 0.8275540285465843, "grad_norm": 0.32133400440216064, "learning_rate": 1.2694923837964464e-05, "loss": 0.4843, "step": 39020 }, { "epoch": 0.8275752370045174, "grad_norm": 0.4032403826713562, "learning_rate": 1.2694602679627952e-05, "loss": 0.5196, "step": 39021 }, { "epoch": 0.8275964454624505, "grad_norm": 0.35422855615615845, "learning_rate": 1.2694281518294522e-05, "loss": 0.4446, "step": 39022 }, { "epoch": 0.8276176539203834, "grad_norm": 0.3618869483470917, "learning_rate": 1.2693960353964536e-05, "loss": 0.5704, "step": 39023 }, { "epoch": 0.8276388623783165, "grad_norm": 0.3648347556591034, "learning_rate": 1.2693639186638353e-05, "loss": 0.542, "step": 39024 }, { "epoch": 0.8276600708362495, "grad_norm": 0.36851829290390015, "learning_rate": 1.2693318016316326e-05, "loss": 0.5025, "step": 39025 }, { "epoch": 0.8276812792941826, "grad_norm": 0.42856964468955994, "learning_rate": 1.2692996842998817e-05, "loss": 0.4243, "step": 39026 }, { "epoch": 0.8277024877521155, "grad_norm": 0.3772182762622833, "learning_rate": 1.269267566668618e-05, "loss": 0.5423, "step": 39027 }, { "epoch": 0.8277236962100486, "grad_norm": 0.3431045711040497, "learning_rate": 1.2692354487378768e-05, "loss": 0.4823, "step": 39028 }, { "epoch": 0.8277449046679816, "grad_norm": 0.3637225031852722, "learning_rate": 1.2692033305076944e-05, "loss": 0.5116, "step": 39029 }, { "epoch": 0.8277661131259146, "grad_norm": 0.400964617729187, "learning_rate": 1.2691712119781067e-05, "loss": 0.4802, "step": 39030 }, { "epoch": 0.8277873215838476, "grad_norm": 0.3701496124267578, "learning_rate": 1.269139093149149e-05, "loss": 0.529, "step": 39031 }, { "epoch": 0.8278085300417807, "grad_norm": 0.34818363189697266, "learning_rate": 1.2691069740208572e-05, "loss": 0.4363, "step": 39032 }, { "epoch": 0.8278297384997136, "grad_norm": 0.3964386284351349, "learning_rate": 1.2690748545932672e-05, "loss": 0.5022, "step": 39033 }, { "epoch": 0.8278509469576467, "grad_norm": 0.3977692425251007, "learning_rate": 1.2690427348664138e-05, "loss": 0.4815, "step": 39034 }, { "epoch": 0.8278721554155797, "grad_norm": 0.34752514958381653, "learning_rate": 1.2690106148403339e-05, "loss": 0.5101, "step": 39035 }, { "epoch": 0.8278933638735128, "grad_norm": 0.3261047303676605, "learning_rate": 1.2689784945150624e-05, "loss": 0.4761, "step": 39036 }, { "epoch": 0.8279145723314458, "grad_norm": 0.37967950105667114, "learning_rate": 1.2689463738906356e-05, "loss": 0.4833, "step": 39037 }, { "epoch": 0.8279357807893788, "grad_norm": 0.34734243154525757, "learning_rate": 1.268914252967089e-05, "loss": 0.5061, "step": 39038 }, { "epoch": 0.8279569892473119, "grad_norm": 0.4329741299152374, "learning_rate": 1.2688821317444581e-05, "loss": 0.5119, "step": 39039 }, { "epoch": 0.8279781977052448, "grad_norm": 2.599626064300537, "learning_rate": 1.2688500102227792e-05, "loss": 0.5356, "step": 39040 }, { "epoch": 0.8279994061631779, "grad_norm": 0.3842783272266388, "learning_rate": 1.2688178884020873e-05, "loss": 0.538, "step": 39041 }, { "epoch": 0.8280206146211109, "grad_norm": 0.36360296607017517, "learning_rate": 1.2687857662824188e-05, "loss": 0.4844, "step": 39042 }, { "epoch": 0.828041823079044, "grad_norm": 0.35233381390571594, "learning_rate": 1.2687536438638089e-05, "loss": 0.4271, "step": 39043 }, { "epoch": 0.8280630315369769, "grad_norm": 0.6027094721794128, "learning_rate": 1.2687215211462936e-05, "loss": 0.4791, "step": 39044 }, { "epoch": 0.82808423999491, "grad_norm": 0.35496506094932556, "learning_rate": 1.2686893981299084e-05, "loss": 0.4483, "step": 39045 }, { "epoch": 0.828105448452843, "grad_norm": 0.3931390047073364, "learning_rate": 1.2686572748146892e-05, "loss": 0.4632, "step": 39046 }, { "epoch": 0.828126656910776, "grad_norm": 0.37432506680488586, "learning_rate": 1.2686251512006719e-05, "loss": 0.4285, "step": 39047 }, { "epoch": 0.828147865368709, "grad_norm": 0.3985143005847931, "learning_rate": 1.2685930272878917e-05, "loss": 0.5282, "step": 39048 }, { "epoch": 0.8281690738266421, "grad_norm": 0.3452523946762085, "learning_rate": 1.2685609030763851e-05, "loss": 0.4961, "step": 39049 }, { "epoch": 0.8281902822845751, "grad_norm": 0.3896641135215759, "learning_rate": 1.2685287785661873e-05, "loss": 0.4772, "step": 39050 }, { "epoch": 0.8282114907425081, "grad_norm": 0.4170573353767395, "learning_rate": 1.268496653757334e-05, "loss": 0.5108, "step": 39051 }, { "epoch": 0.8282326992004412, "grad_norm": 0.36844968795776367, "learning_rate": 1.2684645286498612e-05, "loss": 0.5034, "step": 39052 }, { "epoch": 0.8282539076583741, "grad_norm": 0.39366695284843445, "learning_rate": 1.268432403243804e-05, "loss": 0.5098, "step": 39053 }, { "epoch": 0.8282751161163072, "grad_norm": 0.4278116226196289, "learning_rate": 1.268400277539199e-05, "loss": 0.4716, "step": 39054 }, { "epoch": 0.8282963245742402, "grad_norm": 0.36854180693626404, "learning_rate": 1.2683681515360817e-05, "loss": 0.6389, "step": 39055 }, { "epoch": 0.8283175330321733, "grad_norm": 0.3493366837501526, "learning_rate": 1.2683360252344876e-05, "loss": 0.426, "step": 39056 }, { "epoch": 0.8283387414901062, "grad_norm": 0.3515525460243225, "learning_rate": 1.2683038986344523e-05, "loss": 0.4669, "step": 39057 }, { "epoch": 0.8283599499480393, "grad_norm": 0.3776216208934784, "learning_rate": 1.268271771736012e-05, "loss": 0.4972, "step": 39058 }, { "epoch": 0.8283811584059723, "grad_norm": 0.3582726716995239, "learning_rate": 1.2682396445392017e-05, "loss": 0.4417, "step": 39059 }, { "epoch": 0.8284023668639053, "grad_norm": 0.3967205882072449, "learning_rate": 1.2682075170440581e-05, "loss": 0.5082, "step": 39060 }, { "epoch": 0.8284235753218383, "grad_norm": 0.36161115765571594, "learning_rate": 1.2681753892506161e-05, "loss": 0.4281, "step": 39061 }, { "epoch": 0.8284447837797714, "grad_norm": 0.342173308134079, "learning_rate": 1.2681432611589117e-05, "loss": 0.5015, "step": 39062 }, { "epoch": 0.8284659922377045, "grad_norm": 0.4648369550704956, "learning_rate": 1.268111132768981e-05, "loss": 0.5386, "step": 39063 }, { "epoch": 0.8284872006956374, "grad_norm": 0.3245335519313812, "learning_rate": 1.2680790040808596e-05, "loss": 0.4347, "step": 39064 }, { "epoch": 0.8285084091535705, "grad_norm": 0.42039579153060913, "learning_rate": 1.2680468750945827e-05, "loss": 0.5662, "step": 39065 }, { "epoch": 0.8285296176115035, "grad_norm": 0.5255882740020752, "learning_rate": 1.2680147458101865e-05, "loss": 0.4211, "step": 39066 }, { "epoch": 0.8285508260694365, "grad_norm": 0.37175044417381287, "learning_rate": 1.2679826162277065e-05, "loss": 0.514, "step": 39067 }, { "epoch": 0.8285720345273695, "grad_norm": 0.33440205454826355, "learning_rate": 1.2679504863471785e-05, "loss": 0.3802, "step": 39068 }, { "epoch": 0.8285932429853026, "grad_norm": 0.39694955945014954, "learning_rate": 1.2679183561686386e-05, "loss": 0.4263, "step": 39069 }, { "epoch": 0.8286144514432355, "grad_norm": 0.3641461133956909, "learning_rate": 1.2678862256921223e-05, "loss": 0.5349, "step": 39070 }, { "epoch": 0.8286356599011686, "grad_norm": 0.39955776929855347, "learning_rate": 1.2678540949176647e-05, "loss": 0.4734, "step": 39071 }, { "epoch": 0.8286568683591016, "grad_norm": 0.3604253828525543, "learning_rate": 1.2678219638453026e-05, "loss": 0.4371, "step": 39072 }, { "epoch": 0.8286780768170346, "grad_norm": 0.3875712752342224, "learning_rate": 1.267789832475071e-05, "loss": 0.4363, "step": 39073 }, { "epoch": 0.8286992852749676, "grad_norm": 0.37649354338645935, "learning_rate": 1.2677577008070058e-05, "loss": 0.4669, "step": 39074 }, { "epoch": 0.8287204937329007, "grad_norm": 0.36530938744544983, "learning_rate": 1.2677255688411432e-05, "loss": 0.3908, "step": 39075 }, { "epoch": 0.8287417021908337, "grad_norm": 0.39633479714393616, "learning_rate": 1.2676934365775181e-05, "loss": 0.394, "step": 39076 }, { "epoch": 0.8287629106487667, "grad_norm": 0.34250810742378235, "learning_rate": 1.2676613040161671e-05, "loss": 0.473, "step": 39077 }, { "epoch": 0.8287841191066998, "grad_norm": 0.4292697608470917, "learning_rate": 1.2676291711571254e-05, "loss": 0.4921, "step": 39078 }, { "epoch": 0.8288053275646328, "grad_norm": 0.372763991355896, "learning_rate": 1.2675970380004288e-05, "loss": 0.5766, "step": 39079 }, { "epoch": 0.8288265360225658, "grad_norm": 0.36471328139305115, "learning_rate": 1.2675649045461129e-05, "loss": 0.5049, "step": 39080 }, { "epoch": 0.8288477444804988, "grad_norm": 0.3884837329387665, "learning_rate": 1.267532770794214e-05, "loss": 0.5971, "step": 39081 }, { "epoch": 0.8288689529384319, "grad_norm": 0.46001917123794556, "learning_rate": 1.2675006367447673e-05, "loss": 0.4654, "step": 39082 }, { "epoch": 0.8288901613963648, "grad_norm": 0.3475026786327362, "learning_rate": 1.2674685023978088e-05, "loss": 0.4852, "step": 39083 }, { "epoch": 0.8289113698542979, "grad_norm": 0.35588836669921875, "learning_rate": 1.267436367753374e-05, "loss": 0.4487, "step": 39084 }, { "epoch": 0.8289325783122309, "grad_norm": 0.35670575499534607, "learning_rate": 1.2674042328114988e-05, "loss": 0.4641, "step": 39085 }, { "epoch": 0.828953786770164, "grad_norm": 0.34242042899131775, "learning_rate": 1.267372097572219e-05, "loss": 0.495, "step": 39086 }, { "epoch": 0.8289749952280969, "grad_norm": 0.45940956473350525, "learning_rate": 1.2673399620355704e-05, "loss": 0.5021, "step": 39087 }, { "epoch": 0.82899620368603, "grad_norm": 0.35074383020401, "learning_rate": 1.2673078262015884e-05, "loss": 0.4302, "step": 39088 }, { "epoch": 0.829017412143963, "grad_norm": 0.32524874806404114, "learning_rate": 1.267275690070309e-05, "loss": 0.4149, "step": 39089 }, { "epoch": 0.829038620601896, "grad_norm": 2.2373316287994385, "learning_rate": 1.267243553641768e-05, "loss": 0.4637, "step": 39090 }, { "epoch": 0.8290598290598291, "grad_norm": 0.3466894030570984, "learning_rate": 1.267211416916001e-05, "loss": 0.5557, "step": 39091 }, { "epoch": 0.8290810375177621, "grad_norm": 0.3632340431213379, "learning_rate": 1.267179279893044e-05, "loss": 0.4572, "step": 39092 }, { "epoch": 0.8291022459756952, "grad_norm": 0.4335329532623291, "learning_rate": 1.267147142572932e-05, "loss": 0.4495, "step": 39093 }, { "epoch": 0.8291234544336281, "grad_norm": 0.44375529885292053, "learning_rate": 1.2671150049557018e-05, "loss": 0.5711, "step": 39094 }, { "epoch": 0.8291446628915612, "grad_norm": 0.38034236431121826, "learning_rate": 1.2670828670413883e-05, "loss": 0.4944, "step": 39095 }, { "epoch": 0.8291658713494942, "grad_norm": 0.3621566593647003, "learning_rate": 1.2670507288300275e-05, "loss": 0.5212, "step": 39096 }, { "epoch": 0.8291870798074272, "grad_norm": 0.3582095801830292, "learning_rate": 1.2670185903216556e-05, "loss": 0.4828, "step": 39097 }, { "epoch": 0.8292082882653602, "grad_norm": 0.34098535776138306, "learning_rate": 1.2669864515163077e-05, "loss": 0.4017, "step": 39098 }, { "epoch": 0.8292294967232933, "grad_norm": 0.4158068597316742, "learning_rate": 1.2669543124140196e-05, "loss": 0.5155, "step": 39099 }, { "epoch": 0.8292507051812262, "grad_norm": 0.35310184955596924, "learning_rate": 1.2669221730148275e-05, "loss": 0.3943, "step": 39100 }, { "epoch": 0.8292719136391593, "grad_norm": 0.35168567299842834, "learning_rate": 1.2668900333187669e-05, "loss": 0.4451, "step": 39101 }, { "epoch": 0.8292931220970923, "grad_norm": 0.3626614809036255, "learning_rate": 1.2668578933258732e-05, "loss": 0.4214, "step": 39102 }, { "epoch": 0.8293143305550253, "grad_norm": 0.355726957321167, "learning_rate": 1.2668257530361827e-05, "loss": 0.462, "step": 39103 }, { "epoch": 0.8293355390129584, "grad_norm": 0.4288730323314667, "learning_rate": 1.2667936124497309e-05, "loss": 0.5471, "step": 39104 }, { "epoch": 0.8293567474708914, "grad_norm": 0.3679177761077881, "learning_rate": 1.2667614715665536e-05, "loss": 0.4992, "step": 39105 }, { "epoch": 0.8293779559288245, "grad_norm": 0.40001216530799866, "learning_rate": 1.2667293303866863e-05, "loss": 0.4957, "step": 39106 }, { "epoch": 0.8293991643867574, "grad_norm": 0.38479915261268616, "learning_rate": 1.2666971889101652e-05, "loss": 0.4789, "step": 39107 }, { "epoch": 0.8294203728446905, "grad_norm": 0.3737245500087738, "learning_rate": 1.2666650471370259e-05, "loss": 0.5489, "step": 39108 }, { "epoch": 0.8294415813026235, "grad_norm": 0.39541420340538025, "learning_rate": 1.2666329050673038e-05, "loss": 0.4798, "step": 39109 }, { "epoch": 0.8294627897605565, "grad_norm": 0.4060172736644745, "learning_rate": 1.266600762701035e-05, "loss": 0.4367, "step": 39110 }, { "epoch": 0.8294839982184895, "grad_norm": 0.36734870076179504, "learning_rate": 1.2665686200382552e-05, "loss": 0.4883, "step": 39111 }, { "epoch": 0.8295052066764226, "grad_norm": 0.5607117414474487, "learning_rate": 1.2665364770789998e-05, "loss": 0.3972, "step": 39112 }, { "epoch": 0.8295264151343555, "grad_norm": 0.42330557107925415, "learning_rate": 1.266504333823305e-05, "loss": 0.4382, "step": 39113 }, { "epoch": 0.8295476235922886, "grad_norm": 0.33907726407051086, "learning_rate": 1.2664721902712067e-05, "loss": 0.5293, "step": 39114 }, { "epoch": 0.8295688320502216, "grad_norm": 0.40349653363227844, "learning_rate": 1.2664400464227401e-05, "loss": 0.5435, "step": 39115 }, { "epoch": 0.8295900405081547, "grad_norm": 0.5231502056121826, "learning_rate": 1.2664079022779412e-05, "loss": 0.4755, "step": 39116 }, { "epoch": 0.8296112489660876, "grad_norm": 0.35748809576034546, "learning_rate": 1.2663757578368458e-05, "loss": 0.4694, "step": 39117 }, { "epoch": 0.8296324574240207, "grad_norm": 0.3505461812019348, "learning_rate": 1.2663436130994895e-05, "loss": 0.551, "step": 39118 }, { "epoch": 0.8296536658819538, "grad_norm": 0.3648506700992584, "learning_rate": 1.266311468065908e-05, "loss": 0.4105, "step": 39119 }, { "epoch": 0.8296748743398867, "grad_norm": 0.3850419521331787, "learning_rate": 1.2662793227361374e-05, "loss": 0.4749, "step": 39120 }, { "epoch": 0.8296960827978198, "grad_norm": 0.39758509397506714, "learning_rate": 1.2662471771102132e-05, "loss": 0.4481, "step": 39121 }, { "epoch": 0.8297172912557528, "grad_norm": 0.3566593825817108, "learning_rate": 1.2662150311881714e-05, "loss": 0.3836, "step": 39122 }, { "epoch": 0.8297384997136859, "grad_norm": 0.6198508143424988, "learning_rate": 1.2661828849700475e-05, "loss": 0.5555, "step": 39123 }, { "epoch": 0.8297597081716188, "grad_norm": 0.3777572810649872, "learning_rate": 1.2661507384558772e-05, "loss": 0.5539, "step": 39124 }, { "epoch": 0.8297809166295519, "grad_norm": 0.3504466414451599, "learning_rate": 1.2661185916456961e-05, "loss": 0.4249, "step": 39125 }, { "epoch": 0.8298021250874849, "grad_norm": 0.38260576128959656, "learning_rate": 1.2660864445395407e-05, "loss": 0.516, "step": 39126 }, { "epoch": 0.8298233335454179, "grad_norm": 0.36022332310676575, "learning_rate": 1.2660542971374458e-05, "loss": 0.5133, "step": 39127 }, { "epoch": 0.8298445420033509, "grad_norm": 0.3633502721786499, "learning_rate": 1.266022149439448e-05, "loss": 0.4901, "step": 39128 }, { "epoch": 0.829865750461284, "grad_norm": 0.3601463735103607, "learning_rate": 1.2659900014455827e-05, "loss": 0.5361, "step": 39129 }, { "epoch": 0.8298869589192169, "grad_norm": 0.35572779178619385, "learning_rate": 1.2659578531558855e-05, "loss": 0.5259, "step": 39130 }, { "epoch": 0.82990816737715, "grad_norm": 0.3565669357776642, "learning_rate": 1.265925704570392e-05, "loss": 0.4501, "step": 39131 }, { "epoch": 0.8299293758350831, "grad_norm": 0.35312798619270325, "learning_rate": 1.2658935556891385e-05, "loss": 0.5241, "step": 39132 }, { "epoch": 0.829950584293016, "grad_norm": 0.3884936273097992, "learning_rate": 1.2658614065121607e-05, "loss": 0.4695, "step": 39133 }, { "epoch": 0.8299717927509491, "grad_norm": 0.36716002225875854, "learning_rate": 1.265829257039494e-05, "loss": 0.5177, "step": 39134 }, { "epoch": 0.8299930012088821, "grad_norm": 0.3652012348175049, "learning_rate": 1.2657971072711741e-05, "loss": 0.5626, "step": 39135 }, { "epoch": 0.8300142096668152, "grad_norm": 0.35662293434143066, "learning_rate": 1.2657649572072373e-05, "loss": 0.5236, "step": 39136 }, { "epoch": 0.8300354181247481, "grad_norm": 0.34430262446403503, "learning_rate": 1.2657328068477186e-05, "loss": 0.4867, "step": 39137 }, { "epoch": 0.8300566265826812, "grad_norm": 0.3520924150943756, "learning_rate": 1.2657006561926545e-05, "loss": 0.4573, "step": 39138 }, { "epoch": 0.8300778350406142, "grad_norm": 0.3602202534675598, "learning_rate": 1.2656685052420803e-05, "loss": 0.5126, "step": 39139 }, { "epoch": 0.8300990434985472, "grad_norm": 0.34245792031288147, "learning_rate": 1.265636353996032e-05, "loss": 0.4997, "step": 39140 }, { "epoch": 0.8301202519564802, "grad_norm": 0.5102928280830383, "learning_rate": 1.2656042024545452e-05, "loss": 0.4629, "step": 39141 }, { "epoch": 0.8301414604144133, "grad_norm": 0.3472120761871338, "learning_rate": 1.2655720506176558e-05, "loss": 0.4533, "step": 39142 }, { "epoch": 0.8301626688723462, "grad_norm": 0.3758241832256317, "learning_rate": 1.2655398984853995e-05, "loss": 0.4728, "step": 39143 }, { "epoch": 0.8301838773302793, "grad_norm": 0.3651409447193146, "learning_rate": 1.2655077460578118e-05, "loss": 0.5691, "step": 39144 }, { "epoch": 0.8302050857882124, "grad_norm": 0.34711501002311707, "learning_rate": 1.2654755933349287e-05, "loss": 0.4236, "step": 39145 }, { "epoch": 0.8302262942461454, "grad_norm": 0.3766089975833893, "learning_rate": 1.2654434403167864e-05, "loss": 0.5102, "step": 39146 }, { "epoch": 0.8302475027040784, "grad_norm": 0.4102523922920227, "learning_rate": 1.2654112870034197e-05, "loss": 0.5277, "step": 39147 }, { "epoch": 0.8302687111620114, "grad_norm": 0.3967072069644928, "learning_rate": 1.2653791333948653e-05, "loss": 0.4997, "step": 39148 }, { "epoch": 0.8302899196199445, "grad_norm": 0.36930468678474426, "learning_rate": 1.2653469794911581e-05, "loss": 0.4547, "step": 39149 }, { "epoch": 0.8303111280778774, "grad_norm": 0.32371580600738525, "learning_rate": 1.2653148252923344e-05, "loss": 0.4454, "step": 39150 }, { "epoch": 0.8303323365358105, "grad_norm": 0.3325798213481903, "learning_rate": 1.26528267079843e-05, "loss": 0.4785, "step": 39151 }, { "epoch": 0.8303535449937435, "grad_norm": 0.37184008955955505, "learning_rate": 1.2652505160094806e-05, "loss": 0.4228, "step": 39152 }, { "epoch": 0.8303747534516766, "grad_norm": 0.4143296182155609, "learning_rate": 1.2652183609255218e-05, "loss": 0.4802, "step": 39153 }, { "epoch": 0.8303959619096095, "grad_norm": 0.35901471972465515, "learning_rate": 1.2651862055465894e-05, "loss": 0.5362, "step": 39154 }, { "epoch": 0.8304171703675426, "grad_norm": 0.43272727727890015, "learning_rate": 1.265154049872719e-05, "loss": 0.5171, "step": 39155 }, { "epoch": 0.8304383788254756, "grad_norm": 0.3737524747848511, "learning_rate": 1.265121893903947e-05, "loss": 0.5348, "step": 39156 }, { "epoch": 0.8304595872834086, "grad_norm": 0.4373587667942047, "learning_rate": 1.2650897376403084e-05, "loss": 0.5865, "step": 39157 }, { "epoch": 0.8304807957413416, "grad_norm": 0.5855289697647095, "learning_rate": 1.2650575810818393e-05, "loss": 0.4386, "step": 39158 }, { "epoch": 0.8305020041992747, "grad_norm": 0.42268264293670654, "learning_rate": 1.2650254242285756e-05, "loss": 0.5143, "step": 39159 }, { "epoch": 0.8305232126572077, "grad_norm": 0.3861366808414459, "learning_rate": 1.264993267080553e-05, "loss": 0.4806, "step": 39160 }, { "epoch": 0.8305444211151407, "grad_norm": 0.33071860671043396, "learning_rate": 1.264961109637807e-05, "loss": 0.4961, "step": 39161 }, { "epoch": 0.8305656295730738, "grad_norm": 0.37895864248275757, "learning_rate": 1.2649289519003739e-05, "loss": 0.482, "step": 39162 }, { "epoch": 0.8305868380310067, "grad_norm": 0.3395881950855255, "learning_rate": 1.2648967938682886e-05, "loss": 0.4548, "step": 39163 }, { "epoch": 0.8306080464889398, "grad_norm": 0.41460251808166504, "learning_rate": 1.2648646355415874e-05, "loss": 0.4686, "step": 39164 }, { "epoch": 0.8306292549468728, "grad_norm": 0.48951467871665955, "learning_rate": 1.2648324769203063e-05, "loss": 0.5174, "step": 39165 }, { "epoch": 0.8306504634048059, "grad_norm": 0.323329895734787, "learning_rate": 1.2648003180044811e-05, "loss": 0.4774, "step": 39166 }, { "epoch": 0.8306716718627388, "grad_norm": 0.3686099350452423, "learning_rate": 1.264768158794147e-05, "loss": 0.4441, "step": 39167 }, { "epoch": 0.8306928803206719, "grad_norm": 0.38783198595046997, "learning_rate": 1.26473599928934e-05, "loss": 0.3975, "step": 39168 }, { "epoch": 0.8307140887786049, "grad_norm": 0.34396931529045105, "learning_rate": 1.264703839490096e-05, "loss": 0.4641, "step": 39169 }, { "epoch": 0.8307352972365379, "grad_norm": 0.4045454263687134, "learning_rate": 1.2646716793964504e-05, "loss": 0.5013, "step": 39170 }, { "epoch": 0.8307565056944709, "grad_norm": 0.39124059677124023, "learning_rate": 1.2646395190084398e-05, "loss": 0.5098, "step": 39171 }, { "epoch": 0.830777714152404, "grad_norm": 0.3460298478603363, "learning_rate": 1.2646073583260989e-05, "loss": 0.5059, "step": 39172 }, { "epoch": 0.830798922610337, "grad_norm": 0.3785546123981476, "learning_rate": 1.2645751973494644e-05, "loss": 0.491, "step": 39173 }, { "epoch": 0.83082013106827, "grad_norm": 0.31876710057258606, "learning_rate": 1.2645430360785715e-05, "loss": 0.4755, "step": 39174 }, { "epoch": 0.8308413395262031, "grad_norm": 0.3399578928947449, "learning_rate": 1.264510874513456e-05, "loss": 0.5323, "step": 39175 }, { "epoch": 0.8308625479841361, "grad_norm": 0.39740973711013794, "learning_rate": 1.264478712654154e-05, "loss": 0.5369, "step": 39176 }, { "epoch": 0.8308837564420691, "grad_norm": 0.37311068177223206, "learning_rate": 1.2644465505007011e-05, "loss": 0.5584, "step": 39177 }, { "epoch": 0.8309049649000021, "grad_norm": 0.3931967318058014, "learning_rate": 1.2644143880531327e-05, "loss": 0.4983, "step": 39178 }, { "epoch": 0.8309261733579352, "grad_norm": 0.34407317638397217, "learning_rate": 1.2643822253114855e-05, "loss": 0.3904, "step": 39179 }, { "epoch": 0.8309473818158681, "grad_norm": 0.35048022866249084, "learning_rate": 1.2643500622757942e-05, "loss": 0.491, "step": 39180 }, { "epoch": 0.8309685902738012, "grad_norm": 0.3952677547931671, "learning_rate": 1.2643178989460954e-05, "loss": 0.5199, "step": 39181 }, { "epoch": 0.8309897987317342, "grad_norm": 0.39628365635871887, "learning_rate": 1.2642857353224241e-05, "loss": 0.4816, "step": 39182 }, { "epoch": 0.8310110071896673, "grad_norm": 0.3386266529560089, "learning_rate": 1.264253571404817e-05, "loss": 0.469, "step": 39183 }, { "epoch": 0.8310322156476002, "grad_norm": 0.35697248578071594, "learning_rate": 1.264221407193309e-05, "loss": 0.5811, "step": 39184 }, { "epoch": 0.8310534241055333, "grad_norm": 0.34048229455947876, "learning_rate": 1.2641892426879364e-05, "loss": 0.4646, "step": 39185 }, { "epoch": 0.8310746325634664, "grad_norm": 0.35831916332244873, "learning_rate": 1.2641570778887347e-05, "loss": 0.5156, "step": 39186 }, { "epoch": 0.8310958410213993, "grad_norm": 0.3732864260673523, "learning_rate": 1.2641249127957397e-05, "loss": 0.4923, "step": 39187 }, { "epoch": 0.8311170494793324, "grad_norm": 0.33643046021461487, "learning_rate": 1.2640927474089879e-05, "loss": 0.4687, "step": 39188 }, { "epoch": 0.8311382579372654, "grad_norm": 0.35549408197402954, "learning_rate": 1.2640605817285136e-05, "loss": 0.4589, "step": 39189 }, { "epoch": 0.8311594663951984, "grad_norm": 0.3677101135253906, "learning_rate": 1.2640284157543538e-05, "loss": 0.4815, "step": 39190 }, { "epoch": 0.8311806748531314, "grad_norm": 0.39254170656204224, "learning_rate": 1.263996249486544e-05, "loss": 0.4999, "step": 39191 }, { "epoch": 0.8312018833110645, "grad_norm": 0.338630348443985, "learning_rate": 1.2639640829251196e-05, "loss": 0.4641, "step": 39192 }, { "epoch": 0.8312230917689974, "grad_norm": 0.3665872812271118, "learning_rate": 1.263931916070117e-05, "loss": 0.4939, "step": 39193 }, { "epoch": 0.8312443002269305, "grad_norm": 0.3871084153652191, "learning_rate": 1.2638997489215714e-05, "loss": 0.5338, "step": 39194 }, { "epoch": 0.8312655086848635, "grad_norm": 0.4108741581439972, "learning_rate": 1.2638675814795186e-05, "loss": 0.4872, "step": 39195 }, { "epoch": 0.8312867171427966, "grad_norm": 0.43599843978881836, "learning_rate": 1.263835413743995e-05, "loss": 0.5715, "step": 39196 }, { "epoch": 0.8313079256007295, "grad_norm": 0.35813048481941223, "learning_rate": 1.2638032457150357e-05, "loss": 0.4938, "step": 39197 }, { "epoch": 0.8313291340586626, "grad_norm": 0.33099955320358276, "learning_rate": 1.2637710773926765e-05, "loss": 0.4174, "step": 39198 }, { "epoch": 0.8313503425165956, "grad_norm": 0.3594070076942444, "learning_rate": 1.2637389087769539e-05, "loss": 0.4734, "step": 39199 }, { "epoch": 0.8313715509745286, "grad_norm": 0.35190147161483765, "learning_rate": 1.2637067398679028e-05, "loss": 0.497, "step": 39200 }, { "epoch": 0.8313927594324617, "grad_norm": 0.369535356760025, "learning_rate": 1.2636745706655595e-05, "loss": 0.5014, "step": 39201 }, { "epoch": 0.8314139678903947, "grad_norm": 0.35670003294944763, "learning_rate": 1.2636424011699596e-05, "loss": 0.4522, "step": 39202 }, { "epoch": 0.8314351763483278, "grad_norm": 0.41083282232284546, "learning_rate": 1.2636102313811387e-05, "loss": 0.5208, "step": 39203 }, { "epoch": 0.8314563848062607, "grad_norm": 0.40257853269577026, "learning_rate": 1.263578061299133e-05, "loss": 0.4657, "step": 39204 }, { "epoch": 0.8314775932641938, "grad_norm": 0.375997930765152, "learning_rate": 1.2635458909239783e-05, "loss": 0.5453, "step": 39205 }, { "epoch": 0.8314988017221268, "grad_norm": 0.35686129331588745, "learning_rate": 1.2635137202557099e-05, "loss": 0.4876, "step": 39206 }, { "epoch": 0.8315200101800598, "grad_norm": 0.3584772050380707, "learning_rate": 1.263481549294364e-05, "loss": 0.4976, "step": 39207 }, { "epoch": 0.8315412186379928, "grad_norm": 0.3996148109436035, "learning_rate": 1.2634493780399759e-05, "loss": 0.5154, "step": 39208 }, { "epoch": 0.8315624270959259, "grad_norm": 0.37668484449386597, "learning_rate": 1.2634172064925816e-05, "loss": 0.5154, "step": 39209 }, { "epoch": 0.8315836355538588, "grad_norm": 0.3845743238925934, "learning_rate": 1.2633850346522174e-05, "loss": 0.4897, "step": 39210 }, { "epoch": 0.8316048440117919, "grad_norm": 0.3441062867641449, "learning_rate": 1.2633528625189186e-05, "loss": 0.4894, "step": 39211 }, { "epoch": 0.8316260524697249, "grad_norm": 0.3641807436943054, "learning_rate": 1.2633206900927208e-05, "loss": 0.4963, "step": 39212 }, { "epoch": 0.831647260927658, "grad_norm": 0.4004051089286804, "learning_rate": 1.2632885173736603e-05, "loss": 0.5306, "step": 39213 }, { "epoch": 0.831668469385591, "grad_norm": 0.3532092571258545, "learning_rate": 1.2632563443617723e-05, "loss": 0.5272, "step": 39214 }, { "epoch": 0.831689677843524, "grad_norm": 0.43892771005630493, "learning_rate": 1.2632241710570928e-05, "loss": 0.5302, "step": 39215 }, { "epoch": 0.8317108863014571, "grad_norm": 0.3686373829841614, "learning_rate": 1.263191997459658e-05, "loss": 0.4656, "step": 39216 }, { "epoch": 0.83173209475939, "grad_norm": 0.3542387783527374, "learning_rate": 1.2631598235695031e-05, "loss": 0.4869, "step": 39217 }, { "epoch": 0.8317533032173231, "grad_norm": 0.33930128812789917, "learning_rate": 1.2631276493866644e-05, "loss": 0.4681, "step": 39218 }, { "epoch": 0.8317745116752561, "grad_norm": 0.4488533139228821, "learning_rate": 1.2630954749111775e-05, "loss": 0.6062, "step": 39219 }, { "epoch": 0.8317957201331891, "grad_norm": 0.3257429301738739, "learning_rate": 1.2630633001430777e-05, "loss": 0.4922, "step": 39220 }, { "epoch": 0.8318169285911221, "grad_norm": 0.373304158449173, "learning_rate": 1.2630311250824011e-05, "loss": 0.4962, "step": 39221 }, { "epoch": 0.8318381370490552, "grad_norm": 0.4369578957557678, "learning_rate": 1.262998949729184e-05, "loss": 0.4886, "step": 39222 }, { "epoch": 0.8318593455069881, "grad_norm": 0.3139299154281616, "learning_rate": 1.2629667740834614e-05, "loss": 0.4421, "step": 39223 }, { "epoch": 0.8318805539649212, "grad_norm": 0.320078045129776, "learning_rate": 1.2629345981452696e-05, "loss": 0.4437, "step": 39224 }, { "epoch": 0.8319017624228542, "grad_norm": 0.4013591706752777, "learning_rate": 1.2629024219146444e-05, "loss": 0.4774, "step": 39225 }, { "epoch": 0.8319229708807873, "grad_norm": 0.3844227194786072, "learning_rate": 1.262870245391621e-05, "loss": 0.532, "step": 39226 }, { "epoch": 0.8319441793387203, "grad_norm": 0.33314332365989685, "learning_rate": 1.2628380685762358e-05, "loss": 0.412, "step": 39227 }, { "epoch": 0.8319653877966533, "grad_norm": 0.42636311054229736, "learning_rate": 1.2628058914685246e-05, "loss": 0.4693, "step": 39228 }, { "epoch": 0.8319865962545864, "grad_norm": 0.3539196252822876, "learning_rate": 1.2627737140685227e-05, "loss": 0.5178, "step": 39229 }, { "epoch": 0.8320078047125193, "grad_norm": 0.41431617736816406, "learning_rate": 1.2627415363762663e-05, "loss": 0.5985, "step": 39230 }, { "epoch": 0.8320290131704524, "grad_norm": 0.3657277822494507, "learning_rate": 1.262709358391791e-05, "loss": 0.5051, "step": 39231 }, { "epoch": 0.8320502216283854, "grad_norm": 0.6052768230438232, "learning_rate": 1.2626771801151327e-05, "loss": 0.5196, "step": 39232 }, { "epoch": 0.8320714300863185, "grad_norm": 1.7924751043319702, "learning_rate": 1.2626450015463273e-05, "loss": 0.4239, "step": 39233 }, { "epoch": 0.8320926385442514, "grad_norm": 0.31857842206954956, "learning_rate": 1.2626128226854098e-05, "loss": 0.3932, "step": 39234 }, { "epoch": 0.8321138470021845, "grad_norm": 0.35669583082199097, "learning_rate": 1.2625806435324169e-05, "loss": 0.3976, "step": 39235 }, { "epoch": 0.8321350554601175, "grad_norm": 0.33916521072387695, "learning_rate": 1.2625484640873841e-05, "loss": 0.42, "step": 39236 }, { "epoch": 0.8321562639180505, "grad_norm": 0.36694926023483276, "learning_rate": 1.2625162843503471e-05, "loss": 0.5131, "step": 39237 }, { "epoch": 0.8321774723759835, "grad_norm": 0.35348042845726013, "learning_rate": 1.262484104321342e-05, "loss": 0.5074, "step": 39238 }, { "epoch": 0.8321986808339166, "grad_norm": 0.3496744930744171, "learning_rate": 1.2624519240004046e-05, "loss": 0.5136, "step": 39239 }, { "epoch": 0.8322198892918495, "grad_norm": 0.366014301776886, "learning_rate": 1.2624197433875697e-05, "loss": 0.5446, "step": 39240 }, { "epoch": 0.8322410977497826, "grad_norm": 0.7777388691902161, "learning_rate": 1.2623875624828744e-05, "loss": 0.5483, "step": 39241 }, { "epoch": 0.8322623062077157, "grad_norm": 0.4252038300037384, "learning_rate": 1.2623553812863539e-05, "loss": 0.4722, "step": 39242 }, { "epoch": 0.8322835146656486, "grad_norm": 0.3883882462978363, "learning_rate": 1.2623231997980437e-05, "loss": 0.543, "step": 39243 }, { "epoch": 0.8323047231235817, "grad_norm": 0.3445456922054291, "learning_rate": 1.2622910180179801e-05, "loss": 0.423, "step": 39244 }, { "epoch": 0.8323259315815147, "grad_norm": 0.36032792925834656, "learning_rate": 1.2622588359461988e-05, "loss": 0.5091, "step": 39245 }, { "epoch": 0.8323471400394478, "grad_norm": 0.342130571603775, "learning_rate": 1.2622266535827355e-05, "loss": 0.4578, "step": 39246 }, { "epoch": 0.8323683484973807, "grad_norm": 0.3303337097167969, "learning_rate": 1.2621944709276258e-05, "loss": 0.4559, "step": 39247 }, { "epoch": 0.8323895569553138, "grad_norm": 0.621995210647583, "learning_rate": 1.262162287980906e-05, "loss": 0.5038, "step": 39248 }, { "epoch": 0.8324107654132468, "grad_norm": 0.42761826515197754, "learning_rate": 1.2621301047426112e-05, "loss": 0.5294, "step": 39249 }, { "epoch": 0.8324319738711798, "grad_norm": 0.31396201252937317, "learning_rate": 1.2620979212127778e-05, "loss": 0.4706, "step": 39250 }, { "epoch": 0.8324531823291128, "grad_norm": 0.3823438286781311, "learning_rate": 1.262065737391441e-05, "loss": 0.5459, "step": 39251 }, { "epoch": 0.8324743907870459, "grad_norm": 0.3473762571811676, "learning_rate": 1.2620335532786376e-05, "loss": 0.4087, "step": 39252 }, { "epoch": 0.8324955992449788, "grad_norm": 0.4051341414451599, "learning_rate": 1.2620013688744024e-05, "loss": 0.4944, "step": 39253 }, { "epoch": 0.8325168077029119, "grad_norm": 0.3792426288127899, "learning_rate": 1.2619691841787715e-05, "loss": 0.4477, "step": 39254 }, { "epoch": 0.832538016160845, "grad_norm": 1.0582295656204224, "learning_rate": 1.261936999191781e-05, "loss": 0.5055, "step": 39255 }, { "epoch": 0.832559224618778, "grad_norm": 0.33312028646469116, "learning_rate": 1.2619048139134664e-05, "loss": 0.4681, "step": 39256 }, { "epoch": 0.832580433076711, "grad_norm": 0.426376074552536, "learning_rate": 1.2618726283438631e-05, "loss": 0.4997, "step": 39257 }, { "epoch": 0.832601641534644, "grad_norm": 0.3389912247657776, "learning_rate": 1.261840442483008e-05, "loss": 0.4192, "step": 39258 }, { "epoch": 0.8326228499925771, "grad_norm": 0.37275412678718567, "learning_rate": 1.2618082563309358e-05, "loss": 0.5826, "step": 39259 }, { "epoch": 0.83264405845051, "grad_norm": 0.3579237163066864, "learning_rate": 1.2617760698876826e-05, "loss": 0.5021, "step": 39260 }, { "epoch": 0.8326652669084431, "grad_norm": 0.3596521019935608, "learning_rate": 1.2617438831532849e-05, "loss": 0.4795, "step": 39261 }, { "epoch": 0.8326864753663761, "grad_norm": 0.33081570267677307, "learning_rate": 1.2617116961277775e-05, "loss": 0.5116, "step": 39262 }, { "epoch": 0.8327076838243092, "grad_norm": 0.42872121930122375, "learning_rate": 1.2616795088111968e-05, "loss": 0.447, "step": 39263 }, { "epoch": 0.8327288922822421, "grad_norm": 0.4600798785686493, "learning_rate": 1.2616473212035786e-05, "loss": 0.5059, "step": 39264 }, { "epoch": 0.8327501007401752, "grad_norm": 0.3338364362716675, "learning_rate": 1.2616151333049578e-05, "loss": 0.4356, "step": 39265 }, { "epoch": 0.8327713091981082, "grad_norm": 0.41539111733436584, "learning_rate": 1.2615829451153713e-05, "loss": 0.5924, "step": 39266 }, { "epoch": 0.8327925176560412, "grad_norm": 0.39463019371032715, "learning_rate": 1.2615507566348548e-05, "loss": 0.4674, "step": 39267 }, { "epoch": 0.8328137261139743, "grad_norm": 0.38553833961486816, "learning_rate": 1.2615185678634437e-05, "loss": 0.4458, "step": 39268 }, { "epoch": 0.8328349345719073, "grad_norm": 0.5032237768173218, "learning_rate": 1.2614863788011738e-05, "loss": 0.5546, "step": 39269 }, { "epoch": 0.8328561430298403, "grad_norm": 0.40339723229408264, "learning_rate": 1.2614541894480812e-05, "loss": 0.426, "step": 39270 }, { "epoch": 0.8328773514877733, "grad_norm": 0.3658435344696045, "learning_rate": 1.2614219998042012e-05, "loss": 0.5515, "step": 39271 }, { "epoch": 0.8328985599457064, "grad_norm": 0.4146692156791687, "learning_rate": 1.26138980986957e-05, "loss": 0.4955, "step": 39272 }, { "epoch": 0.8329197684036393, "grad_norm": 0.3937724530696869, "learning_rate": 1.2613576196442236e-05, "loss": 0.4953, "step": 39273 }, { "epoch": 0.8329409768615724, "grad_norm": 0.33396852016448975, "learning_rate": 1.2613254291281973e-05, "loss": 0.4463, "step": 39274 }, { "epoch": 0.8329621853195054, "grad_norm": 0.37304404377937317, "learning_rate": 1.2612932383215273e-05, "loss": 0.4534, "step": 39275 }, { "epoch": 0.8329833937774385, "grad_norm": 0.45368292927742004, "learning_rate": 1.2612610472242492e-05, "loss": 0.5865, "step": 39276 }, { "epoch": 0.8330046022353714, "grad_norm": 0.3511302173137665, "learning_rate": 1.2612288558363988e-05, "loss": 0.4956, "step": 39277 }, { "epoch": 0.8330258106933045, "grad_norm": 0.45322340726852417, "learning_rate": 1.2611966641580118e-05, "loss": 0.5083, "step": 39278 }, { "epoch": 0.8330470191512375, "grad_norm": 0.35893791913986206, "learning_rate": 1.2611644721891243e-05, "loss": 0.492, "step": 39279 }, { "epoch": 0.8330682276091705, "grad_norm": 0.35329747200012207, "learning_rate": 1.2611322799297717e-05, "loss": 0.3992, "step": 39280 }, { "epoch": 0.8330894360671035, "grad_norm": 0.3921542763710022, "learning_rate": 1.2611000873799903e-05, "loss": 0.4866, "step": 39281 }, { "epoch": 0.8331106445250366, "grad_norm": 0.4711938798427582, "learning_rate": 1.2610678945398154e-05, "loss": 0.4178, "step": 39282 }, { "epoch": 0.8331318529829697, "grad_norm": 0.4386659562587738, "learning_rate": 1.2610357014092834e-05, "loss": 0.4792, "step": 39283 }, { "epoch": 0.8331530614409026, "grad_norm": 0.3546221852302551, "learning_rate": 1.2610035079884298e-05, "loss": 0.5265, "step": 39284 }, { "epoch": 0.8331742698988357, "grad_norm": 0.34339967370033264, "learning_rate": 1.2609713142772897e-05, "loss": 0.482, "step": 39285 }, { "epoch": 0.8331954783567687, "grad_norm": 0.39796996116638184, "learning_rate": 1.2609391202759003e-05, "loss": 0.4974, "step": 39286 }, { "epoch": 0.8332166868147017, "grad_norm": 1.5113718509674072, "learning_rate": 1.2609069259842963e-05, "loss": 0.4729, "step": 39287 }, { "epoch": 0.8332378952726347, "grad_norm": 0.3352576792240143, "learning_rate": 1.2608747314025141e-05, "loss": 0.584, "step": 39288 }, { "epoch": 0.8332591037305678, "grad_norm": 0.334319144487381, "learning_rate": 1.2608425365305892e-05, "loss": 0.4721, "step": 39289 }, { "epoch": 0.8332803121885007, "grad_norm": 0.3858572542667389, "learning_rate": 1.2608103413685572e-05, "loss": 0.4319, "step": 39290 }, { "epoch": 0.8333015206464338, "grad_norm": 0.4332546293735504, "learning_rate": 1.2607781459164546e-05, "loss": 0.5183, "step": 39291 }, { "epoch": 0.8333227291043668, "grad_norm": 0.38138726353645325, "learning_rate": 1.2607459501743167e-05, "loss": 0.4163, "step": 39292 }, { "epoch": 0.8333439375622999, "grad_norm": 0.41593384742736816, "learning_rate": 1.2607137541421794e-05, "loss": 0.5093, "step": 39293 }, { "epoch": 0.8333651460202328, "grad_norm": 0.3878338038921356, "learning_rate": 1.2606815578200785e-05, "loss": 0.4809, "step": 39294 }, { "epoch": 0.8333863544781659, "grad_norm": 0.3546203076839447, "learning_rate": 1.2606493612080502e-05, "loss": 0.4925, "step": 39295 }, { "epoch": 0.833407562936099, "grad_norm": 0.3711507320404053, "learning_rate": 1.2606171643061293e-05, "loss": 0.4785, "step": 39296 }, { "epoch": 0.8334287713940319, "grad_norm": 0.45257824659347534, "learning_rate": 1.2605849671143528e-05, "loss": 0.6031, "step": 39297 }, { "epoch": 0.833449979851965, "grad_norm": 0.3313482403755188, "learning_rate": 1.2605527696327555e-05, "loss": 0.4603, "step": 39298 }, { "epoch": 0.833471188309898, "grad_norm": 0.38906359672546387, "learning_rate": 1.260520571861374e-05, "loss": 0.4992, "step": 39299 }, { "epoch": 0.833492396767831, "grad_norm": 0.42736750841140747, "learning_rate": 1.2604883738002438e-05, "loss": 0.47, "step": 39300 }, { "epoch": 0.833513605225764, "grad_norm": 0.3488725423812866, "learning_rate": 1.2604561754494004e-05, "loss": 0.4883, "step": 39301 }, { "epoch": 0.8335348136836971, "grad_norm": 0.3414904773235321, "learning_rate": 1.2604239768088801e-05, "loss": 0.4543, "step": 39302 }, { "epoch": 0.83355602214163, "grad_norm": 0.3669271171092987, "learning_rate": 1.2603917778787187e-05, "loss": 0.4534, "step": 39303 }, { "epoch": 0.8335772305995631, "grad_norm": 0.33412429690361023, "learning_rate": 1.2603595786589515e-05, "loss": 0.4881, "step": 39304 }, { "epoch": 0.8335984390574961, "grad_norm": 0.3584921360015869, "learning_rate": 1.2603273791496147e-05, "loss": 0.4964, "step": 39305 }, { "epoch": 0.8336196475154292, "grad_norm": 0.36042919754981995, "learning_rate": 1.2602951793507443e-05, "loss": 0.4787, "step": 39306 }, { "epoch": 0.8336408559733621, "grad_norm": 0.3570868670940399, "learning_rate": 1.2602629792623757e-05, "loss": 0.433, "step": 39307 }, { "epoch": 0.8336620644312952, "grad_norm": 0.3849688172340393, "learning_rate": 1.2602307788845447e-05, "loss": 0.5073, "step": 39308 }, { "epoch": 0.8336832728892283, "grad_norm": 0.34966960549354553, "learning_rate": 1.2601985782172876e-05, "loss": 0.431, "step": 39309 }, { "epoch": 0.8337044813471612, "grad_norm": 0.3658832311630249, "learning_rate": 1.2601663772606395e-05, "loss": 0.5422, "step": 39310 }, { "epoch": 0.8337256898050943, "grad_norm": 0.4464130401611328, "learning_rate": 1.2601341760146369e-05, "loss": 0.4902, "step": 39311 }, { "epoch": 0.8337468982630273, "grad_norm": 0.37114956974983215, "learning_rate": 1.2601019744793152e-05, "loss": 0.5291, "step": 39312 }, { "epoch": 0.8337681067209604, "grad_norm": 0.38626933097839355, "learning_rate": 1.2600697726547106e-05, "loss": 0.4789, "step": 39313 }, { "epoch": 0.8337893151788933, "grad_norm": 0.45653828978538513, "learning_rate": 1.2600375705408585e-05, "loss": 0.5723, "step": 39314 }, { "epoch": 0.8338105236368264, "grad_norm": 0.3675896227359772, "learning_rate": 1.2600053681377951e-05, "loss": 0.4403, "step": 39315 }, { "epoch": 0.8338317320947594, "grad_norm": 0.34839051961898804, "learning_rate": 1.2599731654455555e-05, "loss": 0.4264, "step": 39316 }, { "epoch": 0.8338529405526924, "grad_norm": 0.3949086368083954, "learning_rate": 1.2599409624641763e-05, "loss": 0.3913, "step": 39317 }, { "epoch": 0.8338741490106254, "grad_norm": 0.456879198551178, "learning_rate": 1.259908759193693e-05, "loss": 0.5025, "step": 39318 }, { "epoch": 0.8338953574685585, "grad_norm": 0.365016907453537, "learning_rate": 1.2598765556341412e-05, "loss": 0.5536, "step": 39319 }, { "epoch": 0.8339165659264914, "grad_norm": 0.42474818229675293, "learning_rate": 1.2598443517855575e-05, "loss": 0.4468, "step": 39320 }, { "epoch": 0.8339377743844245, "grad_norm": 0.45133745670318604, "learning_rate": 1.2598121476479768e-05, "loss": 0.519, "step": 39321 }, { "epoch": 0.8339589828423575, "grad_norm": 0.4118480384349823, "learning_rate": 1.2597799432214352e-05, "loss": 0.4759, "step": 39322 }, { "epoch": 0.8339801913002906, "grad_norm": 0.30818304419517517, "learning_rate": 1.2597477385059687e-05, "loss": 0.4834, "step": 39323 }, { "epoch": 0.8340013997582236, "grad_norm": 0.38539549708366394, "learning_rate": 1.259715533501613e-05, "loss": 0.476, "step": 39324 }, { "epoch": 0.8340226082161566, "grad_norm": 0.4646458625793457, "learning_rate": 1.259683328208404e-05, "loss": 0.5571, "step": 39325 }, { "epoch": 0.8340438166740897, "grad_norm": 0.36592909693717957, "learning_rate": 1.2596511226263775e-05, "loss": 0.4402, "step": 39326 }, { "epoch": 0.8340650251320226, "grad_norm": 0.3863804042339325, "learning_rate": 1.259618916755569e-05, "loss": 0.465, "step": 39327 }, { "epoch": 0.8340862335899557, "grad_norm": 0.33191436529159546, "learning_rate": 1.2595867105960151e-05, "loss": 0.4194, "step": 39328 }, { "epoch": 0.8341074420478887, "grad_norm": 0.37355905771255493, "learning_rate": 1.2595545041477509e-05, "loss": 0.4179, "step": 39329 }, { "epoch": 0.8341286505058217, "grad_norm": 0.5048816800117493, "learning_rate": 1.2595222974108121e-05, "loss": 0.4268, "step": 39330 }, { "epoch": 0.8341498589637547, "grad_norm": 0.3503667116165161, "learning_rate": 1.2594900903852351e-05, "loss": 0.4502, "step": 39331 }, { "epoch": 0.8341710674216878, "grad_norm": 0.3677901029586792, "learning_rate": 1.2594578830710557e-05, "loss": 0.4845, "step": 39332 }, { "epoch": 0.8341922758796207, "grad_norm": 0.41780149936676025, "learning_rate": 1.2594256754683092e-05, "loss": 0.4409, "step": 39333 }, { "epoch": 0.8342134843375538, "grad_norm": 0.35183045268058777, "learning_rate": 1.259393467577032e-05, "loss": 0.4844, "step": 39334 }, { "epoch": 0.8342346927954868, "grad_norm": 0.34964096546173096, "learning_rate": 1.2593612593972595e-05, "loss": 0.5143, "step": 39335 }, { "epoch": 0.8342559012534199, "grad_norm": 0.4626702070236206, "learning_rate": 1.2593290509290272e-05, "loss": 0.51, "step": 39336 }, { "epoch": 0.8342771097113529, "grad_norm": 0.3354780375957489, "learning_rate": 1.2592968421723719e-05, "loss": 0.5624, "step": 39337 }, { "epoch": 0.8342983181692859, "grad_norm": 0.35649576783180237, "learning_rate": 1.2592646331273291e-05, "loss": 0.5, "step": 39338 }, { "epoch": 0.834319526627219, "grad_norm": 0.395107626914978, "learning_rate": 1.259232423793934e-05, "loss": 0.514, "step": 39339 }, { "epoch": 0.8343407350851519, "grad_norm": 0.48359227180480957, "learning_rate": 1.259200214172223e-05, "loss": 0.4679, "step": 39340 }, { "epoch": 0.834361943543085, "grad_norm": 0.41086912155151367, "learning_rate": 1.2591680042622315e-05, "loss": 0.4591, "step": 39341 }, { "epoch": 0.834383152001018, "grad_norm": 0.3772963881492615, "learning_rate": 1.2591357940639961e-05, "loss": 0.5165, "step": 39342 }, { "epoch": 0.834404360458951, "grad_norm": 0.37042897939682007, "learning_rate": 1.2591035835775518e-05, "loss": 0.5225, "step": 39343 }, { "epoch": 0.834425568916884, "grad_norm": 0.45266032218933105, "learning_rate": 1.2590713728029347e-05, "loss": 0.5138, "step": 39344 }, { "epoch": 0.8344467773748171, "grad_norm": 0.5937135815620422, "learning_rate": 1.259039161740181e-05, "loss": 0.5283, "step": 39345 }, { "epoch": 0.8344679858327501, "grad_norm": 0.3582620322704315, "learning_rate": 1.2590069503893258e-05, "loss": 0.4026, "step": 39346 }, { "epoch": 0.8344891942906831, "grad_norm": 0.3305930197238922, "learning_rate": 1.2589747387504056e-05, "loss": 0.4245, "step": 39347 }, { "epoch": 0.8345104027486161, "grad_norm": 0.4148266613483429, "learning_rate": 1.2589425268234559e-05, "loss": 0.4505, "step": 39348 }, { "epoch": 0.8345316112065492, "grad_norm": 0.4060206413269043, "learning_rate": 1.2589103146085124e-05, "loss": 0.4335, "step": 39349 }, { "epoch": 0.8345528196644822, "grad_norm": 0.411769300699234, "learning_rate": 1.2588781021056111e-05, "loss": 0.4909, "step": 39350 }, { "epoch": 0.8345740281224152, "grad_norm": 0.36414191126823425, "learning_rate": 1.258845889314788e-05, "loss": 0.5047, "step": 39351 }, { "epoch": 0.8345952365803483, "grad_norm": 0.3528663218021393, "learning_rate": 1.2588136762360787e-05, "loss": 0.5001, "step": 39352 }, { "epoch": 0.8346164450382813, "grad_norm": 0.3613836467266083, "learning_rate": 1.2587814628695192e-05, "loss": 0.5559, "step": 39353 }, { "epoch": 0.8346376534962143, "grad_norm": 0.4247600734233856, "learning_rate": 1.2587492492151453e-05, "loss": 0.4686, "step": 39354 }, { "epoch": 0.8346588619541473, "grad_norm": 0.3684350252151489, "learning_rate": 1.2587170352729923e-05, "loss": 0.4614, "step": 39355 }, { "epoch": 0.8346800704120804, "grad_norm": 0.5095522999763489, "learning_rate": 1.2586848210430964e-05, "loss": 0.3964, "step": 39356 }, { "epoch": 0.8347012788700133, "grad_norm": 0.43455854058265686, "learning_rate": 1.258652606525494e-05, "loss": 0.5311, "step": 39357 }, { "epoch": 0.8347224873279464, "grad_norm": 0.32751935720443726, "learning_rate": 1.25862039172022e-05, "loss": 0.4157, "step": 39358 }, { "epoch": 0.8347436957858794, "grad_norm": 0.4465184509754181, "learning_rate": 1.258588176627311e-05, "loss": 0.4639, "step": 39359 }, { "epoch": 0.8347649042438124, "grad_norm": 0.39330601692199707, "learning_rate": 1.2585559612468023e-05, "loss": 0.514, "step": 39360 }, { "epoch": 0.8347861127017454, "grad_norm": 0.35588645935058594, "learning_rate": 1.2585237455787298e-05, "loss": 0.4455, "step": 39361 }, { "epoch": 0.8348073211596785, "grad_norm": 0.3360779881477356, "learning_rate": 1.2584915296231294e-05, "loss": 0.4492, "step": 39362 }, { "epoch": 0.8348285296176116, "grad_norm": 0.3267475664615631, "learning_rate": 1.2584593133800374e-05, "loss": 0.4282, "step": 39363 }, { "epoch": 0.8348497380755445, "grad_norm": 0.373382568359375, "learning_rate": 1.2584270968494887e-05, "loss": 0.4649, "step": 39364 }, { "epoch": 0.8348709465334776, "grad_norm": 0.3457968235015869, "learning_rate": 1.2583948800315197e-05, "loss": 0.4457, "step": 39365 }, { "epoch": 0.8348921549914106, "grad_norm": 0.3519159257411957, "learning_rate": 1.2583626629261665e-05, "loss": 0.5333, "step": 39366 }, { "epoch": 0.8349133634493436, "grad_norm": 0.36808642745018005, "learning_rate": 1.2583304455334644e-05, "loss": 0.5285, "step": 39367 }, { "epoch": 0.8349345719072766, "grad_norm": 0.35877445340156555, "learning_rate": 1.2582982278534493e-05, "loss": 0.5448, "step": 39368 }, { "epoch": 0.8349557803652097, "grad_norm": 0.4030740559101105, "learning_rate": 1.2582660098861572e-05, "loss": 0.5101, "step": 39369 }, { "epoch": 0.8349769888231426, "grad_norm": 0.3448638916015625, "learning_rate": 1.258233791631624e-05, "loss": 0.4957, "step": 39370 }, { "epoch": 0.8349981972810757, "grad_norm": 0.35301846265792847, "learning_rate": 1.2582015730898855e-05, "loss": 0.3987, "step": 39371 }, { "epoch": 0.8350194057390087, "grad_norm": 0.31418994069099426, "learning_rate": 1.2581693542609771e-05, "loss": 0.4064, "step": 39372 }, { "epoch": 0.8350406141969418, "grad_norm": 0.3781346082687378, "learning_rate": 1.2581371351449354e-05, "loss": 0.5345, "step": 39373 }, { "epoch": 0.8350618226548747, "grad_norm": 0.3730647563934326, "learning_rate": 1.2581049157417957e-05, "loss": 0.478, "step": 39374 }, { "epoch": 0.8350830311128078, "grad_norm": 0.3835761547088623, "learning_rate": 1.2580726960515937e-05, "loss": 0.4532, "step": 39375 }, { "epoch": 0.8351042395707408, "grad_norm": 0.3731130063533783, "learning_rate": 1.2580404760743656e-05, "loss": 0.5069, "step": 39376 }, { "epoch": 0.8351254480286738, "grad_norm": 0.3519035577774048, "learning_rate": 1.2580082558101472e-05, "loss": 0.4335, "step": 39377 }, { "epoch": 0.8351466564866069, "grad_norm": 0.3750027120113373, "learning_rate": 1.2579760352589743e-05, "loss": 0.5101, "step": 39378 }, { "epoch": 0.8351678649445399, "grad_norm": 0.3601420223712921, "learning_rate": 1.2579438144208827e-05, "loss": 0.4413, "step": 39379 }, { "epoch": 0.835189073402473, "grad_norm": 0.3884359896183014, "learning_rate": 1.2579115932959082e-05, "loss": 0.4828, "step": 39380 }, { "epoch": 0.8352102818604059, "grad_norm": 0.3637729585170746, "learning_rate": 1.2578793718840865e-05, "loss": 0.4667, "step": 39381 }, { "epoch": 0.835231490318339, "grad_norm": 0.3198978006839752, "learning_rate": 1.2578471501854538e-05, "loss": 0.5288, "step": 39382 }, { "epoch": 0.835252698776272, "grad_norm": 0.5764915943145752, "learning_rate": 1.2578149282000458e-05, "loss": 0.4715, "step": 39383 }, { "epoch": 0.835273907234205, "grad_norm": 0.34414777159690857, "learning_rate": 1.257782705927898e-05, "loss": 0.506, "step": 39384 }, { "epoch": 0.835295115692138, "grad_norm": 0.404009610414505, "learning_rate": 1.2577504833690468e-05, "loss": 0.5721, "step": 39385 }, { "epoch": 0.8353163241500711, "grad_norm": 0.32215026021003723, "learning_rate": 1.2577182605235274e-05, "loss": 0.3778, "step": 39386 }, { "epoch": 0.835337532608004, "grad_norm": 0.3845408260822296, "learning_rate": 1.2576860373913764e-05, "loss": 0.5084, "step": 39387 }, { "epoch": 0.8353587410659371, "grad_norm": 0.5319982171058655, "learning_rate": 1.2576538139726289e-05, "loss": 0.5607, "step": 39388 }, { "epoch": 0.8353799495238701, "grad_norm": 0.3571482002735138, "learning_rate": 1.2576215902673214e-05, "loss": 0.513, "step": 39389 }, { "epoch": 0.8354011579818031, "grad_norm": 0.3452117443084717, "learning_rate": 1.257589366275489e-05, "loss": 0.4699, "step": 39390 }, { "epoch": 0.8354223664397362, "grad_norm": 0.3624784052371979, "learning_rate": 1.2575571419971684e-05, "loss": 0.4072, "step": 39391 }, { "epoch": 0.8354435748976692, "grad_norm": 0.3714611530303955, "learning_rate": 1.2575249174323946e-05, "loss": 0.5171, "step": 39392 }, { "epoch": 0.8354647833556023, "grad_norm": 0.3397080898284912, "learning_rate": 1.2574926925812041e-05, "loss": 0.4998, "step": 39393 }, { "epoch": 0.8354859918135352, "grad_norm": 0.3732486069202423, "learning_rate": 1.257460467443632e-05, "loss": 0.5013, "step": 39394 }, { "epoch": 0.8355072002714683, "grad_norm": 0.3790573477745056, "learning_rate": 1.257428242019715e-05, "loss": 0.4935, "step": 39395 }, { "epoch": 0.8355284087294013, "grad_norm": 0.405955046415329, "learning_rate": 1.2573960163094887e-05, "loss": 0.5073, "step": 39396 }, { "epoch": 0.8355496171873343, "grad_norm": 0.40284740924835205, "learning_rate": 1.2573637903129887e-05, "loss": 0.4706, "step": 39397 }, { "epoch": 0.8355708256452673, "grad_norm": 0.3481241762638092, "learning_rate": 1.2573315640302506e-05, "loss": 0.4324, "step": 39398 }, { "epoch": 0.8355920341032004, "grad_norm": 0.4127824604511261, "learning_rate": 1.257299337461311e-05, "loss": 0.5348, "step": 39399 }, { "epoch": 0.8356132425611333, "grad_norm": 0.379720538854599, "learning_rate": 1.2572671106062048e-05, "loss": 0.4938, "step": 39400 }, { "epoch": 0.8356344510190664, "grad_norm": 0.4890899956226349, "learning_rate": 1.2572348834649686e-05, "loss": 0.4782, "step": 39401 }, { "epoch": 0.8356556594769994, "grad_norm": 0.40465179085731506, "learning_rate": 1.257202656037638e-05, "loss": 0.5716, "step": 39402 }, { "epoch": 0.8356768679349325, "grad_norm": 0.38411906361579895, "learning_rate": 1.2571704283242493e-05, "loss": 0.4918, "step": 39403 }, { "epoch": 0.8356980763928655, "grad_norm": 0.3264049291610718, "learning_rate": 1.2571382003248372e-05, "loss": 0.5025, "step": 39404 }, { "epoch": 0.8357192848507985, "grad_norm": 0.33216822147369385, "learning_rate": 1.2571059720394385e-05, "loss": 0.5058, "step": 39405 }, { "epoch": 0.8357404933087316, "grad_norm": 0.4123755991458893, "learning_rate": 1.2570737434680886e-05, "loss": 0.46, "step": 39406 }, { "epoch": 0.8357617017666645, "grad_norm": 0.34564507007598877, "learning_rate": 1.2570415146108237e-05, "loss": 0.5073, "step": 39407 }, { "epoch": 0.8357829102245976, "grad_norm": 0.34768733382225037, "learning_rate": 1.2570092854676794e-05, "loss": 0.5528, "step": 39408 }, { "epoch": 0.8358041186825306, "grad_norm": 0.3894568383693695, "learning_rate": 1.2569770560386916e-05, "loss": 0.4425, "step": 39409 }, { "epoch": 0.8358253271404636, "grad_norm": 0.35076868534088135, "learning_rate": 1.2569448263238963e-05, "loss": 0.4267, "step": 39410 }, { "epoch": 0.8358465355983966, "grad_norm": 0.39125677943229675, "learning_rate": 1.2569125963233292e-05, "loss": 0.4292, "step": 39411 }, { "epoch": 0.8358677440563297, "grad_norm": 0.34427115321159363, "learning_rate": 1.256880366037026e-05, "loss": 0.4918, "step": 39412 }, { "epoch": 0.8358889525142627, "grad_norm": 0.34252893924713135, "learning_rate": 1.2568481354650226e-05, "loss": 0.5352, "step": 39413 }, { "epoch": 0.8359101609721957, "grad_norm": 0.43173500895500183, "learning_rate": 1.2568159046073551e-05, "loss": 0.4344, "step": 39414 }, { "epoch": 0.8359313694301287, "grad_norm": 0.4484478831291199, "learning_rate": 1.256783673464059e-05, "loss": 0.6067, "step": 39415 }, { "epoch": 0.8359525778880618, "grad_norm": 0.4583186209201813, "learning_rate": 1.2567514420351706e-05, "loss": 0.4802, "step": 39416 }, { "epoch": 0.8359737863459947, "grad_norm": 0.3685190677642822, "learning_rate": 1.2567192103207253e-05, "loss": 0.5691, "step": 39417 }, { "epoch": 0.8359949948039278, "grad_norm": 0.3972603380680084, "learning_rate": 1.2566869783207592e-05, "loss": 0.5058, "step": 39418 }, { "epoch": 0.8360162032618609, "grad_norm": 0.33066949248313904, "learning_rate": 1.256654746035308e-05, "loss": 0.4434, "step": 39419 }, { "epoch": 0.8360374117197938, "grad_norm": 0.45196810364723206, "learning_rate": 1.2566225134644076e-05, "loss": 0.4756, "step": 39420 }, { "epoch": 0.8360586201777269, "grad_norm": 0.40804049372673035, "learning_rate": 1.2565902806080939e-05, "loss": 0.5893, "step": 39421 }, { "epoch": 0.8360798286356599, "grad_norm": 0.3521098494529724, "learning_rate": 1.256558047466403e-05, "loss": 0.5002, "step": 39422 }, { "epoch": 0.836101037093593, "grad_norm": 0.38388893008232117, "learning_rate": 1.2565258140393698e-05, "loss": 0.4852, "step": 39423 }, { "epoch": 0.8361222455515259, "grad_norm": 0.39237433671951294, "learning_rate": 1.2564935803270315e-05, "loss": 0.5169, "step": 39424 }, { "epoch": 0.836143454009459, "grad_norm": 0.3728826940059662, "learning_rate": 1.256461346329423e-05, "loss": 0.5504, "step": 39425 }, { "epoch": 0.836164662467392, "grad_norm": 0.3674871325492859, "learning_rate": 1.2564291120465801e-05, "loss": 0.4689, "step": 39426 }, { "epoch": 0.836185870925325, "grad_norm": 0.36779671907424927, "learning_rate": 1.2563968774785395e-05, "loss": 0.4249, "step": 39427 }, { "epoch": 0.836207079383258, "grad_norm": 0.3582030236721039, "learning_rate": 1.2563646426253364e-05, "loss": 0.4256, "step": 39428 }, { "epoch": 0.8362282878411911, "grad_norm": 0.36321353912353516, "learning_rate": 1.2563324074870065e-05, "loss": 0.5069, "step": 39429 }, { "epoch": 0.836249496299124, "grad_norm": 0.38161513209342957, "learning_rate": 1.2563001720635863e-05, "loss": 0.4234, "step": 39430 }, { "epoch": 0.8362707047570571, "grad_norm": 0.3940739631652832, "learning_rate": 1.2562679363551112e-05, "loss": 0.4456, "step": 39431 }, { "epoch": 0.8362919132149902, "grad_norm": 0.35297298431396484, "learning_rate": 1.2562357003616168e-05, "loss": 0.4451, "step": 39432 }, { "epoch": 0.8363131216729232, "grad_norm": 0.3518902063369751, "learning_rate": 1.2562034640831393e-05, "loss": 0.5258, "step": 39433 }, { "epoch": 0.8363343301308562, "grad_norm": 0.4049692451953888, "learning_rate": 1.2561712275197149e-05, "loss": 0.539, "step": 39434 }, { "epoch": 0.8363555385887892, "grad_norm": 0.3407476842403412, "learning_rate": 1.2561389906713787e-05, "loss": 0.4993, "step": 39435 }, { "epoch": 0.8363767470467223, "grad_norm": 0.3810403347015381, "learning_rate": 1.2561067535381674e-05, "loss": 0.4585, "step": 39436 }, { "epoch": 0.8363979555046552, "grad_norm": 0.3721654713153839, "learning_rate": 1.256074516120116e-05, "loss": 0.4639, "step": 39437 }, { "epoch": 0.8364191639625883, "grad_norm": 0.3325769305229187, "learning_rate": 1.256042278417261e-05, "loss": 0.5246, "step": 39438 }, { "epoch": 0.8364403724205213, "grad_norm": 0.38349103927612305, "learning_rate": 1.2560100404296378e-05, "loss": 0.4692, "step": 39439 }, { "epoch": 0.8364615808784543, "grad_norm": 0.36236971616744995, "learning_rate": 1.2559778021572824e-05, "loss": 0.5429, "step": 39440 }, { "epoch": 0.8364827893363873, "grad_norm": 0.39984947443008423, "learning_rate": 1.255945563600231e-05, "loss": 0.493, "step": 39441 }, { "epoch": 0.8365039977943204, "grad_norm": 0.3879804313182831, "learning_rate": 1.2559133247585191e-05, "loss": 0.4737, "step": 39442 }, { "epoch": 0.8365252062522534, "grad_norm": 0.37718072533607483, "learning_rate": 1.2558810856321824e-05, "loss": 0.4837, "step": 39443 }, { "epoch": 0.8365464147101864, "grad_norm": 0.5170369148254395, "learning_rate": 1.2558488462212573e-05, "loss": 0.462, "step": 39444 }, { "epoch": 0.8365676231681195, "grad_norm": 0.3692392408847809, "learning_rate": 1.2558166065257791e-05, "loss": 0.5218, "step": 39445 }, { "epoch": 0.8365888316260525, "grad_norm": 0.3265421688556671, "learning_rate": 1.2557843665457838e-05, "loss": 0.4464, "step": 39446 }, { "epoch": 0.8366100400839855, "grad_norm": 0.3633948862552643, "learning_rate": 1.2557521262813075e-05, "loss": 0.5569, "step": 39447 }, { "epoch": 0.8366312485419185, "grad_norm": 0.42096763849258423, "learning_rate": 1.2557198857323862e-05, "loss": 0.4473, "step": 39448 }, { "epoch": 0.8366524569998516, "grad_norm": 0.353722482919693, "learning_rate": 1.255687644899055e-05, "loss": 0.492, "step": 39449 }, { "epoch": 0.8366736654577845, "grad_norm": 0.40535348653793335, "learning_rate": 1.2556554037813504e-05, "loss": 0.4907, "step": 39450 }, { "epoch": 0.8366948739157176, "grad_norm": 0.41616979241371155, "learning_rate": 1.255623162379308e-05, "loss": 0.4666, "step": 39451 }, { "epoch": 0.8367160823736506, "grad_norm": 0.42105668783187866, "learning_rate": 1.2555909206929637e-05, "loss": 0.5219, "step": 39452 }, { "epoch": 0.8367372908315837, "grad_norm": 0.34147533774375916, "learning_rate": 1.2555586787223536e-05, "loss": 0.4464, "step": 39453 }, { "epoch": 0.8367584992895166, "grad_norm": 0.41439253091812134, "learning_rate": 1.2555264364675132e-05, "loss": 0.5735, "step": 39454 }, { "epoch": 0.8367797077474497, "grad_norm": 0.3389910161495209, "learning_rate": 1.2554941939284789e-05, "loss": 0.4844, "step": 39455 }, { "epoch": 0.8368009162053827, "grad_norm": 0.3489186763763428, "learning_rate": 1.2554619511052857e-05, "loss": 0.501, "step": 39456 }, { "epoch": 0.8368221246633157, "grad_norm": 0.3508204519748688, "learning_rate": 1.25542970799797e-05, "loss": 0.4504, "step": 39457 }, { "epoch": 0.8368433331212487, "grad_norm": 0.33248284459114075, "learning_rate": 1.2553974646065676e-05, "loss": 0.4789, "step": 39458 }, { "epoch": 0.8368645415791818, "grad_norm": 0.5006144046783447, "learning_rate": 1.2553652209311147e-05, "loss": 0.4719, "step": 39459 }, { "epoch": 0.8368857500371149, "grad_norm": 0.36775124073028564, "learning_rate": 1.2553329769716464e-05, "loss": 0.4438, "step": 39460 }, { "epoch": 0.8369069584950478, "grad_norm": 0.36316975951194763, "learning_rate": 1.2553007327281992e-05, "loss": 0.5032, "step": 39461 }, { "epoch": 0.8369281669529809, "grad_norm": 1.2119351625442505, "learning_rate": 1.2552684882008088e-05, "loss": 0.5079, "step": 39462 }, { "epoch": 0.8369493754109139, "grad_norm": 0.3731675148010254, "learning_rate": 1.2552362433895109e-05, "loss": 0.6038, "step": 39463 }, { "epoch": 0.8369705838688469, "grad_norm": 1.0503443479537964, "learning_rate": 1.2552039982943417e-05, "loss": 0.5064, "step": 39464 }, { "epoch": 0.8369917923267799, "grad_norm": 0.3714178800582886, "learning_rate": 1.2551717529153365e-05, "loss": 0.4778, "step": 39465 }, { "epoch": 0.837013000784713, "grad_norm": 0.34947121143341064, "learning_rate": 1.2551395072525313e-05, "loss": 0.5348, "step": 39466 }, { "epoch": 0.8370342092426459, "grad_norm": 0.4011813998222351, "learning_rate": 1.2551072613059627e-05, "loss": 0.5281, "step": 39467 }, { "epoch": 0.837055417700579, "grad_norm": 0.35782963037490845, "learning_rate": 1.2550750150756657e-05, "loss": 0.4928, "step": 39468 }, { "epoch": 0.837076626158512, "grad_norm": 0.35763904452323914, "learning_rate": 1.2550427685616767e-05, "loss": 0.5197, "step": 39469 }, { "epoch": 0.837097834616445, "grad_norm": 0.38121578097343445, "learning_rate": 1.2550105217640312e-05, "loss": 0.5198, "step": 39470 }, { "epoch": 0.837119043074378, "grad_norm": 0.36304062604904175, "learning_rate": 1.254978274682765e-05, "loss": 0.4741, "step": 39471 }, { "epoch": 0.8371402515323111, "grad_norm": 0.3352384567260742, "learning_rate": 1.2549460273179143e-05, "loss": 0.4897, "step": 39472 }, { "epoch": 0.8371614599902442, "grad_norm": 0.34989696741104126, "learning_rate": 1.2549137796695152e-05, "loss": 0.4862, "step": 39473 }, { "epoch": 0.8371826684481771, "grad_norm": 0.3863292634487152, "learning_rate": 1.2548815317376029e-05, "loss": 0.4558, "step": 39474 }, { "epoch": 0.8372038769061102, "grad_norm": 0.32512134313583374, "learning_rate": 1.2548492835222137e-05, "loss": 0.3833, "step": 39475 }, { "epoch": 0.8372250853640432, "grad_norm": 0.38396361470222473, "learning_rate": 1.2548170350233833e-05, "loss": 0.4944, "step": 39476 }, { "epoch": 0.8372462938219762, "grad_norm": 0.35572147369384766, "learning_rate": 1.2547847862411473e-05, "loss": 0.5198, "step": 39477 }, { "epoch": 0.8372675022799092, "grad_norm": 0.37051743268966675, "learning_rate": 1.2547525371755422e-05, "loss": 0.4418, "step": 39478 }, { "epoch": 0.8372887107378423, "grad_norm": 0.3651423156261444, "learning_rate": 1.2547202878266037e-05, "loss": 0.4913, "step": 39479 }, { "epoch": 0.8373099191957752, "grad_norm": 0.38039541244506836, "learning_rate": 1.2546880381943672e-05, "loss": 0.4288, "step": 39480 }, { "epoch": 0.8373311276537083, "grad_norm": 0.3389555513858795, "learning_rate": 1.254655788278869e-05, "loss": 0.4653, "step": 39481 }, { "epoch": 0.8373523361116413, "grad_norm": 0.35632145404815674, "learning_rate": 1.2546235380801445e-05, "loss": 0.409, "step": 39482 }, { "epoch": 0.8373735445695744, "grad_norm": 0.4858859181404114, "learning_rate": 1.2545912875982306e-05, "loss": 0.5251, "step": 39483 }, { "epoch": 0.8373947530275073, "grad_norm": 0.3578038215637207, "learning_rate": 1.254559036833162e-05, "loss": 0.4447, "step": 39484 }, { "epoch": 0.8374159614854404, "grad_norm": 0.35269734263420105, "learning_rate": 1.2545267857849753e-05, "loss": 0.4535, "step": 39485 }, { "epoch": 0.8374371699433735, "grad_norm": 0.37823814153671265, "learning_rate": 1.2544945344537059e-05, "loss": 0.4845, "step": 39486 }, { "epoch": 0.8374583784013064, "grad_norm": 0.37100327014923096, "learning_rate": 1.25446228283939e-05, "loss": 0.4616, "step": 39487 }, { "epoch": 0.8374795868592395, "grad_norm": 0.34564894437789917, "learning_rate": 1.2544300309420632e-05, "loss": 0.4924, "step": 39488 }, { "epoch": 0.8375007953171725, "grad_norm": 0.3622574806213379, "learning_rate": 1.2543977787617619e-05, "loss": 0.4105, "step": 39489 }, { "epoch": 0.8375220037751056, "grad_norm": 0.3631405830383301, "learning_rate": 1.2543655262985212e-05, "loss": 0.4873, "step": 39490 }, { "epoch": 0.8375432122330385, "grad_norm": 0.3817116618156433, "learning_rate": 1.2543332735523775e-05, "loss": 0.4576, "step": 39491 }, { "epoch": 0.8375644206909716, "grad_norm": 0.3806895613670349, "learning_rate": 1.2543010205233665e-05, "loss": 0.4557, "step": 39492 }, { "epoch": 0.8375856291489046, "grad_norm": 0.36392733454704285, "learning_rate": 1.2542687672115243e-05, "loss": 0.4194, "step": 39493 }, { "epoch": 0.8376068376068376, "grad_norm": 0.3926752507686615, "learning_rate": 1.2542365136168864e-05, "loss": 0.4529, "step": 39494 }, { "epoch": 0.8376280460647706, "grad_norm": 0.33357131481170654, "learning_rate": 1.254204259739489e-05, "loss": 0.4829, "step": 39495 }, { "epoch": 0.8376492545227037, "grad_norm": 0.35903453826904297, "learning_rate": 1.2541720055793678e-05, "loss": 0.5366, "step": 39496 }, { "epoch": 0.8376704629806366, "grad_norm": 0.3328452408313751, "learning_rate": 1.2541397511365584e-05, "loss": 0.5236, "step": 39497 }, { "epoch": 0.8376916714385697, "grad_norm": 0.4083012342453003, "learning_rate": 1.2541074964110976e-05, "loss": 0.5234, "step": 39498 }, { "epoch": 0.8377128798965027, "grad_norm": 0.3608327805995941, "learning_rate": 1.2540752414030202e-05, "loss": 0.537, "step": 39499 }, { "epoch": 0.8377340883544357, "grad_norm": 0.354667067527771, "learning_rate": 1.2540429861123627e-05, "loss": 0.4445, "step": 39500 }, { "epoch": 0.8377552968123688, "grad_norm": 0.34975913166999817, "learning_rate": 1.2540107305391606e-05, "loss": 0.4513, "step": 39501 }, { "epoch": 0.8377765052703018, "grad_norm": 0.4058026969432831, "learning_rate": 1.25397847468345e-05, "loss": 0.46, "step": 39502 }, { "epoch": 0.8377977137282349, "grad_norm": 0.3073616027832031, "learning_rate": 1.2539462185452668e-05, "loss": 0.4494, "step": 39503 }, { "epoch": 0.8378189221861678, "grad_norm": 0.349182665348053, "learning_rate": 1.253913962124647e-05, "loss": 0.5719, "step": 39504 }, { "epoch": 0.8378401306441009, "grad_norm": 0.37901246547698975, "learning_rate": 1.2538817054216259e-05, "loss": 0.4514, "step": 39505 }, { "epoch": 0.8378613391020339, "grad_norm": 0.342724084854126, "learning_rate": 1.2538494484362402e-05, "loss": 0.494, "step": 39506 }, { "epoch": 0.8378825475599669, "grad_norm": 0.4173021912574768, "learning_rate": 1.2538171911685253e-05, "loss": 0.5336, "step": 39507 }, { "epoch": 0.8379037560178999, "grad_norm": 0.32294848561286926, "learning_rate": 1.2537849336185169e-05, "loss": 0.438, "step": 39508 }, { "epoch": 0.837924964475833, "grad_norm": 0.34309712052345276, "learning_rate": 1.253752675786251e-05, "loss": 0.4283, "step": 39509 }, { "epoch": 0.8379461729337659, "grad_norm": 0.40744104981422424, "learning_rate": 1.253720417671764e-05, "loss": 0.5216, "step": 39510 }, { "epoch": 0.837967381391699, "grad_norm": 0.3735417425632477, "learning_rate": 1.2536881592750909e-05, "loss": 0.4776, "step": 39511 }, { "epoch": 0.837988589849632, "grad_norm": 0.3311539590358734, "learning_rate": 1.2536559005962686e-05, "loss": 0.4166, "step": 39512 }, { "epoch": 0.8380097983075651, "grad_norm": 0.3757282495498657, "learning_rate": 1.253623641635332e-05, "loss": 0.4098, "step": 39513 }, { "epoch": 0.8380310067654981, "grad_norm": 0.3362991511821747, "learning_rate": 1.2535913823923173e-05, "loss": 0.4056, "step": 39514 }, { "epoch": 0.8380522152234311, "grad_norm": 0.3589981198310852, "learning_rate": 1.253559122867261e-05, "loss": 0.4274, "step": 39515 }, { "epoch": 0.8380734236813642, "grad_norm": 0.41411834955215454, "learning_rate": 1.253526863060198e-05, "loss": 0.504, "step": 39516 }, { "epoch": 0.8380946321392971, "grad_norm": 0.36249998211860657, "learning_rate": 1.2534946029711646e-05, "loss": 0.496, "step": 39517 }, { "epoch": 0.8381158405972302, "grad_norm": 0.3835707902908325, "learning_rate": 1.2534623426001968e-05, "loss": 0.5235, "step": 39518 }, { "epoch": 0.8381370490551632, "grad_norm": 0.4263286590576172, "learning_rate": 1.2534300819473302e-05, "loss": 0.5316, "step": 39519 }, { "epoch": 0.8381582575130962, "grad_norm": 0.35197925567626953, "learning_rate": 1.2533978210126012e-05, "loss": 0.5236, "step": 39520 }, { "epoch": 0.8381794659710292, "grad_norm": 0.33619433641433716, "learning_rate": 1.2533655597960453e-05, "loss": 0.4681, "step": 39521 }, { "epoch": 0.8382006744289623, "grad_norm": 0.4195786714553833, "learning_rate": 1.253333298297698e-05, "loss": 0.5568, "step": 39522 }, { "epoch": 0.8382218828868953, "grad_norm": 0.3307880163192749, "learning_rate": 1.2533010365175962e-05, "loss": 0.454, "step": 39523 }, { "epoch": 0.8382430913448283, "grad_norm": 0.4115419387817383, "learning_rate": 1.253268774455775e-05, "loss": 0.5392, "step": 39524 }, { "epoch": 0.8382642998027613, "grad_norm": 0.33645927906036377, "learning_rate": 1.2532365121122703e-05, "loss": 0.4689, "step": 39525 }, { "epoch": 0.8382855082606944, "grad_norm": 0.4385969042778015, "learning_rate": 1.2532042494871184e-05, "loss": 0.4296, "step": 39526 }, { "epoch": 0.8383067167186274, "grad_norm": 0.4354969263076782, "learning_rate": 1.2531719865803549e-05, "loss": 0.5174, "step": 39527 }, { "epoch": 0.8383279251765604, "grad_norm": 0.35212376713752747, "learning_rate": 1.2531397233920153e-05, "loss": 0.5128, "step": 39528 }, { "epoch": 0.8383491336344935, "grad_norm": 0.3323356807231903, "learning_rate": 1.2531074599221363e-05, "loss": 0.4296, "step": 39529 }, { "epoch": 0.8383703420924264, "grad_norm": 0.3290892541408539, "learning_rate": 1.2530751961707532e-05, "loss": 0.4355, "step": 39530 }, { "epoch": 0.8383915505503595, "grad_norm": 0.3674180209636688, "learning_rate": 1.253042932137902e-05, "loss": 0.5391, "step": 39531 }, { "epoch": 0.8384127590082925, "grad_norm": 0.3125770688056946, "learning_rate": 1.253010667823619e-05, "loss": 0.4177, "step": 39532 }, { "epoch": 0.8384339674662256, "grad_norm": 0.41809552907943726, "learning_rate": 1.2529784032279393e-05, "loss": 0.4857, "step": 39533 }, { "epoch": 0.8384551759241585, "grad_norm": 0.38780274987220764, "learning_rate": 1.2529461383508996e-05, "loss": 0.5097, "step": 39534 }, { "epoch": 0.8384763843820916, "grad_norm": 0.3862222731113434, "learning_rate": 1.2529138731925351e-05, "loss": 0.5159, "step": 39535 }, { "epoch": 0.8384975928400246, "grad_norm": 0.32627245783805847, "learning_rate": 1.252881607752882e-05, "loss": 0.3866, "step": 39536 }, { "epoch": 0.8385188012979576, "grad_norm": 0.37979063391685486, "learning_rate": 1.2528493420319766e-05, "loss": 0.4791, "step": 39537 }, { "epoch": 0.8385400097558906, "grad_norm": 0.40517884492874146, "learning_rate": 1.2528170760298541e-05, "loss": 0.4621, "step": 39538 }, { "epoch": 0.8385612182138237, "grad_norm": 0.37653523683547974, "learning_rate": 1.2527848097465505e-05, "loss": 0.4603, "step": 39539 }, { "epoch": 0.8385824266717566, "grad_norm": 0.3614198863506317, "learning_rate": 1.252752543182102e-05, "loss": 0.4682, "step": 39540 }, { "epoch": 0.8386036351296897, "grad_norm": 0.33013203740119934, "learning_rate": 1.2527202763365445e-05, "loss": 0.4579, "step": 39541 }, { "epoch": 0.8386248435876228, "grad_norm": 0.33446571230888367, "learning_rate": 1.252688009209913e-05, "loss": 0.434, "step": 39542 }, { "epoch": 0.8386460520455558, "grad_norm": 0.3391929566860199, "learning_rate": 1.2526557418022447e-05, "loss": 0.4136, "step": 39543 }, { "epoch": 0.8386672605034888, "grad_norm": 0.335235595703125, "learning_rate": 1.2526234741135748e-05, "loss": 0.4303, "step": 39544 }, { "epoch": 0.8386884689614218, "grad_norm": 0.41067391633987427, "learning_rate": 1.2525912061439392e-05, "loss": 0.4432, "step": 39545 }, { "epoch": 0.8387096774193549, "grad_norm": 0.3207014501094818, "learning_rate": 1.2525589378933737e-05, "loss": 0.4801, "step": 39546 }, { "epoch": 0.8387308858772878, "grad_norm": 0.33555421233177185, "learning_rate": 1.2525266693619145e-05, "loss": 0.454, "step": 39547 }, { "epoch": 0.8387520943352209, "grad_norm": 0.36268728971481323, "learning_rate": 1.2524944005495971e-05, "loss": 0.4183, "step": 39548 }, { "epoch": 0.8387733027931539, "grad_norm": 0.34852778911590576, "learning_rate": 1.2524621314564582e-05, "loss": 0.4734, "step": 39549 }, { "epoch": 0.838794511251087, "grad_norm": 0.37150558829307556, "learning_rate": 1.2524298620825325e-05, "loss": 0.4712, "step": 39550 }, { "epoch": 0.8388157197090199, "grad_norm": 0.35430437326431274, "learning_rate": 1.2523975924278568e-05, "loss": 0.4421, "step": 39551 }, { "epoch": 0.838836928166953, "grad_norm": 0.4359830915927887, "learning_rate": 1.2523653224924668e-05, "loss": 0.5034, "step": 39552 }, { "epoch": 0.838858136624886, "grad_norm": 0.4352993071079254, "learning_rate": 1.2523330522763981e-05, "loss": 0.4839, "step": 39553 }, { "epoch": 0.838879345082819, "grad_norm": 0.31992003321647644, "learning_rate": 1.2523007817796866e-05, "loss": 0.4194, "step": 39554 }, { "epoch": 0.8389005535407521, "grad_norm": 0.4134684205055237, "learning_rate": 1.2522685110023686e-05, "loss": 0.5706, "step": 39555 }, { "epoch": 0.8389217619986851, "grad_norm": 0.3349858224391937, "learning_rate": 1.2522362399444798e-05, "loss": 0.3984, "step": 39556 }, { "epoch": 0.8389429704566181, "grad_norm": 0.3453086316585541, "learning_rate": 1.2522039686060561e-05, "loss": 0.433, "step": 39557 }, { "epoch": 0.8389641789145511, "grad_norm": 0.38331490755081177, "learning_rate": 1.2521716969871332e-05, "loss": 0.4952, "step": 39558 }, { "epoch": 0.8389853873724842, "grad_norm": 0.3627672493457794, "learning_rate": 1.252139425087747e-05, "loss": 0.4875, "step": 39559 }, { "epoch": 0.8390065958304171, "grad_norm": 0.4318982660770416, "learning_rate": 1.2521071529079339e-05, "loss": 0.5313, "step": 39560 }, { "epoch": 0.8390278042883502, "grad_norm": 0.3531823456287384, "learning_rate": 1.252074880447729e-05, "loss": 0.5058, "step": 39561 }, { "epoch": 0.8390490127462832, "grad_norm": 0.36745506525039673, "learning_rate": 1.2520426077071686e-05, "loss": 0.4754, "step": 39562 }, { "epoch": 0.8390702212042163, "grad_norm": 0.36295053362846375, "learning_rate": 1.252010334686289e-05, "loss": 0.4879, "step": 39563 }, { "epoch": 0.8390914296621492, "grad_norm": 0.34824338555336, "learning_rate": 1.2519780613851254e-05, "loss": 0.525, "step": 39564 }, { "epoch": 0.8391126381200823, "grad_norm": 0.3716152012348175, "learning_rate": 1.251945787803714e-05, "loss": 0.5662, "step": 39565 }, { "epoch": 0.8391338465780153, "grad_norm": 0.37310636043548584, "learning_rate": 1.2519135139420907e-05, "loss": 0.4645, "step": 39566 }, { "epoch": 0.8391550550359483, "grad_norm": 0.35535332560539246, "learning_rate": 1.2518812398002915e-05, "loss": 0.5209, "step": 39567 }, { "epoch": 0.8391762634938814, "grad_norm": 11.260985374450684, "learning_rate": 1.251848965378352e-05, "loss": 0.5093, "step": 39568 }, { "epoch": 0.8391974719518144, "grad_norm": 0.38592609763145447, "learning_rate": 1.2518166906763085e-05, "loss": 0.4949, "step": 39569 }, { "epoch": 0.8392186804097475, "grad_norm": 0.3369855284690857, "learning_rate": 1.2517844156941964e-05, "loss": 0.4663, "step": 39570 }, { "epoch": 0.8392398888676804, "grad_norm": 0.3760256767272949, "learning_rate": 1.251752140432052e-05, "loss": 0.637, "step": 39571 }, { "epoch": 0.8392610973256135, "grad_norm": 0.354432076215744, "learning_rate": 1.2517198648899112e-05, "loss": 0.49, "step": 39572 }, { "epoch": 0.8392823057835465, "grad_norm": 0.3679298460483551, "learning_rate": 1.2516875890678096e-05, "loss": 0.5099, "step": 39573 }, { "epoch": 0.8393035142414795, "grad_norm": 0.380816251039505, "learning_rate": 1.2516553129657831e-05, "loss": 0.5232, "step": 39574 }, { "epoch": 0.8393247226994125, "grad_norm": 0.4464423656463623, "learning_rate": 1.2516230365838679e-05, "loss": 0.4252, "step": 39575 }, { "epoch": 0.8393459311573456, "grad_norm": 0.5595711469650269, "learning_rate": 1.2515907599220996e-05, "loss": 0.471, "step": 39576 }, { "epoch": 0.8393671396152785, "grad_norm": 0.38980183005332947, "learning_rate": 1.2515584829805145e-05, "loss": 0.4875, "step": 39577 }, { "epoch": 0.8393883480732116, "grad_norm": 0.3659915626049042, "learning_rate": 1.2515262057591479e-05, "loss": 0.5371, "step": 39578 }, { "epoch": 0.8394095565311446, "grad_norm": 0.3300512433052063, "learning_rate": 1.2514939282580365e-05, "loss": 0.4527, "step": 39579 }, { "epoch": 0.8394307649890776, "grad_norm": 0.3947840631008148, "learning_rate": 1.2514616504772152e-05, "loss": 0.4709, "step": 39580 }, { "epoch": 0.8394519734470106, "grad_norm": 0.3327481746673584, "learning_rate": 1.251429372416721e-05, "loss": 0.4579, "step": 39581 }, { "epoch": 0.8394731819049437, "grad_norm": 0.36759597063064575, "learning_rate": 1.251397094076589e-05, "loss": 0.5669, "step": 39582 }, { "epoch": 0.8394943903628768, "grad_norm": 0.32363274693489075, "learning_rate": 1.2513648154568552e-05, "loss": 0.4701, "step": 39583 }, { "epoch": 0.8395155988208097, "grad_norm": 0.3816424012184143, "learning_rate": 1.2513325365575557e-05, "loss": 0.5845, "step": 39584 }, { "epoch": 0.8395368072787428, "grad_norm": 0.3379261791706085, "learning_rate": 1.2513002573787265e-05, "loss": 0.4784, "step": 39585 }, { "epoch": 0.8395580157366758, "grad_norm": 0.3832685649394989, "learning_rate": 1.2512679779204032e-05, "loss": 0.5506, "step": 39586 }, { "epoch": 0.8395792241946088, "grad_norm": 0.4033265709877014, "learning_rate": 1.2512356981826217e-05, "loss": 0.516, "step": 39587 }, { "epoch": 0.8396004326525418, "grad_norm": 0.3589722514152527, "learning_rate": 1.2512034181654182e-05, "loss": 0.4759, "step": 39588 }, { "epoch": 0.8396216411104749, "grad_norm": 0.38970792293548584, "learning_rate": 1.2511711378688284e-05, "loss": 0.5708, "step": 39589 }, { "epoch": 0.8396428495684078, "grad_norm": 0.3372872769832611, "learning_rate": 1.2511388572928885e-05, "loss": 0.4972, "step": 39590 }, { "epoch": 0.8396640580263409, "grad_norm": 0.6557304859161377, "learning_rate": 1.2511065764376337e-05, "loss": 0.496, "step": 39591 }, { "epoch": 0.8396852664842739, "grad_norm": 0.3170608878135681, "learning_rate": 1.2510742953031005e-05, "loss": 0.4899, "step": 39592 }, { "epoch": 0.839706474942207, "grad_norm": 0.3940390944480896, "learning_rate": 1.2510420138893247e-05, "loss": 0.3891, "step": 39593 }, { "epoch": 0.8397276834001399, "grad_norm": 0.5377344489097595, "learning_rate": 1.2510097321963424e-05, "loss": 0.4436, "step": 39594 }, { "epoch": 0.839748891858073, "grad_norm": 0.399993360042572, "learning_rate": 1.250977450224189e-05, "loss": 0.4449, "step": 39595 }, { "epoch": 0.8397701003160061, "grad_norm": 0.35230037569999695, "learning_rate": 1.2509451679729006e-05, "loss": 0.4012, "step": 39596 }, { "epoch": 0.839791308773939, "grad_norm": 0.4952578544616699, "learning_rate": 1.2509128854425133e-05, "loss": 0.5412, "step": 39597 }, { "epoch": 0.8398125172318721, "grad_norm": 0.3528933525085449, "learning_rate": 1.2508806026330627e-05, "loss": 0.4136, "step": 39598 }, { "epoch": 0.8398337256898051, "grad_norm": 0.38329586386680603, "learning_rate": 1.2508483195445848e-05, "loss": 0.4589, "step": 39599 }, { "epoch": 0.8398549341477382, "grad_norm": 0.40320754051208496, "learning_rate": 1.250816036177116e-05, "loss": 0.4998, "step": 39600 }, { "epoch": 0.8398761426056711, "grad_norm": 0.4248812794685364, "learning_rate": 1.2507837525306913e-05, "loss": 0.4755, "step": 39601 }, { "epoch": 0.8398973510636042, "grad_norm": 0.38354331254959106, "learning_rate": 1.2507514686053477e-05, "loss": 0.5439, "step": 39602 }, { "epoch": 0.8399185595215372, "grad_norm": 0.43840229511260986, "learning_rate": 1.2507191844011201e-05, "loss": 0.5299, "step": 39603 }, { "epoch": 0.8399397679794702, "grad_norm": 0.34405481815338135, "learning_rate": 1.2506868999180449e-05, "loss": 0.4172, "step": 39604 }, { "epoch": 0.8399609764374032, "grad_norm": 0.41606876254081726, "learning_rate": 1.2506546151561578e-05, "loss": 0.5196, "step": 39605 }, { "epoch": 0.8399821848953363, "grad_norm": 0.3957231938838959, "learning_rate": 1.2506223301154946e-05, "loss": 0.5167, "step": 39606 }, { "epoch": 0.8400033933532692, "grad_norm": 0.3644977807998657, "learning_rate": 1.2505900447960916e-05, "loss": 0.4345, "step": 39607 }, { "epoch": 0.8400246018112023, "grad_norm": 0.3486063480377197, "learning_rate": 1.2505577591979848e-05, "loss": 0.4559, "step": 39608 }, { "epoch": 0.8400458102691354, "grad_norm": 0.36197522282600403, "learning_rate": 1.2505254733212098e-05, "loss": 0.4239, "step": 39609 }, { "epoch": 0.8400670187270683, "grad_norm": 0.33113011717796326, "learning_rate": 1.2504931871658024e-05, "loss": 0.4735, "step": 39610 }, { "epoch": 0.8400882271850014, "grad_norm": 0.3875053822994232, "learning_rate": 1.2504609007317987e-05, "loss": 0.4884, "step": 39611 }, { "epoch": 0.8401094356429344, "grad_norm": 0.3766266107559204, "learning_rate": 1.2504286140192343e-05, "loss": 0.4733, "step": 39612 }, { "epoch": 0.8401306441008675, "grad_norm": 0.41121771931648254, "learning_rate": 1.2503963270281454e-05, "loss": 0.5605, "step": 39613 }, { "epoch": 0.8401518525588004, "grad_norm": 0.343079149723053, "learning_rate": 1.2503640397585684e-05, "loss": 0.4852, "step": 39614 }, { "epoch": 0.8401730610167335, "grad_norm": 0.37642523646354675, "learning_rate": 1.250331752210538e-05, "loss": 0.459, "step": 39615 }, { "epoch": 0.8401942694746665, "grad_norm": 0.37770962715148926, "learning_rate": 1.2502994643840913e-05, "loss": 0.5295, "step": 39616 }, { "epoch": 0.8402154779325995, "grad_norm": 0.4468597173690796, "learning_rate": 1.2502671762792638e-05, "loss": 0.5298, "step": 39617 }, { "epoch": 0.8402366863905325, "grad_norm": 0.32414889335632324, "learning_rate": 1.2502348878960906e-05, "loss": 0.4663, "step": 39618 }, { "epoch": 0.8402578948484656, "grad_norm": 0.43782559037208557, "learning_rate": 1.250202599234609e-05, "loss": 0.513, "step": 39619 }, { "epoch": 0.8402791033063985, "grad_norm": 0.33987316489219666, "learning_rate": 1.2501703102948541e-05, "loss": 0.4693, "step": 39620 }, { "epoch": 0.8403003117643316, "grad_norm": 0.34922415018081665, "learning_rate": 1.2501380210768618e-05, "loss": 0.4631, "step": 39621 }, { "epoch": 0.8403215202222646, "grad_norm": 0.36785808205604553, "learning_rate": 1.2501057315806684e-05, "loss": 0.4581, "step": 39622 }, { "epoch": 0.8403427286801977, "grad_norm": 0.3713463246822357, "learning_rate": 1.2500734418063096e-05, "loss": 0.5123, "step": 39623 }, { "epoch": 0.8403639371381307, "grad_norm": 0.47668859362602234, "learning_rate": 1.2500411517538211e-05, "loss": 0.4875, "step": 39624 }, { "epoch": 0.8403851455960637, "grad_norm": 0.36535167694091797, "learning_rate": 1.2500088614232389e-05, "loss": 0.4531, "step": 39625 }, { "epoch": 0.8404063540539968, "grad_norm": 0.36817511916160583, "learning_rate": 1.2499765708145993e-05, "loss": 0.4641, "step": 39626 }, { "epoch": 0.8404275625119297, "grad_norm": 0.39236512780189514, "learning_rate": 1.2499442799279375e-05, "loss": 0.4703, "step": 39627 }, { "epoch": 0.8404487709698628, "grad_norm": 0.35887783765792847, "learning_rate": 1.2499119887632904e-05, "loss": 0.5002, "step": 39628 }, { "epoch": 0.8404699794277958, "grad_norm": 0.3796658515930176, "learning_rate": 1.2498796973206928e-05, "loss": 0.5239, "step": 39629 }, { "epoch": 0.8404911878857289, "grad_norm": 0.36582672595977783, "learning_rate": 1.2498474056001817e-05, "loss": 0.4982, "step": 39630 }, { "epoch": 0.8405123963436618, "grad_norm": 0.3623785376548767, "learning_rate": 1.2498151136017922e-05, "loss": 0.5383, "step": 39631 }, { "epoch": 0.8405336048015949, "grad_norm": 0.3618386387825012, "learning_rate": 1.2497828213255602e-05, "loss": 0.462, "step": 39632 }, { "epoch": 0.8405548132595279, "grad_norm": 0.335464745759964, "learning_rate": 1.2497505287715226e-05, "loss": 0.4666, "step": 39633 }, { "epoch": 0.8405760217174609, "grad_norm": 0.3839542269706726, "learning_rate": 1.2497182359397144e-05, "loss": 0.5419, "step": 39634 }, { "epoch": 0.8405972301753939, "grad_norm": 0.35900232195854187, "learning_rate": 1.2496859428301715e-05, "loss": 0.5085, "step": 39635 }, { "epoch": 0.840618438633327, "grad_norm": 0.44264549016952515, "learning_rate": 1.2496536494429303e-05, "loss": 0.4893, "step": 39636 }, { "epoch": 0.84063964709126, "grad_norm": 0.37934091687202454, "learning_rate": 1.2496213557780265e-05, "loss": 0.423, "step": 39637 }, { "epoch": 0.840660855549193, "grad_norm": 0.33779972791671753, "learning_rate": 1.2495890618354956e-05, "loss": 0.4321, "step": 39638 }, { "epoch": 0.8406820640071261, "grad_norm": 0.3815365135669708, "learning_rate": 1.2495567676153741e-05, "loss": 0.4836, "step": 39639 }, { "epoch": 0.840703272465059, "grad_norm": 0.41493988037109375, "learning_rate": 1.249524473117698e-05, "loss": 0.5071, "step": 39640 }, { "epoch": 0.8407244809229921, "grad_norm": 0.3782181143760681, "learning_rate": 1.2494921783425027e-05, "loss": 0.5869, "step": 39641 }, { "epoch": 0.8407456893809251, "grad_norm": 0.34123900532722473, "learning_rate": 1.2494598832898246e-05, "loss": 0.4403, "step": 39642 }, { "epoch": 0.8407668978388582, "grad_norm": 0.42995762825012207, "learning_rate": 1.2494275879596992e-05, "loss": 0.4542, "step": 39643 }, { "epoch": 0.8407881062967911, "grad_norm": 0.35919758677482605, "learning_rate": 1.2493952923521625e-05, "loss": 0.4428, "step": 39644 }, { "epoch": 0.8408093147547242, "grad_norm": 0.36689528822898865, "learning_rate": 1.2493629964672506e-05, "loss": 0.4996, "step": 39645 }, { "epoch": 0.8408305232126572, "grad_norm": 0.4675358533859253, "learning_rate": 1.2493307003049993e-05, "loss": 0.4929, "step": 39646 }, { "epoch": 0.8408517316705902, "grad_norm": 0.34475213289260864, "learning_rate": 1.2492984038654447e-05, "loss": 0.4688, "step": 39647 }, { "epoch": 0.8408729401285232, "grad_norm": 0.3459582030773163, "learning_rate": 1.2492661071486228e-05, "loss": 0.4507, "step": 39648 }, { "epoch": 0.8408941485864563, "grad_norm": 0.3595857322216034, "learning_rate": 1.2492338101545689e-05, "loss": 0.5037, "step": 39649 }, { "epoch": 0.8409153570443894, "grad_norm": 0.32706770300865173, "learning_rate": 1.2492015128833194e-05, "loss": 0.4513, "step": 39650 }, { "epoch": 0.8409365655023223, "grad_norm": 0.37145307660102844, "learning_rate": 1.2491692153349102e-05, "loss": 0.5176, "step": 39651 }, { "epoch": 0.8409577739602554, "grad_norm": 0.35460808873176575, "learning_rate": 1.249136917509377e-05, "loss": 0.4691, "step": 39652 }, { "epoch": 0.8409789824181884, "grad_norm": 0.4112778306007385, "learning_rate": 1.2491046194067563e-05, "loss": 0.4919, "step": 39653 }, { "epoch": 0.8410001908761214, "grad_norm": 0.4048273265361786, "learning_rate": 1.2490723210270834e-05, "loss": 0.5591, "step": 39654 }, { "epoch": 0.8410213993340544, "grad_norm": 0.3958776891231537, "learning_rate": 1.2490400223703942e-05, "loss": 0.5437, "step": 39655 }, { "epoch": 0.8410426077919875, "grad_norm": 0.380170077085495, "learning_rate": 1.2490077234367253e-05, "loss": 0.4966, "step": 39656 }, { "epoch": 0.8410638162499204, "grad_norm": 0.33426302671432495, "learning_rate": 1.248975424226112e-05, "loss": 0.4693, "step": 39657 }, { "epoch": 0.8410850247078535, "grad_norm": 0.36259451508522034, "learning_rate": 1.2489431247385899e-05, "loss": 0.6229, "step": 39658 }, { "epoch": 0.8411062331657865, "grad_norm": 0.34402987360954285, "learning_rate": 1.248910824974196e-05, "loss": 0.5292, "step": 39659 }, { "epoch": 0.8411274416237196, "grad_norm": 0.37696442008018494, "learning_rate": 1.2488785249329654e-05, "loss": 0.5559, "step": 39660 }, { "epoch": 0.8411486500816525, "grad_norm": 0.35426807403564453, "learning_rate": 1.2488462246149345e-05, "loss": 0.5281, "step": 39661 }, { "epoch": 0.8411698585395856, "grad_norm": 0.36969587206840515, "learning_rate": 1.2488139240201386e-05, "loss": 0.4483, "step": 39662 }, { "epoch": 0.8411910669975186, "grad_norm": 0.30468860268592834, "learning_rate": 1.2487816231486142e-05, "loss": 0.4758, "step": 39663 }, { "epoch": 0.8412122754554516, "grad_norm": 0.36491695046424866, "learning_rate": 1.248749322000397e-05, "loss": 0.4412, "step": 39664 }, { "epoch": 0.8412334839133847, "grad_norm": 0.34311455488204956, "learning_rate": 1.2487170205755232e-05, "loss": 0.4468, "step": 39665 }, { "epoch": 0.8412546923713177, "grad_norm": 0.3580920398235321, "learning_rate": 1.2486847188740283e-05, "loss": 0.473, "step": 39666 }, { "epoch": 0.8412759008292507, "grad_norm": 0.4188193082809448, "learning_rate": 1.2486524168959484e-05, "loss": 0.5701, "step": 39667 }, { "epoch": 0.8412971092871837, "grad_norm": 0.41904106736183167, "learning_rate": 1.2486201146413198e-05, "loss": 0.4634, "step": 39668 }, { "epoch": 0.8413183177451168, "grad_norm": 0.37261682748794556, "learning_rate": 1.2485878121101776e-05, "loss": 0.5004, "step": 39669 }, { "epoch": 0.8413395262030497, "grad_norm": 0.4177846610546112, "learning_rate": 1.2485555093025581e-05, "loss": 0.5294, "step": 39670 }, { "epoch": 0.8413607346609828, "grad_norm": 0.3883174657821655, "learning_rate": 1.2485232062184978e-05, "loss": 0.5926, "step": 39671 }, { "epoch": 0.8413819431189158, "grad_norm": 0.3607150912284851, "learning_rate": 1.2484909028580319e-05, "loss": 0.4591, "step": 39672 }, { "epoch": 0.8414031515768489, "grad_norm": 0.4453117251396179, "learning_rate": 1.2484585992211966e-05, "loss": 0.435, "step": 39673 }, { "epoch": 0.8414243600347818, "grad_norm": 0.3866330683231354, "learning_rate": 1.2484262953080277e-05, "loss": 0.4821, "step": 39674 }, { "epoch": 0.8414455684927149, "grad_norm": 0.4035085439682007, "learning_rate": 1.2483939911185614e-05, "loss": 0.555, "step": 39675 }, { "epoch": 0.8414667769506479, "grad_norm": 0.33487796783447266, "learning_rate": 1.2483616866528334e-05, "loss": 0.5121, "step": 39676 }, { "epoch": 0.8414879854085809, "grad_norm": 0.3735869228839874, "learning_rate": 1.2483293819108797e-05, "loss": 0.4643, "step": 39677 }, { "epoch": 0.841509193866514, "grad_norm": 0.338325560092926, "learning_rate": 1.248297076892736e-05, "loss": 0.4305, "step": 39678 }, { "epoch": 0.841530402324447, "grad_norm": 0.3671863377094269, "learning_rate": 1.2482647715984389e-05, "loss": 0.4398, "step": 39679 }, { "epoch": 0.84155161078238, "grad_norm": 0.39050406217575073, "learning_rate": 1.2482324660280236e-05, "loss": 0.5236, "step": 39680 }, { "epoch": 0.841572819240313, "grad_norm": 0.3349871039390564, "learning_rate": 1.2482001601815264e-05, "loss": 0.4055, "step": 39681 }, { "epoch": 0.8415940276982461, "grad_norm": 0.34544312953948975, "learning_rate": 1.2481678540589833e-05, "loss": 0.5252, "step": 39682 }, { "epoch": 0.8416152361561791, "grad_norm": 0.4416602551937103, "learning_rate": 1.2481355476604293e-05, "loss": 0.4571, "step": 39683 }, { "epoch": 0.8416364446141121, "grad_norm": 0.36257463693618774, "learning_rate": 1.2481032409859018e-05, "loss": 0.4876, "step": 39684 }, { "epoch": 0.8416576530720451, "grad_norm": 0.3699280023574829, "learning_rate": 1.248070934035436e-05, "loss": 0.5194, "step": 39685 }, { "epoch": 0.8416788615299782, "grad_norm": 0.35706835985183716, "learning_rate": 1.2480386268090676e-05, "loss": 0.4715, "step": 39686 }, { "epoch": 0.8417000699879111, "grad_norm": 0.4187358021736145, "learning_rate": 1.2480063193068331e-05, "loss": 0.4901, "step": 39687 }, { "epoch": 0.8417212784458442, "grad_norm": 0.34576576948165894, "learning_rate": 1.2479740115287677e-05, "loss": 0.4247, "step": 39688 }, { "epoch": 0.8417424869037772, "grad_norm": 0.3808209002017975, "learning_rate": 1.2479417034749079e-05, "loss": 0.4165, "step": 39689 }, { "epoch": 0.8417636953617103, "grad_norm": 0.3758426010608673, "learning_rate": 1.2479093951452899e-05, "loss": 0.4988, "step": 39690 }, { "epoch": 0.8417849038196433, "grad_norm": 0.3702094852924347, "learning_rate": 1.2478770865399489e-05, "loss": 0.5003, "step": 39691 }, { "epoch": 0.8418061122775763, "grad_norm": 0.4197303056716919, "learning_rate": 1.2478447776589211e-05, "loss": 0.5183, "step": 39692 }, { "epoch": 0.8418273207355094, "grad_norm": 0.4083578884601593, "learning_rate": 1.2478124685022428e-05, "loss": 0.4966, "step": 39693 }, { "epoch": 0.8418485291934423, "grad_norm": 0.40837499499320984, "learning_rate": 1.2477801590699492e-05, "loss": 0.5278, "step": 39694 }, { "epoch": 0.8418697376513754, "grad_norm": 0.36410120129585266, "learning_rate": 1.2477478493620768e-05, "loss": 0.4229, "step": 39695 }, { "epoch": 0.8418909461093084, "grad_norm": 0.3524482250213623, "learning_rate": 1.2477155393786615e-05, "loss": 0.4854, "step": 39696 }, { "epoch": 0.8419121545672414, "grad_norm": 0.3735082447528839, "learning_rate": 1.2476832291197391e-05, "loss": 0.456, "step": 39697 }, { "epoch": 0.8419333630251744, "grad_norm": 0.3785151243209839, "learning_rate": 1.2476509185853456e-05, "loss": 0.548, "step": 39698 }, { "epoch": 0.8419545714831075, "grad_norm": 0.3512382507324219, "learning_rate": 1.247618607775517e-05, "loss": 0.5254, "step": 39699 }, { "epoch": 0.8419757799410404, "grad_norm": 0.39950457215309143, "learning_rate": 1.247586296690289e-05, "loss": 0.5667, "step": 39700 }, { "epoch": 0.8419969883989735, "grad_norm": 0.3682236075401306, "learning_rate": 1.2475539853296976e-05, "loss": 0.5175, "step": 39701 }, { "epoch": 0.8420181968569065, "grad_norm": 0.31620773673057556, "learning_rate": 1.2475216736937788e-05, "loss": 0.3852, "step": 39702 }, { "epoch": 0.8420394053148396, "grad_norm": 0.36312511563301086, "learning_rate": 1.2474893617825685e-05, "loss": 0.4989, "step": 39703 }, { "epoch": 0.8420606137727726, "grad_norm": 0.3719288110733032, "learning_rate": 1.2474570495961029e-05, "loss": 0.4887, "step": 39704 }, { "epoch": 0.8420818222307056, "grad_norm": 0.39249539375305176, "learning_rate": 1.2474247371344177e-05, "loss": 0.4289, "step": 39705 }, { "epoch": 0.8421030306886387, "grad_norm": 0.8254511952400208, "learning_rate": 1.2473924243975486e-05, "loss": 0.4872, "step": 39706 }, { "epoch": 0.8421242391465716, "grad_norm": 0.3066924512386322, "learning_rate": 1.247360111385532e-05, "loss": 0.4435, "step": 39707 }, { "epoch": 0.8421454476045047, "grad_norm": 0.3575393259525299, "learning_rate": 1.2473277980984037e-05, "loss": 0.4327, "step": 39708 }, { "epoch": 0.8421666560624377, "grad_norm": 0.40735483169555664, "learning_rate": 1.2472954845361993e-05, "loss": 0.4855, "step": 39709 }, { "epoch": 0.8421878645203708, "grad_norm": 0.3522385060787201, "learning_rate": 1.2472631706989552e-05, "loss": 0.4079, "step": 39710 }, { "epoch": 0.8422090729783037, "grad_norm": 0.35986340045928955, "learning_rate": 1.247230856586707e-05, "loss": 0.5487, "step": 39711 }, { "epoch": 0.8422302814362368, "grad_norm": 0.3778166174888611, "learning_rate": 1.2471985421994912e-05, "loss": 0.4816, "step": 39712 }, { "epoch": 0.8422514898941698, "grad_norm": 0.42305728793144226, "learning_rate": 1.247166227537343e-05, "loss": 0.58, "step": 39713 }, { "epoch": 0.8422726983521028, "grad_norm": 0.36165833473205566, "learning_rate": 1.2471339126002989e-05, "loss": 0.542, "step": 39714 }, { "epoch": 0.8422939068100358, "grad_norm": 0.3313432037830353, "learning_rate": 1.2471015973883941e-05, "loss": 0.3949, "step": 39715 }, { "epoch": 0.8423151152679689, "grad_norm": 0.35274365544319153, "learning_rate": 1.2470692819016654e-05, "loss": 0.528, "step": 39716 }, { "epoch": 0.8423363237259018, "grad_norm": 0.3736686110496521, "learning_rate": 1.2470369661401484e-05, "loss": 0.4489, "step": 39717 }, { "epoch": 0.8423575321838349, "grad_norm": 0.3641085624694824, "learning_rate": 1.247004650103879e-05, "loss": 0.4969, "step": 39718 }, { "epoch": 0.842378740641768, "grad_norm": 0.37183767557144165, "learning_rate": 1.2469723337928934e-05, "loss": 0.5017, "step": 39719 }, { "epoch": 0.842399949099701, "grad_norm": 0.571959376335144, "learning_rate": 1.246940017207227e-05, "loss": 0.5018, "step": 39720 }, { "epoch": 0.842421157557634, "grad_norm": 0.3653579652309418, "learning_rate": 1.2469077003469158e-05, "loss": 0.4831, "step": 39721 }, { "epoch": 0.842442366015567, "grad_norm": 0.4987291097640991, "learning_rate": 1.2468753832119966e-05, "loss": 0.4122, "step": 39722 }, { "epoch": 0.8424635744735001, "grad_norm": 0.3604469299316406, "learning_rate": 1.2468430658025044e-05, "loss": 0.5009, "step": 39723 }, { "epoch": 0.842484782931433, "grad_norm": 0.3578135073184967, "learning_rate": 1.2468107481184756e-05, "loss": 0.4695, "step": 39724 }, { "epoch": 0.8425059913893661, "grad_norm": 0.3225932717323303, "learning_rate": 1.246778430159946e-05, "loss": 0.4409, "step": 39725 }, { "epoch": 0.8425271998472991, "grad_norm": 0.35185909271240234, "learning_rate": 1.2467461119269517e-05, "loss": 0.4789, "step": 39726 }, { "epoch": 0.8425484083052321, "grad_norm": 0.444659948348999, "learning_rate": 1.2467137934195286e-05, "loss": 0.4722, "step": 39727 }, { "epoch": 0.8425696167631651, "grad_norm": 0.3175477385520935, "learning_rate": 1.246681474637712e-05, "loss": 0.4453, "step": 39728 }, { "epoch": 0.8425908252210982, "grad_norm": 0.37716785073280334, "learning_rate": 1.2466491555815387e-05, "loss": 0.5211, "step": 39729 }, { "epoch": 0.8426120336790311, "grad_norm": 0.45783188939094543, "learning_rate": 1.2466168362510447e-05, "loss": 0.5027, "step": 39730 }, { "epoch": 0.8426332421369642, "grad_norm": 0.391262948513031, "learning_rate": 1.2465845166462652e-05, "loss": 0.5691, "step": 39731 }, { "epoch": 0.8426544505948973, "grad_norm": 0.38125231862068176, "learning_rate": 1.2465521967672367e-05, "loss": 0.4988, "step": 39732 }, { "epoch": 0.8426756590528303, "grad_norm": 0.34278160333633423, "learning_rate": 1.246519876613995e-05, "loss": 0.4854, "step": 39733 }, { "epoch": 0.8426968675107633, "grad_norm": 0.4012027978897095, "learning_rate": 1.2464875561865756e-05, "loss": 0.5029, "step": 39734 }, { "epoch": 0.8427180759686963, "grad_norm": 0.7870703935623169, "learning_rate": 1.2464552354850155e-05, "loss": 0.4579, "step": 39735 }, { "epoch": 0.8427392844266294, "grad_norm": 0.5341917872428894, "learning_rate": 1.2464229145093499e-05, "loss": 0.4806, "step": 39736 }, { "epoch": 0.8427604928845623, "grad_norm": 0.4194803833961487, "learning_rate": 1.2463905932596148e-05, "loss": 0.3896, "step": 39737 }, { "epoch": 0.8427817013424954, "grad_norm": 0.3686757981777191, "learning_rate": 1.2463582717358462e-05, "loss": 0.4939, "step": 39738 }, { "epoch": 0.8428029098004284, "grad_norm": 0.3467456102371216, "learning_rate": 1.24632594993808e-05, "loss": 0.4661, "step": 39739 }, { "epoch": 0.8428241182583615, "grad_norm": 0.3293549418449402, "learning_rate": 1.246293627866352e-05, "loss": 0.4299, "step": 39740 }, { "epoch": 0.8428453267162944, "grad_norm": 0.3834896981716156, "learning_rate": 1.2462613055206989e-05, "loss": 0.5027, "step": 39741 }, { "epoch": 0.8428665351742275, "grad_norm": 0.43848320841789246, "learning_rate": 1.2462289829011558e-05, "loss": 0.4017, "step": 39742 }, { "epoch": 0.8428877436321605, "grad_norm": 0.36228615045547485, "learning_rate": 1.246196660007759e-05, "loss": 0.4382, "step": 39743 }, { "epoch": 0.8429089520900935, "grad_norm": 0.33994829654693604, "learning_rate": 1.2461643368405444e-05, "loss": 0.5196, "step": 39744 }, { "epoch": 0.8429301605480266, "grad_norm": 0.3405555188655853, "learning_rate": 1.2461320133995481e-05, "loss": 0.4551, "step": 39745 }, { "epoch": 0.8429513690059596, "grad_norm": 0.3337390720844269, "learning_rate": 1.2460996896848058e-05, "loss": 0.4589, "step": 39746 }, { "epoch": 0.8429725774638926, "grad_norm": 0.3897472023963928, "learning_rate": 1.2460673656963535e-05, "loss": 0.4904, "step": 39747 }, { "epoch": 0.8429937859218256, "grad_norm": 0.35904398560523987, "learning_rate": 1.2460350414342273e-05, "loss": 0.4445, "step": 39748 }, { "epoch": 0.8430149943797587, "grad_norm": 0.36587807536125183, "learning_rate": 1.2460027168984631e-05, "loss": 0.4669, "step": 39749 }, { "epoch": 0.8430362028376917, "grad_norm": 0.3741094172000885, "learning_rate": 1.245970392089097e-05, "loss": 0.5151, "step": 39750 }, { "epoch": 0.8430574112956247, "grad_norm": 0.4258420765399933, "learning_rate": 1.2459380670061642e-05, "loss": 0.5351, "step": 39751 }, { "epoch": 0.8430786197535577, "grad_norm": 0.2956438362598419, "learning_rate": 1.2459057416497018e-05, "loss": 0.4065, "step": 39752 }, { "epoch": 0.8430998282114908, "grad_norm": 0.3955211043357849, "learning_rate": 1.2458734160197447e-05, "loss": 0.4883, "step": 39753 }, { "epoch": 0.8431210366694237, "grad_norm": 0.4375980496406555, "learning_rate": 1.2458410901163296e-05, "loss": 0.4749, "step": 39754 }, { "epoch": 0.8431422451273568, "grad_norm": 0.3333809971809387, "learning_rate": 1.2458087639394922e-05, "loss": 0.4631, "step": 39755 }, { "epoch": 0.8431634535852898, "grad_norm": 0.3638283312320709, "learning_rate": 1.2457764374892682e-05, "loss": 0.4125, "step": 39756 }, { "epoch": 0.8431846620432228, "grad_norm": 0.3212173879146576, "learning_rate": 1.245744110765694e-05, "loss": 0.4432, "step": 39757 }, { "epoch": 0.8432058705011558, "grad_norm": 0.44591593742370605, "learning_rate": 1.2457117837688056e-05, "loss": 0.4457, "step": 39758 }, { "epoch": 0.8432270789590889, "grad_norm": 0.36532121896743774, "learning_rate": 1.2456794564986381e-05, "loss": 0.5767, "step": 39759 }, { "epoch": 0.843248287417022, "grad_norm": 0.489693284034729, "learning_rate": 1.2456471289552281e-05, "loss": 0.4433, "step": 39760 }, { "epoch": 0.8432694958749549, "grad_norm": 0.444406121969223, "learning_rate": 1.2456148011386117e-05, "loss": 0.5276, "step": 39761 }, { "epoch": 0.843290704332888, "grad_norm": 0.449476957321167, "learning_rate": 1.2455824730488247e-05, "loss": 0.5746, "step": 39762 }, { "epoch": 0.843311912790821, "grad_norm": 0.40420523285865784, "learning_rate": 1.245550144685903e-05, "loss": 0.4523, "step": 39763 }, { "epoch": 0.843333121248754, "grad_norm": 0.34798067808151245, "learning_rate": 1.2455178160498827e-05, "loss": 0.5325, "step": 39764 }, { "epoch": 0.843354329706687, "grad_norm": 0.3318270444869995, "learning_rate": 1.2454854871407993e-05, "loss": 0.4539, "step": 39765 }, { "epoch": 0.8433755381646201, "grad_norm": 0.36753571033477783, "learning_rate": 1.245453157958689e-05, "loss": 0.4784, "step": 39766 }, { "epoch": 0.843396746622553, "grad_norm": 0.3672132194042206, "learning_rate": 1.2454208285035882e-05, "loss": 0.5562, "step": 39767 }, { "epoch": 0.8434179550804861, "grad_norm": 0.34461382031440735, "learning_rate": 1.2453884987755322e-05, "loss": 0.4911, "step": 39768 }, { "epoch": 0.8434391635384191, "grad_norm": 0.3757510185241699, "learning_rate": 1.2453561687745576e-05, "loss": 0.4987, "step": 39769 }, { "epoch": 0.8434603719963522, "grad_norm": 0.3760324716567993, "learning_rate": 1.2453238385006997e-05, "loss": 0.5008, "step": 39770 }, { "epoch": 0.8434815804542851, "grad_norm": 0.36773768067359924, "learning_rate": 1.245291507953995e-05, "loss": 0.4704, "step": 39771 }, { "epoch": 0.8435027889122182, "grad_norm": 0.34008923172950745, "learning_rate": 1.2452591771344789e-05, "loss": 0.4604, "step": 39772 }, { "epoch": 0.8435239973701513, "grad_norm": 0.41027170419692993, "learning_rate": 1.2452268460421877e-05, "loss": 0.5104, "step": 39773 }, { "epoch": 0.8435452058280842, "grad_norm": 0.3787328004837036, "learning_rate": 1.2451945146771575e-05, "loss": 0.5813, "step": 39774 }, { "epoch": 0.8435664142860173, "grad_norm": 0.3993244469165802, "learning_rate": 1.245162183039424e-05, "loss": 0.4631, "step": 39775 }, { "epoch": 0.8435876227439503, "grad_norm": 1.9981722831726074, "learning_rate": 1.2451298511290232e-05, "loss": 0.4201, "step": 39776 }, { "epoch": 0.8436088312018833, "grad_norm": 0.3617197871208191, "learning_rate": 1.2450975189459915e-05, "loss": 0.5145, "step": 39777 }, { "epoch": 0.8436300396598163, "grad_norm": 0.35698768496513367, "learning_rate": 1.2450651864903642e-05, "loss": 0.4803, "step": 39778 }, { "epoch": 0.8436512481177494, "grad_norm": 0.3651971220970154, "learning_rate": 1.2450328537621773e-05, "loss": 0.4268, "step": 39779 }, { "epoch": 0.8436724565756824, "grad_norm": 0.40973183512687683, "learning_rate": 1.245000520761467e-05, "loss": 0.5282, "step": 39780 }, { "epoch": 0.8436936650336154, "grad_norm": 0.40178894996643066, "learning_rate": 1.2449681874882698e-05, "loss": 0.5502, "step": 39781 }, { "epoch": 0.8437148734915484, "grad_norm": 0.31370604038238525, "learning_rate": 1.2449358539426207e-05, "loss": 0.4691, "step": 39782 }, { "epoch": 0.8437360819494815, "grad_norm": 0.3630959391593933, "learning_rate": 1.2449035201245561e-05, "loss": 0.4472, "step": 39783 }, { "epoch": 0.8437572904074144, "grad_norm": 0.6968401670455933, "learning_rate": 1.2448711860341119e-05, "loss": 0.5305, "step": 39784 }, { "epoch": 0.8437784988653475, "grad_norm": 0.37257856130599976, "learning_rate": 1.2448388516713239e-05, "loss": 0.4842, "step": 39785 }, { "epoch": 0.8437997073232806, "grad_norm": 0.3350714147090912, "learning_rate": 1.2448065170362286e-05, "loss": 0.4794, "step": 39786 }, { "epoch": 0.8438209157812135, "grad_norm": 0.360847145318985, "learning_rate": 1.2447741821288614e-05, "loss": 0.4903, "step": 39787 }, { "epoch": 0.8438421242391466, "grad_norm": 0.5237055420875549, "learning_rate": 1.2447418469492587e-05, "loss": 0.5181, "step": 39788 }, { "epoch": 0.8438633326970796, "grad_norm": 0.38723477721214294, "learning_rate": 1.2447095114974563e-05, "loss": 0.5017, "step": 39789 }, { "epoch": 0.8438845411550127, "grad_norm": 0.31812584400177, "learning_rate": 1.2446771757734898e-05, "loss": 0.4944, "step": 39790 }, { "epoch": 0.8439057496129456, "grad_norm": 0.35286280512809753, "learning_rate": 1.2446448397773958e-05, "loss": 0.5209, "step": 39791 }, { "epoch": 0.8439269580708787, "grad_norm": 0.39419683814048767, "learning_rate": 1.2446125035092097e-05, "loss": 0.5286, "step": 39792 }, { "epoch": 0.8439481665288117, "grad_norm": 0.4018310010433197, "learning_rate": 1.2445801669689677e-05, "loss": 0.5037, "step": 39793 }, { "epoch": 0.8439693749867447, "grad_norm": 0.4005979299545288, "learning_rate": 1.2445478301567058e-05, "loss": 0.5023, "step": 39794 }, { "epoch": 0.8439905834446777, "grad_norm": 0.3971259593963623, "learning_rate": 1.2445154930724602e-05, "loss": 0.4475, "step": 39795 }, { "epoch": 0.8440117919026108, "grad_norm": 0.323182612657547, "learning_rate": 1.244483155716266e-05, "loss": 0.4989, "step": 39796 }, { "epoch": 0.8440330003605437, "grad_norm": 0.38185936212539673, "learning_rate": 1.2444508180881603e-05, "loss": 0.5344, "step": 39797 }, { "epoch": 0.8440542088184768, "grad_norm": 0.37408119440078735, "learning_rate": 1.2444184801881783e-05, "loss": 0.5258, "step": 39798 }, { "epoch": 0.8440754172764098, "grad_norm": 0.30643609166145325, "learning_rate": 1.244386142016356e-05, "loss": 0.3879, "step": 39799 }, { "epoch": 0.8440966257343429, "grad_norm": 0.38720667362213135, "learning_rate": 1.2443538035727298e-05, "loss": 0.4916, "step": 39800 }, { "epoch": 0.8441178341922759, "grad_norm": 0.36058661341667175, "learning_rate": 1.2443214648573351e-05, "loss": 0.5446, "step": 39801 }, { "epoch": 0.8441390426502089, "grad_norm": 0.40120306611061096, "learning_rate": 1.2442891258702088e-05, "loss": 0.466, "step": 39802 }, { "epoch": 0.844160251108142, "grad_norm": 0.3568766713142395, "learning_rate": 1.244256786611386e-05, "loss": 0.5152, "step": 39803 }, { "epoch": 0.8441814595660749, "grad_norm": 0.39255061745643616, "learning_rate": 1.2442244470809028e-05, "loss": 0.4782, "step": 39804 }, { "epoch": 0.844202668024008, "grad_norm": 0.3395780026912689, "learning_rate": 1.2441921072787953e-05, "loss": 0.44, "step": 39805 }, { "epoch": 0.844223876481941, "grad_norm": 0.34609678387641907, "learning_rate": 1.2441597672050995e-05, "loss": 0.5169, "step": 39806 }, { "epoch": 0.844245084939874, "grad_norm": 0.34193187952041626, "learning_rate": 1.2441274268598513e-05, "loss": 0.4826, "step": 39807 }, { "epoch": 0.844266293397807, "grad_norm": 0.33519446849823, "learning_rate": 1.2440950862430868e-05, "loss": 0.4819, "step": 39808 }, { "epoch": 0.8442875018557401, "grad_norm": 0.338589608669281, "learning_rate": 1.244062745354842e-05, "loss": 0.4563, "step": 39809 }, { "epoch": 0.844308710313673, "grad_norm": 0.36714062094688416, "learning_rate": 1.2440304041951524e-05, "loss": 0.4296, "step": 39810 }, { "epoch": 0.8443299187716061, "grad_norm": 0.35134223103523254, "learning_rate": 1.2439980627640543e-05, "loss": 0.4882, "step": 39811 }, { "epoch": 0.8443511272295391, "grad_norm": 0.4296756386756897, "learning_rate": 1.2439657210615838e-05, "loss": 0.5065, "step": 39812 }, { "epoch": 0.8443723356874722, "grad_norm": 0.3404921889305115, "learning_rate": 1.2439333790877766e-05, "loss": 0.5461, "step": 39813 }, { "epoch": 0.8443935441454052, "grad_norm": 0.39032280445098877, "learning_rate": 1.2439010368426692e-05, "loss": 0.5511, "step": 39814 }, { "epoch": 0.8444147526033382, "grad_norm": 11.758111000061035, "learning_rate": 1.2438686943262968e-05, "loss": 0.4933, "step": 39815 }, { "epoch": 0.8444359610612713, "grad_norm": 0.4302755296230316, "learning_rate": 1.243836351538696e-05, "loss": 0.4374, "step": 39816 }, { "epoch": 0.8444571695192042, "grad_norm": 0.3542475402355194, "learning_rate": 1.2438040084799024e-05, "loss": 0.5104, "step": 39817 }, { "epoch": 0.8444783779771373, "grad_norm": 0.39074185490608215, "learning_rate": 1.2437716651499523e-05, "loss": 0.5174, "step": 39818 }, { "epoch": 0.8444995864350703, "grad_norm": 0.33329710364341736, "learning_rate": 1.2437393215488812e-05, "loss": 0.4212, "step": 39819 }, { "epoch": 0.8445207948930034, "grad_norm": 0.3871121108531952, "learning_rate": 1.2437069776767255e-05, "loss": 0.5918, "step": 39820 }, { "epoch": 0.8445420033509363, "grad_norm": 0.4208509624004364, "learning_rate": 1.2436746335335207e-05, "loss": 0.4966, "step": 39821 }, { "epoch": 0.8445632118088694, "grad_norm": 0.3500366508960724, "learning_rate": 1.2436422891193036e-05, "loss": 0.483, "step": 39822 }, { "epoch": 0.8445844202668024, "grad_norm": 0.35744598507881165, "learning_rate": 1.2436099444341094e-05, "loss": 0.4512, "step": 39823 }, { "epoch": 0.8446056287247354, "grad_norm": 0.6228976845741272, "learning_rate": 1.2435775994779742e-05, "loss": 0.6149, "step": 39824 }, { "epoch": 0.8446268371826684, "grad_norm": 0.31636446714401245, "learning_rate": 1.2435452542509343e-05, "loss": 0.4152, "step": 39825 }, { "epoch": 0.8446480456406015, "grad_norm": 0.3638570308685303, "learning_rate": 1.2435129087530255e-05, "loss": 0.5372, "step": 39826 }, { "epoch": 0.8446692540985345, "grad_norm": 0.4235767126083374, "learning_rate": 1.2434805629842836e-05, "loss": 0.5121, "step": 39827 }, { "epoch": 0.8446904625564675, "grad_norm": 0.413818359375, "learning_rate": 1.243448216944745e-05, "loss": 0.5118, "step": 39828 }, { "epoch": 0.8447116710144006, "grad_norm": 0.4301668107509613, "learning_rate": 1.2434158706344453e-05, "loss": 0.5406, "step": 39829 }, { "epoch": 0.8447328794723336, "grad_norm": 0.3293123245239258, "learning_rate": 1.2433835240534203e-05, "loss": 0.4205, "step": 39830 }, { "epoch": 0.8447540879302666, "grad_norm": 0.37462231516838074, "learning_rate": 1.2433511772017066e-05, "loss": 0.3615, "step": 39831 }, { "epoch": 0.8447752963881996, "grad_norm": 0.3364621102809906, "learning_rate": 1.2433188300793399e-05, "loss": 0.4806, "step": 39832 }, { "epoch": 0.8447965048461327, "grad_norm": 0.5197258591651917, "learning_rate": 1.2432864826863557e-05, "loss": 0.4603, "step": 39833 }, { "epoch": 0.8448177133040656, "grad_norm": 0.3764953911304474, "learning_rate": 1.2432541350227907e-05, "loss": 0.4855, "step": 39834 }, { "epoch": 0.8448389217619987, "grad_norm": 0.3461325466632843, "learning_rate": 1.2432217870886805e-05, "loss": 0.4895, "step": 39835 }, { "epoch": 0.8448601302199317, "grad_norm": 0.3863166868686676, "learning_rate": 1.2431894388840612e-05, "loss": 0.5408, "step": 39836 }, { "epoch": 0.8448813386778647, "grad_norm": 0.3903297781944275, "learning_rate": 1.2431570904089686e-05, "loss": 0.47, "step": 39837 }, { "epoch": 0.8449025471357977, "grad_norm": 0.3352680206298828, "learning_rate": 1.2431247416634386e-05, "loss": 0.432, "step": 39838 }, { "epoch": 0.8449237555937308, "grad_norm": 0.37724462151527405, "learning_rate": 1.2430923926475078e-05, "loss": 0.4655, "step": 39839 }, { "epoch": 0.8449449640516637, "grad_norm": 0.5805528163909912, "learning_rate": 1.2430600433612116e-05, "loss": 0.4714, "step": 39840 }, { "epoch": 0.8449661725095968, "grad_norm": 0.36587241291999817, "learning_rate": 1.243027693804586e-05, "loss": 0.4399, "step": 39841 }, { "epoch": 0.8449873809675299, "grad_norm": 0.37122586369514465, "learning_rate": 1.2429953439776675e-05, "loss": 0.5216, "step": 39842 }, { "epoch": 0.8450085894254629, "grad_norm": 0.33732977509498596, "learning_rate": 1.2429629938804913e-05, "loss": 0.4935, "step": 39843 }, { "epoch": 0.8450297978833959, "grad_norm": 0.3625359833240509, "learning_rate": 1.2429306435130936e-05, "loss": 0.4408, "step": 39844 }, { "epoch": 0.8450510063413289, "grad_norm": 0.37756264209747314, "learning_rate": 1.242898292875511e-05, "loss": 0.5608, "step": 39845 }, { "epoch": 0.845072214799262, "grad_norm": 0.3865099251270294, "learning_rate": 1.242865941967779e-05, "loss": 0.5031, "step": 39846 }, { "epoch": 0.8450934232571949, "grad_norm": 0.4068560004234314, "learning_rate": 1.2428335907899334e-05, "loss": 0.5352, "step": 39847 }, { "epoch": 0.845114631715128, "grad_norm": 0.3412444293498993, "learning_rate": 1.2428012393420105e-05, "loss": 0.4927, "step": 39848 }, { "epoch": 0.845135840173061, "grad_norm": 0.40086865425109863, "learning_rate": 1.242768887624046e-05, "loss": 0.4924, "step": 39849 }, { "epoch": 0.8451570486309941, "grad_norm": 0.3925347626209259, "learning_rate": 1.242736535636076e-05, "loss": 0.5035, "step": 39850 }, { "epoch": 0.845178257088927, "grad_norm": 1.1047968864440918, "learning_rate": 1.2427041833781368e-05, "loss": 0.4763, "step": 39851 }, { "epoch": 0.8451994655468601, "grad_norm": 0.3494320809841156, "learning_rate": 1.2426718308502639e-05, "loss": 0.4061, "step": 39852 }, { "epoch": 0.8452206740047931, "grad_norm": 0.554760217666626, "learning_rate": 1.2426394780524937e-05, "loss": 0.4623, "step": 39853 }, { "epoch": 0.8452418824627261, "grad_norm": 0.48863843083381653, "learning_rate": 1.242607124984862e-05, "loss": 0.4142, "step": 39854 }, { "epoch": 0.8452630909206592, "grad_norm": 0.37953534722328186, "learning_rate": 1.2425747716474046e-05, "loss": 0.5051, "step": 39855 }, { "epoch": 0.8452842993785922, "grad_norm": 0.3885614573955536, "learning_rate": 1.2425424180401577e-05, "loss": 0.5393, "step": 39856 }, { "epoch": 0.8453055078365252, "grad_norm": 0.38355550169944763, "learning_rate": 1.2425100641631573e-05, "loss": 0.5266, "step": 39857 }, { "epoch": 0.8453267162944582, "grad_norm": 0.3512750267982483, "learning_rate": 1.2424777100164391e-05, "loss": 0.511, "step": 39858 }, { "epoch": 0.8453479247523913, "grad_norm": 0.38048744201660156, "learning_rate": 1.2424453556000395e-05, "loss": 0.453, "step": 39859 }, { "epoch": 0.8453691332103243, "grad_norm": 0.3642500936985016, "learning_rate": 1.2424130009139945e-05, "loss": 0.4601, "step": 39860 }, { "epoch": 0.8453903416682573, "grad_norm": 0.319037526845932, "learning_rate": 1.2423806459583393e-05, "loss": 0.4466, "step": 39861 }, { "epoch": 0.8454115501261903, "grad_norm": 0.3903089463710785, "learning_rate": 1.2423482907331108e-05, "loss": 0.4289, "step": 39862 }, { "epoch": 0.8454327585841234, "grad_norm": 0.341296523809433, "learning_rate": 1.2423159352383447e-05, "loss": 0.4844, "step": 39863 }, { "epoch": 0.8454539670420563, "grad_norm": 0.347641259431839, "learning_rate": 1.2422835794740767e-05, "loss": 0.4562, "step": 39864 }, { "epoch": 0.8454751754999894, "grad_norm": 0.36716556549072266, "learning_rate": 1.2422512234403432e-05, "loss": 0.5242, "step": 39865 }, { "epoch": 0.8454963839579224, "grad_norm": 0.3507677912712097, "learning_rate": 1.2422188671371797e-05, "loss": 0.555, "step": 39866 }, { "epoch": 0.8455175924158554, "grad_norm": 0.4581148326396942, "learning_rate": 1.242186510564623e-05, "loss": 0.5084, "step": 39867 }, { "epoch": 0.8455388008737885, "grad_norm": 0.3666853904724121, "learning_rate": 1.2421541537227083e-05, "loss": 0.395, "step": 39868 }, { "epoch": 0.8455600093317215, "grad_norm": 0.371865838766098, "learning_rate": 1.2421217966114714e-05, "loss": 0.5048, "step": 39869 }, { "epoch": 0.8455812177896546, "grad_norm": 0.5093604326248169, "learning_rate": 1.2420894392309493e-05, "loss": 0.4466, "step": 39870 }, { "epoch": 0.8456024262475875, "grad_norm": 0.38931065797805786, "learning_rate": 1.2420570815811774e-05, "loss": 0.5179, "step": 39871 }, { "epoch": 0.8456236347055206, "grad_norm": 0.3543072044849396, "learning_rate": 1.2420247236621914e-05, "loss": 0.4725, "step": 39872 }, { "epoch": 0.8456448431634536, "grad_norm": 0.3546997606754303, "learning_rate": 1.241992365474028e-05, "loss": 0.4766, "step": 39873 }, { "epoch": 0.8456660516213866, "grad_norm": 0.4112362265586853, "learning_rate": 1.2419600070167226e-05, "loss": 0.5596, "step": 39874 }, { "epoch": 0.8456872600793196, "grad_norm": 0.401121586561203, "learning_rate": 1.2419276482903111e-05, "loss": 0.6613, "step": 39875 }, { "epoch": 0.8457084685372527, "grad_norm": 0.36727848649024963, "learning_rate": 1.2418952892948302e-05, "loss": 0.4546, "step": 39876 }, { "epoch": 0.8457296769951856, "grad_norm": 0.3585762679576874, "learning_rate": 1.2418629300303154e-05, "loss": 0.4147, "step": 39877 }, { "epoch": 0.8457508854531187, "grad_norm": 0.35249021649360657, "learning_rate": 1.2418305704968025e-05, "loss": 0.4712, "step": 39878 }, { "epoch": 0.8457720939110517, "grad_norm": 0.3677123188972473, "learning_rate": 1.2417982106943281e-05, "loss": 0.5792, "step": 39879 }, { "epoch": 0.8457933023689848, "grad_norm": 0.34446853399276733, "learning_rate": 1.2417658506229275e-05, "loss": 0.41, "step": 39880 }, { "epoch": 0.8458145108269177, "grad_norm": 0.3322121202945709, "learning_rate": 1.241733490282637e-05, "loss": 0.4683, "step": 39881 }, { "epoch": 0.8458357192848508, "grad_norm": 0.3497510254383087, "learning_rate": 1.2417011296734928e-05, "loss": 0.5048, "step": 39882 }, { "epoch": 0.8458569277427839, "grad_norm": 0.35979655385017395, "learning_rate": 1.2416687687955307e-05, "loss": 0.4497, "step": 39883 }, { "epoch": 0.8458781362007168, "grad_norm": 0.39350706338882446, "learning_rate": 1.2416364076487868e-05, "loss": 0.5042, "step": 39884 }, { "epoch": 0.8458993446586499, "grad_norm": 0.37378472089767456, "learning_rate": 1.241604046233297e-05, "loss": 0.5295, "step": 39885 }, { "epoch": 0.8459205531165829, "grad_norm": 0.3324877917766571, "learning_rate": 1.2415716845490969e-05, "loss": 0.4767, "step": 39886 }, { "epoch": 0.845941761574516, "grad_norm": 0.3903048038482666, "learning_rate": 1.2415393225962231e-05, "loss": 0.5013, "step": 39887 }, { "epoch": 0.8459629700324489, "grad_norm": 0.42309510707855225, "learning_rate": 1.2415069603747113e-05, "loss": 0.5322, "step": 39888 }, { "epoch": 0.845984178490382, "grad_norm": 0.3778187036514282, "learning_rate": 1.2414745978845976e-05, "loss": 0.5254, "step": 39889 }, { "epoch": 0.846005386948315, "grad_norm": 0.4160909652709961, "learning_rate": 1.241442235125918e-05, "loss": 0.4789, "step": 39890 }, { "epoch": 0.846026595406248, "grad_norm": 0.34554624557495117, "learning_rate": 1.2414098720987084e-05, "loss": 0.4853, "step": 39891 }, { "epoch": 0.846047803864181, "grad_norm": 0.34311598539352417, "learning_rate": 1.2413775088030048e-05, "loss": 0.5366, "step": 39892 }, { "epoch": 0.8460690123221141, "grad_norm": 0.5531333088874817, "learning_rate": 1.2413451452388434e-05, "loss": 0.5604, "step": 39893 }, { "epoch": 0.846090220780047, "grad_norm": 0.3571348786354065, "learning_rate": 1.24131278140626e-05, "loss": 0.5318, "step": 39894 }, { "epoch": 0.8461114292379801, "grad_norm": 0.39804962277412415, "learning_rate": 1.2412804173052904e-05, "loss": 0.4854, "step": 39895 }, { "epoch": 0.8461326376959132, "grad_norm": 0.37329503893852234, "learning_rate": 1.2412480529359709e-05, "loss": 0.4313, "step": 39896 }, { "epoch": 0.8461538461538461, "grad_norm": 0.33807143568992615, "learning_rate": 1.2412156882983374e-05, "loss": 0.4431, "step": 39897 }, { "epoch": 0.8461750546117792, "grad_norm": 0.3907642960548401, "learning_rate": 1.241183323392426e-05, "loss": 0.5003, "step": 39898 }, { "epoch": 0.8461962630697122, "grad_norm": 0.415142297744751, "learning_rate": 1.2411509582182729e-05, "loss": 0.5359, "step": 39899 }, { "epoch": 0.8462174715276453, "grad_norm": 0.40499347448349, "learning_rate": 1.241118592775913e-05, "loss": 0.4724, "step": 39900 }, { "epoch": 0.8462386799855782, "grad_norm": 0.36022478342056274, "learning_rate": 1.2410862270653836e-05, "loss": 0.5159, "step": 39901 }, { "epoch": 0.8462598884435113, "grad_norm": 0.45737069845199585, "learning_rate": 1.2410538610867202e-05, "loss": 0.52, "step": 39902 }, { "epoch": 0.8462810969014443, "grad_norm": 0.39554205536842346, "learning_rate": 1.2410214948399588e-05, "loss": 0.5455, "step": 39903 }, { "epoch": 0.8463023053593773, "grad_norm": 0.2959422469139099, "learning_rate": 1.2409891283251355e-05, "loss": 0.4245, "step": 39904 }, { "epoch": 0.8463235138173103, "grad_norm": 0.3156150281429291, "learning_rate": 1.2409567615422859e-05, "loss": 0.4572, "step": 39905 }, { "epoch": 0.8463447222752434, "grad_norm": 0.34398505091667175, "learning_rate": 1.2409243944914464e-05, "loss": 0.5043, "step": 39906 }, { "epoch": 0.8463659307331763, "grad_norm": 0.37634173035621643, "learning_rate": 1.2408920271726528e-05, "loss": 0.3928, "step": 39907 }, { "epoch": 0.8463871391911094, "grad_norm": 0.36366260051727295, "learning_rate": 1.2408596595859412e-05, "loss": 0.5069, "step": 39908 }, { "epoch": 0.8464083476490425, "grad_norm": 0.3750227987766266, "learning_rate": 1.2408272917313476e-05, "loss": 0.4699, "step": 39909 }, { "epoch": 0.8464295561069755, "grad_norm": 0.4478263556957245, "learning_rate": 1.2407949236089081e-05, "loss": 0.5256, "step": 39910 }, { "epoch": 0.8464507645649085, "grad_norm": 0.4098782539367676, "learning_rate": 1.2407625552186584e-05, "loss": 0.4806, "step": 39911 }, { "epoch": 0.8464719730228415, "grad_norm": 0.32858097553253174, "learning_rate": 1.2407301865606349e-05, "loss": 0.4654, "step": 39912 }, { "epoch": 0.8464931814807746, "grad_norm": 0.3901468515396118, "learning_rate": 1.2406978176348734e-05, "loss": 0.4775, "step": 39913 }, { "epoch": 0.8465143899387075, "grad_norm": 0.48497721552848816, "learning_rate": 1.2406654484414096e-05, "loss": 0.4967, "step": 39914 }, { "epoch": 0.8465355983966406, "grad_norm": 0.8513311743736267, "learning_rate": 1.2406330789802799e-05, "loss": 0.5739, "step": 39915 }, { "epoch": 0.8465568068545736, "grad_norm": 0.3729415237903595, "learning_rate": 1.2406007092515202e-05, "loss": 0.4933, "step": 39916 }, { "epoch": 0.8465780153125066, "grad_norm": 0.4086071252822876, "learning_rate": 1.2405683392551664e-05, "loss": 0.5216, "step": 39917 }, { "epoch": 0.8465992237704396, "grad_norm": 0.33322247862815857, "learning_rate": 1.2405359689912546e-05, "loss": 0.4613, "step": 39918 }, { "epoch": 0.8466204322283727, "grad_norm": 0.4011402428150177, "learning_rate": 1.2405035984598211e-05, "loss": 0.4915, "step": 39919 }, { "epoch": 0.8466416406863057, "grad_norm": 0.3473789095878601, "learning_rate": 1.240471227660901e-05, "loss": 0.4391, "step": 39920 }, { "epoch": 0.8466628491442387, "grad_norm": 0.38831624388694763, "learning_rate": 1.2404388565945311e-05, "loss": 0.5654, "step": 39921 }, { "epoch": 0.8466840576021717, "grad_norm": 0.571797788143158, "learning_rate": 1.2404064852607476e-05, "loss": 0.5021, "step": 39922 }, { "epoch": 0.8467052660601048, "grad_norm": 0.3587625324726105, "learning_rate": 1.2403741136595857e-05, "loss": 0.4414, "step": 39923 }, { "epoch": 0.8467264745180378, "grad_norm": 0.37321704626083374, "learning_rate": 1.2403417417910818e-05, "loss": 0.5195, "step": 39924 }, { "epoch": 0.8467476829759708, "grad_norm": 0.41051042079925537, "learning_rate": 1.2403093696552719e-05, "loss": 0.513, "step": 39925 }, { "epoch": 0.8467688914339039, "grad_norm": 0.3321722447872162, "learning_rate": 1.2402769972521921e-05, "loss": 0.4902, "step": 39926 }, { "epoch": 0.8467900998918368, "grad_norm": 0.398073673248291, "learning_rate": 1.2402446245818784e-05, "loss": 0.5315, "step": 39927 }, { "epoch": 0.8468113083497699, "grad_norm": 0.3320387601852417, "learning_rate": 1.2402122516443668e-05, "loss": 0.4671, "step": 39928 }, { "epoch": 0.8468325168077029, "grad_norm": 0.38502010703086853, "learning_rate": 1.2401798784396931e-05, "loss": 0.4651, "step": 39929 }, { "epoch": 0.846853725265636, "grad_norm": 0.8409316539764404, "learning_rate": 1.2401475049678933e-05, "loss": 0.4711, "step": 39930 }, { "epoch": 0.8468749337235689, "grad_norm": 0.353962779045105, "learning_rate": 1.2401151312290036e-05, "loss": 0.4977, "step": 39931 }, { "epoch": 0.846896142181502, "grad_norm": 0.43711650371551514, "learning_rate": 1.2400827572230601e-05, "loss": 0.4527, "step": 39932 }, { "epoch": 0.846917350639435, "grad_norm": 0.3694233000278473, "learning_rate": 1.2400503829500982e-05, "loss": 0.492, "step": 39933 }, { "epoch": 0.846938559097368, "grad_norm": 0.4910826086997986, "learning_rate": 1.2400180084101544e-05, "loss": 0.5285, "step": 39934 }, { "epoch": 0.846959767555301, "grad_norm": 0.37647175788879395, "learning_rate": 1.2399856336032652e-05, "loss": 0.5195, "step": 39935 }, { "epoch": 0.8469809760132341, "grad_norm": 0.33607184886932373, "learning_rate": 1.2399532585294659e-05, "loss": 0.5519, "step": 39936 }, { "epoch": 0.8470021844711672, "grad_norm": 0.49397963285446167, "learning_rate": 1.2399208831887924e-05, "loss": 0.5253, "step": 39937 }, { "epoch": 0.8470233929291001, "grad_norm": 0.4492350220680237, "learning_rate": 1.2398885075812811e-05, "loss": 0.5773, "step": 39938 }, { "epoch": 0.8470446013870332, "grad_norm": 0.3581838309764862, "learning_rate": 1.2398561317069679e-05, "loss": 0.4639, "step": 39939 }, { "epoch": 0.8470658098449662, "grad_norm": 0.3534764349460602, "learning_rate": 1.2398237555658888e-05, "loss": 0.493, "step": 39940 }, { "epoch": 0.8470870183028992, "grad_norm": 0.4678036570549011, "learning_rate": 1.2397913791580798e-05, "loss": 0.4609, "step": 39941 }, { "epoch": 0.8471082267608322, "grad_norm": 0.36506086587905884, "learning_rate": 1.239759002483577e-05, "loss": 0.4902, "step": 39942 }, { "epoch": 0.8471294352187653, "grad_norm": 0.37047481536865234, "learning_rate": 1.239726625542416e-05, "loss": 0.4542, "step": 39943 }, { "epoch": 0.8471506436766982, "grad_norm": 0.42128127813339233, "learning_rate": 1.2396942483346336e-05, "loss": 0.5165, "step": 39944 }, { "epoch": 0.8471718521346313, "grad_norm": 0.34828808903694153, "learning_rate": 1.2396618708602648e-05, "loss": 0.4905, "step": 39945 }, { "epoch": 0.8471930605925643, "grad_norm": 0.33286353945732117, "learning_rate": 1.2396294931193464e-05, "loss": 0.4807, "step": 39946 }, { "epoch": 0.8472142690504973, "grad_norm": 0.38323771953582764, "learning_rate": 1.2395971151119143e-05, "loss": 0.5037, "step": 39947 }, { "epoch": 0.8472354775084303, "grad_norm": 0.3514796793460846, "learning_rate": 1.2395647368380042e-05, "loss": 0.4454, "step": 39948 }, { "epoch": 0.8472566859663634, "grad_norm": 0.3512836992740631, "learning_rate": 1.2395323582976524e-05, "loss": 0.4761, "step": 39949 }, { "epoch": 0.8472778944242965, "grad_norm": 0.3453713655471802, "learning_rate": 1.2394999794908948e-05, "loss": 0.4262, "step": 39950 }, { "epoch": 0.8472991028822294, "grad_norm": 0.35106945037841797, "learning_rate": 1.2394676004177673e-05, "loss": 0.5111, "step": 39951 }, { "epoch": 0.8473203113401625, "grad_norm": 0.32949382066726685, "learning_rate": 1.2394352210783061e-05, "loss": 0.4908, "step": 39952 }, { "epoch": 0.8473415197980955, "grad_norm": 0.35261160135269165, "learning_rate": 1.239402841472547e-05, "loss": 0.5021, "step": 39953 }, { "epoch": 0.8473627282560285, "grad_norm": 0.34559646248817444, "learning_rate": 1.2393704616005262e-05, "loss": 0.4953, "step": 39954 }, { "epoch": 0.8473839367139615, "grad_norm": 0.34764495491981506, "learning_rate": 1.2393380814622798e-05, "loss": 0.4652, "step": 39955 }, { "epoch": 0.8474051451718946, "grad_norm": 0.4035618305206299, "learning_rate": 1.2393057010578438e-05, "loss": 0.4857, "step": 39956 }, { "epoch": 0.8474263536298275, "grad_norm": 0.35431429743766785, "learning_rate": 1.2392733203872536e-05, "loss": 0.4808, "step": 39957 }, { "epoch": 0.8474475620877606, "grad_norm": 0.33250612020492554, "learning_rate": 1.2392409394505458e-05, "loss": 0.4703, "step": 39958 }, { "epoch": 0.8474687705456936, "grad_norm": 0.3323303163051605, "learning_rate": 1.2392085582477565e-05, "loss": 0.4143, "step": 39959 }, { "epoch": 0.8474899790036267, "grad_norm": 0.3676232695579529, "learning_rate": 1.2391761767789214e-05, "loss": 0.5772, "step": 39960 }, { "epoch": 0.8475111874615596, "grad_norm": 0.3411203622817993, "learning_rate": 1.2391437950440768e-05, "loss": 0.408, "step": 39961 }, { "epoch": 0.8475323959194927, "grad_norm": 0.37048932909965515, "learning_rate": 1.2391114130432581e-05, "loss": 0.5006, "step": 39962 }, { "epoch": 0.8475536043774257, "grad_norm": 0.3431960344314575, "learning_rate": 1.2390790307765025e-05, "loss": 0.4822, "step": 39963 }, { "epoch": 0.8475748128353587, "grad_norm": 0.36810797452926636, "learning_rate": 1.2390466482438448e-05, "loss": 0.4426, "step": 39964 }, { "epoch": 0.8475960212932918, "grad_norm": 0.34730201959609985, "learning_rate": 1.2390142654453212e-05, "loss": 0.562, "step": 39965 }, { "epoch": 0.8476172297512248, "grad_norm": 0.4216580390930176, "learning_rate": 1.2389818823809684e-05, "loss": 0.5217, "step": 39966 }, { "epoch": 0.8476384382091579, "grad_norm": 0.44580215215682983, "learning_rate": 1.2389494990508222e-05, "loss": 0.4688, "step": 39967 }, { "epoch": 0.8476596466670908, "grad_norm": 0.32852089405059814, "learning_rate": 1.238917115454918e-05, "loss": 0.4906, "step": 39968 }, { "epoch": 0.8476808551250239, "grad_norm": 0.34419453144073486, "learning_rate": 1.2388847315932928e-05, "loss": 0.5278, "step": 39969 }, { "epoch": 0.8477020635829569, "grad_norm": 0.38869568705558777, "learning_rate": 1.2388523474659818e-05, "loss": 0.4512, "step": 39970 }, { "epoch": 0.8477232720408899, "grad_norm": 0.44688916206359863, "learning_rate": 1.238819963073021e-05, "loss": 0.5007, "step": 39971 }, { "epoch": 0.8477444804988229, "grad_norm": 0.3768298029899597, "learning_rate": 1.238787578414447e-05, "loss": 0.5042, "step": 39972 }, { "epoch": 0.847765688956756, "grad_norm": 0.32039281725883484, "learning_rate": 1.2387551934902957e-05, "loss": 0.4714, "step": 39973 }, { "epoch": 0.8477868974146889, "grad_norm": 0.3279626667499542, "learning_rate": 1.2387228083006026e-05, "loss": 0.5167, "step": 39974 }, { "epoch": 0.847808105872622, "grad_norm": 0.36024001240730286, "learning_rate": 1.2386904228454041e-05, "loss": 0.4948, "step": 39975 }, { "epoch": 0.847829314330555, "grad_norm": 0.4408302307128906, "learning_rate": 1.2386580371247363e-05, "loss": 0.4775, "step": 39976 }, { "epoch": 0.847850522788488, "grad_norm": 0.36606264114379883, "learning_rate": 1.2386256511386354e-05, "loss": 0.4951, "step": 39977 }, { "epoch": 0.8478717312464211, "grad_norm": 0.3673948645591736, "learning_rate": 1.2385932648871367e-05, "loss": 0.4527, "step": 39978 }, { "epoch": 0.8478929397043541, "grad_norm": 0.5110893845558167, "learning_rate": 1.2385608783702768e-05, "loss": 0.5061, "step": 39979 }, { "epoch": 0.8479141481622872, "grad_norm": 0.3885490596294403, "learning_rate": 1.2385284915880915e-05, "loss": 0.5911, "step": 39980 }, { "epoch": 0.8479353566202201, "grad_norm": 0.3797411322593689, "learning_rate": 1.238496104540617e-05, "loss": 0.5629, "step": 39981 }, { "epoch": 0.8479565650781532, "grad_norm": 0.38842451572418213, "learning_rate": 1.2384637172278891e-05, "loss": 0.5028, "step": 39982 }, { "epoch": 0.8479777735360862, "grad_norm": 0.325406938791275, "learning_rate": 1.238431329649944e-05, "loss": 0.4795, "step": 39983 }, { "epoch": 0.8479989819940192, "grad_norm": 0.34055396914482117, "learning_rate": 1.2383989418068173e-05, "loss": 0.433, "step": 39984 }, { "epoch": 0.8480201904519522, "grad_norm": 0.36313048005104065, "learning_rate": 1.2383665536985458e-05, "loss": 0.5021, "step": 39985 }, { "epoch": 0.8480413989098853, "grad_norm": 0.3799333870410919, "learning_rate": 1.238334165325165e-05, "loss": 0.4808, "step": 39986 }, { "epoch": 0.8480626073678182, "grad_norm": 0.3810122311115265, "learning_rate": 1.2383017766867111e-05, "loss": 0.4939, "step": 39987 }, { "epoch": 0.8480838158257513, "grad_norm": 0.3552022874355316, "learning_rate": 1.2382693877832197e-05, "loss": 0.4864, "step": 39988 }, { "epoch": 0.8481050242836843, "grad_norm": 0.3328563868999481, "learning_rate": 1.2382369986147276e-05, "loss": 0.4913, "step": 39989 }, { "epoch": 0.8481262327416174, "grad_norm": 0.3312624394893646, "learning_rate": 1.23820460918127e-05, "loss": 0.5359, "step": 39990 }, { "epoch": 0.8481474411995504, "grad_norm": 0.3421829640865326, "learning_rate": 1.2381722194828834e-05, "loss": 0.508, "step": 39991 }, { "epoch": 0.8481686496574834, "grad_norm": 0.3493265211582184, "learning_rate": 1.2381398295196038e-05, "loss": 0.4756, "step": 39992 }, { "epoch": 0.8481898581154165, "grad_norm": 0.37248584628105164, "learning_rate": 1.2381074392914672e-05, "loss": 0.5148, "step": 39993 }, { "epoch": 0.8482110665733494, "grad_norm": 0.3533073961734772, "learning_rate": 1.2380750487985093e-05, "loss": 0.4759, "step": 39994 }, { "epoch": 0.8482322750312825, "grad_norm": 0.36448466777801514, "learning_rate": 1.2380426580407669e-05, "loss": 0.512, "step": 39995 }, { "epoch": 0.8482534834892155, "grad_norm": 0.3512764573097229, "learning_rate": 1.238010267018275e-05, "loss": 0.4237, "step": 39996 }, { "epoch": 0.8482746919471486, "grad_norm": 0.39835283160209656, "learning_rate": 1.2379778757310704e-05, "loss": 0.4206, "step": 39997 }, { "epoch": 0.8482959004050815, "grad_norm": 0.33526384830474854, "learning_rate": 1.2379454841791888e-05, "loss": 0.4282, "step": 39998 }, { "epoch": 0.8483171088630146, "grad_norm": 0.3703326880931854, "learning_rate": 1.2379130923626664e-05, "loss": 0.522, "step": 39999 }, { "epoch": 0.8483383173209476, "grad_norm": 0.3670554757118225, "learning_rate": 1.2378807002815392e-05, "loss": 0.4486, "step": 40000 }, { "epoch": 0.8483595257788806, "grad_norm": 0.35673168301582336, "learning_rate": 1.2378483079358429e-05, "loss": 0.5088, "step": 40001 }, { "epoch": 0.8483807342368136, "grad_norm": 0.3503980338573456, "learning_rate": 1.237815915325614e-05, "loss": 0.5052, "step": 40002 }, { "epoch": 0.8484019426947467, "grad_norm": 0.3385274112224579, "learning_rate": 1.237783522450888e-05, "loss": 0.5202, "step": 40003 }, { "epoch": 0.8484231511526796, "grad_norm": 0.317432165145874, "learning_rate": 1.2377511293117015e-05, "loss": 0.398, "step": 40004 }, { "epoch": 0.8484443596106127, "grad_norm": 0.3722030222415924, "learning_rate": 1.2377187359080902e-05, "loss": 0.4737, "step": 40005 }, { "epoch": 0.8484655680685458, "grad_norm": 0.33862507343292236, "learning_rate": 1.2376863422400903e-05, "loss": 0.5187, "step": 40006 }, { "epoch": 0.8484867765264787, "grad_norm": 0.46516963839530945, "learning_rate": 1.2376539483077375e-05, "loss": 0.576, "step": 40007 }, { "epoch": 0.8485079849844118, "grad_norm": 0.45198532938957214, "learning_rate": 1.237621554111068e-05, "loss": 0.5437, "step": 40008 }, { "epoch": 0.8485291934423448, "grad_norm": 0.39862167835235596, "learning_rate": 1.2375891596501181e-05, "loss": 0.4656, "step": 40009 }, { "epoch": 0.8485504019002779, "grad_norm": 0.4364124536514282, "learning_rate": 1.2375567649249236e-05, "loss": 0.5118, "step": 40010 }, { "epoch": 0.8485716103582108, "grad_norm": 0.38053229451179504, "learning_rate": 1.2375243699355201e-05, "loss": 0.5119, "step": 40011 }, { "epoch": 0.8485928188161439, "grad_norm": 0.3671865165233612, "learning_rate": 1.2374919746819447e-05, "loss": 0.4666, "step": 40012 }, { "epoch": 0.8486140272740769, "grad_norm": 0.40850773453712463, "learning_rate": 1.2374595791642323e-05, "loss": 0.5398, "step": 40013 }, { "epoch": 0.8486352357320099, "grad_norm": 0.47619742155075073, "learning_rate": 1.2374271833824199e-05, "loss": 0.5672, "step": 40014 }, { "epoch": 0.8486564441899429, "grad_norm": 0.369152307510376, "learning_rate": 1.2373947873365428e-05, "loss": 0.4828, "step": 40015 }, { "epoch": 0.848677652647876, "grad_norm": 0.32812273502349854, "learning_rate": 1.2373623910266368e-05, "loss": 0.4416, "step": 40016 }, { "epoch": 0.8486988611058089, "grad_norm": 0.3842742443084717, "learning_rate": 1.2373299944527391e-05, "loss": 0.6377, "step": 40017 }, { "epoch": 0.848720069563742, "grad_norm": 0.33909574151039124, "learning_rate": 1.2372975976148849e-05, "loss": 0.4523, "step": 40018 }, { "epoch": 0.8487412780216751, "grad_norm": 0.3304324448108673, "learning_rate": 1.23726520051311e-05, "loss": 0.5551, "step": 40019 }, { "epoch": 0.8487624864796081, "grad_norm": 0.34618857502937317, "learning_rate": 1.2372328031474513e-05, "loss": 0.4483, "step": 40020 }, { "epoch": 0.8487836949375411, "grad_norm": 0.342308908700943, "learning_rate": 1.2372004055179438e-05, "loss": 0.5462, "step": 40021 }, { "epoch": 0.8488049033954741, "grad_norm": 0.3932120203971863, "learning_rate": 1.2371680076246246e-05, "loss": 0.4906, "step": 40022 }, { "epoch": 0.8488261118534072, "grad_norm": 0.34620022773742676, "learning_rate": 1.237135609467529e-05, "loss": 0.5048, "step": 40023 }, { "epoch": 0.8488473203113401, "grad_norm": 0.4688224494457245, "learning_rate": 1.2371032110466932e-05, "loss": 0.4976, "step": 40024 }, { "epoch": 0.8488685287692732, "grad_norm": 0.5377649068832397, "learning_rate": 1.2370708123621532e-05, "loss": 0.4888, "step": 40025 }, { "epoch": 0.8488897372272062, "grad_norm": 0.3458838164806366, "learning_rate": 1.2370384134139453e-05, "loss": 0.4904, "step": 40026 }, { "epoch": 0.8489109456851393, "grad_norm": 0.35331013798713684, "learning_rate": 1.237006014202105e-05, "loss": 0.5136, "step": 40027 }, { "epoch": 0.8489321541430722, "grad_norm": 0.7367115020751953, "learning_rate": 1.2369736147266691e-05, "loss": 0.4739, "step": 40028 }, { "epoch": 0.8489533626010053, "grad_norm": 0.39100492000579834, "learning_rate": 1.2369412149876729e-05, "loss": 0.5157, "step": 40029 }, { "epoch": 0.8489745710589383, "grad_norm": 0.34373924136161804, "learning_rate": 1.2369088149851526e-05, "loss": 0.4551, "step": 40030 }, { "epoch": 0.8489957795168713, "grad_norm": 0.3584400415420532, "learning_rate": 1.2368764147191447e-05, "loss": 0.5644, "step": 40031 }, { "epoch": 0.8490169879748044, "grad_norm": 0.39020419120788574, "learning_rate": 1.236844014189685e-05, "loss": 0.471, "step": 40032 }, { "epoch": 0.8490381964327374, "grad_norm": 0.35131022334098816, "learning_rate": 1.2368116133968091e-05, "loss": 0.5054, "step": 40033 }, { "epoch": 0.8490594048906704, "grad_norm": 0.373288094997406, "learning_rate": 1.2367792123405538e-05, "loss": 0.5016, "step": 40034 }, { "epoch": 0.8490806133486034, "grad_norm": 0.4176386296749115, "learning_rate": 1.2367468110209541e-05, "loss": 0.4512, "step": 40035 }, { "epoch": 0.8491018218065365, "grad_norm": 0.3427639901638031, "learning_rate": 1.236714409438047e-05, "loss": 0.4216, "step": 40036 }, { "epoch": 0.8491230302644694, "grad_norm": 0.32114943861961365, "learning_rate": 1.2366820075918683e-05, "loss": 0.4232, "step": 40037 }, { "epoch": 0.8491442387224025, "grad_norm": 0.335574209690094, "learning_rate": 1.236649605482454e-05, "loss": 0.4599, "step": 40038 }, { "epoch": 0.8491654471803355, "grad_norm": 0.33580705523490906, "learning_rate": 1.2366172031098398e-05, "loss": 0.4707, "step": 40039 }, { "epoch": 0.8491866556382686, "grad_norm": 0.3705790042877197, "learning_rate": 1.2365848004740622e-05, "loss": 0.4923, "step": 40040 }, { "epoch": 0.8492078640962015, "grad_norm": 0.389365553855896, "learning_rate": 1.2365523975751567e-05, "loss": 0.5589, "step": 40041 }, { "epoch": 0.8492290725541346, "grad_norm": 0.5060868263244629, "learning_rate": 1.2365199944131599e-05, "loss": 0.4582, "step": 40042 }, { "epoch": 0.8492502810120676, "grad_norm": 0.4026363492012024, "learning_rate": 1.2364875909881078e-05, "loss": 0.4141, "step": 40043 }, { "epoch": 0.8492714894700006, "grad_norm": 0.33887484669685364, "learning_rate": 1.236455187300036e-05, "loss": 0.4468, "step": 40044 }, { "epoch": 0.8492926979279337, "grad_norm": 0.32878679037094116, "learning_rate": 1.236422783348981e-05, "loss": 0.4805, "step": 40045 }, { "epoch": 0.8493139063858667, "grad_norm": 0.4101826548576355, "learning_rate": 1.2363903791349786e-05, "loss": 0.4669, "step": 40046 }, { "epoch": 0.8493351148437998, "grad_norm": 0.36767566204071045, "learning_rate": 1.2363579746580646e-05, "loss": 0.5012, "step": 40047 }, { "epoch": 0.8493563233017327, "grad_norm": 0.34813040494918823, "learning_rate": 1.2363255699182754e-05, "loss": 0.5351, "step": 40048 }, { "epoch": 0.8493775317596658, "grad_norm": 0.3688717484474182, "learning_rate": 1.236293164915647e-05, "loss": 0.4361, "step": 40049 }, { "epoch": 0.8493987402175988, "grad_norm": 0.32792001962661743, "learning_rate": 1.2362607596502153e-05, "loss": 0.4454, "step": 40050 }, { "epoch": 0.8494199486755318, "grad_norm": 0.3647948205471039, "learning_rate": 1.2362283541220167e-05, "loss": 0.4772, "step": 40051 }, { "epoch": 0.8494411571334648, "grad_norm": 0.41331642866134644, "learning_rate": 1.2361959483310873e-05, "loss": 0.5407, "step": 40052 }, { "epoch": 0.8494623655913979, "grad_norm": 0.4841245710849762, "learning_rate": 1.2361635422774621e-05, "loss": 0.5559, "step": 40053 }, { "epoch": 0.8494835740493308, "grad_norm": 0.32650426030158997, "learning_rate": 1.2361311359611783e-05, "loss": 0.5062, "step": 40054 }, { "epoch": 0.8495047825072639, "grad_norm": 0.4911855161190033, "learning_rate": 1.2360987293822712e-05, "loss": 0.478, "step": 40055 }, { "epoch": 0.8495259909651969, "grad_norm": 0.37544700503349304, "learning_rate": 1.2360663225407772e-05, "loss": 0.4456, "step": 40056 }, { "epoch": 0.84954719942313, "grad_norm": 0.36593469977378845, "learning_rate": 1.2360339154367324e-05, "loss": 0.4094, "step": 40057 }, { "epoch": 0.8495684078810629, "grad_norm": 0.3290223777294159, "learning_rate": 1.2360015080701726e-05, "loss": 0.5256, "step": 40058 }, { "epoch": 0.849589616338996, "grad_norm": 0.3723059892654419, "learning_rate": 1.2359691004411342e-05, "loss": 0.594, "step": 40059 }, { "epoch": 0.8496108247969291, "grad_norm": 0.6152921915054321, "learning_rate": 1.2359366925496529e-05, "loss": 0.5003, "step": 40060 }, { "epoch": 0.849632033254862, "grad_norm": 0.3759302496910095, "learning_rate": 1.2359042843957646e-05, "loss": 0.5695, "step": 40061 }, { "epoch": 0.8496532417127951, "grad_norm": 0.41491052508354187, "learning_rate": 1.235871875979506e-05, "loss": 0.5752, "step": 40062 }, { "epoch": 0.8496744501707281, "grad_norm": 0.3676871061325073, "learning_rate": 1.2358394673009129e-05, "loss": 0.4894, "step": 40063 }, { "epoch": 0.8496956586286611, "grad_norm": 0.4170539379119873, "learning_rate": 1.2358070583600206e-05, "loss": 0.4913, "step": 40064 }, { "epoch": 0.8497168670865941, "grad_norm": 0.3490305542945862, "learning_rate": 1.2357746491568664e-05, "loss": 0.4304, "step": 40065 }, { "epoch": 0.8497380755445272, "grad_norm": 0.3924384117126465, "learning_rate": 1.2357422396914853e-05, "loss": 0.4973, "step": 40066 }, { "epoch": 0.8497592840024601, "grad_norm": 0.36136937141418457, "learning_rate": 1.2357098299639133e-05, "loss": 0.501, "step": 40067 }, { "epoch": 0.8497804924603932, "grad_norm": 0.3508368134498596, "learning_rate": 1.2356774199741874e-05, "loss": 0.5158, "step": 40068 }, { "epoch": 0.8498017009183262, "grad_norm": 0.32679855823516846, "learning_rate": 1.2356450097223434e-05, "loss": 0.4658, "step": 40069 }, { "epoch": 0.8498229093762593, "grad_norm": 0.4325413405895233, "learning_rate": 1.2356125992084165e-05, "loss": 0.5372, "step": 40070 }, { "epoch": 0.8498441178341922, "grad_norm": 0.35421666502952576, "learning_rate": 1.2355801884324437e-05, "loss": 0.4801, "step": 40071 }, { "epoch": 0.8498653262921253, "grad_norm": 0.36834144592285156, "learning_rate": 1.2355477773944601e-05, "loss": 0.4241, "step": 40072 }, { "epoch": 0.8498865347500584, "grad_norm": 0.398921936750412, "learning_rate": 1.235515366094503e-05, "loss": 0.5578, "step": 40073 }, { "epoch": 0.8499077432079913, "grad_norm": 0.35179102420806885, "learning_rate": 1.2354829545326073e-05, "loss": 0.4969, "step": 40074 }, { "epoch": 0.8499289516659244, "grad_norm": 0.36443889141082764, "learning_rate": 1.2354505427088095e-05, "loss": 0.4843, "step": 40075 }, { "epoch": 0.8499501601238574, "grad_norm": 0.3527674078941345, "learning_rate": 1.235418130623146e-05, "loss": 0.5537, "step": 40076 }, { "epoch": 0.8499713685817905, "grad_norm": 0.33934780955314636, "learning_rate": 1.2353857182756523e-05, "loss": 0.5378, "step": 40077 }, { "epoch": 0.8499925770397234, "grad_norm": 0.45184579491615295, "learning_rate": 1.2353533056663646e-05, "loss": 0.4915, "step": 40078 }, { "epoch": 0.8500137854976565, "grad_norm": 0.3471370339393616, "learning_rate": 1.2353208927953194e-05, "loss": 0.4464, "step": 40079 }, { "epoch": 0.8500349939555895, "grad_norm": 0.3589523136615753, "learning_rate": 1.2352884796625518e-05, "loss": 0.54, "step": 40080 }, { "epoch": 0.8500562024135225, "grad_norm": 0.34855085611343384, "learning_rate": 1.2352560662680983e-05, "loss": 0.4157, "step": 40081 }, { "epoch": 0.8500774108714555, "grad_norm": 0.32484498620033264, "learning_rate": 1.2352236526119957e-05, "loss": 0.4988, "step": 40082 }, { "epoch": 0.8500986193293886, "grad_norm": 0.35941067337989807, "learning_rate": 1.2351912386942791e-05, "loss": 0.524, "step": 40083 }, { "epoch": 0.8501198277873215, "grad_norm": 0.3492785096168518, "learning_rate": 1.2351588245149845e-05, "loss": 0.4525, "step": 40084 }, { "epoch": 0.8501410362452546, "grad_norm": 0.9139114618301392, "learning_rate": 1.2351264100741487e-05, "loss": 0.5186, "step": 40085 }, { "epoch": 0.8501622447031877, "grad_norm": 0.3434036076068878, "learning_rate": 1.2350939953718072e-05, "loss": 0.4319, "step": 40086 }, { "epoch": 0.8501834531611207, "grad_norm": 0.34167730808258057, "learning_rate": 1.2350615804079962e-05, "loss": 0.4992, "step": 40087 }, { "epoch": 0.8502046616190537, "grad_norm": 0.35482174158096313, "learning_rate": 1.2350291651827518e-05, "loss": 0.5174, "step": 40088 }, { "epoch": 0.8502258700769867, "grad_norm": 0.3710961937904358, "learning_rate": 1.23499674969611e-05, "loss": 0.4813, "step": 40089 }, { "epoch": 0.8502470785349198, "grad_norm": 0.38798820972442627, "learning_rate": 1.2349643339481068e-05, "loss": 0.5322, "step": 40090 }, { "epoch": 0.8502682869928527, "grad_norm": 0.42715373635292053, "learning_rate": 1.2349319179387784e-05, "loss": 0.4998, "step": 40091 }, { "epoch": 0.8502894954507858, "grad_norm": 0.32882848381996155, "learning_rate": 1.2348995016681607e-05, "loss": 0.461, "step": 40092 }, { "epoch": 0.8503107039087188, "grad_norm": 0.3861527740955353, "learning_rate": 1.2348670851362899e-05, "loss": 0.4276, "step": 40093 }, { "epoch": 0.8503319123666518, "grad_norm": 0.4269816279411316, "learning_rate": 1.2348346683432019e-05, "loss": 0.5949, "step": 40094 }, { "epoch": 0.8503531208245848, "grad_norm": 0.34524473547935486, "learning_rate": 1.2348022512889327e-05, "loss": 0.4331, "step": 40095 }, { "epoch": 0.8503743292825179, "grad_norm": 0.365503191947937, "learning_rate": 1.2347698339735188e-05, "loss": 0.4913, "step": 40096 }, { "epoch": 0.8503955377404508, "grad_norm": 0.3618393540382385, "learning_rate": 1.2347374163969956e-05, "loss": 0.4821, "step": 40097 }, { "epoch": 0.8504167461983839, "grad_norm": 0.3487445116043091, "learning_rate": 1.2347049985593995e-05, "loss": 0.3785, "step": 40098 }, { "epoch": 0.8504379546563169, "grad_norm": 0.35016363859176636, "learning_rate": 1.2346725804607669e-05, "loss": 0.5628, "step": 40099 }, { "epoch": 0.85045916311425, "grad_norm": 0.3644821345806122, "learning_rate": 1.2346401621011333e-05, "loss": 0.4858, "step": 40100 }, { "epoch": 0.850480371572183, "grad_norm": 0.3617245852947235, "learning_rate": 1.2346077434805347e-05, "loss": 0.4133, "step": 40101 }, { "epoch": 0.850501580030116, "grad_norm": 0.41243189573287964, "learning_rate": 1.2345753245990078e-05, "loss": 0.5549, "step": 40102 }, { "epoch": 0.8505227884880491, "grad_norm": 0.3894760012626648, "learning_rate": 1.2345429054565879e-05, "loss": 0.5306, "step": 40103 }, { "epoch": 0.850543996945982, "grad_norm": 0.3631826937198639, "learning_rate": 1.2345104860533117e-05, "loss": 0.5077, "step": 40104 }, { "epoch": 0.8505652054039151, "grad_norm": 0.3952125608921051, "learning_rate": 1.2344780663892148e-05, "loss": 0.4765, "step": 40105 }, { "epoch": 0.8505864138618481, "grad_norm": 0.3931138813495636, "learning_rate": 1.2344456464643335e-05, "loss": 0.4694, "step": 40106 }, { "epoch": 0.8506076223197812, "grad_norm": 0.3781486451625824, "learning_rate": 1.2344132262787036e-05, "loss": 0.4831, "step": 40107 }, { "epoch": 0.8506288307777141, "grad_norm": 0.33548012375831604, "learning_rate": 1.2343808058323616e-05, "loss": 0.4949, "step": 40108 }, { "epoch": 0.8506500392356472, "grad_norm": 0.3122204840183258, "learning_rate": 1.2343483851253433e-05, "loss": 0.4786, "step": 40109 }, { "epoch": 0.8506712476935802, "grad_norm": 0.335004061460495, "learning_rate": 1.2343159641576848e-05, "loss": 0.4337, "step": 40110 }, { "epoch": 0.8506924561515132, "grad_norm": 0.3880643844604492, "learning_rate": 1.234283542929422e-05, "loss": 0.4843, "step": 40111 }, { "epoch": 0.8507136646094462, "grad_norm": 0.3386578857898712, "learning_rate": 1.2342511214405909e-05, "loss": 0.4799, "step": 40112 }, { "epoch": 0.8507348730673793, "grad_norm": 0.3825271725654602, "learning_rate": 1.234218699691228e-05, "loss": 0.5113, "step": 40113 }, { "epoch": 0.8507560815253123, "grad_norm": 0.3754621744155884, "learning_rate": 1.2341862776813691e-05, "loss": 0.5212, "step": 40114 }, { "epoch": 0.8507772899832453, "grad_norm": 0.47291240096092224, "learning_rate": 1.23415385541105e-05, "loss": 0.5009, "step": 40115 }, { "epoch": 0.8507984984411784, "grad_norm": 0.3443929851055145, "learning_rate": 1.2341214328803074e-05, "loss": 0.4951, "step": 40116 }, { "epoch": 0.8508197068991113, "grad_norm": 0.3467977046966553, "learning_rate": 1.2340890100891766e-05, "loss": 0.5086, "step": 40117 }, { "epoch": 0.8508409153570444, "grad_norm": 0.3748561143875122, "learning_rate": 1.2340565870376944e-05, "loss": 0.4938, "step": 40118 }, { "epoch": 0.8508621238149774, "grad_norm": 0.3641907870769501, "learning_rate": 1.2340241637258961e-05, "loss": 0.5006, "step": 40119 }, { "epoch": 0.8508833322729105, "grad_norm": 0.4597100615501404, "learning_rate": 1.2339917401538185e-05, "loss": 0.467, "step": 40120 }, { "epoch": 0.8509045407308434, "grad_norm": 0.37050819396972656, "learning_rate": 1.233959316321497e-05, "loss": 0.5027, "step": 40121 }, { "epoch": 0.8509257491887765, "grad_norm": 0.3637400269508362, "learning_rate": 1.2339268922289685e-05, "loss": 0.4946, "step": 40122 }, { "epoch": 0.8509469576467095, "grad_norm": 0.38010790944099426, "learning_rate": 1.2338944678762678e-05, "loss": 0.4844, "step": 40123 }, { "epoch": 0.8509681661046425, "grad_norm": 0.38420137763023376, "learning_rate": 1.2338620432634323e-05, "loss": 0.5248, "step": 40124 }, { "epoch": 0.8509893745625755, "grad_norm": 0.3489869236946106, "learning_rate": 1.2338296183904972e-05, "loss": 0.4073, "step": 40125 }, { "epoch": 0.8510105830205086, "grad_norm": 0.3840957581996918, "learning_rate": 1.2337971932574988e-05, "loss": 0.5238, "step": 40126 }, { "epoch": 0.8510317914784417, "grad_norm": 0.3633906841278076, "learning_rate": 1.2337647678644736e-05, "loss": 0.5416, "step": 40127 }, { "epoch": 0.8510529999363746, "grad_norm": 0.351222962141037, "learning_rate": 1.2337323422114569e-05, "loss": 0.5653, "step": 40128 }, { "epoch": 0.8510742083943077, "grad_norm": 0.411170095205307, "learning_rate": 1.2336999162984851e-05, "loss": 0.5632, "step": 40129 }, { "epoch": 0.8510954168522407, "grad_norm": 0.34184783697128296, "learning_rate": 1.2336674901255945e-05, "loss": 0.4564, "step": 40130 }, { "epoch": 0.8511166253101737, "grad_norm": 0.4061853587627411, "learning_rate": 1.2336350636928208e-05, "loss": 0.5145, "step": 40131 }, { "epoch": 0.8511378337681067, "grad_norm": 0.35133039951324463, "learning_rate": 1.2336026370001999e-05, "loss": 0.3783, "step": 40132 }, { "epoch": 0.8511590422260398, "grad_norm": 0.36985915899276733, "learning_rate": 1.2335702100477688e-05, "loss": 0.4845, "step": 40133 }, { "epoch": 0.8511802506839727, "grad_norm": 0.3875104784965515, "learning_rate": 1.2335377828355626e-05, "loss": 0.5239, "step": 40134 }, { "epoch": 0.8512014591419058, "grad_norm": 0.40050649642944336, "learning_rate": 1.2335053553636175e-05, "loss": 0.4812, "step": 40135 }, { "epoch": 0.8512226675998388, "grad_norm": 0.42616716027259827, "learning_rate": 1.2334729276319703e-05, "loss": 0.4491, "step": 40136 }, { "epoch": 0.8512438760577719, "grad_norm": 0.3676663637161255, "learning_rate": 1.2334404996406561e-05, "loss": 0.5409, "step": 40137 }, { "epoch": 0.8512650845157048, "grad_norm": 0.3304685950279236, "learning_rate": 1.2334080713897115e-05, "loss": 0.5285, "step": 40138 }, { "epoch": 0.8512862929736379, "grad_norm": 0.3798469007015228, "learning_rate": 1.2333756428791725e-05, "loss": 0.4011, "step": 40139 }, { "epoch": 0.8513075014315709, "grad_norm": 0.40563318133354187, "learning_rate": 1.2333432141090751e-05, "loss": 0.514, "step": 40140 }, { "epoch": 0.8513287098895039, "grad_norm": 0.36009353399276733, "learning_rate": 1.2333107850794558e-05, "loss": 0.5071, "step": 40141 }, { "epoch": 0.851349918347437, "grad_norm": 0.41955921053886414, "learning_rate": 1.2332783557903499e-05, "loss": 0.3673, "step": 40142 }, { "epoch": 0.85137112680537, "grad_norm": 0.37990933656692505, "learning_rate": 1.2332459262417938e-05, "loss": 0.4851, "step": 40143 }, { "epoch": 0.851392335263303, "grad_norm": 0.39864200353622437, "learning_rate": 1.2332134964338237e-05, "loss": 0.4632, "step": 40144 }, { "epoch": 0.851413543721236, "grad_norm": 0.3870754837989807, "learning_rate": 1.2331810663664756e-05, "loss": 0.52, "step": 40145 }, { "epoch": 0.8514347521791691, "grad_norm": 0.47653383016586304, "learning_rate": 1.2331486360397852e-05, "loss": 0.4783, "step": 40146 }, { "epoch": 0.851455960637102, "grad_norm": 0.3878920376300812, "learning_rate": 1.2331162054537896e-05, "loss": 0.4895, "step": 40147 }, { "epoch": 0.8514771690950351, "grad_norm": 0.4493144154548645, "learning_rate": 1.2330837746085238e-05, "loss": 0.4983, "step": 40148 }, { "epoch": 0.8514983775529681, "grad_norm": 0.40511685609817505, "learning_rate": 1.233051343504024e-05, "loss": 0.4836, "step": 40149 }, { "epoch": 0.8515195860109012, "grad_norm": 0.563986599445343, "learning_rate": 1.2330189121403271e-05, "loss": 0.4617, "step": 40150 }, { "epoch": 0.8515407944688341, "grad_norm": 0.347430557012558, "learning_rate": 1.232986480517468e-05, "loss": 0.4993, "step": 40151 }, { "epoch": 0.8515620029267672, "grad_norm": 0.36491113901138306, "learning_rate": 1.2329540486354836e-05, "loss": 0.4447, "step": 40152 }, { "epoch": 0.8515832113847002, "grad_norm": 0.3372376561164856, "learning_rate": 1.2329216164944097e-05, "loss": 0.4245, "step": 40153 }, { "epoch": 0.8516044198426332, "grad_norm": 0.35005560517311096, "learning_rate": 1.2328891840942825e-05, "loss": 0.5099, "step": 40154 }, { "epoch": 0.8516256283005663, "grad_norm": 0.35358986258506775, "learning_rate": 1.232856751435138e-05, "loss": 0.4285, "step": 40155 }, { "epoch": 0.8516468367584993, "grad_norm": 0.3436228036880493, "learning_rate": 1.2328243185170123e-05, "loss": 0.4969, "step": 40156 }, { "epoch": 0.8516680452164324, "grad_norm": 0.49430564045906067, "learning_rate": 1.232791885339941e-05, "loss": 0.4633, "step": 40157 }, { "epoch": 0.8516892536743653, "grad_norm": 0.34280359745025635, "learning_rate": 1.232759451903961e-05, "loss": 0.46, "step": 40158 }, { "epoch": 0.8517104621322984, "grad_norm": 0.3741435110569, "learning_rate": 1.2327270182091078e-05, "loss": 0.4659, "step": 40159 }, { "epoch": 0.8517316705902314, "grad_norm": 0.36149105429649353, "learning_rate": 1.2326945842554176e-05, "loss": 0.5041, "step": 40160 }, { "epoch": 0.8517528790481644, "grad_norm": 0.404021680355072, "learning_rate": 1.2326621500429268e-05, "loss": 0.5393, "step": 40161 }, { "epoch": 0.8517740875060974, "grad_norm": 0.4106123149394989, "learning_rate": 1.2326297155716712e-05, "loss": 0.4779, "step": 40162 }, { "epoch": 0.8517952959640305, "grad_norm": 0.35336336493492126, "learning_rate": 1.2325972808416861e-05, "loss": 0.4901, "step": 40163 }, { "epoch": 0.8518165044219634, "grad_norm": 0.421694815158844, "learning_rate": 1.2325648458530089e-05, "loss": 0.4999, "step": 40164 }, { "epoch": 0.8518377128798965, "grad_norm": 0.3733639717102051, "learning_rate": 1.232532410605675e-05, "loss": 0.49, "step": 40165 }, { "epoch": 0.8518589213378295, "grad_norm": 0.4240792393684387, "learning_rate": 1.2324999750997206e-05, "loss": 0.5196, "step": 40166 }, { "epoch": 0.8518801297957626, "grad_norm": 0.42970824241638184, "learning_rate": 1.2324675393351818e-05, "loss": 0.428, "step": 40167 }, { "epoch": 0.8519013382536956, "grad_norm": 0.37498822808265686, "learning_rate": 1.2324351033120944e-05, "loss": 0.5452, "step": 40168 }, { "epoch": 0.8519225467116286, "grad_norm": 0.44076886773109436, "learning_rate": 1.2324026670304951e-05, "loss": 0.5208, "step": 40169 }, { "epoch": 0.8519437551695617, "grad_norm": 0.3584860861301422, "learning_rate": 1.2323702304904193e-05, "loss": 0.4598, "step": 40170 }, { "epoch": 0.8519649636274946, "grad_norm": 0.34977248311042786, "learning_rate": 1.2323377936919032e-05, "loss": 0.5102, "step": 40171 }, { "epoch": 0.8519861720854277, "grad_norm": 0.3930938243865967, "learning_rate": 1.2323053566349834e-05, "loss": 0.5118, "step": 40172 }, { "epoch": 0.8520073805433607, "grad_norm": 0.42870771884918213, "learning_rate": 1.2322729193196954e-05, "loss": 0.5116, "step": 40173 }, { "epoch": 0.8520285890012937, "grad_norm": 0.34686794877052307, "learning_rate": 1.2322404817460754e-05, "loss": 0.4431, "step": 40174 }, { "epoch": 0.8520497974592267, "grad_norm": 0.3429499864578247, "learning_rate": 1.2322080439141598e-05, "loss": 0.4437, "step": 40175 }, { "epoch": 0.8520710059171598, "grad_norm": 0.3903205394744873, "learning_rate": 1.2321756058239842e-05, "loss": 0.4838, "step": 40176 }, { "epoch": 0.8520922143750927, "grad_norm": 0.3727061152458191, "learning_rate": 1.2321431674755848e-05, "loss": 0.5497, "step": 40177 }, { "epoch": 0.8521134228330258, "grad_norm": 0.35209834575653076, "learning_rate": 1.2321107288689981e-05, "loss": 0.4645, "step": 40178 }, { "epoch": 0.8521346312909588, "grad_norm": 0.36514753103256226, "learning_rate": 1.2320782900042598e-05, "loss": 0.452, "step": 40179 }, { "epoch": 0.8521558397488919, "grad_norm": 0.3968368470668793, "learning_rate": 1.2320458508814059e-05, "loss": 0.5242, "step": 40180 }, { "epoch": 0.8521770482068248, "grad_norm": 0.37918370962142944, "learning_rate": 1.2320134115004727e-05, "loss": 0.4951, "step": 40181 }, { "epoch": 0.8521982566647579, "grad_norm": 0.36129966378211975, "learning_rate": 1.2319809718614961e-05, "loss": 0.5584, "step": 40182 }, { "epoch": 0.852219465122691, "grad_norm": 0.3510797321796417, "learning_rate": 1.2319485319645123e-05, "loss": 0.4985, "step": 40183 }, { "epoch": 0.8522406735806239, "grad_norm": 0.3528565764427185, "learning_rate": 1.2319160918095575e-05, "loss": 0.4976, "step": 40184 }, { "epoch": 0.852261882038557, "grad_norm": 0.3467799723148346, "learning_rate": 1.2318836513966677e-05, "loss": 0.4334, "step": 40185 }, { "epoch": 0.85228309049649, "grad_norm": 0.6499207615852356, "learning_rate": 1.2318512107258786e-05, "loss": 0.494, "step": 40186 }, { "epoch": 0.8523042989544231, "grad_norm": 0.33617648482322693, "learning_rate": 1.231818769797227e-05, "loss": 0.3998, "step": 40187 }, { "epoch": 0.852325507412356, "grad_norm": 0.36839261651039124, "learning_rate": 1.2317863286107484e-05, "loss": 0.4495, "step": 40188 }, { "epoch": 0.8523467158702891, "grad_norm": 0.37461236119270325, "learning_rate": 1.2317538871664786e-05, "loss": 0.435, "step": 40189 }, { "epoch": 0.8523679243282221, "grad_norm": 0.34141841530799866, "learning_rate": 1.2317214454644548e-05, "loss": 0.5509, "step": 40190 }, { "epoch": 0.8523891327861551, "grad_norm": 0.6095414161682129, "learning_rate": 1.231689003504712e-05, "loss": 0.5239, "step": 40191 }, { "epoch": 0.8524103412440881, "grad_norm": 0.3813164532184601, "learning_rate": 1.231656561287287e-05, "loss": 0.4591, "step": 40192 }, { "epoch": 0.8524315497020212, "grad_norm": 0.3791371285915375, "learning_rate": 1.2316241188122158e-05, "loss": 0.4923, "step": 40193 }, { "epoch": 0.8524527581599541, "grad_norm": 0.3642479181289673, "learning_rate": 1.2315916760795336e-05, "loss": 0.4771, "step": 40194 }, { "epoch": 0.8524739666178872, "grad_norm": 0.3598419725894928, "learning_rate": 1.2315592330892776e-05, "loss": 0.4424, "step": 40195 }, { "epoch": 0.8524951750758203, "grad_norm": 0.38949820399284363, "learning_rate": 1.2315267898414833e-05, "loss": 0.5415, "step": 40196 }, { "epoch": 0.8525163835337533, "grad_norm": 0.3717156648635864, "learning_rate": 1.2314943463361868e-05, "loss": 0.5012, "step": 40197 }, { "epoch": 0.8525375919916863, "grad_norm": 0.359592467546463, "learning_rate": 1.2314619025734244e-05, "loss": 0.4842, "step": 40198 }, { "epoch": 0.8525588004496193, "grad_norm": 0.4432127773761749, "learning_rate": 1.2314294585532323e-05, "loss": 0.4984, "step": 40199 }, { "epoch": 0.8525800089075524, "grad_norm": 0.36410632729530334, "learning_rate": 1.2313970142756462e-05, "loss": 0.4598, "step": 40200 }, { "epoch": 0.8526012173654853, "grad_norm": 0.36803627014160156, "learning_rate": 1.2313645697407023e-05, "loss": 0.6001, "step": 40201 }, { "epoch": 0.8526224258234184, "grad_norm": 0.6331847906112671, "learning_rate": 1.2313321249484366e-05, "loss": 0.5112, "step": 40202 }, { "epoch": 0.8526436342813514, "grad_norm": 0.36783334612846375, "learning_rate": 1.2312996798988855e-05, "loss": 0.5601, "step": 40203 }, { "epoch": 0.8526648427392844, "grad_norm": 0.3627326786518097, "learning_rate": 1.231267234592085e-05, "loss": 0.4853, "step": 40204 }, { "epoch": 0.8526860511972174, "grad_norm": 0.3547305166721344, "learning_rate": 1.2312347890280709e-05, "loss": 0.4132, "step": 40205 }, { "epoch": 0.8527072596551505, "grad_norm": 0.3655182123184204, "learning_rate": 1.2312023432068795e-05, "loss": 0.484, "step": 40206 }, { "epoch": 0.8527284681130834, "grad_norm": 0.35346806049346924, "learning_rate": 1.2311698971285471e-05, "loss": 0.4746, "step": 40207 }, { "epoch": 0.8527496765710165, "grad_norm": 0.3978651165962219, "learning_rate": 1.2311374507931094e-05, "loss": 0.5334, "step": 40208 }, { "epoch": 0.8527708850289496, "grad_norm": 0.32751238346099854, "learning_rate": 1.2311050042006025e-05, "loss": 0.4821, "step": 40209 }, { "epoch": 0.8527920934868826, "grad_norm": 0.45386457443237305, "learning_rate": 1.2310725573510627e-05, "loss": 0.5337, "step": 40210 }, { "epoch": 0.8528133019448156, "grad_norm": 0.42537954449653625, "learning_rate": 1.231040110244526e-05, "loss": 0.4832, "step": 40211 }, { "epoch": 0.8528345104027486, "grad_norm": 0.43641138076782227, "learning_rate": 1.2310076628810286e-05, "loss": 0.413, "step": 40212 }, { "epoch": 0.8528557188606817, "grad_norm": 0.3474145233631134, "learning_rate": 1.2309752152606063e-05, "loss": 0.4021, "step": 40213 }, { "epoch": 0.8528769273186146, "grad_norm": 0.3772178590297699, "learning_rate": 1.2309427673832956e-05, "loss": 0.4958, "step": 40214 }, { "epoch": 0.8528981357765477, "grad_norm": 0.3760624825954437, "learning_rate": 1.2309103192491322e-05, "loss": 0.4738, "step": 40215 }, { "epoch": 0.8529193442344807, "grad_norm": 0.3477846086025238, "learning_rate": 1.2308778708581525e-05, "loss": 0.5301, "step": 40216 }, { "epoch": 0.8529405526924138, "grad_norm": 0.3861689567565918, "learning_rate": 1.2308454222103923e-05, "loss": 0.5464, "step": 40217 }, { "epoch": 0.8529617611503467, "grad_norm": 0.4170076549053192, "learning_rate": 1.2308129733058878e-05, "loss": 0.5221, "step": 40218 }, { "epoch": 0.8529829696082798, "grad_norm": 0.34447965025901794, "learning_rate": 1.2307805241446751e-05, "loss": 0.5085, "step": 40219 }, { "epoch": 0.8530041780662128, "grad_norm": 0.7525641322135925, "learning_rate": 1.2307480747267906e-05, "loss": 0.5344, "step": 40220 }, { "epoch": 0.8530253865241458, "grad_norm": 0.3310370445251465, "learning_rate": 1.2307156250522696e-05, "loss": 0.4857, "step": 40221 }, { "epoch": 0.8530465949820788, "grad_norm": 0.3552183210849762, "learning_rate": 1.230683175121149e-05, "loss": 0.4214, "step": 40222 }, { "epoch": 0.8530678034400119, "grad_norm": 0.3810909390449524, "learning_rate": 1.2306507249334647e-05, "loss": 0.4474, "step": 40223 }, { "epoch": 0.853089011897945, "grad_norm": 0.3283178210258484, "learning_rate": 1.2306182744892525e-05, "loss": 0.5079, "step": 40224 }, { "epoch": 0.8531102203558779, "grad_norm": 0.35883450508117676, "learning_rate": 1.2305858237885487e-05, "loss": 0.4886, "step": 40225 }, { "epoch": 0.853131428813811, "grad_norm": 0.39272814989089966, "learning_rate": 1.2305533728313895e-05, "loss": 0.5378, "step": 40226 }, { "epoch": 0.853152637271744, "grad_norm": 0.37320569157600403, "learning_rate": 1.2305209216178104e-05, "loss": 0.4638, "step": 40227 }, { "epoch": 0.853173845729677, "grad_norm": 3.9417145252227783, "learning_rate": 1.2304884701478481e-05, "loss": 0.3952, "step": 40228 }, { "epoch": 0.85319505418761, "grad_norm": 0.37519651651382446, "learning_rate": 1.2304560184215386e-05, "loss": 0.5582, "step": 40229 }, { "epoch": 0.8532162626455431, "grad_norm": 0.428405225276947, "learning_rate": 1.2304235664389182e-05, "loss": 0.4414, "step": 40230 }, { "epoch": 0.853237471103476, "grad_norm": 0.31590595841407776, "learning_rate": 1.2303911142000223e-05, "loss": 0.4024, "step": 40231 }, { "epoch": 0.8532586795614091, "grad_norm": 0.35284653306007385, "learning_rate": 1.2303586617048877e-05, "loss": 0.4556, "step": 40232 }, { "epoch": 0.8532798880193421, "grad_norm": 0.3801342844963074, "learning_rate": 1.23032620895355e-05, "loss": 0.4403, "step": 40233 }, { "epoch": 0.8533010964772751, "grad_norm": 0.47486281394958496, "learning_rate": 1.2302937559460452e-05, "loss": 0.5258, "step": 40234 }, { "epoch": 0.8533223049352081, "grad_norm": 0.4017886817455292, "learning_rate": 1.2302613026824102e-05, "loss": 0.5094, "step": 40235 }, { "epoch": 0.8533435133931412, "grad_norm": 0.4328137934207916, "learning_rate": 1.2302288491626804e-05, "loss": 0.4536, "step": 40236 }, { "epoch": 0.8533647218510743, "grad_norm": 0.4001287817955017, "learning_rate": 1.230196395386892e-05, "loss": 0.493, "step": 40237 }, { "epoch": 0.8533859303090072, "grad_norm": 0.3889367878437042, "learning_rate": 1.2301639413550815e-05, "loss": 0.4784, "step": 40238 }, { "epoch": 0.8534071387669403, "grad_norm": 0.46564653515815735, "learning_rate": 1.2301314870672843e-05, "loss": 0.5168, "step": 40239 }, { "epoch": 0.8534283472248733, "grad_norm": 0.43983474373817444, "learning_rate": 1.2300990325235372e-05, "loss": 0.5755, "step": 40240 }, { "epoch": 0.8534495556828063, "grad_norm": 0.8272444605827332, "learning_rate": 1.2300665777238754e-05, "loss": 0.4465, "step": 40241 }, { "epoch": 0.8534707641407393, "grad_norm": 0.38778820633888245, "learning_rate": 1.2300341226683359e-05, "loss": 0.4614, "step": 40242 }, { "epoch": 0.8534919725986724, "grad_norm": 0.3885429799556732, "learning_rate": 1.2300016673569544e-05, "loss": 0.4718, "step": 40243 }, { "epoch": 0.8535131810566053, "grad_norm": 0.41084039211273193, "learning_rate": 1.2299692117897672e-05, "loss": 0.4369, "step": 40244 }, { "epoch": 0.8535343895145384, "grad_norm": 0.416476845741272, "learning_rate": 1.22993675596681e-05, "loss": 0.5016, "step": 40245 }, { "epoch": 0.8535555979724714, "grad_norm": 0.3467600643634796, "learning_rate": 1.2299042998881194e-05, "loss": 0.4234, "step": 40246 }, { "epoch": 0.8535768064304045, "grad_norm": 0.3261779546737671, "learning_rate": 1.2298718435537308e-05, "loss": 0.4309, "step": 40247 }, { "epoch": 0.8535980148883374, "grad_norm": 0.3863053023815155, "learning_rate": 1.229839386963681e-05, "loss": 0.4756, "step": 40248 }, { "epoch": 0.8536192233462705, "grad_norm": 0.354218453168869, "learning_rate": 1.229806930118006e-05, "loss": 0.4814, "step": 40249 }, { "epoch": 0.8536404318042036, "grad_norm": 0.34474095702171326, "learning_rate": 1.2297744730167416e-05, "loss": 0.5181, "step": 40250 }, { "epoch": 0.8536616402621365, "grad_norm": 0.6228916049003601, "learning_rate": 1.229742015659924e-05, "loss": 0.4279, "step": 40251 }, { "epoch": 0.8536828487200696, "grad_norm": 0.4536161422729492, "learning_rate": 1.2297095580475894e-05, "loss": 0.4325, "step": 40252 }, { "epoch": 0.8537040571780026, "grad_norm": 0.36980220675468445, "learning_rate": 1.2296771001797736e-05, "loss": 0.5348, "step": 40253 }, { "epoch": 0.8537252656359356, "grad_norm": 0.45357099175453186, "learning_rate": 1.2296446420565132e-05, "loss": 0.4866, "step": 40254 }, { "epoch": 0.8537464740938686, "grad_norm": 0.5495283007621765, "learning_rate": 1.229612183677844e-05, "loss": 0.4848, "step": 40255 }, { "epoch": 0.8537676825518017, "grad_norm": 0.4232320189476013, "learning_rate": 1.2295797250438017e-05, "loss": 0.452, "step": 40256 }, { "epoch": 0.8537888910097347, "grad_norm": 0.3510587811470032, "learning_rate": 1.2295472661544232e-05, "loss": 0.5341, "step": 40257 }, { "epoch": 0.8538100994676677, "grad_norm": 0.39066100120544434, "learning_rate": 1.2295148070097445e-05, "loss": 0.4811, "step": 40258 }, { "epoch": 0.8538313079256007, "grad_norm": 0.3346079885959625, "learning_rate": 1.229482347609801e-05, "loss": 0.4552, "step": 40259 }, { "epoch": 0.8538525163835338, "grad_norm": 0.4069165289402008, "learning_rate": 1.229449887954629e-05, "loss": 0.5467, "step": 40260 }, { "epoch": 0.8538737248414667, "grad_norm": 0.4368146061897278, "learning_rate": 1.2294174280442655e-05, "loss": 0.5033, "step": 40261 }, { "epoch": 0.8538949332993998, "grad_norm": 0.41916313767433167, "learning_rate": 1.2293849678787453e-05, "loss": 0.5268, "step": 40262 }, { "epoch": 0.8539161417573328, "grad_norm": 0.36337515711784363, "learning_rate": 1.2293525074581056e-05, "loss": 0.5425, "step": 40263 }, { "epoch": 0.8539373502152658, "grad_norm": 0.41362640261650085, "learning_rate": 1.2293200467823816e-05, "loss": 0.442, "step": 40264 }, { "epoch": 0.8539585586731989, "grad_norm": 0.4469965100288391, "learning_rate": 1.2292875858516102e-05, "loss": 0.4465, "step": 40265 }, { "epoch": 0.8539797671311319, "grad_norm": 0.355252742767334, "learning_rate": 1.229255124665827e-05, "loss": 0.5068, "step": 40266 }, { "epoch": 0.854000975589065, "grad_norm": 0.38090279698371887, "learning_rate": 1.2292226632250681e-05, "loss": 0.5132, "step": 40267 }, { "epoch": 0.8540221840469979, "grad_norm": 0.36755451560020447, "learning_rate": 1.2291902015293702e-05, "loss": 0.5325, "step": 40268 }, { "epoch": 0.854043392504931, "grad_norm": 0.4202203154563904, "learning_rate": 1.2291577395787686e-05, "loss": 0.5079, "step": 40269 }, { "epoch": 0.854064600962864, "grad_norm": 0.3772030770778656, "learning_rate": 1.2291252773732999e-05, "loss": 0.4292, "step": 40270 }, { "epoch": 0.854085809420797, "grad_norm": 0.3490617275238037, "learning_rate": 1.2290928149129999e-05, "loss": 0.4829, "step": 40271 }, { "epoch": 0.85410701787873, "grad_norm": 0.3642469644546509, "learning_rate": 1.229060352197905e-05, "loss": 0.5482, "step": 40272 }, { "epoch": 0.8541282263366631, "grad_norm": 0.3341820538043976, "learning_rate": 1.2290278892280507e-05, "loss": 0.4017, "step": 40273 }, { "epoch": 0.854149434794596, "grad_norm": 0.3923109173774719, "learning_rate": 1.2289954260034742e-05, "loss": 0.441, "step": 40274 }, { "epoch": 0.8541706432525291, "grad_norm": 0.4084134101867676, "learning_rate": 1.2289629625242108e-05, "loss": 0.5166, "step": 40275 }, { "epoch": 0.8541918517104621, "grad_norm": 0.3380161225795746, "learning_rate": 1.2289304987902967e-05, "loss": 0.4606, "step": 40276 }, { "epoch": 0.8542130601683952, "grad_norm": 0.3451673090457916, "learning_rate": 1.2288980348017683e-05, "loss": 0.5055, "step": 40277 }, { "epoch": 0.8542342686263282, "grad_norm": 0.36501461267471313, "learning_rate": 1.2288655705586612e-05, "loss": 0.5077, "step": 40278 }, { "epoch": 0.8542554770842612, "grad_norm": 0.3421774208545685, "learning_rate": 1.2288331060610117e-05, "loss": 0.4814, "step": 40279 }, { "epoch": 0.8542766855421943, "grad_norm": 0.3580451011657715, "learning_rate": 1.2288006413088563e-05, "loss": 0.4945, "step": 40280 }, { "epoch": 0.8542978940001272, "grad_norm": 0.3420034348964691, "learning_rate": 1.2287681763022306e-05, "loss": 0.4747, "step": 40281 }, { "epoch": 0.8543191024580603, "grad_norm": 0.40365099906921387, "learning_rate": 1.2287357110411713e-05, "loss": 0.5163, "step": 40282 }, { "epoch": 0.8543403109159933, "grad_norm": 0.6808876991271973, "learning_rate": 1.228703245525714e-05, "loss": 0.44, "step": 40283 }, { "epoch": 0.8543615193739263, "grad_norm": 0.35622498393058777, "learning_rate": 1.2286707797558945e-05, "loss": 0.561, "step": 40284 }, { "epoch": 0.8543827278318593, "grad_norm": 0.4009837806224823, "learning_rate": 1.2286383137317498e-05, "loss": 0.4871, "step": 40285 }, { "epoch": 0.8544039362897924, "grad_norm": 0.3433026373386383, "learning_rate": 1.2286058474533154e-05, "loss": 0.557, "step": 40286 }, { "epoch": 0.8544251447477254, "grad_norm": 0.361880362033844, "learning_rate": 1.2285733809206275e-05, "loss": 0.5044, "step": 40287 }, { "epoch": 0.8544463532056584, "grad_norm": 0.3495800495147705, "learning_rate": 1.2285409141337223e-05, "loss": 0.4444, "step": 40288 }, { "epoch": 0.8544675616635914, "grad_norm": 0.3274622857570648, "learning_rate": 1.2285084470926361e-05, "loss": 0.4132, "step": 40289 }, { "epoch": 0.8544887701215245, "grad_norm": 0.40162304043769836, "learning_rate": 1.2284759797974047e-05, "loss": 0.5595, "step": 40290 }, { "epoch": 0.8545099785794575, "grad_norm": 0.3671269118785858, "learning_rate": 1.2284435122480642e-05, "loss": 0.4436, "step": 40291 }, { "epoch": 0.8545311870373905, "grad_norm": 0.4292418360710144, "learning_rate": 1.2284110444446506e-05, "loss": 0.485, "step": 40292 }, { "epoch": 0.8545523954953236, "grad_norm": 0.3709561824798584, "learning_rate": 1.2283785763872004e-05, "loss": 0.5052, "step": 40293 }, { "epoch": 0.8545736039532565, "grad_norm": 0.3638669550418854, "learning_rate": 1.2283461080757497e-05, "loss": 0.517, "step": 40294 }, { "epoch": 0.8545948124111896, "grad_norm": 0.39141204953193665, "learning_rate": 1.2283136395103344e-05, "loss": 0.5168, "step": 40295 }, { "epoch": 0.8546160208691226, "grad_norm": 0.3672638237476349, "learning_rate": 1.2282811706909905e-05, "loss": 0.49, "step": 40296 }, { "epoch": 0.8546372293270557, "grad_norm": 0.32646504044532776, "learning_rate": 1.2282487016177544e-05, "loss": 0.4536, "step": 40297 }, { "epoch": 0.8546584377849886, "grad_norm": 0.49034780263900757, "learning_rate": 1.2282162322906617e-05, "loss": 0.4839, "step": 40298 }, { "epoch": 0.8546796462429217, "grad_norm": 0.34072133898735046, "learning_rate": 1.2281837627097494e-05, "loss": 0.4819, "step": 40299 }, { "epoch": 0.8547008547008547, "grad_norm": 0.37290534377098083, "learning_rate": 1.228151292875053e-05, "loss": 0.419, "step": 40300 }, { "epoch": 0.8547220631587877, "grad_norm": 0.3256925940513611, "learning_rate": 1.2281188227866085e-05, "loss": 0.4657, "step": 40301 }, { "epoch": 0.8547432716167207, "grad_norm": 0.379529744386673, "learning_rate": 1.2280863524444524e-05, "loss": 0.5118, "step": 40302 }, { "epoch": 0.8547644800746538, "grad_norm": 0.39817899465560913, "learning_rate": 1.2280538818486207e-05, "loss": 0.5494, "step": 40303 }, { "epoch": 0.8547856885325867, "grad_norm": 0.3801916539669037, "learning_rate": 1.2280214109991494e-05, "loss": 0.4577, "step": 40304 }, { "epoch": 0.8548068969905198, "grad_norm": 0.3487255871295929, "learning_rate": 1.2279889398960745e-05, "loss": 0.4219, "step": 40305 }, { "epoch": 0.8548281054484529, "grad_norm": 0.4928033947944641, "learning_rate": 1.2279564685394326e-05, "loss": 0.5685, "step": 40306 }, { "epoch": 0.8548493139063859, "grad_norm": 0.4060869514942169, "learning_rate": 1.227923996929259e-05, "loss": 0.5419, "step": 40307 }, { "epoch": 0.8548705223643189, "grad_norm": 0.3766418397426605, "learning_rate": 1.2278915250655907e-05, "loss": 0.4793, "step": 40308 }, { "epoch": 0.8548917308222519, "grad_norm": 0.40715035796165466, "learning_rate": 1.2278590529484633e-05, "loss": 0.4293, "step": 40309 }, { "epoch": 0.854912939280185, "grad_norm": 0.36934277415275574, "learning_rate": 1.227826580577913e-05, "loss": 0.4394, "step": 40310 }, { "epoch": 0.8549341477381179, "grad_norm": 0.3360602557659149, "learning_rate": 1.2277941079539758e-05, "loss": 0.4482, "step": 40311 }, { "epoch": 0.854955356196051, "grad_norm": 0.3772874176502228, "learning_rate": 1.2277616350766884e-05, "loss": 0.4978, "step": 40312 }, { "epoch": 0.854976564653984, "grad_norm": 0.3691574037075043, "learning_rate": 1.2277291619460863e-05, "loss": 0.4644, "step": 40313 }, { "epoch": 0.854997773111917, "grad_norm": 0.34636953473091125, "learning_rate": 1.2276966885622059e-05, "loss": 0.4513, "step": 40314 }, { "epoch": 0.85501898156985, "grad_norm": 0.43950551748275757, "learning_rate": 1.227664214925083e-05, "loss": 0.5076, "step": 40315 }, { "epoch": 0.8550401900277831, "grad_norm": 0.3507673442363739, "learning_rate": 1.2276317410347542e-05, "loss": 0.5196, "step": 40316 }, { "epoch": 0.855061398485716, "grad_norm": 0.3964022994041443, "learning_rate": 1.2275992668912553e-05, "loss": 0.506, "step": 40317 }, { "epoch": 0.8550826069436491, "grad_norm": 0.38579121232032776, "learning_rate": 1.2275667924946219e-05, "loss": 0.4698, "step": 40318 }, { "epoch": 0.8551038154015822, "grad_norm": 0.3658292293548584, "learning_rate": 1.2275343178448914e-05, "loss": 0.4637, "step": 40319 }, { "epoch": 0.8551250238595152, "grad_norm": 0.37665408849716187, "learning_rate": 1.227501842942099e-05, "loss": 0.4856, "step": 40320 }, { "epoch": 0.8551462323174482, "grad_norm": 0.3976440131664276, "learning_rate": 1.2274693677862811e-05, "loss": 0.4835, "step": 40321 }, { "epoch": 0.8551674407753812, "grad_norm": 0.3393898904323578, "learning_rate": 1.2274368923774735e-05, "loss": 0.4237, "step": 40322 }, { "epoch": 0.8551886492333143, "grad_norm": 0.37816810607910156, "learning_rate": 1.2274044167157126e-05, "loss": 0.4957, "step": 40323 }, { "epoch": 0.8552098576912472, "grad_norm": 0.3669869303703308, "learning_rate": 1.2273719408010347e-05, "loss": 0.5118, "step": 40324 }, { "epoch": 0.8552310661491803, "grad_norm": 0.35594165325164795, "learning_rate": 1.2273394646334755e-05, "loss": 0.529, "step": 40325 }, { "epoch": 0.8552522746071133, "grad_norm": 0.36961856484413147, "learning_rate": 1.2273069882130714e-05, "loss": 0.4931, "step": 40326 }, { "epoch": 0.8552734830650464, "grad_norm": 0.32488366961479187, "learning_rate": 1.2272745115398586e-05, "loss": 0.449, "step": 40327 }, { "epoch": 0.8552946915229793, "grad_norm": 0.4400925934314728, "learning_rate": 1.2272420346138729e-05, "loss": 0.4712, "step": 40328 }, { "epoch": 0.8553158999809124, "grad_norm": 0.46335771679878235, "learning_rate": 1.2272095574351505e-05, "loss": 0.5192, "step": 40329 }, { "epoch": 0.8553371084388454, "grad_norm": 0.3645864725112915, "learning_rate": 1.2271770800037277e-05, "loss": 0.5704, "step": 40330 }, { "epoch": 0.8553583168967784, "grad_norm": 0.4049986004829407, "learning_rate": 1.2271446023196407e-05, "loss": 0.4269, "step": 40331 }, { "epoch": 0.8553795253547115, "grad_norm": 0.47425907850265503, "learning_rate": 1.2271121243829251e-05, "loss": 0.4447, "step": 40332 }, { "epoch": 0.8554007338126445, "grad_norm": 0.3645109236240387, "learning_rate": 1.2270796461936176e-05, "loss": 0.5098, "step": 40333 }, { "epoch": 0.8554219422705776, "grad_norm": 0.36462321877479553, "learning_rate": 1.227047167751754e-05, "loss": 0.4559, "step": 40334 }, { "epoch": 0.8554431507285105, "grad_norm": 0.3455054461956024, "learning_rate": 1.2270146890573706e-05, "loss": 0.5128, "step": 40335 }, { "epoch": 0.8554643591864436, "grad_norm": 0.39647361636161804, "learning_rate": 1.2269822101105035e-05, "loss": 0.5074, "step": 40336 }, { "epoch": 0.8554855676443766, "grad_norm": 0.3606128692626953, "learning_rate": 1.2269497309111886e-05, "loss": 0.4628, "step": 40337 }, { "epoch": 0.8555067761023096, "grad_norm": 0.3259393572807312, "learning_rate": 1.226917251459462e-05, "loss": 0.4485, "step": 40338 }, { "epoch": 0.8555279845602426, "grad_norm": 0.7954513430595398, "learning_rate": 1.2268847717553604e-05, "loss": 0.6175, "step": 40339 }, { "epoch": 0.8555491930181757, "grad_norm": 0.38849735260009766, "learning_rate": 1.2268522917989193e-05, "loss": 0.4789, "step": 40340 }, { "epoch": 0.8555704014761086, "grad_norm": 0.47129812836647034, "learning_rate": 1.2268198115901753e-05, "loss": 0.5467, "step": 40341 }, { "epoch": 0.8555916099340417, "grad_norm": 0.43842563033103943, "learning_rate": 1.226787331129164e-05, "loss": 0.4578, "step": 40342 }, { "epoch": 0.8556128183919747, "grad_norm": 0.38008931279182434, "learning_rate": 1.226754850415922e-05, "loss": 0.525, "step": 40343 }, { "epoch": 0.8556340268499077, "grad_norm": 0.5614256858825684, "learning_rate": 1.226722369450485e-05, "loss": 0.4964, "step": 40344 }, { "epoch": 0.8556552353078408, "grad_norm": 0.34184730052948, "learning_rate": 1.2266898882328895e-05, "loss": 0.4786, "step": 40345 }, { "epoch": 0.8556764437657738, "grad_norm": 0.3540736138820648, "learning_rate": 1.2266574067631714e-05, "loss": 0.4105, "step": 40346 }, { "epoch": 0.8556976522237069, "grad_norm": 0.5519212484359741, "learning_rate": 1.2266249250413668e-05, "loss": 0.525, "step": 40347 }, { "epoch": 0.8557188606816398, "grad_norm": 0.3399289548397064, "learning_rate": 1.2265924430675123e-05, "loss": 0.5186, "step": 40348 }, { "epoch": 0.8557400691395729, "grad_norm": 0.40267637372016907, "learning_rate": 1.2265599608416431e-05, "loss": 0.5033, "step": 40349 }, { "epoch": 0.8557612775975059, "grad_norm": 0.38681042194366455, "learning_rate": 1.2265274783637964e-05, "loss": 0.4837, "step": 40350 }, { "epoch": 0.8557824860554389, "grad_norm": 0.3888774812221527, "learning_rate": 1.2264949956340075e-05, "loss": 0.5018, "step": 40351 }, { "epoch": 0.8558036945133719, "grad_norm": 0.3138931393623352, "learning_rate": 1.2264625126523129e-05, "loss": 0.4394, "step": 40352 }, { "epoch": 0.855824902971305, "grad_norm": 0.5485257506370544, "learning_rate": 1.2264300294187487e-05, "loss": 0.4541, "step": 40353 }, { "epoch": 0.8558461114292379, "grad_norm": 0.3889940679073334, "learning_rate": 1.226397545933351e-05, "loss": 0.4403, "step": 40354 }, { "epoch": 0.855867319887171, "grad_norm": 0.3360103964805603, "learning_rate": 1.226365062196156e-05, "loss": 0.4347, "step": 40355 }, { "epoch": 0.855888528345104, "grad_norm": 0.39813756942749023, "learning_rate": 1.2263325782071995e-05, "loss": 0.4689, "step": 40356 }, { "epoch": 0.8559097368030371, "grad_norm": 0.3601028323173523, "learning_rate": 1.2263000939665181e-05, "loss": 0.4613, "step": 40357 }, { "epoch": 0.85593094526097, "grad_norm": 0.3304753601551056, "learning_rate": 1.2262676094741477e-05, "loss": 0.4456, "step": 40358 }, { "epoch": 0.8559521537189031, "grad_norm": 0.41044750809669495, "learning_rate": 1.2262351247301243e-05, "loss": 0.462, "step": 40359 }, { "epoch": 0.8559733621768362, "grad_norm": 0.3617939054965973, "learning_rate": 1.2262026397344841e-05, "loss": 0.5025, "step": 40360 }, { "epoch": 0.8559945706347691, "grad_norm": 0.3672042787075043, "learning_rate": 1.2261701544872635e-05, "loss": 0.4882, "step": 40361 }, { "epoch": 0.8560157790927022, "grad_norm": 0.3568142056465149, "learning_rate": 1.2261376689884983e-05, "loss": 0.4382, "step": 40362 }, { "epoch": 0.8560369875506352, "grad_norm": 0.3505220413208008, "learning_rate": 1.2261051832382246e-05, "loss": 0.435, "step": 40363 }, { "epoch": 0.8560581960085683, "grad_norm": 0.6343216896057129, "learning_rate": 1.226072697236479e-05, "loss": 0.4572, "step": 40364 }, { "epoch": 0.8560794044665012, "grad_norm": 0.32590633630752563, "learning_rate": 1.2260402109832974e-05, "loss": 0.4615, "step": 40365 }, { "epoch": 0.8561006129244343, "grad_norm": 0.3111949563026428, "learning_rate": 1.2260077244787153e-05, "loss": 0.4026, "step": 40366 }, { "epoch": 0.8561218213823673, "grad_norm": 0.3577355146408081, "learning_rate": 1.2259752377227698e-05, "loss": 0.4318, "step": 40367 }, { "epoch": 0.8561430298403003, "grad_norm": 0.38449016213417053, "learning_rate": 1.2259427507154964e-05, "loss": 0.4777, "step": 40368 }, { "epoch": 0.8561642382982333, "grad_norm": 0.345741331577301, "learning_rate": 1.2259102634569313e-05, "loss": 0.5141, "step": 40369 }, { "epoch": 0.8561854467561664, "grad_norm": 0.3486301302909851, "learning_rate": 1.2258777759471112e-05, "loss": 0.4564, "step": 40370 }, { "epoch": 0.8562066552140993, "grad_norm": 0.37327298521995544, "learning_rate": 1.2258452881860716e-05, "loss": 0.6118, "step": 40371 }, { "epoch": 0.8562278636720324, "grad_norm": 0.3639090359210968, "learning_rate": 1.2258128001738488e-05, "loss": 0.539, "step": 40372 }, { "epoch": 0.8562490721299655, "grad_norm": 0.4154665470123291, "learning_rate": 1.225780311910479e-05, "loss": 0.5171, "step": 40373 }, { "epoch": 0.8562702805878984, "grad_norm": 0.3428134620189667, "learning_rate": 1.2257478233959982e-05, "loss": 0.5562, "step": 40374 }, { "epoch": 0.8562914890458315, "grad_norm": 0.34976258873939514, "learning_rate": 1.2257153346304426e-05, "loss": 0.3529, "step": 40375 }, { "epoch": 0.8563126975037645, "grad_norm": 0.3556205928325653, "learning_rate": 1.2256828456138485e-05, "loss": 0.5104, "step": 40376 }, { "epoch": 0.8563339059616976, "grad_norm": 0.397203654050827, "learning_rate": 1.2256503563462519e-05, "loss": 0.5099, "step": 40377 }, { "epoch": 0.8563551144196305, "grad_norm": 0.37584036588668823, "learning_rate": 1.225617866827689e-05, "loss": 0.4558, "step": 40378 }, { "epoch": 0.8563763228775636, "grad_norm": 0.5174495577812195, "learning_rate": 1.2255853770581958e-05, "loss": 0.5195, "step": 40379 }, { "epoch": 0.8563975313354966, "grad_norm": 0.39803725481033325, "learning_rate": 1.2255528870378085e-05, "loss": 0.5044, "step": 40380 }, { "epoch": 0.8564187397934296, "grad_norm": 0.37168845534324646, "learning_rate": 1.2255203967665632e-05, "loss": 0.4757, "step": 40381 }, { "epoch": 0.8564399482513626, "grad_norm": 0.39735880494117737, "learning_rate": 1.225487906244496e-05, "loss": 0.5141, "step": 40382 }, { "epoch": 0.8564611567092957, "grad_norm": 0.33942994475364685, "learning_rate": 1.2254554154716431e-05, "loss": 0.4996, "step": 40383 }, { "epoch": 0.8564823651672286, "grad_norm": 0.3496294915676117, "learning_rate": 1.2254229244480408e-05, "loss": 0.4396, "step": 40384 }, { "epoch": 0.8565035736251617, "grad_norm": 0.36906805634498596, "learning_rate": 1.2253904331737253e-05, "loss": 0.5079, "step": 40385 }, { "epoch": 0.8565247820830948, "grad_norm": 0.33996647596359253, "learning_rate": 1.225357941648732e-05, "loss": 0.4299, "step": 40386 }, { "epoch": 0.8565459905410278, "grad_norm": 0.3682402968406677, "learning_rate": 1.225325449873098e-05, "loss": 0.4591, "step": 40387 }, { "epoch": 0.8565671989989608, "grad_norm": 0.38600054383277893, "learning_rate": 1.2252929578468587e-05, "loss": 0.4995, "step": 40388 }, { "epoch": 0.8565884074568938, "grad_norm": 0.33291539549827576, "learning_rate": 1.2252604655700505e-05, "loss": 0.4292, "step": 40389 }, { "epoch": 0.8566096159148269, "grad_norm": 0.4158478379249573, "learning_rate": 1.2252279730427097e-05, "loss": 0.4928, "step": 40390 }, { "epoch": 0.8566308243727598, "grad_norm": 0.36946988105773926, "learning_rate": 1.2251954802648722e-05, "loss": 0.5025, "step": 40391 }, { "epoch": 0.8566520328306929, "grad_norm": 0.36659377813339233, "learning_rate": 1.2251629872365745e-05, "loss": 0.5127, "step": 40392 }, { "epoch": 0.8566732412886259, "grad_norm": 0.3891003727912903, "learning_rate": 1.2251304939578524e-05, "loss": 0.5259, "step": 40393 }, { "epoch": 0.856694449746559, "grad_norm": 0.3714480996131897, "learning_rate": 1.2250980004287419e-05, "loss": 0.4458, "step": 40394 }, { "epoch": 0.8567156582044919, "grad_norm": 0.39591649174690247, "learning_rate": 1.2250655066492794e-05, "loss": 0.4711, "step": 40395 }, { "epoch": 0.856736866662425, "grad_norm": 0.3401886820793152, "learning_rate": 1.2250330126195011e-05, "loss": 0.415, "step": 40396 }, { "epoch": 0.856758075120358, "grad_norm": 0.3888690173625946, "learning_rate": 1.225000518339443e-05, "loss": 0.3972, "step": 40397 }, { "epoch": 0.856779283578291, "grad_norm": 0.38092944025993347, "learning_rate": 1.2249680238091415e-05, "loss": 0.4918, "step": 40398 }, { "epoch": 0.856800492036224, "grad_norm": 0.8015326261520386, "learning_rate": 1.2249355290286324e-05, "loss": 0.529, "step": 40399 }, { "epoch": 0.8568217004941571, "grad_norm": 0.3998960256576538, "learning_rate": 1.2249030339979517e-05, "loss": 0.533, "step": 40400 }, { "epoch": 0.8568429089520901, "grad_norm": 0.5230826139450073, "learning_rate": 1.224870538717136e-05, "loss": 0.4894, "step": 40401 }, { "epoch": 0.8568641174100231, "grad_norm": 0.3717437982559204, "learning_rate": 1.2248380431862213e-05, "loss": 0.4633, "step": 40402 }, { "epoch": 0.8568853258679562, "grad_norm": 0.3891046345233917, "learning_rate": 1.2248055474052434e-05, "loss": 0.5217, "step": 40403 }, { "epoch": 0.8569065343258891, "grad_norm": 0.3320191502571106, "learning_rate": 1.2247730513742391e-05, "loss": 0.4832, "step": 40404 }, { "epoch": 0.8569277427838222, "grad_norm": 0.36634406447410583, "learning_rate": 1.2247405550932439e-05, "loss": 0.5284, "step": 40405 }, { "epoch": 0.8569489512417552, "grad_norm": 0.35134217143058777, "learning_rate": 1.2247080585622944e-05, "loss": 0.4706, "step": 40406 }, { "epoch": 0.8569701596996883, "grad_norm": 0.34286120533943176, "learning_rate": 1.2246755617814263e-05, "loss": 0.4791, "step": 40407 }, { "epoch": 0.8569913681576212, "grad_norm": 0.33674150705337524, "learning_rate": 1.2246430647506761e-05, "loss": 0.4944, "step": 40408 }, { "epoch": 0.8570125766155543, "grad_norm": 0.4107431173324585, "learning_rate": 1.2246105674700799e-05, "loss": 0.4805, "step": 40409 }, { "epoch": 0.8570337850734873, "grad_norm": 0.3849036395549774, "learning_rate": 1.2245780699396738e-05, "loss": 0.4533, "step": 40410 }, { "epoch": 0.8570549935314203, "grad_norm": 0.34670618176460266, "learning_rate": 1.2245455721594939e-05, "loss": 0.4364, "step": 40411 }, { "epoch": 0.8570762019893533, "grad_norm": 0.40209925174713135, "learning_rate": 1.2245130741295766e-05, "loss": 0.4906, "step": 40412 }, { "epoch": 0.8570974104472864, "grad_norm": 0.47622817754745483, "learning_rate": 1.2244805758499577e-05, "loss": 0.5652, "step": 40413 }, { "epoch": 0.8571186189052195, "grad_norm": 0.44185712933540344, "learning_rate": 1.2244480773206728e-05, "loss": 0.4646, "step": 40414 }, { "epoch": 0.8571398273631524, "grad_norm": 0.45121046900749207, "learning_rate": 1.2244155785417593e-05, "loss": 0.5327, "step": 40415 }, { "epoch": 0.8571610358210855, "grad_norm": 0.6295198798179626, "learning_rate": 1.2243830795132528e-05, "loss": 0.4609, "step": 40416 }, { "epoch": 0.8571822442790185, "grad_norm": 0.42794710397720337, "learning_rate": 1.224350580235189e-05, "loss": 0.4511, "step": 40417 }, { "epoch": 0.8572034527369515, "grad_norm": 0.3596939146518707, "learning_rate": 1.2243180807076049e-05, "loss": 0.4968, "step": 40418 }, { "epoch": 0.8572246611948845, "grad_norm": 0.3635871708393097, "learning_rate": 1.2242855809305359e-05, "loss": 0.3952, "step": 40419 }, { "epoch": 0.8572458696528176, "grad_norm": 0.3214026093482971, "learning_rate": 1.2242530809040183e-05, "loss": 0.4505, "step": 40420 }, { "epoch": 0.8572670781107505, "grad_norm": 0.3889800012111664, "learning_rate": 1.2242205806280888e-05, "loss": 0.442, "step": 40421 }, { "epoch": 0.8572882865686836, "grad_norm": 0.35156556963920593, "learning_rate": 1.2241880801027826e-05, "loss": 0.421, "step": 40422 }, { "epoch": 0.8573094950266166, "grad_norm": 0.3361671268939972, "learning_rate": 1.2241555793281368e-05, "loss": 0.4654, "step": 40423 }, { "epoch": 0.8573307034845496, "grad_norm": 0.3422347605228424, "learning_rate": 1.224123078304187e-05, "loss": 0.4339, "step": 40424 }, { "epoch": 0.8573519119424826, "grad_norm": 0.38096243143081665, "learning_rate": 1.2240905770309693e-05, "loss": 0.4938, "step": 40425 }, { "epoch": 0.8573731204004157, "grad_norm": 0.3108328580856323, "learning_rate": 1.2240580755085203e-05, "loss": 0.5124, "step": 40426 }, { "epoch": 0.8573943288583488, "grad_norm": 0.3367782235145569, "learning_rate": 1.2240255737368756e-05, "loss": 0.4485, "step": 40427 }, { "epoch": 0.8574155373162817, "grad_norm": 1.5763992071151733, "learning_rate": 1.2239930717160716e-05, "loss": 0.5277, "step": 40428 }, { "epoch": 0.8574367457742148, "grad_norm": 0.37107187509536743, "learning_rate": 1.2239605694461445e-05, "loss": 0.5223, "step": 40429 }, { "epoch": 0.8574579542321478, "grad_norm": 0.4240385890007019, "learning_rate": 1.2239280669271305e-05, "loss": 0.5045, "step": 40430 }, { "epoch": 0.8574791626900808, "grad_norm": 0.3892725706100464, "learning_rate": 1.2238955641590655e-05, "loss": 0.487, "step": 40431 }, { "epoch": 0.8575003711480138, "grad_norm": 0.336186945438385, "learning_rate": 1.223863061141986e-05, "loss": 0.5064, "step": 40432 }, { "epoch": 0.8575215796059469, "grad_norm": 0.3415803611278534, "learning_rate": 1.2238305578759276e-05, "loss": 0.4476, "step": 40433 }, { "epoch": 0.8575427880638798, "grad_norm": 0.5619575381278992, "learning_rate": 1.223798054360927e-05, "loss": 0.5676, "step": 40434 }, { "epoch": 0.8575639965218129, "grad_norm": 0.4007599353790283, "learning_rate": 1.2237655505970203e-05, "loss": 0.6504, "step": 40435 }, { "epoch": 0.8575852049797459, "grad_norm": 0.33616575598716736, "learning_rate": 1.2237330465842431e-05, "loss": 0.5012, "step": 40436 }, { "epoch": 0.857606413437679, "grad_norm": 0.3892259895801544, "learning_rate": 1.2237005423226323e-05, "loss": 0.5239, "step": 40437 }, { "epoch": 0.8576276218956119, "grad_norm": 0.4517272114753723, "learning_rate": 1.2236680378122236e-05, "loss": 0.5135, "step": 40438 }, { "epoch": 0.857648830353545, "grad_norm": 0.3618279993534088, "learning_rate": 1.2236355330530534e-05, "loss": 0.5014, "step": 40439 }, { "epoch": 0.857670038811478, "grad_norm": 0.38178616762161255, "learning_rate": 1.2236030280451574e-05, "loss": 0.522, "step": 40440 }, { "epoch": 0.857691247269411, "grad_norm": 0.3584062159061432, "learning_rate": 1.2235705227885723e-05, "loss": 0.5088, "step": 40441 }, { "epoch": 0.8577124557273441, "grad_norm": 0.3735949993133545, "learning_rate": 1.2235380172833338e-05, "loss": 0.4932, "step": 40442 }, { "epoch": 0.8577336641852771, "grad_norm": 0.39928701519966125, "learning_rate": 1.2235055115294785e-05, "loss": 0.5658, "step": 40443 }, { "epoch": 0.8577548726432102, "grad_norm": 0.3472208082675934, "learning_rate": 1.2234730055270423e-05, "loss": 0.481, "step": 40444 }, { "epoch": 0.8577760811011431, "grad_norm": 0.3993307948112488, "learning_rate": 1.223440499276061e-05, "loss": 0.5671, "step": 40445 }, { "epoch": 0.8577972895590762, "grad_norm": 0.3573227524757385, "learning_rate": 1.2234079927765714e-05, "loss": 0.4879, "step": 40446 }, { "epoch": 0.8578184980170092, "grad_norm": 0.3466792702674866, "learning_rate": 1.2233754860286096e-05, "loss": 0.4803, "step": 40447 }, { "epoch": 0.8578397064749422, "grad_norm": 0.32342979311943054, "learning_rate": 1.2233429790322111e-05, "loss": 0.4887, "step": 40448 }, { "epoch": 0.8578609149328752, "grad_norm": 0.3461590111255646, "learning_rate": 1.2233104717874127e-05, "loss": 0.467, "step": 40449 }, { "epoch": 0.8578821233908083, "grad_norm": 0.3387519121170044, "learning_rate": 1.2232779642942503e-05, "loss": 0.4535, "step": 40450 }, { "epoch": 0.8579033318487412, "grad_norm": 0.36175432801246643, "learning_rate": 1.2232454565527601e-05, "loss": 0.4924, "step": 40451 }, { "epoch": 0.8579245403066743, "grad_norm": 0.36900582909584045, "learning_rate": 1.223212948562978e-05, "loss": 0.3959, "step": 40452 }, { "epoch": 0.8579457487646073, "grad_norm": 0.43101316690444946, "learning_rate": 1.2231804403249407e-05, "loss": 0.4718, "step": 40453 }, { "epoch": 0.8579669572225403, "grad_norm": 0.3582055866718292, "learning_rate": 1.223147931838684e-05, "loss": 0.4692, "step": 40454 }, { "epoch": 0.8579881656804734, "grad_norm": 0.40047886967658997, "learning_rate": 1.2231154231042441e-05, "loss": 0.5723, "step": 40455 }, { "epoch": 0.8580093741384064, "grad_norm": 0.3390650153160095, "learning_rate": 1.2230829141216572e-05, "loss": 0.4747, "step": 40456 }, { "epoch": 0.8580305825963395, "grad_norm": 0.3783021867275238, "learning_rate": 1.2230504048909593e-05, "loss": 0.4991, "step": 40457 }, { "epoch": 0.8580517910542724, "grad_norm": 0.3751755952835083, "learning_rate": 1.2230178954121868e-05, "loss": 0.4685, "step": 40458 }, { "epoch": 0.8580729995122055, "grad_norm": 0.40951019525527954, "learning_rate": 1.2229853856853755e-05, "loss": 0.5564, "step": 40459 }, { "epoch": 0.8580942079701385, "grad_norm": 0.3806573450565338, "learning_rate": 1.222952875710562e-05, "loss": 0.5087, "step": 40460 }, { "epoch": 0.8581154164280715, "grad_norm": 0.41282859444618225, "learning_rate": 1.2229203654877824e-05, "loss": 0.5222, "step": 40461 }, { "epoch": 0.8581366248860045, "grad_norm": 0.3944445550441742, "learning_rate": 1.2228878550170724e-05, "loss": 0.4833, "step": 40462 }, { "epoch": 0.8581578333439376, "grad_norm": 0.3666742146015167, "learning_rate": 1.2228553442984686e-05, "loss": 0.4942, "step": 40463 }, { "epoch": 0.8581790418018705, "grad_norm": 0.36879608035087585, "learning_rate": 1.222822833332007e-05, "loss": 0.437, "step": 40464 }, { "epoch": 0.8582002502598036, "grad_norm": 0.38580241799354553, "learning_rate": 1.2227903221177235e-05, "loss": 0.4073, "step": 40465 }, { "epoch": 0.8582214587177366, "grad_norm": 0.4552901089191437, "learning_rate": 1.222757810655655e-05, "loss": 0.5136, "step": 40466 }, { "epoch": 0.8582426671756697, "grad_norm": 0.3542511761188507, "learning_rate": 1.2227252989458372e-05, "loss": 0.4308, "step": 40467 }, { "epoch": 0.8582638756336027, "grad_norm": 0.402951717376709, "learning_rate": 1.2226927869883059e-05, "loss": 0.4638, "step": 40468 }, { "epoch": 0.8582850840915357, "grad_norm": 0.38054341077804565, "learning_rate": 1.222660274783098e-05, "loss": 0.424, "step": 40469 }, { "epoch": 0.8583062925494688, "grad_norm": 0.3988049328327179, "learning_rate": 1.222627762330249e-05, "loss": 0.4831, "step": 40470 }, { "epoch": 0.8583275010074017, "grad_norm": 0.3732846975326538, "learning_rate": 1.2225952496297952e-05, "loss": 0.5005, "step": 40471 }, { "epoch": 0.8583487094653348, "grad_norm": 0.3824879825115204, "learning_rate": 1.2225627366817732e-05, "loss": 0.5394, "step": 40472 }, { "epoch": 0.8583699179232678, "grad_norm": 0.3570750653743744, "learning_rate": 1.2225302234862187e-05, "loss": 0.4907, "step": 40473 }, { "epoch": 0.8583911263812009, "grad_norm": 0.5334427356719971, "learning_rate": 1.2224977100431681e-05, "loss": 0.5541, "step": 40474 }, { "epoch": 0.8584123348391338, "grad_norm": 0.33429795503616333, "learning_rate": 1.2224651963526575e-05, "loss": 0.4997, "step": 40475 }, { "epoch": 0.8584335432970669, "grad_norm": 0.3581264615058899, "learning_rate": 1.222432682414723e-05, "loss": 0.4683, "step": 40476 }, { "epoch": 0.8584547517549999, "grad_norm": 0.31661197543144226, "learning_rate": 1.2224001682294009e-05, "loss": 0.4619, "step": 40477 }, { "epoch": 0.8584759602129329, "grad_norm": 0.40935733914375305, "learning_rate": 1.2223676537967271e-05, "loss": 0.5014, "step": 40478 }, { "epoch": 0.8584971686708659, "grad_norm": 0.4256855845451355, "learning_rate": 1.2223351391167379e-05, "loss": 0.49, "step": 40479 }, { "epoch": 0.858518377128799, "grad_norm": 0.358651727437973, "learning_rate": 1.2223026241894696e-05, "loss": 0.4304, "step": 40480 }, { "epoch": 0.8585395855867319, "grad_norm": 0.37694990634918213, "learning_rate": 1.2222701090149585e-05, "loss": 0.4781, "step": 40481 }, { "epoch": 0.858560794044665, "grad_norm": 0.3573671579360962, "learning_rate": 1.2222375935932401e-05, "loss": 0.5084, "step": 40482 }, { "epoch": 0.8585820025025981, "grad_norm": 0.6689243316650391, "learning_rate": 1.2222050779243512e-05, "loss": 0.532, "step": 40483 }, { "epoch": 0.858603210960531, "grad_norm": 0.3290403485298157, "learning_rate": 1.2221725620083276e-05, "loss": 0.4919, "step": 40484 }, { "epoch": 0.8586244194184641, "grad_norm": 0.34129995107650757, "learning_rate": 1.2221400458452056e-05, "loss": 0.4885, "step": 40485 }, { "epoch": 0.8586456278763971, "grad_norm": 0.35057932138442993, "learning_rate": 1.2221075294350216e-05, "loss": 0.5467, "step": 40486 }, { "epoch": 0.8586668363343302, "grad_norm": 0.35242754220962524, "learning_rate": 1.2220750127778112e-05, "loss": 0.4373, "step": 40487 }, { "epoch": 0.8586880447922631, "grad_norm": 0.38384532928466797, "learning_rate": 1.2220424958736113e-05, "loss": 0.4457, "step": 40488 }, { "epoch": 0.8587092532501962, "grad_norm": 0.35899618268013, "learning_rate": 1.2220099787224573e-05, "loss": 0.5358, "step": 40489 }, { "epoch": 0.8587304617081292, "grad_norm": 2.004638671875, "learning_rate": 1.2219774613243858e-05, "loss": 0.5607, "step": 40490 }, { "epoch": 0.8587516701660622, "grad_norm": 0.3605746924877167, "learning_rate": 1.2219449436794331e-05, "loss": 0.5025, "step": 40491 }, { "epoch": 0.8587728786239952, "grad_norm": 0.38064154982566833, "learning_rate": 1.2219124257876349e-05, "loss": 0.4486, "step": 40492 }, { "epoch": 0.8587940870819283, "grad_norm": 0.43081703782081604, "learning_rate": 1.2218799076490278e-05, "loss": 0.5051, "step": 40493 }, { "epoch": 0.8588152955398612, "grad_norm": 0.3645123839378357, "learning_rate": 1.2218473892636477e-05, "loss": 0.4773, "step": 40494 }, { "epoch": 0.8588365039977943, "grad_norm": 0.32659366726875305, "learning_rate": 1.2218148706315312e-05, "loss": 0.4975, "step": 40495 }, { "epoch": 0.8588577124557274, "grad_norm": 0.37883278727531433, "learning_rate": 1.2217823517527137e-05, "loss": 0.4526, "step": 40496 }, { "epoch": 0.8588789209136604, "grad_norm": 0.4111822545528412, "learning_rate": 1.221749832627232e-05, "loss": 0.5185, "step": 40497 }, { "epoch": 0.8589001293715934, "grad_norm": 0.37588393688201904, "learning_rate": 1.2217173132551219e-05, "loss": 0.469, "step": 40498 }, { "epoch": 0.8589213378295264, "grad_norm": 0.3405784070491791, "learning_rate": 1.2216847936364198e-05, "loss": 0.3883, "step": 40499 }, { "epoch": 0.8589425462874595, "grad_norm": 0.35473892092704773, "learning_rate": 1.221652273771162e-05, "loss": 0.4793, "step": 40500 }, { "epoch": 0.8589637547453924, "grad_norm": 0.342296302318573, "learning_rate": 1.221619753659384e-05, "loss": 0.5168, "step": 40501 }, { "epoch": 0.8589849632033255, "grad_norm": 0.33235403895378113, "learning_rate": 1.221587233301123e-05, "loss": 0.425, "step": 40502 }, { "epoch": 0.8590061716612585, "grad_norm": 0.42152491211891174, "learning_rate": 1.2215547126964144e-05, "loss": 0.4316, "step": 40503 }, { "epoch": 0.8590273801191916, "grad_norm": 0.3721671998500824, "learning_rate": 1.2215221918452941e-05, "loss": 0.4981, "step": 40504 }, { "epoch": 0.8590485885771245, "grad_norm": 0.346184104681015, "learning_rate": 1.2214896707477994e-05, "loss": 0.474, "step": 40505 }, { "epoch": 0.8590697970350576, "grad_norm": 0.3441511392593384, "learning_rate": 1.2214571494039658e-05, "loss": 0.5339, "step": 40506 }, { "epoch": 0.8590910054929906, "grad_norm": 0.4042871594429016, "learning_rate": 1.2214246278138291e-05, "loss": 0.5176, "step": 40507 }, { "epoch": 0.8591122139509236, "grad_norm": 0.404281347990036, "learning_rate": 1.221392105977426e-05, "loss": 0.5416, "step": 40508 }, { "epoch": 0.8591334224088567, "grad_norm": 0.33987805247306824, "learning_rate": 1.2213595838947927e-05, "loss": 0.4722, "step": 40509 }, { "epoch": 0.8591546308667897, "grad_norm": 0.3583903908729553, "learning_rate": 1.2213270615659647e-05, "loss": 0.4857, "step": 40510 }, { "epoch": 0.8591758393247227, "grad_norm": 0.3377227485179901, "learning_rate": 1.2212945389909792e-05, "loss": 0.4649, "step": 40511 }, { "epoch": 0.8591970477826557, "grad_norm": 0.35525721311569214, "learning_rate": 1.2212620161698715e-05, "loss": 0.5175, "step": 40512 }, { "epoch": 0.8592182562405888, "grad_norm": 0.3848992586135864, "learning_rate": 1.2212294931026783e-05, "loss": 0.5202, "step": 40513 }, { "epoch": 0.8592394646985217, "grad_norm": 0.3654464781284332, "learning_rate": 1.2211969697894355e-05, "loss": 0.5081, "step": 40514 }, { "epoch": 0.8592606731564548, "grad_norm": 0.379922091960907, "learning_rate": 1.2211644462301793e-05, "loss": 0.4789, "step": 40515 }, { "epoch": 0.8592818816143878, "grad_norm": 0.32743945717811584, "learning_rate": 1.2211319224249459e-05, "loss": 0.481, "step": 40516 }, { "epoch": 0.8593030900723209, "grad_norm": 0.41638505458831787, "learning_rate": 1.2210993983737719e-05, "loss": 0.5251, "step": 40517 }, { "epoch": 0.8593242985302538, "grad_norm": 0.6073370575904846, "learning_rate": 1.2210668740766925e-05, "loss": 0.463, "step": 40518 }, { "epoch": 0.8593455069881869, "grad_norm": 0.38233551383018494, "learning_rate": 1.2210343495337447e-05, "loss": 0.5548, "step": 40519 }, { "epoch": 0.8593667154461199, "grad_norm": 0.36775290966033936, "learning_rate": 1.2210018247449646e-05, "loss": 0.4282, "step": 40520 }, { "epoch": 0.8593879239040529, "grad_norm": 0.36739012598991394, "learning_rate": 1.220969299710388e-05, "loss": 0.5885, "step": 40521 }, { "epoch": 0.8594091323619859, "grad_norm": 0.3834519386291504, "learning_rate": 1.2209367744300514e-05, "loss": 0.4144, "step": 40522 }, { "epoch": 0.859430340819919, "grad_norm": 0.368639200925827, "learning_rate": 1.2209042489039904e-05, "loss": 0.5198, "step": 40523 }, { "epoch": 0.8594515492778521, "grad_norm": 0.37059473991394043, "learning_rate": 1.2208717231322418e-05, "loss": 0.4297, "step": 40524 }, { "epoch": 0.859472757735785, "grad_norm": 0.40231460332870483, "learning_rate": 1.220839197114842e-05, "loss": 0.4563, "step": 40525 }, { "epoch": 0.8594939661937181, "grad_norm": 0.362490177154541, "learning_rate": 1.2208066708518264e-05, "loss": 0.4687, "step": 40526 }, { "epoch": 0.8595151746516511, "grad_norm": 0.33408358693122864, "learning_rate": 1.2207741443432314e-05, "loss": 0.4028, "step": 40527 }, { "epoch": 0.8595363831095841, "grad_norm": 0.3053273558616638, "learning_rate": 1.2207416175890938e-05, "loss": 0.3815, "step": 40528 }, { "epoch": 0.8595575915675171, "grad_norm": 0.33587461709976196, "learning_rate": 1.220709090589449e-05, "loss": 0.425, "step": 40529 }, { "epoch": 0.8595788000254502, "grad_norm": 0.34131473302841187, "learning_rate": 1.2206765633443334e-05, "loss": 0.54, "step": 40530 }, { "epoch": 0.8596000084833831, "grad_norm": 0.37661513686180115, "learning_rate": 1.2206440358537832e-05, "loss": 0.5239, "step": 40531 }, { "epoch": 0.8596212169413162, "grad_norm": 0.33955222368240356, "learning_rate": 1.2206115081178346e-05, "loss": 0.5214, "step": 40532 }, { "epoch": 0.8596424253992492, "grad_norm": 0.3962257504463196, "learning_rate": 1.2205789801365241e-05, "loss": 0.5054, "step": 40533 }, { "epoch": 0.8596636338571823, "grad_norm": 0.36106541752815247, "learning_rate": 1.2205464519098875e-05, "loss": 0.5105, "step": 40534 }, { "epoch": 0.8596848423151152, "grad_norm": 0.35885608196258545, "learning_rate": 1.220513923437961e-05, "loss": 0.5053, "step": 40535 }, { "epoch": 0.8597060507730483, "grad_norm": 0.38863131403923035, "learning_rate": 1.2204813947207808e-05, "loss": 0.5431, "step": 40536 }, { "epoch": 0.8597272592309814, "grad_norm": 0.37917256355285645, "learning_rate": 1.220448865758383e-05, "loss": 0.4824, "step": 40537 }, { "epoch": 0.8597484676889143, "grad_norm": 0.38049933314323425, "learning_rate": 1.220416336550804e-05, "loss": 0.4914, "step": 40538 }, { "epoch": 0.8597696761468474, "grad_norm": 0.45401445031166077, "learning_rate": 1.22038380709808e-05, "loss": 0.4268, "step": 40539 }, { "epoch": 0.8597908846047804, "grad_norm": 0.3665259778499603, "learning_rate": 1.220351277400247e-05, "loss": 0.4609, "step": 40540 }, { "epoch": 0.8598120930627134, "grad_norm": 0.3429635465145111, "learning_rate": 1.220318747457341e-05, "loss": 0.5652, "step": 40541 }, { "epoch": 0.8598333015206464, "grad_norm": 0.9118282198905945, "learning_rate": 1.2202862172693983e-05, "loss": 0.5076, "step": 40542 }, { "epoch": 0.8598545099785795, "grad_norm": 0.3539431095123291, "learning_rate": 1.2202536868364556e-05, "loss": 0.4733, "step": 40543 }, { "epoch": 0.8598757184365124, "grad_norm": 0.6950374841690063, "learning_rate": 1.2202211561585483e-05, "loss": 0.4684, "step": 40544 }, { "epoch": 0.8598969268944455, "grad_norm": 0.327511191368103, "learning_rate": 1.2201886252357133e-05, "loss": 0.3947, "step": 40545 }, { "epoch": 0.8599181353523785, "grad_norm": 0.34169328212738037, "learning_rate": 1.2201560940679862e-05, "loss": 0.4858, "step": 40546 }, { "epoch": 0.8599393438103116, "grad_norm": 0.3915421664714813, "learning_rate": 1.2201235626554035e-05, "loss": 0.5312, "step": 40547 }, { "epoch": 0.8599605522682445, "grad_norm": 0.3877007067203522, "learning_rate": 1.2200910309980012e-05, "loss": 0.5002, "step": 40548 }, { "epoch": 0.8599817607261776, "grad_norm": 0.34483620524406433, "learning_rate": 1.2200584990958157e-05, "loss": 0.4551, "step": 40549 }, { "epoch": 0.8600029691841107, "grad_norm": 0.4418887794017792, "learning_rate": 1.2200259669488828e-05, "loss": 0.4883, "step": 40550 }, { "epoch": 0.8600241776420436, "grad_norm": 0.42976489663124084, "learning_rate": 1.2199934345572393e-05, "loss": 0.4337, "step": 40551 }, { "epoch": 0.8600453860999767, "grad_norm": 0.39202946424484253, "learning_rate": 1.2199609019209207e-05, "loss": 0.5024, "step": 40552 }, { "epoch": 0.8600665945579097, "grad_norm": 0.34013116359710693, "learning_rate": 1.2199283690399635e-05, "loss": 0.5421, "step": 40553 }, { "epoch": 0.8600878030158428, "grad_norm": 0.34593087434768677, "learning_rate": 1.2198958359144041e-05, "loss": 0.5284, "step": 40554 }, { "epoch": 0.8601090114737757, "grad_norm": 0.3264097571372986, "learning_rate": 1.2198633025442782e-05, "loss": 0.4164, "step": 40555 }, { "epoch": 0.8601302199317088, "grad_norm": 0.32216641306877136, "learning_rate": 1.2198307689296223e-05, "loss": 0.3748, "step": 40556 }, { "epoch": 0.8601514283896418, "grad_norm": 0.38437893986701965, "learning_rate": 1.2197982350704727e-05, "loss": 0.4744, "step": 40557 }, { "epoch": 0.8601726368475748, "grad_norm": 0.3557521104812622, "learning_rate": 1.2197657009668653e-05, "loss": 0.5864, "step": 40558 }, { "epoch": 0.8601938453055078, "grad_norm": 0.3530529737472534, "learning_rate": 1.2197331666188366e-05, "loss": 0.4265, "step": 40559 }, { "epoch": 0.8602150537634409, "grad_norm": 0.39652422070503235, "learning_rate": 1.2197006320264221e-05, "loss": 0.5121, "step": 40560 }, { "epoch": 0.8602362622213738, "grad_norm": 0.3422226011753082, "learning_rate": 1.2196680971896586e-05, "loss": 0.5054, "step": 40561 }, { "epoch": 0.8602574706793069, "grad_norm": 0.39106640219688416, "learning_rate": 1.2196355621085825e-05, "loss": 0.4694, "step": 40562 }, { "epoch": 0.8602786791372399, "grad_norm": 0.3943229019641876, "learning_rate": 1.2196030267832298e-05, "loss": 0.5127, "step": 40563 }, { "epoch": 0.860299887595173, "grad_norm": 0.5964352488517761, "learning_rate": 1.2195704912136359e-05, "loss": 0.547, "step": 40564 }, { "epoch": 0.860321096053106, "grad_norm": 0.3359511196613312, "learning_rate": 1.2195379553998378e-05, "loss": 0.4532, "step": 40565 }, { "epoch": 0.860342304511039, "grad_norm": 0.39742669463157654, "learning_rate": 1.2195054193418717e-05, "loss": 0.5127, "step": 40566 }, { "epoch": 0.8603635129689721, "grad_norm": 0.3960902690887451, "learning_rate": 1.2194728830397735e-05, "loss": 0.5294, "step": 40567 }, { "epoch": 0.860384721426905, "grad_norm": 0.3599247634410858, "learning_rate": 1.2194403464935793e-05, "loss": 0.489, "step": 40568 }, { "epoch": 0.8604059298848381, "grad_norm": 0.3531956970691681, "learning_rate": 1.2194078097033256e-05, "loss": 0.5103, "step": 40569 }, { "epoch": 0.8604271383427711, "grad_norm": 0.34315648674964905, "learning_rate": 1.2193752726690484e-05, "loss": 0.4603, "step": 40570 }, { "epoch": 0.8604483468007041, "grad_norm": 0.36532342433929443, "learning_rate": 1.2193427353907841e-05, "loss": 0.4421, "step": 40571 }, { "epoch": 0.8604695552586371, "grad_norm": 0.3581736087799072, "learning_rate": 1.2193101978685684e-05, "loss": 0.4727, "step": 40572 }, { "epoch": 0.8604907637165702, "grad_norm": 0.3445345163345337, "learning_rate": 1.2192776601024382e-05, "loss": 0.4438, "step": 40573 }, { "epoch": 0.8605119721745031, "grad_norm": 0.3553830683231354, "learning_rate": 1.2192451220924287e-05, "loss": 0.4867, "step": 40574 }, { "epoch": 0.8605331806324362, "grad_norm": 0.377817839384079, "learning_rate": 1.2192125838385769e-05, "loss": 0.5262, "step": 40575 }, { "epoch": 0.8605543890903692, "grad_norm": 0.3430899381637573, "learning_rate": 1.219180045340919e-05, "loss": 0.4741, "step": 40576 }, { "epoch": 0.8605755975483023, "grad_norm": 0.4118034839630127, "learning_rate": 1.2191475065994909e-05, "loss": 0.5571, "step": 40577 }, { "epoch": 0.8605968060062353, "grad_norm": 0.3994101881980896, "learning_rate": 1.2191149676143288e-05, "loss": 0.4774, "step": 40578 }, { "epoch": 0.8606180144641683, "grad_norm": 0.4119466543197632, "learning_rate": 1.219082428385469e-05, "loss": 0.4713, "step": 40579 }, { "epoch": 0.8606392229221014, "grad_norm": 0.3685188591480255, "learning_rate": 1.2190498889129474e-05, "loss": 0.4775, "step": 40580 }, { "epoch": 0.8606604313800343, "grad_norm": 0.3577705919742584, "learning_rate": 1.2190173491968005e-05, "loss": 0.4654, "step": 40581 }, { "epoch": 0.8606816398379674, "grad_norm": 0.4047260582447052, "learning_rate": 1.2189848092370646e-05, "loss": 0.5461, "step": 40582 }, { "epoch": 0.8607028482959004, "grad_norm": 0.3488110899925232, "learning_rate": 1.2189522690337757e-05, "loss": 0.5341, "step": 40583 }, { "epoch": 0.8607240567538335, "grad_norm": 0.41278985142707825, "learning_rate": 1.2189197285869698e-05, "loss": 0.6102, "step": 40584 }, { "epoch": 0.8607452652117664, "grad_norm": 0.33177047967910767, "learning_rate": 1.2188871878966835e-05, "loss": 0.4794, "step": 40585 }, { "epoch": 0.8607664736696995, "grad_norm": 0.33525389432907104, "learning_rate": 1.2188546469629526e-05, "loss": 0.5021, "step": 40586 }, { "epoch": 0.8607876821276325, "grad_norm": 0.3686882555484772, "learning_rate": 1.2188221057858131e-05, "loss": 0.4684, "step": 40587 }, { "epoch": 0.8608088905855655, "grad_norm": 0.35204222798347473, "learning_rate": 1.2187895643653022e-05, "loss": 0.483, "step": 40588 }, { "epoch": 0.8608300990434985, "grad_norm": 0.3909200131893158, "learning_rate": 1.2187570227014552e-05, "loss": 0.5028, "step": 40589 }, { "epoch": 0.8608513075014316, "grad_norm": 0.397676020860672, "learning_rate": 1.2187244807943084e-05, "loss": 0.4812, "step": 40590 }, { "epoch": 0.8608725159593646, "grad_norm": 0.36443760991096497, "learning_rate": 1.2186919386438984e-05, "loss": 0.4841, "step": 40591 }, { "epoch": 0.8608937244172976, "grad_norm": 0.36256229877471924, "learning_rate": 1.2186593962502608e-05, "loss": 0.4886, "step": 40592 }, { "epoch": 0.8609149328752307, "grad_norm": 0.4477732479572296, "learning_rate": 1.2186268536134323e-05, "loss": 0.5192, "step": 40593 }, { "epoch": 0.8609361413331637, "grad_norm": 0.3393036425113678, "learning_rate": 1.2185943107334491e-05, "loss": 0.4265, "step": 40594 }, { "epoch": 0.8609573497910967, "grad_norm": 0.4394492208957672, "learning_rate": 1.2185617676103468e-05, "loss": 0.4831, "step": 40595 }, { "epoch": 0.8609785582490297, "grad_norm": 0.4714881181716919, "learning_rate": 1.2185292242441624e-05, "loss": 0.4905, "step": 40596 }, { "epoch": 0.8609997667069628, "grad_norm": 0.33764752745628357, "learning_rate": 1.2184966806349314e-05, "loss": 0.4532, "step": 40597 }, { "epoch": 0.8610209751648957, "grad_norm": 0.351850688457489, "learning_rate": 1.2184641367826906e-05, "loss": 0.4436, "step": 40598 }, { "epoch": 0.8610421836228288, "grad_norm": 0.3584017753601074, "learning_rate": 1.2184315926874756e-05, "loss": 0.4821, "step": 40599 }, { "epoch": 0.8610633920807618, "grad_norm": 0.38456621766090393, "learning_rate": 1.2183990483493226e-05, "loss": 0.486, "step": 40600 }, { "epoch": 0.8610846005386948, "grad_norm": 0.43559515476226807, "learning_rate": 1.2183665037682686e-05, "loss": 0.4252, "step": 40601 }, { "epoch": 0.8611058089966278, "grad_norm": 0.3849964737892151, "learning_rate": 1.2183339589443493e-05, "loss": 0.5588, "step": 40602 }, { "epoch": 0.8611270174545609, "grad_norm": 0.3437166213989258, "learning_rate": 1.2183014138776006e-05, "loss": 0.4377, "step": 40603 }, { "epoch": 0.8611482259124938, "grad_norm": 0.47012603282928467, "learning_rate": 1.218268868568059e-05, "loss": 0.4826, "step": 40604 }, { "epoch": 0.8611694343704269, "grad_norm": 0.4011806547641754, "learning_rate": 1.218236323015761e-05, "loss": 0.5155, "step": 40605 }, { "epoch": 0.86119064282836, "grad_norm": 0.30992260575294495, "learning_rate": 1.2182037772207418e-05, "loss": 0.4831, "step": 40606 }, { "epoch": 0.861211851286293, "grad_norm": 0.3396928310394287, "learning_rate": 1.2181712311830387e-05, "loss": 0.4379, "step": 40607 }, { "epoch": 0.861233059744226, "grad_norm": 0.3589562475681305, "learning_rate": 1.2181386849026875e-05, "loss": 0.564, "step": 40608 }, { "epoch": 0.861254268202159, "grad_norm": 0.3230564594268799, "learning_rate": 1.2181061383797241e-05, "loss": 0.4399, "step": 40609 }, { "epoch": 0.8612754766600921, "grad_norm": 0.34288904070854187, "learning_rate": 1.2180735916141853e-05, "loss": 0.4901, "step": 40610 }, { "epoch": 0.861296685118025, "grad_norm": 0.3923272490501404, "learning_rate": 1.2180410446061065e-05, "loss": 0.5344, "step": 40611 }, { "epoch": 0.8613178935759581, "grad_norm": 0.364101380109787, "learning_rate": 1.2180084973555246e-05, "loss": 0.5292, "step": 40612 }, { "epoch": 0.8613391020338911, "grad_norm": 0.34312671422958374, "learning_rate": 1.2179759498624755e-05, "loss": 0.4345, "step": 40613 }, { "epoch": 0.8613603104918242, "grad_norm": 0.3540401756763458, "learning_rate": 1.2179434021269953e-05, "loss": 0.4655, "step": 40614 }, { "epoch": 0.8613815189497571, "grad_norm": 0.35148558020591736, "learning_rate": 1.2179108541491207e-05, "loss": 0.4969, "step": 40615 }, { "epoch": 0.8614027274076902, "grad_norm": 0.34233585000038147, "learning_rate": 1.2178783059288875e-05, "loss": 0.5113, "step": 40616 }, { "epoch": 0.8614239358656232, "grad_norm": 0.3666257858276367, "learning_rate": 1.2178457574663316e-05, "loss": 0.4359, "step": 40617 }, { "epoch": 0.8614451443235562, "grad_norm": 0.3934270739555359, "learning_rate": 1.2178132087614898e-05, "loss": 0.4637, "step": 40618 }, { "epoch": 0.8614663527814893, "grad_norm": 0.3631020784378052, "learning_rate": 1.2177806598143978e-05, "loss": 0.4963, "step": 40619 }, { "epoch": 0.8614875612394223, "grad_norm": 0.34639015793800354, "learning_rate": 1.2177481106250921e-05, "loss": 0.4838, "step": 40620 }, { "epoch": 0.8615087696973553, "grad_norm": 0.34540000557899475, "learning_rate": 1.2177155611936091e-05, "loss": 0.5305, "step": 40621 }, { "epoch": 0.8615299781552883, "grad_norm": 0.42264389991760254, "learning_rate": 1.2176830115199847e-05, "loss": 0.4654, "step": 40622 }, { "epoch": 0.8615511866132214, "grad_norm": 0.4158242642879486, "learning_rate": 1.2176504616042548e-05, "loss": 0.4876, "step": 40623 }, { "epoch": 0.8615723950711544, "grad_norm": 0.34617021679878235, "learning_rate": 1.2176179114464564e-05, "loss": 0.4443, "step": 40624 }, { "epoch": 0.8615936035290874, "grad_norm": 0.394043505191803, "learning_rate": 1.2175853610466248e-05, "loss": 0.4823, "step": 40625 }, { "epoch": 0.8616148119870204, "grad_norm": 0.3549899458885193, "learning_rate": 1.2175528104047968e-05, "loss": 0.5032, "step": 40626 }, { "epoch": 0.8616360204449535, "grad_norm": 0.3986491858959198, "learning_rate": 1.2175202595210088e-05, "loss": 0.5624, "step": 40627 }, { "epoch": 0.8616572289028864, "grad_norm": 0.5040980577468872, "learning_rate": 1.217487708395296e-05, "loss": 0.49, "step": 40628 }, { "epoch": 0.8616784373608195, "grad_norm": 0.3519437909126282, "learning_rate": 1.2174551570276957e-05, "loss": 0.5113, "step": 40629 }, { "epoch": 0.8616996458187525, "grad_norm": 0.35426440834999084, "learning_rate": 1.2174226054182438e-05, "loss": 0.4152, "step": 40630 }, { "epoch": 0.8617208542766855, "grad_norm": 0.3680216670036316, "learning_rate": 1.2173900535669762e-05, "loss": 0.4548, "step": 40631 }, { "epoch": 0.8617420627346186, "grad_norm": 0.32916462421417236, "learning_rate": 1.2173575014739291e-05, "loss": 0.4842, "step": 40632 }, { "epoch": 0.8617632711925516, "grad_norm": 0.4367300271987915, "learning_rate": 1.217324949139139e-05, "loss": 0.5333, "step": 40633 }, { "epoch": 0.8617844796504847, "grad_norm": 0.5512860417366028, "learning_rate": 1.217292396562642e-05, "loss": 0.5214, "step": 40634 }, { "epoch": 0.8618056881084176, "grad_norm": 0.4753355085849762, "learning_rate": 1.2172598437444745e-05, "loss": 0.4898, "step": 40635 }, { "epoch": 0.8618268965663507, "grad_norm": 0.38596656918525696, "learning_rate": 1.2172272906846723e-05, "loss": 0.466, "step": 40636 }, { "epoch": 0.8618481050242837, "grad_norm": 0.3720041513442993, "learning_rate": 1.2171947373832716e-05, "loss": 0.4497, "step": 40637 }, { "epoch": 0.8618693134822167, "grad_norm": 0.34580186009407043, "learning_rate": 1.2171621838403089e-05, "loss": 0.4411, "step": 40638 }, { "epoch": 0.8618905219401497, "grad_norm": 0.3699985146522522, "learning_rate": 1.2171296300558204e-05, "loss": 0.525, "step": 40639 }, { "epoch": 0.8619117303980828, "grad_norm": 0.6248937249183655, "learning_rate": 1.2170970760298422e-05, "loss": 0.462, "step": 40640 }, { "epoch": 0.8619329388560157, "grad_norm": 0.3644835650920868, "learning_rate": 1.2170645217624106e-05, "loss": 0.5081, "step": 40641 }, { "epoch": 0.8619541473139488, "grad_norm": 0.34128883481025696, "learning_rate": 1.2170319672535615e-05, "loss": 0.5042, "step": 40642 }, { "epoch": 0.8619753557718818, "grad_norm": 0.3727648854255676, "learning_rate": 1.2169994125033316e-05, "loss": 0.4963, "step": 40643 }, { "epoch": 0.8619965642298149, "grad_norm": 0.38569819927215576, "learning_rate": 1.2169668575117569e-05, "loss": 0.5182, "step": 40644 }, { "epoch": 0.8620177726877478, "grad_norm": 0.34988677501678467, "learning_rate": 1.2169343022788732e-05, "loss": 0.444, "step": 40645 }, { "epoch": 0.8620389811456809, "grad_norm": 0.3396313488483429, "learning_rate": 1.216901746804717e-05, "loss": 0.4985, "step": 40646 }, { "epoch": 0.862060189603614, "grad_norm": 0.333223432302475, "learning_rate": 1.216869191089325e-05, "loss": 0.5922, "step": 40647 }, { "epoch": 0.8620813980615469, "grad_norm": 0.38994482159614563, "learning_rate": 1.2168366351327326e-05, "loss": 0.431, "step": 40648 }, { "epoch": 0.86210260651948, "grad_norm": 0.3501071631908417, "learning_rate": 1.2168040789349763e-05, "loss": 0.5067, "step": 40649 }, { "epoch": 0.862123814977413, "grad_norm": 0.3760828375816345, "learning_rate": 1.216771522496093e-05, "loss": 0.5401, "step": 40650 }, { "epoch": 0.862145023435346, "grad_norm": 3.252967596054077, "learning_rate": 1.2167389658161174e-05, "loss": 0.5273, "step": 40651 }, { "epoch": 0.862166231893279, "grad_norm": 0.365963876247406, "learning_rate": 1.2167064088950873e-05, "loss": 0.4989, "step": 40652 }, { "epoch": 0.8621874403512121, "grad_norm": 0.39074522256851196, "learning_rate": 1.216673851733038e-05, "loss": 0.471, "step": 40653 }, { "epoch": 0.862208648809145, "grad_norm": 0.3615822196006775, "learning_rate": 1.2166412943300058e-05, "loss": 0.4664, "step": 40654 }, { "epoch": 0.8622298572670781, "grad_norm": 0.33964914083480835, "learning_rate": 1.2166087366860272e-05, "loss": 0.5629, "step": 40655 }, { "epoch": 0.8622510657250111, "grad_norm": 0.36133840680122375, "learning_rate": 1.2165761788011383e-05, "loss": 0.4584, "step": 40656 }, { "epoch": 0.8622722741829442, "grad_norm": 0.3392060101032257, "learning_rate": 1.2165436206753752e-05, "loss": 0.4721, "step": 40657 }, { "epoch": 0.8622934826408771, "grad_norm": 0.411394327878952, "learning_rate": 1.2165110623087741e-05, "loss": 0.4033, "step": 40658 }, { "epoch": 0.8623146910988102, "grad_norm": 0.3840651214122772, "learning_rate": 1.2164785037013713e-05, "loss": 0.4192, "step": 40659 }, { "epoch": 0.8623358995567433, "grad_norm": 0.6555067300796509, "learning_rate": 1.2164459448532031e-05, "loss": 0.5135, "step": 40660 }, { "epoch": 0.8623571080146762, "grad_norm": 0.6932557225227356, "learning_rate": 1.2164133857643054e-05, "loss": 0.4923, "step": 40661 }, { "epoch": 0.8623783164726093, "grad_norm": 0.34085166454315186, "learning_rate": 1.2163808264347147e-05, "loss": 0.4118, "step": 40662 }, { "epoch": 0.8623995249305423, "grad_norm": 0.4008578360080719, "learning_rate": 1.2163482668644673e-05, "loss": 0.5337, "step": 40663 }, { "epoch": 0.8624207333884754, "grad_norm": 0.3447384834289551, "learning_rate": 1.2163157070535989e-05, "loss": 0.4948, "step": 40664 }, { "epoch": 0.8624419418464083, "grad_norm": 0.3876623213291168, "learning_rate": 1.2162831470021462e-05, "loss": 0.4858, "step": 40665 }, { "epoch": 0.8624631503043414, "grad_norm": 0.3813881278038025, "learning_rate": 1.2162505867101453e-05, "loss": 0.4756, "step": 40666 }, { "epoch": 0.8624843587622744, "grad_norm": 0.3619421720504761, "learning_rate": 1.2162180261776326e-05, "loss": 0.5065, "step": 40667 }, { "epoch": 0.8625055672202074, "grad_norm": 0.3836408853530884, "learning_rate": 1.2161854654046436e-05, "loss": 0.5305, "step": 40668 }, { "epoch": 0.8625267756781404, "grad_norm": 0.36632344126701355, "learning_rate": 1.2161529043912154e-05, "loss": 0.5083, "step": 40669 }, { "epoch": 0.8625479841360735, "grad_norm": 0.3403013348579407, "learning_rate": 1.2161203431373835e-05, "loss": 0.4483, "step": 40670 }, { "epoch": 0.8625691925940064, "grad_norm": 0.37848466634750366, "learning_rate": 1.2160877816431847e-05, "loss": 0.4856, "step": 40671 }, { "epoch": 0.8625904010519395, "grad_norm": 0.38847899436950684, "learning_rate": 1.2160552199086548e-05, "loss": 0.4966, "step": 40672 }, { "epoch": 0.8626116095098726, "grad_norm": 0.4106709361076355, "learning_rate": 1.2160226579338303e-05, "loss": 0.406, "step": 40673 }, { "epoch": 0.8626328179678056, "grad_norm": 0.32689493894577026, "learning_rate": 1.2159900957187471e-05, "loss": 0.4967, "step": 40674 }, { "epoch": 0.8626540264257386, "grad_norm": 0.35666799545288086, "learning_rate": 1.2159575332634419e-05, "loss": 0.5139, "step": 40675 }, { "epoch": 0.8626752348836716, "grad_norm": 0.5353825688362122, "learning_rate": 1.2159249705679504e-05, "loss": 0.5005, "step": 40676 }, { "epoch": 0.8626964433416047, "grad_norm": 0.39876821637153625, "learning_rate": 1.2158924076323087e-05, "loss": 0.5208, "step": 40677 }, { "epoch": 0.8627176517995376, "grad_norm": 0.4412418305873871, "learning_rate": 1.2158598444565539e-05, "loss": 0.5359, "step": 40678 }, { "epoch": 0.8627388602574707, "grad_norm": 0.3884654641151428, "learning_rate": 1.2158272810407212e-05, "loss": 0.4723, "step": 40679 }, { "epoch": 0.8627600687154037, "grad_norm": 0.4270387887954712, "learning_rate": 1.2157947173848477e-05, "loss": 0.4491, "step": 40680 }, { "epoch": 0.8627812771733367, "grad_norm": 0.6794838905334473, "learning_rate": 1.215762153488969e-05, "loss": 0.4964, "step": 40681 }, { "epoch": 0.8628024856312697, "grad_norm": 0.3487626314163208, "learning_rate": 1.2157295893531213e-05, "loss": 0.4696, "step": 40682 }, { "epoch": 0.8628236940892028, "grad_norm": 0.33242887258529663, "learning_rate": 1.2156970249773413e-05, "loss": 0.4927, "step": 40683 }, { "epoch": 0.8628449025471358, "grad_norm": 0.3480639159679413, "learning_rate": 1.2156644603616648e-05, "loss": 0.4402, "step": 40684 }, { "epoch": 0.8628661110050688, "grad_norm": 0.38798171281814575, "learning_rate": 1.2156318955061281e-05, "loss": 0.4626, "step": 40685 }, { "epoch": 0.8628873194630019, "grad_norm": 0.3894026577472687, "learning_rate": 1.2155993304107679e-05, "loss": 0.4909, "step": 40686 }, { "epoch": 0.8629085279209349, "grad_norm": 0.37041959166526794, "learning_rate": 1.2155667650756195e-05, "loss": 0.5189, "step": 40687 }, { "epoch": 0.8629297363788679, "grad_norm": 0.38311779499053955, "learning_rate": 1.2155341995007197e-05, "loss": 0.4455, "step": 40688 }, { "epoch": 0.8629509448368009, "grad_norm": 0.3709043860435486, "learning_rate": 1.215501633686105e-05, "loss": 0.532, "step": 40689 }, { "epoch": 0.862972153294734, "grad_norm": 0.3455197215080261, "learning_rate": 1.2154690676318109e-05, "loss": 0.4132, "step": 40690 }, { "epoch": 0.8629933617526669, "grad_norm": 0.34499603509902954, "learning_rate": 1.2154365013378739e-05, "loss": 0.4884, "step": 40691 }, { "epoch": 0.8630145702106, "grad_norm": 0.3577350974082947, "learning_rate": 1.2154039348043305e-05, "loss": 0.4501, "step": 40692 }, { "epoch": 0.863035778668533, "grad_norm": 0.3708648085594177, "learning_rate": 1.2153713680312166e-05, "loss": 0.4895, "step": 40693 }, { "epoch": 0.8630569871264661, "grad_norm": 0.38420188426971436, "learning_rate": 1.2153388010185686e-05, "loss": 0.5147, "step": 40694 }, { "epoch": 0.863078195584399, "grad_norm": 0.33485734462738037, "learning_rate": 1.215306233766423e-05, "loss": 0.4549, "step": 40695 }, { "epoch": 0.8630994040423321, "grad_norm": 0.39216136932373047, "learning_rate": 1.215273666274815e-05, "loss": 0.5194, "step": 40696 }, { "epoch": 0.8631206125002651, "grad_norm": 0.3871572017669678, "learning_rate": 1.2152410985437819e-05, "loss": 0.5977, "step": 40697 }, { "epoch": 0.8631418209581981, "grad_norm": 0.3476361632347107, "learning_rate": 1.2152085305733597e-05, "loss": 0.4247, "step": 40698 }, { "epoch": 0.8631630294161311, "grad_norm": 0.366902619600296, "learning_rate": 1.215175962363584e-05, "loss": 0.4832, "step": 40699 }, { "epoch": 0.8631842378740642, "grad_norm": 0.34441593289375305, "learning_rate": 1.2151433939144916e-05, "loss": 0.4393, "step": 40700 }, { "epoch": 0.8632054463319973, "grad_norm": 0.44679003953933716, "learning_rate": 1.2151108252261187e-05, "loss": 0.5283, "step": 40701 }, { "epoch": 0.8632266547899302, "grad_norm": 0.3792172074317932, "learning_rate": 1.2150782562985014e-05, "loss": 0.5687, "step": 40702 }, { "epoch": 0.8632478632478633, "grad_norm": 0.37240222096443176, "learning_rate": 1.2150456871316758e-05, "loss": 0.4834, "step": 40703 }, { "epoch": 0.8632690717057963, "grad_norm": 0.3831128478050232, "learning_rate": 1.2150131177256785e-05, "loss": 0.4841, "step": 40704 }, { "epoch": 0.8632902801637293, "grad_norm": 0.3870764970779419, "learning_rate": 1.2149805480805453e-05, "loss": 0.5072, "step": 40705 }, { "epoch": 0.8633114886216623, "grad_norm": 0.3649531602859497, "learning_rate": 1.2149479781963128e-05, "loss": 0.5242, "step": 40706 }, { "epoch": 0.8633326970795954, "grad_norm": 0.3621404469013214, "learning_rate": 1.2149154080730166e-05, "loss": 0.4649, "step": 40707 }, { "epoch": 0.8633539055375283, "grad_norm": 0.3850855827331543, "learning_rate": 1.2148828377106938e-05, "loss": 0.5783, "step": 40708 }, { "epoch": 0.8633751139954614, "grad_norm": 0.3443080186843872, "learning_rate": 1.2148502671093799e-05, "loss": 0.4885, "step": 40709 }, { "epoch": 0.8633963224533944, "grad_norm": 0.3929535746574402, "learning_rate": 1.2148176962691115e-05, "loss": 0.611, "step": 40710 }, { "epoch": 0.8634175309113274, "grad_norm": 0.37918218970298767, "learning_rate": 1.2147851251899245e-05, "loss": 0.4918, "step": 40711 }, { "epoch": 0.8634387393692604, "grad_norm": 0.35218438506126404, "learning_rate": 1.2147525538718559e-05, "loss": 0.3835, "step": 40712 }, { "epoch": 0.8634599478271935, "grad_norm": 0.40127021074295044, "learning_rate": 1.2147199823149407e-05, "loss": 0.5276, "step": 40713 }, { "epoch": 0.8634811562851266, "grad_norm": 0.3468814194202423, "learning_rate": 1.2146874105192164e-05, "loss": 0.4901, "step": 40714 }, { "epoch": 0.8635023647430595, "grad_norm": 0.3861319422721863, "learning_rate": 1.2146548384847181e-05, "loss": 0.4636, "step": 40715 }, { "epoch": 0.8635235732009926, "grad_norm": 0.43375518918037415, "learning_rate": 1.2146222662114829e-05, "loss": 0.5364, "step": 40716 }, { "epoch": 0.8635447816589256, "grad_norm": 0.40892165899276733, "learning_rate": 1.2145896936995465e-05, "loss": 0.5087, "step": 40717 }, { "epoch": 0.8635659901168586, "grad_norm": 0.3430444002151489, "learning_rate": 1.2145571209489456e-05, "loss": 0.4851, "step": 40718 }, { "epoch": 0.8635871985747916, "grad_norm": 0.3968026041984558, "learning_rate": 1.2145245479597158e-05, "loss": 0.5165, "step": 40719 }, { "epoch": 0.8636084070327247, "grad_norm": 0.4674428701400757, "learning_rate": 1.214491974731894e-05, "loss": 0.3939, "step": 40720 }, { "epoch": 0.8636296154906576, "grad_norm": 0.4187714159488678, "learning_rate": 1.2144594012655158e-05, "loss": 0.4906, "step": 40721 }, { "epoch": 0.8636508239485907, "grad_norm": 0.3511083126068115, "learning_rate": 1.2144268275606175e-05, "loss": 0.4705, "step": 40722 }, { "epoch": 0.8636720324065237, "grad_norm": 0.4181358516216278, "learning_rate": 1.214394253617236e-05, "loss": 0.5351, "step": 40723 }, { "epoch": 0.8636932408644568, "grad_norm": 0.4533350169658661, "learning_rate": 1.2143616794354068e-05, "loss": 0.4909, "step": 40724 }, { "epoch": 0.8637144493223897, "grad_norm": 0.35404783487319946, "learning_rate": 1.2143291050151667e-05, "loss": 0.4033, "step": 40725 }, { "epoch": 0.8637356577803228, "grad_norm": 0.747053861618042, "learning_rate": 1.2142965303565515e-05, "loss": 0.4736, "step": 40726 }, { "epoch": 0.8637568662382559, "grad_norm": 0.3529963791370392, "learning_rate": 1.2142639554595974e-05, "loss": 0.4575, "step": 40727 }, { "epoch": 0.8637780746961888, "grad_norm": 0.3530285358428955, "learning_rate": 1.2142313803243408e-05, "loss": 0.4247, "step": 40728 }, { "epoch": 0.8637992831541219, "grad_norm": 0.46177130937576294, "learning_rate": 1.2141988049508182e-05, "loss": 0.5121, "step": 40729 }, { "epoch": 0.8638204916120549, "grad_norm": 0.3452458381652832, "learning_rate": 1.2141662293390653e-05, "loss": 0.5148, "step": 40730 }, { "epoch": 0.863841700069988, "grad_norm": 0.39593589305877686, "learning_rate": 1.2141336534891186e-05, "loss": 0.4989, "step": 40731 }, { "epoch": 0.8638629085279209, "grad_norm": 0.38652604818344116, "learning_rate": 1.2141010774010144e-05, "loss": 0.3849, "step": 40732 }, { "epoch": 0.863884116985854, "grad_norm": 0.43509605526924133, "learning_rate": 1.2140685010747889e-05, "loss": 0.4572, "step": 40733 }, { "epoch": 0.863905325443787, "grad_norm": 0.3999689519405365, "learning_rate": 1.214035924510478e-05, "loss": 0.5279, "step": 40734 }, { "epoch": 0.86392653390172, "grad_norm": 3.4527416229248047, "learning_rate": 1.2140033477081186e-05, "loss": 0.5192, "step": 40735 }, { "epoch": 0.863947742359653, "grad_norm": 0.42897841334342957, "learning_rate": 1.2139707706677461e-05, "loss": 0.4756, "step": 40736 }, { "epoch": 0.8639689508175861, "grad_norm": 0.3333992063999176, "learning_rate": 1.2139381933893974e-05, "loss": 0.4699, "step": 40737 }, { "epoch": 0.863990159275519, "grad_norm": 0.4098431468009949, "learning_rate": 1.2139056158731084e-05, "loss": 0.5169, "step": 40738 }, { "epoch": 0.8640113677334521, "grad_norm": 0.3926095962524414, "learning_rate": 1.2138730381189156e-05, "loss": 0.5484, "step": 40739 }, { "epoch": 0.8640325761913851, "grad_norm": 0.3696252107620239, "learning_rate": 1.2138404601268552e-05, "loss": 0.5278, "step": 40740 }, { "epoch": 0.8640537846493181, "grad_norm": 0.3893285095691681, "learning_rate": 1.2138078818969629e-05, "loss": 0.4937, "step": 40741 }, { "epoch": 0.8640749931072512, "grad_norm": 0.5160437226295471, "learning_rate": 1.2137753034292755e-05, "loss": 0.4958, "step": 40742 }, { "epoch": 0.8640962015651842, "grad_norm": 0.39107033610343933, "learning_rate": 1.2137427247238291e-05, "loss": 0.5648, "step": 40743 }, { "epoch": 0.8641174100231173, "grad_norm": 0.41371557116508484, "learning_rate": 1.2137101457806599e-05, "loss": 0.5755, "step": 40744 }, { "epoch": 0.8641386184810502, "grad_norm": 0.33945000171661377, "learning_rate": 1.2136775665998043e-05, "loss": 0.4713, "step": 40745 }, { "epoch": 0.8641598269389833, "grad_norm": 0.33824026584625244, "learning_rate": 1.2136449871812981e-05, "loss": 0.4587, "step": 40746 }, { "epoch": 0.8641810353969163, "grad_norm": 0.3761885464191437, "learning_rate": 1.2136124075251775e-05, "loss": 0.5205, "step": 40747 }, { "epoch": 0.8642022438548493, "grad_norm": 0.32314079999923706, "learning_rate": 1.2135798276314796e-05, "loss": 0.4813, "step": 40748 }, { "epoch": 0.8642234523127823, "grad_norm": 0.37802237272262573, "learning_rate": 1.2135472475002398e-05, "loss": 0.4872, "step": 40749 }, { "epoch": 0.8642446607707154, "grad_norm": 0.3304567039012909, "learning_rate": 1.2135146671314949e-05, "loss": 0.3778, "step": 40750 }, { "epoch": 0.8642658692286483, "grad_norm": 0.3546784222126007, "learning_rate": 1.2134820865252805e-05, "loss": 0.5102, "step": 40751 }, { "epoch": 0.8642870776865814, "grad_norm": 0.36196357011795044, "learning_rate": 1.2134495056816333e-05, "loss": 0.4606, "step": 40752 }, { "epoch": 0.8643082861445144, "grad_norm": 0.4045514464378357, "learning_rate": 1.2134169246005896e-05, "loss": 0.4545, "step": 40753 }, { "epoch": 0.8643294946024475, "grad_norm": 0.5107483863830566, "learning_rate": 1.213384343282185e-05, "loss": 0.475, "step": 40754 }, { "epoch": 0.8643507030603805, "grad_norm": 0.31912681460380554, "learning_rate": 1.2133517617264565e-05, "loss": 0.4739, "step": 40755 }, { "epoch": 0.8643719115183135, "grad_norm": 0.3924955725669861, "learning_rate": 1.21331917993344e-05, "loss": 0.5394, "step": 40756 }, { "epoch": 0.8643931199762466, "grad_norm": 0.346198707818985, "learning_rate": 1.2132865979031719e-05, "loss": 0.4612, "step": 40757 }, { "epoch": 0.8644143284341795, "grad_norm": 0.4570958912372589, "learning_rate": 1.213254015635688e-05, "loss": 0.5447, "step": 40758 }, { "epoch": 0.8644355368921126, "grad_norm": 0.4026775658130646, "learning_rate": 1.213221433131025e-05, "loss": 0.4744, "step": 40759 }, { "epoch": 0.8644567453500456, "grad_norm": 0.3838668763637543, "learning_rate": 1.213188850389219e-05, "loss": 0.541, "step": 40760 }, { "epoch": 0.8644779538079786, "grad_norm": 0.3191636800765991, "learning_rate": 1.2131562674103058e-05, "loss": 0.4672, "step": 40761 }, { "epoch": 0.8644991622659116, "grad_norm": 0.3787241578102112, "learning_rate": 1.2131236841943228e-05, "loss": 0.4925, "step": 40762 }, { "epoch": 0.8645203707238447, "grad_norm": 0.4424038231372833, "learning_rate": 1.213091100741305e-05, "loss": 0.5527, "step": 40763 }, { "epoch": 0.8645415791817777, "grad_norm": 0.36543309688568115, "learning_rate": 1.2130585170512892e-05, "loss": 0.4697, "step": 40764 }, { "epoch": 0.8645627876397107, "grad_norm": 0.3551098108291626, "learning_rate": 1.2130259331243118e-05, "loss": 0.4664, "step": 40765 }, { "epoch": 0.8645839960976437, "grad_norm": 0.4184567630290985, "learning_rate": 1.2129933489604084e-05, "loss": 0.5032, "step": 40766 }, { "epoch": 0.8646052045555768, "grad_norm": 0.3472081124782562, "learning_rate": 1.2129607645596158e-05, "loss": 0.4685, "step": 40767 }, { "epoch": 0.8646264130135098, "grad_norm": 0.3702585995197296, "learning_rate": 1.2129281799219703e-05, "loss": 0.4091, "step": 40768 }, { "epoch": 0.8646476214714428, "grad_norm": 0.6646284461021423, "learning_rate": 1.212895595047508e-05, "loss": 0.4974, "step": 40769 }, { "epoch": 0.8646688299293759, "grad_norm": 0.3431234657764435, "learning_rate": 1.2128630099362646e-05, "loss": 0.4577, "step": 40770 }, { "epoch": 0.8646900383873088, "grad_norm": 0.954361617565155, "learning_rate": 1.2128304245882771e-05, "loss": 0.4625, "step": 40771 }, { "epoch": 0.8647112468452419, "grad_norm": 0.3817403018474579, "learning_rate": 1.2127978390035812e-05, "loss": 0.5456, "step": 40772 }, { "epoch": 0.8647324553031749, "grad_norm": 0.36789730191230774, "learning_rate": 1.2127652531822136e-05, "loss": 0.4681, "step": 40773 }, { "epoch": 0.864753663761108, "grad_norm": 0.3962060809135437, "learning_rate": 1.2127326671242106e-05, "loss": 0.4747, "step": 40774 }, { "epoch": 0.8647748722190409, "grad_norm": 0.4216369688510895, "learning_rate": 1.2127000808296077e-05, "loss": 0.5149, "step": 40775 }, { "epoch": 0.864796080676974, "grad_norm": 0.3393458425998688, "learning_rate": 1.212667494298442e-05, "loss": 0.4398, "step": 40776 }, { "epoch": 0.864817289134907, "grad_norm": 0.46980252861976624, "learning_rate": 1.2126349075307494e-05, "loss": 0.4704, "step": 40777 }, { "epoch": 0.86483849759284, "grad_norm": 0.4211783707141876, "learning_rate": 1.2126023205265656e-05, "loss": 0.4074, "step": 40778 }, { "epoch": 0.864859706050773, "grad_norm": 0.604812741279602, "learning_rate": 1.2125697332859274e-05, "loss": 0.5471, "step": 40779 }, { "epoch": 0.8648809145087061, "grad_norm": 0.4157544672489166, "learning_rate": 1.2125371458088714e-05, "loss": 0.4768, "step": 40780 }, { "epoch": 0.864902122966639, "grad_norm": 0.33972954750061035, "learning_rate": 1.2125045580954331e-05, "loss": 0.4628, "step": 40781 }, { "epoch": 0.8649233314245721, "grad_norm": 0.47108620405197144, "learning_rate": 1.2124719701456496e-05, "loss": 0.5137, "step": 40782 }, { "epoch": 0.8649445398825052, "grad_norm": 0.3763367533683777, "learning_rate": 1.212439381959556e-05, "loss": 0.5226, "step": 40783 }, { "epoch": 0.8649657483404382, "grad_norm": 0.38054269552230835, "learning_rate": 1.2124067935371895e-05, "loss": 0.5703, "step": 40784 }, { "epoch": 0.8649869567983712, "grad_norm": 0.41262713074684143, "learning_rate": 1.212374204878586e-05, "loss": 0.4442, "step": 40785 }, { "epoch": 0.8650081652563042, "grad_norm": 0.43900176882743835, "learning_rate": 1.2123416159837816e-05, "loss": 0.524, "step": 40786 }, { "epoch": 0.8650293737142373, "grad_norm": 0.3831615746021271, "learning_rate": 1.2123090268528127e-05, "loss": 0.4357, "step": 40787 }, { "epoch": 0.8650505821721702, "grad_norm": 0.39423832297325134, "learning_rate": 1.2122764374857157e-05, "loss": 0.54, "step": 40788 }, { "epoch": 0.8650717906301033, "grad_norm": 0.38750702142715454, "learning_rate": 1.2122438478825265e-05, "loss": 0.5605, "step": 40789 }, { "epoch": 0.8650929990880363, "grad_norm": 0.4135178029537201, "learning_rate": 1.2122112580432818e-05, "loss": 0.4802, "step": 40790 }, { "epoch": 0.8651142075459693, "grad_norm": 0.3802053928375244, "learning_rate": 1.2121786679680177e-05, "loss": 0.5384, "step": 40791 }, { "epoch": 0.8651354160039023, "grad_norm": 0.3631434738636017, "learning_rate": 1.2121460776567695e-05, "loss": 0.4567, "step": 40792 }, { "epoch": 0.8651566244618354, "grad_norm": 0.34497320652008057, "learning_rate": 1.212113487109575e-05, "loss": 0.509, "step": 40793 }, { "epoch": 0.8651778329197684, "grad_norm": 0.3685147762298584, "learning_rate": 1.2120808963264697e-05, "loss": 0.4529, "step": 40794 }, { "epoch": 0.8651990413777014, "grad_norm": 0.3728826940059662, "learning_rate": 1.2120483053074897e-05, "loss": 0.5232, "step": 40795 }, { "epoch": 0.8652202498356345, "grad_norm": 0.3623599112033844, "learning_rate": 1.2120157140526713e-05, "loss": 0.4766, "step": 40796 }, { "epoch": 0.8652414582935675, "grad_norm": 0.38221725821495056, "learning_rate": 1.2119831225620512e-05, "loss": 0.4781, "step": 40797 }, { "epoch": 0.8652626667515005, "grad_norm": 0.4380435347557068, "learning_rate": 1.211950530835665e-05, "loss": 0.5174, "step": 40798 }, { "epoch": 0.8652838752094335, "grad_norm": 0.45698538422584534, "learning_rate": 1.2119179388735493e-05, "loss": 0.5624, "step": 40799 }, { "epoch": 0.8653050836673666, "grad_norm": 0.3715161681175232, "learning_rate": 1.2118853466757406e-05, "loss": 0.4493, "step": 40800 }, { "epoch": 0.8653262921252995, "grad_norm": 0.3455316424369812, "learning_rate": 1.2118527542422745e-05, "loss": 0.4417, "step": 40801 }, { "epoch": 0.8653475005832326, "grad_norm": 0.38337913155555725, "learning_rate": 1.211820161573188e-05, "loss": 0.5305, "step": 40802 }, { "epoch": 0.8653687090411656, "grad_norm": 0.3317511975765228, "learning_rate": 1.2117875686685165e-05, "loss": 0.3898, "step": 40803 }, { "epoch": 0.8653899174990987, "grad_norm": 0.40116679668426514, "learning_rate": 1.211754975528297e-05, "loss": 0.4913, "step": 40804 }, { "epoch": 0.8654111259570316, "grad_norm": 0.33031943440437317, "learning_rate": 1.2117223821525652e-05, "loss": 0.4288, "step": 40805 }, { "epoch": 0.8654323344149647, "grad_norm": 0.36283212900161743, "learning_rate": 1.2116897885413576e-05, "loss": 0.4708, "step": 40806 }, { "epoch": 0.8654535428728977, "grad_norm": 0.3591141104698181, "learning_rate": 1.2116571946947108e-05, "loss": 0.497, "step": 40807 }, { "epoch": 0.8654747513308307, "grad_norm": 0.38499581813812256, "learning_rate": 1.2116246006126606e-05, "loss": 0.5298, "step": 40808 }, { "epoch": 0.8654959597887638, "grad_norm": 0.3621399998664856, "learning_rate": 1.2115920062952434e-05, "loss": 0.5069, "step": 40809 }, { "epoch": 0.8655171682466968, "grad_norm": 0.4636923670768738, "learning_rate": 1.2115594117424953e-05, "loss": 0.5604, "step": 40810 }, { "epoch": 0.8655383767046299, "grad_norm": 0.5729001760482788, "learning_rate": 1.2115268169544527e-05, "loss": 0.5036, "step": 40811 }, { "epoch": 0.8655595851625628, "grad_norm": 0.38450729846954346, "learning_rate": 1.2114942219311516e-05, "loss": 0.5288, "step": 40812 }, { "epoch": 0.8655807936204959, "grad_norm": 0.37070319056510925, "learning_rate": 1.2114616266726287e-05, "loss": 0.4935, "step": 40813 }, { "epoch": 0.8656020020784289, "grad_norm": 0.3446132242679596, "learning_rate": 1.21142903117892e-05, "loss": 0.4143, "step": 40814 }, { "epoch": 0.8656232105363619, "grad_norm": 1.2885571718215942, "learning_rate": 1.2113964354500618e-05, "loss": 0.4748, "step": 40815 }, { "epoch": 0.8656444189942949, "grad_norm": 0.33038604259490967, "learning_rate": 1.2113638394860903e-05, "loss": 0.4737, "step": 40816 }, { "epoch": 0.865665627452228, "grad_norm": 0.3297244906425476, "learning_rate": 1.2113312432870417e-05, "loss": 0.4849, "step": 40817 }, { "epoch": 0.8656868359101609, "grad_norm": 0.36512917280197144, "learning_rate": 1.2112986468529521e-05, "loss": 0.5142, "step": 40818 }, { "epoch": 0.865708044368094, "grad_norm": 0.3548673987388611, "learning_rate": 1.2112660501838585e-05, "loss": 0.4555, "step": 40819 }, { "epoch": 0.865729252826027, "grad_norm": 0.3540233075618744, "learning_rate": 1.2112334532797963e-05, "loss": 0.513, "step": 40820 }, { "epoch": 0.86575046128396, "grad_norm": 0.387796014547348, "learning_rate": 1.2112008561408022e-05, "loss": 0.457, "step": 40821 }, { "epoch": 0.865771669741893, "grad_norm": 0.3328133523464203, "learning_rate": 1.2111682587669123e-05, "loss": 0.4805, "step": 40822 }, { "epoch": 0.8657928781998261, "grad_norm": 0.3620895445346832, "learning_rate": 1.2111356611581629e-05, "loss": 0.4482, "step": 40823 }, { "epoch": 0.8658140866577592, "grad_norm": 0.3698868751525879, "learning_rate": 1.2111030633145902e-05, "loss": 0.4616, "step": 40824 }, { "epoch": 0.8658352951156921, "grad_norm": 0.4067613482475281, "learning_rate": 1.2110704652362306e-05, "loss": 0.5057, "step": 40825 }, { "epoch": 0.8658565035736252, "grad_norm": 0.3555808961391449, "learning_rate": 1.2110378669231199e-05, "loss": 0.4495, "step": 40826 }, { "epoch": 0.8658777120315582, "grad_norm": 0.3594129979610443, "learning_rate": 1.2110052683752954e-05, "loss": 0.5548, "step": 40827 }, { "epoch": 0.8658989204894912, "grad_norm": 0.38516053557395935, "learning_rate": 1.2109726695927924e-05, "loss": 0.4901, "step": 40828 }, { "epoch": 0.8659201289474242, "grad_norm": 0.3924444615840912, "learning_rate": 1.210940070575647e-05, "loss": 0.4869, "step": 40829 }, { "epoch": 0.8659413374053573, "grad_norm": 0.36454126238822937, "learning_rate": 1.2109074713238965e-05, "loss": 0.4769, "step": 40830 }, { "epoch": 0.8659625458632902, "grad_norm": 0.371637225151062, "learning_rate": 1.210874871837576e-05, "loss": 0.4383, "step": 40831 }, { "epoch": 0.8659837543212233, "grad_norm": 0.3451182246208191, "learning_rate": 1.2108422721167225e-05, "loss": 0.4533, "step": 40832 }, { "epoch": 0.8660049627791563, "grad_norm": 0.3379416763782501, "learning_rate": 1.2108096721613722e-05, "loss": 0.4504, "step": 40833 }, { "epoch": 0.8660261712370894, "grad_norm": 0.41447678208351135, "learning_rate": 1.210777071971561e-05, "loss": 0.5265, "step": 40834 }, { "epoch": 0.8660473796950223, "grad_norm": 0.3383771479129791, "learning_rate": 1.2107444715473258e-05, "loss": 0.4598, "step": 40835 }, { "epoch": 0.8660685881529554, "grad_norm": 0.3837866485118866, "learning_rate": 1.2107118708887022e-05, "loss": 0.4842, "step": 40836 }, { "epoch": 0.8660897966108885, "grad_norm": 0.3782082796096802, "learning_rate": 1.2106792699957264e-05, "loss": 0.4487, "step": 40837 }, { "epoch": 0.8661110050688214, "grad_norm": 0.4043104648590088, "learning_rate": 1.2106466688684351e-05, "loss": 0.5206, "step": 40838 }, { "epoch": 0.8661322135267545, "grad_norm": 0.3392152190208435, "learning_rate": 1.2106140675068647e-05, "loss": 0.4534, "step": 40839 }, { "epoch": 0.8661534219846875, "grad_norm": 0.3402208089828491, "learning_rate": 1.2105814659110509e-05, "loss": 0.4215, "step": 40840 }, { "epoch": 0.8661746304426206, "grad_norm": 0.371591717004776, "learning_rate": 1.2105488640810303e-05, "loss": 0.5103, "step": 40841 }, { "epoch": 0.8661958389005535, "grad_norm": 0.5106751322746277, "learning_rate": 1.2105162620168391e-05, "loss": 0.5163, "step": 40842 }, { "epoch": 0.8662170473584866, "grad_norm": 0.40575462579727173, "learning_rate": 1.2104836597185133e-05, "loss": 0.4597, "step": 40843 }, { "epoch": 0.8662382558164196, "grad_norm": 0.34547021985054016, "learning_rate": 1.2104510571860897e-05, "loss": 0.4892, "step": 40844 }, { "epoch": 0.8662594642743526, "grad_norm": 0.4305173456668854, "learning_rate": 1.210418454419604e-05, "loss": 0.5309, "step": 40845 }, { "epoch": 0.8662806727322856, "grad_norm": 0.40677928924560547, "learning_rate": 1.210385851419093e-05, "loss": 0.4291, "step": 40846 }, { "epoch": 0.8663018811902187, "grad_norm": 0.3289531171321869, "learning_rate": 1.2103532481845925e-05, "loss": 0.5261, "step": 40847 }, { "epoch": 0.8663230896481516, "grad_norm": 0.37708696722984314, "learning_rate": 1.210320644716139e-05, "loss": 0.3886, "step": 40848 }, { "epoch": 0.8663442981060847, "grad_norm": 0.390011727809906, "learning_rate": 1.2102880410137687e-05, "loss": 0.4953, "step": 40849 }, { "epoch": 0.8663655065640178, "grad_norm": 0.42960983514785767, "learning_rate": 1.2102554370775179e-05, "loss": 0.455, "step": 40850 }, { "epoch": 0.8663867150219507, "grad_norm": 0.454461932182312, "learning_rate": 1.2102228329074225e-05, "loss": 0.5734, "step": 40851 }, { "epoch": 0.8664079234798838, "grad_norm": 0.38044077157974243, "learning_rate": 1.2101902285035197e-05, "loss": 0.555, "step": 40852 }, { "epoch": 0.8664291319378168, "grad_norm": 0.39331334829330444, "learning_rate": 1.210157623865845e-05, "loss": 0.4252, "step": 40853 }, { "epoch": 0.8664503403957499, "grad_norm": 0.36036059260368347, "learning_rate": 1.2101250189944345e-05, "loss": 0.463, "step": 40854 }, { "epoch": 0.8664715488536828, "grad_norm": 0.4322536587715149, "learning_rate": 1.2100924138893251e-05, "loss": 0.5087, "step": 40855 }, { "epoch": 0.8664927573116159, "grad_norm": 0.3865157663822174, "learning_rate": 1.2100598085505523e-05, "loss": 0.4907, "step": 40856 }, { "epoch": 0.8665139657695489, "grad_norm": 0.3774988055229187, "learning_rate": 1.210027202978153e-05, "loss": 0.4804, "step": 40857 }, { "epoch": 0.8665351742274819, "grad_norm": 0.3819025158882141, "learning_rate": 1.2099945971721636e-05, "loss": 0.5065, "step": 40858 }, { "epoch": 0.8665563826854149, "grad_norm": 0.35146695375442505, "learning_rate": 1.20996199113262e-05, "loss": 0.4787, "step": 40859 }, { "epoch": 0.866577591143348, "grad_norm": 0.3133676052093506, "learning_rate": 1.2099293848595581e-05, "loss": 0.5305, "step": 40860 }, { "epoch": 0.866598799601281, "grad_norm": 0.35271164774894714, "learning_rate": 1.2098967783530149e-05, "loss": 0.3979, "step": 40861 }, { "epoch": 0.866620008059214, "grad_norm": 0.32303401827812195, "learning_rate": 1.209864171613026e-05, "loss": 0.5088, "step": 40862 }, { "epoch": 0.866641216517147, "grad_norm": 0.5188260078430176, "learning_rate": 1.2098315646396279e-05, "loss": 0.4065, "step": 40863 }, { "epoch": 0.8666624249750801, "grad_norm": 0.38465777039527893, "learning_rate": 1.2097989574328574e-05, "loss": 0.553, "step": 40864 }, { "epoch": 0.8666836334330131, "grad_norm": 0.3361474871635437, "learning_rate": 1.20976634999275e-05, "loss": 0.4707, "step": 40865 }, { "epoch": 0.8667048418909461, "grad_norm": 0.35327959060668945, "learning_rate": 1.2097337423193427e-05, "loss": 0.535, "step": 40866 }, { "epoch": 0.8667260503488792, "grad_norm": 0.3361063003540039, "learning_rate": 1.2097011344126713e-05, "loss": 0.4291, "step": 40867 }, { "epoch": 0.8667472588068121, "grad_norm": 0.3324006497859955, "learning_rate": 1.2096685262727717e-05, "loss": 0.5597, "step": 40868 }, { "epoch": 0.8667684672647452, "grad_norm": 0.4118121862411499, "learning_rate": 1.2096359178996806e-05, "loss": 0.5161, "step": 40869 }, { "epoch": 0.8667896757226782, "grad_norm": 0.36647146940231323, "learning_rate": 1.2096033092934347e-05, "loss": 0.4528, "step": 40870 }, { "epoch": 0.8668108841806113, "grad_norm": 0.4260915219783783, "learning_rate": 1.2095707004540694e-05, "loss": 0.5262, "step": 40871 }, { "epoch": 0.8668320926385442, "grad_norm": 0.4023553729057312, "learning_rate": 1.2095380913816218e-05, "loss": 0.4525, "step": 40872 }, { "epoch": 0.8668533010964773, "grad_norm": 0.3180348575115204, "learning_rate": 1.2095054820761276e-05, "loss": 0.4576, "step": 40873 }, { "epoch": 0.8668745095544103, "grad_norm": 0.37992310523986816, "learning_rate": 1.209472872537623e-05, "loss": 0.4581, "step": 40874 }, { "epoch": 0.8668957180123433, "grad_norm": 0.467509388923645, "learning_rate": 1.2094402627661447e-05, "loss": 0.4546, "step": 40875 }, { "epoch": 0.8669169264702763, "grad_norm": 0.390421599149704, "learning_rate": 1.2094076527617285e-05, "loss": 0.4568, "step": 40876 }, { "epoch": 0.8669381349282094, "grad_norm": 0.35644009709358215, "learning_rate": 1.209375042524411e-05, "loss": 0.4474, "step": 40877 }, { "epoch": 0.8669593433861424, "grad_norm": 0.3913170397281647, "learning_rate": 1.2093424320542286e-05, "loss": 0.4704, "step": 40878 }, { "epoch": 0.8669805518440754, "grad_norm": 0.35089755058288574, "learning_rate": 1.2093098213512173e-05, "loss": 0.5307, "step": 40879 }, { "epoch": 0.8670017603020085, "grad_norm": 0.3662430942058563, "learning_rate": 1.2092772104154136e-05, "loss": 0.4712, "step": 40880 }, { "epoch": 0.8670229687599414, "grad_norm": 0.4453442394733429, "learning_rate": 1.2092445992468534e-05, "loss": 0.4789, "step": 40881 }, { "epoch": 0.8670441772178745, "grad_norm": 0.36334067583084106, "learning_rate": 1.2092119878455731e-05, "loss": 0.4611, "step": 40882 }, { "epoch": 0.8670653856758075, "grad_norm": 0.391238272190094, "learning_rate": 1.2091793762116091e-05, "loss": 0.5123, "step": 40883 }, { "epoch": 0.8670865941337406, "grad_norm": 0.3624308407306671, "learning_rate": 1.2091467643449977e-05, "loss": 0.5264, "step": 40884 }, { "epoch": 0.8671078025916735, "grad_norm": 0.37002110481262207, "learning_rate": 1.209114152245775e-05, "loss": 0.4521, "step": 40885 }, { "epoch": 0.8671290110496066, "grad_norm": 0.3158439099788666, "learning_rate": 1.2090815399139774e-05, "loss": 0.3589, "step": 40886 }, { "epoch": 0.8671502195075396, "grad_norm": 0.33061930537223816, "learning_rate": 1.2090489273496412e-05, "loss": 0.4387, "step": 40887 }, { "epoch": 0.8671714279654726, "grad_norm": 0.35982614755630493, "learning_rate": 1.2090163145528023e-05, "loss": 0.5292, "step": 40888 }, { "epoch": 0.8671926364234056, "grad_norm": 0.37109342217445374, "learning_rate": 1.2089837015234975e-05, "loss": 0.4926, "step": 40889 }, { "epoch": 0.8672138448813387, "grad_norm": 0.3616538941860199, "learning_rate": 1.208951088261763e-05, "loss": 0.4674, "step": 40890 }, { "epoch": 0.8672350533392718, "grad_norm": 0.3496384620666504, "learning_rate": 1.2089184747676346e-05, "loss": 0.5063, "step": 40891 }, { "epoch": 0.8672562617972047, "grad_norm": 0.3890874683856964, "learning_rate": 1.2088858610411492e-05, "loss": 0.5052, "step": 40892 }, { "epoch": 0.8672774702551378, "grad_norm": 0.39136913418769836, "learning_rate": 1.2088532470823423e-05, "loss": 0.4782, "step": 40893 }, { "epoch": 0.8672986787130708, "grad_norm": 0.35141700506210327, "learning_rate": 1.2088206328912513e-05, "loss": 0.4773, "step": 40894 }, { "epoch": 0.8673198871710038, "grad_norm": 0.4361499547958374, "learning_rate": 1.2087880184679112e-05, "loss": 0.4916, "step": 40895 }, { "epoch": 0.8673410956289368, "grad_norm": 0.396330863237381, "learning_rate": 1.2087554038123593e-05, "loss": 0.422, "step": 40896 }, { "epoch": 0.8673623040868699, "grad_norm": 0.39726874232292175, "learning_rate": 1.2087227889246312e-05, "loss": 0.5905, "step": 40897 }, { "epoch": 0.8673835125448028, "grad_norm": 0.4051726162433624, "learning_rate": 1.2086901738047636e-05, "loss": 0.4557, "step": 40898 }, { "epoch": 0.8674047210027359, "grad_norm": 0.32964444160461426, "learning_rate": 1.2086575584527922e-05, "loss": 0.4099, "step": 40899 }, { "epoch": 0.8674259294606689, "grad_norm": 0.35757017135620117, "learning_rate": 1.2086249428687542e-05, "loss": 0.4541, "step": 40900 }, { "epoch": 0.867447137918602, "grad_norm": 0.5243462324142456, "learning_rate": 1.208592327052685e-05, "loss": 0.4876, "step": 40901 }, { "epoch": 0.8674683463765349, "grad_norm": 0.39530861377716064, "learning_rate": 1.2085597110046212e-05, "loss": 0.4665, "step": 40902 }, { "epoch": 0.867489554834468, "grad_norm": 0.39126133918762207, "learning_rate": 1.2085270947245995e-05, "loss": 0.4677, "step": 40903 }, { "epoch": 0.867510763292401, "grad_norm": 0.3854530453681946, "learning_rate": 1.2084944782126554e-05, "loss": 0.5069, "step": 40904 }, { "epoch": 0.867531971750334, "grad_norm": 0.3278493583202362, "learning_rate": 1.2084618614688258e-05, "loss": 0.4343, "step": 40905 }, { "epoch": 0.8675531802082671, "grad_norm": 0.38394713401794434, "learning_rate": 1.2084292444931468e-05, "loss": 0.5568, "step": 40906 }, { "epoch": 0.8675743886662001, "grad_norm": 0.37832531332969666, "learning_rate": 1.2083966272856543e-05, "loss": 0.5127, "step": 40907 }, { "epoch": 0.8675955971241331, "grad_norm": 0.38052618503570557, "learning_rate": 1.2083640098463846e-05, "loss": 0.5025, "step": 40908 }, { "epoch": 0.8676168055820661, "grad_norm": 0.38558492064476013, "learning_rate": 1.208331392175375e-05, "loss": 0.4276, "step": 40909 }, { "epoch": 0.8676380140399992, "grad_norm": 0.3948323130607605, "learning_rate": 1.2082987742726607e-05, "loss": 0.4838, "step": 40910 }, { "epoch": 0.8676592224979321, "grad_norm": 0.3618698716163635, "learning_rate": 1.2082661561382782e-05, "loss": 0.5145, "step": 40911 }, { "epoch": 0.8676804309558652, "grad_norm": 0.33090972900390625, "learning_rate": 1.208233537772264e-05, "loss": 0.4978, "step": 40912 }, { "epoch": 0.8677016394137982, "grad_norm": 0.42325401306152344, "learning_rate": 1.2082009191746541e-05, "loss": 0.5318, "step": 40913 }, { "epoch": 0.8677228478717313, "grad_norm": 0.36402177810668945, "learning_rate": 1.2081683003454851e-05, "loss": 0.4795, "step": 40914 }, { "epoch": 0.8677440563296642, "grad_norm": 0.3821500241756439, "learning_rate": 1.2081356812847933e-05, "loss": 0.4535, "step": 40915 }, { "epoch": 0.8677652647875973, "grad_norm": 0.34125059843063354, "learning_rate": 1.2081030619926147e-05, "loss": 0.5045, "step": 40916 }, { "epoch": 0.8677864732455303, "grad_norm": 0.48092687129974365, "learning_rate": 1.2080704424689855e-05, "loss": 0.4285, "step": 40917 }, { "epoch": 0.8678076817034633, "grad_norm": 0.39483824372291565, "learning_rate": 1.2080378227139424e-05, "loss": 0.5291, "step": 40918 }, { "epoch": 0.8678288901613964, "grad_norm": 0.35142385959625244, "learning_rate": 1.2080052027275213e-05, "loss": 0.5032, "step": 40919 }, { "epoch": 0.8678500986193294, "grad_norm": 0.37718716263771057, "learning_rate": 1.2079725825097586e-05, "loss": 0.5227, "step": 40920 }, { "epoch": 0.8678713070772625, "grad_norm": 0.38075265288352966, "learning_rate": 1.2079399620606906e-05, "loss": 0.5387, "step": 40921 }, { "epoch": 0.8678925155351954, "grad_norm": 0.3533449172973633, "learning_rate": 1.2079073413803535e-05, "loss": 0.4664, "step": 40922 }, { "epoch": 0.8679137239931285, "grad_norm": 0.3793134093284607, "learning_rate": 1.207874720468784e-05, "loss": 0.4422, "step": 40923 }, { "epoch": 0.8679349324510615, "grad_norm": 0.4653979241847992, "learning_rate": 1.2078420993260178e-05, "loss": 0.4795, "step": 40924 }, { "epoch": 0.8679561409089945, "grad_norm": 0.34459397196769714, "learning_rate": 1.2078094779520915e-05, "loss": 0.4247, "step": 40925 }, { "epoch": 0.8679773493669275, "grad_norm": 0.33794525265693665, "learning_rate": 1.2077768563470414e-05, "loss": 0.4749, "step": 40926 }, { "epoch": 0.8679985578248606, "grad_norm": 0.43721601366996765, "learning_rate": 1.2077442345109035e-05, "loss": 0.4911, "step": 40927 }, { "epoch": 0.8680197662827935, "grad_norm": 0.32588741183280945, "learning_rate": 1.207711612443714e-05, "loss": 0.4423, "step": 40928 }, { "epoch": 0.8680409747407266, "grad_norm": 0.3546043634414673, "learning_rate": 1.20767899014551e-05, "loss": 0.498, "step": 40929 }, { "epoch": 0.8680621831986596, "grad_norm": 0.3717157542705536, "learning_rate": 1.2076463676163268e-05, "loss": 0.5861, "step": 40930 }, { "epoch": 0.8680833916565927, "grad_norm": 0.3569621443748474, "learning_rate": 1.2076137448562014e-05, "loss": 0.4773, "step": 40931 }, { "epoch": 0.8681046001145257, "grad_norm": 0.3790479302406311, "learning_rate": 1.2075811218651698e-05, "loss": 0.439, "step": 40932 }, { "epoch": 0.8681258085724587, "grad_norm": 0.37345853447914124, "learning_rate": 1.2075484986432677e-05, "loss": 0.5251, "step": 40933 }, { "epoch": 0.8681470170303918, "grad_norm": 0.40614259243011475, "learning_rate": 1.2075158751905327e-05, "loss": 0.5492, "step": 40934 }, { "epoch": 0.8681682254883247, "grad_norm": 0.4394623339176178, "learning_rate": 1.207483251507e-05, "loss": 0.4711, "step": 40935 }, { "epoch": 0.8681894339462578, "grad_norm": 0.38684922456741333, "learning_rate": 1.2074506275927063e-05, "loss": 0.4727, "step": 40936 }, { "epoch": 0.8682106424041908, "grad_norm": 1.6879152059555054, "learning_rate": 1.207418003447688e-05, "loss": 0.4958, "step": 40937 }, { "epoch": 0.8682318508621238, "grad_norm": 0.3821844160556793, "learning_rate": 1.2073853790719811e-05, "loss": 0.5192, "step": 40938 }, { "epoch": 0.8682530593200568, "grad_norm": 0.3316684067249298, "learning_rate": 1.2073527544656218e-05, "loss": 0.4089, "step": 40939 }, { "epoch": 0.8682742677779899, "grad_norm": 0.3529486656188965, "learning_rate": 1.2073201296286466e-05, "loss": 0.4473, "step": 40940 }, { "epoch": 0.8682954762359228, "grad_norm": 0.37447914481163025, "learning_rate": 1.207287504561092e-05, "loss": 0.4226, "step": 40941 }, { "epoch": 0.8683166846938559, "grad_norm": 0.3569074273109436, "learning_rate": 1.2072548792629937e-05, "loss": 0.4706, "step": 40942 }, { "epoch": 0.8683378931517889, "grad_norm": 0.3735664486885071, "learning_rate": 1.2072222537343884e-05, "loss": 0.4431, "step": 40943 }, { "epoch": 0.868359101609722, "grad_norm": 0.5977345108985901, "learning_rate": 1.2071896279753123e-05, "loss": 0.4919, "step": 40944 }, { "epoch": 0.8683803100676549, "grad_norm": 0.3580647110939026, "learning_rate": 1.2071570019858021e-05, "loss": 0.4887, "step": 40945 }, { "epoch": 0.868401518525588, "grad_norm": 0.34704625606536865, "learning_rate": 1.2071243757658933e-05, "loss": 0.4891, "step": 40946 }, { "epoch": 0.8684227269835211, "grad_norm": 0.37225666642189026, "learning_rate": 1.2070917493156224e-05, "loss": 0.452, "step": 40947 }, { "epoch": 0.868443935441454, "grad_norm": 0.37711265683174133, "learning_rate": 1.2070591226350263e-05, "loss": 0.496, "step": 40948 }, { "epoch": 0.8684651438993871, "grad_norm": 0.37940260767936707, "learning_rate": 1.2070264957241407e-05, "loss": 0.5557, "step": 40949 }, { "epoch": 0.8684863523573201, "grad_norm": 0.3373527228832245, "learning_rate": 1.2069938685830018e-05, "loss": 0.4617, "step": 40950 }, { "epoch": 0.8685075608152532, "grad_norm": 0.39489954710006714, "learning_rate": 1.2069612412116465e-05, "loss": 0.5076, "step": 40951 }, { "epoch": 0.8685287692731861, "grad_norm": 0.35436293482780457, "learning_rate": 1.2069286136101106e-05, "loss": 0.5062, "step": 40952 }, { "epoch": 0.8685499777311192, "grad_norm": 0.36126187443733215, "learning_rate": 1.2068959857784301e-05, "loss": 0.5504, "step": 40953 }, { "epoch": 0.8685711861890522, "grad_norm": 0.3624267876148224, "learning_rate": 1.206863357716642e-05, "loss": 0.4491, "step": 40954 }, { "epoch": 0.8685923946469852, "grad_norm": 0.34872040152549744, "learning_rate": 1.2068307294247825e-05, "loss": 0.4566, "step": 40955 }, { "epoch": 0.8686136031049182, "grad_norm": 0.4442873001098633, "learning_rate": 1.2067981009028873e-05, "loss": 0.4951, "step": 40956 }, { "epoch": 0.8686348115628513, "grad_norm": 0.3630783259868622, "learning_rate": 1.206765472150993e-05, "loss": 0.5496, "step": 40957 }, { "epoch": 0.8686560200207842, "grad_norm": 0.38040515780448914, "learning_rate": 1.206732843169136e-05, "loss": 0.5091, "step": 40958 }, { "epoch": 0.8686772284787173, "grad_norm": 0.4114920198917389, "learning_rate": 1.2067002139573527e-05, "loss": 0.506, "step": 40959 }, { "epoch": 0.8686984369366504, "grad_norm": 0.3905394971370697, "learning_rate": 1.2066675845156792e-05, "loss": 0.5093, "step": 40960 }, { "epoch": 0.8687196453945834, "grad_norm": 0.38074493408203125, "learning_rate": 1.2066349548441517e-05, "loss": 0.5612, "step": 40961 }, { "epoch": 0.8687408538525164, "grad_norm": 0.3639032244682312, "learning_rate": 1.2066023249428068e-05, "loss": 0.5287, "step": 40962 }, { "epoch": 0.8687620623104494, "grad_norm": 0.37604203820228577, "learning_rate": 1.2065696948116805e-05, "loss": 0.5413, "step": 40963 }, { "epoch": 0.8687832707683825, "grad_norm": 0.4135354161262512, "learning_rate": 1.206537064450809e-05, "loss": 0.4689, "step": 40964 }, { "epoch": 0.8688044792263154, "grad_norm": 0.3929368853569031, "learning_rate": 1.2065044338602287e-05, "loss": 0.4994, "step": 40965 }, { "epoch": 0.8688256876842485, "grad_norm": 0.34348264336586, "learning_rate": 1.2064718030399763e-05, "loss": 0.5458, "step": 40966 }, { "epoch": 0.8688468961421815, "grad_norm": 0.35039666295051575, "learning_rate": 1.2064391719900874e-05, "loss": 0.481, "step": 40967 }, { "epoch": 0.8688681046001145, "grad_norm": 0.4031965732574463, "learning_rate": 1.2064065407105989e-05, "loss": 0.4984, "step": 40968 }, { "epoch": 0.8688893130580475, "grad_norm": 0.39460110664367676, "learning_rate": 1.206373909201547e-05, "loss": 0.368, "step": 40969 }, { "epoch": 0.8689105215159806, "grad_norm": 0.36149272322654724, "learning_rate": 1.2063412774629675e-05, "loss": 0.4867, "step": 40970 }, { "epoch": 0.8689317299739135, "grad_norm": 0.4687455892562866, "learning_rate": 1.2063086454948972e-05, "loss": 0.3858, "step": 40971 }, { "epoch": 0.8689529384318466, "grad_norm": 0.3801109790802002, "learning_rate": 1.2062760132973722e-05, "loss": 0.5504, "step": 40972 }, { "epoch": 0.8689741468897797, "grad_norm": 0.4081627428531647, "learning_rate": 1.2062433808704286e-05, "loss": 0.5119, "step": 40973 }, { "epoch": 0.8689953553477127, "grad_norm": 0.3746793866157532, "learning_rate": 1.2062107482141032e-05, "loss": 0.4831, "step": 40974 }, { "epoch": 0.8690165638056457, "grad_norm": 0.33655181527137756, "learning_rate": 1.2061781153284318e-05, "loss": 0.3741, "step": 40975 }, { "epoch": 0.8690377722635787, "grad_norm": 0.42710885405540466, "learning_rate": 1.2061454822134509e-05, "loss": 0.517, "step": 40976 }, { "epoch": 0.8690589807215118, "grad_norm": 0.37790730595588684, "learning_rate": 1.206112848869197e-05, "loss": 0.4496, "step": 40977 }, { "epoch": 0.8690801891794447, "grad_norm": 0.36747968196868896, "learning_rate": 1.2060802152957059e-05, "loss": 0.4217, "step": 40978 }, { "epoch": 0.8691013976373778, "grad_norm": 0.4080619215965271, "learning_rate": 1.2060475814930142e-05, "loss": 0.4441, "step": 40979 }, { "epoch": 0.8691226060953108, "grad_norm": 0.34186697006225586, "learning_rate": 1.2060149474611584e-05, "loss": 0.4324, "step": 40980 }, { "epoch": 0.8691438145532439, "grad_norm": 0.34456056356430054, "learning_rate": 1.2059823132001742e-05, "loss": 0.437, "step": 40981 }, { "epoch": 0.8691650230111768, "grad_norm": 0.3790833652019501, "learning_rate": 1.2059496787100986e-05, "loss": 0.4999, "step": 40982 }, { "epoch": 0.8691862314691099, "grad_norm": 0.4395608603954315, "learning_rate": 1.2059170439909678e-05, "loss": 0.5217, "step": 40983 }, { "epoch": 0.8692074399270429, "grad_norm": 0.3337290585041046, "learning_rate": 1.2058844090428173e-05, "loss": 0.406, "step": 40984 }, { "epoch": 0.8692286483849759, "grad_norm": 0.4212389886379242, "learning_rate": 1.2058517738656841e-05, "loss": 0.5445, "step": 40985 }, { "epoch": 0.8692498568429089, "grad_norm": 0.362915962934494, "learning_rate": 1.2058191384596043e-05, "loss": 0.4945, "step": 40986 }, { "epoch": 0.869271065300842, "grad_norm": 0.34912389516830444, "learning_rate": 1.2057865028246143e-05, "loss": 0.5121, "step": 40987 }, { "epoch": 0.869292273758775, "grad_norm": 0.35093122720718384, "learning_rate": 1.2057538669607505e-05, "loss": 0.4952, "step": 40988 }, { "epoch": 0.869313482216708, "grad_norm": 0.4284331202507019, "learning_rate": 1.2057212308680488e-05, "loss": 0.5765, "step": 40989 }, { "epoch": 0.8693346906746411, "grad_norm": 0.4375518262386322, "learning_rate": 1.2056885945465459e-05, "loss": 0.4512, "step": 40990 }, { "epoch": 0.869355899132574, "grad_norm": 0.43715032935142517, "learning_rate": 1.2056559579962778e-05, "loss": 0.4425, "step": 40991 }, { "epoch": 0.8693771075905071, "grad_norm": 0.3725917637348175, "learning_rate": 1.2056233212172812e-05, "loss": 0.5464, "step": 40992 }, { "epoch": 0.8693983160484401, "grad_norm": 0.4160575270652771, "learning_rate": 1.2055906842095916e-05, "loss": 0.5168, "step": 40993 }, { "epoch": 0.8694195245063732, "grad_norm": 0.34383487701416016, "learning_rate": 1.2055580469732464e-05, "loss": 0.6248, "step": 40994 }, { "epoch": 0.8694407329643061, "grad_norm": 0.39961835741996765, "learning_rate": 1.2055254095082811e-05, "loss": 0.561, "step": 40995 }, { "epoch": 0.8694619414222392, "grad_norm": 0.43177834153175354, "learning_rate": 1.2054927718147322e-05, "loss": 0.4095, "step": 40996 }, { "epoch": 0.8694831498801722, "grad_norm": 0.41145291924476624, "learning_rate": 1.2054601338926359e-05, "loss": 0.5186, "step": 40997 }, { "epoch": 0.8695043583381052, "grad_norm": 0.38501718640327454, "learning_rate": 1.2054274957420286e-05, "loss": 0.3744, "step": 40998 }, { "epoch": 0.8695255667960382, "grad_norm": 0.35425421595573425, "learning_rate": 1.205394857362947e-05, "loss": 0.5428, "step": 40999 }, { "epoch": 0.8695467752539713, "grad_norm": 0.390112966299057, "learning_rate": 1.205362218755427e-05, "loss": 0.5636, "step": 41000 }, { "epoch": 0.8695679837119044, "grad_norm": 0.33257943391799927, "learning_rate": 1.2053295799195045e-05, "loss": 0.4641, "step": 41001 }, { "epoch": 0.8695891921698373, "grad_norm": 0.3548925817012787, "learning_rate": 1.2052969408552166e-05, "loss": 0.4291, "step": 41002 }, { "epoch": 0.8696104006277704, "grad_norm": 0.35109177231788635, "learning_rate": 1.205264301562599e-05, "loss": 0.5266, "step": 41003 }, { "epoch": 0.8696316090857034, "grad_norm": 0.34668681025505066, "learning_rate": 1.2052316620416882e-05, "loss": 0.4886, "step": 41004 }, { "epoch": 0.8696528175436364, "grad_norm": 0.3779065012931824, "learning_rate": 1.2051990222925208e-05, "loss": 0.5072, "step": 41005 }, { "epoch": 0.8696740260015694, "grad_norm": 0.39046093821525574, "learning_rate": 1.2051663823151327e-05, "loss": 0.4308, "step": 41006 }, { "epoch": 0.8696952344595025, "grad_norm": 0.34747377038002014, "learning_rate": 1.2051337421095603e-05, "loss": 0.5016, "step": 41007 }, { "epoch": 0.8697164429174354, "grad_norm": 0.3922111690044403, "learning_rate": 1.20510110167584e-05, "loss": 0.5493, "step": 41008 }, { "epoch": 0.8697376513753685, "grad_norm": 0.3663003742694855, "learning_rate": 1.2050684610140078e-05, "loss": 0.5452, "step": 41009 }, { "epoch": 0.8697588598333015, "grad_norm": 0.36598122119903564, "learning_rate": 1.2050358201241004e-05, "loss": 0.549, "step": 41010 }, { "epoch": 0.8697800682912346, "grad_norm": 0.3693360984325409, "learning_rate": 1.205003179006154e-05, "loss": 0.5229, "step": 41011 }, { "epoch": 0.8698012767491675, "grad_norm": 0.3371370732784271, "learning_rate": 1.2049705376602047e-05, "loss": 0.3946, "step": 41012 }, { "epoch": 0.8698224852071006, "grad_norm": 0.3595833480358124, "learning_rate": 1.2049378960862891e-05, "loss": 0.4833, "step": 41013 }, { "epoch": 0.8698436936650337, "grad_norm": 0.341930091381073, "learning_rate": 1.2049052542844435e-05, "loss": 0.5181, "step": 41014 }, { "epoch": 0.8698649021229666, "grad_norm": 0.340366393327713, "learning_rate": 1.204872612254704e-05, "loss": 0.5194, "step": 41015 }, { "epoch": 0.8698861105808997, "grad_norm": 0.3598606288433075, "learning_rate": 1.2048399699971067e-05, "loss": 0.5338, "step": 41016 }, { "epoch": 0.8699073190388327, "grad_norm": 0.4814733862876892, "learning_rate": 1.2048073275116883e-05, "loss": 0.5006, "step": 41017 }, { "epoch": 0.8699285274967657, "grad_norm": 0.3536584973335266, "learning_rate": 1.2047746847984847e-05, "loss": 0.4117, "step": 41018 }, { "epoch": 0.8699497359546987, "grad_norm": 0.3595851957798004, "learning_rate": 1.2047420418575328e-05, "loss": 0.4042, "step": 41019 }, { "epoch": 0.8699709444126318, "grad_norm": 0.44093722105026245, "learning_rate": 1.2047093986888688e-05, "loss": 0.4331, "step": 41020 }, { "epoch": 0.8699921528705647, "grad_norm": 0.3931862413883209, "learning_rate": 1.2046767552925282e-05, "loss": 0.442, "step": 41021 }, { "epoch": 0.8700133613284978, "grad_norm": 0.3850446045398712, "learning_rate": 1.2046441116685483e-05, "loss": 0.4665, "step": 41022 }, { "epoch": 0.8700345697864308, "grad_norm": 0.3718140423297882, "learning_rate": 1.2046114678169646e-05, "loss": 0.516, "step": 41023 }, { "epoch": 0.8700557782443639, "grad_norm": 0.4659450650215149, "learning_rate": 1.2045788237378142e-05, "loss": 0.4491, "step": 41024 }, { "epoch": 0.8700769867022968, "grad_norm": 0.3632528781890869, "learning_rate": 1.204546179431133e-05, "loss": 0.5556, "step": 41025 }, { "epoch": 0.8700981951602299, "grad_norm": 0.32993221282958984, "learning_rate": 1.204513534896957e-05, "loss": 0.5038, "step": 41026 }, { "epoch": 0.870119403618163, "grad_norm": 0.35619181394577026, "learning_rate": 1.2044808901353232e-05, "loss": 0.4491, "step": 41027 }, { "epoch": 0.8701406120760959, "grad_norm": 0.3533383905887604, "learning_rate": 1.2044482451462676e-05, "loss": 0.4503, "step": 41028 }, { "epoch": 0.870161820534029, "grad_norm": 0.3787629008293152, "learning_rate": 1.2044155999298257e-05, "loss": 0.4562, "step": 41029 }, { "epoch": 0.870183028991962, "grad_norm": 0.3710213899612427, "learning_rate": 1.2043829544860353e-05, "loss": 0.4145, "step": 41030 }, { "epoch": 0.8702042374498951, "grad_norm": 0.6030250191688538, "learning_rate": 1.2043503088149317e-05, "loss": 0.4315, "step": 41031 }, { "epoch": 0.870225445907828, "grad_norm": 0.3552298843860626, "learning_rate": 1.2043176629165514e-05, "loss": 0.4455, "step": 41032 }, { "epoch": 0.8702466543657611, "grad_norm": 0.341293603181839, "learning_rate": 1.204285016790931e-05, "loss": 0.4741, "step": 41033 }, { "epoch": 0.8702678628236941, "grad_norm": 0.3979978859424591, "learning_rate": 1.2042523704381064e-05, "loss": 0.6031, "step": 41034 }, { "epoch": 0.8702890712816271, "grad_norm": 0.5050206184387207, "learning_rate": 1.204219723858114e-05, "loss": 0.5083, "step": 41035 }, { "epoch": 0.8703102797395601, "grad_norm": 0.40467992424964905, "learning_rate": 1.2041870770509902e-05, "loss": 0.4494, "step": 41036 }, { "epoch": 0.8703314881974932, "grad_norm": 0.356799840927124, "learning_rate": 1.2041544300167716e-05, "loss": 0.4957, "step": 41037 }, { "epoch": 0.8703526966554261, "grad_norm": 0.49858224391937256, "learning_rate": 1.2041217827554939e-05, "loss": 0.5012, "step": 41038 }, { "epoch": 0.8703739051133592, "grad_norm": 0.4737342596054077, "learning_rate": 1.204089135267194e-05, "loss": 0.4467, "step": 41039 }, { "epoch": 0.8703951135712922, "grad_norm": 0.5563136339187622, "learning_rate": 1.2040564875519076e-05, "loss": 0.5121, "step": 41040 }, { "epoch": 0.8704163220292253, "grad_norm": 0.34002333879470825, "learning_rate": 1.2040238396096715e-05, "loss": 0.4518, "step": 41041 }, { "epoch": 0.8704375304871583, "grad_norm": 0.42839816212654114, "learning_rate": 1.2039911914405218e-05, "loss": 0.4749, "step": 41042 }, { "epoch": 0.8704587389450913, "grad_norm": 0.3844485878944397, "learning_rate": 1.2039585430444949e-05, "loss": 0.5691, "step": 41043 }, { "epoch": 0.8704799474030244, "grad_norm": 0.3380524814128876, "learning_rate": 1.2039258944216271e-05, "loss": 0.4789, "step": 41044 }, { "epoch": 0.8705011558609573, "grad_norm": 0.4338975250720978, "learning_rate": 1.203893245571955e-05, "loss": 0.491, "step": 41045 }, { "epoch": 0.8705223643188904, "grad_norm": 0.34909194707870483, "learning_rate": 1.2038605964955142e-05, "loss": 0.4549, "step": 41046 }, { "epoch": 0.8705435727768234, "grad_norm": 0.3573474586009979, "learning_rate": 1.2038279471923418e-05, "loss": 0.4068, "step": 41047 }, { "epoch": 0.8705647812347564, "grad_norm": 0.5216096639633179, "learning_rate": 1.2037952976624734e-05, "loss": 0.4974, "step": 41048 }, { "epoch": 0.8705859896926894, "grad_norm": 0.4986151158809662, "learning_rate": 1.2037626479059454e-05, "loss": 0.4475, "step": 41049 }, { "epoch": 0.8706071981506225, "grad_norm": 0.40731751918792725, "learning_rate": 1.2037299979227948e-05, "loss": 0.4933, "step": 41050 }, { "epoch": 0.8706284066085554, "grad_norm": 0.8813435435295105, "learning_rate": 1.2036973477130574e-05, "loss": 0.5179, "step": 41051 }, { "epoch": 0.8706496150664885, "grad_norm": 0.34768158197402954, "learning_rate": 1.2036646972767694e-05, "loss": 0.4611, "step": 41052 }, { "epoch": 0.8706708235244215, "grad_norm": 0.47324493527412415, "learning_rate": 1.2036320466139675e-05, "loss": 0.49, "step": 41053 }, { "epoch": 0.8706920319823546, "grad_norm": 0.3631625175476074, "learning_rate": 1.2035993957246876e-05, "loss": 0.4579, "step": 41054 }, { "epoch": 0.8707132404402876, "grad_norm": 0.33991536498069763, "learning_rate": 1.2035667446089663e-05, "loss": 0.4531, "step": 41055 }, { "epoch": 0.8707344488982206, "grad_norm": 0.4761614501476288, "learning_rate": 1.2035340932668401e-05, "loss": 0.5251, "step": 41056 }, { "epoch": 0.8707556573561537, "grad_norm": 0.4827134609222412, "learning_rate": 1.2035014416983447e-05, "loss": 0.5742, "step": 41057 }, { "epoch": 0.8707768658140866, "grad_norm": 0.37357231974601746, "learning_rate": 1.203468789903517e-05, "loss": 0.5598, "step": 41058 }, { "epoch": 0.8707980742720197, "grad_norm": 0.38432061672210693, "learning_rate": 1.2034361378823928e-05, "loss": 0.5719, "step": 41059 }, { "epoch": 0.8708192827299527, "grad_norm": 0.36675381660461426, "learning_rate": 1.203403485635009e-05, "loss": 0.4648, "step": 41060 }, { "epoch": 0.8708404911878858, "grad_norm": 0.3655664622783661, "learning_rate": 1.2033708331614014e-05, "loss": 0.4788, "step": 41061 }, { "epoch": 0.8708616996458187, "grad_norm": 0.3400990962982178, "learning_rate": 1.2033381804616067e-05, "loss": 0.4188, "step": 41062 }, { "epoch": 0.8708829081037518, "grad_norm": 0.3849000632762909, "learning_rate": 1.2033055275356606e-05, "loss": 0.4694, "step": 41063 }, { "epoch": 0.8709041165616848, "grad_norm": 0.44095081090927124, "learning_rate": 1.2032728743836004e-05, "loss": 0.4852, "step": 41064 }, { "epoch": 0.8709253250196178, "grad_norm": 0.37956100702285767, "learning_rate": 1.2032402210054617e-05, "loss": 0.5547, "step": 41065 }, { "epoch": 0.8709465334775508, "grad_norm": 0.4226757287979126, "learning_rate": 1.2032075674012808e-05, "loss": 0.4654, "step": 41066 }, { "epoch": 0.8709677419354839, "grad_norm": 0.379860520362854, "learning_rate": 1.2031749135710946e-05, "loss": 0.5586, "step": 41067 }, { "epoch": 0.870988950393417, "grad_norm": 0.357745498418808, "learning_rate": 1.2031422595149387e-05, "loss": 0.4376, "step": 41068 }, { "epoch": 0.8710101588513499, "grad_norm": 0.3461076319217682, "learning_rate": 1.2031096052328498e-05, "loss": 0.5693, "step": 41069 }, { "epoch": 0.871031367309283, "grad_norm": 0.40288305282592773, "learning_rate": 1.203076950724864e-05, "loss": 0.5325, "step": 41070 }, { "epoch": 0.871052575767216, "grad_norm": 0.40500202775001526, "learning_rate": 1.203044295991018e-05, "loss": 0.5373, "step": 41071 }, { "epoch": 0.871073784225149, "grad_norm": 0.39550408720970154, "learning_rate": 1.2030116410313482e-05, "loss": 0.4996, "step": 41072 }, { "epoch": 0.871094992683082, "grad_norm": 0.3992803394794464, "learning_rate": 1.2029789858458903e-05, "loss": 0.4546, "step": 41073 }, { "epoch": 0.8711162011410151, "grad_norm": 0.36588263511657715, "learning_rate": 1.202946330434681e-05, "loss": 0.5149, "step": 41074 }, { "epoch": 0.871137409598948, "grad_norm": 0.3701937198638916, "learning_rate": 1.2029136747977562e-05, "loss": 0.495, "step": 41075 }, { "epoch": 0.8711586180568811, "grad_norm": 0.39311307668685913, "learning_rate": 1.202881018935153e-05, "loss": 0.5689, "step": 41076 }, { "epoch": 0.8711798265148141, "grad_norm": 0.5000174641609192, "learning_rate": 1.202848362846907e-05, "loss": 0.4753, "step": 41077 }, { "epoch": 0.8712010349727471, "grad_norm": 0.3689098060131073, "learning_rate": 1.202815706533055e-05, "loss": 0.4762, "step": 41078 }, { "epoch": 0.8712222434306801, "grad_norm": 0.6274818778038025, "learning_rate": 1.2027830499936332e-05, "loss": 0.4598, "step": 41079 }, { "epoch": 0.8712434518886132, "grad_norm": 0.3686554729938507, "learning_rate": 1.2027503932286777e-05, "loss": 0.5128, "step": 41080 }, { "epoch": 0.8712646603465461, "grad_norm": 0.43283841013908386, "learning_rate": 1.202717736238225e-05, "loss": 0.5002, "step": 41081 }, { "epoch": 0.8712858688044792, "grad_norm": 0.4109060764312744, "learning_rate": 1.2026850790223115e-05, "loss": 0.4819, "step": 41082 }, { "epoch": 0.8713070772624123, "grad_norm": 0.3514653742313385, "learning_rate": 1.2026524215809732e-05, "loss": 0.4739, "step": 41083 }, { "epoch": 0.8713282857203453, "grad_norm": 0.3438173532485962, "learning_rate": 1.2026197639142466e-05, "loss": 0.4539, "step": 41084 }, { "epoch": 0.8713494941782783, "grad_norm": 0.41081249713897705, "learning_rate": 1.2025871060221683e-05, "loss": 0.4967, "step": 41085 }, { "epoch": 0.8713707026362113, "grad_norm": 0.3615584373474121, "learning_rate": 1.2025544479047744e-05, "loss": 0.5871, "step": 41086 }, { "epoch": 0.8713919110941444, "grad_norm": 0.3494603633880615, "learning_rate": 1.2025217895621011e-05, "loss": 0.4435, "step": 41087 }, { "epoch": 0.8714131195520773, "grad_norm": 0.3450026512145996, "learning_rate": 1.2024891309941849e-05, "loss": 0.4711, "step": 41088 }, { "epoch": 0.8714343280100104, "grad_norm": 0.4050140678882599, "learning_rate": 1.2024564722010618e-05, "loss": 0.5316, "step": 41089 }, { "epoch": 0.8714555364679434, "grad_norm": 0.34592363238334656, "learning_rate": 1.2024238131827688e-05, "loss": 0.4329, "step": 41090 }, { "epoch": 0.8714767449258765, "grad_norm": 0.3565787374973297, "learning_rate": 1.2023911539393414e-05, "loss": 0.4361, "step": 41091 }, { "epoch": 0.8714979533838094, "grad_norm": 0.3489149510860443, "learning_rate": 1.2023584944708166e-05, "loss": 0.503, "step": 41092 }, { "epoch": 0.8715191618417425, "grad_norm": 0.32790809869766235, "learning_rate": 1.2023258347772305e-05, "loss": 0.4404, "step": 41093 }, { "epoch": 0.8715403702996755, "grad_norm": 0.3994956612586975, "learning_rate": 1.2022931748586187e-05, "loss": 0.4166, "step": 41094 }, { "epoch": 0.8715615787576085, "grad_norm": 0.31817153096199036, "learning_rate": 1.2022605147150188e-05, "loss": 0.4245, "step": 41095 }, { "epoch": 0.8715827872155416, "grad_norm": 0.40461647510528564, "learning_rate": 1.2022278543464665e-05, "loss": 0.6341, "step": 41096 }, { "epoch": 0.8716039956734746, "grad_norm": 0.37106823921203613, "learning_rate": 1.2021951937529978e-05, "loss": 0.5176, "step": 41097 }, { "epoch": 0.8716252041314076, "grad_norm": 0.40946269035339355, "learning_rate": 1.2021625329346498e-05, "loss": 0.5662, "step": 41098 }, { "epoch": 0.8716464125893406, "grad_norm": 0.47019273042678833, "learning_rate": 1.202129871891458e-05, "loss": 0.4645, "step": 41099 }, { "epoch": 0.8716676210472737, "grad_norm": 0.3535029888153076, "learning_rate": 1.2020972106234592e-05, "loss": 0.4972, "step": 41100 }, { "epoch": 0.8716888295052067, "grad_norm": 0.38091179728507996, "learning_rate": 1.2020645491306897e-05, "loss": 0.4988, "step": 41101 }, { "epoch": 0.8717100379631397, "grad_norm": 0.3777588903903961, "learning_rate": 1.202031887413186e-05, "loss": 0.4977, "step": 41102 }, { "epoch": 0.8717312464210727, "grad_norm": 0.35359466075897217, "learning_rate": 1.2019992254709838e-05, "loss": 0.4608, "step": 41103 }, { "epoch": 0.8717524548790058, "grad_norm": 0.36201462149620056, "learning_rate": 1.20196656330412e-05, "loss": 0.4843, "step": 41104 }, { "epoch": 0.8717736633369387, "grad_norm": 0.3978627920150757, "learning_rate": 1.2019339009126308e-05, "loss": 0.503, "step": 41105 }, { "epoch": 0.8717948717948718, "grad_norm": 0.40904054045677185, "learning_rate": 1.2019012382965523e-05, "loss": 0.5451, "step": 41106 }, { "epoch": 0.8718160802528048, "grad_norm": 0.39798226952552795, "learning_rate": 1.2018685754559212e-05, "loss": 0.531, "step": 41107 }, { "epoch": 0.8718372887107378, "grad_norm": 0.44207435846328735, "learning_rate": 1.2018359123907733e-05, "loss": 0.5474, "step": 41108 }, { "epoch": 0.8718584971686709, "grad_norm": 0.3812820315361023, "learning_rate": 1.2018032491011455e-05, "loss": 0.4693, "step": 41109 }, { "epoch": 0.8718797056266039, "grad_norm": 0.798285186290741, "learning_rate": 1.201770585587074e-05, "loss": 0.5074, "step": 41110 }, { "epoch": 0.871900914084537, "grad_norm": 0.33026376366615295, "learning_rate": 1.2017379218485947e-05, "loss": 0.4573, "step": 41111 }, { "epoch": 0.8719221225424699, "grad_norm": 0.34826353192329407, "learning_rate": 1.2017052578857445e-05, "loss": 0.4828, "step": 41112 }, { "epoch": 0.871943331000403, "grad_norm": 0.39600881934165955, "learning_rate": 1.2016725936985595e-05, "loss": 0.5057, "step": 41113 }, { "epoch": 0.871964539458336, "grad_norm": 0.38748645782470703, "learning_rate": 1.2016399292870755e-05, "loss": 0.4774, "step": 41114 }, { "epoch": 0.871985747916269, "grad_norm": 0.4372888505458832, "learning_rate": 1.2016072646513298e-05, "loss": 0.5388, "step": 41115 }, { "epoch": 0.872006956374202, "grad_norm": 0.4186387360095978, "learning_rate": 1.2015745997913582e-05, "loss": 0.5241, "step": 41116 }, { "epoch": 0.8720281648321351, "grad_norm": 0.3900967836380005, "learning_rate": 1.2015419347071967e-05, "loss": 0.4832, "step": 41117 }, { "epoch": 0.872049373290068, "grad_norm": 0.3372742533683777, "learning_rate": 1.2015092693988826e-05, "loss": 0.4018, "step": 41118 }, { "epoch": 0.8720705817480011, "grad_norm": 0.357137531042099, "learning_rate": 1.2014766038664512e-05, "loss": 0.5013, "step": 41119 }, { "epoch": 0.8720917902059341, "grad_norm": 0.35659438371658325, "learning_rate": 1.2014439381099394e-05, "loss": 0.4652, "step": 41120 }, { "epoch": 0.8721129986638672, "grad_norm": 0.40631821751594543, "learning_rate": 1.2014112721293835e-05, "loss": 0.4681, "step": 41121 }, { "epoch": 0.8721342071218001, "grad_norm": 0.3199087381362915, "learning_rate": 1.2013786059248196e-05, "loss": 0.4611, "step": 41122 }, { "epoch": 0.8721554155797332, "grad_norm": 0.3326854109764099, "learning_rate": 1.2013459394962843e-05, "loss": 0.4053, "step": 41123 }, { "epoch": 0.8721766240376663, "grad_norm": 0.37203559279441833, "learning_rate": 1.2013132728438137e-05, "loss": 0.4806, "step": 41124 }, { "epoch": 0.8721978324955992, "grad_norm": 0.3458585739135742, "learning_rate": 1.2012806059674443e-05, "loss": 0.525, "step": 41125 }, { "epoch": 0.8722190409535323, "grad_norm": 0.41424787044525146, "learning_rate": 1.201247938867212e-05, "loss": 0.5388, "step": 41126 }, { "epoch": 0.8722402494114653, "grad_norm": 0.38396546244621277, "learning_rate": 1.201215271543154e-05, "loss": 0.5169, "step": 41127 }, { "epoch": 0.8722614578693983, "grad_norm": 0.43822959065437317, "learning_rate": 1.2011826039953059e-05, "loss": 0.5374, "step": 41128 }, { "epoch": 0.8722826663273313, "grad_norm": 0.3420279324054718, "learning_rate": 1.2011499362237042e-05, "loss": 0.4835, "step": 41129 }, { "epoch": 0.8723038747852644, "grad_norm": 0.3663565516471863, "learning_rate": 1.2011172682283856e-05, "loss": 0.4827, "step": 41130 }, { "epoch": 0.8723250832431974, "grad_norm": 0.39055538177490234, "learning_rate": 1.2010846000093856e-05, "loss": 0.5442, "step": 41131 }, { "epoch": 0.8723462917011304, "grad_norm": 0.3577142655849457, "learning_rate": 1.2010519315667412e-05, "loss": 0.4831, "step": 41132 }, { "epoch": 0.8723675001590634, "grad_norm": 0.35515663027763367, "learning_rate": 1.2010192629004892e-05, "loss": 0.4297, "step": 41133 }, { "epoch": 0.8723887086169965, "grad_norm": 0.3823680877685547, "learning_rate": 1.2009865940106646e-05, "loss": 0.4176, "step": 41134 }, { "epoch": 0.8724099170749294, "grad_norm": 0.3448534905910492, "learning_rate": 1.2009539248973049e-05, "loss": 0.4394, "step": 41135 }, { "epoch": 0.8724311255328625, "grad_norm": 1.6874264478683472, "learning_rate": 1.2009212555604455e-05, "loss": 0.4734, "step": 41136 }, { "epoch": 0.8724523339907956, "grad_norm": 0.334127277135849, "learning_rate": 1.2008885860001235e-05, "loss": 0.4875, "step": 41137 }, { "epoch": 0.8724735424487285, "grad_norm": 0.35264310240745544, "learning_rate": 1.2008559162163753e-05, "loss": 0.483, "step": 41138 }, { "epoch": 0.8724947509066616, "grad_norm": 0.37200409173965454, "learning_rate": 1.2008232462092362e-05, "loss": 0.4433, "step": 41139 }, { "epoch": 0.8725159593645946, "grad_norm": 0.36283499002456665, "learning_rate": 1.2007905759787437e-05, "loss": 0.4495, "step": 41140 }, { "epoch": 0.8725371678225277, "grad_norm": 0.4115943908691406, "learning_rate": 1.2007579055249336e-05, "loss": 0.4995, "step": 41141 }, { "epoch": 0.8725583762804606, "grad_norm": 0.3702928125858307, "learning_rate": 1.2007252348478423e-05, "loss": 0.4811, "step": 41142 }, { "epoch": 0.8725795847383937, "grad_norm": 0.409598708152771, "learning_rate": 1.2006925639475062e-05, "loss": 0.4193, "step": 41143 }, { "epoch": 0.8726007931963267, "grad_norm": 0.38659870624542236, "learning_rate": 1.2006598928239614e-05, "loss": 0.4792, "step": 41144 }, { "epoch": 0.8726220016542597, "grad_norm": 0.37125205993652344, "learning_rate": 1.2006272214772443e-05, "loss": 0.4167, "step": 41145 }, { "epoch": 0.8726432101121927, "grad_norm": 0.3626473844051361, "learning_rate": 1.2005945499073916e-05, "loss": 0.551, "step": 41146 }, { "epoch": 0.8726644185701258, "grad_norm": 0.5271266102790833, "learning_rate": 1.2005618781144396e-05, "loss": 0.4837, "step": 41147 }, { "epoch": 0.8726856270280587, "grad_norm": 0.34441784024238586, "learning_rate": 1.200529206098424e-05, "loss": 0.5014, "step": 41148 }, { "epoch": 0.8727068354859918, "grad_norm": 0.43420520424842834, "learning_rate": 1.2004965338593816e-05, "loss": 0.4332, "step": 41149 }, { "epoch": 0.8727280439439249, "grad_norm": 0.34879156947135925, "learning_rate": 1.2004638613973488e-05, "loss": 0.5263, "step": 41150 }, { "epoch": 0.8727492524018579, "grad_norm": 0.4413563311100006, "learning_rate": 1.200431188712362e-05, "loss": 0.4918, "step": 41151 }, { "epoch": 0.8727704608597909, "grad_norm": 0.38123619556427, "learning_rate": 1.200398515804457e-05, "loss": 0.4262, "step": 41152 }, { "epoch": 0.8727916693177239, "grad_norm": 0.3980490565299988, "learning_rate": 1.2003658426736707e-05, "loss": 0.5503, "step": 41153 }, { "epoch": 0.872812877775657, "grad_norm": 0.35919448733329773, "learning_rate": 1.2003331693200392e-05, "loss": 0.476, "step": 41154 }, { "epoch": 0.8728340862335899, "grad_norm": 0.3426498770713806, "learning_rate": 1.2003004957435993e-05, "loss": 0.5454, "step": 41155 }, { "epoch": 0.872855294691523, "grad_norm": 0.373492956161499, "learning_rate": 1.2002678219443864e-05, "loss": 0.5892, "step": 41156 }, { "epoch": 0.872876503149456, "grad_norm": 0.3549005091190338, "learning_rate": 1.2002351479224377e-05, "loss": 0.4948, "step": 41157 }, { "epoch": 0.872897711607389, "grad_norm": 0.40421539545059204, "learning_rate": 1.200202473677789e-05, "loss": 0.4648, "step": 41158 }, { "epoch": 0.872918920065322, "grad_norm": 0.45259833335876465, "learning_rate": 1.2001697992104769e-05, "loss": 0.4534, "step": 41159 }, { "epoch": 0.8729401285232551, "grad_norm": 0.36866098642349243, "learning_rate": 1.2001371245205375e-05, "loss": 0.4771, "step": 41160 }, { "epoch": 0.872961336981188, "grad_norm": 0.4003436863422394, "learning_rate": 1.2001044496080078e-05, "loss": 0.4768, "step": 41161 }, { "epoch": 0.8729825454391211, "grad_norm": 0.3520570993423462, "learning_rate": 1.2000717744729233e-05, "loss": 0.4972, "step": 41162 }, { "epoch": 0.8730037538970541, "grad_norm": 0.43019697070121765, "learning_rate": 1.2000390991153208e-05, "loss": 0.4622, "step": 41163 }, { "epoch": 0.8730249623549872, "grad_norm": 0.4198771119117737, "learning_rate": 1.2000064235352364e-05, "loss": 0.5827, "step": 41164 }, { "epoch": 0.8730461708129202, "grad_norm": 0.3744056522846222, "learning_rate": 1.1999737477327066e-05, "loss": 0.4596, "step": 41165 }, { "epoch": 0.8730673792708532, "grad_norm": 1.6091128587722778, "learning_rate": 1.1999410717077683e-05, "loss": 0.4793, "step": 41166 }, { "epoch": 0.8730885877287863, "grad_norm": 0.3224160969257355, "learning_rate": 1.1999083954604566e-05, "loss": 0.4921, "step": 41167 }, { "epoch": 0.8731097961867192, "grad_norm": 0.374650239944458, "learning_rate": 1.1998757189908089e-05, "loss": 0.5019, "step": 41168 }, { "epoch": 0.8731310046446523, "grad_norm": 0.35770383477211, "learning_rate": 1.1998430422988612e-05, "loss": 0.5058, "step": 41169 }, { "epoch": 0.8731522131025853, "grad_norm": 0.36822038888931274, "learning_rate": 1.1998103653846495e-05, "loss": 0.4946, "step": 41170 }, { "epoch": 0.8731734215605184, "grad_norm": 0.323111355304718, "learning_rate": 1.1997776882482104e-05, "loss": 0.4597, "step": 41171 }, { "epoch": 0.8731946300184513, "grad_norm": 0.3567068576812744, "learning_rate": 1.1997450108895807e-05, "loss": 0.4635, "step": 41172 }, { "epoch": 0.8732158384763844, "grad_norm": 0.3699289560317993, "learning_rate": 1.1997123333087962e-05, "loss": 0.413, "step": 41173 }, { "epoch": 0.8732370469343174, "grad_norm": 0.32804206013679504, "learning_rate": 1.1996796555058931e-05, "loss": 0.469, "step": 41174 }, { "epoch": 0.8732582553922504, "grad_norm": 0.3874170184135437, "learning_rate": 1.1996469774809085e-05, "loss": 0.4807, "step": 41175 }, { "epoch": 0.8732794638501834, "grad_norm": 0.363351970911026, "learning_rate": 1.1996142992338778e-05, "loss": 0.4771, "step": 41176 }, { "epoch": 0.8733006723081165, "grad_norm": 0.30593863129615784, "learning_rate": 1.1995816207648379e-05, "loss": 0.4176, "step": 41177 }, { "epoch": 0.8733218807660496, "grad_norm": 0.374748557806015, "learning_rate": 1.1995489420738254e-05, "loss": 0.4194, "step": 41178 }, { "epoch": 0.8733430892239825, "grad_norm": 0.4434768259525299, "learning_rate": 1.199516263160876e-05, "loss": 0.4671, "step": 41179 }, { "epoch": 0.8733642976819156, "grad_norm": 0.42072755098342896, "learning_rate": 1.1994835840260263e-05, "loss": 0.4966, "step": 41180 }, { "epoch": 0.8733855061398486, "grad_norm": 0.4046032130718231, "learning_rate": 1.1994509046693126e-05, "loss": 0.4817, "step": 41181 }, { "epoch": 0.8734067145977816, "grad_norm": 0.3840900659561157, "learning_rate": 1.1994182250907717e-05, "loss": 0.493, "step": 41182 }, { "epoch": 0.8734279230557146, "grad_norm": 0.3577400743961334, "learning_rate": 1.1993855452904393e-05, "loss": 0.4983, "step": 41183 }, { "epoch": 0.8734491315136477, "grad_norm": 0.41337278485298157, "learning_rate": 1.1993528652683524e-05, "loss": 0.4928, "step": 41184 }, { "epoch": 0.8734703399715806, "grad_norm": 0.37267276644706726, "learning_rate": 1.1993201850245464e-05, "loss": 0.5074, "step": 41185 }, { "epoch": 0.8734915484295137, "grad_norm": 0.3669127821922302, "learning_rate": 1.1992875045590586e-05, "loss": 0.4856, "step": 41186 }, { "epoch": 0.8735127568874467, "grad_norm": 0.3672398626804352, "learning_rate": 1.1992548238719249e-05, "loss": 0.4128, "step": 41187 }, { "epoch": 0.8735339653453797, "grad_norm": 0.3567280173301697, "learning_rate": 1.1992221429631819e-05, "loss": 0.4567, "step": 41188 }, { "epoch": 0.8735551738033127, "grad_norm": 0.8133552670478821, "learning_rate": 1.1991894618328657e-05, "loss": 0.4108, "step": 41189 }, { "epoch": 0.8735763822612458, "grad_norm": 0.3325783610343933, "learning_rate": 1.1991567804810122e-05, "loss": 0.4756, "step": 41190 }, { "epoch": 0.8735975907191789, "grad_norm": 0.36405983567237854, "learning_rate": 1.1991240989076587e-05, "loss": 0.466, "step": 41191 }, { "epoch": 0.8736187991771118, "grad_norm": 0.43807968497276306, "learning_rate": 1.1990914171128412e-05, "loss": 0.5472, "step": 41192 }, { "epoch": 0.8736400076350449, "grad_norm": 0.3469340205192566, "learning_rate": 1.1990587350965956e-05, "loss": 0.4741, "step": 41193 }, { "epoch": 0.8736612160929779, "grad_norm": 0.3930619955062866, "learning_rate": 1.199026052858959e-05, "loss": 0.4182, "step": 41194 }, { "epoch": 0.8736824245509109, "grad_norm": 0.40835508704185486, "learning_rate": 1.198993370399967e-05, "loss": 0.4959, "step": 41195 }, { "epoch": 0.8737036330088439, "grad_norm": 0.36349043250083923, "learning_rate": 1.1989606877196564e-05, "loss": 0.4645, "step": 41196 }, { "epoch": 0.873724841466777, "grad_norm": 0.41814789175987244, "learning_rate": 1.1989280048180638e-05, "loss": 0.4606, "step": 41197 }, { "epoch": 0.8737460499247099, "grad_norm": 0.32186323404312134, "learning_rate": 1.1988953216952251e-05, "loss": 0.4761, "step": 41198 }, { "epoch": 0.873767258382643, "grad_norm": 0.3629426658153534, "learning_rate": 1.1988626383511763e-05, "loss": 0.5066, "step": 41199 }, { "epoch": 0.873788466840576, "grad_norm": 0.39904361963272095, "learning_rate": 1.1988299547859546e-05, "loss": 0.4285, "step": 41200 }, { "epoch": 0.8738096752985091, "grad_norm": 0.3771907687187195, "learning_rate": 1.1987972709995957e-05, "loss": 0.4477, "step": 41201 }, { "epoch": 0.873830883756442, "grad_norm": 0.36951926350593567, "learning_rate": 1.1987645869921365e-05, "loss": 0.5141, "step": 41202 }, { "epoch": 0.8738520922143751, "grad_norm": 0.42951130867004395, "learning_rate": 1.1987319027636129e-05, "loss": 0.5338, "step": 41203 }, { "epoch": 0.8738733006723081, "grad_norm": 0.40313223004341125, "learning_rate": 1.198699218314061e-05, "loss": 0.4627, "step": 41204 }, { "epoch": 0.8738945091302411, "grad_norm": 0.3690159320831299, "learning_rate": 1.1986665336435182e-05, "loss": 0.4723, "step": 41205 }, { "epoch": 0.8739157175881742, "grad_norm": 0.3304879367351532, "learning_rate": 1.1986338487520199e-05, "loss": 0.5034, "step": 41206 }, { "epoch": 0.8739369260461072, "grad_norm": 0.38791623711586, "learning_rate": 1.1986011636396028e-05, "loss": 0.526, "step": 41207 }, { "epoch": 0.8739581345040403, "grad_norm": 0.3257947564125061, "learning_rate": 1.1985684783063034e-05, "loss": 0.4229, "step": 41208 }, { "epoch": 0.8739793429619732, "grad_norm": 0.3664022386074066, "learning_rate": 1.1985357927521575e-05, "loss": 0.5349, "step": 41209 }, { "epoch": 0.8740005514199063, "grad_norm": 0.3895690441131592, "learning_rate": 1.198503106977202e-05, "loss": 0.444, "step": 41210 }, { "epoch": 0.8740217598778393, "grad_norm": 0.351046621799469, "learning_rate": 1.198470420981473e-05, "loss": 0.4739, "step": 41211 }, { "epoch": 0.8740429683357723, "grad_norm": 0.39946603775024414, "learning_rate": 1.198437734765007e-05, "loss": 0.5191, "step": 41212 }, { "epoch": 0.8740641767937053, "grad_norm": 0.3488602638244629, "learning_rate": 1.1984050483278405e-05, "loss": 0.411, "step": 41213 }, { "epoch": 0.8740853852516384, "grad_norm": 0.38647326827049255, "learning_rate": 1.1983723616700094e-05, "loss": 0.4529, "step": 41214 }, { "epoch": 0.8741065937095713, "grad_norm": 0.35565510392189026, "learning_rate": 1.1983396747915502e-05, "loss": 0.5391, "step": 41215 }, { "epoch": 0.8741278021675044, "grad_norm": 0.3856433928012848, "learning_rate": 1.1983069876924993e-05, "loss": 0.4644, "step": 41216 }, { "epoch": 0.8741490106254374, "grad_norm": 0.4211776852607727, "learning_rate": 1.1982743003728931e-05, "loss": 0.5103, "step": 41217 }, { "epoch": 0.8741702190833704, "grad_norm": 0.3286326229572296, "learning_rate": 1.1982416128327681e-05, "loss": 0.4355, "step": 41218 }, { "epoch": 0.8741914275413035, "grad_norm": 0.3826428949832916, "learning_rate": 1.1982089250721605e-05, "loss": 0.5245, "step": 41219 }, { "epoch": 0.8742126359992365, "grad_norm": 0.42262500524520874, "learning_rate": 1.1981762370911068e-05, "loss": 0.5278, "step": 41220 }, { "epoch": 0.8742338444571696, "grad_norm": 0.3661200702190399, "learning_rate": 1.1981435488896429e-05, "loss": 0.4998, "step": 41221 }, { "epoch": 0.8742550529151025, "grad_norm": 0.3380783498287201, "learning_rate": 1.1981108604678054e-05, "loss": 0.426, "step": 41222 }, { "epoch": 0.8742762613730356, "grad_norm": 0.3579871952533722, "learning_rate": 1.1980781718256311e-05, "loss": 0.4666, "step": 41223 }, { "epoch": 0.8742974698309686, "grad_norm": 0.3610415458679199, "learning_rate": 1.1980454829631556e-05, "loss": 0.449, "step": 41224 }, { "epoch": 0.8743186782889016, "grad_norm": 0.36895063519477844, "learning_rate": 1.198012793880416e-05, "loss": 0.5429, "step": 41225 }, { "epoch": 0.8743398867468346, "grad_norm": 0.36683008074760437, "learning_rate": 1.1979801045774482e-05, "loss": 0.4176, "step": 41226 }, { "epoch": 0.8743610952047677, "grad_norm": 0.39978358149528503, "learning_rate": 1.1979474150542884e-05, "loss": 0.4692, "step": 41227 }, { "epoch": 0.8743823036627006, "grad_norm": 0.3855137228965759, "learning_rate": 1.1979147253109734e-05, "loss": 0.5044, "step": 41228 }, { "epoch": 0.8744035121206337, "grad_norm": 0.3240222930908203, "learning_rate": 1.1978820353475393e-05, "loss": 0.4004, "step": 41229 }, { "epoch": 0.8744247205785667, "grad_norm": 0.3891126215457916, "learning_rate": 1.1978493451640225e-05, "loss": 0.4465, "step": 41230 }, { "epoch": 0.8744459290364998, "grad_norm": 0.35089215636253357, "learning_rate": 1.1978166547604595e-05, "loss": 0.4431, "step": 41231 }, { "epoch": 0.8744671374944328, "grad_norm": 0.3629322946071625, "learning_rate": 1.1977839641368863e-05, "loss": 0.5635, "step": 41232 }, { "epoch": 0.8744883459523658, "grad_norm": 0.37068605422973633, "learning_rate": 1.1977512732933399e-05, "loss": 0.5174, "step": 41233 }, { "epoch": 0.8745095544102989, "grad_norm": 0.3908570110797882, "learning_rate": 1.197718582229856e-05, "loss": 0.5906, "step": 41234 }, { "epoch": 0.8745307628682318, "grad_norm": 0.4058583974838257, "learning_rate": 1.1976858909464709e-05, "loss": 0.4738, "step": 41235 }, { "epoch": 0.8745519713261649, "grad_norm": 0.3640727996826172, "learning_rate": 1.1976531994432217e-05, "loss": 0.5486, "step": 41236 }, { "epoch": 0.8745731797840979, "grad_norm": 0.3717416524887085, "learning_rate": 1.1976205077201443e-05, "loss": 0.5208, "step": 41237 }, { "epoch": 0.874594388242031, "grad_norm": 0.46387654542922974, "learning_rate": 1.197587815777275e-05, "loss": 0.4913, "step": 41238 }, { "epoch": 0.8746155966999639, "grad_norm": 0.6229773163795471, "learning_rate": 1.1975551236146501e-05, "loss": 0.5075, "step": 41239 }, { "epoch": 0.874636805157897, "grad_norm": 0.4459698796272278, "learning_rate": 1.1975224312323065e-05, "loss": 0.476, "step": 41240 }, { "epoch": 0.87465801361583, "grad_norm": 0.3186415731906891, "learning_rate": 1.1974897386302796e-05, "loss": 0.4869, "step": 41241 }, { "epoch": 0.874679222073763, "grad_norm": 0.5427165031433105, "learning_rate": 1.1974570458086069e-05, "loss": 0.5111, "step": 41242 }, { "epoch": 0.874700430531696, "grad_norm": 0.3339998424053192, "learning_rate": 1.197424352767324e-05, "loss": 0.4887, "step": 41243 }, { "epoch": 0.8747216389896291, "grad_norm": 0.4586074650287628, "learning_rate": 1.1973916595064672e-05, "loss": 0.4933, "step": 41244 }, { "epoch": 0.874742847447562, "grad_norm": 0.35028576850891113, "learning_rate": 1.1973589660260734e-05, "loss": 0.4766, "step": 41245 }, { "epoch": 0.8747640559054951, "grad_norm": 0.3681814968585968, "learning_rate": 1.1973262723261785e-05, "loss": 0.5077, "step": 41246 }, { "epoch": 0.8747852643634282, "grad_norm": 0.36868366599082947, "learning_rate": 1.1972935784068192e-05, "loss": 0.4802, "step": 41247 }, { "epoch": 0.8748064728213611, "grad_norm": 0.3802128732204437, "learning_rate": 1.1972608842680314e-05, "loss": 0.4465, "step": 41248 }, { "epoch": 0.8748276812792942, "grad_norm": 0.3593629002571106, "learning_rate": 1.197228189909852e-05, "loss": 0.5184, "step": 41249 }, { "epoch": 0.8748488897372272, "grad_norm": 0.36771050095558167, "learning_rate": 1.197195495332317e-05, "loss": 0.416, "step": 41250 }, { "epoch": 0.8748700981951603, "grad_norm": 0.3787490725517273, "learning_rate": 1.1971628005354631e-05, "loss": 0.522, "step": 41251 }, { "epoch": 0.8748913066530932, "grad_norm": 0.901221752166748, "learning_rate": 1.1971301055193263e-05, "loss": 0.436, "step": 41252 }, { "epoch": 0.8749125151110263, "grad_norm": 0.4067196846008301, "learning_rate": 1.1970974102839432e-05, "loss": 0.4987, "step": 41253 }, { "epoch": 0.8749337235689593, "grad_norm": 0.4487753212451935, "learning_rate": 1.1970647148293497e-05, "loss": 0.465, "step": 41254 }, { "epoch": 0.8749549320268923, "grad_norm": 0.34997987747192383, "learning_rate": 1.197032019155583e-05, "loss": 0.4396, "step": 41255 }, { "epoch": 0.8749761404848253, "grad_norm": 0.3427424430847168, "learning_rate": 1.1969993232626789e-05, "loss": 0.4425, "step": 41256 }, { "epoch": 0.8749973489427584, "grad_norm": 0.3784465193748474, "learning_rate": 1.1969666271506737e-05, "loss": 0.5056, "step": 41257 }, { "epoch": 0.8750185574006913, "grad_norm": 0.389001727104187, "learning_rate": 1.196933930819604e-05, "loss": 0.4676, "step": 41258 }, { "epoch": 0.8750397658586244, "grad_norm": 0.3771701455116272, "learning_rate": 1.1969012342695061e-05, "loss": 0.5482, "step": 41259 }, { "epoch": 0.8750609743165575, "grad_norm": 0.33054542541503906, "learning_rate": 1.1968685375004163e-05, "loss": 0.4561, "step": 41260 }, { "epoch": 0.8750821827744905, "grad_norm": 0.37279170751571655, "learning_rate": 1.196835840512371e-05, "loss": 0.494, "step": 41261 }, { "epoch": 0.8751033912324235, "grad_norm": 0.36813589930534363, "learning_rate": 1.1968031433054067e-05, "loss": 0.4876, "step": 41262 }, { "epoch": 0.8751245996903565, "grad_norm": 0.3589194416999817, "learning_rate": 1.1967704458795597e-05, "loss": 0.5379, "step": 41263 }, { "epoch": 0.8751458081482896, "grad_norm": 0.350861519575119, "learning_rate": 1.1967377482348664e-05, "loss": 0.5162, "step": 41264 }, { "epoch": 0.8751670166062225, "grad_norm": 0.3652086853981018, "learning_rate": 1.1967050503713629e-05, "loss": 0.4953, "step": 41265 }, { "epoch": 0.8751882250641556, "grad_norm": 0.3442534804344177, "learning_rate": 1.1966723522890858e-05, "loss": 0.4679, "step": 41266 }, { "epoch": 0.8752094335220886, "grad_norm": 0.4075523614883423, "learning_rate": 1.1966396539880712e-05, "loss": 0.4965, "step": 41267 }, { "epoch": 0.8752306419800217, "grad_norm": 0.44753968715667725, "learning_rate": 1.196606955468356e-05, "loss": 0.4735, "step": 41268 }, { "epoch": 0.8752518504379546, "grad_norm": 0.420908123254776, "learning_rate": 1.196574256729976e-05, "loss": 0.4762, "step": 41269 }, { "epoch": 0.8752730588958877, "grad_norm": 0.3955856263637543, "learning_rate": 1.1965415577729682e-05, "loss": 0.4555, "step": 41270 }, { "epoch": 0.8752942673538207, "grad_norm": 0.3692631125450134, "learning_rate": 1.1965088585973683e-05, "loss": 0.5414, "step": 41271 }, { "epoch": 0.8753154758117537, "grad_norm": 0.3363039791584015, "learning_rate": 1.196476159203213e-05, "loss": 0.5454, "step": 41272 }, { "epoch": 0.8753366842696868, "grad_norm": 0.3340178430080414, "learning_rate": 1.1964434595905385e-05, "loss": 0.513, "step": 41273 }, { "epoch": 0.8753578927276198, "grad_norm": 0.3460483253002167, "learning_rate": 1.1964107597593815e-05, "loss": 0.5803, "step": 41274 }, { "epoch": 0.8753791011855528, "grad_norm": 0.4172375798225403, "learning_rate": 1.1963780597097781e-05, "loss": 0.5048, "step": 41275 }, { "epoch": 0.8754003096434858, "grad_norm": 0.39611563086509705, "learning_rate": 1.1963453594417647e-05, "loss": 0.4776, "step": 41276 }, { "epoch": 0.8754215181014189, "grad_norm": 0.39900052547454834, "learning_rate": 1.1963126589553775e-05, "loss": 0.5214, "step": 41277 }, { "epoch": 0.8754427265593518, "grad_norm": 0.36174532771110535, "learning_rate": 1.1962799582506535e-05, "loss": 0.4758, "step": 41278 }, { "epoch": 0.8754639350172849, "grad_norm": 0.3323014974594116, "learning_rate": 1.1962472573276284e-05, "loss": 0.4106, "step": 41279 }, { "epoch": 0.8754851434752179, "grad_norm": 0.35760262608528137, "learning_rate": 1.1962145561863388e-05, "loss": 0.492, "step": 41280 }, { "epoch": 0.875506351933151, "grad_norm": 0.3795359134674072, "learning_rate": 1.196181854826821e-05, "loss": 0.5557, "step": 41281 }, { "epoch": 0.8755275603910839, "grad_norm": 0.37581780552864075, "learning_rate": 1.1961491532491116e-05, "loss": 0.4913, "step": 41282 }, { "epoch": 0.875548768849017, "grad_norm": 0.4932684302330017, "learning_rate": 1.1961164514532466e-05, "loss": 0.5216, "step": 41283 }, { "epoch": 0.87556997730695, "grad_norm": 0.41063910722732544, "learning_rate": 1.196083749439263e-05, "loss": 0.4524, "step": 41284 }, { "epoch": 0.875591185764883, "grad_norm": 0.39939337968826294, "learning_rate": 1.1960510472071964e-05, "loss": 0.5513, "step": 41285 }, { "epoch": 0.875612394222816, "grad_norm": 0.3768419921398163, "learning_rate": 1.1960183447570835e-05, "loss": 0.4994, "step": 41286 }, { "epoch": 0.8756336026807491, "grad_norm": 0.35928893089294434, "learning_rate": 1.1959856420889608e-05, "loss": 0.4746, "step": 41287 }, { "epoch": 0.8756548111386822, "grad_norm": 0.5985678434371948, "learning_rate": 1.1959529392028646e-05, "loss": 0.5224, "step": 41288 }, { "epoch": 0.8756760195966151, "grad_norm": 0.3606046736240387, "learning_rate": 1.1959202360988313e-05, "loss": 0.4481, "step": 41289 }, { "epoch": 0.8756972280545482, "grad_norm": 0.3930162787437439, "learning_rate": 1.1958875327768972e-05, "loss": 0.4801, "step": 41290 }, { "epoch": 0.8757184365124812, "grad_norm": 0.34506383538246155, "learning_rate": 1.1958548292370985e-05, "loss": 0.4655, "step": 41291 }, { "epoch": 0.8757396449704142, "grad_norm": 0.33078882098197937, "learning_rate": 1.1958221254794718e-05, "loss": 0.4768, "step": 41292 }, { "epoch": 0.8757608534283472, "grad_norm": 0.3978019952774048, "learning_rate": 1.1957894215040535e-05, "loss": 0.4866, "step": 41293 }, { "epoch": 0.8757820618862803, "grad_norm": 0.3324095606803894, "learning_rate": 1.1957567173108801e-05, "loss": 0.4722, "step": 41294 }, { "epoch": 0.8758032703442132, "grad_norm": 0.3908887207508087, "learning_rate": 1.1957240128999876e-05, "loss": 0.4461, "step": 41295 }, { "epoch": 0.8758244788021463, "grad_norm": 0.3673931658267975, "learning_rate": 1.1956913082714125e-05, "loss": 0.4194, "step": 41296 }, { "epoch": 0.8758456872600793, "grad_norm": 0.40485304594039917, "learning_rate": 1.1956586034251913e-05, "loss": 0.5595, "step": 41297 }, { "epoch": 0.8758668957180124, "grad_norm": 0.46009209752082825, "learning_rate": 1.1956258983613604e-05, "loss": 0.49, "step": 41298 }, { "epoch": 0.8758881041759453, "grad_norm": 0.3165993094444275, "learning_rate": 1.1955931930799557e-05, "loss": 0.4665, "step": 41299 }, { "epoch": 0.8759093126338784, "grad_norm": 0.41977396607398987, "learning_rate": 1.1955604875810143e-05, "loss": 0.4694, "step": 41300 }, { "epoch": 0.8759305210918115, "grad_norm": 0.4083794355392456, "learning_rate": 1.1955277818645723e-05, "loss": 0.5429, "step": 41301 }, { "epoch": 0.8759517295497444, "grad_norm": 0.34675002098083496, "learning_rate": 1.1954950759306658e-05, "loss": 0.4451, "step": 41302 }, { "epoch": 0.8759729380076775, "grad_norm": 0.38850998878479004, "learning_rate": 1.1954623697793315e-05, "loss": 0.3632, "step": 41303 }, { "epoch": 0.8759941464656105, "grad_norm": 0.3509390652179718, "learning_rate": 1.1954296634106055e-05, "loss": 0.5479, "step": 41304 }, { "epoch": 0.8760153549235435, "grad_norm": 0.36679428815841675, "learning_rate": 1.1953969568245242e-05, "loss": 0.5413, "step": 41305 }, { "epoch": 0.8760365633814765, "grad_norm": 0.3388752043247223, "learning_rate": 1.1953642500211245e-05, "loss": 0.4683, "step": 41306 }, { "epoch": 0.8760577718394096, "grad_norm": 0.31570515036582947, "learning_rate": 1.1953315430004421e-05, "loss": 0.4265, "step": 41307 }, { "epoch": 0.8760789802973425, "grad_norm": 0.35938647389411926, "learning_rate": 1.195298835762514e-05, "loss": 0.4997, "step": 41308 }, { "epoch": 0.8761001887552756, "grad_norm": 0.34755876660346985, "learning_rate": 1.1952661283073757e-05, "loss": 0.5659, "step": 41309 }, { "epoch": 0.8761213972132086, "grad_norm": 0.35470321774482727, "learning_rate": 1.1952334206350647e-05, "loss": 0.3937, "step": 41310 }, { "epoch": 0.8761426056711417, "grad_norm": 0.35905393958091736, "learning_rate": 1.1952007127456162e-05, "loss": 0.5568, "step": 41311 }, { "epoch": 0.8761638141290746, "grad_norm": 0.40210068225860596, "learning_rate": 1.1951680046390673e-05, "loss": 0.4174, "step": 41312 }, { "epoch": 0.8761850225870077, "grad_norm": 0.38805916905403137, "learning_rate": 1.1951352963154545e-05, "loss": 0.5667, "step": 41313 }, { "epoch": 0.8762062310449408, "grad_norm": 0.30787408351898193, "learning_rate": 1.1951025877748137e-05, "loss": 0.4965, "step": 41314 }, { "epoch": 0.8762274395028737, "grad_norm": 0.4681723713874817, "learning_rate": 1.1950698790171815e-05, "loss": 0.4623, "step": 41315 }, { "epoch": 0.8762486479608068, "grad_norm": 0.3693885803222656, "learning_rate": 1.1950371700425946e-05, "loss": 0.4607, "step": 41316 }, { "epoch": 0.8762698564187398, "grad_norm": 0.3110436201095581, "learning_rate": 1.1950044608510886e-05, "loss": 0.4149, "step": 41317 }, { "epoch": 0.8762910648766729, "grad_norm": 0.4544612467288971, "learning_rate": 1.1949717514427004e-05, "loss": 0.444, "step": 41318 }, { "epoch": 0.8763122733346058, "grad_norm": 0.36270982027053833, "learning_rate": 1.1949390418174666e-05, "loss": 0.5074, "step": 41319 }, { "epoch": 0.8763334817925389, "grad_norm": 0.4859152138233185, "learning_rate": 1.194906331975423e-05, "loss": 0.4817, "step": 41320 }, { "epoch": 0.8763546902504719, "grad_norm": 0.32489919662475586, "learning_rate": 1.1948736219166067e-05, "loss": 0.381, "step": 41321 }, { "epoch": 0.8763758987084049, "grad_norm": 0.3943835496902466, "learning_rate": 1.1948409116410533e-05, "loss": 0.5247, "step": 41322 }, { "epoch": 0.8763971071663379, "grad_norm": 0.3623318672180176, "learning_rate": 1.1948082011487995e-05, "loss": 0.4528, "step": 41323 }, { "epoch": 0.876418315624271, "grad_norm": 0.33826401829719543, "learning_rate": 1.1947754904398818e-05, "loss": 0.4522, "step": 41324 }, { "epoch": 0.8764395240822039, "grad_norm": 0.34544649720191956, "learning_rate": 1.1947427795143364e-05, "loss": 0.4442, "step": 41325 }, { "epoch": 0.876460732540137, "grad_norm": 0.3493145704269409, "learning_rate": 1.1947100683722e-05, "loss": 0.4551, "step": 41326 }, { "epoch": 0.87648194099807, "grad_norm": 0.36111506819725037, "learning_rate": 1.1946773570135087e-05, "loss": 0.4316, "step": 41327 }, { "epoch": 0.876503149456003, "grad_norm": 0.3554324805736542, "learning_rate": 1.1946446454382987e-05, "loss": 0.4999, "step": 41328 }, { "epoch": 0.8765243579139361, "grad_norm": 0.4328514039516449, "learning_rate": 1.1946119336466069e-05, "loss": 0.5539, "step": 41329 }, { "epoch": 0.8765455663718691, "grad_norm": 0.363577663898468, "learning_rate": 1.1945792216384693e-05, "loss": 0.565, "step": 41330 }, { "epoch": 0.8765667748298022, "grad_norm": 0.3783904016017914, "learning_rate": 1.1945465094139222e-05, "loss": 0.5345, "step": 41331 }, { "epoch": 0.8765879832877351, "grad_norm": 0.38906750082969666, "learning_rate": 1.1945137969730023e-05, "loss": 0.4568, "step": 41332 }, { "epoch": 0.8766091917456682, "grad_norm": 0.384738028049469, "learning_rate": 1.194481084315746e-05, "loss": 0.4866, "step": 41333 }, { "epoch": 0.8766304002036012, "grad_norm": 0.41598761081695557, "learning_rate": 1.1944483714421893e-05, "loss": 0.4116, "step": 41334 }, { "epoch": 0.8766516086615342, "grad_norm": 0.3601718842983246, "learning_rate": 1.1944156583523689e-05, "loss": 0.483, "step": 41335 }, { "epoch": 0.8766728171194672, "grad_norm": 0.3522106111049652, "learning_rate": 1.1943829450463214e-05, "loss": 0.4897, "step": 41336 }, { "epoch": 0.8766940255774003, "grad_norm": 0.34969496726989746, "learning_rate": 1.194350231524082e-05, "loss": 0.4503, "step": 41337 }, { "epoch": 0.8767152340353332, "grad_norm": 0.3322356641292572, "learning_rate": 1.1943175177856888e-05, "loss": 0.4403, "step": 41338 }, { "epoch": 0.8767364424932663, "grad_norm": 0.42830124497413635, "learning_rate": 1.1942848038311771e-05, "loss": 0.4354, "step": 41339 }, { "epoch": 0.8767576509511993, "grad_norm": 0.38417986035346985, "learning_rate": 1.1942520896605834e-05, "loss": 0.4882, "step": 41340 }, { "epoch": 0.8767788594091324, "grad_norm": 0.366086483001709, "learning_rate": 1.1942193752739444e-05, "loss": 0.5055, "step": 41341 }, { "epoch": 0.8768000678670654, "grad_norm": 0.44946733117103577, "learning_rate": 1.1941866606712961e-05, "loss": 0.422, "step": 41342 }, { "epoch": 0.8768212763249984, "grad_norm": 0.3547564744949341, "learning_rate": 1.1941539458526753e-05, "loss": 0.541, "step": 41343 }, { "epoch": 0.8768424847829315, "grad_norm": 0.35901013016700745, "learning_rate": 1.194121230818118e-05, "loss": 0.4432, "step": 41344 }, { "epoch": 0.8768636932408644, "grad_norm": 0.3502587378025055, "learning_rate": 1.1940885155676607e-05, "loss": 0.485, "step": 41345 }, { "epoch": 0.8768849016987975, "grad_norm": 0.3228069245815277, "learning_rate": 1.1940558001013401e-05, "loss": 0.4699, "step": 41346 }, { "epoch": 0.8769061101567305, "grad_norm": 0.42720258235931396, "learning_rate": 1.1940230844191923e-05, "loss": 0.4794, "step": 41347 }, { "epoch": 0.8769273186146636, "grad_norm": 0.3360571563243866, "learning_rate": 1.1939903685212534e-05, "loss": 0.4912, "step": 41348 }, { "epoch": 0.8769485270725965, "grad_norm": 0.3318840265274048, "learning_rate": 1.1939576524075604e-05, "loss": 0.4704, "step": 41349 }, { "epoch": 0.8769697355305296, "grad_norm": 0.36352619528770447, "learning_rate": 1.1939249360781492e-05, "loss": 0.3962, "step": 41350 }, { "epoch": 0.8769909439884626, "grad_norm": 0.4366452693939209, "learning_rate": 1.1938922195330562e-05, "loss": 0.5923, "step": 41351 }, { "epoch": 0.8770121524463956, "grad_norm": 0.3877928853034973, "learning_rate": 1.1938595027723183e-05, "loss": 0.5832, "step": 41352 }, { "epoch": 0.8770333609043286, "grad_norm": 0.3452342450618744, "learning_rate": 1.1938267857959715e-05, "loss": 0.459, "step": 41353 }, { "epoch": 0.8770545693622617, "grad_norm": 0.37503841519355774, "learning_rate": 1.1937940686040523e-05, "loss": 0.4518, "step": 41354 }, { "epoch": 0.8770757778201947, "grad_norm": 0.37935811281204224, "learning_rate": 1.193761351196597e-05, "loss": 0.4961, "step": 41355 }, { "epoch": 0.8770969862781277, "grad_norm": 0.35799041390419006, "learning_rate": 1.1937286335736418e-05, "loss": 0.444, "step": 41356 }, { "epoch": 0.8771181947360608, "grad_norm": 0.4052810072898865, "learning_rate": 1.1936959157352232e-05, "loss": 0.4406, "step": 41357 }, { "epoch": 0.8771394031939937, "grad_norm": 0.3909779489040375, "learning_rate": 1.193663197681378e-05, "loss": 0.4718, "step": 41358 }, { "epoch": 0.8771606116519268, "grad_norm": 0.4223863482475281, "learning_rate": 1.193630479412142e-05, "loss": 0.5029, "step": 41359 }, { "epoch": 0.8771818201098598, "grad_norm": 0.35885748267173767, "learning_rate": 1.1935977609275521e-05, "loss": 0.5069, "step": 41360 }, { "epoch": 0.8772030285677929, "grad_norm": 0.40519437193870544, "learning_rate": 1.1935650422276446e-05, "loss": 0.5275, "step": 41361 }, { "epoch": 0.8772242370257258, "grad_norm": 0.3588441014289856, "learning_rate": 1.1935323233124553e-05, "loss": 0.5354, "step": 41362 }, { "epoch": 0.8772454454836589, "grad_norm": 0.37302789092063904, "learning_rate": 1.1934996041820213e-05, "loss": 0.4977, "step": 41363 }, { "epoch": 0.8772666539415919, "grad_norm": 0.32741719484329224, "learning_rate": 1.1934668848363789e-05, "loss": 0.4574, "step": 41364 }, { "epoch": 0.8772878623995249, "grad_norm": 0.3586338460445404, "learning_rate": 1.1934341652755638e-05, "loss": 0.4877, "step": 41365 }, { "epoch": 0.8773090708574579, "grad_norm": 0.3336074650287628, "learning_rate": 1.1934014454996132e-05, "loss": 0.4911, "step": 41366 }, { "epoch": 0.877330279315391, "grad_norm": 0.40867647528648376, "learning_rate": 1.1933687255085632e-05, "loss": 0.5064, "step": 41367 }, { "epoch": 0.8773514877733241, "grad_norm": 0.3890199661254883, "learning_rate": 1.1933360053024504e-05, "loss": 0.4162, "step": 41368 }, { "epoch": 0.877372696231257, "grad_norm": 0.37715646624565125, "learning_rate": 1.1933032848813104e-05, "loss": 0.4745, "step": 41369 }, { "epoch": 0.8773939046891901, "grad_norm": 0.33464160561561584, "learning_rate": 1.1932705642451807e-05, "loss": 0.5057, "step": 41370 }, { "epoch": 0.8774151131471231, "grad_norm": 0.37904128432273865, "learning_rate": 1.1932378433940968e-05, "loss": 0.4927, "step": 41371 }, { "epoch": 0.8774363216050561, "grad_norm": 0.3959856331348419, "learning_rate": 1.1932051223280958e-05, "loss": 0.5054, "step": 41372 }, { "epoch": 0.8774575300629891, "grad_norm": 0.357694536447525, "learning_rate": 1.1931724010472135e-05, "loss": 0.5135, "step": 41373 }, { "epoch": 0.8774787385209222, "grad_norm": 0.32872945070266724, "learning_rate": 1.1931396795514867e-05, "loss": 0.527, "step": 41374 }, { "epoch": 0.8774999469788551, "grad_norm": 0.332974910736084, "learning_rate": 1.1931069578409515e-05, "loss": 0.4851, "step": 41375 }, { "epoch": 0.8775211554367882, "grad_norm": 0.39847883582115173, "learning_rate": 1.1930742359156443e-05, "loss": 0.5511, "step": 41376 }, { "epoch": 0.8775423638947212, "grad_norm": 0.3540970981121063, "learning_rate": 1.1930415137756017e-05, "loss": 0.4778, "step": 41377 }, { "epoch": 0.8775635723526543, "grad_norm": 0.3979673981666565, "learning_rate": 1.1930087914208602e-05, "loss": 0.5006, "step": 41378 }, { "epoch": 0.8775847808105872, "grad_norm": 1.2747572660446167, "learning_rate": 1.1929760688514556e-05, "loss": 0.4364, "step": 41379 }, { "epoch": 0.8776059892685203, "grad_norm": 0.3588721752166748, "learning_rate": 1.192943346067425e-05, "loss": 0.5371, "step": 41380 }, { "epoch": 0.8776271977264533, "grad_norm": 0.3591034710407257, "learning_rate": 1.1929106230688046e-05, "loss": 0.5647, "step": 41381 }, { "epoch": 0.8776484061843863, "grad_norm": 0.4144068956375122, "learning_rate": 1.1928778998556301e-05, "loss": 0.5316, "step": 41382 }, { "epoch": 0.8776696146423194, "grad_norm": 0.5139257311820984, "learning_rate": 1.1928451764279392e-05, "loss": 0.5629, "step": 41383 }, { "epoch": 0.8776908231002524, "grad_norm": 0.3777647912502289, "learning_rate": 1.1928124527857672e-05, "loss": 0.5341, "step": 41384 }, { "epoch": 0.8777120315581854, "grad_norm": 0.36589163541793823, "learning_rate": 1.1927797289291507e-05, "loss": 0.4526, "step": 41385 }, { "epoch": 0.8777332400161184, "grad_norm": 0.5884193778038025, "learning_rate": 1.1927470048581267e-05, "loss": 0.5557, "step": 41386 }, { "epoch": 0.8777544484740515, "grad_norm": 0.376304566860199, "learning_rate": 1.1927142805727307e-05, "loss": 0.5, "step": 41387 }, { "epoch": 0.8777756569319844, "grad_norm": 0.4448499083518982, "learning_rate": 1.192681556073e-05, "loss": 0.474, "step": 41388 }, { "epoch": 0.8777968653899175, "grad_norm": 0.41329270601272583, "learning_rate": 1.1926488313589702e-05, "loss": 0.4983, "step": 41389 }, { "epoch": 0.8778180738478505, "grad_norm": 0.36257290840148926, "learning_rate": 1.192616106430678e-05, "loss": 0.5062, "step": 41390 }, { "epoch": 0.8778392823057836, "grad_norm": 0.4103509485721588, "learning_rate": 1.1925833812881603e-05, "loss": 0.5057, "step": 41391 }, { "epoch": 0.8778604907637165, "grad_norm": 0.3323458135128021, "learning_rate": 1.1925506559314528e-05, "loss": 0.4821, "step": 41392 }, { "epoch": 0.8778816992216496, "grad_norm": 0.3668372631072998, "learning_rate": 1.1925179303605919e-05, "loss": 0.449, "step": 41393 }, { "epoch": 0.8779029076795826, "grad_norm": 0.3973515033721924, "learning_rate": 1.1924852045756146e-05, "loss": 0.4766, "step": 41394 }, { "epoch": 0.8779241161375156, "grad_norm": 0.49548444151878357, "learning_rate": 1.1924524785765565e-05, "loss": 0.4115, "step": 41395 }, { "epoch": 0.8779453245954487, "grad_norm": 0.38125208020210266, "learning_rate": 1.1924197523634547e-05, "loss": 0.4889, "step": 41396 }, { "epoch": 0.8779665330533817, "grad_norm": 0.3653564751148224, "learning_rate": 1.1923870259363453e-05, "loss": 0.5543, "step": 41397 }, { "epoch": 0.8779877415113148, "grad_norm": 0.43614572286605835, "learning_rate": 1.1923542992952648e-05, "loss": 0.4826, "step": 41398 }, { "epoch": 0.8780089499692477, "grad_norm": 0.36971428990364075, "learning_rate": 1.1923215724402493e-05, "loss": 0.4726, "step": 41399 }, { "epoch": 0.8780301584271808, "grad_norm": 0.3514811098575592, "learning_rate": 1.1922888453713357e-05, "loss": 0.5081, "step": 41400 }, { "epoch": 0.8780513668851138, "grad_norm": 0.3766539394855499, "learning_rate": 1.1922561180885599e-05, "loss": 0.5362, "step": 41401 }, { "epoch": 0.8780725753430468, "grad_norm": 0.3303276002407074, "learning_rate": 1.1922233905919585e-05, "loss": 0.4115, "step": 41402 }, { "epoch": 0.8780937838009798, "grad_norm": 0.47626808285713196, "learning_rate": 1.1921906628815681e-05, "loss": 0.5818, "step": 41403 }, { "epoch": 0.8781149922589129, "grad_norm": 0.3477868139743805, "learning_rate": 1.1921579349574247e-05, "loss": 0.5081, "step": 41404 }, { "epoch": 0.8781362007168458, "grad_norm": 0.33624371886253357, "learning_rate": 1.192125206819565e-05, "loss": 0.46, "step": 41405 }, { "epoch": 0.8781574091747789, "grad_norm": 0.38080260157585144, "learning_rate": 1.1920924784680255e-05, "loss": 0.5239, "step": 41406 }, { "epoch": 0.8781786176327119, "grad_norm": 0.432353138923645, "learning_rate": 1.1920597499028422e-05, "loss": 0.5277, "step": 41407 }, { "epoch": 0.878199826090645, "grad_norm": 0.40536224842071533, "learning_rate": 1.1920270211240516e-05, "loss": 0.3842, "step": 41408 }, { "epoch": 0.878221034548578, "grad_norm": 0.3521001636981964, "learning_rate": 1.1919942921316906e-05, "loss": 0.4134, "step": 41409 }, { "epoch": 0.878242243006511, "grad_norm": 0.3482392430305481, "learning_rate": 1.1919615629257949e-05, "loss": 0.5168, "step": 41410 }, { "epoch": 0.8782634514644441, "grad_norm": 0.3294428288936615, "learning_rate": 1.1919288335064011e-05, "loss": 0.4604, "step": 41411 }, { "epoch": 0.878284659922377, "grad_norm": 0.39978259801864624, "learning_rate": 1.1918961038735461e-05, "loss": 0.3983, "step": 41412 }, { "epoch": 0.8783058683803101, "grad_norm": 0.3900638520717621, "learning_rate": 1.1918633740272659e-05, "loss": 0.4072, "step": 41413 }, { "epoch": 0.8783270768382431, "grad_norm": 0.3438667953014374, "learning_rate": 1.1918306439675967e-05, "loss": 0.4863, "step": 41414 }, { "epoch": 0.8783482852961761, "grad_norm": 0.36800268292427063, "learning_rate": 1.1917979136945752e-05, "loss": 0.5165, "step": 41415 }, { "epoch": 0.8783694937541091, "grad_norm": 0.38442981243133545, "learning_rate": 1.1917651832082376e-05, "loss": 0.5195, "step": 41416 }, { "epoch": 0.8783907022120422, "grad_norm": 0.3511720299720764, "learning_rate": 1.1917324525086206e-05, "loss": 0.4686, "step": 41417 }, { "epoch": 0.8784119106699751, "grad_norm": 0.4132575988769531, "learning_rate": 1.1916997215957604e-05, "loss": 0.5615, "step": 41418 }, { "epoch": 0.8784331191279082, "grad_norm": 0.3659118115901947, "learning_rate": 1.1916669904696933e-05, "loss": 0.4735, "step": 41419 }, { "epoch": 0.8784543275858412, "grad_norm": 0.3774784505367279, "learning_rate": 1.1916342591304562e-05, "loss": 0.5216, "step": 41420 }, { "epoch": 0.8784755360437743, "grad_norm": 0.3970164954662323, "learning_rate": 1.1916015275780848e-05, "loss": 0.4599, "step": 41421 }, { "epoch": 0.8784967445017072, "grad_norm": 0.3658795654773712, "learning_rate": 1.191568795812616e-05, "loss": 0.5138, "step": 41422 }, { "epoch": 0.8785179529596403, "grad_norm": 0.3777320086956024, "learning_rate": 1.1915360638340858e-05, "loss": 0.4847, "step": 41423 }, { "epoch": 0.8785391614175734, "grad_norm": 0.3545786142349243, "learning_rate": 1.1915033316425311e-05, "loss": 0.4674, "step": 41424 }, { "epoch": 0.8785603698755063, "grad_norm": 0.35830995440483093, "learning_rate": 1.1914705992379881e-05, "loss": 0.5217, "step": 41425 }, { "epoch": 0.8785815783334394, "grad_norm": 0.34415319561958313, "learning_rate": 1.191437866620493e-05, "loss": 0.455, "step": 41426 }, { "epoch": 0.8786027867913724, "grad_norm": 0.39550870656967163, "learning_rate": 1.1914051337900822e-05, "loss": 0.4343, "step": 41427 }, { "epoch": 0.8786239952493055, "grad_norm": 0.3569833040237427, "learning_rate": 1.1913724007467927e-05, "loss": 0.462, "step": 41428 }, { "epoch": 0.8786452037072384, "grad_norm": 0.50491863489151, "learning_rate": 1.1913396674906604e-05, "loss": 0.4908, "step": 41429 }, { "epoch": 0.8786664121651715, "grad_norm": 0.34974971413612366, "learning_rate": 1.1913069340217215e-05, "loss": 0.4721, "step": 41430 }, { "epoch": 0.8786876206231045, "grad_norm": 0.3640383780002594, "learning_rate": 1.1912742003400131e-05, "loss": 0.4327, "step": 41431 }, { "epoch": 0.8787088290810375, "grad_norm": 0.35528671741485596, "learning_rate": 1.1912414664455707e-05, "loss": 0.3499, "step": 41432 }, { "epoch": 0.8787300375389705, "grad_norm": 0.35889026522636414, "learning_rate": 1.1912087323384316e-05, "loss": 0.4623, "step": 41433 }, { "epoch": 0.8787512459969036, "grad_norm": 0.34225091338157654, "learning_rate": 1.1911759980186318e-05, "loss": 0.4842, "step": 41434 }, { "epoch": 0.8787724544548365, "grad_norm": 0.3965758979320526, "learning_rate": 1.1911432634862075e-05, "loss": 0.5661, "step": 41435 }, { "epoch": 0.8787936629127696, "grad_norm": 0.38291996717453003, "learning_rate": 1.1911105287411952e-05, "loss": 0.4612, "step": 41436 }, { "epoch": 0.8788148713707027, "grad_norm": 0.35284727811813354, "learning_rate": 1.1910777937836317e-05, "loss": 0.4821, "step": 41437 }, { "epoch": 0.8788360798286357, "grad_norm": 0.37743237614631653, "learning_rate": 1.1910450586135531e-05, "loss": 0.5454, "step": 41438 }, { "epoch": 0.8788572882865687, "grad_norm": 0.3338763117790222, "learning_rate": 1.191012323230996e-05, "loss": 0.4406, "step": 41439 }, { "epoch": 0.8788784967445017, "grad_norm": 0.3542935252189636, "learning_rate": 1.1909795876359964e-05, "loss": 0.4526, "step": 41440 }, { "epoch": 0.8788997052024348, "grad_norm": 0.3745557963848114, "learning_rate": 1.1909468518285909e-05, "loss": 0.5845, "step": 41441 }, { "epoch": 0.8789209136603677, "grad_norm": 0.3699631094932556, "learning_rate": 1.1909141158088162e-05, "loss": 0.5097, "step": 41442 }, { "epoch": 0.8789421221183008, "grad_norm": 0.37064385414123535, "learning_rate": 1.1908813795767085e-05, "loss": 0.4938, "step": 41443 }, { "epoch": 0.8789633305762338, "grad_norm": 0.33438917994499207, "learning_rate": 1.1908486431323039e-05, "loss": 0.457, "step": 41444 }, { "epoch": 0.8789845390341668, "grad_norm": 0.38601166009902954, "learning_rate": 1.1908159064756392e-05, "loss": 0.4866, "step": 41445 }, { "epoch": 0.8790057474920998, "grad_norm": 0.3419347107410431, "learning_rate": 1.190783169606751e-05, "loss": 0.4754, "step": 41446 }, { "epoch": 0.8790269559500329, "grad_norm": 0.38840311765670776, "learning_rate": 1.1907504325256749e-05, "loss": 0.4851, "step": 41447 }, { "epoch": 0.8790481644079658, "grad_norm": 0.3760415315628052, "learning_rate": 1.1907176952324482e-05, "loss": 0.5478, "step": 41448 }, { "epoch": 0.8790693728658989, "grad_norm": 0.3830201029777527, "learning_rate": 1.1906849577271072e-05, "loss": 0.5313, "step": 41449 }, { "epoch": 0.879090581323832, "grad_norm": 0.35252535343170166, "learning_rate": 1.1906522200096875e-05, "loss": 0.4743, "step": 41450 }, { "epoch": 0.879111789781765, "grad_norm": 0.33973753452301025, "learning_rate": 1.1906194820802263e-05, "loss": 0.4269, "step": 41451 }, { "epoch": 0.879132998239698, "grad_norm": 0.3957076966762543, "learning_rate": 1.1905867439387598e-05, "loss": 0.5709, "step": 41452 }, { "epoch": 0.879154206697631, "grad_norm": 0.3613019585609436, "learning_rate": 1.190554005585324e-05, "loss": 0.3799, "step": 41453 }, { "epoch": 0.8791754151555641, "grad_norm": 0.3863653838634491, "learning_rate": 1.1905212670199561e-05, "loss": 0.4832, "step": 41454 }, { "epoch": 0.879196623613497, "grad_norm": 0.34390297532081604, "learning_rate": 1.190488528242692e-05, "loss": 0.4692, "step": 41455 }, { "epoch": 0.8792178320714301, "grad_norm": 0.40287911891937256, "learning_rate": 1.1904557892535683e-05, "loss": 0.4989, "step": 41456 }, { "epoch": 0.8792390405293631, "grad_norm": 0.3618307411670685, "learning_rate": 1.1904230500526213e-05, "loss": 0.4993, "step": 41457 }, { "epoch": 0.8792602489872962, "grad_norm": 0.37568873167037964, "learning_rate": 1.1903903106398873e-05, "loss": 0.5491, "step": 41458 }, { "epoch": 0.8792814574452291, "grad_norm": 0.49979597330093384, "learning_rate": 1.1903575710154028e-05, "loss": 0.4608, "step": 41459 }, { "epoch": 0.8793026659031622, "grad_norm": 0.36178991198539734, "learning_rate": 1.1903248311792045e-05, "loss": 0.5116, "step": 41460 }, { "epoch": 0.8793238743610952, "grad_norm": 0.4536609649658203, "learning_rate": 1.1902920911313282e-05, "loss": 0.4302, "step": 41461 }, { "epoch": 0.8793450828190282, "grad_norm": 0.36048388481140137, "learning_rate": 1.1902593508718113e-05, "loss": 0.4212, "step": 41462 }, { "epoch": 0.8793662912769612, "grad_norm": 0.3316458761692047, "learning_rate": 1.1902266104006893e-05, "loss": 0.4246, "step": 41463 }, { "epoch": 0.8793874997348943, "grad_norm": 0.3948843777179718, "learning_rate": 1.1901938697179987e-05, "loss": 0.445, "step": 41464 }, { "epoch": 0.8794087081928273, "grad_norm": 0.34011736512184143, "learning_rate": 1.1901611288237765e-05, "loss": 0.4606, "step": 41465 }, { "epoch": 0.8794299166507603, "grad_norm": 0.3898541033267975, "learning_rate": 1.1901283877180582e-05, "loss": 0.5943, "step": 41466 }, { "epoch": 0.8794511251086934, "grad_norm": 0.5000911951065063, "learning_rate": 1.1900956464008812e-05, "loss": 0.5499, "step": 41467 }, { "epoch": 0.8794723335666264, "grad_norm": 0.37760987877845764, "learning_rate": 1.1900629048722814e-05, "loss": 0.5095, "step": 41468 }, { "epoch": 0.8794935420245594, "grad_norm": 0.37973660230636597, "learning_rate": 1.1900301631322952e-05, "loss": 0.5488, "step": 41469 }, { "epoch": 0.8795147504824924, "grad_norm": 0.36719781160354614, "learning_rate": 1.1899974211809593e-05, "loss": 0.4584, "step": 41470 }, { "epoch": 0.8795359589404255, "grad_norm": 0.36473578214645386, "learning_rate": 1.1899646790183099e-05, "loss": 0.4515, "step": 41471 }, { "epoch": 0.8795571673983584, "grad_norm": 0.40568047761917114, "learning_rate": 1.1899319366443829e-05, "loss": 0.5616, "step": 41472 }, { "epoch": 0.8795783758562915, "grad_norm": 0.396017462015152, "learning_rate": 1.1898991940592159e-05, "loss": 0.4955, "step": 41473 }, { "epoch": 0.8795995843142245, "grad_norm": 0.4206954538822174, "learning_rate": 1.1898664512628444e-05, "loss": 0.5722, "step": 41474 }, { "epoch": 0.8796207927721575, "grad_norm": 0.3991922438144684, "learning_rate": 1.1898337082553051e-05, "loss": 0.4943, "step": 41475 }, { "epoch": 0.8796420012300905, "grad_norm": 0.4027532935142517, "learning_rate": 1.1898009650366347e-05, "loss": 0.4479, "step": 41476 }, { "epoch": 0.8796632096880236, "grad_norm": 0.3783729672431946, "learning_rate": 1.189768221606869e-05, "loss": 0.4818, "step": 41477 }, { "epoch": 0.8796844181459567, "grad_norm": 0.36253491044044495, "learning_rate": 1.1897354779660444e-05, "loss": 0.4821, "step": 41478 }, { "epoch": 0.8797056266038896, "grad_norm": 0.3724660575389862, "learning_rate": 1.1897027341141983e-05, "loss": 0.4554, "step": 41479 }, { "epoch": 0.8797268350618227, "grad_norm": 0.6277908682823181, "learning_rate": 1.1896699900513662e-05, "loss": 0.4891, "step": 41480 }, { "epoch": 0.8797480435197557, "grad_norm": 0.3798106610774994, "learning_rate": 1.1896372457775846e-05, "loss": 0.5335, "step": 41481 }, { "epoch": 0.8797692519776887, "grad_norm": 0.43559691309928894, "learning_rate": 1.1896045012928905e-05, "loss": 0.494, "step": 41482 }, { "epoch": 0.8797904604356217, "grad_norm": 0.36175671219825745, "learning_rate": 1.1895717565973196e-05, "loss": 0.4944, "step": 41483 }, { "epoch": 0.8798116688935548, "grad_norm": 0.39268365502357483, "learning_rate": 1.1895390116909087e-05, "loss": 0.4914, "step": 41484 }, { "epoch": 0.8798328773514877, "grad_norm": 0.3468940854072571, "learning_rate": 1.1895062665736941e-05, "loss": 0.5442, "step": 41485 }, { "epoch": 0.8798540858094208, "grad_norm": 0.348939448595047, "learning_rate": 1.1894735212457123e-05, "loss": 0.4308, "step": 41486 }, { "epoch": 0.8798752942673538, "grad_norm": 0.3601721227169037, "learning_rate": 1.189440775707e-05, "loss": 0.519, "step": 41487 }, { "epoch": 0.8798965027252869, "grad_norm": 0.3083484172821045, "learning_rate": 1.189408029957593e-05, "loss": 0.3814, "step": 41488 }, { "epoch": 0.8799177111832198, "grad_norm": 0.3388613760471344, "learning_rate": 1.189375283997528e-05, "loss": 0.4133, "step": 41489 }, { "epoch": 0.8799389196411529, "grad_norm": 0.4219159185886383, "learning_rate": 1.1893425378268418e-05, "loss": 0.4863, "step": 41490 }, { "epoch": 0.879960128099086, "grad_norm": 0.3680651783943176, "learning_rate": 1.18930979144557e-05, "loss": 0.5068, "step": 41491 }, { "epoch": 0.8799813365570189, "grad_norm": 0.408435195684433, "learning_rate": 1.1892770448537497e-05, "loss": 0.4808, "step": 41492 }, { "epoch": 0.880002545014952, "grad_norm": 0.3417434096336365, "learning_rate": 1.1892442980514172e-05, "loss": 0.4691, "step": 41493 }, { "epoch": 0.880023753472885, "grad_norm": 0.4509502947330475, "learning_rate": 1.189211551038609e-05, "loss": 0.5105, "step": 41494 }, { "epoch": 0.880044961930818, "grad_norm": 0.37007614970207214, "learning_rate": 1.189178803815361e-05, "loss": 0.5072, "step": 41495 }, { "epoch": 0.880066170388751, "grad_norm": 0.7232416272163391, "learning_rate": 1.18914605638171e-05, "loss": 0.5588, "step": 41496 }, { "epoch": 0.8800873788466841, "grad_norm": 0.3421179950237274, "learning_rate": 1.1891133087376928e-05, "loss": 0.4773, "step": 41497 }, { "epoch": 0.880108587304617, "grad_norm": 0.38275057077407837, "learning_rate": 1.1890805608833449e-05, "loss": 0.4768, "step": 41498 }, { "epoch": 0.8801297957625501, "grad_norm": 0.4351176619529724, "learning_rate": 1.1890478128187034e-05, "loss": 0.4378, "step": 41499 }, { "epoch": 0.8801510042204831, "grad_norm": 0.37835243344306946, "learning_rate": 1.1890150645438047e-05, "loss": 0.482, "step": 41500 }, { "epoch": 0.8801722126784162, "grad_norm": 0.33030062913894653, "learning_rate": 1.1889823160586851e-05, "loss": 0.4069, "step": 41501 }, { "epoch": 0.8801934211363491, "grad_norm": 0.34077200293540955, "learning_rate": 1.188949567363381e-05, "loss": 0.4725, "step": 41502 }, { "epoch": 0.8802146295942822, "grad_norm": 0.42984747886657715, "learning_rate": 1.1889168184579288e-05, "loss": 0.5107, "step": 41503 }, { "epoch": 0.8802358380522152, "grad_norm": 0.33128622174263, "learning_rate": 1.1888840693423647e-05, "loss": 0.4487, "step": 41504 }, { "epoch": 0.8802570465101482, "grad_norm": 1.3918745517730713, "learning_rate": 1.1888513200167257e-05, "loss": 0.4841, "step": 41505 }, { "epoch": 0.8802782549680813, "grad_norm": 0.3617537319660187, "learning_rate": 1.1888185704810477e-05, "loss": 0.4661, "step": 41506 }, { "epoch": 0.8802994634260143, "grad_norm": 0.33218109607696533, "learning_rate": 1.1887858207353678e-05, "loss": 0.4701, "step": 41507 }, { "epoch": 0.8803206718839474, "grad_norm": 0.39198920130729675, "learning_rate": 1.1887530707797215e-05, "loss": 0.5117, "step": 41508 }, { "epoch": 0.8803418803418803, "grad_norm": 0.3899354636669159, "learning_rate": 1.1887203206141458e-05, "loss": 0.4906, "step": 41509 }, { "epoch": 0.8803630887998134, "grad_norm": 0.3633105754852295, "learning_rate": 1.1886875702386769e-05, "loss": 0.4675, "step": 41510 }, { "epoch": 0.8803842972577464, "grad_norm": 0.3638313114643097, "learning_rate": 1.1886548196533514e-05, "loss": 0.4965, "step": 41511 }, { "epoch": 0.8804055057156794, "grad_norm": 0.3912445306777954, "learning_rate": 1.1886220688582056e-05, "loss": 0.5003, "step": 41512 }, { "epoch": 0.8804267141736124, "grad_norm": 0.3603641092777252, "learning_rate": 1.1885893178532761e-05, "loss": 0.4438, "step": 41513 }, { "epoch": 0.8804479226315455, "grad_norm": 0.3500765860080719, "learning_rate": 1.188556566638599e-05, "loss": 0.4343, "step": 41514 }, { "epoch": 0.8804691310894784, "grad_norm": 0.38290727138519287, "learning_rate": 1.1885238152142111e-05, "loss": 0.4874, "step": 41515 }, { "epoch": 0.8804903395474115, "grad_norm": 0.37396496534347534, "learning_rate": 1.1884910635801488e-05, "loss": 0.4014, "step": 41516 }, { "epoch": 0.8805115480053445, "grad_norm": 0.3549829125404358, "learning_rate": 1.1884583117364478e-05, "loss": 0.4529, "step": 41517 }, { "epoch": 0.8805327564632776, "grad_norm": 0.3748539686203003, "learning_rate": 1.1884255596831455e-05, "loss": 0.4367, "step": 41518 }, { "epoch": 0.8805539649212106, "grad_norm": 0.42191174626350403, "learning_rate": 1.188392807420278e-05, "loss": 0.5137, "step": 41519 }, { "epoch": 0.8805751733791436, "grad_norm": 0.6123518943786621, "learning_rate": 1.1883600549478814e-05, "loss": 0.4664, "step": 41520 }, { "epoch": 0.8805963818370767, "grad_norm": 0.36944296956062317, "learning_rate": 1.1883273022659927e-05, "loss": 0.4532, "step": 41521 }, { "epoch": 0.8806175902950096, "grad_norm": 0.32554322481155396, "learning_rate": 1.1882945493746478e-05, "loss": 0.4243, "step": 41522 }, { "epoch": 0.8806387987529427, "grad_norm": 0.38218313455581665, "learning_rate": 1.1882617962738832e-05, "loss": 0.495, "step": 41523 }, { "epoch": 0.8806600072108757, "grad_norm": 0.39024075865745544, "learning_rate": 1.1882290429637358e-05, "loss": 0.5184, "step": 41524 }, { "epoch": 0.8806812156688087, "grad_norm": 0.32780492305755615, "learning_rate": 1.1881962894442417e-05, "loss": 0.4314, "step": 41525 }, { "epoch": 0.8807024241267417, "grad_norm": 0.34456202387809753, "learning_rate": 1.1881635357154368e-05, "loss": 0.4366, "step": 41526 }, { "epoch": 0.8807236325846748, "grad_norm": 0.3684734106063843, "learning_rate": 1.1881307817773587e-05, "loss": 0.5377, "step": 41527 }, { "epoch": 0.8807448410426078, "grad_norm": 0.35145896673202515, "learning_rate": 1.1880980276300427e-05, "loss": 0.486, "step": 41528 }, { "epoch": 0.8807660495005408, "grad_norm": 0.3346518278121948, "learning_rate": 1.188065273273526e-05, "loss": 0.504, "step": 41529 }, { "epoch": 0.8807872579584738, "grad_norm": 0.34779226779937744, "learning_rate": 1.1880325187078445e-05, "loss": 0.4613, "step": 41530 }, { "epoch": 0.8808084664164069, "grad_norm": 0.40221846103668213, "learning_rate": 1.1879997639330352e-05, "loss": 0.5289, "step": 41531 }, { "epoch": 0.8808296748743399, "grad_norm": 0.4051002562046051, "learning_rate": 1.187967008949134e-05, "loss": 0.4824, "step": 41532 }, { "epoch": 0.8808508833322729, "grad_norm": 0.43748435378074646, "learning_rate": 1.1879342537561775e-05, "loss": 0.4598, "step": 41533 }, { "epoch": 0.880872091790206, "grad_norm": 0.37338337302207947, "learning_rate": 1.1879014983542021e-05, "loss": 0.5167, "step": 41534 }, { "epoch": 0.8808933002481389, "grad_norm": 0.3900882601737976, "learning_rate": 1.1878687427432446e-05, "loss": 0.4464, "step": 41535 }, { "epoch": 0.880914508706072, "grad_norm": 0.37075772881507874, "learning_rate": 1.1878359869233407e-05, "loss": 0.4699, "step": 41536 }, { "epoch": 0.880935717164005, "grad_norm": 0.43756553530693054, "learning_rate": 1.1878032308945274e-05, "loss": 0.4949, "step": 41537 }, { "epoch": 0.8809569256219381, "grad_norm": 0.36584004759788513, "learning_rate": 1.187770474656841e-05, "loss": 0.4689, "step": 41538 }, { "epoch": 0.880978134079871, "grad_norm": 0.3327612280845642, "learning_rate": 1.1877377182103181e-05, "loss": 0.4131, "step": 41539 }, { "epoch": 0.8809993425378041, "grad_norm": 0.3180602490901947, "learning_rate": 1.1877049615549945e-05, "loss": 0.4479, "step": 41540 }, { "epoch": 0.8810205509957371, "grad_norm": 0.33908554911613464, "learning_rate": 1.1876722046909075e-05, "loss": 0.3794, "step": 41541 }, { "epoch": 0.8810417594536701, "grad_norm": 0.38230475783348083, "learning_rate": 1.1876394476180927e-05, "loss": 0.4409, "step": 41542 }, { "epoch": 0.8810629679116031, "grad_norm": 0.33962181210517883, "learning_rate": 1.1876066903365873e-05, "loss": 0.4216, "step": 41543 }, { "epoch": 0.8810841763695362, "grad_norm": 0.3886740505695343, "learning_rate": 1.1875739328464273e-05, "loss": 0.5657, "step": 41544 }, { "epoch": 0.8811053848274691, "grad_norm": 0.36181071400642395, "learning_rate": 1.187541175147649e-05, "loss": 0.5384, "step": 41545 }, { "epoch": 0.8811265932854022, "grad_norm": 0.3715003430843353, "learning_rate": 1.1875084172402893e-05, "loss": 0.4448, "step": 41546 }, { "epoch": 0.8811478017433353, "grad_norm": 0.43236619234085083, "learning_rate": 1.1874756591243844e-05, "loss": 0.4383, "step": 41547 }, { "epoch": 0.8811690102012683, "grad_norm": 0.3317932188510895, "learning_rate": 1.1874429007999703e-05, "loss": 0.4468, "step": 41548 }, { "epoch": 0.8811902186592013, "grad_norm": 0.45316359400749207, "learning_rate": 1.1874101422670838e-05, "loss": 0.5456, "step": 41549 }, { "epoch": 0.8812114271171343, "grad_norm": 0.3493730127811432, "learning_rate": 1.1873773835257618e-05, "loss": 0.5049, "step": 41550 }, { "epoch": 0.8812326355750674, "grad_norm": 0.43806493282318115, "learning_rate": 1.18734462457604e-05, "loss": 0.4924, "step": 41551 }, { "epoch": 0.8812538440330003, "grad_norm": 0.38433852791786194, "learning_rate": 1.1873118654179557e-05, "loss": 0.4952, "step": 41552 }, { "epoch": 0.8812750524909334, "grad_norm": 0.4062047600746155, "learning_rate": 1.1872791060515444e-05, "loss": 0.5794, "step": 41553 }, { "epoch": 0.8812962609488664, "grad_norm": 0.34140974283218384, "learning_rate": 1.1872463464768427e-05, "loss": 0.4587, "step": 41554 }, { "epoch": 0.8813174694067994, "grad_norm": 0.39470574259757996, "learning_rate": 1.1872135866938872e-05, "loss": 0.5337, "step": 41555 }, { "epoch": 0.8813386778647324, "grad_norm": 0.3777237832546234, "learning_rate": 1.1871808267027147e-05, "loss": 0.4709, "step": 41556 }, { "epoch": 0.8813598863226655, "grad_norm": 0.32760754227638245, "learning_rate": 1.187148066503361e-05, "loss": 0.3925, "step": 41557 }, { "epoch": 0.8813810947805985, "grad_norm": 0.35629627108573914, "learning_rate": 1.1871153060958634e-05, "loss": 0.4122, "step": 41558 }, { "epoch": 0.8814023032385315, "grad_norm": 0.38490030169487, "learning_rate": 1.1870825454802572e-05, "loss": 0.483, "step": 41559 }, { "epoch": 0.8814235116964646, "grad_norm": 0.36618390679359436, "learning_rate": 1.1870497846565796e-05, "loss": 0.4415, "step": 41560 }, { "epoch": 0.8814447201543976, "grad_norm": 0.3617578446865082, "learning_rate": 1.187017023624867e-05, "loss": 0.4587, "step": 41561 }, { "epoch": 0.8814659286123306, "grad_norm": 0.3538961410522461, "learning_rate": 1.1869842623851556e-05, "loss": 0.5085, "step": 41562 }, { "epoch": 0.8814871370702636, "grad_norm": 0.3931136429309845, "learning_rate": 1.1869515009374816e-05, "loss": 0.5467, "step": 41563 }, { "epoch": 0.8815083455281967, "grad_norm": 0.3669433891773224, "learning_rate": 1.1869187392818823e-05, "loss": 0.4119, "step": 41564 }, { "epoch": 0.8815295539861296, "grad_norm": 0.45016637444496155, "learning_rate": 1.1868859774183931e-05, "loss": 0.38, "step": 41565 }, { "epoch": 0.8815507624440627, "grad_norm": 0.4398209750652313, "learning_rate": 1.1868532153470516e-05, "loss": 0.4536, "step": 41566 }, { "epoch": 0.8815719709019957, "grad_norm": 0.34705477952957153, "learning_rate": 1.186820453067893e-05, "loss": 0.538, "step": 41567 }, { "epoch": 0.8815931793599288, "grad_norm": 0.39884480834007263, "learning_rate": 1.1867876905809544e-05, "loss": 0.478, "step": 41568 }, { "epoch": 0.8816143878178617, "grad_norm": 0.35369524359703064, "learning_rate": 1.1867549278862721e-05, "loss": 0.5147, "step": 41569 }, { "epoch": 0.8816355962757948, "grad_norm": 0.3654768764972687, "learning_rate": 1.1867221649838832e-05, "loss": 0.464, "step": 41570 }, { "epoch": 0.8816568047337278, "grad_norm": 0.4858768880367279, "learning_rate": 1.1866894018738229e-05, "loss": 0.5298, "step": 41571 }, { "epoch": 0.8816780131916608, "grad_norm": 0.3860054612159729, "learning_rate": 1.1866566385561284e-05, "loss": 0.5178, "step": 41572 }, { "epoch": 0.8816992216495939, "grad_norm": 0.5736833214759827, "learning_rate": 1.1866238750308363e-05, "loss": 0.4813, "step": 41573 }, { "epoch": 0.8817204301075269, "grad_norm": 0.6211674213409424, "learning_rate": 1.1865911112979824e-05, "loss": 0.5219, "step": 41574 }, { "epoch": 0.88174163856546, "grad_norm": 0.43918997049331665, "learning_rate": 1.1865583473576035e-05, "loss": 0.5869, "step": 41575 }, { "epoch": 0.8817628470233929, "grad_norm": 0.38409101963043213, "learning_rate": 1.1865255832097363e-05, "loss": 0.5679, "step": 41576 }, { "epoch": 0.881784055481326, "grad_norm": 0.3511565923690796, "learning_rate": 1.1864928188544164e-05, "loss": 0.4444, "step": 41577 }, { "epoch": 0.881805263939259, "grad_norm": 0.37264248728752136, "learning_rate": 1.1864600542916813e-05, "loss": 0.4909, "step": 41578 }, { "epoch": 0.881826472397192, "grad_norm": 0.37864744663238525, "learning_rate": 1.1864272895215667e-05, "loss": 0.4886, "step": 41579 }, { "epoch": 0.881847680855125, "grad_norm": 0.35825350880622864, "learning_rate": 1.1863945245441095e-05, "loss": 0.4296, "step": 41580 }, { "epoch": 0.8818688893130581, "grad_norm": 0.3461088538169861, "learning_rate": 1.186361759359346e-05, "loss": 0.5578, "step": 41581 }, { "epoch": 0.881890097770991, "grad_norm": 0.3668915927410126, "learning_rate": 1.1863289939673122e-05, "loss": 0.5055, "step": 41582 }, { "epoch": 0.8819113062289241, "grad_norm": 0.37912440299987793, "learning_rate": 1.1862962283680452e-05, "loss": 0.4836, "step": 41583 }, { "epoch": 0.8819325146868571, "grad_norm": 0.340669184923172, "learning_rate": 1.1862634625615813e-05, "loss": 0.5044, "step": 41584 }, { "epoch": 0.8819537231447901, "grad_norm": 0.37773486971855164, "learning_rate": 1.1862306965479564e-05, "loss": 0.5401, "step": 41585 }, { "epoch": 0.8819749316027231, "grad_norm": 0.3960534334182739, "learning_rate": 1.1861979303272075e-05, "loss": 0.4952, "step": 41586 }, { "epoch": 0.8819961400606562, "grad_norm": 0.43561169505119324, "learning_rate": 1.186165163899371e-05, "loss": 0.495, "step": 41587 }, { "epoch": 0.8820173485185893, "grad_norm": 0.30187898874282837, "learning_rate": 1.1861323972644829e-05, "loss": 0.4706, "step": 41588 }, { "epoch": 0.8820385569765222, "grad_norm": 0.3576701283454895, "learning_rate": 1.1860996304225803e-05, "loss": 0.4128, "step": 41589 }, { "epoch": 0.8820597654344553, "grad_norm": 0.33654752373695374, "learning_rate": 1.1860668633736994e-05, "loss": 0.4651, "step": 41590 }, { "epoch": 0.8820809738923883, "grad_norm": 0.3334167003631592, "learning_rate": 1.1860340961178762e-05, "loss": 0.4271, "step": 41591 }, { "epoch": 0.8821021823503213, "grad_norm": 0.39561060070991516, "learning_rate": 1.1860013286551477e-05, "loss": 0.541, "step": 41592 }, { "epoch": 0.8821233908082543, "grad_norm": 0.37620553374290466, "learning_rate": 1.18596856098555e-05, "loss": 0.5445, "step": 41593 }, { "epoch": 0.8821445992661874, "grad_norm": 0.3788430392742157, "learning_rate": 1.1859357931091195e-05, "loss": 0.4407, "step": 41594 }, { "epoch": 0.8821658077241203, "grad_norm": 0.34642165899276733, "learning_rate": 1.1859030250258933e-05, "loss": 0.4959, "step": 41595 }, { "epoch": 0.8821870161820534, "grad_norm": 0.3791978061199188, "learning_rate": 1.1858702567359071e-05, "loss": 0.4475, "step": 41596 }, { "epoch": 0.8822082246399864, "grad_norm": 0.4251701831817627, "learning_rate": 1.1858374882391979e-05, "loss": 0.4925, "step": 41597 }, { "epoch": 0.8822294330979195, "grad_norm": 0.4015926420688629, "learning_rate": 1.1858047195358016e-05, "loss": 0.5359, "step": 41598 }, { "epoch": 0.8822506415558524, "grad_norm": 0.40641558170318604, "learning_rate": 1.1857719506257547e-05, "loss": 0.4632, "step": 41599 }, { "epoch": 0.8822718500137855, "grad_norm": 0.38121339678764343, "learning_rate": 1.1857391815090942e-05, "loss": 0.521, "step": 41600 }, { "epoch": 0.8822930584717186, "grad_norm": 0.3633449971675873, "learning_rate": 1.1857064121858561e-05, "loss": 0.4954, "step": 41601 }, { "epoch": 0.8823142669296515, "grad_norm": 0.377260684967041, "learning_rate": 1.1856736426560769e-05, "loss": 0.5106, "step": 41602 }, { "epoch": 0.8823354753875846, "grad_norm": 0.40452083945274353, "learning_rate": 1.185640872919793e-05, "loss": 0.5016, "step": 41603 }, { "epoch": 0.8823566838455176, "grad_norm": 0.32216978073120117, "learning_rate": 1.1856081029770412e-05, "loss": 0.4544, "step": 41604 }, { "epoch": 0.8823778923034507, "grad_norm": 0.37497785687446594, "learning_rate": 1.1855753328278575e-05, "loss": 0.5049, "step": 41605 }, { "epoch": 0.8823991007613836, "grad_norm": 0.46549707651138306, "learning_rate": 1.1855425624722787e-05, "loss": 0.5349, "step": 41606 }, { "epoch": 0.8824203092193167, "grad_norm": 0.364712119102478, "learning_rate": 1.1855097919103406e-05, "loss": 0.4608, "step": 41607 }, { "epoch": 0.8824415176772497, "grad_norm": 0.40470120310783386, "learning_rate": 1.1854770211420806e-05, "loss": 0.5074, "step": 41608 }, { "epoch": 0.8824627261351827, "grad_norm": 0.36892664432525635, "learning_rate": 1.1854442501675346e-05, "loss": 0.5839, "step": 41609 }, { "epoch": 0.8824839345931157, "grad_norm": 0.38225266337394714, "learning_rate": 1.1854114789867389e-05, "loss": 0.5438, "step": 41610 }, { "epoch": 0.8825051430510488, "grad_norm": 0.340090811252594, "learning_rate": 1.1853787075997303e-05, "loss": 0.5213, "step": 41611 }, { "epoch": 0.8825263515089817, "grad_norm": 0.33571895956993103, "learning_rate": 1.1853459360065451e-05, "loss": 0.4534, "step": 41612 }, { "epoch": 0.8825475599669148, "grad_norm": 0.36251601576805115, "learning_rate": 1.1853131642072197e-05, "loss": 0.4098, "step": 41613 }, { "epoch": 0.8825687684248479, "grad_norm": 0.37165990471839905, "learning_rate": 1.1852803922017905e-05, "loss": 0.5097, "step": 41614 }, { "epoch": 0.8825899768827808, "grad_norm": 0.36268025636672974, "learning_rate": 1.1852476199902945e-05, "loss": 0.3821, "step": 41615 }, { "epoch": 0.8826111853407139, "grad_norm": 0.3820255994796753, "learning_rate": 1.1852148475727673e-05, "loss": 0.3662, "step": 41616 }, { "epoch": 0.8826323937986469, "grad_norm": 0.3791426420211792, "learning_rate": 1.185182074949246e-05, "loss": 0.5495, "step": 41617 }, { "epoch": 0.88265360225658, "grad_norm": 0.35562771558761597, "learning_rate": 1.185149302119767e-05, "loss": 0.513, "step": 41618 }, { "epoch": 0.8826748107145129, "grad_norm": 0.3897707462310791, "learning_rate": 1.185116529084366e-05, "loss": 0.4221, "step": 41619 }, { "epoch": 0.882696019172446, "grad_norm": 0.3621198832988739, "learning_rate": 1.18508375584308e-05, "loss": 0.5038, "step": 41620 }, { "epoch": 0.882717227630379, "grad_norm": 0.3456280827522278, "learning_rate": 1.1850509823959459e-05, "loss": 0.4316, "step": 41621 }, { "epoch": 0.882738436088312, "grad_norm": 0.5626790523529053, "learning_rate": 1.1850182087429995e-05, "loss": 0.4781, "step": 41622 }, { "epoch": 0.882759644546245, "grad_norm": 0.43460267782211304, "learning_rate": 1.1849854348842775e-05, "loss": 0.4386, "step": 41623 }, { "epoch": 0.8827808530041781, "grad_norm": 0.46342986822128296, "learning_rate": 1.1849526608198162e-05, "loss": 0.5191, "step": 41624 }, { "epoch": 0.882802061462111, "grad_norm": 0.3387848436832428, "learning_rate": 1.1849198865496524e-05, "loss": 0.4985, "step": 41625 }, { "epoch": 0.8828232699200441, "grad_norm": 0.42617347836494446, "learning_rate": 1.1848871120738221e-05, "loss": 0.5193, "step": 41626 }, { "epoch": 0.8828444783779771, "grad_norm": 0.3734971284866333, "learning_rate": 1.184854337392362e-05, "loss": 0.5544, "step": 41627 }, { "epoch": 0.8828656868359102, "grad_norm": 0.33961549401283264, "learning_rate": 1.1848215625053083e-05, "loss": 0.4886, "step": 41628 }, { "epoch": 0.8828868952938432, "grad_norm": 0.40548139810562134, "learning_rate": 1.184788787412698e-05, "loss": 0.5083, "step": 41629 }, { "epoch": 0.8829081037517762, "grad_norm": 0.37852832674980164, "learning_rate": 1.1847560121145671e-05, "loss": 0.4971, "step": 41630 }, { "epoch": 0.8829293122097093, "grad_norm": 0.34075090289115906, "learning_rate": 1.1847232366109522e-05, "loss": 0.4749, "step": 41631 }, { "epoch": 0.8829505206676422, "grad_norm": 0.38804224133491516, "learning_rate": 1.1846904609018897e-05, "loss": 0.4818, "step": 41632 }, { "epoch": 0.8829717291255753, "grad_norm": 0.36741766333580017, "learning_rate": 1.1846576849874159e-05, "loss": 0.5151, "step": 41633 }, { "epoch": 0.8829929375835083, "grad_norm": 0.4374280571937561, "learning_rate": 1.1846249088675677e-05, "loss": 0.4233, "step": 41634 }, { "epoch": 0.8830141460414414, "grad_norm": 0.3598770797252655, "learning_rate": 1.1845921325423813e-05, "loss": 0.5503, "step": 41635 }, { "epoch": 0.8830353544993743, "grad_norm": 0.35861098766326904, "learning_rate": 1.1845593560118929e-05, "loss": 0.4642, "step": 41636 }, { "epoch": 0.8830565629573074, "grad_norm": 0.5190975069999695, "learning_rate": 1.1845265792761394e-05, "loss": 0.5301, "step": 41637 }, { "epoch": 0.8830777714152404, "grad_norm": 0.3488016128540039, "learning_rate": 1.1844938023351568e-05, "loss": 0.5515, "step": 41638 }, { "epoch": 0.8830989798731734, "grad_norm": 0.34437295794487, "learning_rate": 1.1844610251889818e-05, "loss": 0.4618, "step": 41639 }, { "epoch": 0.8831201883311064, "grad_norm": 0.3716551959514618, "learning_rate": 1.1844282478376509e-05, "loss": 0.5222, "step": 41640 }, { "epoch": 0.8831413967890395, "grad_norm": 0.3617057800292969, "learning_rate": 1.1843954702812008e-05, "loss": 0.5016, "step": 41641 }, { "epoch": 0.8831626052469725, "grad_norm": 0.37871500849723816, "learning_rate": 1.1843626925196674e-05, "loss": 0.4975, "step": 41642 }, { "epoch": 0.8831838137049055, "grad_norm": 0.3625352382659912, "learning_rate": 1.1843299145530875e-05, "loss": 0.5084, "step": 41643 }, { "epoch": 0.8832050221628386, "grad_norm": 0.3353154957294464, "learning_rate": 1.1842971363814975e-05, "loss": 0.4777, "step": 41644 }, { "epoch": 0.8832262306207715, "grad_norm": 0.415689617395401, "learning_rate": 1.1842643580049336e-05, "loss": 0.5491, "step": 41645 }, { "epoch": 0.8832474390787046, "grad_norm": 0.3616337776184082, "learning_rate": 1.1842315794234327e-05, "loss": 0.5121, "step": 41646 }, { "epoch": 0.8832686475366376, "grad_norm": 0.40598607063293457, "learning_rate": 1.1841988006370308e-05, "loss": 0.5392, "step": 41647 }, { "epoch": 0.8832898559945707, "grad_norm": 0.37517157196998596, "learning_rate": 1.184166021645765e-05, "loss": 0.5605, "step": 41648 }, { "epoch": 0.8833110644525036, "grad_norm": 0.43029457330703735, "learning_rate": 1.1841332424496711e-05, "loss": 0.4826, "step": 41649 }, { "epoch": 0.8833322729104367, "grad_norm": 0.37677815556526184, "learning_rate": 1.1841004630487858e-05, "loss": 0.4935, "step": 41650 }, { "epoch": 0.8833534813683697, "grad_norm": 0.3638571798801422, "learning_rate": 1.1840676834431458e-05, "loss": 0.5276, "step": 41651 }, { "epoch": 0.8833746898263027, "grad_norm": 0.3802846670150757, "learning_rate": 1.1840349036327867e-05, "loss": 0.5228, "step": 41652 }, { "epoch": 0.8833958982842357, "grad_norm": 0.3262847363948822, "learning_rate": 1.1840021236177459e-05, "loss": 0.3721, "step": 41653 }, { "epoch": 0.8834171067421688, "grad_norm": 0.3904331624507904, "learning_rate": 1.1839693433980599e-05, "loss": 0.4567, "step": 41654 }, { "epoch": 0.8834383152001019, "grad_norm": 0.4222233295440674, "learning_rate": 1.1839365629737644e-05, "loss": 0.532, "step": 41655 }, { "epoch": 0.8834595236580348, "grad_norm": 0.35913172364234924, "learning_rate": 1.1839037823448964e-05, "loss": 0.477, "step": 41656 }, { "epoch": 0.8834807321159679, "grad_norm": 0.3542204797267914, "learning_rate": 1.1838710015114923e-05, "loss": 0.5006, "step": 41657 }, { "epoch": 0.8835019405739009, "grad_norm": 0.388069212436676, "learning_rate": 1.1838382204735884e-05, "loss": 0.5043, "step": 41658 }, { "epoch": 0.8835231490318339, "grad_norm": 0.41867685317993164, "learning_rate": 1.1838054392312209e-05, "loss": 0.4734, "step": 41659 }, { "epoch": 0.8835443574897669, "grad_norm": 0.36730000376701355, "learning_rate": 1.183772657784427e-05, "loss": 0.4827, "step": 41660 }, { "epoch": 0.8835655659477, "grad_norm": 0.4005768597126007, "learning_rate": 1.1837398761332426e-05, "loss": 0.4732, "step": 41661 }, { "epoch": 0.8835867744056329, "grad_norm": 0.48716649413108826, "learning_rate": 1.1837070942777042e-05, "loss": 0.5225, "step": 41662 }, { "epoch": 0.883607982863566, "grad_norm": 0.3563559055328369, "learning_rate": 1.1836743122178486e-05, "loss": 0.4891, "step": 41663 }, { "epoch": 0.883629191321499, "grad_norm": 0.4546794593334198, "learning_rate": 1.1836415299537117e-05, "loss": 0.5623, "step": 41664 }, { "epoch": 0.883650399779432, "grad_norm": 0.3491171896457672, "learning_rate": 1.1836087474853305e-05, "loss": 0.4694, "step": 41665 }, { "epoch": 0.883671608237365, "grad_norm": 0.3429713547229767, "learning_rate": 1.1835759648127414e-05, "loss": 0.4557, "step": 41666 }, { "epoch": 0.8836928166952981, "grad_norm": 0.3673076927661896, "learning_rate": 1.1835431819359803e-05, "loss": 0.3861, "step": 41667 }, { "epoch": 0.883714025153231, "grad_norm": 0.37065181136131287, "learning_rate": 1.1835103988550846e-05, "loss": 0.441, "step": 41668 }, { "epoch": 0.8837352336111641, "grad_norm": 0.3547096252441406, "learning_rate": 1.18347761557009e-05, "loss": 0.4983, "step": 41669 }, { "epoch": 0.8837564420690972, "grad_norm": 0.37480539083480835, "learning_rate": 1.1834448320810332e-05, "loss": 0.4522, "step": 41670 }, { "epoch": 0.8837776505270302, "grad_norm": 0.416328489780426, "learning_rate": 1.1834120483879503e-05, "loss": 0.5283, "step": 41671 }, { "epoch": 0.8837988589849632, "grad_norm": 0.40355822443962097, "learning_rate": 1.1833792644908785e-05, "loss": 0.4673, "step": 41672 }, { "epoch": 0.8838200674428962, "grad_norm": 0.36559733748435974, "learning_rate": 1.1833464803898537e-05, "loss": 0.5632, "step": 41673 }, { "epoch": 0.8838412759008293, "grad_norm": 0.3581101894378662, "learning_rate": 1.1833136960849127e-05, "loss": 0.4369, "step": 41674 }, { "epoch": 0.8838624843587622, "grad_norm": 0.3971997797489166, "learning_rate": 1.1832809115760917e-05, "loss": 0.4136, "step": 41675 }, { "epoch": 0.8838836928166953, "grad_norm": 0.3655858039855957, "learning_rate": 1.1832481268634274e-05, "loss": 0.5025, "step": 41676 }, { "epoch": 0.8839049012746283, "grad_norm": 0.3812462091445923, "learning_rate": 1.1832153419469558e-05, "loss": 0.5625, "step": 41677 }, { "epoch": 0.8839261097325614, "grad_norm": 0.33996179699897766, "learning_rate": 1.183182556826714e-05, "loss": 0.4971, "step": 41678 }, { "epoch": 0.8839473181904943, "grad_norm": 0.40531235933303833, "learning_rate": 1.183149771502738e-05, "loss": 0.513, "step": 41679 }, { "epoch": 0.8839685266484274, "grad_norm": 0.3571567237377167, "learning_rate": 1.1831169859750644e-05, "loss": 0.483, "step": 41680 }, { "epoch": 0.8839897351063604, "grad_norm": 0.4028974175453186, "learning_rate": 1.1830842002437297e-05, "loss": 0.517, "step": 41681 }, { "epoch": 0.8840109435642934, "grad_norm": 0.3506406843662262, "learning_rate": 1.1830514143087705e-05, "loss": 0.4569, "step": 41682 }, { "epoch": 0.8840321520222265, "grad_norm": 0.3348059058189392, "learning_rate": 1.1830186281702232e-05, "loss": 0.5107, "step": 41683 }, { "epoch": 0.8840533604801595, "grad_norm": 0.3263363838195801, "learning_rate": 1.1829858418281234e-05, "loss": 0.5069, "step": 41684 }, { "epoch": 0.8840745689380926, "grad_norm": 0.34576931595802307, "learning_rate": 1.1829530552825092e-05, "loss": 0.4151, "step": 41685 }, { "epoch": 0.8840957773960255, "grad_norm": 0.3562468886375427, "learning_rate": 1.182920268533416e-05, "loss": 0.5114, "step": 41686 }, { "epoch": 0.8841169858539586, "grad_norm": 0.3870742917060852, "learning_rate": 1.1828874815808802e-05, "loss": 0.4579, "step": 41687 }, { "epoch": 0.8841381943118916, "grad_norm": 0.315979927778244, "learning_rate": 1.1828546944249388e-05, "loss": 0.417, "step": 41688 }, { "epoch": 0.8841594027698246, "grad_norm": 0.3531493842601776, "learning_rate": 1.1828219070656277e-05, "loss": 0.4481, "step": 41689 }, { "epoch": 0.8841806112277576, "grad_norm": 0.35553714632987976, "learning_rate": 1.1827891195029839e-05, "loss": 0.4844, "step": 41690 }, { "epoch": 0.8842018196856907, "grad_norm": 0.37129274010658264, "learning_rate": 1.1827563317370437e-05, "loss": 0.4494, "step": 41691 }, { "epoch": 0.8842230281436236, "grad_norm": 0.35388073325157166, "learning_rate": 1.1827235437678432e-05, "loss": 0.5111, "step": 41692 }, { "epoch": 0.8842442366015567, "grad_norm": 0.48427146673202515, "learning_rate": 1.1826907555954194e-05, "loss": 0.5086, "step": 41693 }, { "epoch": 0.8842654450594897, "grad_norm": 0.37010595202445984, "learning_rate": 1.1826579672198087e-05, "loss": 0.5212, "step": 41694 }, { "epoch": 0.8842866535174227, "grad_norm": 0.36581093072891235, "learning_rate": 1.182625178641047e-05, "loss": 0.4355, "step": 41695 }, { "epoch": 0.8843078619753558, "grad_norm": 0.338387668132782, "learning_rate": 1.1825923898591712e-05, "loss": 0.3925, "step": 41696 }, { "epoch": 0.8843290704332888, "grad_norm": 0.4024052917957306, "learning_rate": 1.182559600874218e-05, "loss": 0.4769, "step": 41697 }, { "epoch": 0.8843502788912219, "grad_norm": 0.518258810043335, "learning_rate": 1.1825268116862231e-05, "loss": 0.5038, "step": 41698 }, { "epoch": 0.8843714873491548, "grad_norm": 0.40196382999420166, "learning_rate": 1.1824940222952242e-05, "loss": 0.5892, "step": 41699 }, { "epoch": 0.8843926958070879, "grad_norm": 0.3894917964935303, "learning_rate": 1.1824612327012566e-05, "loss": 0.4705, "step": 41700 }, { "epoch": 0.8844139042650209, "grad_norm": 0.41263818740844727, "learning_rate": 1.1824284429043571e-05, "loss": 0.4208, "step": 41701 }, { "epoch": 0.8844351127229539, "grad_norm": 0.3378877341747284, "learning_rate": 1.1823956529045625e-05, "loss": 0.4303, "step": 41702 }, { "epoch": 0.8844563211808869, "grad_norm": 0.35993102192878723, "learning_rate": 1.182362862701909e-05, "loss": 0.392, "step": 41703 }, { "epoch": 0.88447752963882, "grad_norm": 0.4441828429698944, "learning_rate": 1.182330072296433e-05, "loss": 0.4604, "step": 41704 }, { "epoch": 0.884498738096753, "grad_norm": 0.3565046787261963, "learning_rate": 1.1822972816881712e-05, "loss": 0.476, "step": 41705 }, { "epoch": 0.884519946554686, "grad_norm": 0.4036751091480255, "learning_rate": 1.18226449087716e-05, "loss": 0.5203, "step": 41706 }, { "epoch": 0.884541155012619, "grad_norm": 0.3300827741622925, "learning_rate": 1.1822316998634356e-05, "loss": 0.43, "step": 41707 }, { "epoch": 0.8845623634705521, "grad_norm": 0.3501075506210327, "learning_rate": 1.1821989086470349e-05, "loss": 0.4779, "step": 41708 }, { "epoch": 0.8845835719284851, "grad_norm": 0.3391968607902527, "learning_rate": 1.182166117227994e-05, "loss": 0.4759, "step": 41709 }, { "epoch": 0.8846047803864181, "grad_norm": 0.3844509422779083, "learning_rate": 1.1821333256063495e-05, "loss": 0.5196, "step": 41710 }, { "epoch": 0.8846259888443512, "grad_norm": 0.3954112231731415, "learning_rate": 1.182100533782138e-05, "loss": 0.4666, "step": 41711 }, { "epoch": 0.8846471973022841, "grad_norm": 0.45339497923851013, "learning_rate": 1.1820677417553959e-05, "loss": 0.5267, "step": 41712 }, { "epoch": 0.8846684057602172, "grad_norm": 0.3473224937915802, "learning_rate": 1.1820349495261596e-05, "loss": 0.5182, "step": 41713 }, { "epoch": 0.8846896142181502, "grad_norm": 0.37518277764320374, "learning_rate": 1.1820021570944658e-05, "loss": 0.5729, "step": 41714 }, { "epoch": 0.8847108226760833, "grad_norm": 0.3397591710090637, "learning_rate": 1.1819693644603503e-05, "loss": 0.4748, "step": 41715 }, { "epoch": 0.8847320311340162, "grad_norm": 0.37174201011657715, "learning_rate": 1.1819365716238502e-05, "loss": 0.5181, "step": 41716 }, { "epoch": 0.8847532395919493, "grad_norm": 0.3534240126609802, "learning_rate": 1.181903778585002e-05, "loss": 0.528, "step": 41717 }, { "epoch": 0.8847744480498823, "grad_norm": 0.3642823100090027, "learning_rate": 1.181870985343842e-05, "loss": 0.4906, "step": 41718 }, { "epoch": 0.8847956565078153, "grad_norm": 0.35713472962379456, "learning_rate": 1.1818381919004068e-05, "loss": 0.4741, "step": 41719 }, { "epoch": 0.8848168649657483, "grad_norm": 0.37848880887031555, "learning_rate": 1.1818053982547324e-05, "loss": 0.4414, "step": 41720 }, { "epoch": 0.8848380734236814, "grad_norm": 0.34901705384254456, "learning_rate": 1.1817726044068558e-05, "loss": 0.4845, "step": 41721 }, { "epoch": 0.8848592818816143, "grad_norm": 0.35039615631103516, "learning_rate": 1.1817398103568132e-05, "loss": 0.4818, "step": 41722 }, { "epoch": 0.8848804903395474, "grad_norm": 0.3306577801704407, "learning_rate": 1.1817070161046414e-05, "loss": 0.4119, "step": 41723 }, { "epoch": 0.8849016987974805, "grad_norm": 0.35920828580856323, "learning_rate": 1.1816742216503762e-05, "loss": 0.5193, "step": 41724 }, { "epoch": 0.8849229072554134, "grad_norm": 0.43926307559013367, "learning_rate": 1.181641426994055e-05, "loss": 0.4768, "step": 41725 }, { "epoch": 0.8849441157133465, "grad_norm": 0.358962744474411, "learning_rate": 1.1816086321357135e-05, "loss": 0.5516, "step": 41726 }, { "epoch": 0.8849653241712795, "grad_norm": 0.34243467450141907, "learning_rate": 1.1815758370753886e-05, "loss": 0.4158, "step": 41727 }, { "epoch": 0.8849865326292126, "grad_norm": 0.3290751278400421, "learning_rate": 1.1815430418131166e-05, "loss": 0.4996, "step": 41728 }, { "epoch": 0.8850077410871455, "grad_norm": 0.35805076360702515, "learning_rate": 1.1815102463489336e-05, "loss": 0.4881, "step": 41729 }, { "epoch": 0.8850289495450786, "grad_norm": 0.3917660117149353, "learning_rate": 1.181477450682877e-05, "loss": 0.4629, "step": 41730 }, { "epoch": 0.8850501580030116, "grad_norm": 0.330156534910202, "learning_rate": 1.1814446548149827e-05, "loss": 0.5645, "step": 41731 }, { "epoch": 0.8850713664609446, "grad_norm": 0.3561898469924927, "learning_rate": 1.1814118587452872e-05, "loss": 0.4828, "step": 41732 }, { "epoch": 0.8850925749188776, "grad_norm": 0.38227373361587524, "learning_rate": 1.1813790624738269e-05, "loss": 0.5265, "step": 41733 }, { "epoch": 0.8851137833768107, "grad_norm": 0.3641722798347473, "learning_rate": 1.1813462660006383e-05, "loss": 0.4731, "step": 41734 }, { "epoch": 0.8851349918347436, "grad_norm": 0.3637743592262268, "learning_rate": 1.181313469325758e-05, "loss": 0.4566, "step": 41735 }, { "epoch": 0.8851562002926767, "grad_norm": 0.3459787368774414, "learning_rate": 1.1812806724492224e-05, "loss": 0.5315, "step": 41736 }, { "epoch": 0.8851774087506098, "grad_norm": 0.3889157176017761, "learning_rate": 1.1812478753710683e-05, "loss": 0.454, "step": 41737 }, { "epoch": 0.8851986172085428, "grad_norm": 0.3666127026081085, "learning_rate": 1.1812150780913317e-05, "loss": 0.5058, "step": 41738 }, { "epoch": 0.8852198256664758, "grad_norm": 0.4192814528942108, "learning_rate": 1.1811822806100495e-05, "loss": 0.4995, "step": 41739 }, { "epoch": 0.8852410341244088, "grad_norm": 0.3694194555282593, "learning_rate": 1.1811494829272574e-05, "loss": 0.4048, "step": 41740 }, { "epoch": 0.8852622425823419, "grad_norm": 0.36488115787506104, "learning_rate": 1.1811166850429927e-05, "loss": 0.452, "step": 41741 }, { "epoch": 0.8852834510402748, "grad_norm": 0.3725482225418091, "learning_rate": 1.1810838869572919e-05, "loss": 0.4479, "step": 41742 }, { "epoch": 0.8853046594982079, "grad_norm": 0.37998858094215393, "learning_rate": 1.1810510886701906e-05, "loss": 0.5108, "step": 41743 }, { "epoch": 0.8853258679561409, "grad_norm": 0.38263407349586487, "learning_rate": 1.1810182901817263e-05, "loss": 0.4296, "step": 41744 }, { "epoch": 0.885347076414074, "grad_norm": 0.34597253799438477, "learning_rate": 1.180985491491935e-05, "loss": 0.5511, "step": 41745 }, { "epoch": 0.8853682848720069, "grad_norm": 0.35244637727737427, "learning_rate": 1.180952692600853e-05, "loss": 0.5089, "step": 41746 }, { "epoch": 0.88538949332994, "grad_norm": 0.3343837857246399, "learning_rate": 1.1809198935085171e-05, "loss": 0.4824, "step": 41747 }, { "epoch": 0.885410701787873, "grad_norm": 0.35108423233032227, "learning_rate": 1.1808870942149636e-05, "loss": 0.4983, "step": 41748 }, { "epoch": 0.885431910245806, "grad_norm": 0.3724019229412079, "learning_rate": 1.180854294720229e-05, "loss": 0.5448, "step": 41749 }, { "epoch": 0.8854531187037391, "grad_norm": 0.3718861937522888, "learning_rate": 1.18082149502435e-05, "loss": 0.5607, "step": 41750 }, { "epoch": 0.8854743271616721, "grad_norm": 0.43616822361946106, "learning_rate": 1.1807886951273629e-05, "loss": 0.5193, "step": 41751 }, { "epoch": 0.8854955356196051, "grad_norm": 0.4863547384738922, "learning_rate": 1.1807558950293042e-05, "loss": 0.4516, "step": 41752 }, { "epoch": 0.8855167440775381, "grad_norm": 0.37130990624427795, "learning_rate": 1.1807230947302102e-05, "loss": 0.5204, "step": 41753 }, { "epoch": 0.8855379525354712, "grad_norm": 0.36101317405700684, "learning_rate": 1.1806902942301175e-05, "loss": 0.5294, "step": 41754 }, { "epoch": 0.8855591609934041, "grad_norm": 0.39172980189323425, "learning_rate": 1.1806574935290626e-05, "loss": 0.5058, "step": 41755 }, { "epoch": 0.8855803694513372, "grad_norm": 0.3243868947029114, "learning_rate": 1.1806246926270824e-05, "loss": 0.4018, "step": 41756 }, { "epoch": 0.8856015779092702, "grad_norm": 0.37949925661087036, "learning_rate": 1.1805918915242124e-05, "loss": 0.4989, "step": 41757 }, { "epoch": 0.8856227863672033, "grad_norm": 0.3336053788661957, "learning_rate": 1.1805590902204903e-05, "loss": 0.4345, "step": 41758 }, { "epoch": 0.8856439948251362, "grad_norm": 0.44558054208755493, "learning_rate": 1.1805262887159517e-05, "loss": 0.5248, "step": 41759 }, { "epoch": 0.8856652032830693, "grad_norm": 0.331128865480423, "learning_rate": 1.180493487010633e-05, "loss": 0.4474, "step": 41760 }, { "epoch": 0.8856864117410023, "grad_norm": 0.3322674632072449, "learning_rate": 1.1804606851045712e-05, "loss": 0.4022, "step": 41761 }, { "epoch": 0.8857076201989353, "grad_norm": 0.3375226557254791, "learning_rate": 1.1804278829978028e-05, "loss": 0.5126, "step": 41762 }, { "epoch": 0.8857288286568683, "grad_norm": 0.3856370747089386, "learning_rate": 1.1803950806903639e-05, "loss": 0.4646, "step": 41763 }, { "epoch": 0.8857500371148014, "grad_norm": 0.38318580389022827, "learning_rate": 1.1803622781822911e-05, "loss": 0.5551, "step": 41764 }, { "epoch": 0.8857712455727345, "grad_norm": 0.35547706484794617, "learning_rate": 1.1803294754736213e-05, "loss": 0.54, "step": 41765 }, { "epoch": 0.8857924540306674, "grad_norm": 0.3776388168334961, "learning_rate": 1.1802966725643902e-05, "loss": 0.4702, "step": 41766 }, { "epoch": 0.8858136624886005, "grad_norm": 0.421281099319458, "learning_rate": 1.180263869454635e-05, "loss": 0.4113, "step": 41767 }, { "epoch": 0.8858348709465335, "grad_norm": 0.3564282953739166, "learning_rate": 1.1802310661443918e-05, "loss": 0.4424, "step": 41768 }, { "epoch": 0.8858560794044665, "grad_norm": 0.36812329292297363, "learning_rate": 1.180198262633697e-05, "loss": 0.4457, "step": 41769 }, { "epoch": 0.8858772878623995, "grad_norm": 0.3371904194355011, "learning_rate": 1.1801654589225874e-05, "loss": 0.4389, "step": 41770 }, { "epoch": 0.8858984963203326, "grad_norm": 0.37105482816696167, "learning_rate": 1.1801326550110993e-05, "loss": 0.4631, "step": 41771 }, { "epoch": 0.8859197047782655, "grad_norm": 0.3559030294418335, "learning_rate": 1.1800998508992693e-05, "loss": 0.4663, "step": 41772 }, { "epoch": 0.8859409132361986, "grad_norm": 0.3308289647102356, "learning_rate": 1.1800670465871338e-05, "loss": 0.4164, "step": 41773 }, { "epoch": 0.8859621216941316, "grad_norm": 0.42500656843185425, "learning_rate": 1.1800342420747292e-05, "loss": 0.4409, "step": 41774 }, { "epoch": 0.8859833301520647, "grad_norm": 0.33307579159736633, "learning_rate": 1.1800014373620923e-05, "loss": 0.4417, "step": 41775 }, { "epoch": 0.8860045386099976, "grad_norm": 0.33322203159332275, "learning_rate": 1.1799686324492593e-05, "loss": 0.4039, "step": 41776 }, { "epoch": 0.8860257470679307, "grad_norm": 0.334576278924942, "learning_rate": 1.1799358273362665e-05, "loss": 0.4994, "step": 41777 }, { "epoch": 0.8860469555258638, "grad_norm": 0.3446783125400543, "learning_rate": 1.179903022023151e-05, "loss": 0.4885, "step": 41778 }, { "epoch": 0.8860681639837967, "grad_norm": 0.41109994053840637, "learning_rate": 1.1798702165099488e-05, "loss": 0.4794, "step": 41779 }, { "epoch": 0.8860893724417298, "grad_norm": 0.32927727699279785, "learning_rate": 1.179837410796696e-05, "loss": 0.4728, "step": 41780 }, { "epoch": 0.8861105808996628, "grad_norm": 0.32190167903900146, "learning_rate": 1.1798046048834303e-05, "loss": 0.4463, "step": 41781 }, { "epoch": 0.8861317893575958, "grad_norm": 0.3718491196632385, "learning_rate": 1.1797717987701873e-05, "loss": 0.5745, "step": 41782 }, { "epoch": 0.8861529978155288, "grad_norm": 0.34121668338775635, "learning_rate": 1.1797389924570035e-05, "loss": 0.5936, "step": 41783 }, { "epoch": 0.8861742062734619, "grad_norm": 0.37144196033477783, "learning_rate": 1.1797061859439157e-05, "loss": 0.4185, "step": 41784 }, { "epoch": 0.8861954147313948, "grad_norm": 0.3852526545524597, "learning_rate": 1.17967337923096e-05, "loss": 0.4883, "step": 41785 }, { "epoch": 0.8862166231893279, "grad_norm": 0.3342113792896271, "learning_rate": 1.1796405723181733e-05, "loss": 0.3761, "step": 41786 }, { "epoch": 0.8862378316472609, "grad_norm": 0.3742597997188568, "learning_rate": 1.179607765205592e-05, "loss": 0.5158, "step": 41787 }, { "epoch": 0.886259040105194, "grad_norm": 0.3360316753387451, "learning_rate": 1.1795749578932522e-05, "loss": 0.4907, "step": 41788 }, { "epoch": 0.8862802485631269, "grad_norm": 0.4175112843513489, "learning_rate": 1.179542150381191e-05, "loss": 0.5672, "step": 41789 }, { "epoch": 0.88630145702106, "grad_norm": 0.41164037585258484, "learning_rate": 1.1795093426694445e-05, "loss": 0.5082, "step": 41790 }, { "epoch": 0.8863226654789931, "grad_norm": 0.343014657497406, "learning_rate": 1.1794765347580492e-05, "loss": 0.5293, "step": 41791 }, { "epoch": 0.886343873936926, "grad_norm": 0.31831952929496765, "learning_rate": 1.179443726647042e-05, "loss": 0.3969, "step": 41792 }, { "epoch": 0.8863650823948591, "grad_norm": 0.35372188687324524, "learning_rate": 1.1794109183364585e-05, "loss": 0.5044, "step": 41793 }, { "epoch": 0.8863862908527921, "grad_norm": 0.3591240644454956, "learning_rate": 1.1793781098263359e-05, "loss": 0.5446, "step": 41794 }, { "epoch": 0.8864074993107252, "grad_norm": 0.3299942910671234, "learning_rate": 1.179345301116711e-05, "loss": 0.4677, "step": 41795 }, { "epoch": 0.8864287077686581, "grad_norm": 0.3448595404624939, "learning_rate": 1.1793124922076193e-05, "loss": 0.5626, "step": 41796 }, { "epoch": 0.8864499162265912, "grad_norm": 0.3695525527000427, "learning_rate": 1.179279683099098e-05, "loss": 0.4553, "step": 41797 }, { "epoch": 0.8864711246845242, "grad_norm": 0.5081596970558167, "learning_rate": 1.1792468737911832e-05, "loss": 0.4899, "step": 41798 }, { "epoch": 0.8864923331424572, "grad_norm": 0.3622775077819824, "learning_rate": 1.1792140642839119e-05, "loss": 0.4601, "step": 41799 }, { "epoch": 0.8865135416003902, "grad_norm": 0.33098989725112915, "learning_rate": 1.1791812545773198e-05, "loss": 0.4493, "step": 41800 }, { "epoch": 0.8865347500583233, "grad_norm": 0.3702089190483093, "learning_rate": 1.1791484446714443e-05, "loss": 0.441, "step": 41801 }, { "epoch": 0.8865559585162562, "grad_norm": 0.3935619294643402, "learning_rate": 1.1791156345663213e-05, "loss": 0.5353, "step": 41802 }, { "epoch": 0.8865771669741893, "grad_norm": 0.41330647468566895, "learning_rate": 1.1790828242619876e-05, "loss": 0.46, "step": 41803 }, { "epoch": 0.8865983754321223, "grad_norm": 0.334575355052948, "learning_rate": 1.1790500137584795e-05, "loss": 0.4495, "step": 41804 }, { "epoch": 0.8866195838900554, "grad_norm": 0.374256432056427, "learning_rate": 1.1790172030558333e-05, "loss": 0.419, "step": 41805 }, { "epoch": 0.8866407923479884, "grad_norm": 0.3670244514942169, "learning_rate": 1.1789843921540858e-05, "loss": 0.4651, "step": 41806 }, { "epoch": 0.8866620008059214, "grad_norm": 0.3807706832885742, "learning_rate": 1.1789515810532735e-05, "loss": 0.5638, "step": 41807 }, { "epoch": 0.8866832092638545, "grad_norm": 0.337246835231781, "learning_rate": 1.1789187697534326e-05, "loss": 0.4957, "step": 41808 }, { "epoch": 0.8867044177217874, "grad_norm": 0.3732263743877411, "learning_rate": 1.1788859582546003e-05, "loss": 0.509, "step": 41809 }, { "epoch": 0.8867256261797205, "grad_norm": 0.3974446952342987, "learning_rate": 1.1788531465568123e-05, "loss": 0.4363, "step": 41810 }, { "epoch": 0.8867468346376535, "grad_norm": 0.4389169216156006, "learning_rate": 1.178820334660105e-05, "loss": 0.6036, "step": 41811 }, { "epoch": 0.8867680430955865, "grad_norm": 0.3846619427204132, "learning_rate": 1.1787875225645155e-05, "loss": 0.477, "step": 41812 }, { "epoch": 0.8867892515535195, "grad_norm": 0.5726940035820007, "learning_rate": 1.1787547102700806e-05, "loss": 0.4715, "step": 41813 }, { "epoch": 0.8868104600114526, "grad_norm": 0.3736995756626129, "learning_rate": 1.1787218977768355e-05, "loss": 0.486, "step": 41814 }, { "epoch": 0.8868316684693855, "grad_norm": 0.4103323817253113, "learning_rate": 1.178689085084818e-05, "loss": 0.4861, "step": 41815 }, { "epoch": 0.8868528769273186, "grad_norm": 0.38765040040016174, "learning_rate": 1.1786562721940637e-05, "loss": 0.4931, "step": 41816 }, { "epoch": 0.8868740853852516, "grad_norm": 0.4340893030166626, "learning_rate": 1.17862345910461e-05, "loss": 0.559, "step": 41817 }, { "epoch": 0.8868952938431847, "grad_norm": 0.3564397990703583, "learning_rate": 1.1785906458164922e-05, "loss": 0.5614, "step": 41818 }, { "epoch": 0.8869165023011177, "grad_norm": 0.3489168584346771, "learning_rate": 1.1785578323297478e-05, "loss": 0.5196, "step": 41819 }, { "epoch": 0.8869377107590507, "grad_norm": 0.3718474805355072, "learning_rate": 1.1785250186444129e-05, "loss": 0.4765, "step": 41820 }, { "epoch": 0.8869589192169838, "grad_norm": 0.50126713514328, "learning_rate": 1.1784922047605239e-05, "loss": 0.4951, "step": 41821 }, { "epoch": 0.8869801276749167, "grad_norm": 0.4350423812866211, "learning_rate": 1.1784593906781172e-05, "loss": 0.4978, "step": 41822 }, { "epoch": 0.8870013361328498, "grad_norm": 0.39671674370765686, "learning_rate": 1.17842657639723e-05, "loss": 0.4833, "step": 41823 }, { "epoch": 0.8870225445907828, "grad_norm": 0.32663217186927795, "learning_rate": 1.1783937619178982e-05, "loss": 0.4405, "step": 41824 }, { "epoch": 0.8870437530487159, "grad_norm": 0.3396346867084503, "learning_rate": 1.178360947240158e-05, "loss": 0.5146, "step": 41825 }, { "epoch": 0.8870649615066488, "grad_norm": 0.3417295813560486, "learning_rate": 1.1783281323640468e-05, "loss": 0.4583, "step": 41826 }, { "epoch": 0.8870861699645819, "grad_norm": 0.3642410933971405, "learning_rate": 1.1782953172896003e-05, "loss": 0.4449, "step": 41827 }, { "epoch": 0.8871073784225149, "grad_norm": 0.33995169401168823, "learning_rate": 1.1782625020168554e-05, "loss": 0.4567, "step": 41828 }, { "epoch": 0.8871285868804479, "grad_norm": 0.41526272892951965, "learning_rate": 1.1782296865458487e-05, "loss": 0.4857, "step": 41829 }, { "epoch": 0.8871497953383809, "grad_norm": 0.34378787875175476, "learning_rate": 1.1781968708766161e-05, "loss": 0.43, "step": 41830 }, { "epoch": 0.887171003796314, "grad_norm": 0.3976086676120758, "learning_rate": 1.1781640550091946e-05, "loss": 0.5604, "step": 41831 }, { "epoch": 0.887192212254247, "grad_norm": 0.3526262640953064, "learning_rate": 1.1781312389436208e-05, "loss": 0.4677, "step": 41832 }, { "epoch": 0.88721342071218, "grad_norm": 0.3437381088733673, "learning_rate": 1.178098422679931e-05, "loss": 0.3986, "step": 41833 }, { "epoch": 0.8872346291701131, "grad_norm": 0.5613137483596802, "learning_rate": 1.1780656062181612e-05, "loss": 0.5253, "step": 41834 }, { "epoch": 0.887255837628046, "grad_norm": 0.36866843700408936, "learning_rate": 1.178032789558349e-05, "loss": 0.455, "step": 41835 }, { "epoch": 0.8872770460859791, "grad_norm": 0.7692210674285889, "learning_rate": 1.1779999727005298e-05, "loss": 0.5791, "step": 41836 }, { "epoch": 0.8872982545439121, "grad_norm": 0.3303929269313812, "learning_rate": 1.177967155644741e-05, "loss": 0.45, "step": 41837 }, { "epoch": 0.8873194630018452, "grad_norm": 0.37920889258384705, "learning_rate": 1.1779343383910182e-05, "loss": 0.4748, "step": 41838 }, { "epoch": 0.8873406714597781, "grad_norm": 0.3364720344543457, "learning_rate": 1.1779015209393984e-05, "loss": 0.4718, "step": 41839 }, { "epoch": 0.8873618799177112, "grad_norm": 0.35210737586021423, "learning_rate": 1.1778687032899186e-05, "loss": 0.4646, "step": 41840 }, { "epoch": 0.8873830883756442, "grad_norm": 0.4272044897079468, "learning_rate": 1.1778358854426145e-05, "loss": 0.4943, "step": 41841 }, { "epoch": 0.8874042968335772, "grad_norm": 0.341812402009964, "learning_rate": 1.1778030673975227e-05, "loss": 0.4868, "step": 41842 }, { "epoch": 0.8874255052915102, "grad_norm": 0.33191579580307007, "learning_rate": 1.17777024915468e-05, "loss": 0.4486, "step": 41843 }, { "epoch": 0.8874467137494433, "grad_norm": 0.37435004115104675, "learning_rate": 1.1777374307141227e-05, "loss": 0.4675, "step": 41844 }, { "epoch": 0.8874679222073762, "grad_norm": 0.32620587944984436, "learning_rate": 1.1777046120758875e-05, "loss": 0.4494, "step": 41845 }, { "epoch": 0.8874891306653093, "grad_norm": 1.2450346946716309, "learning_rate": 1.1776717932400108e-05, "loss": 0.5488, "step": 41846 }, { "epoch": 0.8875103391232424, "grad_norm": 0.46126535534858704, "learning_rate": 1.177638974206529e-05, "loss": 0.4932, "step": 41847 }, { "epoch": 0.8875315475811754, "grad_norm": 0.35333144664764404, "learning_rate": 1.1776061549754787e-05, "loss": 0.4258, "step": 41848 }, { "epoch": 0.8875527560391084, "grad_norm": 0.36620113253593445, "learning_rate": 1.1775733355468964e-05, "loss": 0.4584, "step": 41849 }, { "epoch": 0.8875739644970414, "grad_norm": 0.3735975921154022, "learning_rate": 1.1775405159208183e-05, "loss": 0.522, "step": 41850 }, { "epoch": 0.8875951729549745, "grad_norm": 0.41851145029067993, "learning_rate": 1.1775076960972815e-05, "loss": 0.4776, "step": 41851 }, { "epoch": 0.8876163814129074, "grad_norm": 0.40506577491760254, "learning_rate": 1.1774748760763222e-05, "loss": 0.4807, "step": 41852 }, { "epoch": 0.8876375898708405, "grad_norm": 0.34579265117645264, "learning_rate": 1.1774420558579767e-05, "loss": 0.4329, "step": 41853 }, { "epoch": 0.8876587983287735, "grad_norm": 0.43726372718811035, "learning_rate": 1.1774092354422816e-05, "loss": 0.5402, "step": 41854 }, { "epoch": 0.8876800067867066, "grad_norm": 0.3744891881942749, "learning_rate": 1.1773764148292739e-05, "loss": 0.5105, "step": 41855 }, { "epoch": 0.8877012152446395, "grad_norm": 0.36224111914634705, "learning_rate": 1.1773435940189894e-05, "loss": 0.5575, "step": 41856 }, { "epoch": 0.8877224237025726, "grad_norm": 0.6578789353370667, "learning_rate": 1.1773107730114648e-05, "loss": 0.5441, "step": 41857 }, { "epoch": 0.8877436321605056, "grad_norm": 0.3624040186405182, "learning_rate": 1.177277951806737e-05, "loss": 0.4889, "step": 41858 }, { "epoch": 0.8877648406184386, "grad_norm": 0.386770635843277, "learning_rate": 1.177245130404842e-05, "loss": 0.565, "step": 41859 }, { "epoch": 0.8877860490763717, "grad_norm": 0.3483441472053528, "learning_rate": 1.1772123088058165e-05, "loss": 0.5004, "step": 41860 }, { "epoch": 0.8878072575343047, "grad_norm": 0.39415740966796875, "learning_rate": 1.1771794870096973e-05, "loss": 0.5632, "step": 41861 }, { "epoch": 0.8878284659922377, "grad_norm": 0.363126665353775, "learning_rate": 1.1771466650165202e-05, "loss": 0.4787, "step": 41862 }, { "epoch": 0.8878496744501707, "grad_norm": 0.36115890741348267, "learning_rate": 1.1771138428263223e-05, "loss": 0.4697, "step": 41863 }, { "epoch": 0.8878708829081038, "grad_norm": 0.3546879291534424, "learning_rate": 1.1770810204391398e-05, "loss": 0.5275, "step": 41864 }, { "epoch": 0.8878920913660368, "grad_norm": 0.3440936803817749, "learning_rate": 1.1770481978550094e-05, "loss": 0.4333, "step": 41865 }, { "epoch": 0.8879132998239698, "grad_norm": 0.3433734178543091, "learning_rate": 1.1770153750739676e-05, "loss": 0.4879, "step": 41866 }, { "epoch": 0.8879345082819028, "grad_norm": 0.37310728430747986, "learning_rate": 1.176982552096051e-05, "loss": 0.4869, "step": 41867 }, { "epoch": 0.8879557167398359, "grad_norm": 0.39248567819595337, "learning_rate": 1.1769497289212957e-05, "loss": 0.5052, "step": 41868 }, { "epoch": 0.8879769251977688, "grad_norm": 0.4045683443546295, "learning_rate": 1.1769169055497386e-05, "loss": 0.561, "step": 41869 }, { "epoch": 0.8879981336557019, "grad_norm": 0.3490675687789917, "learning_rate": 1.1768840819814156e-05, "loss": 0.454, "step": 41870 }, { "epoch": 0.8880193421136349, "grad_norm": 0.38980400562286377, "learning_rate": 1.1768512582163643e-05, "loss": 0.5138, "step": 41871 }, { "epoch": 0.8880405505715679, "grad_norm": 0.5266249179840088, "learning_rate": 1.1768184342546202e-05, "loss": 0.5502, "step": 41872 }, { "epoch": 0.888061759029501, "grad_norm": 0.34905561804771423, "learning_rate": 1.1767856100962203e-05, "loss": 0.4561, "step": 41873 }, { "epoch": 0.888082967487434, "grad_norm": 0.399809867143631, "learning_rate": 1.176752785741201e-05, "loss": 0.5247, "step": 41874 }, { "epoch": 0.8881041759453671, "grad_norm": 0.34671467542648315, "learning_rate": 1.1767199611895988e-05, "loss": 0.5302, "step": 41875 }, { "epoch": 0.8881253844033, "grad_norm": 0.35456621646881104, "learning_rate": 1.1766871364414499e-05, "loss": 0.4702, "step": 41876 }, { "epoch": 0.8881465928612331, "grad_norm": 0.3851318061351776, "learning_rate": 1.1766543114967915e-05, "loss": 0.4101, "step": 41877 }, { "epoch": 0.8881678013191661, "grad_norm": 0.3344780504703522, "learning_rate": 1.1766214863556596e-05, "loss": 0.4275, "step": 41878 }, { "epoch": 0.8881890097770991, "grad_norm": 0.3900288939476013, "learning_rate": 1.1765886610180906e-05, "loss": 0.5074, "step": 41879 }, { "epoch": 0.8882102182350321, "grad_norm": 0.3890340030193329, "learning_rate": 1.1765558354841214e-05, "loss": 0.52, "step": 41880 }, { "epoch": 0.8882314266929652, "grad_norm": 0.35168755054473877, "learning_rate": 1.1765230097537883e-05, "loss": 0.5019, "step": 41881 }, { "epoch": 0.8882526351508981, "grad_norm": 0.37001481652259827, "learning_rate": 1.1764901838271277e-05, "loss": 0.5786, "step": 41882 }, { "epoch": 0.8882738436088312, "grad_norm": 0.3807416260242462, "learning_rate": 1.1764573577041765e-05, "loss": 0.5556, "step": 41883 }, { "epoch": 0.8882950520667642, "grad_norm": 0.39497727155685425, "learning_rate": 1.1764245313849707e-05, "loss": 0.4433, "step": 41884 }, { "epoch": 0.8883162605246973, "grad_norm": 0.42326781153678894, "learning_rate": 1.1763917048695474e-05, "loss": 0.4939, "step": 41885 }, { "epoch": 0.8883374689826302, "grad_norm": 0.38566336035728455, "learning_rate": 1.1763588781579427e-05, "loss": 0.5673, "step": 41886 }, { "epoch": 0.8883586774405633, "grad_norm": 0.3699636459350586, "learning_rate": 1.176326051250193e-05, "loss": 0.3979, "step": 41887 }, { "epoch": 0.8883798858984964, "grad_norm": 0.42236071825027466, "learning_rate": 1.1762932241463352e-05, "loss": 0.5295, "step": 41888 }, { "epoch": 0.8884010943564293, "grad_norm": 0.32479342818260193, "learning_rate": 1.1762603968464053e-05, "loss": 0.4456, "step": 41889 }, { "epoch": 0.8884223028143624, "grad_norm": 0.35860389471054077, "learning_rate": 1.1762275693504403e-05, "loss": 0.5093, "step": 41890 }, { "epoch": 0.8884435112722954, "grad_norm": 0.36254429817199707, "learning_rate": 1.1761947416584766e-05, "loss": 0.4818, "step": 41891 }, { "epoch": 0.8884647197302284, "grad_norm": 0.3351541757583618, "learning_rate": 1.1761619137705507e-05, "loss": 0.5345, "step": 41892 }, { "epoch": 0.8884859281881614, "grad_norm": 0.41964247822761536, "learning_rate": 1.176129085686699e-05, "loss": 0.4514, "step": 41893 }, { "epoch": 0.8885071366460945, "grad_norm": 0.3986645042896271, "learning_rate": 1.176096257406958e-05, "loss": 0.5333, "step": 41894 }, { "epoch": 0.8885283451040275, "grad_norm": 0.3448745012283325, "learning_rate": 1.1760634289313641e-05, "loss": 0.4457, "step": 41895 }, { "epoch": 0.8885495535619605, "grad_norm": 0.48881796002388, "learning_rate": 1.176030600259954e-05, "loss": 0.4903, "step": 41896 }, { "epoch": 0.8885707620198935, "grad_norm": 0.3907061815261841, "learning_rate": 1.1759977713927647e-05, "loss": 0.5229, "step": 41897 }, { "epoch": 0.8885919704778266, "grad_norm": 0.3936101794242859, "learning_rate": 1.1759649423298316e-05, "loss": 0.4646, "step": 41898 }, { "epoch": 0.8886131789357595, "grad_norm": 0.33774226903915405, "learning_rate": 1.175932113071192e-05, "loss": 0.437, "step": 41899 }, { "epoch": 0.8886343873936926, "grad_norm": 0.38697776198387146, "learning_rate": 1.1758992836168825e-05, "loss": 0.4785, "step": 41900 }, { "epoch": 0.8886555958516257, "grad_norm": 0.3862922787666321, "learning_rate": 1.1758664539669391e-05, "loss": 0.5109, "step": 41901 }, { "epoch": 0.8886768043095586, "grad_norm": 0.3676796853542328, "learning_rate": 1.1758336241213986e-05, "loss": 0.484, "step": 41902 }, { "epoch": 0.8886980127674917, "grad_norm": 0.4032975435256958, "learning_rate": 1.1758007940802975e-05, "loss": 0.5719, "step": 41903 }, { "epoch": 0.8887192212254247, "grad_norm": 0.38868045806884766, "learning_rate": 1.175767963843672e-05, "loss": 0.5308, "step": 41904 }, { "epoch": 0.8887404296833578, "grad_norm": 0.4757976233959198, "learning_rate": 1.1757351334115595e-05, "loss": 0.5287, "step": 41905 }, { "epoch": 0.8887616381412907, "grad_norm": 0.3499402105808258, "learning_rate": 1.1757023027839956e-05, "loss": 0.5361, "step": 41906 }, { "epoch": 0.8887828465992238, "grad_norm": 0.35721656680107117, "learning_rate": 1.1756694719610172e-05, "loss": 0.3985, "step": 41907 }, { "epoch": 0.8888040550571568, "grad_norm": 0.33586710691452026, "learning_rate": 1.1756366409426602e-05, "loss": 0.509, "step": 41908 }, { "epoch": 0.8888252635150898, "grad_norm": 0.3956626355648041, "learning_rate": 1.1756038097289624e-05, "loss": 0.44, "step": 41909 }, { "epoch": 0.8888464719730228, "grad_norm": 0.7059634923934937, "learning_rate": 1.1755709783199592e-05, "loss": 0.6281, "step": 41910 }, { "epoch": 0.8888676804309559, "grad_norm": 0.36086127161979675, "learning_rate": 1.1755381467156874e-05, "loss": 0.417, "step": 41911 }, { "epoch": 0.8888888888888888, "grad_norm": 0.435252845287323, "learning_rate": 1.1755053149161836e-05, "loss": 0.4775, "step": 41912 }, { "epoch": 0.8889100973468219, "grad_norm": 0.4188435971736908, "learning_rate": 1.1754724829214847e-05, "loss": 0.4214, "step": 41913 }, { "epoch": 0.888931305804755, "grad_norm": 0.41961100697517395, "learning_rate": 1.1754396507316268e-05, "loss": 0.5139, "step": 41914 }, { "epoch": 0.888952514262688, "grad_norm": 0.42931267619132996, "learning_rate": 1.1754068183466458e-05, "loss": 0.4001, "step": 41915 }, { "epoch": 0.888973722720621, "grad_norm": 0.38019490242004395, "learning_rate": 1.1753739857665793e-05, "loss": 0.5272, "step": 41916 }, { "epoch": 0.888994931178554, "grad_norm": 0.9245540499687195, "learning_rate": 1.1753411529914635e-05, "loss": 0.423, "step": 41917 }, { "epoch": 0.8890161396364871, "grad_norm": 0.38123711943626404, "learning_rate": 1.1753083200213347e-05, "loss": 0.5104, "step": 41918 }, { "epoch": 0.88903734809442, "grad_norm": 0.3652222454547882, "learning_rate": 1.1752754868562295e-05, "loss": 0.5145, "step": 41919 }, { "epoch": 0.8890585565523531, "grad_norm": 0.3222496509552002, "learning_rate": 1.1752426534961844e-05, "loss": 0.4906, "step": 41920 }, { "epoch": 0.8890797650102861, "grad_norm": 0.3425765037536621, "learning_rate": 1.1752098199412356e-05, "loss": 0.455, "step": 41921 }, { "epoch": 0.8891009734682191, "grad_norm": 0.39071252942085266, "learning_rate": 1.1751769861914207e-05, "loss": 0.5486, "step": 41922 }, { "epoch": 0.8891221819261521, "grad_norm": 0.33816802501678467, "learning_rate": 1.175144152246775e-05, "loss": 0.4177, "step": 41923 }, { "epoch": 0.8891433903840852, "grad_norm": 0.3932870626449585, "learning_rate": 1.1751113181073354e-05, "loss": 0.5458, "step": 41924 }, { "epoch": 0.8891645988420182, "grad_norm": 0.34961339831352234, "learning_rate": 1.1750784837731388e-05, "loss": 0.452, "step": 41925 }, { "epoch": 0.8891858072999512, "grad_norm": 0.3481319546699524, "learning_rate": 1.1750456492442213e-05, "loss": 0.4428, "step": 41926 }, { "epoch": 0.8892070157578842, "grad_norm": 0.4217260181903839, "learning_rate": 1.1750128145206195e-05, "loss": 0.5094, "step": 41927 }, { "epoch": 0.8892282242158173, "grad_norm": 0.38950246572494507, "learning_rate": 1.17497997960237e-05, "loss": 0.5614, "step": 41928 }, { "epoch": 0.8892494326737503, "grad_norm": 0.3969789743423462, "learning_rate": 1.1749471444895093e-05, "loss": 0.4571, "step": 41929 }, { "epoch": 0.8892706411316833, "grad_norm": 0.6474772095680237, "learning_rate": 1.174914309182074e-05, "loss": 0.4427, "step": 41930 }, { "epoch": 0.8892918495896164, "grad_norm": 0.34317880868911743, "learning_rate": 1.1748814736801006e-05, "loss": 0.4553, "step": 41931 }, { "epoch": 0.8893130580475493, "grad_norm": 0.3812211751937866, "learning_rate": 1.1748486379836253e-05, "loss": 0.4471, "step": 41932 }, { "epoch": 0.8893342665054824, "grad_norm": 0.350033700466156, "learning_rate": 1.1748158020926849e-05, "loss": 0.4392, "step": 41933 }, { "epoch": 0.8893554749634154, "grad_norm": 0.5910564064979553, "learning_rate": 1.174782966007316e-05, "loss": 0.5258, "step": 41934 }, { "epoch": 0.8893766834213485, "grad_norm": 0.3616776168346405, "learning_rate": 1.1747501297275547e-05, "loss": 0.415, "step": 41935 }, { "epoch": 0.8893978918792814, "grad_norm": 0.42894822359085083, "learning_rate": 1.1747172932534383e-05, "loss": 0.4417, "step": 41936 }, { "epoch": 0.8894191003372145, "grad_norm": 0.40351808071136475, "learning_rate": 1.1746844565850026e-05, "loss": 0.6141, "step": 41937 }, { "epoch": 0.8894403087951475, "grad_norm": 0.3901095986366272, "learning_rate": 1.1746516197222844e-05, "loss": 0.4566, "step": 41938 }, { "epoch": 0.8894615172530805, "grad_norm": 0.39572176337242126, "learning_rate": 1.1746187826653203e-05, "loss": 0.5327, "step": 41939 }, { "epoch": 0.8894827257110135, "grad_norm": 0.3361514210700989, "learning_rate": 1.1745859454141463e-05, "loss": 0.4107, "step": 41940 }, { "epoch": 0.8895039341689466, "grad_norm": 0.3748834431171417, "learning_rate": 1.1745531079687995e-05, "loss": 0.441, "step": 41941 }, { "epoch": 0.8895251426268797, "grad_norm": 0.34653159976005554, "learning_rate": 1.1745202703293167e-05, "loss": 0.4468, "step": 41942 }, { "epoch": 0.8895463510848126, "grad_norm": 0.3428086042404175, "learning_rate": 1.1744874324957333e-05, "loss": 0.4516, "step": 41943 }, { "epoch": 0.8895675595427457, "grad_norm": 0.3621242046356201, "learning_rate": 1.174454594468087e-05, "loss": 0.5402, "step": 41944 }, { "epoch": 0.8895887680006787, "grad_norm": 0.3313019871711731, "learning_rate": 1.1744217562464138e-05, "loss": 0.5516, "step": 41945 }, { "epoch": 0.8896099764586117, "grad_norm": 1.0036066770553589, "learning_rate": 1.1743889178307499e-05, "loss": 0.4406, "step": 41946 }, { "epoch": 0.8896311849165447, "grad_norm": 0.38332098722457886, "learning_rate": 1.1743560792211321e-05, "loss": 0.5616, "step": 41947 }, { "epoch": 0.8896523933744778, "grad_norm": 0.48064321279525757, "learning_rate": 1.1743232404175974e-05, "loss": 0.5023, "step": 41948 }, { "epoch": 0.8896736018324107, "grad_norm": 0.3960089087486267, "learning_rate": 1.1742904014201817e-05, "loss": 0.4819, "step": 41949 }, { "epoch": 0.8896948102903438, "grad_norm": 0.35356786847114563, "learning_rate": 1.1742575622289218e-05, "loss": 0.386, "step": 41950 }, { "epoch": 0.8897160187482768, "grad_norm": 0.2966562509536743, "learning_rate": 1.1742247228438541e-05, "loss": 0.3579, "step": 41951 }, { "epoch": 0.8897372272062098, "grad_norm": 0.39103931188583374, "learning_rate": 1.1741918832650153e-05, "loss": 0.5519, "step": 41952 }, { "epoch": 0.8897584356641428, "grad_norm": 0.3611607849597931, "learning_rate": 1.1741590434924416e-05, "loss": 0.4716, "step": 41953 }, { "epoch": 0.8897796441220759, "grad_norm": 0.3187408447265625, "learning_rate": 1.1741262035261699e-05, "loss": 0.5011, "step": 41954 }, { "epoch": 0.889800852580009, "grad_norm": 0.5411563515663147, "learning_rate": 1.1740933633662361e-05, "loss": 0.4609, "step": 41955 }, { "epoch": 0.8898220610379419, "grad_norm": 0.39321157336235046, "learning_rate": 1.1740605230126778e-05, "loss": 0.4926, "step": 41956 }, { "epoch": 0.889843269495875, "grad_norm": 0.39619868993759155, "learning_rate": 1.1740276824655304e-05, "loss": 0.5378, "step": 41957 }, { "epoch": 0.889864477953808, "grad_norm": 0.3803856372833252, "learning_rate": 1.1739948417248312e-05, "loss": 0.5039, "step": 41958 }, { "epoch": 0.889885686411741, "grad_norm": 0.3525582253932953, "learning_rate": 1.1739620007906163e-05, "loss": 0.4755, "step": 41959 }, { "epoch": 0.889906894869674, "grad_norm": 0.42534932494163513, "learning_rate": 1.1739291596629227e-05, "loss": 0.5532, "step": 41960 }, { "epoch": 0.8899281033276071, "grad_norm": 0.3822525441646576, "learning_rate": 1.1738963183417862e-05, "loss": 0.439, "step": 41961 }, { "epoch": 0.88994931178554, "grad_norm": 0.36598339676856995, "learning_rate": 1.1738634768272438e-05, "loss": 0.5338, "step": 41962 }, { "epoch": 0.8899705202434731, "grad_norm": 0.32098639011383057, "learning_rate": 1.173830635119332e-05, "loss": 0.4914, "step": 41963 }, { "epoch": 0.8899917287014061, "grad_norm": 0.47138315439224243, "learning_rate": 1.1737977932180876e-05, "loss": 0.4925, "step": 41964 }, { "epoch": 0.8900129371593392, "grad_norm": 0.35787415504455566, "learning_rate": 1.1737649511235465e-05, "loss": 0.5375, "step": 41965 }, { "epoch": 0.8900341456172721, "grad_norm": 0.3651159107685089, "learning_rate": 1.1737321088357454e-05, "loss": 0.5226, "step": 41966 }, { "epoch": 0.8900553540752052, "grad_norm": 0.45086827874183655, "learning_rate": 1.1736992663547211e-05, "loss": 0.455, "step": 41967 }, { "epoch": 0.8900765625331382, "grad_norm": 0.35495108366012573, "learning_rate": 1.1736664236805102e-05, "loss": 0.4664, "step": 41968 }, { "epoch": 0.8900977709910712, "grad_norm": 0.3673759698867798, "learning_rate": 1.1736335808131488e-05, "loss": 0.5105, "step": 41969 }, { "epoch": 0.8901189794490043, "grad_norm": 0.37084001302719116, "learning_rate": 1.1736007377526737e-05, "loss": 0.5336, "step": 41970 }, { "epoch": 0.8901401879069373, "grad_norm": 0.3675266206264496, "learning_rate": 1.1735678944991212e-05, "loss": 0.4251, "step": 41971 }, { "epoch": 0.8901613963648703, "grad_norm": 0.34118568897247314, "learning_rate": 1.1735350510525278e-05, "loss": 0.4618, "step": 41972 }, { "epoch": 0.8901826048228033, "grad_norm": 0.3806723654270172, "learning_rate": 1.1735022074129307e-05, "loss": 0.4289, "step": 41973 }, { "epoch": 0.8902038132807364, "grad_norm": 0.48111453652381897, "learning_rate": 1.173469363580366e-05, "loss": 0.469, "step": 41974 }, { "epoch": 0.8902250217386694, "grad_norm": 0.4146239757537842, "learning_rate": 1.1734365195548698e-05, "loss": 0.4619, "step": 41975 }, { "epoch": 0.8902462301966024, "grad_norm": 0.3773472309112549, "learning_rate": 1.1734036753364793e-05, "loss": 0.5358, "step": 41976 }, { "epoch": 0.8902674386545354, "grad_norm": 0.3513139486312866, "learning_rate": 1.1733708309252304e-05, "loss": 0.5051, "step": 41977 }, { "epoch": 0.8902886471124685, "grad_norm": 0.3765067458152771, "learning_rate": 1.1733379863211605e-05, "loss": 0.6235, "step": 41978 }, { "epoch": 0.8903098555704014, "grad_norm": 0.34653016924858093, "learning_rate": 1.1733051415243051e-05, "loss": 0.4793, "step": 41979 }, { "epoch": 0.8903310640283345, "grad_norm": 0.34056487679481506, "learning_rate": 1.1732722965347015e-05, "loss": 0.5267, "step": 41980 }, { "epoch": 0.8903522724862675, "grad_norm": 0.38889384269714355, "learning_rate": 1.173239451352386e-05, "loss": 0.5388, "step": 41981 }, { "epoch": 0.8903734809442005, "grad_norm": 0.3552883565425873, "learning_rate": 1.173206605977395e-05, "loss": 0.49, "step": 41982 }, { "epoch": 0.8903946894021336, "grad_norm": 0.4204878807067871, "learning_rate": 1.1731737604097648e-05, "loss": 0.4816, "step": 41983 }, { "epoch": 0.8904158978600666, "grad_norm": 0.2979162633419037, "learning_rate": 1.1731409146495328e-05, "loss": 0.3667, "step": 41984 }, { "epoch": 0.8904371063179997, "grad_norm": 0.4171784818172455, "learning_rate": 1.1731080686967344e-05, "loss": 0.5433, "step": 41985 }, { "epoch": 0.8904583147759326, "grad_norm": 0.3239855170249939, "learning_rate": 1.173075222551407e-05, "loss": 0.4088, "step": 41986 }, { "epoch": 0.8904795232338657, "grad_norm": 0.3552405536174774, "learning_rate": 1.173042376213587e-05, "loss": 0.4421, "step": 41987 }, { "epoch": 0.8905007316917987, "grad_norm": 0.4727812111377716, "learning_rate": 1.1730095296833106e-05, "loss": 0.5322, "step": 41988 }, { "epoch": 0.8905219401497317, "grad_norm": 0.32395192980766296, "learning_rate": 1.1729766829606147e-05, "loss": 0.4974, "step": 41989 }, { "epoch": 0.8905431486076647, "grad_norm": 0.33744847774505615, "learning_rate": 1.1729438360455354e-05, "loss": 0.5107, "step": 41990 }, { "epoch": 0.8905643570655978, "grad_norm": 0.40188056230545044, "learning_rate": 1.1729109889381094e-05, "loss": 0.518, "step": 41991 }, { "epoch": 0.8905855655235307, "grad_norm": 0.35131311416625977, "learning_rate": 1.1728781416383734e-05, "loss": 0.4306, "step": 41992 }, { "epoch": 0.8906067739814638, "grad_norm": 0.39917078614234924, "learning_rate": 1.172845294146364e-05, "loss": 0.4195, "step": 41993 }, { "epoch": 0.8906279824393968, "grad_norm": 0.3846561908721924, "learning_rate": 1.1728124464621171e-05, "loss": 0.4654, "step": 41994 }, { "epoch": 0.8906491908973299, "grad_norm": 0.40309491753578186, "learning_rate": 1.1727795985856702e-05, "loss": 0.4444, "step": 41995 }, { "epoch": 0.8906703993552629, "grad_norm": 0.397260844707489, "learning_rate": 1.1727467505170593e-05, "loss": 0.5172, "step": 41996 }, { "epoch": 0.8906916078131959, "grad_norm": 0.4156267046928406, "learning_rate": 1.1727139022563208e-05, "loss": 0.553, "step": 41997 }, { "epoch": 0.890712816271129, "grad_norm": 0.33140942454338074, "learning_rate": 1.1726810538034911e-05, "loss": 0.4595, "step": 41998 }, { "epoch": 0.8907340247290619, "grad_norm": 0.3869777023792267, "learning_rate": 1.1726482051586076e-05, "loss": 0.4813, "step": 41999 }, { "epoch": 0.890755233186995, "grad_norm": 0.4023858606815338, "learning_rate": 1.172615356321706e-05, "loss": 0.494, "step": 42000 }, { "epoch": 0.890776441644928, "grad_norm": 0.4067506492137909, "learning_rate": 1.1725825072928231e-05, "loss": 0.5086, "step": 42001 }, { "epoch": 0.890797650102861, "grad_norm": 0.3728897273540497, "learning_rate": 1.1725496580719957e-05, "loss": 0.4422, "step": 42002 }, { "epoch": 0.890818858560794, "grad_norm": 0.36578404903411865, "learning_rate": 1.1725168086592595e-05, "loss": 0.4392, "step": 42003 }, { "epoch": 0.8908400670187271, "grad_norm": 0.35867226123809814, "learning_rate": 1.1724839590546519e-05, "loss": 0.4194, "step": 42004 }, { "epoch": 0.89086127547666, "grad_norm": 0.38990846276283264, "learning_rate": 1.1724511092582093e-05, "loss": 0.5343, "step": 42005 }, { "epoch": 0.8908824839345931, "grad_norm": 0.4054560661315918, "learning_rate": 1.1724182592699678e-05, "loss": 0.4445, "step": 42006 }, { "epoch": 0.8909036923925261, "grad_norm": 0.5295083522796631, "learning_rate": 1.1723854090899644e-05, "loss": 0.572, "step": 42007 }, { "epoch": 0.8909249008504592, "grad_norm": 0.34799787402153015, "learning_rate": 1.1723525587182354e-05, "loss": 0.467, "step": 42008 }, { "epoch": 0.8909461093083922, "grad_norm": 0.36634278297424316, "learning_rate": 1.1723197081548175e-05, "loss": 0.4801, "step": 42009 }, { "epoch": 0.8909673177663252, "grad_norm": 0.40109673142433167, "learning_rate": 1.172286857399747e-05, "loss": 0.4938, "step": 42010 }, { "epoch": 0.8909885262242583, "grad_norm": 0.3704588711261749, "learning_rate": 1.1722540064530603e-05, "loss": 0.5179, "step": 42011 }, { "epoch": 0.8910097346821912, "grad_norm": 0.3606296181678772, "learning_rate": 1.1722211553147947e-05, "loss": 0.4608, "step": 42012 }, { "epoch": 0.8910309431401243, "grad_norm": 0.34750527143478394, "learning_rate": 1.172188303984986e-05, "loss": 0.4675, "step": 42013 }, { "epoch": 0.8910521515980573, "grad_norm": 0.3980146050453186, "learning_rate": 1.1721554524636707e-05, "loss": 0.5005, "step": 42014 }, { "epoch": 0.8910733600559904, "grad_norm": 0.34026193618774414, "learning_rate": 1.172122600750886e-05, "loss": 0.4814, "step": 42015 }, { "epoch": 0.8910945685139233, "grad_norm": 0.44434884190559387, "learning_rate": 1.172089748846668e-05, "loss": 0.5105, "step": 42016 }, { "epoch": 0.8911157769718564, "grad_norm": 0.425330251455307, "learning_rate": 1.1720568967510528e-05, "loss": 0.441, "step": 42017 }, { "epoch": 0.8911369854297894, "grad_norm": 0.34823712706565857, "learning_rate": 1.1720240444640779e-05, "loss": 0.4878, "step": 42018 }, { "epoch": 0.8911581938877224, "grad_norm": 0.393298864364624, "learning_rate": 1.1719911919857794e-05, "loss": 0.4983, "step": 42019 }, { "epoch": 0.8911794023456554, "grad_norm": 0.34252601861953735, "learning_rate": 1.1719583393161936e-05, "loss": 0.4919, "step": 42020 }, { "epoch": 0.8912006108035885, "grad_norm": 0.4018155038356781, "learning_rate": 1.171925486455357e-05, "loss": 0.558, "step": 42021 }, { "epoch": 0.8912218192615214, "grad_norm": 0.37833473086357117, "learning_rate": 1.1718926334033067e-05, "loss": 0.5271, "step": 42022 }, { "epoch": 0.8912430277194545, "grad_norm": 0.3744402229785919, "learning_rate": 1.1718597801600788e-05, "loss": 0.4723, "step": 42023 }, { "epoch": 0.8912642361773876, "grad_norm": 0.3355333209037781, "learning_rate": 1.1718269267257099e-05, "loss": 0.4346, "step": 42024 }, { "epoch": 0.8912854446353206, "grad_norm": 0.35526835918426514, "learning_rate": 1.1717940731002365e-05, "loss": 0.4619, "step": 42025 }, { "epoch": 0.8913066530932536, "grad_norm": 0.35559049248695374, "learning_rate": 1.1717612192836956e-05, "loss": 0.5724, "step": 42026 }, { "epoch": 0.8913278615511866, "grad_norm": 0.3503463566303253, "learning_rate": 1.171728365276123e-05, "loss": 0.4285, "step": 42027 }, { "epoch": 0.8913490700091197, "grad_norm": 0.35951098799705505, "learning_rate": 1.1716955110775558e-05, "loss": 0.5297, "step": 42028 }, { "epoch": 0.8913702784670526, "grad_norm": 0.364522248506546, "learning_rate": 1.1716626566880303e-05, "loss": 0.4533, "step": 42029 }, { "epoch": 0.8913914869249857, "grad_norm": 0.3823828399181366, "learning_rate": 1.171629802107583e-05, "loss": 0.5168, "step": 42030 }, { "epoch": 0.8914126953829187, "grad_norm": 0.3629035949707031, "learning_rate": 1.1715969473362505e-05, "loss": 0.4792, "step": 42031 }, { "epoch": 0.8914339038408517, "grad_norm": 0.41229677200317383, "learning_rate": 1.1715640923740696e-05, "loss": 0.4916, "step": 42032 }, { "epoch": 0.8914551122987847, "grad_norm": 0.42975282669067383, "learning_rate": 1.1715312372210765e-05, "loss": 0.4473, "step": 42033 }, { "epoch": 0.8914763207567178, "grad_norm": 0.375774621963501, "learning_rate": 1.1714983818773078e-05, "loss": 0.4628, "step": 42034 }, { "epoch": 0.8914975292146508, "grad_norm": 0.39714741706848145, "learning_rate": 1.1714655263428e-05, "loss": 0.5695, "step": 42035 }, { "epoch": 0.8915187376725838, "grad_norm": 0.46634596586227417, "learning_rate": 1.1714326706175898e-05, "loss": 0.5601, "step": 42036 }, { "epoch": 0.8915399461305169, "grad_norm": 0.381009966135025, "learning_rate": 1.1713998147017136e-05, "loss": 0.4199, "step": 42037 }, { "epoch": 0.8915611545884499, "grad_norm": 0.3692432940006256, "learning_rate": 1.1713669585952083e-05, "loss": 0.5327, "step": 42038 }, { "epoch": 0.8915823630463829, "grad_norm": 0.5437320470809937, "learning_rate": 1.1713341022981097e-05, "loss": 0.473, "step": 42039 }, { "epoch": 0.8916035715043159, "grad_norm": 0.3903282880783081, "learning_rate": 1.1713012458104553e-05, "loss": 0.505, "step": 42040 }, { "epoch": 0.891624779962249, "grad_norm": 0.39202240109443665, "learning_rate": 1.1712683891322812e-05, "loss": 0.5336, "step": 42041 }, { "epoch": 0.891645988420182, "grad_norm": 0.36679607629776, "learning_rate": 1.1712355322636234e-05, "loss": 0.441, "step": 42042 }, { "epoch": 0.891667196878115, "grad_norm": 0.35974565148353577, "learning_rate": 1.1712026752045189e-05, "loss": 0.4886, "step": 42043 }, { "epoch": 0.891688405336048, "grad_norm": 0.36472275853157043, "learning_rate": 1.1711698179550046e-05, "loss": 0.4865, "step": 42044 }, { "epoch": 0.8917096137939811, "grad_norm": 0.3946114182472229, "learning_rate": 1.1711369605151166e-05, "loss": 0.5028, "step": 42045 }, { "epoch": 0.891730822251914, "grad_norm": 0.3614508807659149, "learning_rate": 1.1711041028848915e-05, "loss": 0.5335, "step": 42046 }, { "epoch": 0.8917520307098471, "grad_norm": 0.38029393553733826, "learning_rate": 1.1710712450643662e-05, "loss": 0.4588, "step": 42047 }, { "epoch": 0.8917732391677801, "grad_norm": 0.3788645565509796, "learning_rate": 1.1710383870535766e-05, "loss": 0.5171, "step": 42048 }, { "epoch": 0.8917944476257131, "grad_norm": 0.3597104847431183, "learning_rate": 1.1710055288525597e-05, "loss": 0.4224, "step": 42049 }, { "epoch": 0.8918156560836462, "grad_norm": 0.37344369292259216, "learning_rate": 1.170972670461352e-05, "loss": 0.5253, "step": 42050 }, { "epoch": 0.8918368645415792, "grad_norm": 0.35371074080467224, "learning_rate": 1.1709398118799897e-05, "loss": 0.4889, "step": 42051 }, { "epoch": 0.8918580729995123, "grad_norm": 0.39347317814826965, "learning_rate": 1.17090695310851e-05, "loss": 0.5424, "step": 42052 }, { "epoch": 0.8918792814574452, "grad_norm": 0.39264506101608276, "learning_rate": 1.170874094146949e-05, "loss": 0.5757, "step": 42053 }, { "epoch": 0.8919004899153783, "grad_norm": 0.3772898018360138, "learning_rate": 1.1708412349953433e-05, "loss": 0.4156, "step": 42054 }, { "epoch": 0.8919216983733113, "grad_norm": 0.48139485716819763, "learning_rate": 1.1708083756537295e-05, "loss": 0.4981, "step": 42055 }, { "epoch": 0.8919429068312443, "grad_norm": 0.3394928276538849, "learning_rate": 1.170775516122144e-05, "loss": 0.4884, "step": 42056 }, { "epoch": 0.8919641152891773, "grad_norm": 0.38189825415611267, "learning_rate": 1.1707426564006233e-05, "loss": 0.5049, "step": 42057 }, { "epoch": 0.8919853237471104, "grad_norm": 0.35693663358688354, "learning_rate": 1.1707097964892044e-05, "loss": 0.4977, "step": 42058 }, { "epoch": 0.8920065322050433, "grad_norm": 0.3366324007511139, "learning_rate": 1.1706769363879233e-05, "loss": 0.4544, "step": 42059 }, { "epoch": 0.8920277406629764, "grad_norm": 0.439171701669693, "learning_rate": 1.170644076096817e-05, "loss": 0.4, "step": 42060 }, { "epoch": 0.8920489491209094, "grad_norm": 0.37871256470680237, "learning_rate": 1.170611215615922e-05, "loss": 0.5575, "step": 42061 }, { "epoch": 0.8920701575788424, "grad_norm": 0.39321210980415344, "learning_rate": 1.170578354945274e-05, "loss": 0.4874, "step": 42062 }, { "epoch": 0.8920913660367754, "grad_norm": 0.39254310727119446, "learning_rate": 1.1705454940849108e-05, "loss": 0.5131, "step": 42063 }, { "epoch": 0.8921125744947085, "grad_norm": 0.34294193983078003, "learning_rate": 1.1705126330348684e-05, "loss": 0.4032, "step": 42064 }, { "epoch": 0.8921337829526416, "grad_norm": 0.40275898575782776, "learning_rate": 1.170479771795183e-05, "loss": 0.5158, "step": 42065 }, { "epoch": 0.8921549914105745, "grad_norm": 0.3602803945541382, "learning_rate": 1.1704469103658918e-05, "loss": 0.4684, "step": 42066 }, { "epoch": 0.8921761998685076, "grad_norm": 0.38230305910110474, "learning_rate": 1.1704140487470309e-05, "loss": 0.5309, "step": 42067 }, { "epoch": 0.8921974083264406, "grad_norm": 0.4169921875, "learning_rate": 1.1703811869386368e-05, "loss": 0.4939, "step": 42068 }, { "epoch": 0.8922186167843736, "grad_norm": 0.3777455985546112, "learning_rate": 1.1703483249407466e-05, "loss": 0.4457, "step": 42069 }, { "epoch": 0.8922398252423066, "grad_norm": 0.3971027135848999, "learning_rate": 1.1703154627533962e-05, "loss": 0.523, "step": 42070 }, { "epoch": 0.8922610337002397, "grad_norm": 0.4057709276676178, "learning_rate": 1.1702826003766226e-05, "loss": 0.4685, "step": 42071 }, { "epoch": 0.8922822421581726, "grad_norm": 0.38898059725761414, "learning_rate": 1.170249737810462e-05, "loss": 0.4876, "step": 42072 }, { "epoch": 0.8923034506161057, "grad_norm": 0.38731512427330017, "learning_rate": 1.1702168750549512e-05, "loss": 0.5349, "step": 42073 }, { "epoch": 0.8923246590740387, "grad_norm": 0.3588363528251648, "learning_rate": 1.1701840121101266e-05, "loss": 0.4728, "step": 42074 }, { "epoch": 0.8923458675319718, "grad_norm": 0.4005036950111389, "learning_rate": 1.1701511489760249e-05, "loss": 0.4864, "step": 42075 }, { "epoch": 0.8923670759899047, "grad_norm": 0.4127812385559082, "learning_rate": 1.1701182856526823e-05, "loss": 0.5328, "step": 42076 }, { "epoch": 0.8923882844478378, "grad_norm": 0.36150670051574707, "learning_rate": 1.170085422140136e-05, "loss": 0.5046, "step": 42077 }, { "epoch": 0.8924094929057709, "grad_norm": 0.3854284882545471, "learning_rate": 1.1700525584384221e-05, "loss": 0.5579, "step": 42078 }, { "epoch": 0.8924307013637038, "grad_norm": 0.5745787024497986, "learning_rate": 1.1700196945475773e-05, "loss": 0.5034, "step": 42079 }, { "epoch": 0.8924519098216369, "grad_norm": 0.36877891421318054, "learning_rate": 1.1699868304676378e-05, "loss": 0.4905, "step": 42080 }, { "epoch": 0.8924731182795699, "grad_norm": 0.381642609834671, "learning_rate": 1.1699539661986405e-05, "loss": 0.4903, "step": 42081 }, { "epoch": 0.892494326737503, "grad_norm": 0.4008314907550812, "learning_rate": 1.1699211017406219e-05, "loss": 0.4792, "step": 42082 }, { "epoch": 0.8925155351954359, "grad_norm": 0.4116881787776947, "learning_rate": 1.1698882370936187e-05, "loss": 0.4474, "step": 42083 }, { "epoch": 0.892536743653369, "grad_norm": 0.3903009593486786, "learning_rate": 1.1698553722576672e-05, "loss": 0.6053, "step": 42084 }, { "epoch": 0.892557952111302, "grad_norm": 0.33608973026275635, "learning_rate": 1.1698225072328038e-05, "loss": 0.3997, "step": 42085 }, { "epoch": 0.892579160569235, "grad_norm": 0.47265738248825073, "learning_rate": 1.1697896420190656e-05, "loss": 0.4839, "step": 42086 }, { "epoch": 0.892600369027168, "grad_norm": 0.3374958038330078, "learning_rate": 1.1697567766164885e-05, "loss": 0.5425, "step": 42087 }, { "epoch": 0.8926215774851011, "grad_norm": 0.3363637626171112, "learning_rate": 1.1697239110251095e-05, "loss": 0.5056, "step": 42088 }, { "epoch": 0.892642785943034, "grad_norm": 0.36830297112464905, "learning_rate": 1.1696910452449653e-05, "loss": 0.4267, "step": 42089 }, { "epoch": 0.8926639944009671, "grad_norm": 0.4032357931137085, "learning_rate": 1.169658179276092e-05, "loss": 0.5485, "step": 42090 }, { "epoch": 0.8926852028589002, "grad_norm": 0.4674213230609894, "learning_rate": 1.1696253131185263e-05, "loss": 0.5224, "step": 42091 }, { "epoch": 0.8927064113168331, "grad_norm": 0.3252502381801605, "learning_rate": 1.169592446772305e-05, "loss": 0.4588, "step": 42092 }, { "epoch": 0.8927276197747662, "grad_norm": 0.44547802209854126, "learning_rate": 1.1695595802374641e-05, "loss": 0.4893, "step": 42093 }, { "epoch": 0.8927488282326992, "grad_norm": 0.3781173527240753, "learning_rate": 1.1695267135140406e-05, "loss": 0.4745, "step": 42094 }, { "epoch": 0.8927700366906323, "grad_norm": 0.5888226628303528, "learning_rate": 1.1694938466020713e-05, "loss": 0.517, "step": 42095 }, { "epoch": 0.8927912451485652, "grad_norm": 0.346941202878952, "learning_rate": 1.1694609795015922e-05, "loss": 0.4261, "step": 42096 }, { "epoch": 0.8928124536064983, "grad_norm": 0.33850225806236267, "learning_rate": 1.16942811221264e-05, "loss": 0.4718, "step": 42097 }, { "epoch": 0.8928336620644313, "grad_norm": 0.37873393297195435, "learning_rate": 1.1693952447352515e-05, "loss": 0.4767, "step": 42098 }, { "epoch": 0.8928548705223643, "grad_norm": 0.6694181561470032, "learning_rate": 1.1693623770694629e-05, "loss": 0.5862, "step": 42099 }, { "epoch": 0.8928760789802973, "grad_norm": 0.36093252897262573, "learning_rate": 1.1693295092153112e-05, "loss": 0.4838, "step": 42100 }, { "epoch": 0.8928972874382304, "grad_norm": 0.4045805335044861, "learning_rate": 1.1692966411728323e-05, "loss": 0.5455, "step": 42101 }, { "epoch": 0.8929184958961633, "grad_norm": 0.36702314019203186, "learning_rate": 1.1692637729420634e-05, "loss": 0.5109, "step": 42102 }, { "epoch": 0.8929397043540964, "grad_norm": 0.5151333212852478, "learning_rate": 1.1692309045230409e-05, "loss": 0.5278, "step": 42103 }, { "epoch": 0.8929609128120294, "grad_norm": 0.32539695501327515, "learning_rate": 1.1691980359158009e-05, "loss": 0.4964, "step": 42104 }, { "epoch": 0.8929821212699625, "grad_norm": 0.3724253177642822, "learning_rate": 1.1691651671203805e-05, "loss": 0.4354, "step": 42105 }, { "epoch": 0.8930033297278955, "grad_norm": 0.36569085717201233, "learning_rate": 1.1691322981368162e-05, "loss": 0.5004, "step": 42106 }, { "epoch": 0.8930245381858285, "grad_norm": 0.3759220242500305, "learning_rate": 1.169099428965144e-05, "loss": 0.5551, "step": 42107 }, { "epoch": 0.8930457466437616, "grad_norm": 0.3740137815475464, "learning_rate": 1.1690665596054011e-05, "loss": 0.4393, "step": 42108 }, { "epoch": 0.8930669551016945, "grad_norm": 0.3843507170677185, "learning_rate": 1.1690336900576242e-05, "loss": 0.5623, "step": 42109 }, { "epoch": 0.8930881635596276, "grad_norm": 0.40675055980682373, "learning_rate": 1.1690008203218493e-05, "loss": 0.5316, "step": 42110 }, { "epoch": 0.8931093720175606, "grad_norm": 0.37999236583709717, "learning_rate": 1.1689679503981131e-05, "loss": 0.5512, "step": 42111 }, { "epoch": 0.8931305804754937, "grad_norm": 0.3642652928829193, "learning_rate": 1.1689350802864523e-05, "loss": 0.4921, "step": 42112 }, { "epoch": 0.8931517889334266, "grad_norm": 0.42139825224876404, "learning_rate": 1.168902209986903e-05, "loss": 0.4852, "step": 42113 }, { "epoch": 0.8931729973913597, "grad_norm": 0.37224605679512024, "learning_rate": 1.1688693394995026e-05, "loss": 0.4768, "step": 42114 }, { "epoch": 0.8931942058492927, "grad_norm": 0.3811781406402588, "learning_rate": 1.168836468824287e-05, "loss": 0.4135, "step": 42115 }, { "epoch": 0.8932154143072257, "grad_norm": 0.35021254420280457, "learning_rate": 1.1688035979612929e-05, "loss": 0.5549, "step": 42116 }, { "epoch": 0.8932366227651587, "grad_norm": 0.3537242114543915, "learning_rate": 1.168770726910557e-05, "loss": 0.4593, "step": 42117 }, { "epoch": 0.8932578312230918, "grad_norm": 0.41611242294311523, "learning_rate": 1.1687378556721157e-05, "loss": 0.4516, "step": 42118 }, { "epoch": 0.8932790396810248, "grad_norm": 0.3331935703754425, "learning_rate": 1.1687049842460056e-05, "loss": 0.4616, "step": 42119 }, { "epoch": 0.8933002481389578, "grad_norm": 0.33630993962287903, "learning_rate": 1.1686721126322631e-05, "loss": 0.4451, "step": 42120 }, { "epoch": 0.8933214565968909, "grad_norm": 0.3506450355052948, "learning_rate": 1.1686392408309251e-05, "loss": 0.4738, "step": 42121 }, { "epoch": 0.8933426650548238, "grad_norm": 0.3651486337184906, "learning_rate": 1.1686063688420282e-05, "loss": 0.5092, "step": 42122 }, { "epoch": 0.8933638735127569, "grad_norm": 0.33269238471984863, "learning_rate": 1.1685734966656089e-05, "loss": 0.5019, "step": 42123 }, { "epoch": 0.8933850819706899, "grad_norm": 0.4596346616744995, "learning_rate": 1.168540624301703e-05, "loss": 0.4157, "step": 42124 }, { "epoch": 0.893406290428623, "grad_norm": 0.34102869033813477, "learning_rate": 1.1685077517503483e-05, "loss": 0.4177, "step": 42125 }, { "epoch": 0.8934274988865559, "grad_norm": 0.3768901228904724, "learning_rate": 1.1684748790115802e-05, "loss": 0.4646, "step": 42126 }, { "epoch": 0.893448707344489, "grad_norm": 0.3347139358520508, "learning_rate": 1.168442006085436e-05, "loss": 0.5298, "step": 42127 }, { "epoch": 0.893469915802422, "grad_norm": 0.3529316782951355, "learning_rate": 1.1684091329719522e-05, "loss": 0.461, "step": 42128 }, { "epoch": 0.893491124260355, "grad_norm": 0.4220631718635559, "learning_rate": 1.1683762596711654e-05, "loss": 0.4496, "step": 42129 }, { "epoch": 0.893512332718288, "grad_norm": 0.3752072751522064, "learning_rate": 1.1683433861831115e-05, "loss": 0.4744, "step": 42130 }, { "epoch": 0.8935335411762211, "grad_norm": 0.4710964262485504, "learning_rate": 1.1683105125078281e-05, "loss": 0.5183, "step": 42131 }, { "epoch": 0.8935547496341542, "grad_norm": 0.3728894591331482, "learning_rate": 1.1682776386453507e-05, "loss": 0.5058, "step": 42132 }, { "epoch": 0.8935759580920871, "grad_norm": 0.344685822725296, "learning_rate": 1.1682447645957165e-05, "loss": 0.4616, "step": 42133 }, { "epoch": 0.8935971665500202, "grad_norm": 0.33757978677749634, "learning_rate": 1.168211890358962e-05, "loss": 0.3938, "step": 42134 }, { "epoch": 0.8936183750079532, "grad_norm": 0.4752342402935028, "learning_rate": 1.1681790159351236e-05, "loss": 0.4589, "step": 42135 }, { "epoch": 0.8936395834658862, "grad_norm": 0.3450157344341278, "learning_rate": 1.1681461413242384e-05, "loss": 0.4181, "step": 42136 }, { "epoch": 0.8936607919238192, "grad_norm": 0.4077949821949005, "learning_rate": 1.1681132665263422e-05, "loss": 0.5997, "step": 42137 }, { "epoch": 0.8936820003817523, "grad_norm": 0.36289024353027344, "learning_rate": 1.1680803915414718e-05, "loss": 0.5311, "step": 42138 }, { "epoch": 0.8937032088396852, "grad_norm": 0.4198257029056549, "learning_rate": 1.1680475163696636e-05, "loss": 0.3973, "step": 42139 }, { "epoch": 0.8937244172976183, "grad_norm": 0.3841937780380249, "learning_rate": 1.1680146410109549e-05, "loss": 0.4563, "step": 42140 }, { "epoch": 0.8937456257555513, "grad_norm": 0.3930743336677551, "learning_rate": 1.1679817654653816e-05, "loss": 0.54, "step": 42141 }, { "epoch": 0.8937668342134844, "grad_norm": 0.40705475211143494, "learning_rate": 1.1679488897329805e-05, "loss": 0.4952, "step": 42142 }, { "epoch": 0.8937880426714173, "grad_norm": 0.517041802406311, "learning_rate": 1.167916013813788e-05, "loss": 0.408, "step": 42143 }, { "epoch": 0.8938092511293504, "grad_norm": 0.41053470969200134, "learning_rate": 1.167883137707841e-05, "loss": 0.5734, "step": 42144 }, { "epoch": 0.8938304595872834, "grad_norm": 0.41974207758903503, "learning_rate": 1.1678502614151754e-05, "loss": 0.5589, "step": 42145 }, { "epoch": 0.8938516680452164, "grad_norm": 0.35381466150283813, "learning_rate": 1.1678173849358285e-05, "loss": 0.4415, "step": 42146 }, { "epoch": 0.8938728765031495, "grad_norm": 0.3920145630836487, "learning_rate": 1.1677845082698364e-05, "loss": 0.5082, "step": 42147 }, { "epoch": 0.8938940849610825, "grad_norm": 0.3516743779182434, "learning_rate": 1.1677516314172362e-05, "loss": 0.5047, "step": 42148 }, { "epoch": 0.8939152934190155, "grad_norm": 0.3648364245891571, "learning_rate": 1.1677187543780637e-05, "loss": 0.4786, "step": 42149 }, { "epoch": 0.8939365018769485, "grad_norm": 0.3931717574596405, "learning_rate": 1.167685877152356e-05, "loss": 0.4957, "step": 42150 }, { "epoch": 0.8939577103348816, "grad_norm": 0.36066243052482605, "learning_rate": 1.1676529997401497e-05, "loss": 0.406, "step": 42151 }, { "epoch": 0.8939789187928145, "grad_norm": 0.32354795932769775, "learning_rate": 1.1676201221414808e-05, "loss": 0.4329, "step": 42152 }, { "epoch": 0.8940001272507476, "grad_norm": 0.3910517394542694, "learning_rate": 1.1675872443563864e-05, "loss": 0.4877, "step": 42153 }, { "epoch": 0.8940213357086806, "grad_norm": 0.36929282546043396, "learning_rate": 1.1675543663849031e-05, "loss": 0.5397, "step": 42154 }, { "epoch": 0.8940425441666137, "grad_norm": 0.3623519539833069, "learning_rate": 1.1675214882270671e-05, "loss": 0.5136, "step": 42155 }, { "epoch": 0.8940637526245466, "grad_norm": 0.3722045421600342, "learning_rate": 1.1674886098829154e-05, "loss": 0.4405, "step": 42156 }, { "epoch": 0.8940849610824797, "grad_norm": 0.39426887035369873, "learning_rate": 1.1674557313524842e-05, "loss": 0.5747, "step": 42157 }, { "epoch": 0.8941061695404127, "grad_norm": 0.3860216736793518, "learning_rate": 1.1674228526358097e-05, "loss": 0.6131, "step": 42158 }, { "epoch": 0.8941273779983457, "grad_norm": 0.36634910106658936, "learning_rate": 1.1673899737329298e-05, "loss": 0.4868, "step": 42159 }, { "epoch": 0.8941485864562788, "grad_norm": 0.33137908577919006, "learning_rate": 1.1673570946438798e-05, "loss": 0.4827, "step": 42160 }, { "epoch": 0.8941697949142118, "grad_norm": 0.34879937767982483, "learning_rate": 1.1673242153686966e-05, "loss": 0.4259, "step": 42161 }, { "epoch": 0.8941910033721449, "grad_norm": 0.35894620418548584, "learning_rate": 1.1672913359074172e-05, "loss": 0.3961, "step": 42162 }, { "epoch": 0.8942122118300778, "grad_norm": 0.373368501663208, "learning_rate": 1.1672584562600773e-05, "loss": 0.44, "step": 42163 }, { "epoch": 0.8942334202880109, "grad_norm": 0.324430912733078, "learning_rate": 1.1672255764267146e-05, "loss": 0.4934, "step": 42164 }, { "epoch": 0.8942546287459439, "grad_norm": 0.33541297912597656, "learning_rate": 1.1671926964073646e-05, "loss": 0.4691, "step": 42165 }, { "epoch": 0.8942758372038769, "grad_norm": 0.46881163120269775, "learning_rate": 1.1671598162020645e-05, "loss": 0.4328, "step": 42166 }, { "epoch": 0.8942970456618099, "grad_norm": 0.446446031332016, "learning_rate": 1.1671269358108508e-05, "loss": 0.5073, "step": 42167 }, { "epoch": 0.894318254119743, "grad_norm": 0.3569362759590149, "learning_rate": 1.16709405523376e-05, "loss": 0.5116, "step": 42168 }, { "epoch": 0.8943394625776759, "grad_norm": 0.3219762444496155, "learning_rate": 1.1670611744708284e-05, "loss": 0.4628, "step": 42169 }, { "epoch": 0.894360671035609, "grad_norm": 0.45874813199043274, "learning_rate": 1.1670282935220929e-05, "loss": 0.4544, "step": 42170 }, { "epoch": 0.894381879493542, "grad_norm": 0.3711375892162323, "learning_rate": 1.16699541238759e-05, "loss": 0.4681, "step": 42171 }, { "epoch": 0.894403087951475, "grad_norm": 0.3647925853729248, "learning_rate": 1.1669625310673563e-05, "loss": 0.4705, "step": 42172 }, { "epoch": 0.8944242964094081, "grad_norm": 0.34145721793174744, "learning_rate": 1.1669296495614284e-05, "loss": 0.4904, "step": 42173 }, { "epoch": 0.8944455048673411, "grad_norm": 0.38553696870803833, "learning_rate": 1.1668967678698426e-05, "loss": 0.4736, "step": 42174 }, { "epoch": 0.8944667133252742, "grad_norm": 0.39851832389831543, "learning_rate": 1.1668638859926358e-05, "loss": 0.49, "step": 42175 }, { "epoch": 0.8944879217832071, "grad_norm": 0.40870827436447144, "learning_rate": 1.1668310039298445e-05, "loss": 0.4781, "step": 42176 }, { "epoch": 0.8945091302411402, "grad_norm": 0.42787450551986694, "learning_rate": 1.166798121681505e-05, "loss": 0.4703, "step": 42177 }, { "epoch": 0.8945303386990732, "grad_norm": 0.3557722568511963, "learning_rate": 1.166765239247654e-05, "loss": 0.5034, "step": 42178 }, { "epoch": 0.8945515471570062, "grad_norm": 0.39244168996810913, "learning_rate": 1.1667323566283284e-05, "loss": 0.4608, "step": 42179 }, { "epoch": 0.8945727556149392, "grad_norm": 0.35487428307533264, "learning_rate": 1.1666994738235647e-05, "loss": 0.4349, "step": 42180 }, { "epoch": 0.8945939640728723, "grad_norm": 0.3578453063964844, "learning_rate": 1.166666590833399e-05, "loss": 0.4379, "step": 42181 }, { "epoch": 0.8946151725308052, "grad_norm": 0.3543645143508911, "learning_rate": 1.1666337076578682e-05, "loss": 0.4753, "step": 42182 }, { "epoch": 0.8946363809887383, "grad_norm": 0.3623172342777252, "learning_rate": 1.1666008242970088e-05, "loss": 0.4838, "step": 42183 }, { "epoch": 0.8946575894466713, "grad_norm": 0.9582987427711487, "learning_rate": 1.1665679407508574e-05, "loss": 0.5557, "step": 42184 }, { "epoch": 0.8946787979046044, "grad_norm": 0.3408261239528656, "learning_rate": 1.1665350570194506e-05, "loss": 0.4002, "step": 42185 }, { "epoch": 0.8947000063625373, "grad_norm": 0.33895233273506165, "learning_rate": 1.166502173102825e-05, "loss": 0.523, "step": 42186 }, { "epoch": 0.8947212148204704, "grad_norm": 0.33707478642463684, "learning_rate": 1.1664692890010173e-05, "loss": 0.4761, "step": 42187 }, { "epoch": 0.8947424232784035, "grad_norm": 0.33621007204055786, "learning_rate": 1.1664364047140637e-05, "loss": 0.4362, "step": 42188 }, { "epoch": 0.8947636317363364, "grad_norm": 0.39151516556739807, "learning_rate": 1.166403520242001e-05, "loss": 0.4587, "step": 42189 }, { "epoch": 0.8947848401942695, "grad_norm": 0.44432902336120605, "learning_rate": 1.1663706355848656e-05, "loss": 0.4966, "step": 42190 }, { "epoch": 0.8948060486522025, "grad_norm": 0.4006226658821106, "learning_rate": 1.1663377507426945e-05, "loss": 0.5313, "step": 42191 }, { "epoch": 0.8948272571101356, "grad_norm": 0.34869900345802307, "learning_rate": 1.1663048657155238e-05, "loss": 0.4681, "step": 42192 }, { "epoch": 0.8948484655680685, "grad_norm": 0.3163158595561981, "learning_rate": 1.1662719805033904e-05, "loss": 0.3931, "step": 42193 }, { "epoch": 0.8948696740260016, "grad_norm": 0.4041016399860382, "learning_rate": 1.166239095106331e-05, "loss": 0.5021, "step": 42194 }, { "epoch": 0.8948908824839346, "grad_norm": 0.39385929703712463, "learning_rate": 1.1662062095243814e-05, "loss": 0.4667, "step": 42195 }, { "epoch": 0.8949120909418676, "grad_norm": 0.34648609161376953, "learning_rate": 1.166173323757579e-05, "loss": 0.5127, "step": 42196 }, { "epoch": 0.8949332993998006, "grad_norm": 0.3693019449710846, "learning_rate": 1.16614043780596e-05, "loss": 0.4541, "step": 42197 }, { "epoch": 0.8949545078577337, "grad_norm": 0.5829606056213379, "learning_rate": 1.1661075516695608e-05, "loss": 0.4917, "step": 42198 }, { "epoch": 0.8949757163156666, "grad_norm": 0.3941478729248047, "learning_rate": 1.1660746653484187e-05, "loss": 0.4593, "step": 42199 }, { "epoch": 0.8949969247735997, "grad_norm": 0.3593232333660126, "learning_rate": 1.1660417788425693e-05, "loss": 0.4052, "step": 42200 }, { "epoch": 0.8950181332315328, "grad_norm": 0.34801843762397766, "learning_rate": 1.1660088921520502e-05, "loss": 0.4561, "step": 42201 }, { "epoch": 0.8950393416894658, "grad_norm": 0.4127294421195984, "learning_rate": 1.1659760052768974e-05, "loss": 0.5004, "step": 42202 }, { "epoch": 0.8950605501473988, "grad_norm": 0.38848334550857544, "learning_rate": 1.1659431182171468e-05, "loss": 0.4402, "step": 42203 }, { "epoch": 0.8950817586053318, "grad_norm": 0.4433363080024719, "learning_rate": 1.1659102309728363e-05, "loss": 0.4054, "step": 42204 }, { "epoch": 0.8951029670632649, "grad_norm": 0.4156414866447449, "learning_rate": 1.1658773435440021e-05, "loss": 0.4434, "step": 42205 }, { "epoch": 0.8951241755211978, "grad_norm": 0.44754758477211, "learning_rate": 1.1658444559306801e-05, "loss": 0.5008, "step": 42206 }, { "epoch": 0.8951453839791309, "grad_norm": 0.3486812114715576, "learning_rate": 1.1658115681329076e-05, "loss": 0.4823, "step": 42207 }, { "epoch": 0.8951665924370639, "grad_norm": 0.32282108068466187, "learning_rate": 1.1657786801507209e-05, "loss": 0.4402, "step": 42208 }, { "epoch": 0.8951878008949969, "grad_norm": 0.5059934854507446, "learning_rate": 1.1657457919841564e-05, "loss": 0.4884, "step": 42209 }, { "epoch": 0.8952090093529299, "grad_norm": 0.3618720471858978, "learning_rate": 1.165712903633251e-05, "loss": 0.431, "step": 42210 }, { "epoch": 0.895230217810863, "grad_norm": 0.3855517506599426, "learning_rate": 1.1656800150980413e-05, "loss": 0.4936, "step": 42211 }, { "epoch": 0.895251426268796, "grad_norm": 0.35756897926330566, "learning_rate": 1.1656471263785633e-05, "loss": 0.4717, "step": 42212 }, { "epoch": 0.895272634726729, "grad_norm": 0.38195937871932983, "learning_rate": 1.1656142374748545e-05, "loss": 0.4226, "step": 42213 }, { "epoch": 0.8952938431846621, "grad_norm": 0.3571249842643738, "learning_rate": 1.1655813483869504e-05, "loss": 0.369, "step": 42214 }, { "epoch": 0.8953150516425951, "grad_norm": 0.3541509211063385, "learning_rate": 1.1655484591148887e-05, "loss": 0.4892, "step": 42215 }, { "epoch": 0.8953362601005281, "grad_norm": 0.4965684115886688, "learning_rate": 1.1655155696587053e-05, "loss": 0.4639, "step": 42216 }, { "epoch": 0.8953574685584611, "grad_norm": 0.36355528235435486, "learning_rate": 1.1654826800184366e-05, "loss": 0.4951, "step": 42217 }, { "epoch": 0.8953786770163942, "grad_norm": 0.35931938886642456, "learning_rate": 1.16544979019412e-05, "loss": 0.4702, "step": 42218 }, { "epoch": 0.8953998854743271, "grad_norm": 0.3324304521083832, "learning_rate": 1.1654169001857914e-05, "loss": 0.5138, "step": 42219 }, { "epoch": 0.8954210939322602, "grad_norm": 0.35577887296676636, "learning_rate": 1.1653840099934875e-05, "loss": 0.5298, "step": 42220 }, { "epoch": 0.8954423023901932, "grad_norm": 0.4266263544559479, "learning_rate": 1.165351119617245e-05, "loss": 0.5287, "step": 42221 }, { "epoch": 0.8954635108481263, "grad_norm": 0.3523392677307129, "learning_rate": 1.1653182290571003e-05, "loss": 0.5247, "step": 42222 }, { "epoch": 0.8954847193060592, "grad_norm": 0.4002355635166168, "learning_rate": 1.16528533831309e-05, "loss": 0.5956, "step": 42223 }, { "epoch": 0.8955059277639923, "grad_norm": 0.3317880630493164, "learning_rate": 1.165252447385251e-05, "loss": 0.4102, "step": 42224 }, { "epoch": 0.8955271362219253, "grad_norm": 0.4149206578731537, "learning_rate": 1.16521955627362e-05, "loss": 0.5201, "step": 42225 }, { "epoch": 0.8955483446798583, "grad_norm": 0.3915291428565979, "learning_rate": 1.1651866649782326e-05, "loss": 0.434, "step": 42226 }, { "epoch": 0.8955695531377913, "grad_norm": 0.34535688161849976, "learning_rate": 1.1651537734991266e-05, "loss": 0.4935, "step": 42227 }, { "epoch": 0.8955907615957244, "grad_norm": 0.37990203499794006, "learning_rate": 1.1651208818363375e-05, "loss": 0.4705, "step": 42228 }, { "epoch": 0.8956119700536574, "grad_norm": 0.3650285005569458, "learning_rate": 1.1650879899899024e-05, "loss": 0.4985, "step": 42229 }, { "epoch": 0.8956331785115904, "grad_norm": 0.3839055001735687, "learning_rate": 1.1650550979598582e-05, "loss": 0.4684, "step": 42230 }, { "epoch": 0.8956543869695235, "grad_norm": 0.33494409918785095, "learning_rate": 1.1650222057462411e-05, "loss": 0.4524, "step": 42231 }, { "epoch": 0.8956755954274565, "grad_norm": 0.36789771914482117, "learning_rate": 1.1649893133490877e-05, "loss": 0.503, "step": 42232 }, { "epoch": 0.8956968038853895, "grad_norm": 0.3553776144981384, "learning_rate": 1.1649564207684345e-05, "loss": 0.4539, "step": 42233 }, { "epoch": 0.8957180123433225, "grad_norm": 0.33863165974617004, "learning_rate": 1.1649235280043183e-05, "loss": 0.4016, "step": 42234 }, { "epoch": 0.8957392208012556, "grad_norm": 0.3840480446815491, "learning_rate": 1.1648906350567754e-05, "loss": 0.4616, "step": 42235 }, { "epoch": 0.8957604292591885, "grad_norm": 0.3465246558189392, "learning_rate": 1.1648577419258428e-05, "loss": 0.5501, "step": 42236 }, { "epoch": 0.8957816377171216, "grad_norm": 0.36921656131744385, "learning_rate": 1.1648248486115567e-05, "loss": 0.4439, "step": 42237 }, { "epoch": 0.8958028461750546, "grad_norm": 0.36126044392585754, "learning_rate": 1.1647919551139542e-05, "loss": 0.5063, "step": 42238 }, { "epoch": 0.8958240546329876, "grad_norm": 0.452228844165802, "learning_rate": 1.1647590614330712e-05, "loss": 0.5522, "step": 42239 }, { "epoch": 0.8958452630909206, "grad_norm": 0.34037965536117554, "learning_rate": 1.1647261675689445e-05, "loss": 0.5223, "step": 42240 }, { "epoch": 0.8958664715488537, "grad_norm": 0.3731398582458496, "learning_rate": 1.164693273521611e-05, "loss": 0.4988, "step": 42241 }, { "epoch": 0.8958876800067868, "grad_norm": 0.37170806527137756, "learning_rate": 1.164660379291107e-05, "loss": 0.4731, "step": 42242 }, { "epoch": 0.8959088884647197, "grad_norm": 0.38146844506263733, "learning_rate": 1.164627484877469e-05, "loss": 0.4788, "step": 42243 }, { "epoch": 0.8959300969226528, "grad_norm": 0.3434622883796692, "learning_rate": 1.164594590280734e-05, "loss": 0.4428, "step": 42244 }, { "epoch": 0.8959513053805858, "grad_norm": 0.3549596965312958, "learning_rate": 1.1645616955009381e-05, "loss": 0.435, "step": 42245 }, { "epoch": 0.8959725138385188, "grad_norm": 0.3349224030971527, "learning_rate": 1.1645288005381184e-05, "loss": 0.4573, "step": 42246 }, { "epoch": 0.8959937222964518, "grad_norm": 0.33148661255836487, "learning_rate": 1.164495905392311e-05, "loss": 0.444, "step": 42247 }, { "epoch": 0.8960149307543849, "grad_norm": 0.33716610074043274, "learning_rate": 1.1644630100635528e-05, "loss": 0.4373, "step": 42248 }, { "epoch": 0.8960361392123178, "grad_norm": 0.3849544823169708, "learning_rate": 1.16443011455188e-05, "loss": 0.4958, "step": 42249 }, { "epoch": 0.8960573476702509, "grad_norm": 0.35137540102005005, "learning_rate": 1.1643972188573297e-05, "loss": 0.4481, "step": 42250 }, { "epoch": 0.8960785561281839, "grad_norm": 0.3769557476043701, "learning_rate": 1.1643643229799382e-05, "loss": 0.4709, "step": 42251 }, { "epoch": 0.896099764586117, "grad_norm": 0.35794609785079956, "learning_rate": 1.164331426919742e-05, "loss": 0.4584, "step": 42252 }, { "epoch": 0.8961209730440499, "grad_norm": 0.42195072770118713, "learning_rate": 1.1642985306767782e-05, "loss": 0.5062, "step": 42253 }, { "epoch": 0.896142181501983, "grad_norm": 0.35736551880836487, "learning_rate": 1.1642656342510826e-05, "loss": 0.4041, "step": 42254 }, { "epoch": 0.8961633899599161, "grad_norm": 0.3453383445739746, "learning_rate": 1.1642327376426923e-05, "loss": 0.4195, "step": 42255 }, { "epoch": 0.896184598417849, "grad_norm": 0.3784119188785553, "learning_rate": 1.164199840851644e-05, "loss": 0.5256, "step": 42256 }, { "epoch": 0.8962058068757821, "grad_norm": 0.39832624793052673, "learning_rate": 1.1641669438779737e-05, "loss": 0.4697, "step": 42257 }, { "epoch": 0.8962270153337151, "grad_norm": 0.454712450504303, "learning_rate": 1.1641340467217186e-05, "loss": 0.501, "step": 42258 }, { "epoch": 0.8962482237916481, "grad_norm": 0.3824663758277893, "learning_rate": 1.1641011493829149e-05, "loss": 0.4763, "step": 42259 }, { "epoch": 0.8962694322495811, "grad_norm": 0.38255074620246887, "learning_rate": 1.1640682518615993e-05, "loss": 0.5149, "step": 42260 }, { "epoch": 0.8962906407075142, "grad_norm": 0.4131454825401306, "learning_rate": 1.1640353541578085e-05, "loss": 0.5296, "step": 42261 }, { "epoch": 0.8963118491654471, "grad_norm": 0.39803841710090637, "learning_rate": 1.164002456271579e-05, "loss": 0.4606, "step": 42262 }, { "epoch": 0.8963330576233802, "grad_norm": 0.35104361176490784, "learning_rate": 1.1639695582029473e-05, "loss": 0.4288, "step": 42263 }, { "epoch": 0.8963542660813132, "grad_norm": 0.44249022006988525, "learning_rate": 1.1639366599519504e-05, "loss": 0.4444, "step": 42264 }, { "epoch": 0.8963754745392463, "grad_norm": 0.5148264169692993, "learning_rate": 1.1639037615186241e-05, "loss": 0.5274, "step": 42265 }, { "epoch": 0.8963966829971792, "grad_norm": 0.3828580677509308, "learning_rate": 1.1638708629030058e-05, "loss": 0.5107, "step": 42266 }, { "epoch": 0.8964178914551123, "grad_norm": 0.3954281806945801, "learning_rate": 1.1638379641051315e-05, "loss": 0.5125, "step": 42267 }, { "epoch": 0.8964390999130453, "grad_norm": 0.3762601912021637, "learning_rate": 1.1638050651250381e-05, "loss": 0.4596, "step": 42268 }, { "epoch": 0.8964603083709783, "grad_norm": 0.4724024832248688, "learning_rate": 1.1637721659627624e-05, "loss": 0.4917, "step": 42269 }, { "epoch": 0.8964815168289114, "grad_norm": 0.3918137848377228, "learning_rate": 1.1637392666183406e-05, "loss": 0.466, "step": 42270 }, { "epoch": 0.8965027252868444, "grad_norm": 0.3543113172054291, "learning_rate": 1.1637063670918092e-05, "loss": 0.5161, "step": 42271 }, { "epoch": 0.8965239337447775, "grad_norm": 0.37842798233032227, "learning_rate": 1.1636734673832053e-05, "loss": 0.4773, "step": 42272 }, { "epoch": 0.8965451422027104, "grad_norm": 0.4136600196361542, "learning_rate": 1.163640567492565e-05, "loss": 0.6054, "step": 42273 }, { "epoch": 0.8965663506606435, "grad_norm": 0.3327701985836029, "learning_rate": 1.163607667419925e-05, "loss": 0.46, "step": 42274 }, { "epoch": 0.8965875591185765, "grad_norm": 0.3740050792694092, "learning_rate": 1.1635747671653222e-05, "loss": 0.4878, "step": 42275 }, { "epoch": 0.8966087675765095, "grad_norm": 0.4424232542514801, "learning_rate": 1.163541866728793e-05, "loss": 0.5615, "step": 42276 }, { "epoch": 0.8966299760344425, "grad_norm": 0.3920116424560547, "learning_rate": 1.1635089661103736e-05, "loss": 0.3919, "step": 42277 }, { "epoch": 0.8966511844923756, "grad_norm": 0.39307504892349243, "learning_rate": 1.1634760653101013e-05, "loss": 0.5631, "step": 42278 }, { "epoch": 0.8966723929503085, "grad_norm": 0.4772142767906189, "learning_rate": 1.1634431643280122e-05, "loss": 0.5021, "step": 42279 }, { "epoch": 0.8966936014082416, "grad_norm": 0.38661810755729675, "learning_rate": 1.1634102631641428e-05, "loss": 0.5396, "step": 42280 }, { "epoch": 0.8967148098661746, "grad_norm": 0.3701619803905487, "learning_rate": 1.1633773618185302e-05, "loss": 0.5417, "step": 42281 }, { "epoch": 0.8967360183241077, "grad_norm": 0.33838072419166565, "learning_rate": 1.1633444602912107e-05, "loss": 0.4632, "step": 42282 }, { "epoch": 0.8967572267820407, "grad_norm": 0.44281867146492004, "learning_rate": 1.163311558582221e-05, "loss": 0.4054, "step": 42283 }, { "epoch": 0.8967784352399737, "grad_norm": 0.40998172760009766, "learning_rate": 1.1632786566915977e-05, "loss": 0.4533, "step": 42284 }, { "epoch": 0.8967996436979068, "grad_norm": 0.3916489779949188, "learning_rate": 1.163245754619377e-05, "loss": 0.4942, "step": 42285 }, { "epoch": 0.8968208521558397, "grad_norm": 0.33906447887420654, "learning_rate": 1.163212852365596e-05, "loss": 0.4858, "step": 42286 }, { "epoch": 0.8968420606137728, "grad_norm": 0.4151589572429657, "learning_rate": 1.1631799499302907e-05, "loss": 0.4419, "step": 42287 }, { "epoch": 0.8968632690717058, "grad_norm": 0.3535716235637665, "learning_rate": 1.1631470473134983e-05, "loss": 0.407, "step": 42288 }, { "epoch": 0.8968844775296388, "grad_norm": 0.38328817486763, "learning_rate": 1.1631141445152551e-05, "loss": 0.5439, "step": 42289 }, { "epoch": 0.8969056859875718, "grad_norm": 0.3704969584941864, "learning_rate": 1.163081241535598e-05, "loss": 0.5242, "step": 42290 }, { "epoch": 0.8969268944455049, "grad_norm": 0.3455412685871124, "learning_rate": 1.1630483383745632e-05, "loss": 0.4746, "step": 42291 }, { "epoch": 0.8969481029034378, "grad_norm": 0.3491229712963104, "learning_rate": 1.1630154350321877e-05, "loss": 0.4819, "step": 42292 }, { "epoch": 0.8969693113613709, "grad_norm": 0.35614514350891113, "learning_rate": 1.1629825315085076e-05, "loss": 0.4553, "step": 42293 }, { "epoch": 0.8969905198193039, "grad_norm": 0.3418770730495453, "learning_rate": 1.1629496278035597e-05, "loss": 0.4844, "step": 42294 }, { "epoch": 0.897011728277237, "grad_norm": 0.339153528213501, "learning_rate": 1.1629167239173807e-05, "loss": 0.5034, "step": 42295 }, { "epoch": 0.89703293673517, "grad_norm": 0.35684454441070557, "learning_rate": 1.1628838198500069e-05, "loss": 0.5121, "step": 42296 }, { "epoch": 0.897054145193103, "grad_norm": 0.3863179683685303, "learning_rate": 1.1628509156014758e-05, "loss": 0.5168, "step": 42297 }, { "epoch": 0.8970753536510361, "grad_norm": 0.33017614483833313, "learning_rate": 1.1628180111718228e-05, "loss": 0.4423, "step": 42298 }, { "epoch": 0.897096562108969, "grad_norm": 0.5948657989501953, "learning_rate": 1.1627851065610849e-05, "loss": 0.5194, "step": 42299 }, { "epoch": 0.8971177705669021, "grad_norm": 0.34516069293022156, "learning_rate": 1.1627522017692992e-05, "loss": 0.4975, "step": 42300 }, { "epoch": 0.8971389790248351, "grad_norm": 0.35175007581710815, "learning_rate": 1.1627192967965018e-05, "loss": 0.4936, "step": 42301 }, { "epoch": 0.8971601874827682, "grad_norm": 0.36958473920822144, "learning_rate": 1.1626863916427293e-05, "loss": 0.5333, "step": 42302 }, { "epoch": 0.8971813959407011, "grad_norm": 0.33800047636032104, "learning_rate": 1.1626534863080186e-05, "loss": 0.4763, "step": 42303 }, { "epoch": 0.8972026043986342, "grad_norm": 0.3892171382904053, "learning_rate": 1.1626205807924063e-05, "loss": 0.5578, "step": 42304 }, { "epoch": 0.8972238128565672, "grad_norm": 0.4072509706020355, "learning_rate": 1.1625876750959281e-05, "loss": 0.4661, "step": 42305 }, { "epoch": 0.8972450213145002, "grad_norm": 0.3284194767475128, "learning_rate": 1.1625547692186218e-05, "loss": 0.4898, "step": 42306 }, { "epoch": 0.8972662297724332, "grad_norm": 0.34712353348731995, "learning_rate": 1.1625218631605234e-05, "loss": 0.4923, "step": 42307 }, { "epoch": 0.8972874382303663, "grad_norm": 0.3248739540576935, "learning_rate": 1.1624889569216696e-05, "loss": 0.3631, "step": 42308 }, { "epoch": 0.8973086466882992, "grad_norm": 0.38271933794021606, "learning_rate": 1.162456050502097e-05, "loss": 0.4539, "step": 42309 }, { "epoch": 0.8973298551462323, "grad_norm": 0.4397604763507843, "learning_rate": 1.1624231439018421e-05, "loss": 0.5022, "step": 42310 }, { "epoch": 0.8973510636041654, "grad_norm": 0.5850247144699097, "learning_rate": 1.1623902371209419e-05, "loss": 0.5271, "step": 42311 }, { "epoch": 0.8973722720620984, "grad_norm": 0.31828588247299194, "learning_rate": 1.1623573301594322e-05, "loss": 0.4521, "step": 42312 }, { "epoch": 0.8973934805200314, "grad_norm": 0.4105921685695648, "learning_rate": 1.1623244230173503e-05, "loss": 0.4611, "step": 42313 }, { "epoch": 0.8974146889779644, "grad_norm": 0.38701000809669495, "learning_rate": 1.1622915156947327e-05, "loss": 0.5132, "step": 42314 }, { "epoch": 0.8974358974358975, "grad_norm": 0.4402594268321991, "learning_rate": 1.1622586081916158e-05, "loss": 0.5429, "step": 42315 }, { "epoch": 0.8974571058938304, "grad_norm": 0.42936190962791443, "learning_rate": 1.1622257005080364e-05, "loss": 0.4538, "step": 42316 }, { "epoch": 0.8974783143517635, "grad_norm": 0.4744779169559479, "learning_rate": 1.1621927926440311e-05, "loss": 0.4527, "step": 42317 }, { "epoch": 0.8974995228096965, "grad_norm": 0.36234721541404724, "learning_rate": 1.1621598845996362e-05, "loss": 0.4695, "step": 42318 }, { "epoch": 0.8975207312676295, "grad_norm": 0.3549782931804657, "learning_rate": 1.1621269763748882e-05, "loss": 0.4583, "step": 42319 }, { "epoch": 0.8975419397255625, "grad_norm": 0.3697061240673065, "learning_rate": 1.1620940679698243e-05, "loss": 0.4588, "step": 42320 }, { "epoch": 0.8975631481834956, "grad_norm": 0.43227633833885193, "learning_rate": 1.1620611593844808e-05, "loss": 0.5995, "step": 42321 }, { "epoch": 0.8975843566414285, "grad_norm": 0.3387562036514282, "learning_rate": 1.162028250618894e-05, "loss": 0.455, "step": 42322 }, { "epoch": 0.8976055650993616, "grad_norm": 0.3426012098789215, "learning_rate": 1.1619953416731012e-05, "loss": 0.5216, "step": 42323 }, { "epoch": 0.8976267735572947, "grad_norm": 1.1287323236465454, "learning_rate": 1.1619624325471384e-05, "loss": 0.4707, "step": 42324 }, { "epoch": 0.8976479820152277, "grad_norm": 0.6806184649467468, "learning_rate": 1.1619295232410422e-05, "loss": 0.4163, "step": 42325 }, { "epoch": 0.8976691904731607, "grad_norm": 0.3639119863510132, "learning_rate": 1.1618966137548498e-05, "loss": 0.4968, "step": 42326 }, { "epoch": 0.8976903989310937, "grad_norm": 0.374645471572876, "learning_rate": 1.1618637040885969e-05, "loss": 0.5539, "step": 42327 }, { "epoch": 0.8977116073890268, "grad_norm": 0.3569706380367279, "learning_rate": 1.161830794242321e-05, "loss": 0.473, "step": 42328 }, { "epoch": 0.8977328158469597, "grad_norm": 0.3405267894268036, "learning_rate": 1.1617978842160583e-05, "loss": 0.5143, "step": 42329 }, { "epoch": 0.8977540243048928, "grad_norm": 0.4014240801334381, "learning_rate": 1.161764974009845e-05, "loss": 0.5211, "step": 42330 }, { "epoch": 0.8977752327628258, "grad_norm": 0.4262840747833252, "learning_rate": 1.1617320636237183e-05, "loss": 0.5274, "step": 42331 }, { "epoch": 0.8977964412207589, "grad_norm": 0.3829742670059204, "learning_rate": 1.1616991530577148e-05, "loss": 0.4423, "step": 42332 }, { "epoch": 0.8978176496786918, "grad_norm": 0.39969944953918457, "learning_rate": 1.1616662423118708e-05, "loss": 0.421, "step": 42333 }, { "epoch": 0.8978388581366249, "grad_norm": 0.3422369956970215, "learning_rate": 1.1616333313862229e-05, "loss": 0.4514, "step": 42334 }, { "epoch": 0.8978600665945579, "grad_norm": 0.38773325085639954, "learning_rate": 1.161600420280808e-05, "loss": 0.4742, "step": 42335 }, { "epoch": 0.8978812750524909, "grad_norm": 0.3596608340740204, "learning_rate": 1.1615675089956625e-05, "loss": 0.4802, "step": 42336 }, { "epoch": 0.897902483510424, "grad_norm": 0.36989545822143555, "learning_rate": 1.1615345975308227e-05, "loss": 0.4865, "step": 42337 }, { "epoch": 0.897923691968357, "grad_norm": 0.3509935438632965, "learning_rate": 1.161501685886326e-05, "loss": 0.4749, "step": 42338 }, { "epoch": 0.89794490042629, "grad_norm": 0.3867816925048828, "learning_rate": 1.1614687740622079e-05, "loss": 0.5135, "step": 42339 }, { "epoch": 0.897966108884223, "grad_norm": 0.3741694688796997, "learning_rate": 1.1614358620585062e-05, "loss": 0.5542, "step": 42340 }, { "epoch": 0.8979873173421561, "grad_norm": 0.5563371181488037, "learning_rate": 1.1614029498752566e-05, "loss": 0.523, "step": 42341 }, { "epoch": 0.898008525800089, "grad_norm": 0.3576594293117523, "learning_rate": 1.1613700375124963e-05, "loss": 0.5532, "step": 42342 }, { "epoch": 0.8980297342580221, "grad_norm": 0.7512716054916382, "learning_rate": 1.1613371249702616e-05, "loss": 0.4989, "step": 42343 }, { "epoch": 0.8980509427159551, "grad_norm": 0.35209715366363525, "learning_rate": 1.1613042122485891e-05, "loss": 0.4774, "step": 42344 }, { "epoch": 0.8980721511738882, "grad_norm": 0.43458184599876404, "learning_rate": 1.1612712993475153e-05, "loss": 0.5433, "step": 42345 }, { "epoch": 0.8980933596318211, "grad_norm": 0.3369053602218628, "learning_rate": 1.1612383862670772e-05, "loss": 0.4598, "step": 42346 }, { "epoch": 0.8981145680897542, "grad_norm": 0.34301862120628357, "learning_rate": 1.1612054730073111e-05, "loss": 0.4161, "step": 42347 }, { "epoch": 0.8981357765476872, "grad_norm": 0.33028969168663025, "learning_rate": 1.1611725595682537e-05, "loss": 0.4372, "step": 42348 }, { "epoch": 0.8981569850056202, "grad_norm": 0.3386237323284149, "learning_rate": 1.1611396459499415e-05, "loss": 0.4632, "step": 42349 }, { "epoch": 0.8981781934635533, "grad_norm": 0.38148659467697144, "learning_rate": 1.1611067321524113e-05, "loss": 0.5846, "step": 42350 }, { "epoch": 0.8981994019214863, "grad_norm": 0.3100731670856476, "learning_rate": 1.1610738181756993e-05, "loss": 0.4142, "step": 42351 }, { "epoch": 0.8982206103794194, "grad_norm": 0.3533894121646881, "learning_rate": 1.1610409040198426e-05, "loss": 0.4878, "step": 42352 }, { "epoch": 0.8982418188373523, "grad_norm": 0.3598601818084717, "learning_rate": 1.1610079896848776e-05, "loss": 0.5034, "step": 42353 }, { "epoch": 0.8982630272952854, "grad_norm": 0.37480294704437256, "learning_rate": 1.160975075170841e-05, "loss": 0.4886, "step": 42354 }, { "epoch": 0.8982842357532184, "grad_norm": 0.352811723947525, "learning_rate": 1.160942160477769e-05, "loss": 0.4476, "step": 42355 }, { "epoch": 0.8983054442111514, "grad_norm": 0.3627128601074219, "learning_rate": 1.160909245605699e-05, "loss": 0.4258, "step": 42356 }, { "epoch": 0.8983266526690844, "grad_norm": 0.3768344819545746, "learning_rate": 1.1608763305546668e-05, "loss": 0.5103, "step": 42357 }, { "epoch": 0.8983478611270175, "grad_norm": 0.35444316267967224, "learning_rate": 1.1608434153247097e-05, "loss": 0.47, "step": 42358 }, { "epoch": 0.8983690695849504, "grad_norm": 0.36730849742889404, "learning_rate": 1.1608104999158634e-05, "loss": 0.5329, "step": 42359 }, { "epoch": 0.8983902780428835, "grad_norm": 0.3122188150882721, "learning_rate": 1.1607775843281654e-05, "loss": 0.3879, "step": 42360 }, { "epoch": 0.8984114865008165, "grad_norm": 0.38562002778053284, "learning_rate": 1.1607446685616519e-05, "loss": 0.521, "step": 42361 }, { "epoch": 0.8984326949587496, "grad_norm": 0.40918225049972534, "learning_rate": 1.1607117526163598e-05, "loss": 0.488, "step": 42362 }, { "epoch": 0.8984539034166825, "grad_norm": 0.4959877133369446, "learning_rate": 1.1606788364923252e-05, "loss": 0.5817, "step": 42363 }, { "epoch": 0.8984751118746156, "grad_norm": 0.363655686378479, "learning_rate": 1.1606459201895847e-05, "loss": 0.4733, "step": 42364 }, { "epoch": 0.8984963203325487, "grad_norm": 0.39502158761024475, "learning_rate": 1.1606130037081758e-05, "loss": 0.5324, "step": 42365 }, { "epoch": 0.8985175287904816, "grad_norm": 0.41863352060317993, "learning_rate": 1.1605800870481345e-05, "loss": 0.5331, "step": 42366 }, { "epoch": 0.8985387372484147, "grad_norm": 0.36221835017204285, "learning_rate": 1.160547170209497e-05, "loss": 0.5295, "step": 42367 }, { "epoch": 0.8985599457063477, "grad_norm": 0.35408392548561096, "learning_rate": 1.1605142531923006e-05, "loss": 0.3893, "step": 42368 }, { "epoch": 0.8985811541642807, "grad_norm": 0.3876539468765259, "learning_rate": 1.1604813359965816e-05, "loss": 0.4408, "step": 42369 }, { "epoch": 0.8986023626222137, "grad_norm": 0.33502891659736633, "learning_rate": 1.1604484186223766e-05, "loss": 0.4424, "step": 42370 }, { "epoch": 0.8986235710801468, "grad_norm": 0.35020238161087036, "learning_rate": 1.1604155010697221e-05, "loss": 0.4698, "step": 42371 }, { "epoch": 0.8986447795380798, "grad_norm": 0.4280637502670288, "learning_rate": 1.1603825833386554e-05, "loss": 0.5109, "step": 42372 }, { "epoch": 0.8986659879960128, "grad_norm": 0.3451552093029022, "learning_rate": 1.1603496654292122e-05, "loss": 0.5366, "step": 42373 }, { "epoch": 0.8986871964539458, "grad_norm": 0.3251511752605438, "learning_rate": 1.1603167473414296e-05, "loss": 0.4659, "step": 42374 }, { "epoch": 0.8987084049118789, "grad_norm": 0.3645351529121399, "learning_rate": 1.160283829075344e-05, "loss": 0.4382, "step": 42375 }, { "epoch": 0.8987296133698118, "grad_norm": 0.34871459007263184, "learning_rate": 1.160250910630992e-05, "loss": 0.54, "step": 42376 }, { "epoch": 0.8987508218277449, "grad_norm": 0.38249537348747253, "learning_rate": 1.1602179920084107e-05, "loss": 0.5265, "step": 42377 }, { "epoch": 0.898772030285678, "grad_norm": 0.4986516237258911, "learning_rate": 1.1601850732076361e-05, "loss": 0.5067, "step": 42378 }, { "epoch": 0.898793238743611, "grad_norm": 0.3839430510997772, "learning_rate": 1.160152154228705e-05, "loss": 0.4894, "step": 42379 }, { "epoch": 0.898814447201544, "grad_norm": 0.39102160930633545, "learning_rate": 1.1601192350716543e-05, "loss": 0.4405, "step": 42380 }, { "epoch": 0.898835655659477, "grad_norm": 0.3831624984741211, "learning_rate": 1.1600863157365203e-05, "loss": 0.4659, "step": 42381 }, { "epoch": 0.8988568641174101, "grad_norm": 0.34315118193626404, "learning_rate": 1.1600533962233398e-05, "loss": 0.3877, "step": 42382 }, { "epoch": 0.898878072575343, "grad_norm": 0.3820679783821106, "learning_rate": 1.160020476532149e-05, "loss": 0.5898, "step": 42383 }, { "epoch": 0.8988992810332761, "grad_norm": 0.41835084557533264, "learning_rate": 1.1599875566629848e-05, "loss": 0.4074, "step": 42384 }, { "epoch": 0.8989204894912091, "grad_norm": 0.3535621464252472, "learning_rate": 1.159954636615884e-05, "loss": 0.4753, "step": 42385 }, { "epoch": 0.8989416979491421, "grad_norm": 0.3739813566207886, "learning_rate": 1.1599217163908831e-05, "loss": 0.5034, "step": 42386 }, { "epoch": 0.8989629064070751, "grad_norm": 0.40662020444869995, "learning_rate": 1.1598887959880185e-05, "loss": 0.5269, "step": 42387 }, { "epoch": 0.8989841148650082, "grad_norm": 0.34210819005966187, "learning_rate": 1.1598558754073273e-05, "loss": 0.4346, "step": 42388 }, { "epoch": 0.8990053233229411, "grad_norm": 0.38576677441596985, "learning_rate": 1.1598229546488454e-05, "loss": 0.4905, "step": 42389 }, { "epoch": 0.8990265317808742, "grad_norm": 0.3525741398334503, "learning_rate": 1.1597900337126097e-05, "loss": 0.4355, "step": 42390 }, { "epoch": 0.8990477402388073, "grad_norm": 0.31688395142555237, "learning_rate": 1.1597571125986574e-05, "loss": 0.4421, "step": 42391 }, { "epoch": 0.8990689486967403, "grad_norm": 0.39370986819267273, "learning_rate": 1.1597241913070243e-05, "loss": 0.5026, "step": 42392 }, { "epoch": 0.8990901571546733, "grad_norm": 0.3406127989292145, "learning_rate": 1.1596912698377474e-05, "loss": 0.4489, "step": 42393 }, { "epoch": 0.8991113656126063, "grad_norm": 0.34991106390953064, "learning_rate": 1.1596583481908635e-05, "loss": 0.4338, "step": 42394 }, { "epoch": 0.8991325740705394, "grad_norm": 0.37414422631263733, "learning_rate": 1.1596254263664086e-05, "loss": 0.4738, "step": 42395 }, { "epoch": 0.8991537825284723, "grad_norm": 0.3352086544036865, "learning_rate": 1.1595925043644196e-05, "loss": 0.4867, "step": 42396 }, { "epoch": 0.8991749909864054, "grad_norm": 0.3836418092250824, "learning_rate": 1.1595595821849337e-05, "loss": 0.457, "step": 42397 }, { "epoch": 0.8991961994443384, "grad_norm": 0.3841472864151001, "learning_rate": 1.1595266598279866e-05, "loss": 0.4886, "step": 42398 }, { "epoch": 0.8992174079022714, "grad_norm": 0.534477710723877, "learning_rate": 1.1594937372936155e-05, "loss": 0.4571, "step": 42399 }, { "epoch": 0.8992386163602044, "grad_norm": 0.35145559906959534, "learning_rate": 1.1594608145818571e-05, "loss": 0.5181, "step": 42400 }, { "epoch": 0.8992598248181375, "grad_norm": 0.3775405287742615, "learning_rate": 1.1594278916927473e-05, "loss": 0.5313, "step": 42401 }, { "epoch": 0.8992810332760705, "grad_norm": 0.38940903544425964, "learning_rate": 1.1593949686263233e-05, "loss": 0.4519, "step": 42402 }, { "epoch": 0.8993022417340035, "grad_norm": 0.35151517391204834, "learning_rate": 1.1593620453826218e-05, "loss": 0.5101, "step": 42403 }, { "epoch": 0.8993234501919365, "grad_norm": 0.37085622549057007, "learning_rate": 1.1593291219616792e-05, "loss": 0.5435, "step": 42404 }, { "epoch": 0.8993446586498696, "grad_norm": 0.381923109292984, "learning_rate": 1.159296198363532e-05, "loss": 0.4711, "step": 42405 }, { "epoch": 0.8993658671078026, "grad_norm": 0.3240724802017212, "learning_rate": 1.1592632745882169e-05, "loss": 0.4557, "step": 42406 }, { "epoch": 0.8993870755657356, "grad_norm": 0.4697163701057434, "learning_rate": 1.1592303506357708e-05, "loss": 0.5428, "step": 42407 }, { "epoch": 0.8994082840236687, "grad_norm": 0.39004990458488464, "learning_rate": 1.1591974265062299e-05, "loss": 0.477, "step": 42408 }, { "epoch": 0.8994294924816016, "grad_norm": 0.33524951338768005, "learning_rate": 1.159164502199631e-05, "loss": 0.5076, "step": 42409 }, { "epoch": 0.8994507009395347, "grad_norm": 0.32677900791168213, "learning_rate": 1.1591315777160108e-05, "loss": 0.4556, "step": 42410 }, { "epoch": 0.8994719093974677, "grad_norm": 0.3611208498477936, "learning_rate": 1.1590986530554058e-05, "loss": 0.4822, "step": 42411 }, { "epoch": 0.8994931178554008, "grad_norm": 0.38159748911857605, "learning_rate": 1.1590657282178526e-05, "loss": 0.5396, "step": 42412 }, { "epoch": 0.8995143263133337, "grad_norm": 0.3754892647266388, "learning_rate": 1.1590328032033881e-05, "loss": 0.542, "step": 42413 }, { "epoch": 0.8995355347712668, "grad_norm": 0.3697061538696289, "learning_rate": 1.1589998780120486e-05, "loss": 0.5251, "step": 42414 }, { "epoch": 0.8995567432291998, "grad_norm": 0.37588444352149963, "learning_rate": 1.1589669526438705e-05, "loss": 0.4591, "step": 42415 }, { "epoch": 0.8995779516871328, "grad_norm": 0.38896530866622925, "learning_rate": 1.1589340270988912e-05, "loss": 0.4869, "step": 42416 }, { "epoch": 0.8995991601450658, "grad_norm": 0.34088894724845886, "learning_rate": 1.1589011013771467e-05, "loss": 0.4242, "step": 42417 }, { "epoch": 0.8996203686029989, "grad_norm": 0.39260005950927734, "learning_rate": 1.1588681754786737e-05, "loss": 0.4997, "step": 42418 }, { "epoch": 0.899641577060932, "grad_norm": 0.3565308451652527, "learning_rate": 1.1588352494035091e-05, "loss": 0.4647, "step": 42419 }, { "epoch": 0.8996627855188649, "grad_norm": 0.3874429166316986, "learning_rate": 1.1588023231516889e-05, "loss": 0.5063, "step": 42420 }, { "epoch": 0.899683993976798, "grad_norm": 0.3964272141456604, "learning_rate": 1.1587693967232503e-05, "loss": 0.6068, "step": 42421 }, { "epoch": 0.899705202434731, "grad_norm": 0.3611534535884857, "learning_rate": 1.15873647011823e-05, "loss": 0.467, "step": 42422 }, { "epoch": 0.899726410892664, "grad_norm": 0.3839126527309418, "learning_rate": 1.1587035433366642e-05, "loss": 0.4878, "step": 42423 }, { "epoch": 0.899747619350597, "grad_norm": 0.31164515018463135, "learning_rate": 1.1586706163785897e-05, "loss": 0.4786, "step": 42424 }, { "epoch": 0.8997688278085301, "grad_norm": 0.34399470686912537, "learning_rate": 1.1586376892440434e-05, "loss": 0.4459, "step": 42425 }, { "epoch": 0.899790036266463, "grad_norm": 0.36082860827445984, "learning_rate": 1.1586047619330612e-05, "loss": 0.5038, "step": 42426 }, { "epoch": 0.8998112447243961, "grad_norm": 0.36881619691848755, "learning_rate": 1.1585718344456804e-05, "loss": 0.4246, "step": 42427 }, { "epoch": 0.8998324531823291, "grad_norm": 0.41094812750816345, "learning_rate": 1.1585389067819373e-05, "loss": 0.5559, "step": 42428 }, { "epoch": 0.8998536616402621, "grad_norm": 0.3881760239601135, "learning_rate": 1.1585059789418684e-05, "loss": 0.4189, "step": 42429 }, { "epoch": 0.8998748700981951, "grad_norm": 0.3984154462814331, "learning_rate": 1.1584730509255108e-05, "loss": 0.5765, "step": 42430 }, { "epoch": 0.8998960785561282, "grad_norm": 0.3807115852832794, "learning_rate": 1.158440122732901e-05, "loss": 0.5005, "step": 42431 }, { "epoch": 0.8999172870140613, "grad_norm": 0.3516813814640045, "learning_rate": 1.1584071943640752e-05, "loss": 0.5245, "step": 42432 }, { "epoch": 0.8999384954719942, "grad_norm": 0.3596789538860321, "learning_rate": 1.1583742658190704e-05, "loss": 0.4649, "step": 42433 }, { "epoch": 0.8999597039299273, "grad_norm": 0.3644501268863678, "learning_rate": 1.158341337097923e-05, "loss": 0.4909, "step": 42434 }, { "epoch": 0.8999809123878603, "grad_norm": 0.33014464378356934, "learning_rate": 1.1583084082006696e-05, "loss": 0.4725, "step": 42435 }, { "epoch": 0.9000021208457933, "grad_norm": 0.3838164806365967, "learning_rate": 1.1582754791273476e-05, "loss": 0.5102, "step": 42436 }, { "epoch": 0.9000233293037263, "grad_norm": 0.34910428524017334, "learning_rate": 1.1582425498779923e-05, "loss": 0.4321, "step": 42437 }, { "epoch": 0.9000445377616594, "grad_norm": 0.3580147325992584, "learning_rate": 1.1582096204526416e-05, "loss": 0.4385, "step": 42438 }, { "epoch": 0.9000657462195923, "grad_norm": 0.36694952845573425, "learning_rate": 1.1581766908513313e-05, "loss": 0.4779, "step": 42439 }, { "epoch": 0.9000869546775254, "grad_norm": 0.381151407957077, "learning_rate": 1.1581437610740977e-05, "loss": 0.5077, "step": 42440 }, { "epoch": 0.9001081631354584, "grad_norm": 0.3994928300380707, "learning_rate": 1.1581108311209788e-05, "loss": 0.4942, "step": 42441 }, { "epoch": 0.9001293715933915, "grad_norm": 0.348591685295105, "learning_rate": 1.15807790099201e-05, "loss": 0.5126, "step": 42442 }, { "epoch": 0.9001505800513244, "grad_norm": 0.4037937521934509, "learning_rate": 1.1580449706872284e-05, "loss": 0.5057, "step": 42443 }, { "epoch": 0.9001717885092575, "grad_norm": 0.39104458689689636, "learning_rate": 1.1580120402066705e-05, "loss": 0.5104, "step": 42444 }, { "epoch": 0.9001929969671905, "grad_norm": 0.38736316561698914, "learning_rate": 1.1579791095503735e-05, "loss": 0.5569, "step": 42445 }, { "epoch": 0.9002142054251235, "grad_norm": 0.3845460116863251, "learning_rate": 1.1579461787183728e-05, "loss": 0.5081, "step": 42446 }, { "epoch": 0.9002354138830566, "grad_norm": 0.34140336513519287, "learning_rate": 1.1579132477107057e-05, "loss": 0.4908, "step": 42447 }, { "epoch": 0.9002566223409896, "grad_norm": 0.364551842212677, "learning_rate": 1.1578803165274093e-05, "loss": 0.5629, "step": 42448 }, { "epoch": 0.9002778307989227, "grad_norm": 0.32476842403411865, "learning_rate": 1.1578473851685197e-05, "loss": 0.4969, "step": 42449 }, { "epoch": 0.9002990392568556, "grad_norm": 0.36966368556022644, "learning_rate": 1.1578144536340736e-05, "loss": 0.5176, "step": 42450 }, { "epoch": 0.9003202477147887, "grad_norm": 0.3309534788131714, "learning_rate": 1.1577815219241074e-05, "loss": 0.4736, "step": 42451 }, { "epoch": 0.9003414561727217, "grad_norm": 0.32965996861457825, "learning_rate": 1.1577485900386583e-05, "loss": 0.4318, "step": 42452 }, { "epoch": 0.9003626646306547, "grad_norm": 0.37631312012672424, "learning_rate": 1.1577156579777621e-05, "loss": 0.4686, "step": 42453 }, { "epoch": 0.9003838730885877, "grad_norm": 0.3707848787307739, "learning_rate": 1.1576827257414562e-05, "loss": 0.5369, "step": 42454 }, { "epoch": 0.9004050815465208, "grad_norm": 0.3649364113807678, "learning_rate": 1.1576497933297773e-05, "loss": 0.4822, "step": 42455 }, { "epoch": 0.9004262900044537, "grad_norm": 0.3980441093444824, "learning_rate": 1.1576168607427613e-05, "loss": 0.5108, "step": 42456 }, { "epoch": 0.9004474984623868, "grad_norm": 0.37233516573905945, "learning_rate": 1.1575839279804453e-05, "loss": 0.5048, "step": 42457 }, { "epoch": 0.9004687069203198, "grad_norm": 0.40438660979270935, "learning_rate": 1.1575509950428659e-05, "loss": 0.4695, "step": 42458 }, { "epoch": 0.9004899153782528, "grad_norm": 0.3592197597026825, "learning_rate": 1.1575180619300595e-05, "loss": 0.4792, "step": 42459 }, { "epoch": 0.9005111238361859, "grad_norm": 0.34972769021987915, "learning_rate": 1.1574851286420626e-05, "loss": 0.5268, "step": 42460 }, { "epoch": 0.9005323322941189, "grad_norm": 0.3747151792049408, "learning_rate": 1.1574521951789123e-05, "loss": 0.5331, "step": 42461 }, { "epoch": 0.900553540752052, "grad_norm": 0.38565799593925476, "learning_rate": 1.1574192615406455e-05, "loss": 0.5102, "step": 42462 }, { "epoch": 0.9005747492099849, "grad_norm": 0.3534289598464966, "learning_rate": 1.157386327727298e-05, "loss": 0.5023, "step": 42463 }, { "epoch": 0.900595957667918, "grad_norm": 0.3619671165943146, "learning_rate": 1.1573533937389068e-05, "loss": 0.5238, "step": 42464 }, { "epoch": 0.900617166125851, "grad_norm": 0.36558109521865845, "learning_rate": 1.1573204595755083e-05, "loss": 0.4721, "step": 42465 }, { "epoch": 0.900638374583784, "grad_norm": 0.4283173084259033, "learning_rate": 1.1572875252371395e-05, "loss": 0.544, "step": 42466 }, { "epoch": 0.900659583041717, "grad_norm": 0.3787989020347595, "learning_rate": 1.157254590723837e-05, "loss": 0.5493, "step": 42467 }, { "epoch": 0.9006807914996501, "grad_norm": 0.3694365620613098, "learning_rate": 1.1572216560356374e-05, "loss": 0.4562, "step": 42468 }, { "epoch": 0.900701999957583, "grad_norm": 0.4096091091632843, "learning_rate": 1.157188721172577e-05, "loss": 0.533, "step": 42469 }, { "epoch": 0.9007232084155161, "grad_norm": 0.40878987312316895, "learning_rate": 1.157155786134693e-05, "loss": 0.5875, "step": 42470 }, { "epoch": 0.9007444168734491, "grad_norm": 0.3935292065143585, "learning_rate": 1.1571228509220214e-05, "loss": 0.5122, "step": 42471 }, { "epoch": 0.9007656253313822, "grad_norm": 0.36960768699645996, "learning_rate": 1.1570899155345994e-05, "loss": 0.464, "step": 42472 }, { "epoch": 0.9007868337893152, "grad_norm": 0.3493906259536743, "learning_rate": 1.157056979972463e-05, "loss": 0.4081, "step": 42473 }, { "epoch": 0.9008080422472482, "grad_norm": 0.42239904403686523, "learning_rate": 1.1570240442356493e-05, "loss": 0.5884, "step": 42474 }, { "epoch": 0.9008292507051813, "grad_norm": 0.3167513608932495, "learning_rate": 1.1569911083241949e-05, "loss": 0.4565, "step": 42475 }, { "epoch": 0.9008504591631142, "grad_norm": 0.48989272117614746, "learning_rate": 1.1569581722381364e-05, "loss": 0.4336, "step": 42476 }, { "epoch": 0.9008716676210473, "grad_norm": 0.38316062092781067, "learning_rate": 1.1569252359775103e-05, "loss": 0.5538, "step": 42477 }, { "epoch": 0.9008928760789803, "grad_norm": 0.39050930738449097, "learning_rate": 1.1568922995423533e-05, "loss": 0.4791, "step": 42478 }, { "epoch": 0.9009140845369134, "grad_norm": 0.3752456605434418, "learning_rate": 1.156859362932702e-05, "loss": 0.5715, "step": 42479 }, { "epoch": 0.9009352929948463, "grad_norm": 0.3765731453895569, "learning_rate": 1.156826426148593e-05, "loss": 0.4951, "step": 42480 }, { "epoch": 0.9009565014527794, "grad_norm": 0.34568530321121216, "learning_rate": 1.1567934891900634e-05, "loss": 0.4199, "step": 42481 }, { "epoch": 0.9009777099107124, "grad_norm": 0.6612977385520935, "learning_rate": 1.1567605520571492e-05, "loss": 0.5541, "step": 42482 }, { "epoch": 0.9009989183686454, "grad_norm": 0.3517199158668518, "learning_rate": 1.1567276147498873e-05, "loss": 0.5019, "step": 42483 }, { "epoch": 0.9010201268265784, "grad_norm": 0.3740488886833191, "learning_rate": 1.1566946772683143e-05, "loss": 0.4676, "step": 42484 }, { "epoch": 0.9010413352845115, "grad_norm": 0.3652811050415039, "learning_rate": 1.1566617396124668e-05, "loss": 0.5842, "step": 42485 }, { "epoch": 0.9010625437424444, "grad_norm": 0.4048837721347809, "learning_rate": 1.1566288017823815e-05, "loss": 0.5158, "step": 42486 }, { "epoch": 0.9010837522003775, "grad_norm": 0.33978772163391113, "learning_rate": 1.1565958637780951e-05, "loss": 0.4871, "step": 42487 }, { "epoch": 0.9011049606583106, "grad_norm": 0.40125393867492676, "learning_rate": 1.1565629255996441e-05, "loss": 0.4315, "step": 42488 }, { "epoch": 0.9011261691162435, "grad_norm": 0.3945337235927582, "learning_rate": 1.156529987247065e-05, "loss": 0.4356, "step": 42489 }, { "epoch": 0.9011473775741766, "grad_norm": 0.37376868724823, "learning_rate": 1.156497048720395e-05, "loss": 0.5003, "step": 42490 }, { "epoch": 0.9011685860321096, "grad_norm": 0.38884860277175903, "learning_rate": 1.1564641100196698e-05, "loss": 0.4368, "step": 42491 }, { "epoch": 0.9011897944900427, "grad_norm": 0.3534681797027588, "learning_rate": 1.1564311711449268e-05, "loss": 0.4254, "step": 42492 }, { "epoch": 0.9012110029479756, "grad_norm": 0.3461492657661438, "learning_rate": 1.1563982320962026e-05, "loss": 0.4732, "step": 42493 }, { "epoch": 0.9012322114059087, "grad_norm": 0.40266871452331543, "learning_rate": 1.1563652928735334e-05, "loss": 0.5462, "step": 42494 }, { "epoch": 0.9012534198638417, "grad_norm": 0.36916837096214294, "learning_rate": 1.1563323534769563e-05, "loss": 0.4606, "step": 42495 }, { "epoch": 0.9012746283217747, "grad_norm": 0.3523145616054535, "learning_rate": 1.1562994139065076e-05, "loss": 0.4395, "step": 42496 }, { "epoch": 0.9012958367797077, "grad_norm": 0.38413190841674805, "learning_rate": 1.1562664741622243e-05, "loss": 0.5173, "step": 42497 }, { "epoch": 0.9013170452376408, "grad_norm": 0.32388371229171753, "learning_rate": 1.1562335342441423e-05, "loss": 0.4827, "step": 42498 }, { "epoch": 0.9013382536955737, "grad_norm": 0.37208548188209534, "learning_rate": 1.1562005941522992e-05, "loss": 0.469, "step": 42499 }, { "epoch": 0.9013594621535068, "grad_norm": 0.3640163242816925, "learning_rate": 1.1561676538867308e-05, "loss": 0.4442, "step": 42500 }, { "epoch": 0.9013806706114399, "grad_norm": 0.38379010558128357, "learning_rate": 1.1561347134474743e-05, "loss": 0.4446, "step": 42501 }, { "epoch": 0.9014018790693729, "grad_norm": 0.3543955981731415, "learning_rate": 1.1561017728345659e-05, "loss": 0.4641, "step": 42502 }, { "epoch": 0.9014230875273059, "grad_norm": 0.35541924834251404, "learning_rate": 1.1560688320480426e-05, "loss": 0.4412, "step": 42503 }, { "epoch": 0.9014442959852389, "grad_norm": 0.3979014456272125, "learning_rate": 1.156035891087941e-05, "loss": 0.4081, "step": 42504 }, { "epoch": 0.901465504443172, "grad_norm": 0.33813267946243286, "learning_rate": 1.1560029499542973e-05, "loss": 0.4759, "step": 42505 }, { "epoch": 0.9014867129011049, "grad_norm": 0.36538296937942505, "learning_rate": 1.1559700086471488e-05, "loss": 0.4721, "step": 42506 }, { "epoch": 0.901507921359038, "grad_norm": 0.4011145532131195, "learning_rate": 1.155937067166532e-05, "loss": 0.5316, "step": 42507 }, { "epoch": 0.901529129816971, "grad_norm": 0.34890905022621155, "learning_rate": 1.1559041255124827e-05, "loss": 0.494, "step": 42508 }, { "epoch": 0.901550338274904, "grad_norm": 0.3663211762905121, "learning_rate": 1.155871183685039e-05, "loss": 0.5098, "step": 42509 }, { "epoch": 0.901571546732837, "grad_norm": 0.38653990626335144, "learning_rate": 1.1558382416842359e-05, "loss": 0.4144, "step": 42510 }, { "epoch": 0.9015927551907701, "grad_norm": 0.4124414622783661, "learning_rate": 1.1558052995101112e-05, "loss": 0.4307, "step": 42511 }, { "epoch": 0.901613963648703, "grad_norm": 0.34047284722328186, "learning_rate": 1.1557723571627016e-05, "loss": 0.4749, "step": 42512 }, { "epoch": 0.9016351721066361, "grad_norm": 0.33035606145858765, "learning_rate": 1.1557394146420428e-05, "loss": 0.4624, "step": 42513 }, { "epoch": 0.9016563805645692, "grad_norm": 0.33912450075149536, "learning_rate": 1.1557064719481721e-05, "loss": 0.5516, "step": 42514 }, { "epoch": 0.9016775890225022, "grad_norm": 0.31796345114707947, "learning_rate": 1.1556735290811262e-05, "loss": 0.4254, "step": 42515 }, { "epoch": 0.9016987974804352, "grad_norm": 0.3194456100463867, "learning_rate": 1.1556405860409414e-05, "loss": 0.4163, "step": 42516 }, { "epoch": 0.9017200059383682, "grad_norm": 0.6291137933731079, "learning_rate": 1.1556076428276543e-05, "loss": 0.4742, "step": 42517 }, { "epoch": 0.9017412143963013, "grad_norm": 0.3844616115093231, "learning_rate": 1.1555746994413022e-05, "loss": 0.5164, "step": 42518 }, { "epoch": 0.9017624228542342, "grad_norm": 0.376307874917984, "learning_rate": 1.1555417558819209e-05, "loss": 0.5456, "step": 42519 }, { "epoch": 0.9017836313121673, "grad_norm": 0.3551078140735626, "learning_rate": 1.1555088121495476e-05, "loss": 0.4335, "step": 42520 }, { "epoch": 0.9018048397701003, "grad_norm": 0.3637879192829132, "learning_rate": 1.1554758682442189e-05, "loss": 0.476, "step": 42521 }, { "epoch": 0.9018260482280334, "grad_norm": 0.3242606818675995, "learning_rate": 1.1554429241659711e-05, "loss": 0.4446, "step": 42522 }, { "epoch": 0.9018472566859663, "grad_norm": 0.9034228920936584, "learning_rate": 1.155409979914841e-05, "loss": 0.484, "step": 42523 }, { "epoch": 0.9018684651438994, "grad_norm": 0.39761489629745483, "learning_rate": 1.155377035490865e-05, "loss": 0.4742, "step": 42524 }, { "epoch": 0.9018896736018324, "grad_norm": 0.37692710757255554, "learning_rate": 1.1553440908940803e-05, "loss": 0.4534, "step": 42525 }, { "epoch": 0.9019108820597654, "grad_norm": 0.39828383922576904, "learning_rate": 1.1553111461245235e-05, "loss": 0.4337, "step": 42526 }, { "epoch": 0.9019320905176984, "grad_norm": 0.4397023022174835, "learning_rate": 1.1552782011822308e-05, "loss": 0.5486, "step": 42527 }, { "epoch": 0.9019532989756315, "grad_norm": 0.3438049852848053, "learning_rate": 1.1552452560672387e-05, "loss": 0.5497, "step": 42528 }, { "epoch": 0.9019745074335646, "grad_norm": 0.3796166479587555, "learning_rate": 1.1552123107795846e-05, "loss": 0.4795, "step": 42529 }, { "epoch": 0.9019957158914975, "grad_norm": 0.3571282625198364, "learning_rate": 1.1551793653193045e-05, "loss": 0.5609, "step": 42530 }, { "epoch": 0.9020169243494306, "grad_norm": 0.375975638628006, "learning_rate": 1.1551464196864353e-05, "loss": 0.4324, "step": 42531 }, { "epoch": 0.9020381328073636, "grad_norm": 0.3887198269367218, "learning_rate": 1.1551134738810138e-05, "loss": 0.4705, "step": 42532 }, { "epoch": 0.9020593412652966, "grad_norm": 0.36400696635246277, "learning_rate": 1.1550805279030759e-05, "loss": 0.5548, "step": 42533 }, { "epoch": 0.9020805497232296, "grad_norm": 0.36417579650878906, "learning_rate": 1.1550475817526592e-05, "loss": 0.5451, "step": 42534 }, { "epoch": 0.9021017581811627, "grad_norm": 0.4300003945827484, "learning_rate": 1.1550146354297999e-05, "loss": 0.5679, "step": 42535 }, { "epoch": 0.9021229666390956, "grad_norm": 0.3983829915523529, "learning_rate": 1.1549816889345346e-05, "loss": 0.5089, "step": 42536 }, { "epoch": 0.9021441750970287, "grad_norm": 0.4113309383392334, "learning_rate": 1.1549487422669001e-05, "loss": 0.5494, "step": 42537 }, { "epoch": 0.9021653835549617, "grad_norm": 0.3410402238368988, "learning_rate": 1.154915795426933e-05, "loss": 0.501, "step": 42538 }, { "epoch": 0.9021865920128948, "grad_norm": 0.3784889578819275, "learning_rate": 1.1548828484146697e-05, "loss": 0.4356, "step": 42539 }, { "epoch": 0.9022078004708277, "grad_norm": 0.35085663199424744, "learning_rate": 1.1548499012301473e-05, "loss": 0.4467, "step": 42540 }, { "epoch": 0.9022290089287608, "grad_norm": 0.32892754673957825, "learning_rate": 1.154816953873402e-05, "loss": 0.386, "step": 42541 }, { "epoch": 0.9022502173866939, "grad_norm": 0.47733670473098755, "learning_rate": 1.1547840063444707e-05, "loss": 0.4744, "step": 42542 }, { "epoch": 0.9022714258446268, "grad_norm": 0.4040677547454834, "learning_rate": 1.1547510586433896e-05, "loss": 0.4299, "step": 42543 }, { "epoch": 0.9022926343025599, "grad_norm": 0.37365302443504333, "learning_rate": 1.1547181107701964e-05, "loss": 0.5015, "step": 42544 }, { "epoch": 0.9023138427604929, "grad_norm": 0.37316563725471497, "learning_rate": 1.1546851627249265e-05, "loss": 0.457, "step": 42545 }, { "epoch": 0.9023350512184259, "grad_norm": 0.4082515239715576, "learning_rate": 1.1546522145076173e-05, "loss": 0.5252, "step": 42546 }, { "epoch": 0.9023562596763589, "grad_norm": 0.3927520215511322, "learning_rate": 1.1546192661183054e-05, "loss": 0.4276, "step": 42547 }, { "epoch": 0.902377468134292, "grad_norm": 0.33829447627067566, "learning_rate": 1.1545863175570271e-05, "loss": 0.4867, "step": 42548 }, { "epoch": 0.902398676592225, "grad_norm": 0.368074893951416, "learning_rate": 1.1545533688238193e-05, "loss": 0.5073, "step": 42549 }, { "epoch": 0.902419885050158, "grad_norm": 0.34840649366378784, "learning_rate": 1.1545204199187184e-05, "loss": 0.495, "step": 42550 }, { "epoch": 0.902441093508091, "grad_norm": 0.42051538825035095, "learning_rate": 1.1544874708417616e-05, "loss": 0.5181, "step": 42551 }, { "epoch": 0.9024623019660241, "grad_norm": 0.3711889684200287, "learning_rate": 1.154454521592985e-05, "loss": 0.4809, "step": 42552 }, { "epoch": 0.902483510423957, "grad_norm": 0.3437846899032593, "learning_rate": 1.1544215721724253e-05, "loss": 0.4968, "step": 42553 }, { "epoch": 0.9025047188818901, "grad_norm": 0.4669536054134369, "learning_rate": 1.1543886225801196e-05, "loss": 0.5397, "step": 42554 }, { "epoch": 0.9025259273398232, "grad_norm": 0.3500126302242279, "learning_rate": 1.154355672816104e-05, "loss": 0.4603, "step": 42555 }, { "epoch": 0.9025471357977561, "grad_norm": 0.34133145213127136, "learning_rate": 1.154322722880415e-05, "loss": 0.4904, "step": 42556 }, { "epoch": 0.9025683442556892, "grad_norm": 0.364951491355896, "learning_rate": 1.1542897727730902e-05, "loss": 0.5085, "step": 42557 }, { "epoch": 0.9025895527136222, "grad_norm": 0.36432793736457825, "learning_rate": 1.1542568224941655e-05, "loss": 0.5364, "step": 42558 }, { "epoch": 0.9026107611715553, "grad_norm": 0.3232876658439636, "learning_rate": 1.1542238720436775e-05, "loss": 0.4798, "step": 42559 }, { "epoch": 0.9026319696294882, "grad_norm": 0.41866400837898254, "learning_rate": 1.1541909214216633e-05, "loss": 0.5177, "step": 42560 }, { "epoch": 0.9026531780874213, "grad_norm": 0.3476291596889496, "learning_rate": 1.1541579706281591e-05, "loss": 0.4949, "step": 42561 }, { "epoch": 0.9026743865453543, "grad_norm": 0.37807878851890564, "learning_rate": 1.1541250196632016e-05, "loss": 0.5126, "step": 42562 }, { "epoch": 0.9026955950032873, "grad_norm": 0.38727834820747375, "learning_rate": 1.154092068526828e-05, "loss": 0.4515, "step": 42563 }, { "epoch": 0.9027168034612203, "grad_norm": 0.43214985728263855, "learning_rate": 1.1540591172190742e-05, "loss": 0.6109, "step": 42564 }, { "epoch": 0.9027380119191534, "grad_norm": 0.3281242549419403, "learning_rate": 1.1540261657399775e-05, "loss": 0.4476, "step": 42565 }, { "epoch": 0.9027592203770863, "grad_norm": 1.1684497594833374, "learning_rate": 1.1539932140895742e-05, "loss": 0.4284, "step": 42566 }, { "epoch": 0.9027804288350194, "grad_norm": 0.3624925911426544, "learning_rate": 1.1539602622679006e-05, "loss": 0.4961, "step": 42567 }, { "epoch": 0.9028016372929524, "grad_norm": 0.4006912112236023, "learning_rate": 1.153927310274994e-05, "loss": 0.4667, "step": 42568 }, { "epoch": 0.9028228457508854, "grad_norm": 0.3609394431114197, "learning_rate": 1.153894358110891e-05, "loss": 0.5116, "step": 42569 }, { "epoch": 0.9028440542088185, "grad_norm": 0.43381810188293457, "learning_rate": 1.1538614057756276e-05, "loss": 0.4137, "step": 42570 }, { "epoch": 0.9028652626667515, "grad_norm": 0.34986191987991333, "learning_rate": 1.1538284532692413e-05, "loss": 0.5255, "step": 42571 }, { "epoch": 0.9028864711246846, "grad_norm": 0.40958741307258606, "learning_rate": 1.1537955005917683e-05, "loss": 0.5183, "step": 42572 }, { "epoch": 0.9029076795826175, "grad_norm": 0.3995082378387451, "learning_rate": 1.1537625477432448e-05, "loss": 0.4954, "step": 42573 }, { "epoch": 0.9029288880405506, "grad_norm": 0.4057868719100952, "learning_rate": 1.1537295947237084e-05, "loss": 0.4413, "step": 42574 }, { "epoch": 0.9029500964984836, "grad_norm": 0.37819918990135193, "learning_rate": 1.153696641533195e-05, "loss": 0.498, "step": 42575 }, { "epoch": 0.9029713049564166, "grad_norm": 0.3668040931224823, "learning_rate": 1.1536636881717416e-05, "loss": 0.4384, "step": 42576 }, { "epoch": 0.9029925134143496, "grad_norm": 0.45095592737197876, "learning_rate": 1.1536307346393848e-05, "loss": 0.5567, "step": 42577 }, { "epoch": 0.9030137218722827, "grad_norm": 0.31624630093574524, "learning_rate": 1.153597780936161e-05, "loss": 0.4655, "step": 42578 }, { "epoch": 0.9030349303302156, "grad_norm": 0.43723753094673157, "learning_rate": 1.1535648270621075e-05, "loss": 0.597, "step": 42579 }, { "epoch": 0.9030561387881487, "grad_norm": 0.42632389068603516, "learning_rate": 1.1535318730172606e-05, "loss": 0.4734, "step": 42580 }, { "epoch": 0.9030773472460817, "grad_norm": 1.0203477144241333, "learning_rate": 1.1534989188016566e-05, "loss": 0.493, "step": 42581 }, { "epoch": 0.9030985557040148, "grad_norm": 0.35179591178894043, "learning_rate": 1.1534659644153323e-05, "loss": 0.3583, "step": 42582 }, { "epoch": 0.9031197641619478, "grad_norm": 0.3578849136829376, "learning_rate": 1.1534330098583249e-05, "loss": 0.4317, "step": 42583 }, { "epoch": 0.9031409726198808, "grad_norm": 0.3558413088321686, "learning_rate": 1.1534000551306703e-05, "loss": 0.5143, "step": 42584 }, { "epoch": 0.9031621810778139, "grad_norm": 0.40506088733673096, "learning_rate": 1.1533671002324057e-05, "loss": 0.5786, "step": 42585 }, { "epoch": 0.9031833895357468, "grad_norm": 0.4102013409137726, "learning_rate": 1.1533341451635676e-05, "loss": 0.5426, "step": 42586 }, { "epoch": 0.9032045979936799, "grad_norm": 0.41012999415397644, "learning_rate": 1.1533011899241923e-05, "loss": 0.4946, "step": 42587 }, { "epoch": 0.9032258064516129, "grad_norm": 0.4354850947856903, "learning_rate": 1.1532682345143168e-05, "loss": 0.4375, "step": 42588 }, { "epoch": 0.903247014909546, "grad_norm": 0.3741537034511566, "learning_rate": 1.1532352789339778e-05, "loss": 0.4401, "step": 42589 }, { "epoch": 0.9032682233674789, "grad_norm": 0.3738446533679962, "learning_rate": 1.1532023231832118e-05, "loss": 0.487, "step": 42590 }, { "epoch": 0.903289431825412, "grad_norm": 0.6147978901863098, "learning_rate": 1.1531693672620557e-05, "loss": 0.5178, "step": 42591 }, { "epoch": 0.903310640283345, "grad_norm": 0.35854554176330566, "learning_rate": 1.1531364111705456e-05, "loss": 0.4504, "step": 42592 }, { "epoch": 0.903331848741278, "grad_norm": 0.36443161964416504, "learning_rate": 1.153103454908719e-05, "loss": 0.4467, "step": 42593 }, { "epoch": 0.903353057199211, "grad_norm": 0.3584524095058441, "learning_rate": 1.1530704984766117e-05, "loss": 0.4779, "step": 42594 }, { "epoch": 0.9033742656571441, "grad_norm": 0.3844228982925415, "learning_rate": 1.153037541874261e-05, "loss": 0.4389, "step": 42595 }, { "epoch": 0.9033954741150771, "grad_norm": 0.39474183320999146, "learning_rate": 1.1530045851017029e-05, "loss": 0.468, "step": 42596 }, { "epoch": 0.9034166825730101, "grad_norm": 0.42394575476646423, "learning_rate": 1.1529716281589747e-05, "loss": 0.4531, "step": 42597 }, { "epoch": 0.9034378910309432, "grad_norm": 0.3524799048900604, "learning_rate": 1.1529386710461126e-05, "loss": 0.4514, "step": 42598 }, { "epoch": 0.9034590994888761, "grad_norm": 0.4816446602344513, "learning_rate": 1.1529057137631536e-05, "loss": 0.5076, "step": 42599 }, { "epoch": 0.9034803079468092, "grad_norm": 0.3537384271621704, "learning_rate": 1.1528727563101343e-05, "loss": 0.4962, "step": 42600 }, { "epoch": 0.9035015164047422, "grad_norm": 0.346610426902771, "learning_rate": 1.1528397986870907e-05, "loss": 0.4276, "step": 42601 }, { "epoch": 0.9035227248626753, "grad_norm": 0.3663433790206909, "learning_rate": 1.1528068408940607e-05, "loss": 0.4906, "step": 42602 }, { "epoch": 0.9035439333206082, "grad_norm": 0.34864041209220886, "learning_rate": 1.15277388293108e-05, "loss": 0.5088, "step": 42603 }, { "epoch": 0.9035651417785413, "grad_norm": 0.34573429822921753, "learning_rate": 1.1527409247981854e-05, "loss": 0.4708, "step": 42604 }, { "epoch": 0.9035863502364743, "grad_norm": 1.5058046579360962, "learning_rate": 1.1527079664954138e-05, "loss": 0.4556, "step": 42605 }, { "epoch": 0.9036075586944073, "grad_norm": 0.36345958709716797, "learning_rate": 1.1526750080228016e-05, "loss": 0.5003, "step": 42606 }, { "epoch": 0.9036287671523403, "grad_norm": 0.4394221603870392, "learning_rate": 1.1526420493803854e-05, "loss": 0.464, "step": 42607 }, { "epoch": 0.9036499756102734, "grad_norm": 0.3735479712486267, "learning_rate": 1.1526090905682024e-05, "loss": 0.5657, "step": 42608 }, { "epoch": 0.9036711840682063, "grad_norm": 0.3656046688556671, "learning_rate": 1.152576131586289e-05, "loss": 0.4801, "step": 42609 }, { "epoch": 0.9036923925261394, "grad_norm": 0.3581620454788208, "learning_rate": 1.1525431724346814e-05, "loss": 0.5298, "step": 42610 }, { "epoch": 0.9037136009840725, "grad_norm": 0.32803773880004883, "learning_rate": 1.1525102131134168e-05, "loss": 0.4577, "step": 42611 }, { "epoch": 0.9037348094420055, "grad_norm": 0.3579771816730499, "learning_rate": 1.1524772536225316e-05, "loss": 0.5054, "step": 42612 }, { "epoch": 0.9037560178999385, "grad_norm": 0.3359992504119873, "learning_rate": 1.1524442939620625e-05, "loss": 0.5031, "step": 42613 }, { "epoch": 0.9037772263578715, "grad_norm": 0.3280121386051178, "learning_rate": 1.1524113341320462e-05, "loss": 0.417, "step": 42614 }, { "epoch": 0.9037984348158046, "grad_norm": 0.30848199129104614, "learning_rate": 1.1523783741325192e-05, "loss": 0.4282, "step": 42615 }, { "epoch": 0.9038196432737375, "grad_norm": 0.3656628429889679, "learning_rate": 1.1523454139635185e-05, "loss": 0.4884, "step": 42616 }, { "epoch": 0.9038408517316706, "grad_norm": 0.3733585774898529, "learning_rate": 1.1523124536250806e-05, "loss": 0.489, "step": 42617 }, { "epoch": 0.9038620601896036, "grad_norm": 0.36703962087631226, "learning_rate": 1.1522794931172419e-05, "loss": 0.4766, "step": 42618 }, { "epoch": 0.9038832686475367, "grad_norm": 0.3746587336063385, "learning_rate": 1.1522465324400396e-05, "loss": 0.5244, "step": 42619 }, { "epoch": 0.9039044771054696, "grad_norm": 0.3578082323074341, "learning_rate": 1.1522135715935097e-05, "loss": 0.5431, "step": 42620 }, { "epoch": 0.9039256855634027, "grad_norm": 0.3614581823348999, "learning_rate": 1.1521806105776892e-05, "loss": 0.4596, "step": 42621 }, { "epoch": 0.9039468940213357, "grad_norm": 0.3243573307991028, "learning_rate": 1.1521476493926149e-05, "loss": 0.4765, "step": 42622 }, { "epoch": 0.9039681024792687, "grad_norm": 0.34985610842704773, "learning_rate": 1.1521146880383234e-05, "loss": 0.5023, "step": 42623 }, { "epoch": 0.9039893109372018, "grad_norm": 0.3751697242259979, "learning_rate": 1.1520817265148509e-05, "loss": 0.4726, "step": 42624 }, { "epoch": 0.9040105193951348, "grad_norm": 0.35134080052375793, "learning_rate": 1.1520487648222346e-05, "loss": 0.4778, "step": 42625 }, { "epoch": 0.9040317278530678, "grad_norm": 0.40201422572135925, "learning_rate": 1.1520158029605111e-05, "loss": 0.5117, "step": 42626 }, { "epoch": 0.9040529363110008, "grad_norm": 0.4275505840778351, "learning_rate": 1.1519828409297168e-05, "loss": 0.4865, "step": 42627 }, { "epoch": 0.9040741447689339, "grad_norm": 0.3412635624408722, "learning_rate": 1.1519498787298886e-05, "loss": 0.3922, "step": 42628 }, { "epoch": 0.9040953532268668, "grad_norm": 0.3669702708721161, "learning_rate": 1.1519169163610631e-05, "loss": 0.5041, "step": 42629 }, { "epoch": 0.9041165616847999, "grad_norm": 0.3906905949115753, "learning_rate": 1.1518839538232769e-05, "loss": 0.5242, "step": 42630 }, { "epoch": 0.9041377701427329, "grad_norm": 0.35771307349205017, "learning_rate": 1.1518509911165667e-05, "loss": 0.4679, "step": 42631 }, { "epoch": 0.904158978600666, "grad_norm": 0.4047049582004547, "learning_rate": 1.151818028240969e-05, "loss": 0.4995, "step": 42632 }, { "epoch": 0.9041801870585989, "grad_norm": 0.3479216694831848, "learning_rate": 1.1517850651965205e-05, "loss": 0.5813, "step": 42633 }, { "epoch": 0.904201395516532, "grad_norm": 0.3301897943019867, "learning_rate": 1.1517521019832585e-05, "loss": 0.5169, "step": 42634 }, { "epoch": 0.904222603974465, "grad_norm": 0.3865937292575836, "learning_rate": 1.1517191386012185e-05, "loss": 0.5348, "step": 42635 }, { "epoch": 0.904243812432398, "grad_norm": 0.3985239863395691, "learning_rate": 1.1516861750504385e-05, "loss": 0.4511, "step": 42636 }, { "epoch": 0.9042650208903311, "grad_norm": 0.3964778780937195, "learning_rate": 1.151653211330954e-05, "loss": 0.4726, "step": 42637 }, { "epoch": 0.9042862293482641, "grad_norm": 0.4110247492790222, "learning_rate": 1.151620247442802e-05, "loss": 0.4927, "step": 42638 }, { "epoch": 0.9043074378061972, "grad_norm": 0.33632713556289673, "learning_rate": 1.1515872833860193e-05, "loss": 0.4289, "step": 42639 }, { "epoch": 0.9043286462641301, "grad_norm": 0.36360374093055725, "learning_rate": 1.1515543191606428e-05, "loss": 0.4699, "step": 42640 }, { "epoch": 0.9043498547220632, "grad_norm": 0.38927173614501953, "learning_rate": 1.1515213547667087e-05, "loss": 0.5162, "step": 42641 }, { "epoch": 0.9043710631799962, "grad_norm": 0.3420267701148987, "learning_rate": 1.1514883902042542e-05, "loss": 0.466, "step": 42642 }, { "epoch": 0.9043922716379292, "grad_norm": 0.43641915917396545, "learning_rate": 1.1514554254733152e-05, "loss": 0.4785, "step": 42643 }, { "epoch": 0.9044134800958622, "grad_norm": 0.39329683780670166, "learning_rate": 1.151422460573929e-05, "loss": 0.4703, "step": 42644 }, { "epoch": 0.9044346885537953, "grad_norm": 0.38217732310295105, "learning_rate": 1.1513894955061322e-05, "loss": 0.4286, "step": 42645 }, { "epoch": 0.9044558970117282, "grad_norm": 0.5502063632011414, "learning_rate": 1.1513565302699608e-05, "loss": 0.4902, "step": 42646 }, { "epoch": 0.9044771054696613, "grad_norm": 0.318884938955307, "learning_rate": 1.1513235648654527e-05, "loss": 0.4026, "step": 42647 }, { "epoch": 0.9044983139275943, "grad_norm": 0.39624401926994324, "learning_rate": 1.1512905992926434e-05, "loss": 0.4325, "step": 42648 }, { "epoch": 0.9045195223855274, "grad_norm": 0.3831665515899658, "learning_rate": 1.15125763355157e-05, "loss": 0.5718, "step": 42649 }, { "epoch": 0.9045407308434603, "grad_norm": 0.3340797424316406, "learning_rate": 1.1512246676422694e-05, "loss": 0.5302, "step": 42650 }, { "epoch": 0.9045619393013934, "grad_norm": 0.3826069235801697, "learning_rate": 1.151191701564778e-05, "loss": 0.5241, "step": 42651 }, { "epoch": 0.9045831477593265, "grad_norm": 0.3479723632335663, "learning_rate": 1.151158735319132e-05, "loss": 0.5361, "step": 42652 }, { "epoch": 0.9046043562172594, "grad_norm": 0.3821561336517334, "learning_rate": 1.1511257689053691e-05, "loss": 0.4446, "step": 42653 }, { "epoch": 0.9046255646751925, "grad_norm": 0.35701310634613037, "learning_rate": 1.1510928023235256e-05, "loss": 0.4706, "step": 42654 }, { "epoch": 0.9046467731331255, "grad_norm": 0.3876439929008484, "learning_rate": 1.1510598355736373e-05, "loss": 0.4702, "step": 42655 }, { "epoch": 0.9046679815910585, "grad_norm": 0.37203386425971985, "learning_rate": 1.1510268686557422e-05, "loss": 0.5698, "step": 42656 }, { "epoch": 0.9046891900489915, "grad_norm": 0.35825878381729126, "learning_rate": 1.150993901569876e-05, "loss": 0.4276, "step": 42657 }, { "epoch": 0.9047103985069246, "grad_norm": 0.313667356967926, "learning_rate": 1.1509609343160759e-05, "loss": 0.4058, "step": 42658 }, { "epoch": 0.9047316069648575, "grad_norm": 0.421941339969635, "learning_rate": 1.150927966894378e-05, "loss": 0.4632, "step": 42659 }, { "epoch": 0.9047528154227906, "grad_norm": 0.352285772562027, "learning_rate": 1.1508949993048195e-05, "loss": 0.4331, "step": 42660 }, { "epoch": 0.9047740238807236, "grad_norm": 0.3780902624130249, "learning_rate": 1.150862031547437e-05, "loss": 0.5045, "step": 42661 }, { "epoch": 0.9047952323386567, "grad_norm": 0.3829229176044464, "learning_rate": 1.1508290636222671e-05, "loss": 0.4856, "step": 42662 }, { "epoch": 0.9048164407965896, "grad_norm": 0.3599250316619873, "learning_rate": 1.1507960955293464e-05, "loss": 0.4564, "step": 42663 }, { "epoch": 0.9048376492545227, "grad_norm": 0.43925750255584717, "learning_rate": 1.1507631272687117e-05, "loss": 0.4395, "step": 42664 }, { "epoch": 0.9048588577124558, "grad_norm": 0.3408467769622803, "learning_rate": 1.1507301588403992e-05, "loss": 0.4253, "step": 42665 }, { "epoch": 0.9048800661703887, "grad_norm": 0.3705284893512726, "learning_rate": 1.1506971902444461e-05, "loss": 0.4121, "step": 42666 }, { "epoch": 0.9049012746283218, "grad_norm": 0.41535958647727966, "learning_rate": 1.150664221480889e-05, "loss": 0.4789, "step": 42667 }, { "epoch": 0.9049224830862548, "grad_norm": 0.3137759566307068, "learning_rate": 1.1506312525497644e-05, "loss": 0.4394, "step": 42668 }, { "epoch": 0.9049436915441879, "grad_norm": 0.40764421224594116, "learning_rate": 1.1505982834511088e-05, "loss": 0.5445, "step": 42669 }, { "epoch": 0.9049649000021208, "grad_norm": 0.3493753671646118, "learning_rate": 1.1505653141849596e-05, "loss": 0.4781, "step": 42670 }, { "epoch": 0.9049861084600539, "grad_norm": 0.3421367406845093, "learning_rate": 1.1505323447513526e-05, "loss": 0.5513, "step": 42671 }, { "epoch": 0.9050073169179869, "grad_norm": 0.3885091245174408, "learning_rate": 1.1504993751503248e-05, "loss": 0.5088, "step": 42672 }, { "epoch": 0.9050285253759199, "grad_norm": 0.3895319998264313, "learning_rate": 1.150466405381913e-05, "loss": 0.454, "step": 42673 }, { "epoch": 0.9050497338338529, "grad_norm": 0.40450185537338257, "learning_rate": 1.1504334354461537e-05, "loss": 0.5422, "step": 42674 }, { "epoch": 0.905070942291786, "grad_norm": 0.39165934920310974, "learning_rate": 1.1504004653430841e-05, "loss": 0.5215, "step": 42675 }, { "epoch": 0.9050921507497189, "grad_norm": 0.3787705600261688, "learning_rate": 1.1503674950727398e-05, "loss": 0.6073, "step": 42676 }, { "epoch": 0.905113359207652, "grad_norm": 0.3633536398410797, "learning_rate": 1.1503345246351583e-05, "loss": 0.4839, "step": 42677 }, { "epoch": 0.9051345676655851, "grad_norm": 0.41104593873023987, "learning_rate": 1.150301554030376e-05, "loss": 0.4991, "step": 42678 }, { "epoch": 0.905155776123518, "grad_norm": 0.32338154315948486, "learning_rate": 1.1502685832584298e-05, "loss": 0.4589, "step": 42679 }, { "epoch": 0.9051769845814511, "grad_norm": 0.5305517315864563, "learning_rate": 1.150235612319356e-05, "loss": 0.5963, "step": 42680 }, { "epoch": 0.9051981930393841, "grad_norm": 0.3727482855319977, "learning_rate": 1.1502026412131914e-05, "loss": 0.4813, "step": 42681 }, { "epoch": 0.9052194014973172, "grad_norm": 0.3720241189002991, "learning_rate": 1.1501696699399732e-05, "loss": 0.4801, "step": 42682 }, { "epoch": 0.9052406099552501, "grad_norm": 0.36071687936782837, "learning_rate": 1.150136698499737e-05, "loss": 0.5406, "step": 42683 }, { "epoch": 0.9052618184131832, "grad_norm": 0.3902420401573181, "learning_rate": 1.1501037268925202e-05, "loss": 0.5718, "step": 42684 }, { "epoch": 0.9052830268711162, "grad_norm": 0.3260670602321625, "learning_rate": 1.1500707551183596e-05, "loss": 0.464, "step": 42685 }, { "epoch": 0.9053042353290492, "grad_norm": 0.49569398164749146, "learning_rate": 1.1500377831772913e-05, "loss": 0.451, "step": 42686 }, { "epoch": 0.9053254437869822, "grad_norm": 0.5824894905090332, "learning_rate": 1.1500048110693526e-05, "loss": 0.4953, "step": 42687 }, { "epoch": 0.9053466522449153, "grad_norm": 0.401051789522171, "learning_rate": 1.1499718387945795e-05, "loss": 0.5082, "step": 42688 }, { "epoch": 0.9053678607028482, "grad_norm": 0.36814188957214355, "learning_rate": 1.1499388663530096e-05, "loss": 0.4677, "step": 42689 }, { "epoch": 0.9053890691607813, "grad_norm": 0.42198193073272705, "learning_rate": 1.1499058937446785e-05, "loss": 0.4421, "step": 42690 }, { "epoch": 0.9054102776187144, "grad_norm": 0.38419294357299805, "learning_rate": 1.1498729209696235e-05, "loss": 0.5367, "step": 42691 }, { "epoch": 0.9054314860766474, "grad_norm": 0.38602009415626526, "learning_rate": 1.149839948027881e-05, "loss": 0.4901, "step": 42692 }, { "epoch": 0.9054526945345804, "grad_norm": 0.3585362434387207, "learning_rate": 1.149806974919488e-05, "loss": 0.4494, "step": 42693 }, { "epoch": 0.9054739029925134, "grad_norm": 0.3919743597507477, "learning_rate": 1.1497740016444807e-05, "loss": 0.5048, "step": 42694 }, { "epoch": 0.9054951114504465, "grad_norm": 0.3351418375968933, "learning_rate": 1.1497410282028964e-05, "loss": 0.3668, "step": 42695 }, { "epoch": 0.9055163199083794, "grad_norm": 0.5851226449012756, "learning_rate": 1.1497080545947714e-05, "loss": 0.4746, "step": 42696 }, { "epoch": 0.9055375283663125, "grad_norm": 0.4982723593711853, "learning_rate": 1.149675080820142e-05, "loss": 0.5011, "step": 42697 }, { "epoch": 0.9055587368242455, "grad_norm": 0.4170322120189667, "learning_rate": 1.1496421068790457e-05, "loss": 0.4857, "step": 42698 }, { "epoch": 0.9055799452821786, "grad_norm": 0.333280473947525, "learning_rate": 1.149609132771519e-05, "loss": 0.4373, "step": 42699 }, { "epoch": 0.9056011537401115, "grad_norm": 0.3618936836719513, "learning_rate": 1.1495761584975976e-05, "loss": 0.5578, "step": 42700 }, { "epoch": 0.9056223621980446, "grad_norm": 0.4427664279937744, "learning_rate": 1.1495431840573194e-05, "loss": 0.4511, "step": 42701 }, { "epoch": 0.9056435706559776, "grad_norm": 0.41969045996665955, "learning_rate": 1.1495102094507203e-05, "loss": 0.4907, "step": 42702 }, { "epoch": 0.9056647791139106, "grad_norm": 0.3626767694950104, "learning_rate": 1.1494772346778374e-05, "loss": 0.5442, "step": 42703 }, { "epoch": 0.9056859875718436, "grad_norm": 0.3578062057495117, "learning_rate": 1.1494442597387072e-05, "loss": 0.5227, "step": 42704 }, { "epoch": 0.9057071960297767, "grad_norm": 0.4033721387386322, "learning_rate": 1.1494112846333664e-05, "loss": 0.4715, "step": 42705 }, { "epoch": 0.9057284044877097, "grad_norm": 0.35196763277053833, "learning_rate": 1.1493783093618517e-05, "loss": 0.5043, "step": 42706 }, { "epoch": 0.9057496129456427, "grad_norm": 0.3763696849346161, "learning_rate": 1.1493453339241995e-05, "loss": 0.5074, "step": 42707 }, { "epoch": 0.9057708214035758, "grad_norm": 0.7771838307380676, "learning_rate": 1.149312358320447e-05, "loss": 0.4439, "step": 42708 }, { "epoch": 0.9057920298615088, "grad_norm": 0.3523120582103729, "learning_rate": 1.1492793825506305e-05, "loss": 0.4718, "step": 42709 }, { "epoch": 0.9058132383194418, "grad_norm": 0.335242360830307, "learning_rate": 1.1492464066147866e-05, "loss": 0.4618, "step": 42710 }, { "epoch": 0.9058344467773748, "grad_norm": 0.42482277750968933, "learning_rate": 1.1492134305129524e-05, "loss": 0.5065, "step": 42711 }, { "epoch": 0.9058556552353079, "grad_norm": 0.36088597774505615, "learning_rate": 1.1491804542451641e-05, "loss": 0.4637, "step": 42712 }, { "epoch": 0.9058768636932408, "grad_norm": 0.3770037889480591, "learning_rate": 1.1491474778114588e-05, "loss": 0.5137, "step": 42713 }, { "epoch": 0.9058980721511739, "grad_norm": 0.36049750447273254, "learning_rate": 1.149114501211873e-05, "loss": 0.4503, "step": 42714 }, { "epoch": 0.9059192806091069, "grad_norm": 0.35690340399742126, "learning_rate": 1.1490815244464433e-05, "loss": 0.4829, "step": 42715 }, { "epoch": 0.90594048906704, "grad_norm": 0.36049333214759827, "learning_rate": 1.1490485475152063e-05, "loss": 0.4766, "step": 42716 }, { "epoch": 0.9059616975249729, "grad_norm": 0.36786502599716187, "learning_rate": 1.1490155704181987e-05, "loss": 0.492, "step": 42717 }, { "epoch": 0.905982905982906, "grad_norm": 0.33785107731819153, "learning_rate": 1.1489825931554575e-05, "loss": 0.5128, "step": 42718 }, { "epoch": 0.9060041144408391, "grad_norm": 0.415128231048584, "learning_rate": 1.1489496157270193e-05, "loss": 0.5141, "step": 42719 }, { "epoch": 0.906025322898772, "grad_norm": 0.4145583510398865, "learning_rate": 1.1489166381329202e-05, "loss": 0.5231, "step": 42720 }, { "epoch": 0.9060465313567051, "grad_norm": 0.43167880177497864, "learning_rate": 1.1488836603731977e-05, "loss": 0.4769, "step": 42721 }, { "epoch": 0.9060677398146381, "grad_norm": 0.3336937725543976, "learning_rate": 1.1488506824478877e-05, "loss": 0.5184, "step": 42722 }, { "epoch": 0.9060889482725711, "grad_norm": 0.35836559534072876, "learning_rate": 1.1488177043570274e-05, "loss": 0.4866, "step": 42723 }, { "epoch": 0.9061101567305041, "grad_norm": 0.34083467721939087, "learning_rate": 1.1487847261006535e-05, "loss": 0.4879, "step": 42724 }, { "epoch": 0.9061313651884372, "grad_norm": 0.3451276421546936, "learning_rate": 1.1487517476788024e-05, "loss": 0.4374, "step": 42725 }, { "epoch": 0.9061525736463701, "grad_norm": 0.3891419470310211, "learning_rate": 1.1487187690915109e-05, "loss": 0.4961, "step": 42726 }, { "epoch": 0.9061737821043032, "grad_norm": 0.38521236181259155, "learning_rate": 1.1486857903388157e-05, "loss": 0.5227, "step": 42727 }, { "epoch": 0.9061949905622362, "grad_norm": 0.4057336449623108, "learning_rate": 1.1486528114207535e-05, "loss": 0.477, "step": 42728 }, { "epoch": 0.9062161990201693, "grad_norm": 0.38271769881248474, "learning_rate": 1.1486198323373608e-05, "loss": 0.4561, "step": 42729 }, { "epoch": 0.9062374074781022, "grad_norm": 0.36510372161865234, "learning_rate": 1.1485868530886746e-05, "loss": 0.4403, "step": 42730 }, { "epoch": 0.9062586159360353, "grad_norm": 0.36887821555137634, "learning_rate": 1.1485538736747312e-05, "loss": 0.5465, "step": 42731 }, { "epoch": 0.9062798243939684, "grad_norm": 0.3792528212070465, "learning_rate": 1.1485208940955677e-05, "loss": 0.4979, "step": 42732 }, { "epoch": 0.9063010328519013, "grad_norm": 0.47130370140075684, "learning_rate": 1.1484879143512206e-05, "loss": 0.5549, "step": 42733 }, { "epoch": 0.9063222413098344, "grad_norm": 0.3622327148914337, "learning_rate": 1.1484549344417261e-05, "loss": 0.4922, "step": 42734 }, { "epoch": 0.9063434497677674, "grad_norm": 0.41506630182266235, "learning_rate": 1.1484219543671216e-05, "loss": 0.5025, "step": 42735 }, { "epoch": 0.9063646582257004, "grad_norm": 0.3498915731906891, "learning_rate": 1.1483889741274436e-05, "loss": 0.4391, "step": 42736 }, { "epoch": 0.9063858666836334, "grad_norm": 0.6622715592384338, "learning_rate": 1.1483559937227285e-05, "loss": 0.5674, "step": 42737 }, { "epoch": 0.9064070751415665, "grad_norm": 0.3447987735271454, "learning_rate": 1.1483230131530131e-05, "loss": 0.4187, "step": 42738 }, { "epoch": 0.9064282835994995, "grad_norm": 0.32573753595352173, "learning_rate": 1.148290032418334e-05, "loss": 0.3519, "step": 42739 }, { "epoch": 0.9064494920574325, "grad_norm": 0.40313154458999634, "learning_rate": 1.1482570515187286e-05, "loss": 0.5126, "step": 42740 }, { "epoch": 0.9064707005153655, "grad_norm": 0.3443228006362915, "learning_rate": 1.1482240704542327e-05, "loss": 0.4272, "step": 42741 }, { "epoch": 0.9064919089732986, "grad_norm": 0.3577829897403717, "learning_rate": 1.1481910892248828e-05, "loss": 0.48, "step": 42742 }, { "epoch": 0.9065131174312315, "grad_norm": 0.3615488111972809, "learning_rate": 1.1481581078307168e-05, "loss": 0.4576, "step": 42743 }, { "epoch": 0.9065343258891646, "grad_norm": 0.3280029892921448, "learning_rate": 1.1481251262717703e-05, "loss": 0.4383, "step": 42744 }, { "epoch": 0.9065555343470976, "grad_norm": 0.3385075330734253, "learning_rate": 1.1480921445480802e-05, "loss": 0.522, "step": 42745 }, { "epoch": 0.9065767428050306, "grad_norm": 0.39716580510139465, "learning_rate": 1.1480591626596836e-05, "loss": 0.5087, "step": 42746 }, { "epoch": 0.9065979512629637, "grad_norm": 0.4694741666316986, "learning_rate": 1.1480261806066169e-05, "loss": 0.4909, "step": 42747 }, { "epoch": 0.9066191597208967, "grad_norm": 0.49290916323661804, "learning_rate": 1.1479931983889161e-05, "loss": 0.473, "step": 42748 }, { "epoch": 0.9066403681788298, "grad_norm": 0.34789684414863586, "learning_rate": 1.1479602160066193e-05, "loss": 0.4997, "step": 42749 }, { "epoch": 0.9066615766367627, "grad_norm": 0.6054763197898865, "learning_rate": 1.1479272334597622e-05, "loss": 0.5012, "step": 42750 }, { "epoch": 0.9066827850946958, "grad_norm": 0.37614354491233826, "learning_rate": 1.1478942507483816e-05, "loss": 0.5278, "step": 42751 }, { "epoch": 0.9067039935526288, "grad_norm": 0.3810090720653534, "learning_rate": 1.1478612678725146e-05, "loss": 0.4882, "step": 42752 }, { "epoch": 0.9067252020105618, "grad_norm": 0.3517356514930725, "learning_rate": 1.1478282848321971e-05, "loss": 0.4659, "step": 42753 }, { "epoch": 0.9067464104684948, "grad_norm": 0.3766002953052521, "learning_rate": 1.1477953016274668e-05, "loss": 0.4235, "step": 42754 }, { "epoch": 0.9067676189264279, "grad_norm": 0.39462965726852417, "learning_rate": 1.1477623182583594e-05, "loss": 0.5738, "step": 42755 }, { "epoch": 0.9067888273843608, "grad_norm": 0.365507572889328, "learning_rate": 1.147729334724912e-05, "loss": 0.5306, "step": 42756 }, { "epoch": 0.9068100358422939, "grad_norm": 0.37100768089294434, "learning_rate": 1.1476963510271617e-05, "loss": 0.4899, "step": 42757 }, { "epoch": 0.9068312443002269, "grad_norm": 0.430889755487442, "learning_rate": 1.1476633671651446e-05, "loss": 0.532, "step": 42758 }, { "epoch": 0.90685245275816, "grad_norm": 0.3742513656616211, "learning_rate": 1.1476303831388978e-05, "loss": 0.4923, "step": 42759 }, { "epoch": 0.906873661216093, "grad_norm": 0.3733833134174347, "learning_rate": 1.1475973989484576e-05, "loss": 0.5412, "step": 42760 }, { "epoch": 0.906894869674026, "grad_norm": 0.3529045879840851, "learning_rate": 1.1475644145938608e-05, "loss": 0.5905, "step": 42761 }, { "epoch": 0.9069160781319591, "grad_norm": 0.37428510189056396, "learning_rate": 1.1475314300751438e-05, "loss": 0.4314, "step": 42762 }, { "epoch": 0.906937286589892, "grad_norm": 0.3533964157104492, "learning_rate": 1.1474984453923443e-05, "loss": 0.4105, "step": 42763 }, { "epoch": 0.9069584950478251, "grad_norm": 0.3610275983810425, "learning_rate": 1.1474654605454983e-05, "loss": 0.5245, "step": 42764 }, { "epoch": 0.9069797035057581, "grad_norm": 0.355621337890625, "learning_rate": 1.147432475534642e-05, "loss": 0.5221, "step": 42765 }, { "epoch": 0.9070009119636911, "grad_norm": 0.42337098717689514, "learning_rate": 1.1473994903598129e-05, "loss": 0.575, "step": 42766 }, { "epoch": 0.9070221204216241, "grad_norm": 0.4541378617286682, "learning_rate": 1.1473665050210472e-05, "loss": 0.4782, "step": 42767 }, { "epoch": 0.9070433288795572, "grad_norm": 0.3421425223350525, "learning_rate": 1.1473335195183816e-05, "loss": 0.4997, "step": 42768 }, { "epoch": 0.9070645373374902, "grad_norm": 0.38662147521972656, "learning_rate": 1.1473005338518532e-05, "loss": 0.5423, "step": 42769 }, { "epoch": 0.9070857457954232, "grad_norm": 0.3584635555744171, "learning_rate": 1.1472675480214983e-05, "loss": 0.4459, "step": 42770 }, { "epoch": 0.9071069542533562, "grad_norm": 0.5372462272644043, "learning_rate": 1.147234562027354e-05, "loss": 0.4382, "step": 42771 }, { "epoch": 0.9071281627112893, "grad_norm": 0.39468976855278015, "learning_rate": 1.1472015758694566e-05, "loss": 0.4789, "step": 42772 }, { "epoch": 0.9071493711692223, "grad_norm": 0.38374465703964233, "learning_rate": 1.1471685895478425e-05, "loss": 0.4549, "step": 42773 }, { "epoch": 0.9071705796271553, "grad_norm": 0.3679409325122833, "learning_rate": 1.147135603062549e-05, "loss": 0.4714, "step": 42774 }, { "epoch": 0.9071917880850884, "grad_norm": 0.376954048871994, "learning_rate": 1.1471026164136127e-05, "loss": 0.5005, "step": 42775 }, { "epoch": 0.9072129965430213, "grad_norm": 0.36588969826698303, "learning_rate": 1.14706962960107e-05, "loss": 0.4426, "step": 42776 }, { "epoch": 0.9072342050009544, "grad_norm": 0.39298468828201294, "learning_rate": 1.147036642624958e-05, "loss": 0.5172, "step": 42777 }, { "epoch": 0.9072554134588874, "grad_norm": 0.3863126039505005, "learning_rate": 1.147003655485313e-05, "loss": 0.4582, "step": 42778 }, { "epoch": 0.9072766219168205, "grad_norm": 0.3671478033065796, "learning_rate": 1.1469706681821718e-05, "loss": 0.5076, "step": 42779 }, { "epoch": 0.9072978303747534, "grad_norm": 0.3820982575416565, "learning_rate": 1.1469376807155708e-05, "loss": 0.4688, "step": 42780 }, { "epoch": 0.9073190388326865, "grad_norm": 0.38123905658721924, "learning_rate": 1.1469046930855474e-05, "loss": 0.4953, "step": 42781 }, { "epoch": 0.9073402472906195, "grad_norm": 0.35322999954223633, "learning_rate": 1.1468717052921374e-05, "loss": 0.5044, "step": 42782 }, { "epoch": 0.9073614557485525, "grad_norm": 0.4264307916164398, "learning_rate": 1.1468387173353786e-05, "loss": 0.4667, "step": 42783 }, { "epoch": 0.9073826642064855, "grad_norm": 0.33271536231040955, "learning_rate": 1.1468057292153065e-05, "loss": 0.4094, "step": 42784 }, { "epoch": 0.9074038726644186, "grad_norm": 0.3691900670528412, "learning_rate": 1.1467727409319587e-05, "loss": 0.4756, "step": 42785 }, { "epoch": 0.9074250811223515, "grad_norm": 0.3627917170524597, "learning_rate": 1.1467397524853715e-05, "loss": 0.4781, "step": 42786 }, { "epoch": 0.9074462895802846, "grad_norm": 0.3788910210132599, "learning_rate": 1.1467067638755813e-05, "loss": 0.5398, "step": 42787 }, { "epoch": 0.9074674980382177, "grad_norm": 0.3434121310710907, "learning_rate": 1.1466737751026253e-05, "loss": 0.4646, "step": 42788 }, { "epoch": 0.9074887064961507, "grad_norm": 0.35998597741127014, "learning_rate": 1.14664078616654e-05, "loss": 0.4942, "step": 42789 }, { "epoch": 0.9075099149540837, "grad_norm": 0.41294655203819275, "learning_rate": 1.1466077970673622e-05, "loss": 0.4953, "step": 42790 }, { "epoch": 0.9075311234120167, "grad_norm": 0.3462393283843994, "learning_rate": 1.1465748078051285e-05, "loss": 0.4493, "step": 42791 }, { "epoch": 0.9075523318699498, "grad_norm": 0.4171675443649292, "learning_rate": 1.1465418183798756e-05, "loss": 0.5285, "step": 42792 }, { "epoch": 0.9075735403278827, "grad_norm": 0.5482398271560669, "learning_rate": 1.1465088287916397e-05, "loss": 0.5796, "step": 42793 }, { "epoch": 0.9075947487858158, "grad_norm": 0.41885218024253845, "learning_rate": 1.1464758390404584e-05, "loss": 0.5036, "step": 42794 }, { "epoch": 0.9076159572437488, "grad_norm": 0.37685704231262207, "learning_rate": 1.146442849126368e-05, "loss": 0.5181, "step": 42795 }, { "epoch": 0.9076371657016818, "grad_norm": 0.3864017426967621, "learning_rate": 1.1464098590494047e-05, "loss": 0.3946, "step": 42796 }, { "epoch": 0.9076583741596148, "grad_norm": 0.3295205533504486, "learning_rate": 1.1463768688096062e-05, "loss": 0.428, "step": 42797 }, { "epoch": 0.9076795826175479, "grad_norm": 0.4302973449230194, "learning_rate": 1.146343878407008e-05, "loss": 0.4583, "step": 42798 }, { "epoch": 0.9077007910754809, "grad_norm": 0.38455837965011597, "learning_rate": 1.146310887841648e-05, "loss": 0.4677, "step": 42799 }, { "epoch": 0.9077219995334139, "grad_norm": 0.36771896481513977, "learning_rate": 1.1462778971135618e-05, "loss": 0.4827, "step": 42800 }, { "epoch": 0.907743207991347, "grad_norm": 0.40976983308792114, "learning_rate": 1.146244906222787e-05, "loss": 0.4717, "step": 42801 }, { "epoch": 0.90776441644928, "grad_norm": 0.3418351709842682, "learning_rate": 1.1462119151693596e-05, "loss": 0.4685, "step": 42802 }, { "epoch": 0.907785624907213, "grad_norm": 0.4093749523162842, "learning_rate": 1.1461789239533168e-05, "loss": 0.4729, "step": 42803 }, { "epoch": 0.907806833365146, "grad_norm": 0.3388538658618927, "learning_rate": 1.1461459325746948e-05, "loss": 0.4006, "step": 42804 }, { "epoch": 0.9078280418230791, "grad_norm": 0.36423978209495544, "learning_rate": 1.1461129410335308e-05, "loss": 0.6059, "step": 42805 }, { "epoch": 0.907849250281012, "grad_norm": 0.4648432731628418, "learning_rate": 1.1460799493298612e-05, "loss": 0.4563, "step": 42806 }, { "epoch": 0.9078704587389451, "grad_norm": 0.3624851703643799, "learning_rate": 1.1460469574637226e-05, "loss": 0.532, "step": 42807 }, { "epoch": 0.9078916671968781, "grad_norm": 0.39137300848960876, "learning_rate": 1.1460139654351521e-05, "loss": 0.4558, "step": 42808 }, { "epoch": 0.9079128756548112, "grad_norm": 0.38760435581207275, "learning_rate": 1.145980973244186e-05, "loss": 0.5072, "step": 42809 }, { "epoch": 0.9079340841127441, "grad_norm": 0.3438151180744171, "learning_rate": 1.1459479808908611e-05, "loss": 0.477, "step": 42810 }, { "epoch": 0.9079552925706772, "grad_norm": 0.3373314142227173, "learning_rate": 1.1459149883752141e-05, "loss": 0.4249, "step": 42811 }, { "epoch": 0.9079765010286102, "grad_norm": 0.4229423701763153, "learning_rate": 1.1458819956972818e-05, "loss": 0.5491, "step": 42812 }, { "epoch": 0.9079977094865432, "grad_norm": 0.3447062373161316, "learning_rate": 1.1458490028571006e-05, "loss": 0.5203, "step": 42813 }, { "epoch": 0.9080189179444763, "grad_norm": 0.3612799644470215, "learning_rate": 1.1458160098547078e-05, "loss": 0.4677, "step": 42814 }, { "epoch": 0.9080401264024093, "grad_norm": 0.359422504901886, "learning_rate": 1.1457830166901396e-05, "loss": 0.4876, "step": 42815 }, { "epoch": 0.9080613348603424, "grad_norm": 0.3514341413974762, "learning_rate": 1.1457500233634325e-05, "loss": 0.5166, "step": 42816 }, { "epoch": 0.9080825433182753, "grad_norm": 0.3974412679672241, "learning_rate": 1.145717029874624e-05, "loss": 0.5127, "step": 42817 }, { "epoch": 0.9081037517762084, "grad_norm": 0.36415842175483704, "learning_rate": 1.1456840362237495e-05, "loss": 0.4567, "step": 42818 }, { "epoch": 0.9081249602341414, "grad_norm": 0.4055494964122772, "learning_rate": 1.1456510424108469e-05, "loss": 0.5005, "step": 42819 }, { "epoch": 0.9081461686920744, "grad_norm": 0.34314560890197754, "learning_rate": 1.1456180484359527e-05, "loss": 0.458, "step": 42820 }, { "epoch": 0.9081673771500074, "grad_norm": 0.38364356756210327, "learning_rate": 1.1455850542991031e-05, "loss": 0.5345, "step": 42821 }, { "epoch": 0.9081885856079405, "grad_norm": 0.3775327205657959, "learning_rate": 1.1455520600003354e-05, "loss": 0.5035, "step": 42822 }, { "epoch": 0.9082097940658734, "grad_norm": 0.3121258020401001, "learning_rate": 1.1455190655396857e-05, "loss": 0.4291, "step": 42823 }, { "epoch": 0.9082310025238065, "grad_norm": 0.3455406427383423, "learning_rate": 1.145486070917191e-05, "loss": 0.4485, "step": 42824 }, { "epoch": 0.9082522109817395, "grad_norm": 0.3234366774559021, "learning_rate": 1.1454530761328877e-05, "loss": 0.4033, "step": 42825 }, { "epoch": 0.9082734194396725, "grad_norm": 0.3898514211177826, "learning_rate": 1.145420081186813e-05, "loss": 0.5258, "step": 42826 }, { "epoch": 0.9082946278976055, "grad_norm": 0.4885751008987427, "learning_rate": 1.1453870860790033e-05, "loss": 0.5056, "step": 42827 }, { "epoch": 0.9083158363555386, "grad_norm": 0.3676353693008423, "learning_rate": 1.1453540908094955e-05, "loss": 0.4634, "step": 42828 }, { "epoch": 0.9083370448134717, "grad_norm": 0.36305928230285645, "learning_rate": 1.1453210953783262e-05, "loss": 0.4357, "step": 42829 }, { "epoch": 0.9083582532714046, "grad_norm": 0.4114070236682892, "learning_rate": 1.1452880997855317e-05, "loss": 0.4275, "step": 42830 }, { "epoch": 0.9083794617293377, "grad_norm": 0.37311631441116333, "learning_rate": 1.1452551040311493e-05, "loss": 0.5393, "step": 42831 }, { "epoch": 0.9084006701872707, "grad_norm": 0.3602341413497925, "learning_rate": 1.1452221081152152e-05, "loss": 0.5396, "step": 42832 }, { "epoch": 0.9084218786452037, "grad_norm": 0.37523892521858215, "learning_rate": 1.1451891120377664e-05, "loss": 0.4352, "step": 42833 }, { "epoch": 0.9084430871031367, "grad_norm": 0.3562757670879364, "learning_rate": 1.1451561157988398e-05, "loss": 0.51, "step": 42834 }, { "epoch": 0.9084642955610698, "grad_norm": 0.36615902185440063, "learning_rate": 1.1451231193984715e-05, "loss": 0.5411, "step": 42835 }, { "epoch": 0.9084855040190027, "grad_norm": 0.37570470571517944, "learning_rate": 1.1450901228366986e-05, "loss": 0.4628, "step": 42836 }, { "epoch": 0.9085067124769358, "grad_norm": 0.4400070309638977, "learning_rate": 1.145057126113558e-05, "loss": 0.4359, "step": 42837 }, { "epoch": 0.9085279209348688, "grad_norm": 0.3631763160228729, "learning_rate": 1.1450241292290856e-05, "loss": 0.4701, "step": 42838 }, { "epoch": 0.9085491293928019, "grad_norm": 0.3651108145713806, "learning_rate": 1.1449911321833191e-05, "loss": 0.4223, "step": 42839 }, { "epoch": 0.9085703378507348, "grad_norm": 0.4233524203300476, "learning_rate": 1.1449581349762946e-05, "loss": 0.4543, "step": 42840 }, { "epoch": 0.9085915463086679, "grad_norm": 0.3671538829803467, "learning_rate": 1.1449251376080487e-05, "loss": 0.453, "step": 42841 }, { "epoch": 0.908612754766601, "grad_norm": 0.38455837965011597, "learning_rate": 1.1448921400786186e-05, "loss": 0.4984, "step": 42842 }, { "epoch": 0.9086339632245339, "grad_norm": 0.3994057774543762, "learning_rate": 1.144859142388041e-05, "loss": 0.4779, "step": 42843 }, { "epoch": 0.908655171682467, "grad_norm": 0.3358364999294281, "learning_rate": 1.1448261445363515e-05, "loss": 0.4384, "step": 42844 }, { "epoch": 0.9086763801404, "grad_norm": 0.3664250373840332, "learning_rate": 1.144793146523588e-05, "loss": 0.5646, "step": 42845 }, { "epoch": 0.908697588598333, "grad_norm": 0.32848551869392395, "learning_rate": 1.144760148349787e-05, "loss": 0.4418, "step": 42846 }, { "epoch": 0.908718797056266, "grad_norm": 0.3649522066116333, "learning_rate": 1.1447271500149849e-05, "loss": 0.5454, "step": 42847 }, { "epoch": 0.9087400055141991, "grad_norm": 0.3286939561367035, "learning_rate": 1.1446941515192185e-05, "loss": 0.5524, "step": 42848 }, { "epoch": 0.908761213972132, "grad_norm": 0.3421229422092438, "learning_rate": 1.1446611528625244e-05, "loss": 0.3974, "step": 42849 }, { "epoch": 0.9087824224300651, "grad_norm": 0.44077691435813904, "learning_rate": 1.1446281540449398e-05, "loss": 0.634, "step": 42850 }, { "epoch": 0.9088036308879981, "grad_norm": 0.3683706820011139, "learning_rate": 1.1445951550665007e-05, "loss": 0.5326, "step": 42851 }, { "epoch": 0.9088248393459312, "grad_norm": 0.3540482521057129, "learning_rate": 1.1445621559272441e-05, "loss": 0.4666, "step": 42852 }, { "epoch": 0.9088460478038641, "grad_norm": 0.3592057526111603, "learning_rate": 1.144529156627207e-05, "loss": 0.4538, "step": 42853 }, { "epoch": 0.9088672562617972, "grad_norm": 0.40171822905540466, "learning_rate": 1.1444961571664257e-05, "loss": 0.4997, "step": 42854 }, { "epoch": 0.9088884647197303, "grad_norm": 0.3741883933544159, "learning_rate": 1.1444631575449368e-05, "loss": 0.4955, "step": 42855 }, { "epoch": 0.9089096731776632, "grad_norm": 0.3872653841972351, "learning_rate": 1.1444301577627777e-05, "loss": 0.4575, "step": 42856 }, { "epoch": 0.9089308816355963, "grad_norm": 0.3405812382698059, "learning_rate": 1.1443971578199841e-05, "loss": 0.4623, "step": 42857 }, { "epoch": 0.9089520900935293, "grad_norm": 0.33650338649749756, "learning_rate": 1.1443641577165933e-05, "loss": 0.4145, "step": 42858 }, { "epoch": 0.9089732985514624, "grad_norm": 0.5546278953552246, "learning_rate": 1.1443311574526423e-05, "loss": 0.4755, "step": 42859 }, { "epoch": 0.9089945070093953, "grad_norm": 0.43226367235183716, "learning_rate": 1.1442981570281674e-05, "loss": 0.4718, "step": 42860 }, { "epoch": 0.9090157154673284, "grad_norm": 0.37154674530029297, "learning_rate": 1.1442651564432052e-05, "loss": 0.418, "step": 42861 }, { "epoch": 0.9090369239252614, "grad_norm": 0.4336044490337372, "learning_rate": 1.1442321556977927e-05, "loss": 0.491, "step": 42862 }, { "epoch": 0.9090581323831944, "grad_norm": 0.36016350984573364, "learning_rate": 1.1441991547919662e-05, "loss": 0.483, "step": 42863 }, { "epoch": 0.9090793408411274, "grad_norm": 0.3315054476261139, "learning_rate": 1.1441661537257627e-05, "loss": 0.4918, "step": 42864 }, { "epoch": 0.9091005492990605, "grad_norm": 0.359097421169281, "learning_rate": 1.144133152499219e-05, "loss": 0.4491, "step": 42865 }, { "epoch": 0.9091217577569934, "grad_norm": 0.3854534327983856, "learning_rate": 1.1441001511123715e-05, "loss": 0.463, "step": 42866 }, { "epoch": 0.9091429662149265, "grad_norm": 0.37425336241722107, "learning_rate": 1.144067149565257e-05, "loss": 0.5338, "step": 42867 }, { "epoch": 0.9091641746728595, "grad_norm": 0.36205390095710754, "learning_rate": 1.1440341478579126e-05, "loss": 0.5133, "step": 42868 }, { "epoch": 0.9091853831307926, "grad_norm": 0.3792514503002167, "learning_rate": 1.1440011459903744e-05, "loss": 0.4895, "step": 42869 }, { "epoch": 0.9092065915887256, "grad_norm": 0.5181496739387512, "learning_rate": 1.1439681439626793e-05, "loss": 0.5592, "step": 42870 }, { "epoch": 0.9092278000466586, "grad_norm": 0.49823296070098877, "learning_rate": 1.1439351417748645e-05, "loss": 0.4873, "step": 42871 }, { "epoch": 0.9092490085045917, "grad_norm": 0.4124346673488617, "learning_rate": 1.1439021394269658e-05, "loss": 0.457, "step": 42872 }, { "epoch": 0.9092702169625246, "grad_norm": 0.37164413928985596, "learning_rate": 1.143869136919021e-05, "loss": 0.5047, "step": 42873 }, { "epoch": 0.9092914254204577, "grad_norm": 0.40028122067451477, "learning_rate": 1.1438361342510658e-05, "loss": 0.4595, "step": 42874 }, { "epoch": 0.9093126338783907, "grad_norm": 0.43379339575767517, "learning_rate": 1.143803131423137e-05, "loss": 0.5222, "step": 42875 }, { "epoch": 0.9093338423363237, "grad_norm": 0.3551112711429596, "learning_rate": 1.1437701284352723e-05, "loss": 0.4785, "step": 42876 }, { "epoch": 0.9093550507942567, "grad_norm": 0.34897205233573914, "learning_rate": 1.1437371252875072e-05, "loss": 0.4584, "step": 42877 }, { "epoch": 0.9093762592521898, "grad_norm": 0.3793407082557678, "learning_rate": 1.1437041219798788e-05, "loss": 0.5188, "step": 42878 }, { "epoch": 0.9093974677101228, "grad_norm": 0.3654083013534546, "learning_rate": 1.1436711185124243e-05, "loss": 0.4217, "step": 42879 }, { "epoch": 0.9094186761680558, "grad_norm": 0.3411390781402588, "learning_rate": 1.1436381148851798e-05, "loss": 0.4939, "step": 42880 }, { "epoch": 0.9094398846259888, "grad_norm": 0.3603806793689728, "learning_rate": 1.1436051110981824e-05, "loss": 0.4931, "step": 42881 }, { "epoch": 0.9094610930839219, "grad_norm": 0.3867504298686981, "learning_rate": 1.1435721071514687e-05, "loss": 0.4745, "step": 42882 }, { "epoch": 0.9094823015418549, "grad_norm": 0.36407554149627686, "learning_rate": 1.143539103045075e-05, "loss": 0.4364, "step": 42883 }, { "epoch": 0.9095035099997879, "grad_norm": 0.41479361057281494, "learning_rate": 1.1435060987790388e-05, "loss": 0.5356, "step": 42884 }, { "epoch": 0.909524718457721, "grad_norm": 0.41702765226364136, "learning_rate": 1.143473094353396e-05, "loss": 0.5643, "step": 42885 }, { "epoch": 0.909545926915654, "grad_norm": 0.40185779333114624, "learning_rate": 1.1434400897681838e-05, "loss": 0.467, "step": 42886 }, { "epoch": 0.909567135373587, "grad_norm": 0.34118351340293884, "learning_rate": 1.1434070850234389e-05, "loss": 0.5037, "step": 42887 }, { "epoch": 0.90958834383152, "grad_norm": 0.36032620072364807, "learning_rate": 1.143374080119198e-05, "loss": 0.4854, "step": 42888 }, { "epoch": 0.9096095522894531, "grad_norm": 0.3696405589580536, "learning_rate": 1.1433410750554972e-05, "loss": 0.4867, "step": 42889 }, { "epoch": 0.909630760747386, "grad_norm": 0.34388935565948486, "learning_rate": 1.143308069832374e-05, "loss": 0.4408, "step": 42890 }, { "epoch": 0.9096519692053191, "grad_norm": 0.3755163848400116, "learning_rate": 1.1432750644498648e-05, "loss": 0.4688, "step": 42891 }, { "epoch": 0.9096731776632521, "grad_norm": 0.4052116572856903, "learning_rate": 1.1432420589080065e-05, "loss": 0.545, "step": 42892 }, { "epoch": 0.9096943861211851, "grad_norm": 0.35317572951316833, "learning_rate": 1.1432090532068353e-05, "loss": 0.5361, "step": 42893 }, { "epoch": 0.9097155945791181, "grad_norm": 0.42205315828323364, "learning_rate": 1.1431760473463882e-05, "loss": 0.4938, "step": 42894 }, { "epoch": 0.9097368030370512, "grad_norm": 0.33733007311820984, "learning_rate": 1.1431430413267023e-05, "loss": 0.4867, "step": 42895 }, { "epoch": 0.9097580114949843, "grad_norm": 0.34405213594436646, "learning_rate": 1.1431100351478137e-05, "loss": 0.4363, "step": 42896 }, { "epoch": 0.9097792199529172, "grad_norm": 0.40403372049331665, "learning_rate": 1.1430770288097595e-05, "loss": 0.512, "step": 42897 }, { "epoch": 0.9098004284108503, "grad_norm": 0.37083613872528076, "learning_rate": 1.143044022312576e-05, "loss": 0.413, "step": 42898 }, { "epoch": 0.9098216368687833, "grad_norm": 0.5320044159889221, "learning_rate": 1.1430110156563005e-05, "loss": 0.4809, "step": 42899 }, { "epoch": 0.9098428453267163, "grad_norm": 0.3439125120639801, "learning_rate": 1.1429780088409693e-05, "loss": 0.5108, "step": 42900 }, { "epoch": 0.9098640537846493, "grad_norm": 0.36530932784080505, "learning_rate": 1.1429450018666193e-05, "loss": 0.4627, "step": 42901 }, { "epoch": 0.9098852622425824, "grad_norm": 0.37894922494888306, "learning_rate": 1.1429119947332869e-05, "loss": 0.6117, "step": 42902 }, { "epoch": 0.9099064707005153, "grad_norm": 0.3800247013568878, "learning_rate": 1.1428789874410089e-05, "loss": 0.4767, "step": 42903 }, { "epoch": 0.9099276791584484, "grad_norm": 0.45663541555404663, "learning_rate": 1.1428459799898225e-05, "loss": 0.4778, "step": 42904 }, { "epoch": 0.9099488876163814, "grad_norm": 0.8975268006324768, "learning_rate": 1.142812972379764e-05, "loss": 0.3726, "step": 42905 }, { "epoch": 0.9099700960743144, "grad_norm": 0.4092916250228882, "learning_rate": 1.1427799646108699e-05, "loss": 0.5122, "step": 42906 }, { "epoch": 0.9099913045322474, "grad_norm": 0.338439017534256, "learning_rate": 1.1427469566831776e-05, "loss": 0.471, "step": 42907 }, { "epoch": 0.9100125129901805, "grad_norm": 0.37452831864356995, "learning_rate": 1.1427139485967226e-05, "loss": 0.4372, "step": 42908 }, { "epoch": 0.9100337214481135, "grad_norm": 0.3674863874912262, "learning_rate": 1.1426809403515429e-05, "loss": 0.5164, "step": 42909 }, { "epoch": 0.9100549299060465, "grad_norm": 0.35374709963798523, "learning_rate": 1.1426479319476749e-05, "loss": 0.5119, "step": 42910 }, { "epoch": 0.9100761383639796, "grad_norm": 0.40886038541793823, "learning_rate": 1.142614923385155e-05, "loss": 0.4672, "step": 42911 }, { "epoch": 0.9100973468219126, "grad_norm": 0.36954089999198914, "learning_rate": 1.1425819146640196e-05, "loss": 0.5557, "step": 42912 }, { "epoch": 0.9101185552798456, "grad_norm": 0.32697591185569763, "learning_rate": 1.1425489057843063e-05, "loss": 0.445, "step": 42913 }, { "epoch": 0.9101397637377786, "grad_norm": 0.38862454891204834, "learning_rate": 1.1425158967460511e-05, "loss": 0.4269, "step": 42914 }, { "epoch": 0.9101609721957117, "grad_norm": 0.4518575966358185, "learning_rate": 1.1424828875492908e-05, "loss": 0.4265, "step": 42915 }, { "epoch": 0.9101821806536446, "grad_norm": 0.49609142541885376, "learning_rate": 1.1424498781940627e-05, "loss": 0.5357, "step": 42916 }, { "epoch": 0.9102033891115777, "grad_norm": 0.3913215100765228, "learning_rate": 1.1424168686804026e-05, "loss": 0.4646, "step": 42917 }, { "epoch": 0.9102245975695107, "grad_norm": 0.40269017219543457, "learning_rate": 1.1423838590083483e-05, "loss": 0.5734, "step": 42918 }, { "epoch": 0.9102458060274438, "grad_norm": 0.38779449462890625, "learning_rate": 1.1423508491779355e-05, "loss": 0.4977, "step": 42919 }, { "epoch": 0.9102670144853767, "grad_norm": 0.36988016963005066, "learning_rate": 1.1423178391892013e-05, "loss": 0.514, "step": 42920 }, { "epoch": 0.9102882229433098, "grad_norm": 0.37601563334465027, "learning_rate": 1.1422848290421826e-05, "loss": 0.4874, "step": 42921 }, { "epoch": 0.9103094314012428, "grad_norm": 0.37212178111076355, "learning_rate": 1.142251818736916e-05, "loss": 0.4888, "step": 42922 }, { "epoch": 0.9103306398591758, "grad_norm": 0.41068771481513977, "learning_rate": 1.1422188082734378e-05, "loss": 0.4776, "step": 42923 }, { "epoch": 0.9103518483171089, "grad_norm": 0.3707561492919922, "learning_rate": 1.1421857976517856e-05, "loss": 0.3735, "step": 42924 }, { "epoch": 0.9103730567750419, "grad_norm": 0.38185542821884155, "learning_rate": 1.1421527868719953e-05, "loss": 0.4886, "step": 42925 }, { "epoch": 0.910394265232975, "grad_norm": 0.4425688683986664, "learning_rate": 1.1421197759341038e-05, "loss": 0.4494, "step": 42926 }, { "epoch": 0.9104154736909079, "grad_norm": 0.35514315962791443, "learning_rate": 1.1420867648381482e-05, "loss": 0.5006, "step": 42927 }, { "epoch": 0.910436682148841, "grad_norm": 0.37840285897254944, "learning_rate": 1.1420537535841646e-05, "loss": 0.4726, "step": 42928 }, { "epoch": 0.910457890606774, "grad_norm": 0.3726578950881958, "learning_rate": 1.1420207421721903e-05, "loss": 0.5068, "step": 42929 }, { "epoch": 0.910479099064707, "grad_norm": 0.3792516887187958, "learning_rate": 1.1419877306022617e-05, "loss": 0.4792, "step": 42930 }, { "epoch": 0.91050030752264, "grad_norm": 0.34019020199775696, "learning_rate": 1.1419547188744155e-05, "loss": 0.4623, "step": 42931 }, { "epoch": 0.9105215159805731, "grad_norm": 0.3833669126033783, "learning_rate": 1.1419217069886885e-05, "loss": 0.4919, "step": 42932 }, { "epoch": 0.910542724438506, "grad_norm": 0.38639795780181885, "learning_rate": 1.1418886949451177e-05, "loss": 0.4153, "step": 42933 }, { "epoch": 0.9105639328964391, "grad_norm": 0.3406654894351959, "learning_rate": 1.141855682743739e-05, "loss": 0.4608, "step": 42934 }, { "epoch": 0.9105851413543721, "grad_norm": 0.41195428371429443, "learning_rate": 1.14182267038459e-05, "loss": 0.4384, "step": 42935 }, { "epoch": 0.9106063498123051, "grad_norm": 0.33845746517181396, "learning_rate": 1.1417896578677072e-05, "loss": 0.4644, "step": 42936 }, { "epoch": 0.9106275582702382, "grad_norm": 0.35424524545669556, "learning_rate": 1.1417566451931267e-05, "loss": 0.4333, "step": 42937 }, { "epoch": 0.9106487667281712, "grad_norm": 0.3006996810436249, "learning_rate": 1.1417236323608863e-05, "loss": 0.3901, "step": 42938 }, { "epoch": 0.9106699751861043, "grad_norm": 0.34068065881729126, "learning_rate": 1.1416906193710218e-05, "loss": 0.5107, "step": 42939 }, { "epoch": 0.9106911836440372, "grad_norm": 0.37715351581573486, "learning_rate": 1.14165760622357e-05, "loss": 0.5243, "step": 42940 }, { "epoch": 0.9107123921019703, "grad_norm": 0.3826509118080139, "learning_rate": 1.141624592918568e-05, "loss": 0.5182, "step": 42941 }, { "epoch": 0.9107336005599033, "grad_norm": 0.40469691157341003, "learning_rate": 1.1415915794560526e-05, "loss": 0.4804, "step": 42942 }, { "epoch": 0.9107548090178363, "grad_norm": 0.38155728578567505, "learning_rate": 1.1415585658360599e-05, "loss": 0.4297, "step": 42943 }, { "epoch": 0.9107760174757693, "grad_norm": 0.32947686314582825, "learning_rate": 1.1415255520586272e-05, "loss": 0.4302, "step": 42944 }, { "epoch": 0.9107972259337024, "grad_norm": 0.39305898547172546, "learning_rate": 1.141492538123791e-05, "loss": 0.5746, "step": 42945 }, { "epoch": 0.9108184343916353, "grad_norm": 0.3967646062374115, "learning_rate": 1.141459524031588e-05, "loss": 0.4828, "step": 42946 }, { "epoch": 0.9108396428495684, "grad_norm": 0.4086902141571045, "learning_rate": 1.141426509782055e-05, "loss": 0.5138, "step": 42947 }, { "epoch": 0.9108608513075014, "grad_norm": 0.35698792338371277, "learning_rate": 1.1413934953752284e-05, "loss": 0.4409, "step": 42948 }, { "epoch": 0.9108820597654345, "grad_norm": 0.3705235719680786, "learning_rate": 1.1413604808111456e-05, "loss": 0.4938, "step": 42949 }, { "epoch": 0.9109032682233674, "grad_norm": 0.34134870767593384, "learning_rate": 1.1413274660898428e-05, "loss": 0.5049, "step": 42950 }, { "epoch": 0.9109244766813005, "grad_norm": 0.49363523721694946, "learning_rate": 1.1412944512113566e-05, "loss": 0.5588, "step": 42951 }, { "epoch": 0.9109456851392336, "grad_norm": 0.35978248715400696, "learning_rate": 1.1412614361757241e-05, "loss": 0.4813, "step": 42952 }, { "epoch": 0.9109668935971665, "grad_norm": 0.5031745433807373, "learning_rate": 1.1412284209829818e-05, "loss": 0.4686, "step": 42953 }, { "epoch": 0.9109881020550996, "grad_norm": 0.34706392884254456, "learning_rate": 1.1411954056331663e-05, "loss": 0.5123, "step": 42954 }, { "epoch": 0.9110093105130326, "grad_norm": 0.35453903675079346, "learning_rate": 1.1411623901263146e-05, "loss": 0.4738, "step": 42955 }, { "epoch": 0.9110305189709657, "grad_norm": 0.40773749351501465, "learning_rate": 1.1411293744624636e-05, "loss": 0.48, "step": 42956 }, { "epoch": 0.9110517274288986, "grad_norm": 0.313192218542099, "learning_rate": 1.1410963586416495e-05, "loss": 0.3731, "step": 42957 }, { "epoch": 0.9110729358868317, "grad_norm": 0.35959240794181824, "learning_rate": 1.1410633426639095e-05, "loss": 0.5185, "step": 42958 }, { "epoch": 0.9110941443447647, "grad_norm": 0.3746204078197479, "learning_rate": 1.1410303265292797e-05, "loss": 0.4324, "step": 42959 }, { "epoch": 0.9111153528026977, "grad_norm": 0.3414975106716156, "learning_rate": 1.1409973102377974e-05, "loss": 0.5215, "step": 42960 }, { "epoch": 0.9111365612606307, "grad_norm": 0.39171555638313293, "learning_rate": 1.1409642937894991e-05, "loss": 0.4451, "step": 42961 }, { "epoch": 0.9111577697185638, "grad_norm": 0.4290499985218048, "learning_rate": 1.1409312771844215e-05, "loss": 0.5437, "step": 42962 }, { "epoch": 0.9111789781764967, "grad_norm": 0.34269294142723083, "learning_rate": 1.1408982604226012e-05, "loss": 0.5174, "step": 42963 }, { "epoch": 0.9112001866344298, "grad_norm": 0.3453727066516876, "learning_rate": 1.1408652435040755e-05, "loss": 0.4359, "step": 42964 }, { "epoch": 0.9112213950923629, "grad_norm": 0.3881259560585022, "learning_rate": 1.1408322264288802e-05, "loss": 0.4828, "step": 42965 }, { "epoch": 0.9112426035502958, "grad_norm": 0.3807469606399536, "learning_rate": 1.1407992091970527e-05, "loss": 0.4817, "step": 42966 }, { "epoch": 0.9112638120082289, "grad_norm": 0.9045802354812622, "learning_rate": 1.1407661918086298e-05, "loss": 0.4991, "step": 42967 }, { "epoch": 0.9112850204661619, "grad_norm": 0.3824078440666199, "learning_rate": 1.1407331742636479e-05, "loss": 0.4469, "step": 42968 }, { "epoch": 0.911306228924095, "grad_norm": 0.35099372267723083, "learning_rate": 1.1407001565621437e-05, "loss": 0.4687, "step": 42969 }, { "epoch": 0.9113274373820279, "grad_norm": 0.3604249060153961, "learning_rate": 1.1406671387041542e-05, "loss": 0.5411, "step": 42970 }, { "epoch": 0.911348645839961, "grad_norm": 0.4006010890007019, "learning_rate": 1.1406341206897154e-05, "loss": 0.5674, "step": 42971 }, { "epoch": 0.911369854297894, "grad_norm": 0.33691802620887756, "learning_rate": 1.140601102518865e-05, "loss": 0.4876, "step": 42972 }, { "epoch": 0.911391062755827, "grad_norm": 0.37108057737350464, "learning_rate": 1.140568084191639e-05, "loss": 0.4688, "step": 42973 }, { "epoch": 0.91141227121376, "grad_norm": 0.35840651392936707, "learning_rate": 1.1405350657080744e-05, "loss": 0.5218, "step": 42974 }, { "epoch": 0.9114334796716931, "grad_norm": 0.39758768677711487, "learning_rate": 1.1405020470682082e-05, "loss": 0.4142, "step": 42975 }, { "epoch": 0.911454688129626, "grad_norm": 0.40232744812965393, "learning_rate": 1.1404690282720767e-05, "loss": 0.5247, "step": 42976 }, { "epoch": 0.9114758965875591, "grad_norm": 0.353561133146286, "learning_rate": 1.140436009319717e-05, "loss": 0.4803, "step": 42977 }, { "epoch": 0.9114971050454922, "grad_norm": 0.34838318824768066, "learning_rate": 1.1404029902111653e-05, "loss": 0.5367, "step": 42978 }, { "epoch": 0.9115183135034252, "grad_norm": 0.33684033155441284, "learning_rate": 1.1403699709464583e-05, "loss": 0.4426, "step": 42979 }, { "epoch": 0.9115395219613582, "grad_norm": 0.42788195610046387, "learning_rate": 1.1403369515256335e-05, "loss": 0.4864, "step": 42980 }, { "epoch": 0.9115607304192912, "grad_norm": 0.3743428885936737, "learning_rate": 1.1403039319487273e-05, "loss": 0.4109, "step": 42981 }, { "epoch": 0.9115819388772243, "grad_norm": 0.34348949790000916, "learning_rate": 1.1402709122157758e-05, "loss": 0.4571, "step": 42982 }, { "epoch": 0.9116031473351572, "grad_norm": 0.34028148651123047, "learning_rate": 1.1402378923268167e-05, "loss": 0.47, "step": 42983 }, { "epoch": 0.9116243557930903, "grad_norm": 0.33363306522369385, "learning_rate": 1.1402048722818862e-05, "loss": 0.354, "step": 42984 }, { "epoch": 0.9116455642510233, "grad_norm": 0.38464799523353577, "learning_rate": 1.1401718520810206e-05, "loss": 0.4789, "step": 42985 }, { "epoch": 0.9116667727089564, "grad_norm": 0.35579949617385864, "learning_rate": 1.1401388317242571e-05, "loss": 0.4529, "step": 42986 }, { "epoch": 0.9116879811668893, "grad_norm": 0.34324711561203003, "learning_rate": 1.1401058112116327e-05, "loss": 0.4981, "step": 42987 }, { "epoch": 0.9117091896248224, "grad_norm": 0.33742979168891907, "learning_rate": 1.1400727905431838e-05, "loss": 0.4815, "step": 42988 }, { "epoch": 0.9117303980827554, "grad_norm": 0.38900935649871826, "learning_rate": 1.1400397697189473e-05, "loss": 0.5233, "step": 42989 }, { "epoch": 0.9117516065406884, "grad_norm": 0.430690199136734, "learning_rate": 1.1400067487389594e-05, "loss": 0.5373, "step": 42990 }, { "epoch": 0.9117728149986214, "grad_norm": 0.37797850370407104, "learning_rate": 1.1399737276032576e-05, "loss": 0.4736, "step": 42991 }, { "epoch": 0.9117940234565545, "grad_norm": 0.35016435384750366, "learning_rate": 1.1399407063118782e-05, "loss": 0.4892, "step": 42992 }, { "epoch": 0.9118152319144875, "grad_norm": 0.39091017842292786, "learning_rate": 1.1399076848648576e-05, "loss": 0.5154, "step": 42993 }, { "epoch": 0.9118364403724205, "grad_norm": 0.3314586281776428, "learning_rate": 1.1398746632622332e-05, "loss": 0.4513, "step": 42994 }, { "epoch": 0.9118576488303536, "grad_norm": 0.3791496455669403, "learning_rate": 1.1398416415040414e-05, "loss": 0.5823, "step": 42995 }, { "epoch": 0.9118788572882865, "grad_norm": 0.367191880941391, "learning_rate": 1.1398086195903188e-05, "loss": 0.432, "step": 42996 }, { "epoch": 0.9119000657462196, "grad_norm": 0.40184465050697327, "learning_rate": 1.1397755975211024e-05, "loss": 0.436, "step": 42997 }, { "epoch": 0.9119212742041526, "grad_norm": 0.360238641500473, "learning_rate": 1.139742575296429e-05, "loss": 0.4765, "step": 42998 }, { "epoch": 0.9119424826620857, "grad_norm": 0.4047694206237793, "learning_rate": 1.1397095529163345e-05, "loss": 0.5372, "step": 42999 }, { "epoch": 0.9119636911200186, "grad_norm": 0.49646276235580444, "learning_rate": 1.1396765303808568e-05, "loss": 0.4705, "step": 43000 }, { "epoch": 0.9119848995779517, "grad_norm": 0.3564465045928955, "learning_rate": 1.1396435076900318e-05, "loss": 0.4368, "step": 43001 }, { "epoch": 0.9120061080358847, "grad_norm": 0.3731026351451874, "learning_rate": 1.1396104848438966e-05, "loss": 0.5092, "step": 43002 }, { "epoch": 0.9120273164938177, "grad_norm": 0.41585665941238403, "learning_rate": 1.139577461842488e-05, "loss": 0.5609, "step": 43003 }, { "epoch": 0.9120485249517507, "grad_norm": 0.6937873959541321, "learning_rate": 1.1395444386858422e-05, "loss": 0.4154, "step": 43004 }, { "epoch": 0.9120697334096838, "grad_norm": 0.33053526282310486, "learning_rate": 1.1395114153739963e-05, "loss": 0.4563, "step": 43005 }, { "epoch": 0.9120909418676169, "grad_norm": 0.3087937831878662, "learning_rate": 1.1394783919069874e-05, "loss": 0.4231, "step": 43006 }, { "epoch": 0.9121121503255498, "grad_norm": 0.3162006735801697, "learning_rate": 1.1394453682848516e-05, "loss": 0.4697, "step": 43007 }, { "epoch": 0.9121333587834829, "grad_norm": 0.35244131088256836, "learning_rate": 1.139412344507626e-05, "loss": 0.4549, "step": 43008 }, { "epoch": 0.9121545672414159, "grad_norm": 0.36097127199172974, "learning_rate": 1.1393793205753471e-05, "loss": 0.4494, "step": 43009 }, { "epoch": 0.9121757756993489, "grad_norm": 0.3470951318740845, "learning_rate": 1.1393462964880517e-05, "loss": 0.4342, "step": 43010 }, { "epoch": 0.9121969841572819, "grad_norm": 0.38002076745033264, "learning_rate": 1.1393132722457766e-05, "loss": 0.5078, "step": 43011 }, { "epoch": 0.912218192615215, "grad_norm": 0.34661218523979187, "learning_rate": 1.1392802478485586e-05, "loss": 0.5027, "step": 43012 }, { "epoch": 0.9122394010731479, "grad_norm": 0.31954285502433777, "learning_rate": 1.139247223296434e-05, "loss": 0.4261, "step": 43013 }, { "epoch": 0.912260609531081, "grad_norm": 0.35925206542015076, "learning_rate": 1.13921419858944e-05, "loss": 0.4521, "step": 43014 }, { "epoch": 0.912281817989014, "grad_norm": 0.3815096318721771, "learning_rate": 1.1391811737276136e-05, "loss": 0.4046, "step": 43015 }, { "epoch": 0.912303026446947, "grad_norm": 0.34342601895332336, "learning_rate": 1.1391481487109905e-05, "loss": 0.5116, "step": 43016 }, { "epoch": 0.91232423490488, "grad_norm": 0.3787497580051422, "learning_rate": 1.1391151235396085e-05, "loss": 0.4054, "step": 43017 }, { "epoch": 0.9123454433628131, "grad_norm": 0.4067951440811157, "learning_rate": 1.1390820982135034e-05, "loss": 0.4527, "step": 43018 }, { "epoch": 0.9123666518207462, "grad_norm": 0.37472328543663025, "learning_rate": 1.1390490727327125e-05, "loss": 0.5107, "step": 43019 }, { "epoch": 0.9123878602786791, "grad_norm": 0.33160334825515747, "learning_rate": 1.1390160470972728e-05, "loss": 0.4738, "step": 43020 }, { "epoch": 0.9124090687366122, "grad_norm": 0.42204734683036804, "learning_rate": 1.1389830213072204e-05, "loss": 0.4232, "step": 43021 }, { "epoch": 0.9124302771945452, "grad_norm": 0.3848256766796112, "learning_rate": 1.1389499953625924e-05, "loss": 0.4539, "step": 43022 }, { "epoch": 0.9124514856524782, "grad_norm": 0.32546547055244446, "learning_rate": 1.1389169692634255e-05, "loss": 0.4113, "step": 43023 }, { "epoch": 0.9124726941104112, "grad_norm": 0.4079683721065521, "learning_rate": 1.138883943009756e-05, "loss": 0.4939, "step": 43024 }, { "epoch": 0.9124939025683443, "grad_norm": 0.4443478584289551, "learning_rate": 1.138850916601621e-05, "loss": 0.4504, "step": 43025 }, { "epoch": 0.9125151110262772, "grad_norm": 0.3767159581184387, "learning_rate": 1.1388178900390577e-05, "loss": 0.4372, "step": 43026 }, { "epoch": 0.9125363194842103, "grad_norm": 0.47159454226493835, "learning_rate": 1.1387848633221019e-05, "loss": 0.3933, "step": 43027 }, { "epoch": 0.9125575279421433, "grad_norm": 0.4138416647911072, "learning_rate": 1.138751836450791e-05, "loss": 0.5055, "step": 43028 }, { "epoch": 0.9125787364000764, "grad_norm": 0.32827529311180115, "learning_rate": 1.1387188094251615e-05, "loss": 0.4901, "step": 43029 }, { "epoch": 0.9125999448580093, "grad_norm": 0.3272095024585724, "learning_rate": 1.1386857822452497e-05, "loss": 0.4171, "step": 43030 }, { "epoch": 0.9126211533159424, "grad_norm": 0.4602898061275482, "learning_rate": 1.1386527549110933e-05, "loss": 0.5083, "step": 43031 }, { "epoch": 0.9126423617738755, "grad_norm": 0.3472025394439697, "learning_rate": 1.1386197274227286e-05, "loss": 0.5133, "step": 43032 }, { "epoch": 0.9126635702318084, "grad_norm": 0.3709583282470703, "learning_rate": 1.1385866997801918e-05, "loss": 0.5238, "step": 43033 }, { "epoch": 0.9126847786897415, "grad_norm": 0.4682452976703644, "learning_rate": 1.1385536719835204e-05, "loss": 0.4739, "step": 43034 }, { "epoch": 0.9127059871476745, "grad_norm": 0.3972998261451721, "learning_rate": 1.1385206440327508e-05, "loss": 0.4866, "step": 43035 }, { "epoch": 0.9127271956056076, "grad_norm": 0.35117727518081665, "learning_rate": 1.1384876159279197e-05, "loss": 0.5424, "step": 43036 }, { "epoch": 0.9127484040635405, "grad_norm": 0.35933318734169006, "learning_rate": 1.1384545876690636e-05, "loss": 0.4756, "step": 43037 }, { "epoch": 0.9127696125214736, "grad_norm": 0.37162697315216064, "learning_rate": 1.1384215592562198e-05, "loss": 0.5068, "step": 43038 }, { "epoch": 0.9127908209794066, "grad_norm": 0.3511698246002197, "learning_rate": 1.1383885306894247e-05, "loss": 0.4469, "step": 43039 }, { "epoch": 0.9128120294373396, "grad_norm": 0.3528377413749695, "learning_rate": 1.138355501968715e-05, "loss": 0.4713, "step": 43040 }, { "epoch": 0.9128332378952726, "grad_norm": 0.43095552921295166, "learning_rate": 1.1383224730941275e-05, "loss": 0.498, "step": 43041 }, { "epoch": 0.9128544463532057, "grad_norm": 0.3724808692932129, "learning_rate": 1.1382894440656993e-05, "loss": 0.5208, "step": 43042 }, { "epoch": 0.9128756548111386, "grad_norm": 0.4210536777973175, "learning_rate": 1.1382564148834663e-05, "loss": 0.5196, "step": 43043 }, { "epoch": 0.9128968632690717, "grad_norm": 0.3574357032775879, "learning_rate": 1.1382233855474658e-05, "loss": 0.4563, "step": 43044 }, { "epoch": 0.9129180717270047, "grad_norm": 0.4585782289505005, "learning_rate": 1.1381903560577348e-05, "loss": 0.5411, "step": 43045 }, { "epoch": 0.9129392801849378, "grad_norm": 0.3825102150440216, "learning_rate": 1.1381573264143095e-05, "loss": 0.4148, "step": 43046 }, { "epoch": 0.9129604886428708, "grad_norm": 0.3348381221294403, "learning_rate": 1.1381242966172265e-05, "loss": 0.4851, "step": 43047 }, { "epoch": 0.9129816971008038, "grad_norm": 0.4648350775241852, "learning_rate": 1.1380912666665234e-05, "loss": 0.5052, "step": 43048 }, { "epoch": 0.9130029055587369, "grad_norm": 0.3803057372570038, "learning_rate": 1.1380582365622361e-05, "loss": 0.5302, "step": 43049 }, { "epoch": 0.9130241140166698, "grad_norm": 0.39360183477401733, "learning_rate": 1.1380252063044016e-05, "loss": 0.3889, "step": 43050 }, { "epoch": 0.9130453224746029, "grad_norm": 0.3828537166118622, "learning_rate": 1.1379921758930567e-05, "loss": 0.4959, "step": 43051 }, { "epoch": 0.9130665309325359, "grad_norm": 0.3995368182659149, "learning_rate": 1.1379591453282381e-05, "loss": 0.51, "step": 43052 }, { "epoch": 0.9130877393904689, "grad_norm": 0.4016930162906647, "learning_rate": 1.1379261146099825e-05, "loss": 0.5271, "step": 43053 }, { "epoch": 0.9131089478484019, "grad_norm": 0.3476942479610443, "learning_rate": 1.137893083738327e-05, "loss": 0.4766, "step": 43054 }, { "epoch": 0.913130156306335, "grad_norm": 0.3541795015335083, "learning_rate": 1.1378600527133075e-05, "loss": 0.4808, "step": 43055 }, { "epoch": 0.913151364764268, "grad_norm": 0.4037705659866333, "learning_rate": 1.1378270215349615e-05, "loss": 0.4464, "step": 43056 }, { "epoch": 0.913172573222201, "grad_norm": 0.34654098749160767, "learning_rate": 1.1377939902033255e-05, "loss": 0.4872, "step": 43057 }, { "epoch": 0.913193781680134, "grad_norm": 0.3324229121208191, "learning_rate": 1.1377609587184361e-05, "loss": 0.4226, "step": 43058 }, { "epoch": 0.9132149901380671, "grad_norm": 0.3741917312145233, "learning_rate": 1.1377279270803304e-05, "loss": 0.5056, "step": 43059 }, { "epoch": 0.9132361985960001, "grad_norm": 0.41385653614997864, "learning_rate": 1.1376948952890447e-05, "loss": 0.4493, "step": 43060 }, { "epoch": 0.9132574070539331, "grad_norm": 0.34370487928390503, "learning_rate": 1.1376618633446159e-05, "loss": 0.4467, "step": 43061 }, { "epoch": 0.9132786155118662, "grad_norm": 0.3404875695705414, "learning_rate": 1.1376288312470812e-05, "loss": 0.4623, "step": 43062 }, { "epoch": 0.9132998239697991, "grad_norm": 0.34777382016181946, "learning_rate": 1.1375957989964763e-05, "loss": 0.4687, "step": 43063 }, { "epoch": 0.9133210324277322, "grad_norm": 0.3643876910209656, "learning_rate": 1.1375627665928386e-05, "loss": 0.5534, "step": 43064 }, { "epoch": 0.9133422408856652, "grad_norm": 0.40363627672195435, "learning_rate": 1.1375297340362053e-05, "loss": 0.4487, "step": 43065 }, { "epoch": 0.9133634493435983, "grad_norm": 0.35458943247795105, "learning_rate": 1.1374967013266123e-05, "loss": 0.4973, "step": 43066 }, { "epoch": 0.9133846578015312, "grad_norm": 0.35961440205574036, "learning_rate": 1.1374636684640964e-05, "loss": 0.4249, "step": 43067 }, { "epoch": 0.9134058662594643, "grad_norm": 0.3482266664505005, "learning_rate": 1.1374306354486953e-05, "loss": 0.4736, "step": 43068 }, { "epoch": 0.9134270747173973, "grad_norm": 0.3560916483402252, "learning_rate": 1.1373976022804445e-05, "loss": 0.4504, "step": 43069 }, { "epoch": 0.9134482831753303, "grad_norm": 0.3788173794746399, "learning_rate": 1.1373645689593812e-05, "loss": 0.4722, "step": 43070 }, { "epoch": 0.9134694916332633, "grad_norm": 0.35042402148246765, "learning_rate": 1.1373315354855425e-05, "loss": 0.4632, "step": 43071 }, { "epoch": 0.9134907000911964, "grad_norm": 0.3281191289424896, "learning_rate": 1.1372985018589646e-05, "loss": 0.4178, "step": 43072 }, { "epoch": 0.9135119085491294, "grad_norm": 0.3627420961856842, "learning_rate": 1.1372654680796849e-05, "loss": 0.4729, "step": 43073 }, { "epoch": 0.9135331170070624, "grad_norm": 0.4345669746398926, "learning_rate": 1.1372324341477396e-05, "loss": 0.4769, "step": 43074 }, { "epoch": 0.9135543254649955, "grad_norm": 0.3682572841644287, "learning_rate": 1.1371994000631652e-05, "loss": 0.4378, "step": 43075 }, { "epoch": 0.9135755339229285, "grad_norm": 0.3483439087867737, "learning_rate": 1.1371663658259993e-05, "loss": 0.4674, "step": 43076 }, { "epoch": 0.9135967423808615, "grad_norm": 0.3630687892436981, "learning_rate": 1.1371333314362781e-05, "loss": 0.5868, "step": 43077 }, { "epoch": 0.9136179508387945, "grad_norm": 0.32997557520866394, "learning_rate": 1.1371002968940381e-05, "loss": 0.4194, "step": 43078 }, { "epoch": 0.9136391592967276, "grad_norm": 0.40392443537712097, "learning_rate": 1.1370672621993166e-05, "loss": 0.4684, "step": 43079 }, { "epoch": 0.9136603677546605, "grad_norm": 0.361687570810318, "learning_rate": 1.1370342273521503e-05, "loss": 0.5072, "step": 43080 }, { "epoch": 0.9136815762125936, "grad_norm": 0.3803098499774933, "learning_rate": 1.1370011923525753e-05, "loss": 0.4894, "step": 43081 }, { "epoch": 0.9137027846705266, "grad_norm": 0.36587005853652954, "learning_rate": 1.1369681572006286e-05, "loss": 0.4783, "step": 43082 }, { "epoch": 0.9137239931284596, "grad_norm": 0.35303589701652527, "learning_rate": 1.1369351218963476e-05, "loss": 0.4734, "step": 43083 }, { "epoch": 0.9137452015863926, "grad_norm": 0.3470208942890167, "learning_rate": 1.1369020864397682e-05, "loss": 0.4705, "step": 43084 }, { "epoch": 0.9137664100443257, "grad_norm": 0.4119058847427368, "learning_rate": 1.1368690508309277e-05, "loss": 0.597, "step": 43085 }, { "epoch": 0.9137876185022586, "grad_norm": 0.34469422698020935, "learning_rate": 1.1368360150698625e-05, "loss": 0.4574, "step": 43086 }, { "epoch": 0.9138088269601917, "grad_norm": 0.45265766978263855, "learning_rate": 1.1368029791566097e-05, "loss": 0.478, "step": 43087 }, { "epoch": 0.9138300354181248, "grad_norm": 0.3799555003643036, "learning_rate": 1.1367699430912056e-05, "loss": 0.4881, "step": 43088 }, { "epoch": 0.9138512438760578, "grad_norm": 0.48759952187538147, "learning_rate": 1.136736906873687e-05, "loss": 0.4757, "step": 43089 }, { "epoch": 0.9138724523339908, "grad_norm": 0.456903874874115, "learning_rate": 1.1367038705040912e-05, "loss": 0.4971, "step": 43090 }, { "epoch": 0.9138936607919238, "grad_norm": 0.3735664486885071, "learning_rate": 1.1366708339824544e-05, "loss": 0.4897, "step": 43091 }, { "epoch": 0.9139148692498569, "grad_norm": 0.3691999614238739, "learning_rate": 1.1366377973088134e-05, "loss": 0.496, "step": 43092 }, { "epoch": 0.9139360777077898, "grad_norm": 0.33213531970977783, "learning_rate": 1.1366047604832051e-05, "loss": 0.5137, "step": 43093 }, { "epoch": 0.9139572861657229, "grad_norm": 0.379242867231369, "learning_rate": 1.136571723505666e-05, "loss": 0.5276, "step": 43094 }, { "epoch": 0.9139784946236559, "grad_norm": 0.4661176800727844, "learning_rate": 1.136538686376233e-05, "loss": 0.4495, "step": 43095 }, { "epoch": 0.913999703081589, "grad_norm": 0.46700695157051086, "learning_rate": 1.1365056490949433e-05, "loss": 0.4513, "step": 43096 }, { "epoch": 0.9140209115395219, "grad_norm": 0.38000357151031494, "learning_rate": 1.136472611661833e-05, "loss": 0.5067, "step": 43097 }, { "epoch": 0.914042119997455, "grad_norm": 0.3528255224227905, "learning_rate": 1.1364395740769386e-05, "loss": 0.488, "step": 43098 }, { "epoch": 0.914063328455388, "grad_norm": 0.4083947241306305, "learning_rate": 1.136406536340298e-05, "loss": 0.4721, "step": 43099 }, { "epoch": 0.914084536913321, "grad_norm": 0.37141329050064087, "learning_rate": 1.1363734984519468e-05, "loss": 0.4878, "step": 43100 }, { "epoch": 0.9141057453712541, "grad_norm": 0.4676086902618408, "learning_rate": 1.1363404604119222e-05, "loss": 0.5294, "step": 43101 }, { "epoch": 0.9141269538291871, "grad_norm": 0.4235634207725525, "learning_rate": 1.136307422220261e-05, "loss": 0.529, "step": 43102 }, { "epoch": 0.9141481622871201, "grad_norm": 0.44236406683921814, "learning_rate": 1.1362743838769998e-05, "loss": 0.5594, "step": 43103 }, { "epoch": 0.9141693707450531, "grad_norm": 0.5126263499259949, "learning_rate": 1.1362413453821754e-05, "loss": 0.4647, "step": 43104 }, { "epoch": 0.9141905792029862, "grad_norm": 0.3663859963417053, "learning_rate": 1.136208306735825e-05, "loss": 0.5098, "step": 43105 }, { "epoch": 0.9142117876609192, "grad_norm": 0.4459274113178253, "learning_rate": 1.1361752679379842e-05, "loss": 0.5066, "step": 43106 }, { "epoch": 0.9142329961188522, "grad_norm": 0.4244026839733124, "learning_rate": 1.1361422289886907e-05, "loss": 0.5078, "step": 43107 }, { "epoch": 0.9142542045767852, "grad_norm": 0.39461594820022583, "learning_rate": 1.1361091898879813e-05, "loss": 0.458, "step": 43108 }, { "epoch": 0.9142754130347183, "grad_norm": 0.3368065357208252, "learning_rate": 1.1360761506358921e-05, "loss": 0.517, "step": 43109 }, { "epoch": 0.9142966214926512, "grad_norm": 0.33502286672592163, "learning_rate": 1.1360431112324604e-05, "loss": 0.4758, "step": 43110 }, { "epoch": 0.9143178299505843, "grad_norm": 2.0052318572998047, "learning_rate": 1.1360100716777227e-05, "loss": 0.5134, "step": 43111 }, { "epoch": 0.9143390384085173, "grad_norm": 0.3853219747543335, "learning_rate": 1.1359770319717155e-05, "loss": 0.4982, "step": 43112 }, { "epoch": 0.9143602468664503, "grad_norm": 0.35083651542663574, "learning_rate": 1.1359439921144764e-05, "loss": 0.5147, "step": 43113 }, { "epoch": 0.9143814553243834, "grad_norm": 0.37434273958206177, "learning_rate": 1.1359109521060412e-05, "loss": 0.4105, "step": 43114 }, { "epoch": 0.9144026637823164, "grad_norm": 0.41726627945899963, "learning_rate": 1.1358779119464468e-05, "loss": 0.513, "step": 43115 }, { "epoch": 0.9144238722402495, "grad_norm": 0.34082093834877014, "learning_rate": 1.1358448716357304e-05, "loss": 0.485, "step": 43116 }, { "epoch": 0.9144450806981824, "grad_norm": 0.3617182970046997, "learning_rate": 1.1358118311739285e-05, "loss": 0.4776, "step": 43117 }, { "epoch": 0.9144662891561155, "grad_norm": 0.42840927839279175, "learning_rate": 1.135778790561078e-05, "loss": 0.4626, "step": 43118 }, { "epoch": 0.9144874976140485, "grad_norm": 0.36041685938835144, "learning_rate": 1.1357457497972155e-05, "loss": 0.5115, "step": 43119 }, { "epoch": 0.9145087060719815, "grad_norm": 0.35246753692626953, "learning_rate": 1.1357127088823777e-05, "loss": 0.5109, "step": 43120 }, { "epoch": 0.9145299145299145, "grad_norm": 0.346719890832901, "learning_rate": 1.1356796678166011e-05, "loss": 0.4498, "step": 43121 }, { "epoch": 0.9145511229878476, "grad_norm": 0.3897184729576111, "learning_rate": 1.135646626599923e-05, "loss": 0.5528, "step": 43122 }, { "epoch": 0.9145723314457805, "grad_norm": 0.33615049719810486, "learning_rate": 1.13561358523238e-05, "loss": 0.4359, "step": 43123 }, { "epoch": 0.9145935399037136, "grad_norm": 0.40486541390419006, "learning_rate": 1.1355805437140088e-05, "loss": 0.4533, "step": 43124 }, { "epoch": 0.9146147483616466, "grad_norm": 0.3767361342906952, "learning_rate": 1.1355475020448461e-05, "loss": 0.5108, "step": 43125 }, { "epoch": 0.9146359568195797, "grad_norm": 0.32772424817085266, "learning_rate": 1.1355144602249283e-05, "loss": 0.5016, "step": 43126 }, { "epoch": 0.9146571652775126, "grad_norm": 0.352150559425354, "learning_rate": 1.1354814182542925e-05, "loss": 0.4988, "step": 43127 }, { "epoch": 0.9146783737354457, "grad_norm": 0.4025956690311432, "learning_rate": 1.1354483761329757e-05, "loss": 0.5039, "step": 43128 }, { "epoch": 0.9146995821933788, "grad_norm": 0.3466876149177551, "learning_rate": 1.1354153338610144e-05, "loss": 0.4432, "step": 43129 }, { "epoch": 0.9147207906513117, "grad_norm": 0.3855605125427246, "learning_rate": 1.1353822914384453e-05, "loss": 0.49, "step": 43130 }, { "epoch": 0.9147419991092448, "grad_norm": 0.3637312352657318, "learning_rate": 1.135349248865305e-05, "loss": 0.5335, "step": 43131 }, { "epoch": 0.9147632075671778, "grad_norm": 0.42527395486831665, "learning_rate": 1.1353162061416307e-05, "loss": 0.591, "step": 43132 }, { "epoch": 0.9147844160251108, "grad_norm": 0.3558993339538574, "learning_rate": 1.1352831632674588e-05, "loss": 0.4374, "step": 43133 }, { "epoch": 0.9148056244830438, "grad_norm": 0.3268187642097473, "learning_rate": 1.1352501202428263e-05, "loss": 0.4673, "step": 43134 }, { "epoch": 0.9148268329409769, "grad_norm": 0.39042073488235474, "learning_rate": 1.1352170770677695e-05, "loss": 0.4849, "step": 43135 }, { "epoch": 0.9148480413989099, "grad_norm": 0.36987948417663574, "learning_rate": 1.1351840337423257e-05, "loss": 0.5752, "step": 43136 }, { "epoch": 0.9148692498568429, "grad_norm": 0.43531501293182373, "learning_rate": 1.1351509902665312e-05, "loss": 0.4716, "step": 43137 }, { "epoch": 0.9148904583147759, "grad_norm": 0.37102586030960083, "learning_rate": 1.135117946640423e-05, "loss": 0.5537, "step": 43138 }, { "epoch": 0.914911666772709, "grad_norm": 0.34603479504585266, "learning_rate": 1.1350849028640378e-05, "loss": 0.5042, "step": 43139 }, { "epoch": 0.9149328752306419, "grad_norm": 0.36833134293556213, "learning_rate": 1.1350518589374122e-05, "loss": 0.5326, "step": 43140 }, { "epoch": 0.914954083688575, "grad_norm": 0.41680070757865906, "learning_rate": 1.1350188148605833e-05, "loss": 0.5045, "step": 43141 }, { "epoch": 0.9149752921465081, "grad_norm": 0.40324607491493225, "learning_rate": 1.1349857706335876e-05, "loss": 0.5104, "step": 43142 }, { "epoch": 0.914996500604441, "grad_norm": 0.39302298426628113, "learning_rate": 1.1349527262564619e-05, "loss": 0.5265, "step": 43143 }, { "epoch": 0.9150177090623741, "grad_norm": 0.3473525047302246, "learning_rate": 1.134919681729243e-05, "loss": 0.4648, "step": 43144 }, { "epoch": 0.9150389175203071, "grad_norm": 0.497087299823761, "learning_rate": 1.1348866370519676e-05, "loss": 0.4859, "step": 43145 }, { "epoch": 0.9150601259782402, "grad_norm": 0.4758269786834717, "learning_rate": 1.1348535922246722e-05, "loss": 0.4992, "step": 43146 }, { "epoch": 0.9150813344361731, "grad_norm": 0.33359208703041077, "learning_rate": 1.134820547247394e-05, "loss": 0.5287, "step": 43147 }, { "epoch": 0.9151025428941062, "grad_norm": 0.36693742871284485, "learning_rate": 1.1347875021201697e-05, "loss": 0.4277, "step": 43148 }, { "epoch": 0.9151237513520392, "grad_norm": 0.39104533195495605, "learning_rate": 1.1347544568430355e-05, "loss": 0.4891, "step": 43149 }, { "epoch": 0.9151449598099722, "grad_norm": 0.32951247692108154, "learning_rate": 1.1347214114160292e-05, "loss": 0.4776, "step": 43150 }, { "epoch": 0.9151661682679052, "grad_norm": 0.3057129383087158, "learning_rate": 1.1346883658391862e-05, "loss": 0.4471, "step": 43151 }, { "epoch": 0.9151873767258383, "grad_norm": 0.33322980999946594, "learning_rate": 1.1346553201125443e-05, "loss": 0.4334, "step": 43152 }, { "epoch": 0.9152085851837712, "grad_norm": 0.41086483001708984, "learning_rate": 1.1346222742361401e-05, "loss": 0.4843, "step": 43153 }, { "epoch": 0.9152297936417043, "grad_norm": 0.3654178977012634, "learning_rate": 1.1345892282100099e-05, "loss": 0.4744, "step": 43154 }, { "epoch": 0.9152510020996374, "grad_norm": 0.3507488965988159, "learning_rate": 1.1345561820341911e-05, "loss": 0.4907, "step": 43155 }, { "epoch": 0.9152722105575704, "grad_norm": 0.4877614974975586, "learning_rate": 1.1345231357087199e-05, "loss": 0.4666, "step": 43156 }, { "epoch": 0.9152934190155034, "grad_norm": 0.3632550835609436, "learning_rate": 1.134490089233633e-05, "loss": 0.4845, "step": 43157 }, { "epoch": 0.9153146274734364, "grad_norm": 0.35459500551223755, "learning_rate": 1.1344570426089676e-05, "loss": 0.4811, "step": 43158 }, { "epoch": 0.9153358359313695, "grad_norm": 0.3365909159183502, "learning_rate": 1.13442399583476e-05, "loss": 0.538, "step": 43159 }, { "epoch": 0.9153570443893024, "grad_norm": 0.3584827780723572, "learning_rate": 1.1343909489110474e-05, "loss": 0.5137, "step": 43160 }, { "epoch": 0.9153782528472355, "grad_norm": 0.3938463032245636, "learning_rate": 1.1343579018378665e-05, "loss": 0.4187, "step": 43161 }, { "epoch": 0.9153994613051685, "grad_norm": 0.3529966175556183, "learning_rate": 1.134324854615254e-05, "loss": 0.4423, "step": 43162 }, { "epoch": 0.9154206697631015, "grad_norm": 0.5022026300430298, "learning_rate": 1.1342918072432461e-05, "loss": 0.438, "step": 43163 }, { "epoch": 0.9154418782210345, "grad_norm": 0.3340532183647156, "learning_rate": 1.1342587597218804e-05, "loss": 0.4824, "step": 43164 }, { "epoch": 0.9154630866789676, "grad_norm": 0.3829314410686493, "learning_rate": 1.134225712051193e-05, "loss": 0.4286, "step": 43165 }, { "epoch": 0.9154842951369005, "grad_norm": 0.3827975392341614, "learning_rate": 1.134192664231221e-05, "loss": 0.4711, "step": 43166 }, { "epoch": 0.9155055035948336, "grad_norm": 0.35461127758026123, "learning_rate": 1.1341596162620011e-05, "loss": 0.5527, "step": 43167 }, { "epoch": 0.9155267120527666, "grad_norm": 0.3523899018764496, "learning_rate": 1.13412656814357e-05, "loss": 0.492, "step": 43168 }, { "epoch": 0.9155479205106997, "grad_norm": 0.47673121094703674, "learning_rate": 1.1340935198759646e-05, "loss": 0.4675, "step": 43169 }, { "epoch": 0.9155691289686327, "grad_norm": 0.5484430193901062, "learning_rate": 1.1340604714592218e-05, "loss": 0.4263, "step": 43170 }, { "epoch": 0.9155903374265657, "grad_norm": 0.3755481541156769, "learning_rate": 1.1340274228933776e-05, "loss": 0.4823, "step": 43171 }, { "epoch": 0.9156115458844988, "grad_norm": 0.365235298871994, "learning_rate": 1.1339943741784693e-05, "loss": 0.4863, "step": 43172 }, { "epoch": 0.9156327543424317, "grad_norm": 0.3732090890407562, "learning_rate": 1.133961325314534e-05, "loss": 0.5747, "step": 43173 }, { "epoch": 0.9156539628003648, "grad_norm": 0.39008423686027527, "learning_rate": 1.1339282763016077e-05, "loss": 0.5478, "step": 43174 }, { "epoch": 0.9156751712582978, "grad_norm": 0.4490337073802948, "learning_rate": 1.133895227139728e-05, "loss": 0.4993, "step": 43175 }, { "epoch": 0.9156963797162309, "grad_norm": 0.40780413150787354, "learning_rate": 1.133862177828931e-05, "loss": 0.5527, "step": 43176 }, { "epoch": 0.9157175881741638, "grad_norm": 0.3615929186344147, "learning_rate": 1.1338291283692535e-05, "loss": 0.4407, "step": 43177 }, { "epoch": 0.9157387966320969, "grad_norm": 0.4003591239452362, "learning_rate": 1.1337960787607322e-05, "loss": 0.469, "step": 43178 }, { "epoch": 0.9157600050900299, "grad_norm": 0.33926522731781006, "learning_rate": 1.1337630290034046e-05, "loss": 0.5074, "step": 43179 }, { "epoch": 0.9157812135479629, "grad_norm": 0.3680749237537384, "learning_rate": 1.1337299790973064e-05, "loss": 0.5761, "step": 43180 }, { "epoch": 0.9158024220058959, "grad_norm": 0.33668145537376404, "learning_rate": 1.1336969290424753e-05, "loss": 0.4243, "step": 43181 }, { "epoch": 0.915823630463829, "grad_norm": 0.381794273853302, "learning_rate": 1.1336638788389473e-05, "loss": 0.4948, "step": 43182 }, { "epoch": 0.915844838921762, "grad_norm": 0.3724009692668915, "learning_rate": 1.1336308284867599e-05, "loss": 0.4984, "step": 43183 }, { "epoch": 0.915866047379695, "grad_norm": 0.3470938503742218, "learning_rate": 1.133597777985949e-05, "loss": 0.3646, "step": 43184 }, { "epoch": 0.9158872558376281, "grad_norm": 0.3993149697780609, "learning_rate": 1.133564727336552e-05, "loss": 0.4914, "step": 43185 }, { "epoch": 0.915908464295561, "grad_norm": 0.35975518822669983, "learning_rate": 1.1335316765386058e-05, "loss": 0.4507, "step": 43186 }, { "epoch": 0.9159296727534941, "grad_norm": 0.4052256643772125, "learning_rate": 1.1334986255921465e-05, "loss": 0.5386, "step": 43187 }, { "epoch": 0.9159508812114271, "grad_norm": 0.3917335569858551, "learning_rate": 1.1334655744972111e-05, "loss": 0.5012, "step": 43188 }, { "epoch": 0.9159720896693602, "grad_norm": 0.3466201722621918, "learning_rate": 1.1334325232538368e-05, "loss": 0.4292, "step": 43189 }, { "epoch": 0.9159932981272931, "grad_norm": 0.3976382613182068, "learning_rate": 1.1333994718620598e-05, "loss": 0.4599, "step": 43190 }, { "epoch": 0.9160145065852262, "grad_norm": 0.4268149435520172, "learning_rate": 1.1333664203219168e-05, "loss": 0.5061, "step": 43191 }, { "epoch": 0.9160357150431592, "grad_norm": 0.48986512422561646, "learning_rate": 1.1333333686334452e-05, "loss": 0.4854, "step": 43192 }, { "epoch": 0.9160569235010922, "grad_norm": 0.38365575671195984, "learning_rate": 1.1333003167966811e-05, "loss": 0.5378, "step": 43193 }, { "epoch": 0.9160781319590252, "grad_norm": 0.37531742453575134, "learning_rate": 1.1332672648116618e-05, "loss": 0.5504, "step": 43194 }, { "epoch": 0.9160993404169583, "grad_norm": 0.3769119679927826, "learning_rate": 1.1332342126784237e-05, "loss": 0.5151, "step": 43195 }, { "epoch": 0.9161205488748914, "grad_norm": 0.8453444242477417, "learning_rate": 1.1332011603970037e-05, "loss": 0.568, "step": 43196 }, { "epoch": 0.9161417573328243, "grad_norm": 0.43609780073165894, "learning_rate": 1.1331681079674384e-05, "loss": 0.5708, "step": 43197 }, { "epoch": 0.9161629657907574, "grad_norm": 0.3619401156902313, "learning_rate": 1.133135055389765e-05, "loss": 0.4984, "step": 43198 }, { "epoch": 0.9161841742486904, "grad_norm": 0.3527255654335022, "learning_rate": 1.1331020026640194e-05, "loss": 0.4207, "step": 43199 }, { "epoch": 0.9162053827066234, "grad_norm": 0.3886762857437134, "learning_rate": 1.1330689497902393e-05, "loss": 0.5211, "step": 43200 }, { "epoch": 0.9162265911645564, "grad_norm": 0.3621709644794464, "learning_rate": 1.1330358967684611e-05, "loss": 0.4292, "step": 43201 }, { "epoch": 0.9162477996224895, "grad_norm": 0.3644270896911621, "learning_rate": 1.1330028435987214e-05, "loss": 0.4844, "step": 43202 }, { "epoch": 0.9162690080804224, "grad_norm": 0.3793310821056366, "learning_rate": 1.1329697902810573e-05, "loss": 0.5503, "step": 43203 }, { "epoch": 0.9162902165383555, "grad_norm": 0.3746313750743866, "learning_rate": 1.132936736815505e-05, "loss": 0.4581, "step": 43204 }, { "epoch": 0.9163114249962885, "grad_norm": 0.35710349678993225, "learning_rate": 1.132903683202102e-05, "loss": 0.4414, "step": 43205 }, { "epoch": 0.9163326334542216, "grad_norm": 0.34599506855010986, "learning_rate": 1.1328706294408844e-05, "loss": 0.4952, "step": 43206 }, { "epoch": 0.9163538419121545, "grad_norm": 0.3477385640144348, "learning_rate": 1.1328375755318894e-05, "loss": 0.4955, "step": 43207 }, { "epoch": 0.9163750503700876, "grad_norm": 0.3861047625541687, "learning_rate": 1.1328045214751535e-05, "loss": 0.4751, "step": 43208 }, { "epoch": 0.9163962588280206, "grad_norm": 0.3605585992336273, "learning_rate": 1.1327714672707138e-05, "loss": 0.4639, "step": 43209 }, { "epoch": 0.9164174672859536, "grad_norm": 0.3914366364479065, "learning_rate": 1.1327384129186062e-05, "loss": 0.5585, "step": 43210 }, { "epoch": 0.9164386757438867, "grad_norm": 0.35382798314094543, "learning_rate": 1.1327053584188685e-05, "loss": 0.4839, "step": 43211 }, { "epoch": 0.9164598842018197, "grad_norm": 0.37577059864997864, "learning_rate": 1.1326723037715373e-05, "loss": 0.5003, "step": 43212 }, { "epoch": 0.9164810926597527, "grad_norm": 0.33042627573013306, "learning_rate": 1.1326392489766487e-05, "loss": 0.4565, "step": 43213 }, { "epoch": 0.9165023011176857, "grad_norm": 0.4284682869911194, "learning_rate": 1.1326061940342401e-05, "loss": 0.52, "step": 43214 }, { "epoch": 0.9165235095756188, "grad_norm": 0.44429251551628113, "learning_rate": 1.1325731389443481e-05, "loss": 0.5304, "step": 43215 }, { "epoch": 0.9165447180335518, "grad_norm": 0.3524504601955414, "learning_rate": 1.1325400837070093e-05, "loss": 0.5164, "step": 43216 }, { "epoch": 0.9165659264914848, "grad_norm": 0.37743639945983887, "learning_rate": 1.1325070283222605e-05, "loss": 0.4563, "step": 43217 }, { "epoch": 0.9165871349494178, "grad_norm": 0.41451895236968994, "learning_rate": 1.1324739727901389e-05, "loss": 0.5523, "step": 43218 }, { "epoch": 0.9166083434073509, "grad_norm": 0.4446355402469635, "learning_rate": 1.1324409171106802e-05, "loss": 0.4786, "step": 43219 }, { "epoch": 0.9166295518652838, "grad_norm": 0.3732025623321533, "learning_rate": 1.1324078612839224e-05, "loss": 0.4097, "step": 43220 }, { "epoch": 0.9166507603232169, "grad_norm": 0.4738466441631317, "learning_rate": 1.1323748053099018e-05, "loss": 0.439, "step": 43221 }, { "epoch": 0.9166719687811499, "grad_norm": 0.3876584470272064, "learning_rate": 1.1323417491886548e-05, "loss": 0.4643, "step": 43222 }, { "epoch": 0.916693177239083, "grad_norm": 0.45473411679267883, "learning_rate": 1.1323086929202185e-05, "loss": 0.5528, "step": 43223 }, { "epoch": 0.916714385697016, "grad_norm": 0.3791190981864929, "learning_rate": 1.1322756365046297e-05, "loss": 0.5241, "step": 43224 }, { "epoch": 0.916735594154949, "grad_norm": 0.3356103003025055, "learning_rate": 1.132242579941925e-05, "loss": 0.4041, "step": 43225 }, { "epoch": 0.9167568026128821, "grad_norm": 0.41775602102279663, "learning_rate": 1.1322095232321413e-05, "loss": 0.4395, "step": 43226 }, { "epoch": 0.916778011070815, "grad_norm": 0.4228273332118988, "learning_rate": 1.1321764663753151e-05, "loss": 0.537, "step": 43227 }, { "epoch": 0.9167992195287481, "grad_norm": 0.39914533495903015, "learning_rate": 1.132143409371484e-05, "loss": 0.4802, "step": 43228 }, { "epoch": 0.9168204279866811, "grad_norm": 0.38752302527427673, "learning_rate": 1.1321103522206836e-05, "loss": 0.495, "step": 43229 }, { "epoch": 0.9168416364446141, "grad_norm": 0.36666175723075867, "learning_rate": 1.1320772949229516e-05, "loss": 0.491, "step": 43230 }, { "epoch": 0.9168628449025471, "grad_norm": 0.38016483187675476, "learning_rate": 1.1320442374783238e-05, "loss": 0.4797, "step": 43231 }, { "epoch": 0.9168840533604802, "grad_norm": 0.4771594703197479, "learning_rate": 1.132011179886838e-05, "loss": 0.5008, "step": 43232 }, { "epoch": 0.9169052618184131, "grad_norm": 0.40506529808044434, "learning_rate": 1.1319781221485305e-05, "loss": 0.4749, "step": 43233 }, { "epoch": 0.9169264702763462, "grad_norm": 0.38555917143821716, "learning_rate": 1.131945064263438e-05, "loss": 0.4742, "step": 43234 }, { "epoch": 0.9169476787342792, "grad_norm": 0.3482450842857361, "learning_rate": 1.1319120062315973e-05, "loss": 0.4489, "step": 43235 }, { "epoch": 0.9169688871922123, "grad_norm": 0.33618849515914917, "learning_rate": 1.131878948053045e-05, "loss": 0.3792, "step": 43236 }, { "epoch": 0.9169900956501453, "grad_norm": 0.33583804965019226, "learning_rate": 1.1318458897278185e-05, "loss": 0.4507, "step": 43237 }, { "epoch": 0.9170113041080783, "grad_norm": 0.426234632730484, "learning_rate": 1.131812831255954e-05, "loss": 0.4902, "step": 43238 }, { "epoch": 0.9170325125660114, "grad_norm": 0.3930872082710266, "learning_rate": 1.1317797726374883e-05, "loss": 0.5097, "step": 43239 }, { "epoch": 0.9170537210239443, "grad_norm": 0.35010576248168945, "learning_rate": 1.1317467138724586e-05, "loss": 0.49, "step": 43240 }, { "epoch": 0.9170749294818774, "grad_norm": 0.3407072424888611, "learning_rate": 1.1317136549609009e-05, "loss": 0.4926, "step": 43241 }, { "epoch": 0.9170961379398104, "grad_norm": 0.4239867925643921, "learning_rate": 1.1316805959028525e-05, "loss": 0.5294, "step": 43242 }, { "epoch": 0.9171173463977434, "grad_norm": 0.3838016092777252, "learning_rate": 1.1316475366983503e-05, "loss": 0.5428, "step": 43243 }, { "epoch": 0.9171385548556764, "grad_norm": 0.4432675540447235, "learning_rate": 1.131614477347431e-05, "loss": 0.4674, "step": 43244 }, { "epoch": 0.9171597633136095, "grad_norm": 0.37730878591537476, "learning_rate": 1.1315814178501307e-05, "loss": 0.4851, "step": 43245 }, { "epoch": 0.9171809717715425, "grad_norm": 0.5982044339179993, "learning_rate": 1.1315483582064872e-05, "loss": 0.5281, "step": 43246 }, { "epoch": 0.9172021802294755, "grad_norm": 0.3083787262439728, "learning_rate": 1.1315152984165366e-05, "loss": 0.4584, "step": 43247 }, { "epoch": 0.9172233886874085, "grad_norm": 0.42171111702919006, "learning_rate": 1.1314822384803157e-05, "loss": 0.5419, "step": 43248 }, { "epoch": 0.9172445971453416, "grad_norm": 0.4578779935836792, "learning_rate": 1.1314491783978614e-05, "loss": 0.5158, "step": 43249 }, { "epoch": 0.9172658056032745, "grad_norm": 0.35436493158340454, "learning_rate": 1.1314161181692105e-05, "loss": 0.5044, "step": 43250 }, { "epoch": 0.9172870140612076, "grad_norm": 0.38599735498428345, "learning_rate": 1.1313830577943999e-05, "loss": 0.4736, "step": 43251 }, { "epoch": 0.9173082225191407, "grad_norm": 0.32799941301345825, "learning_rate": 1.1313499972734661e-05, "loss": 0.4685, "step": 43252 }, { "epoch": 0.9173294309770736, "grad_norm": 0.34524887800216675, "learning_rate": 1.1313169366064459e-05, "loss": 0.4635, "step": 43253 }, { "epoch": 0.9173506394350067, "grad_norm": 0.3719719648361206, "learning_rate": 1.1312838757933762e-05, "loss": 0.4765, "step": 43254 }, { "epoch": 0.9173718478929397, "grad_norm": 0.37693876028060913, "learning_rate": 1.1312508148342936e-05, "loss": 0.4562, "step": 43255 }, { "epoch": 0.9173930563508728, "grad_norm": 0.41703975200653076, "learning_rate": 1.131217753729235e-05, "loss": 0.5431, "step": 43256 }, { "epoch": 0.9174142648088057, "grad_norm": 0.3585234582424164, "learning_rate": 1.1311846924782374e-05, "loss": 0.4832, "step": 43257 }, { "epoch": 0.9174354732667388, "grad_norm": 0.4176759719848633, "learning_rate": 1.1311516310813374e-05, "loss": 0.5219, "step": 43258 }, { "epoch": 0.9174566817246718, "grad_norm": 0.4217700958251953, "learning_rate": 1.1311185695385713e-05, "loss": 0.455, "step": 43259 }, { "epoch": 0.9174778901826048, "grad_norm": 0.3806590735912323, "learning_rate": 1.1310855078499765e-05, "loss": 0.4805, "step": 43260 }, { "epoch": 0.9174990986405378, "grad_norm": 0.3538411557674408, "learning_rate": 1.1310524460155894e-05, "loss": 0.4267, "step": 43261 }, { "epoch": 0.9175203070984709, "grad_norm": 0.3774779736995697, "learning_rate": 1.1310193840354468e-05, "loss": 0.4612, "step": 43262 }, { "epoch": 0.9175415155564038, "grad_norm": 0.43314090371131897, "learning_rate": 1.1309863219095858e-05, "loss": 0.4408, "step": 43263 }, { "epoch": 0.9175627240143369, "grad_norm": 0.49886175990104675, "learning_rate": 1.1309532596380428e-05, "loss": 0.5097, "step": 43264 }, { "epoch": 0.91758393247227, "grad_norm": 0.358906626701355, "learning_rate": 1.130920197220855e-05, "loss": 0.4979, "step": 43265 }, { "epoch": 0.917605140930203, "grad_norm": 0.3487749397754669, "learning_rate": 1.1308871346580588e-05, "loss": 0.5037, "step": 43266 }, { "epoch": 0.917626349388136, "grad_norm": 0.47506698966026306, "learning_rate": 1.130854071949691e-05, "loss": 0.4518, "step": 43267 }, { "epoch": 0.917647557846069, "grad_norm": 0.3380051553249359, "learning_rate": 1.1308210090957883e-05, "loss": 0.5225, "step": 43268 }, { "epoch": 0.9176687663040021, "grad_norm": 0.36218753457069397, "learning_rate": 1.1307879460963878e-05, "loss": 0.4782, "step": 43269 }, { "epoch": 0.917689974761935, "grad_norm": 0.36724239587783813, "learning_rate": 1.1307548829515258e-05, "loss": 0.4621, "step": 43270 }, { "epoch": 0.9177111832198681, "grad_norm": 0.4030298888683319, "learning_rate": 1.1307218196612397e-05, "loss": 0.5393, "step": 43271 }, { "epoch": 0.9177323916778011, "grad_norm": 0.4064674377441406, "learning_rate": 1.1306887562255659e-05, "loss": 0.4819, "step": 43272 }, { "epoch": 0.9177536001357341, "grad_norm": 0.33833885192871094, "learning_rate": 1.130655692644541e-05, "loss": 0.4331, "step": 43273 }, { "epoch": 0.9177748085936671, "grad_norm": 0.35620665550231934, "learning_rate": 1.130622628918202e-05, "loss": 0.4793, "step": 43274 }, { "epoch": 0.9177960170516002, "grad_norm": 0.49176761507987976, "learning_rate": 1.130589565046586e-05, "loss": 0.5048, "step": 43275 }, { "epoch": 0.9178172255095332, "grad_norm": 0.38474902510643005, "learning_rate": 1.1305565010297288e-05, "loss": 0.4939, "step": 43276 }, { "epoch": 0.9178384339674662, "grad_norm": 0.38737455010414124, "learning_rate": 1.1305234368676684e-05, "loss": 0.4816, "step": 43277 }, { "epoch": 0.9178596424253993, "grad_norm": 3.918487071990967, "learning_rate": 1.1304903725604405e-05, "loss": 0.3977, "step": 43278 }, { "epoch": 0.9178808508833323, "grad_norm": 0.4124508500099182, "learning_rate": 1.1304573081080828e-05, "loss": 0.4715, "step": 43279 }, { "epoch": 0.9179020593412653, "grad_norm": 0.36786821484565735, "learning_rate": 1.1304242435106314e-05, "loss": 0.4486, "step": 43280 }, { "epoch": 0.9179232677991983, "grad_norm": 0.3429349362850189, "learning_rate": 1.1303911787681227e-05, "loss": 0.4974, "step": 43281 }, { "epoch": 0.9179444762571314, "grad_norm": 0.4245758652687073, "learning_rate": 1.1303581138805948e-05, "loss": 0.3911, "step": 43282 }, { "epoch": 0.9179656847150643, "grad_norm": 0.39816564321517944, "learning_rate": 1.1303250488480837e-05, "loss": 0.4564, "step": 43283 }, { "epoch": 0.9179868931729974, "grad_norm": 0.36946484446525574, "learning_rate": 1.130291983670626e-05, "loss": 0.5273, "step": 43284 }, { "epoch": 0.9180081016309304, "grad_norm": 0.35619091987609863, "learning_rate": 1.1302589183482589e-05, "loss": 0.4745, "step": 43285 }, { "epoch": 0.9180293100888635, "grad_norm": 0.3588024079799652, "learning_rate": 1.1302258528810188e-05, "loss": 0.4984, "step": 43286 }, { "epoch": 0.9180505185467964, "grad_norm": 0.37781432271003723, "learning_rate": 1.1301927872689424e-05, "loss": 0.4346, "step": 43287 }, { "epoch": 0.9180717270047295, "grad_norm": 0.41189268231391907, "learning_rate": 1.130159721512067e-05, "loss": 0.4764, "step": 43288 }, { "epoch": 0.9180929354626625, "grad_norm": 0.37744712829589844, "learning_rate": 1.1301266556104293e-05, "loss": 0.5206, "step": 43289 }, { "epoch": 0.9181141439205955, "grad_norm": 0.3708968162536621, "learning_rate": 1.1300935895640654e-05, "loss": 0.5318, "step": 43290 }, { "epoch": 0.9181353523785285, "grad_norm": 0.3250862956047058, "learning_rate": 1.130060523373013e-05, "loss": 0.458, "step": 43291 }, { "epoch": 0.9181565608364616, "grad_norm": 0.39125779271125793, "learning_rate": 1.130027457037308e-05, "loss": 0.4485, "step": 43292 }, { "epoch": 0.9181777692943947, "grad_norm": 0.336261510848999, "learning_rate": 1.1299943905569877e-05, "loss": 0.3994, "step": 43293 }, { "epoch": 0.9181989777523276, "grad_norm": 0.3828878104686737, "learning_rate": 1.1299613239320889e-05, "loss": 0.4863, "step": 43294 }, { "epoch": 0.9182201862102607, "grad_norm": 0.3444901406764984, "learning_rate": 1.1299282571626479e-05, "loss": 0.4801, "step": 43295 }, { "epoch": 0.9182413946681937, "grad_norm": 0.3671725392341614, "learning_rate": 1.1298951902487023e-05, "loss": 0.5044, "step": 43296 }, { "epoch": 0.9182626031261267, "grad_norm": 0.3888617753982544, "learning_rate": 1.1298621231902884e-05, "loss": 0.4646, "step": 43297 }, { "epoch": 0.9182838115840597, "grad_norm": 0.3309899866580963, "learning_rate": 1.1298290559874425e-05, "loss": 0.4276, "step": 43298 }, { "epoch": 0.9183050200419928, "grad_norm": 0.3433503806591034, "learning_rate": 1.1297959886402023e-05, "loss": 0.4611, "step": 43299 }, { "epoch": 0.9183262284999257, "grad_norm": 0.3981442451477051, "learning_rate": 1.1297629211486039e-05, "loss": 0.49, "step": 43300 }, { "epoch": 0.9183474369578588, "grad_norm": 0.4105416536331177, "learning_rate": 1.1297298535126841e-05, "loss": 0.4814, "step": 43301 }, { "epoch": 0.9183686454157918, "grad_norm": 0.6422803997993469, "learning_rate": 1.1296967857324803e-05, "loss": 0.4795, "step": 43302 }, { "epoch": 0.9183898538737248, "grad_norm": 0.3753143548965454, "learning_rate": 1.1296637178080289e-05, "loss": 0.5059, "step": 43303 }, { "epoch": 0.9184110623316578, "grad_norm": 0.4160822033882141, "learning_rate": 1.1296306497393663e-05, "loss": 0.4861, "step": 43304 }, { "epoch": 0.9184322707895909, "grad_norm": 0.4319213926792145, "learning_rate": 1.1295975815265298e-05, "loss": 0.5311, "step": 43305 }, { "epoch": 0.918453479247524, "grad_norm": 0.3384806215763092, "learning_rate": 1.1295645131695558e-05, "loss": 0.453, "step": 43306 }, { "epoch": 0.9184746877054569, "grad_norm": 0.373704195022583, "learning_rate": 1.1295314446684815e-05, "loss": 0.5239, "step": 43307 }, { "epoch": 0.91849589616339, "grad_norm": 0.3290783166885376, "learning_rate": 1.1294983760233434e-05, "loss": 0.4659, "step": 43308 }, { "epoch": 0.918517104621323, "grad_norm": 0.41372939944267273, "learning_rate": 1.1294653072341784e-05, "loss": 0.5505, "step": 43309 }, { "epoch": 0.918538313079256, "grad_norm": 0.38248151540756226, "learning_rate": 1.129432238301023e-05, "loss": 0.4993, "step": 43310 }, { "epoch": 0.918559521537189, "grad_norm": 0.4049694836139679, "learning_rate": 1.1293991692239146e-05, "loss": 0.6094, "step": 43311 }, { "epoch": 0.9185807299951221, "grad_norm": 0.3735431134700775, "learning_rate": 1.129366100002889e-05, "loss": 0.4938, "step": 43312 }, { "epoch": 0.918601938453055, "grad_norm": 0.34632062911987305, "learning_rate": 1.1293330306379838e-05, "loss": 0.4275, "step": 43313 }, { "epoch": 0.9186231469109881, "grad_norm": 0.3600696921348572, "learning_rate": 1.1292999611292357e-05, "loss": 0.4005, "step": 43314 }, { "epoch": 0.9186443553689211, "grad_norm": 0.4154352843761444, "learning_rate": 1.129266891476681e-05, "loss": 0.4757, "step": 43315 }, { "epoch": 0.9186655638268542, "grad_norm": 0.4452289938926697, "learning_rate": 1.129233821680357e-05, "loss": 0.4748, "step": 43316 }, { "epoch": 0.9186867722847871, "grad_norm": 0.37610360980033875, "learning_rate": 1.1292007517403004e-05, "loss": 0.5481, "step": 43317 }, { "epoch": 0.9187079807427202, "grad_norm": 0.36108893156051636, "learning_rate": 1.1291676816565475e-05, "loss": 0.4091, "step": 43318 }, { "epoch": 0.9187291892006533, "grad_norm": 0.324838250875473, "learning_rate": 1.1291346114291356e-05, "loss": 0.4361, "step": 43319 }, { "epoch": 0.9187503976585862, "grad_norm": 0.4139116704463959, "learning_rate": 1.1291015410581012e-05, "loss": 0.5097, "step": 43320 }, { "epoch": 0.9187716061165193, "grad_norm": 0.3765437602996826, "learning_rate": 1.1290684705434813e-05, "loss": 0.4935, "step": 43321 }, { "epoch": 0.9187928145744523, "grad_norm": 0.297111839056015, "learning_rate": 1.1290353998853125e-05, "loss": 0.4477, "step": 43322 }, { "epoch": 0.9188140230323854, "grad_norm": 0.32704585790634155, "learning_rate": 1.1290023290836318e-05, "loss": 0.4031, "step": 43323 }, { "epoch": 0.9188352314903183, "grad_norm": 0.3722446858882904, "learning_rate": 1.1289692581384758e-05, "loss": 0.5489, "step": 43324 }, { "epoch": 0.9188564399482514, "grad_norm": 0.3334250748157501, "learning_rate": 1.1289361870498813e-05, "loss": 0.4015, "step": 43325 }, { "epoch": 0.9188776484061844, "grad_norm": 0.6694974899291992, "learning_rate": 1.1289031158178849e-05, "loss": 0.5124, "step": 43326 }, { "epoch": 0.9188988568641174, "grad_norm": 0.3650854825973511, "learning_rate": 1.1288700444425235e-05, "loss": 0.4779, "step": 43327 }, { "epoch": 0.9189200653220504, "grad_norm": 0.39497125148773193, "learning_rate": 1.1288369729238344e-05, "loss": 0.5734, "step": 43328 }, { "epoch": 0.9189412737799835, "grad_norm": 0.3641832768917084, "learning_rate": 1.1288039012618537e-05, "loss": 0.5393, "step": 43329 }, { "epoch": 0.9189624822379164, "grad_norm": 0.5511659979820251, "learning_rate": 1.1287708294566184e-05, "loss": 0.46, "step": 43330 }, { "epoch": 0.9189836906958495, "grad_norm": 0.3513821065425873, "learning_rate": 1.1287377575081654e-05, "loss": 0.4925, "step": 43331 }, { "epoch": 0.9190048991537825, "grad_norm": 0.340873122215271, "learning_rate": 1.1287046854165312e-05, "loss": 0.4808, "step": 43332 }, { "epoch": 0.9190261076117155, "grad_norm": 0.3859013020992279, "learning_rate": 1.1286716131817527e-05, "loss": 0.4729, "step": 43333 }, { "epoch": 0.9190473160696486, "grad_norm": 0.37866896390914917, "learning_rate": 1.1286385408038671e-05, "loss": 0.5434, "step": 43334 }, { "epoch": 0.9190685245275816, "grad_norm": 0.3663639426231384, "learning_rate": 1.1286054682829106e-05, "loss": 0.5155, "step": 43335 }, { "epoch": 0.9190897329855147, "grad_norm": 0.3868713676929474, "learning_rate": 1.1285723956189203e-05, "loss": 0.5331, "step": 43336 }, { "epoch": 0.9191109414434476, "grad_norm": 0.4158356785774231, "learning_rate": 1.1285393228119328e-05, "loss": 0.4375, "step": 43337 }, { "epoch": 0.9191321499013807, "grad_norm": 0.3921467363834381, "learning_rate": 1.128506249861985e-05, "loss": 0.4636, "step": 43338 }, { "epoch": 0.9191533583593137, "grad_norm": 0.4116624891757965, "learning_rate": 1.1284731767691138e-05, "loss": 0.5297, "step": 43339 }, { "epoch": 0.9191745668172467, "grad_norm": 0.3490559160709381, "learning_rate": 1.1284401035333559e-05, "loss": 0.4699, "step": 43340 }, { "epoch": 0.9191957752751797, "grad_norm": 0.3968863785266876, "learning_rate": 1.1284070301547479e-05, "loss": 0.4565, "step": 43341 }, { "epoch": 0.9192169837331128, "grad_norm": 0.38387370109558105, "learning_rate": 1.1283739566333267e-05, "loss": 0.5032, "step": 43342 }, { "epoch": 0.9192381921910457, "grad_norm": 0.45341911911964417, "learning_rate": 1.1283408829691289e-05, "loss": 0.4947, "step": 43343 }, { "epoch": 0.9192594006489788, "grad_norm": 0.36767882108688354, "learning_rate": 1.128307809162192e-05, "loss": 0.4856, "step": 43344 }, { "epoch": 0.9192806091069118, "grad_norm": 0.31625470519065857, "learning_rate": 1.1282747352125518e-05, "loss": 0.5001, "step": 43345 }, { "epoch": 0.9193018175648449, "grad_norm": 0.3669724464416504, "learning_rate": 1.1282416611202455e-05, "loss": 0.3407, "step": 43346 }, { "epoch": 0.9193230260227779, "grad_norm": 0.35187116265296936, "learning_rate": 1.1282085868853103e-05, "loss": 0.5009, "step": 43347 }, { "epoch": 0.9193442344807109, "grad_norm": 0.37768271565437317, "learning_rate": 1.1281755125077826e-05, "loss": 0.5207, "step": 43348 }, { "epoch": 0.919365442938644, "grad_norm": 0.457400918006897, "learning_rate": 1.1281424379876991e-05, "loss": 0.4734, "step": 43349 }, { "epoch": 0.9193866513965769, "grad_norm": 0.3802134096622467, "learning_rate": 1.1281093633250968e-05, "loss": 0.4884, "step": 43350 }, { "epoch": 0.91940785985451, "grad_norm": 0.35256776213645935, "learning_rate": 1.1280762885200121e-05, "loss": 0.483, "step": 43351 }, { "epoch": 0.919429068312443, "grad_norm": 0.38501811027526855, "learning_rate": 1.1280432135724821e-05, "loss": 0.4643, "step": 43352 }, { "epoch": 0.919450276770376, "grad_norm": 0.4559789299964905, "learning_rate": 1.1280101384825437e-05, "loss": 0.4924, "step": 43353 }, { "epoch": 0.919471485228309, "grad_norm": 0.40189728140830994, "learning_rate": 1.1279770632502336e-05, "loss": 0.5548, "step": 43354 }, { "epoch": 0.9194926936862421, "grad_norm": 0.33087241649627686, "learning_rate": 1.1279439878755884e-05, "loss": 0.4998, "step": 43355 }, { "epoch": 0.919513902144175, "grad_norm": 0.45389023423194885, "learning_rate": 1.1279109123586452e-05, "loss": 0.4912, "step": 43356 }, { "epoch": 0.9195351106021081, "grad_norm": 0.36139166355133057, "learning_rate": 1.1278778366994403e-05, "loss": 0.4996, "step": 43357 }, { "epoch": 0.9195563190600411, "grad_norm": 0.3560376763343811, "learning_rate": 1.1278447608980108e-05, "loss": 0.4729, "step": 43358 }, { "epoch": 0.9195775275179742, "grad_norm": 0.37056127190589905, "learning_rate": 1.1278116849543936e-05, "loss": 0.4972, "step": 43359 }, { "epoch": 0.9195987359759072, "grad_norm": 0.45709043741226196, "learning_rate": 1.1277786088686253e-05, "loss": 0.4852, "step": 43360 }, { "epoch": 0.9196199444338402, "grad_norm": 0.3981981873512268, "learning_rate": 1.127745532640743e-05, "loss": 0.5327, "step": 43361 }, { "epoch": 0.9196411528917733, "grad_norm": 0.3412564694881439, "learning_rate": 1.127712456270783e-05, "loss": 0.5138, "step": 43362 }, { "epoch": 0.9196623613497062, "grad_norm": 0.4246606230735779, "learning_rate": 1.1276793797587824e-05, "loss": 0.5339, "step": 43363 }, { "epoch": 0.9196835698076393, "grad_norm": 0.3686586022377014, "learning_rate": 1.1276463031047778e-05, "loss": 0.4622, "step": 43364 }, { "epoch": 0.9197047782655723, "grad_norm": 0.41654860973358154, "learning_rate": 1.127613226308806e-05, "loss": 0.5241, "step": 43365 }, { "epoch": 0.9197259867235054, "grad_norm": 0.3725090026855469, "learning_rate": 1.127580149370904e-05, "loss": 0.4745, "step": 43366 }, { "epoch": 0.9197471951814383, "grad_norm": 0.42077282071113586, "learning_rate": 1.1275470722911086e-05, "loss": 0.5052, "step": 43367 }, { "epoch": 0.9197684036393714, "grad_norm": 0.31772875785827637, "learning_rate": 1.1275139950694565e-05, "loss": 0.4991, "step": 43368 }, { "epoch": 0.9197896120973044, "grad_norm": 0.37692317366600037, "learning_rate": 1.127480917705984e-05, "loss": 0.5417, "step": 43369 }, { "epoch": 0.9198108205552374, "grad_norm": 0.34201276302337646, "learning_rate": 1.1274478402007285e-05, "loss": 0.4706, "step": 43370 }, { "epoch": 0.9198320290131704, "grad_norm": 0.387728750705719, "learning_rate": 1.1274147625537268e-05, "loss": 0.4925, "step": 43371 }, { "epoch": 0.9198532374711035, "grad_norm": 0.336313933134079, "learning_rate": 1.1273816847650154e-05, "loss": 0.454, "step": 43372 }, { "epoch": 0.9198744459290366, "grad_norm": 0.33530154824256897, "learning_rate": 1.1273486068346312e-05, "loss": 0.4545, "step": 43373 }, { "epoch": 0.9198956543869695, "grad_norm": 0.3629879057407379, "learning_rate": 1.127315528762611e-05, "loss": 0.5078, "step": 43374 }, { "epoch": 0.9199168628449026, "grad_norm": 0.3467179238796234, "learning_rate": 1.1272824505489917e-05, "loss": 0.4917, "step": 43375 }, { "epoch": 0.9199380713028356, "grad_norm": 0.350901335477829, "learning_rate": 1.12724937219381e-05, "loss": 0.5193, "step": 43376 }, { "epoch": 0.9199592797607686, "grad_norm": 0.36615246534347534, "learning_rate": 1.127216293697102e-05, "loss": 0.4678, "step": 43377 }, { "epoch": 0.9199804882187016, "grad_norm": 0.547101616859436, "learning_rate": 1.127183215058906e-05, "loss": 0.512, "step": 43378 }, { "epoch": 0.9200016966766347, "grad_norm": 0.41253915429115295, "learning_rate": 1.1271501362792576e-05, "loss": 0.5891, "step": 43379 }, { "epoch": 0.9200229051345676, "grad_norm": 0.39103323221206665, "learning_rate": 1.1271170573581938e-05, "loss": 0.4886, "step": 43380 }, { "epoch": 0.9200441135925007, "grad_norm": 0.34571313858032227, "learning_rate": 1.1270839782957516e-05, "loss": 0.4211, "step": 43381 }, { "epoch": 0.9200653220504337, "grad_norm": 0.32441437244415283, "learning_rate": 1.1270508990919678e-05, "loss": 0.3425, "step": 43382 }, { "epoch": 0.9200865305083668, "grad_norm": 0.3584460914134979, "learning_rate": 1.1270178197468788e-05, "loss": 0.5257, "step": 43383 }, { "epoch": 0.9201077389662997, "grad_norm": 0.38773229718208313, "learning_rate": 1.126984740260522e-05, "loss": 0.5378, "step": 43384 }, { "epoch": 0.9201289474242328, "grad_norm": 0.3837626874446869, "learning_rate": 1.1269516606329338e-05, "loss": 0.5458, "step": 43385 }, { "epoch": 0.9201501558821658, "grad_norm": 0.3656286895275116, "learning_rate": 1.126918580864151e-05, "loss": 0.4582, "step": 43386 }, { "epoch": 0.9201713643400988, "grad_norm": 0.33155399560928345, "learning_rate": 1.1268855009542106e-05, "loss": 0.4119, "step": 43387 }, { "epoch": 0.9201925727980319, "grad_norm": 0.39973026514053345, "learning_rate": 1.126852420903149e-05, "loss": 0.4764, "step": 43388 }, { "epoch": 0.9202137812559649, "grad_norm": 0.36701592803001404, "learning_rate": 1.1268193407110036e-05, "loss": 0.4626, "step": 43389 }, { "epoch": 0.9202349897138979, "grad_norm": 0.40010732412338257, "learning_rate": 1.1267862603778104e-05, "loss": 0.471, "step": 43390 }, { "epoch": 0.9202561981718309, "grad_norm": 0.4299871325492859, "learning_rate": 1.1267531799036067e-05, "loss": 0.559, "step": 43391 }, { "epoch": 0.920277406629764, "grad_norm": 0.3947649598121643, "learning_rate": 1.1267200992884295e-05, "loss": 0.5339, "step": 43392 }, { "epoch": 0.920298615087697, "grad_norm": 0.33993446826934814, "learning_rate": 1.1266870185323152e-05, "loss": 0.4717, "step": 43393 }, { "epoch": 0.92031982354563, "grad_norm": 0.4268761873245239, "learning_rate": 1.1266539376353005e-05, "loss": 0.5348, "step": 43394 }, { "epoch": 0.920341032003563, "grad_norm": 0.34215888381004333, "learning_rate": 1.1266208565974226e-05, "loss": 0.4272, "step": 43395 }, { "epoch": 0.9203622404614961, "grad_norm": 0.38160818815231323, "learning_rate": 1.126587775418718e-05, "loss": 0.536, "step": 43396 }, { "epoch": 0.920383448919429, "grad_norm": 0.4809187054634094, "learning_rate": 1.1265546940992235e-05, "loss": 0.5462, "step": 43397 }, { "epoch": 0.9204046573773621, "grad_norm": 0.4007326364517212, "learning_rate": 1.1265216126389763e-05, "loss": 0.4961, "step": 43398 }, { "epoch": 0.9204258658352951, "grad_norm": 0.44789350032806396, "learning_rate": 1.1264885310380124e-05, "loss": 0.4368, "step": 43399 }, { "epoch": 0.9204470742932281, "grad_norm": 0.3623712360858917, "learning_rate": 1.1264554492963693e-05, "loss": 0.5187, "step": 43400 }, { "epoch": 0.9204682827511612, "grad_norm": 0.3824121057987213, "learning_rate": 1.1264223674140837e-05, "loss": 0.4184, "step": 43401 }, { "epoch": 0.9204894912090942, "grad_norm": 0.4048727750778198, "learning_rate": 1.1263892853911919e-05, "loss": 0.4912, "step": 43402 }, { "epoch": 0.9205106996670273, "grad_norm": 0.39095616340637207, "learning_rate": 1.1263562032277311e-05, "loss": 0.4893, "step": 43403 }, { "epoch": 0.9205319081249602, "grad_norm": 0.5286763906478882, "learning_rate": 1.1263231209237384e-05, "loss": 0.4421, "step": 43404 }, { "epoch": 0.9205531165828933, "grad_norm": 0.37242934107780457, "learning_rate": 1.1262900384792497e-05, "loss": 0.4444, "step": 43405 }, { "epoch": 0.9205743250408263, "grad_norm": 0.34862762689590454, "learning_rate": 1.1262569558943028e-05, "loss": 0.4013, "step": 43406 }, { "epoch": 0.9205955334987593, "grad_norm": 0.35222285985946655, "learning_rate": 1.126223873168934e-05, "loss": 0.511, "step": 43407 }, { "epoch": 0.9206167419566923, "grad_norm": 0.41263821721076965, "learning_rate": 1.1261907903031797e-05, "loss": 0.5293, "step": 43408 }, { "epoch": 0.9206379504146254, "grad_norm": 0.36201390624046326, "learning_rate": 1.1261577072970771e-05, "loss": 0.48, "step": 43409 }, { "epoch": 0.9206591588725583, "grad_norm": 0.3912879526615143, "learning_rate": 1.1261246241506634e-05, "loss": 0.5198, "step": 43410 }, { "epoch": 0.9206803673304914, "grad_norm": 0.3752477467060089, "learning_rate": 1.1260915408639746e-05, "loss": 0.4998, "step": 43411 }, { "epoch": 0.9207015757884244, "grad_norm": 0.36153361201286316, "learning_rate": 1.1260584574370482e-05, "loss": 0.4714, "step": 43412 }, { "epoch": 0.9207227842463575, "grad_norm": 0.3795663118362427, "learning_rate": 1.1260253738699205e-05, "loss": 0.5385, "step": 43413 }, { "epoch": 0.9207439927042905, "grad_norm": 0.3840804696083069, "learning_rate": 1.1259922901626286e-05, "loss": 0.4882, "step": 43414 }, { "epoch": 0.9207652011622235, "grad_norm": 0.32879841327667236, "learning_rate": 1.125959206315209e-05, "loss": 0.4627, "step": 43415 }, { "epoch": 0.9207864096201566, "grad_norm": 0.4108985960483551, "learning_rate": 1.1259261223276988e-05, "loss": 0.4194, "step": 43416 }, { "epoch": 0.9208076180780895, "grad_norm": 0.33112260699272156, "learning_rate": 1.1258930382001343e-05, "loss": 0.4746, "step": 43417 }, { "epoch": 0.9208288265360226, "grad_norm": 0.3972003757953644, "learning_rate": 1.1258599539325532e-05, "loss": 0.575, "step": 43418 }, { "epoch": 0.9208500349939556, "grad_norm": 0.3630359470844269, "learning_rate": 1.1258268695249914e-05, "loss": 0.4841, "step": 43419 }, { "epoch": 0.9208712434518886, "grad_norm": 0.5684532523155212, "learning_rate": 1.1257937849774863e-05, "loss": 0.4889, "step": 43420 }, { "epoch": 0.9208924519098216, "grad_norm": 0.4229526221752167, "learning_rate": 1.1257607002900744e-05, "loss": 0.5651, "step": 43421 }, { "epoch": 0.9209136603677547, "grad_norm": 0.33133572340011597, "learning_rate": 1.1257276154627924e-05, "loss": 0.5606, "step": 43422 }, { "epoch": 0.9209348688256876, "grad_norm": 0.3904612362384796, "learning_rate": 1.1256945304956771e-05, "loss": 0.561, "step": 43423 }, { "epoch": 0.9209560772836207, "grad_norm": 0.36703068017959595, "learning_rate": 1.1256614453887658e-05, "loss": 0.4114, "step": 43424 }, { "epoch": 0.9209772857415537, "grad_norm": 0.36700767278671265, "learning_rate": 1.1256283601420946e-05, "loss": 0.5013, "step": 43425 }, { "epoch": 0.9209984941994868, "grad_norm": 0.3458499014377594, "learning_rate": 1.1255952747557009e-05, "loss": 0.437, "step": 43426 }, { "epoch": 0.9210197026574197, "grad_norm": 0.37287402153015137, "learning_rate": 1.1255621892296211e-05, "loss": 0.4896, "step": 43427 }, { "epoch": 0.9210409111153528, "grad_norm": 0.39613375067710876, "learning_rate": 1.1255291035638917e-05, "loss": 0.5095, "step": 43428 }, { "epoch": 0.9210621195732859, "grad_norm": 0.549582302570343, "learning_rate": 1.1254960177585506e-05, "loss": 0.505, "step": 43429 }, { "epoch": 0.9210833280312188, "grad_norm": 0.35754457116127014, "learning_rate": 1.1254629318136339e-05, "loss": 0.4542, "step": 43430 }, { "epoch": 0.9211045364891519, "grad_norm": 0.3913815915584564, "learning_rate": 1.125429845729178e-05, "loss": 0.4776, "step": 43431 }, { "epoch": 0.9211257449470849, "grad_norm": 0.4120521545410156, "learning_rate": 1.1253967595052205e-05, "loss": 0.525, "step": 43432 }, { "epoch": 0.921146953405018, "grad_norm": 0.3582550883293152, "learning_rate": 1.1253636731417973e-05, "loss": 0.4296, "step": 43433 }, { "epoch": 0.9211681618629509, "grad_norm": 0.36896416544914246, "learning_rate": 1.1253305866389464e-05, "loss": 0.4517, "step": 43434 }, { "epoch": 0.921189370320884, "grad_norm": 0.35067692399024963, "learning_rate": 1.1252974999967032e-05, "loss": 0.4304, "step": 43435 }, { "epoch": 0.921210578778817, "grad_norm": 0.5727627277374268, "learning_rate": 1.1252644132151058e-05, "loss": 0.5207, "step": 43436 }, { "epoch": 0.92123178723675, "grad_norm": 0.3173102140426636, "learning_rate": 1.1252313262941901e-05, "loss": 0.4211, "step": 43437 }, { "epoch": 0.921252995694683, "grad_norm": 0.3795437216758728, "learning_rate": 1.1251982392339933e-05, "loss": 0.5212, "step": 43438 }, { "epoch": 0.9212742041526161, "grad_norm": 0.4457985460758209, "learning_rate": 1.125165152034552e-05, "loss": 0.5181, "step": 43439 }, { "epoch": 0.921295412610549, "grad_norm": 0.38685423135757446, "learning_rate": 1.1251320646959032e-05, "loss": 0.567, "step": 43440 }, { "epoch": 0.9213166210684821, "grad_norm": 0.38664308190345764, "learning_rate": 1.1250989772180837e-05, "loss": 0.5202, "step": 43441 }, { "epoch": 0.9213378295264152, "grad_norm": 0.3844943344593048, "learning_rate": 1.1250658896011297e-05, "loss": 0.5363, "step": 43442 }, { "epoch": 0.9213590379843482, "grad_norm": 0.34800484776496887, "learning_rate": 1.1250328018450791e-05, "loss": 0.4591, "step": 43443 }, { "epoch": 0.9213802464422812, "grad_norm": 0.39103689789772034, "learning_rate": 1.124999713949968e-05, "loss": 0.5328, "step": 43444 }, { "epoch": 0.9214014549002142, "grad_norm": 0.5496963262557983, "learning_rate": 1.1249666259158331e-05, "loss": 0.5139, "step": 43445 }, { "epoch": 0.9214226633581473, "grad_norm": 0.3467816114425659, "learning_rate": 1.1249335377427115e-05, "loss": 0.5182, "step": 43446 }, { "epoch": 0.9214438718160802, "grad_norm": 0.3551715016365051, "learning_rate": 1.12490044943064e-05, "loss": 0.4134, "step": 43447 }, { "epoch": 0.9214650802740133, "grad_norm": 0.39804407954216003, "learning_rate": 1.124867360979655e-05, "loss": 0.5201, "step": 43448 }, { "epoch": 0.9214862887319463, "grad_norm": 0.36537760496139526, "learning_rate": 1.1248342723897938e-05, "loss": 0.467, "step": 43449 }, { "epoch": 0.9215074971898793, "grad_norm": 0.34524205327033997, "learning_rate": 1.1248011836610932e-05, "loss": 0.4405, "step": 43450 }, { "epoch": 0.9215287056478123, "grad_norm": 0.34857115149497986, "learning_rate": 1.1247680947935896e-05, "loss": 0.4606, "step": 43451 }, { "epoch": 0.9215499141057454, "grad_norm": 0.3850178122520447, "learning_rate": 1.12473500578732e-05, "loss": 0.489, "step": 43452 }, { "epoch": 0.9215711225636783, "grad_norm": 0.4139726758003235, "learning_rate": 1.1247019166423212e-05, "loss": 0.4707, "step": 43453 }, { "epoch": 0.9215923310216114, "grad_norm": 0.36176612973213196, "learning_rate": 1.12466882735863e-05, "loss": 0.4049, "step": 43454 }, { "epoch": 0.9216135394795445, "grad_norm": 0.3884020745754242, "learning_rate": 1.1246357379362833e-05, "loss": 0.4579, "step": 43455 }, { "epoch": 0.9216347479374775, "grad_norm": 0.4105744957923889, "learning_rate": 1.124602648375318e-05, "loss": 0.5579, "step": 43456 }, { "epoch": 0.9216559563954105, "grad_norm": 0.4126548171043396, "learning_rate": 1.1245695586757703e-05, "loss": 0.5903, "step": 43457 }, { "epoch": 0.9216771648533435, "grad_norm": 0.5065574049949646, "learning_rate": 1.1245364688376779e-05, "loss": 0.4323, "step": 43458 }, { "epoch": 0.9216983733112766, "grad_norm": 0.3444902300834656, "learning_rate": 1.1245033788610766e-05, "loss": 0.4693, "step": 43459 }, { "epoch": 0.9217195817692095, "grad_norm": 0.3446824848651886, "learning_rate": 1.1244702887460039e-05, "loss": 0.5063, "step": 43460 }, { "epoch": 0.9217407902271426, "grad_norm": 0.3849950432777405, "learning_rate": 1.1244371984924967e-05, "loss": 0.4735, "step": 43461 }, { "epoch": 0.9217619986850756, "grad_norm": 0.3627201318740845, "learning_rate": 1.1244041081005913e-05, "loss": 0.487, "step": 43462 }, { "epoch": 0.9217832071430087, "grad_norm": 0.40358713269233704, "learning_rate": 1.1243710175703245e-05, "loss": 0.515, "step": 43463 }, { "epoch": 0.9218044156009416, "grad_norm": 0.40952083468437195, "learning_rate": 1.1243379269017338e-05, "loss": 0.5291, "step": 43464 }, { "epoch": 0.9218256240588747, "grad_norm": 0.35910141468048096, "learning_rate": 1.1243048360948554e-05, "loss": 0.5167, "step": 43465 }, { "epoch": 0.9218468325168077, "grad_norm": 0.385997474193573, "learning_rate": 1.1242717451497263e-05, "loss": 0.5358, "step": 43466 }, { "epoch": 0.9218680409747407, "grad_norm": 0.40889859199523926, "learning_rate": 1.124238654066383e-05, "loss": 0.4785, "step": 43467 }, { "epoch": 0.9218892494326737, "grad_norm": 0.3228505253791809, "learning_rate": 1.1242055628448625e-05, "loss": 0.4258, "step": 43468 }, { "epoch": 0.9219104578906068, "grad_norm": 0.327047199010849, "learning_rate": 1.1241724714852018e-05, "loss": 0.4257, "step": 43469 }, { "epoch": 0.9219316663485398, "grad_norm": 0.41396674513816833, "learning_rate": 1.1241393799874376e-05, "loss": 0.5755, "step": 43470 }, { "epoch": 0.9219528748064728, "grad_norm": 0.4240991771221161, "learning_rate": 1.1241062883516067e-05, "loss": 0.4849, "step": 43471 }, { "epoch": 0.9219740832644059, "grad_norm": 0.4444209635257721, "learning_rate": 1.1240731965777458e-05, "loss": 0.4329, "step": 43472 }, { "epoch": 0.9219952917223388, "grad_norm": 0.3754497766494751, "learning_rate": 1.1240401046658916e-05, "loss": 0.5596, "step": 43473 }, { "epoch": 0.9220165001802719, "grad_norm": 0.3491322696208954, "learning_rate": 1.1240070126160813e-05, "loss": 0.5061, "step": 43474 }, { "epoch": 0.9220377086382049, "grad_norm": 0.46537190675735474, "learning_rate": 1.1239739204283515e-05, "loss": 0.4323, "step": 43475 }, { "epoch": 0.922058917096138, "grad_norm": 0.4075361490249634, "learning_rate": 1.1239408281027388e-05, "loss": 0.4071, "step": 43476 }, { "epoch": 0.9220801255540709, "grad_norm": 0.42879098653793335, "learning_rate": 1.1239077356392805e-05, "loss": 0.4933, "step": 43477 }, { "epoch": 0.922101334012004, "grad_norm": 0.8510452508926392, "learning_rate": 1.123874643038013e-05, "loss": 0.5612, "step": 43478 }, { "epoch": 0.922122542469937, "grad_norm": 0.3429855704307556, "learning_rate": 1.1238415502989726e-05, "loss": 0.4587, "step": 43479 }, { "epoch": 0.92214375092787, "grad_norm": 0.36727628111839294, "learning_rate": 1.1238084574221973e-05, "loss": 0.4756, "step": 43480 }, { "epoch": 0.922164959385803, "grad_norm": 0.3410800099372864, "learning_rate": 1.1237753644077234e-05, "loss": 0.3973, "step": 43481 }, { "epoch": 0.9221861678437361, "grad_norm": 0.3144453465938568, "learning_rate": 1.1237422712555873e-05, "loss": 0.4538, "step": 43482 }, { "epoch": 0.9222073763016692, "grad_norm": 0.3831757605075836, "learning_rate": 1.1237091779658263e-05, "loss": 0.463, "step": 43483 }, { "epoch": 0.9222285847596021, "grad_norm": 0.4331624507904053, "learning_rate": 1.1236760845384768e-05, "loss": 0.4228, "step": 43484 }, { "epoch": 0.9222497932175352, "grad_norm": 0.34399309754371643, "learning_rate": 1.1236429909735761e-05, "loss": 0.5316, "step": 43485 }, { "epoch": 0.9222710016754682, "grad_norm": 0.3962659239768982, "learning_rate": 1.1236098972711605e-05, "loss": 0.5169, "step": 43486 }, { "epoch": 0.9222922101334012, "grad_norm": 0.3627481460571289, "learning_rate": 1.123576803431267e-05, "loss": 0.4733, "step": 43487 }, { "epoch": 0.9223134185913342, "grad_norm": 0.4021240770816803, "learning_rate": 1.1235437094539328e-05, "loss": 0.5219, "step": 43488 }, { "epoch": 0.9223346270492673, "grad_norm": 0.3666212260723114, "learning_rate": 1.1235106153391944e-05, "loss": 0.4912, "step": 43489 }, { "epoch": 0.9223558355072002, "grad_norm": 0.33434081077575684, "learning_rate": 1.1234775210870881e-05, "loss": 0.4212, "step": 43490 }, { "epoch": 0.9223770439651333, "grad_norm": 0.3892562687397003, "learning_rate": 1.1234444266976517e-05, "loss": 0.4506, "step": 43491 }, { "epoch": 0.9223982524230663, "grad_norm": 0.33939823508262634, "learning_rate": 1.123411332170921e-05, "loss": 0.4661, "step": 43492 }, { "epoch": 0.9224194608809994, "grad_norm": 0.33729344606399536, "learning_rate": 1.1233782375069333e-05, "loss": 0.4572, "step": 43493 }, { "epoch": 0.9224406693389323, "grad_norm": 0.38192421197891235, "learning_rate": 1.1233451427057259e-05, "loss": 0.4552, "step": 43494 }, { "epoch": 0.9224618777968654, "grad_norm": 0.4491978585720062, "learning_rate": 1.1233120477673347e-05, "loss": 0.5459, "step": 43495 }, { "epoch": 0.9224830862547985, "grad_norm": 0.4027581810951233, "learning_rate": 1.1232789526917968e-05, "loss": 0.5662, "step": 43496 }, { "epoch": 0.9225042947127314, "grad_norm": 0.40890684723854065, "learning_rate": 1.1232458574791496e-05, "loss": 0.4678, "step": 43497 }, { "epoch": 0.9225255031706645, "grad_norm": 0.3907175362110138, "learning_rate": 1.123212762129429e-05, "loss": 0.4787, "step": 43498 }, { "epoch": 0.9225467116285975, "grad_norm": 0.3639254570007324, "learning_rate": 1.1231796666426724e-05, "loss": 0.4863, "step": 43499 }, { "epoch": 0.9225679200865305, "grad_norm": 0.3517642617225647, "learning_rate": 1.1231465710189164e-05, "loss": 0.5465, "step": 43500 }, { "epoch": 0.9225891285444635, "grad_norm": 0.37815266847610474, "learning_rate": 1.1231134752581978e-05, "loss": 0.4568, "step": 43501 }, { "epoch": 0.9226103370023966, "grad_norm": 0.4207156002521515, "learning_rate": 1.1230803793605539e-05, "loss": 0.4897, "step": 43502 }, { "epoch": 0.9226315454603295, "grad_norm": 0.37101414799690247, "learning_rate": 1.1230472833260207e-05, "loss": 0.4948, "step": 43503 }, { "epoch": 0.9226527539182626, "grad_norm": 0.33463042974472046, "learning_rate": 1.1230141871546352e-05, "loss": 0.4659, "step": 43504 }, { "epoch": 0.9226739623761956, "grad_norm": 0.44522541761398315, "learning_rate": 1.1229810908464345e-05, "loss": 0.4741, "step": 43505 }, { "epoch": 0.9226951708341287, "grad_norm": 0.7254642248153687, "learning_rate": 1.1229479944014554e-05, "loss": 0.5724, "step": 43506 }, { "epoch": 0.9227163792920616, "grad_norm": 0.3920651078224182, "learning_rate": 1.1229148978197346e-05, "loss": 0.535, "step": 43507 }, { "epoch": 0.9227375877499947, "grad_norm": 0.3686843812465668, "learning_rate": 1.122881801101309e-05, "loss": 0.5326, "step": 43508 }, { "epoch": 0.9227587962079277, "grad_norm": 0.33267995715141296, "learning_rate": 1.1228487042462155e-05, "loss": 0.4193, "step": 43509 }, { "epoch": 0.9227800046658607, "grad_norm": 0.36243295669555664, "learning_rate": 1.1228156072544902e-05, "loss": 0.5093, "step": 43510 }, { "epoch": 0.9228012131237938, "grad_norm": 0.36862945556640625, "learning_rate": 1.122782510126171e-05, "loss": 0.5684, "step": 43511 }, { "epoch": 0.9228224215817268, "grad_norm": 0.39173388481140137, "learning_rate": 1.1227494128612936e-05, "loss": 0.4187, "step": 43512 }, { "epoch": 0.9228436300396599, "grad_norm": 0.35797232389450073, "learning_rate": 1.1227163154598956e-05, "loss": 0.5066, "step": 43513 }, { "epoch": 0.9228648384975928, "grad_norm": 0.33775627613067627, "learning_rate": 1.1226832179220137e-05, "loss": 0.3962, "step": 43514 }, { "epoch": 0.9228860469555259, "grad_norm": 0.36915695667266846, "learning_rate": 1.1226501202476846e-05, "loss": 0.457, "step": 43515 }, { "epoch": 0.9229072554134589, "grad_norm": 0.3595311939716339, "learning_rate": 1.122617022436945e-05, "loss": 0.435, "step": 43516 }, { "epoch": 0.9229284638713919, "grad_norm": 0.39027565717697144, "learning_rate": 1.122583924489832e-05, "loss": 0.4824, "step": 43517 }, { "epoch": 0.9229496723293249, "grad_norm": 0.38638609647750854, "learning_rate": 1.1225508264063819e-05, "loss": 0.5903, "step": 43518 }, { "epoch": 0.922970880787258, "grad_norm": 0.40616878867149353, "learning_rate": 1.122517728186632e-05, "loss": 0.4837, "step": 43519 }, { "epoch": 0.9229920892451909, "grad_norm": 0.35046374797821045, "learning_rate": 1.1224846298306192e-05, "loss": 0.3856, "step": 43520 }, { "epoch": 0.923013297703124, "grad_norm": 0.39478570222854614, "learning_rate": 1.1224515313383795e-05, "loss": 0.5609, "step": 43521 }, { "epoch": 0.923034506161057, "grad_norm": 0.35842856764793396, "learning_rate": 1.1224184327099507e-05, "loss": 0.3909, "step": 43522 }, { "epoch": 0.92305571461899, "grad_norm": 0.35324764251708984, "learning_rate": 1.1223853339453693e-05, "loss": 0.5289, "step": 43523 }, { "epoch": 0.9230769230769231, "grad_norm": 0.3436165750026703, "learning_rate": 1.1223522350446714e-05, "loss": 0.4244, "step": 43524 }, { "epoch": 0.9230981315348561, "grad_norm": 0.43983396887779236, "learning_rate": 1.1223191360078949e-05, "loss": 0.5775, "step": 43525 }, { "epoch": 0.9231193399927892, "grad_norm": 0.36511752009391785, "learning_rate": 1.122286036835076e-05, "loss": 0.423, "step": 43526 }, { "epoch": 0.9231405484507221, "grad_norm": 0.5644817352294922, "learning_rate": 1.1222529375262513e-05, "loss": 0.5158, "step": 43527 }, { "epoch": 0.9231617569086552, "grad_norm": 0.3780810534954071, "learning_rate": 1.1222198380814587e-05, "loss": 0.4489, "step": 43528 }, { "epoch": 0.9231829653665882, "grad_norm": 0.3406060039997101, "learning_rate": 1.1221867385007337e-05, "loss": 0.4875, "step": 43529 }, { "epoch": 0.9232041738245212, "grad_norm": 0.40444204211235046, "learning_rate": 1.122153638784114e-05, "loss": 0.4968, "step": 43530 }, { "epoch": 0.9232253822824542, "grad_norm": 0.38011038303375244, "learning_rate": 1.1221205389316357e-05, "loss": 0.474, "step": 43531 }, { "epoch": 0.9232465907403873, "grad_norm": 0.40083062648773193, "learning_rate": 1.1220874389433359e-05, "loss": 0.4794, "step": 43532 }, { "epoch": 0.9232677991983202, "grad_norm": 0.4323796331882477, "learning_rate": 1.122054338819252e-05, "loss": 0.438, "step": 43533 }, { "epoch": 0.9232890076562533, "grad_norm": 0.38776078820228577, "learning_rate": 1.12202123855942e-05, "loss": 0.5042, "step": 43534 }, { "epoch": 0.9233102161141863, "grad_norm": 0.3565954267978668, "learning_rate": 1.121988138163877e-05, "loss": 0.493, "step": 43535 }, { "epoch": 0.9233314245721194, "grad_norm": 0.3577977418899536, "learning_rate": 1.1219550376326603e-05, "loss": 0.4915, "step": 43536 }, { "epoch": 0.9233526330300524, "grad_norm": 0.3205045461654663, "learning_rate": 1.121921936965806e-05, "loss": 0.4208, "step": 43537 }, { "epoch": 0.9233738414879854, "grad_norm": 0.3553527891635895, "learning_rate": 1.121888836163351e-05, "loss": 0.4649, "step": 43538 }, { "epoch": 0.9233950499459185, "grad_norm": 0.36074090003967285, "learning_rate": 1.1218557352253325e-05, "loss": 0.416, "step": 43539 }, { "epoch": 0.9234162584038514, "grad_norm": 0.42121246457099915, "learning_rate": 1.121822634151787e-05, "loss": 0.5502, "step": 43540 }, { "epoch": 0.9234374668617845, "grad_norm": 0.34827330708503723, "learning_rate": 1.1217895329427515e-05, "loss": 0.4586, "step": 43541 }, { "epoch": 0.9234586753197175, "grad_norm": 0.34989064931869507, "learning_rate": 1.1217564315982628e-05, "loss": 0.4832, "step": 43542 }, { "epoch": 0.9234798837776506, "grad_norm": 0.40114283561706543, "learning_rate": 1.1217233301183574e-05, "loss": 0.3948, "step": 43543 }, { "epoch": 0.9235010922355835, "grad_norm": 0.3462248742580414, "learning_rate": 1.1216902285030723e-05, "loss": 0.5234, "step": 43544 }, { "epoch": 0.9235223006935166, "grad_norm": 0.3980633616447449, "learning_rate": 1.121657126752445e-05, "loss": 0.3903, "step": 43545 }, { "epoch": 0.9235435091514496, "grad_norm": 0.33315327763557434, "learning_rate": 1.1216240248665111e-05, "loss": 0.5145, "step": 43546 }, { "epoch": 0.9235647176093826, "grad_norm": 0.3490361273288727, "learning_rate": 1.1215909228453083e-05, "loss": 0.4455, "step": 43547 }, { "epoch": 0.9235859260673156, "grad_norm": 0.37660831212997437, "learning_rate": 1.1215578206888734e-05, "loss": 0.4939, "step": 43548 }, { "epoch": 0.9236071345252487, "grad_norm": 0.42394325137138367, "learning_rate": 1.1215247183972425e-05, "loss": 0.4722, "step": 43549 }, { "epoch": 0.9236283429831816, "grad_norm": 0.3193531930446625, "learning_rate": 1.1214916159704527e-05, "loss": 0.3968, "step": 43550 }, { "epoch": 0.9236495514411147, "grad_norm": 0.34372806549072266, "learning_rate": 1.1214585134085414e-05, "loss": 0.4343, "step": 43551 }, { "epoch": 0.9236707598990478, "grad_norm": 0.5518993139266968, "learning_rate": 1.1214254107115447e-05, "loss": 0.5129, "step": 43552 }, { "epoch": 0.9236919683569808, "grad_norm": 0.3801273703575134, "learning_rate": 1.1213923078795002e-05, "loss": 0.4481, "step": 43553 }, { "epoch": 0.9237131768149138, "grad_norm": 0.37837865948677063, "learning_rate": 1.121359204912444e-05, "loss": 0.4734, "step": 43554 }, { "epoch": 0.9237343852728468, "grad_norm": 0.37529614567756653, "learning_rate": 1.121326101810413e-05, "loss": 0.4795, "step": 43555 }, { "epoch": 0.9237555937307799, "grad_norm": 0.3554553985595703, "learning_rate": 1.121292998573444e-05, "loss": 0.5013, "step": 43556 }, { "epoch": 0.9237768021887128, "grad_norm": 0.36687687039375305, "learning_rate": 1.1212598952015743e-05, "loss": 0.4644, "step": 43557 }, { "epoch": 0.9237980106466459, "grad_norm": 0.3478948473930359, "learning_rate": 1.1212267916948401e-05, "loss": 0.5185, "step": 43558 }, { "epoch": 0.9238192191045789, "grad_norm": 0.32059353590011597, "learning_rate": 1.1211936880532787e-05, "loss": 0.4735, "step": 43559 }, { "epoch": 0.923840427562512, "grad_norm": 0.4059907793998718, "learning_rate": 1.1211605842769268e-05, "loss": 0.4854, "step": 43560 }, { "epoch": 0.9238616360204449, "grad_norm": 0.4211181402206421, "learning_rate": 1.1211274803658212e-05, "loss": 0.4206, "step": 43561 }, { "epoch": 0.923882844478378, "grad_norm": 0.3536883592605591, "learning_rate": 1.1210943763199986e-05, "loss": 0.463, "step": 43562 }, { "epoch": 0.923904052936311, "grad_norm": 0.3385706841945648, "learning_rate": 1.1210612721394958e-05, "loss": 0.487, "step": 43563 }, { "epoch": 0.923925261394244, "grad_norm": 0.33363527059555054, "learning_rate": 1.1210281678243496e-05, "loss": 0.4459, "step": 43564 }, { "epoch": 0.9239464698521771, "grad_norm": 0.5647611618041992, "learning_rate": 1.1209950633745971e-05, "loss": 0.5184, "step": 43565 }, { "epoch": 0.9239676783101101, "grad_norm": 0.43859627842903137, "learning_rate": 1.1209619587902748e-05, "loss": 0.5504, "step": 43566 }, { "epoch": 0.9239888867680431, "grad_norm": 0.5575637817382812, "learning_rate": 1.12092885407142e-05, "loss": 0.4412, "step": 43567 }, { "epoch": 0.9240100952259761, "grad_norm": 0.40114372968673706, "learning_rate": 1.120895749218069e-05, "loss": 0.4478, "step": 43568 }, { "epoch": 0.9240313036839092, "grad_norm": 0.33401310443878174, "learning_rate": 1.1208626442302586e-05, "loss": 0.4367, "step": 43569 }, { "epoch": 0.9240525121418421, "grad_norm": 0.3421763479709625, "learning_rate": 1.1208295391080259e-05, "loss": 0.4777, "step": 43570 }, { "epoch": 0.9240737205997752, "grad_norm": 0.4020228683948517, "learning_rate": 1.120796433851408e-05, "loss": 0.5373, "step": 43571 }, { "epoch": 0.9240949290577082, "grad_norm": 0.7408232092857361, "learning_rate": 1.1207633284604408e-05, "loss": 0.601, "step": 43572 }, { "epoch": 0.9241161375156413, "grad_norm": 0.32707446813583374, "learning_rate": 1.1207302229351622e-05, "loss": 0.475, "step": 43573 }, { "epoch": 0.9241373459735742, "grad_norm": 0.35414156317710876, "learning_rate": 1.120697117275608e-05, "loss": 0.4903, "step": 43574 }, { "epoch": 0.9241585544315073, "grad_norm": 0.3673195242881775, "learning_rate": 1.1206640114818158e-05, "loss": 0.4892, "step": 43575 }, { "epoch": 0.9241797628894403, "grad_norm": 0.3767816126346588, "learning_rate": 1.120630905553822e-05, "loss": 0.5965, "step": 43576 }, { "epoch": 0.9242009713473733, "grad_norm": 0.3413042426109314, "learning_rate": 1.1205977994916639e-05, "loss": 0.5039, "step": 43577 }, { "epoch": 0.9242221798053064, "grad_norm": 0.3738168776035309, "learning_rate": 1.1205646932953775e-05, "loss": 0.4878, "step": 43578 }, { "epoch": 0.9242433882632394, "grad_norm": 0.356973797082901, "learning_rate": 1.1205315869650005e-05, "loss": 0.4911, "step": 43579 }, { "epoch": 0.9242645967211724, "grad_norm": 0.38270556926727295, "learning_rate": 1.120498480500569e-05, "loss": 0.5306, "step": 43580 }, { "epoch": 0.9242858051791054, "grad_norm": 0.3775794506072998, "learning_rate": 1.1204653739021203e-05, "loss": 0.5237, "step": 43581 }, { "epoch": 0.9243070136370385, "grad_norm": 0.4066604673862457, "learning_rate": 1.120432267169691e-05, "loss": 0.5808, "step": 43582 }, { "epoch": 0.9243282220949715, "grad_norm": 0.39765650033950806, "learning_rate": 1.1203991603033177e-05, "loss": 0.5676, "step": 43583 }, { "epoch": 0.9243494305529045, "grad_norm": 0.3568502366542816, "learning_rate": 1.1203660533030381e-05, "loss": 0.4384, "step": 43584 }, { "epoch": 0.9243706390108375, "grad_norm": 0.32114675641059875, "learning_rate": 1.1203329461688882e-05, "loss": 0.3627, "step": 43585 }, { "epoch": 0.9243918474687706, "grad_norm": 0.37654078006744385, "learning_rate": 1.1202998389009048e-05, "loss": 0.332, "step": 43586 }, { "epoch": 0.9244130559267035, "grad_norm": 0.32261037826538086, "learning_rate": 1.1202667314991253e-05, "loss": 0.511, "step": 43587 }, { "epoch": 0.9244342643846366, "grad_norm": 0.3624255061149597, "learning_rate": 1.1202336239635858e-05, "loss": 0.457, "step": 43588 }, { "epoch": 0.9244554728425696, "grad_norm": 0.336774080991745, "learning_rate": 1.1202005162943238e-05, "loss": 0.4927, "step": 43589 }, { "epoch": 0.9244766813005026, "grad_norm": 0.38126716017723083, "learning_rate": 1.1201674084913756e-05, "loss": 0.4692, "step": 43590 }, { "epoch": 0.9244978897584356, "grad_norm": 0.3412570059299469, "learning_rate": 1.1201343005547786e-05, "loss": 0.4966, "step": 43591 }, { "epoch": 0.9245190982163687, "grad_norm": 0.37787601351737976, "learning_rate": 1.120101192484569e-05, "loss": 0.3891, "step": 43592 }, { "epoch": 0.9245403066743018, "grad_norm": 0.34087592363357544, "learning_rate": 1.120068084280784e-05, "loss": 0.5052, "step": 43593 }, { "epoch": 0.9245615151322347, "grad_norm": 0.3889559507369995, "learning_rate": 1.1200349759434603e-05, "loss": 0.4565, "step": 43594 }, { "epoch": 0.9245827235901678, "grad_norm": 0.3673120439052582, "learning_rate": 1.1200018674726344e-05, "loss": 0.5214, "step": 43595 }, { "epoch": 0.9246039320481008, "grad_norm": 0.3948303461074829, "learning_rate": 1.1199687588683441e-05, "loss": 0.5979, "step": 43596 }, { "epoch": 0.9246251405060338, "grad_norm": 0.39407262206077576, "learning_rate": 1.1199356501306251e-05, "loss": 0.5706, "step": 43597 }, { "epoch": 0.9246463489639668, "grad_norm": 0.3941916525363922, "learning_rate": 1.119902541259515e-05, "loss": 0.5833, "step": 43598 }, { "epoch": 0.9246675574218999, "grad_norm": 0.4006603956222534, "learning_rate": 1.1198694322550506e-05, "loss": 0.5068, "step": 43599 }, { "epoch": 0.9246887658798328, "grad_norm": 0.37418508529663086, "learning_rate": 1.1198363231172677e-05, "loss": 0.5089, "step": 43600 }, { "epoch": 0.9247099743377659, "grad_norm": 0.3952227234840393, "learning_rate": 1.1198032138462043e-05, "loss": 0.5104, "step": 43601 }, { "epoch": 0.9247311827956989, "grad_norm": 0.42045965790748596, "learning_rate": 1.1197701044418969e-05, "loss": 0.4995, "step": 43602 }, { "epoch": 0.924752391253632, "grad_norm": 0.433062344789505, "learning_rate": 1.119736994904382e-05, "loss": 0.481, "step": 43603 }, { "epoch": 0.9247735997115649, "grad_norm": 0.3604080379009247, "learning_rate": 1.1197038852336968e-05, "loss": 0.5557, "step": 43604 }, { "epoch": 0.924794808169498, "grad_norm": 0.4704994261264801, "learning_rate": 1.119670775429878e-05, "loss": 0.4171, "step": 43605 }, { "epoch": 0.9248160166274311, "grad_norm": 0.4067002832889557, "learning_rate": 1.1196376654929623e-05, "loss": 0.5081, "step": 43606 }, { "epoch": 0.924837225085364, "grad_norm": 0.35766416788101196, "learning_rate": 1.1196045554229869e-05, "loss": 0.4136, "step": 43607 }, { "epoch": 0.9248584335432971, "grad_norm": 0.43752509355545044, "learning_rate": 1.119571445219988e-05, "loss": 0.4935, "step": 43608 }, { "epoch": 0.9248796420012301, "grad_norm": 0.38653627038002014, "learning_rate": 1.1195383348840028e-05, "loss": 0.5083, "step": 43609 }, { "epoch": 0.9249008504591631, "grad_norm": 0.34530940651893616, "learning_rate": 1.1195052244150684e-05, "loss": 0.4925, "step": 43610 }, { "epoch": 0.9249220589170961, "grad_norm": 0.4051710367202759, "learning_rate": 1.1194721138132209e-05, "loss": 0.5262, "step": 43611 }, { "epoch": 0.9249432673750292, "grad_norm": 0.431893914937973, "learning_rate": 1.1194390030784979e-05, "loss": 0.4787, "step": 43612 }, { "epoch": 0.9249644758329622, "grad_norm": 0.35499367117881775, "learning_rate": 1.1194058922109359e-05, "loss": 0.4475, "step": 43613 }, { "epoch": 0.9249856842908952, "grad_norm": 0.3504129648208618, "learning_rate": 1.1193727812105713e-05, "loss": 0.4322, "step": 43614 }, { "epoch": 0.9250068927488282, "grad_norm": 0.35906460881233215, "learning_rate": 1.1193396700774417e-05, "loss": 0.509, "step": 43615 }, { "epoch": 0.9250281012067613, "grad_norm": 0.3870556950569153, "learning_rate": 1.1193065588115835e-05, "loss": 0.468, "step": 43616 }, { "epoch": 0.9250493096646942, "grad_norm": 0.38449183106422424, "learning_rate": 1.1192734474130333e-05, "loss": 0.4916, "step": 43617 }, { "epoch": 0.9250705181226273, "grad_norm": 0.33239856362342834, "learning_rate": 1.1192403358818287e-05, "loss": 0.4341, "step": 43618 }, { "epoch": 0.9250917265805604, "grad_norm": 0.41244959831237793, "learning_rate": 1.1192072242180058e-05, "loss": 0.4943, "step": 43619 }, { "epoch": 0.9251129350384933, "grad_norm": 0.32614198327064514, "learning_rate": 1.1191741124216015e-05, "loss": 0.4415, "step": 43620 }, { "epoch": 0.9251341434964264, "grad_norm": 0.38543274998664856, "learning_rate": 1.119141000492653e-05, "loss": 0.4855, "step": 43621 }, { "epoch": 0.9251553519543594, "grad_norm": 0.414815753698349, "learning_rate": 1.119107888431197e-05, "loss": 0.5204, "step": 43622 }, { "epoch": 0.9251765604122925, "grad_norm": 0.32698002457618713, "learning_rate": 1.1190747762372698e-05, "loss": 0.3869, "step": 43623 }, { "epoch": 0.9251977688702254, "grad_norm": 0.36817264556884766, "learning_rate": 1.1190416639109092e-05, "loss": 0.4955, "step": 43624 }, { "epoch": 0.9252189773281585, "grad_norm": 0.34109845757484436, "learning_rate": 1.1190085514521511e-05, "loss": 0.5315, "step": 43625 }, { "epoch": 0.9252401857860915, "grad_norm": 0.4183289706707001, "learning_rate": 1.118975438861033e-05, "loss": 0.5104, "step": 43626 }, { "epoch": 0.9252613942440245, "grad_norm": 0.36563006043434143, "learning_rate": 1.1189423261375912e-05, "loss": 0.4802, "step": 43627 }, { "epoch": 0.9252826027019575, "grad_norm": 0.527614176273346, "learning_rate": 1.118909213281863e-05, "loss": 0.5378, "step": 43628 }, { "epoch": 0.9253038111598906, "grad_norm": 0.3508216142654419, "learning_rate": 1.1188761002938849e-05, "loss": 0.4631, "step": 43629 }, { "epoch": 0.9253250196178235, "grad_norm": 0.422597736120224, "learning_rate": 1.1188429871736938e-05, "loss": 0.5, "step": 43630 }, { "epoch": 0.9253462280757566, "grad_norm": 0.3761078119277954, "learning_rate": 1.1188098739213265e-05, "loss": 0.4681, "step": 43631 }, { "epoch": 0.9253674365336896, "grad_norm": 0.3728737235069275, "learning_rate": 1.1187767605368201e-05, "loss": 0.4727, "step": 43632 }, { "epoch": 0.9253886449916227, "grad_norm": 0.38697901368141174, "learning_rate": 1.1187436470202112e-05, "loss": 0.4438, "step": 43633 }, { "epoch": 0.9254098534495557, "grad_norm": 0.35330861806869507, "learning_rate": 1.1187105333715362e-05, "loss": 0.4444, "step": 43634 }, { "epoch": 0.9254310619074887, "grad_norm": 0.4486718773841858, "learning_rate": 1.118677419590833e-05, "loss": 0.5569, "step": 43635 }, { "epoch": 0.9254522703654218, "grad_norm": 0.3921929597854614, "learning_rate": 1.1186443056781375e-05, "loss": 0.5064, "step": 43636 }, { "epoch": 0.9254734788233547, "grad_norm": 0.3486374318599701, "learning_rate": 1.1186111916334867e-05, "loss": 0.458, "step": 43637 }, { "epoch": 0.9254946872812878, "grad_norm": 0.3175175189971924, "learning_rate": 1.118578077456918e-05, "loss": 0.4304, "step": 43638 }, { "epoch": 0.9255158957392208, "grad_norm": 0.513038158416748, "learning_rate": 1.1185449631484675e-05, "loss": 0.5011, "step": 43639 }, { "epoch": 0.9255371041971538, "grad_norm": 0.5573148131370544, "learning_rate": 1.118511848708172e-05, "loss": 0.488, "step": 43640 }, { "epoch": 0.9255583126550868, "grad_norm": 0.36934414505958557, "learning_rate": 1.1184787341360693e-05, "loss": 0.5272, "step": 43641 }, { "epoch": 0.9255795211130199, "grad_norm": 0.6399467587471008, "learning_rate": 1.118445619432195e-05, "loss": 0.5349, "step": 43642 }, { "epoch": 0.9256007295709529, "grad_norm": 0.3820597231388092, "learning_rate": 1.118412504596587e-05, "loss": 0.476, "step": 43643 }, { "epoch": 0.9256219380288859, "grad_norm": 0.4854222536087036, "learning_rate": 1.1183793896292816e-05, "loss": 0.5366, "step": 43644 }, { "epoch": 0.9256431464868189, "grad_norm": 0.4289586842060089, "learning_rate": 1.1183462745303154e-05, "loss": 0.4811, "step": 43645 }, { "epoch": 0.925664354944752, "grad_norm": 0.37269338965415955, "learning_rate": 1.1183131592997254e-05, "loss": 0.4931, "step": 43646 }, { "epoch": 0.925685563402685, "grad_norm": 0.3653879761695862, "learning_rate": 1.118280043937549e-05, "loss": 0.5246, "step": 43647 }, { "epoch": 0.925706771860618, "grad_norm": 0.35127362608909607, "learning_rate": 1.1182469284438223e-05, "loss": 0.4545, "step": 43648 }, { "epoch": 0.9257279803185511, "grad_norm": 0.35634660720825195, "learning_rate": 1.1182138128185824e-05, "loss": 0.4506, "step": 43649 }, { "epoch": 0.925749188776484, "grad_norm": 0.42265546321868896, "learning_rate": 1.1181806970618663e-05, "loss": 0.5206, "step": 43650 }, { "epoch": 0.9257703972344171, "grad_norm": 0.4092414975166321, "learning_rate": 1.11814758117371e-05, "loss": 0.5813, "step": 43651 }, { "epoch": 0.9257916056923501, "grad_norm": 0.4189303517341614, "learning_rate": 1.1181144651541518e-05, "loss": 0.5472, "step": 43652 }, { "epoch": 0.9258128141502832, "grad_norm": 0.3449295461177826, "learning_rate": 1.1180813490032274e-05, "loss": 0.4129, "step": 43653 }, { "epoch": 0.9258340226082161, "grad_norm": 0.4061060845851898, "learning_rate": 1.1180482327209737e-05, "loss": 0.555, "step": 43654 }, { "epoch": 0.9258552310661492, "grad_norm": 0.35051229596138, "learning_rate": 1.1180151163074282e-05, "loss": 0.4914, "step": 43655 }, { "epoch": 0.9258764395240822, "grad_norm": 0.37431636452674866, "learning_rate": 1.1179819997626269e-05, "loss": 0.415, "step": 43656 }, { "epoch": 0.9258976479820152, "grad_norm": 0.3882477879524231, "learning_rate": 1.1179488830866074e-05, "loss": 0.4804, "step": 43657 }, { "epoch": 0.9259188564399482, "grad_norm": 0.3491547107696533, "learning_rate": 1.1179157662794062e-05, "loss": 0.4738, "step": 43658 }, { "epoch": 0.9259400648978813, "grad_norm": 0.3647732734680176, "learning_rate": 1.11788264934106e-05, "loss": 0.3954, "step": 43659 }, { "epoch": 0.9259612733558144, "grad_norm": 0.46368837356567383, "learning_rate": 1.1178495322716054e-05, "loss": 0.4409, "step": 43660 }, { "epoch": 0.9259824818137473, "grad_norm": 0.3475210666656494, "learning_rate": 1.11781641507108e-05, "loss": 0.4573, "step": 43661 }, { "epoch": 0.9260036902716804, "grad_norm": 0.3797849714756012, "learning_rate": 1.11778329773952e-05, "loss": 0.4992, "step": 43662 }, { "epoch": 0.9260248987296134, "grad_norm": 0.4342578649520874, "learning_rate": 1.1177501802769627e-05, "loss": 0.3922, "step": 43663 }, { "epoch": 0.9260461071875464, "grad_norm": 0.33712446689605713, "learning_rate": 1.1177170626834445e-05, "loss": 0.4512, "step": 43664 }, { "epoch": 0.9260673156454794, "grad_norm": 0.37211450934410095, "learning_rate": 1.117683944959002e-05, "loss": 0.5518, "step": 43665 }, { "epoch": 0.9260885241034125, "grad_norm": 0.3329183757305145, "learning_rate": 1.117650827103673e-05, "loss": 0.4527, "step": 43666 }, { "epoch": 0.9261097325613454, "grad_norm": 0.3316839039325714, "learning_rate": 1.1176177091174938e-05, "loss": 0.3927, "step": 43667 }, { "epoch": 0.9261309410192785, "grad_norm": 0.38723400235176086, "learning_rate": 1.1175845910005007e-05, "loss": 0.4897, "step": 43668 }, { "epoch": 0.9261521494772115, "grad_norm": 0.3834591209888458, "learning_rate": 1.1175514727527317e-05, "loss": 0.4592, "step": 43669 }, { "epoch": 0.9261733579351445, "grad_norm": 0.33337193727493286, "learning_rate": 1.1175183543742222e-05, "loss": 0.4339, "step": 43670 }, { "epoch": 0.9261945663930775, "grad_norm": 0.4144441783428192, "learning_rate": 1.1174852358650105e-05, "loss": 0.5666, "step": 43671 }, { "epoch": 0.9262157748510106, "grad_norm": 1.0094493627548218, "learning_rate": 1.1174521172251324e-05, "loss": 0.5091, "step": 43672 }, { "epoch": 0.9262369833089437, "grad_norm": 0.3875858783721924, "learning_rate": 1.1174189984546253e-05, "loss": 0.485, "step": 43673 }, { "epoch": 0.9262581917668766, "grad_norm": 0.3864501118659973, "learning_rate": 1.1173858795535255e-05, "loss": 0.5656, "step": 43674 }, { "epoch": 0.9262794002248097, "grad_norm": 0.349749356508255, "learning_rate": 1.1173527605218703e-05, "loss": 0.4888, "step": 43675 }, { "epoch": 0.9263006086827427, "grad_norm": 0.3897334635257721, "learning_rate": 1.1173196413596963e-05, "loss": 0.4719, "step": 43676 }, { "epoch": 0.9263218171406757, "grad_norm": 0.3570828139781952, "learning_rate": 1.1172865220670408e-05, "loss": 0.5021, "step": 43677 }, { "epoch": 0.9263430255986087, "grad_norm": 0.4499415457248688, "learning_rate": 1.1172534026439396e-05, "loss": 0.4965, "step": 43678 }, { "epoch": 0.9263642340565418, "grad_norm": 0.3674089014530182, "learning_rate": 1.1172202830904307e-05, "loss": 0.5056, "step": 43679 }, { "epoch": 0.9263854425144747, "grad_norm": 0.36248183250427246, "learning_rate": 1.1171871634065501e-05, "loss": 0.5628, "step": 43680 }, { "epoch": 0.9264066509724078, "grad_norm": 0.37974923849105835, "learning_rate": 1.1171540435923354e-05, "loss": 0.4674, "step": 43681 }, { "epoch": 0.9264278594303408, "grad_norm": 0.34130993485450745, "learning_rate": 1.1171209236478225e-05, "loss": 0.4787, "step": 43682 }, { "epoch": 0.9264490678882739, "grad_norm": 0.3866746425628662, "learning_rate": 1.117087803573049e-05, "loss": 0.5099, "step": 43683 }, { "epoch": 0.9264702763462068, "grad_norm": 0.3397587537765503, "learning_rate": 1.1170546833680513e-05, "loss": 0.4328, "step": 43684 }, { "epoch": 0.9264914848041399, "grad_norm": 0.4410659074783325, "learning_rate": 1.1170215630328664e-05, "loss": 0.4836, "step": 43685 }, { "epoch": 0.9265126932620729, "grad_norm": 0.40245503187179565, "learning_rate": 1.1169884425675312e-05, "loss": 0.452, "step": 43686 }, { "epoch": 0.9265339017200059, "grad_norm": 0.36404991149902344, "learning_rate": 1.1169553219720828e-05, "loss": 0.486, "step": 43687 }, { "epoch": 0.926555110177939, "grad_norm": 0.33391085267066956, "learning_rate": 1.1169222012465572e-05, "loss": 0.4415, "step": 43688 }, { "epoch": 0.926576318635872, "grad_norm": 0.35801756381988525, "learning_rate": 1.116889080390992e-05, "loss": 0.4469, "step": 43689 }, { "epoch": 0.926597527093805, "grad_norm": 0.3989585340023041, "learning_rate": 1.1168559594054236e-05, "loss": 0.4419, "step": 43690 }, { "epoch": 0.926618735551738, "grad_norm": 0.3666614890098572, "learning_rate": 1.1168228382898892e-05, "loss": 0.4831, "step": 43691 }, { "epoch": 0.9266399440096711, "grad_norm": 0.4659578800201416, "learning_rate": 1.1167897170444253e-05, "loss": 0.5855, "step": 43692 }, { "epoch": 0.926661152467604, "grad_norm": 0.3493086099624634, "learning_rate": 1.1167565956690689e-05, "loss": 0.4533, "step": 43693 }, { "epoch": 0.9266823609255371, "grad_norm": 0.3306797444820404, "learning_rate": 1.116723474163857e-05, "loss": 0.4881, "step": 43694 }, { "epoch": 0.9267035693834701, "grad_norm": 0.403274267911911, "learning_rate": 1.1166903525288265e-05, "loss": 0.4771, "step": 43695 }, { "epoch": 0.9267247778414032, "grad_norm": 0.447395384311676, "learning_rate": 1.1166572307640133e-05, "loss": 0.5388, "step": 43696 }, { "epoch": 0.9267459862993361, "grad_norm": 0.3639444410800934, "learning_rate": 1.1166241088694557e-05, "loss": 0.4779, "step": 43697 }, { "epoch": 0.9267671947572692, "grad_norm": 0.38299137353897095, "learning_rate": 1.1165909868451892e-05, "loss": 0.4569, "step": 43698 }, { "epoch": 0.9267884032152022, "grad_norm": 0.35173019766807556, "learning_rate": 1.1165578646912511e-05, "loss": 0.4224, "step": 43699 }, { "epoch": 0.9268096116731352, "grad_norm": 0.3474629819393158, "learning_rate": 1.1165247424076789e-05, "loss": 0.4985, "step": 43700 }, { "epoch": 0.9268308201310683, "grad_norm": 0.36322519183158875, "learning_rate": 1.1164916199945088e-05, "loss": 0.4713, "step": 43701 }, { "epoch": 0.9268520285890013, "grad_norm": 0.3541630208492279, "learning_rate": 1.1164584974517773e-05, "loss": 0.4774, "step": 43702 }, { "epoch": 0.9268732370469344, "grad_norm": 0.3912941813468933, "learning_rate": 1.1164253747795222e-05, "loss": 0.5692, "step": 43703 }, { "epoch": 0.9268944455048673, "grad_norm": 0.3419255018234253, "learning_rate": 1.1163922519777794e-05, "loss": 0.5367, "step": 43704 }, { "epoch": 0.9269156539628004, "grad_norm": 0.3885048031806946, "learning_rate": 1.1163591290465863e-05, "loss": 0.4331, "step": 43705 }, { "epoch": 0.9269368624207334, "grad_norm": 0.36521461606025696, "learning_rate": 1.1163260059859797e-05, "loss": 0.4736, "step": 43706 }, { "epoch": 0.9269580708786664, "grad_norm": 0.5681519508361816, "learning_rate": 1.116292882795996e-05, "loss": 0.4077, "step": 43707 }, { "epoch": 0.9269792793365994, "grad_norm": 0.3576832711696625, "learning_rate": 1.116259759476673e-05, "loss": 0.4477, "step": 43708 }, { "epoch": 0.9270004877945325, "grad_norm": 0.3872455060482025, "learning_rate": 1.1162266360280465e-05, "loss": 0.4654, "step": 43709 }, { "epoch": 0.9270216962524654, "grad_norm": 0.3682273328304291, "learning_rate": 1.1161935124501532e-05, "loss": 0.4902, "step": 43710 }, { "epoch": 0.9270429047103985, "grad_norm": 0.34865602850914, "learning_rate": 1.1161603887430312e-05, "loss": 0.4457, "step": 43711 }, { "epoch": 0.9270641131683315, "grad_norm": 1.5821449756622314, "learning_rate": 1.1161272649067168e-05, "loss": 0.378, "step": 43712 }, { "epoch": 0.9270853216262646, "grad_norm": 0.35247695446014404, "learning_rate": 1.1160941409412463e-05, "loss": 0.4269, "step": 43713 }, { "epoch": 0.9271065300841976, "grad_norm": 0.3696478307247162, "learning_rate": 1.1160610168466568e-05, "loss": 0.4694, "step": 43714 }, { "epoch": 0.9271277385421306, "grad_norm": 0.34327569603919983, "learning_rate": 1.1160278926229856e-05, "loss": 0.4615, "step": 43715 }, { "epoch": 0.9271489470000637, "grad_norm": 0.3702429234981537, "learning_rate": 1.1159947682702687e-05, "loss": 0.5859, "step": 43716 }, { "epoch": 0.9271701554579966, "grad_norm": 0.423324316740036, "learning_rate": 1.1159616437885437e-05, "loss": 0.4223, "step": 43717 }, { "epoch": 0.9271913639159297, "grad_norm": 0.35198235511779785, "learning_rate": 1.1159285191778473e-05, "loss": 0.4345, "step": 43718 }, { "epoch": 0.9272125723738627, "grad_norm": 0.37146487832069397, "learning_rate": 1.115895394438216e-05, "loss": 0.5024, "step": 43719 }, { "epoch": 0.9272337808317958, "grad_norm": 0.35661110281944275, "learning_rate": 1.1158622695696871e-05, "loss": 0.4643, "step": 43720 }, { "epoch": 0.9272549892897287, "grad_norm": 0.3561628460884094, "learning_rate": 1.1158291445722967e-05, "loss": 0.4908, "step": 43721 }, { "epoch": 0.9272761977476618, "grad_norm": 0.35342085361480713, "learning_rate": 1.1157960194460826e-05, "loss": 0.4029, "step": 43722 }, { "epoch": 0.9272974062055948, "grad_norm": 0.49539825320243835, "learning_rate": 1.115762894191081e-05, "loss": 0.4627, "step": 43723 }, { "epoch": 0.9273186146635278, "grad_norm": 0.3910146951675415, "learning_rate": 1.1157297688073287e-05, "loss": 0.5224, "step": 43724 }, { "epoch": 0.9273398231214608, "grad_norm": 0.39705023169517517, "learning_rate": 1.115696643294863e-05, "loss": 0.4938, "step": 43725 }, { "epoch": 0.9273610315793939, "grad_norm": 0.36895957589149475, "learning_rate": 1.1156635176537208e-05, "loss": 0.5483, "step": 43726 }, { "epoch": 0.9273822400373268, "grad_norm": 0.41660428047180176, "learning_rate": 1.1156303918839381e-05, "loss": 0.4761, "step": 43727 }, { "epoch": 0.9274034484952599, "grad_norm": 0.3797794580459595, "learning_rate": 1.1155972659855525e-05, "loss": 0.4872, "step": 43728 }, { "epoch": 0.927424656953193, "grad_norm": 0.5279995203018188, "learning_rate": 1.1155641399586008e-05, "loss": 0.6062, "step": 43729 }, { "epoch": 0.927445865411126, "grad_norm": 0.3784652650356293, "learning_rate": 1.1155310138031192e-05, "loss": 0.4949, "step": 43730 }, { "epoch": 0.927467073869059, "grad_norm": 0.3849968910217285, "learning_rate": 1.1154978875191454e-05, "loss": 0.4989, "step": 43731 }, { "epoch": 0.927488282326992, "grad_norm": 0.41362884640693665, "learning_rate": 1.115464761106716e-05, "loss": 0.5165, "step": 43732 }, { "epoch": 0.9275094907849251, "grad_norm": 0.335344523191452, "learning_rate": 1.1154316345658674e-05, "loss": 0.4348, "step": 43733 }, { "epoch": 0.927530699242858, "grad_norm": 0.39223864674568176, "learning_rate": 1.1153985078966367e-05, "loss": 0.5868, "step": 43734 }, { "epoch": 0.9275519077007911, "grad_norm": 0.3438566029071808, "learning_rate": 1.1153653810990607e-05, "loss": 0.5013, "step": 43735 }, { "epoch": 0.9275731161587241, "grad_norm": 0.3558944761753082, "learning_rate": 1.1153322541731765e-05, "loss": 0.4789, "step": 43736 }, { "epoch": 0.9275943246166571, "grad_norm": 0.3773409128189087, "learning_rate": 1.115299127119021e-05, "loss": 0.5376, "step": 43737 }, { "epoch": 0.9276155330745901, "grad_norm": 0.3665490448474884, "learning_rate": 1.1152659999366304e-05, "loss": 0.4679, "step": 43738 }, { "epoch": 0.9276367415325232, "grad_norm": 0.38730373978614807, "learning_rate": 1.1152328726260421e-05, "loss": 0.5227, "step": 43739 }, { "epoch": 0.9276579499904561, "grad_norm": 0.385370671749115, "learning_rate": 1.115199745187293e-05, "loss": 0.5372, "step": 43740 }, { "epoch": 0.9276791584483892, "grad_norm": 0.3405974209308624, "learning_rate": 1.1151666176204194e-05, "loss": 0.4316, "step": 43741 }, { "epoch": 0.9277003669063223, "grad_norm": 0.5646700859069824, "learning_rate": 1.1151334899254586e-05, "loss": 0.4886, "step": 43742 }, { "epoch": 0.9277215753642553, "grad_norm": 0.3547160029411316, "learning_rate": 1.1151003621024476e-05, "loss": 0.5679, "step": 43743 }, { "epoch": 0.9277427838221883, "grad_norm": 1.020621657371521, "learning_rate": 1.1150672341514226e-05, "loss": 0.4737, "step": 43744 }, { "epoch": 0.9277639922801213, "grad_norm": 0.3307172358036041, "learning_rate": 1.1150341060724213e-05, "loss": 0.4936, "step": 43745 }, { "epoch": 0.9277852007380544, "grad_norm": 0.5089271664619446, "learning_rate": 1.1150009778654798e-05, "loss": 0.4582, "step": 43746 }, { "epoch": 0.9278064091959873, "grad_norm": 0.3588918149471283, "learning_rate": 1.114967849530635e-05, "loss": 0.483, "step": 43747 }, { "epoch": 0.9278276176539204, "grad_norm": 0.3568737506866455, "learning_rate": 1.1149347210679243e-05, "loss": 0.5198, "step": 43748 }, { "epoch": 0.9278488261118534, "grad_norm": 0.35531240701675415, "learning_rate": 1.1149015924773838e-05, "loss": 0.4635, "step": 43749 }, { "epoch": 0.9278700345697865, "grad_norm": 0.380957692861557, "learning_rate": 1.114868463759051e-05, "loss": 0.4289, "step": 43750 }, { "epoch": 0.9278912430277194, "grad_norm": 0.36046287417411804, "learning_rate": 1.1148353349129626e-05, "loss": 0.4912, "step": 43751 }, { "epoch": 0.9279124514856525, "grad_norm": 0.4325449466705322, "learning_rate": 1.114802205939155e-05, "loss": 0.4903, "step": 43752 }, { "epoch": 0.9279336599435855, "grad_norm": 0.337006539106369, "learning_rate": 1.1147690768376657e-05, "loss": 0.4218, "step": 43753 }, { "epoch": 0.9279548684015185, "grad_norm": 0.3523862659931183, "learning_rate": 1.1147359476085312e-05, "loss": 0.5176, "step": 43754 }, { "epoch": 0.9279760768594516, "grad_norm": 0.500092625617981, "learning_rate": 1.1147028182517882e-05, "loss": 0.4057, "step": 43755 }, { "epoch": 0.9279972853173846, "grad_norm": 0.40409159660339355, "learning_rate": 1.114669688767474e-05, "loss": 0.5335, "step": 43756 }, { "epoch": 0.9280184937753176, "grad_norm": 0.37950921058654785, "learning_rate": 1.114636559155625e-05, "loss": 0.4647, "step": 43757 }, { "epoch": 0.9280397022332506, "grad_norm": 0.3355654180049896, "learning_rate": 1.114603429416278e-05, "loss": 0.4378, "step": 43758 }, { "epoch": 0.9280609106911837, "grad_norm": 0.3668729066848755, "learning_rate": 1.1145702995494705e-05, "loss": 0.4925, "step": 43759 }, { "epoch": 0.9280821191491166, "grad_norm": 0.37263399362564087, "learning_rate": 1.1145371695552388e-05, "loss": 0.4402, "step": 43760 }, { "epoch": 0.9281033276070497, "grad_norm": 0.3855763077735901, "learning_rate": 1.1145040394336196e-05, "loss": 0.424, "step": 43761 }, { "epoch": 0.9281245360649827, "grad_norm": 0.36072009801864624, "learning_rate": 1.11447090918465e-05, "loss": 0.5006, "step": 43762 }, { "epoch": 0.9281457445229158, "grad_norm": 0.3512305021286011, "learning_rate": 1.114437778808367e-05, "loss": 0.4361, "step": 43763 }, { "epoch": 0.9281669529808487, "grad_norm": 0.3637232780456543, "learning_rate": 1.1144046483048071e-05, "loss": 0.517, "step": 43764 }, { "epoch": 0.9281881614387818, "grad_norm": 0.40125805139541626, "learning_rate": 1.1143715176740078e-05, "loss": 0.5901, "step": 43765 }, { "epoch": 0.9282093698967148, "grad_norm": 0.33263853192329407, "learning_rate": 1.114338386916005e-05, "loss": 0.4438, "step": 43766 }, { "epoch": 0.9282305783546478, "grad_norm": 0.38402053713798523, "learning_rate": 1.1143052560308365e-05, "loss": 0.4727, "step": 43767 }, { "epoch": 0.9282517868125808, "grad_norm": 0.37869611382484436, "learning_rate": 1.1142721250185382e-05, "loss": 0.5488, "step": 43768 }, { "epoch": 0.9282729952705139, "grad_norm": 0.3533838987350464, "learning_rate": 1.1142389938791478e-05, "loss": 0.526, "step": 43769 }, { "epoch": 0.928294203728447, "grad_norm": 0.36713480949401855, "learning_rate": 1.1142058626127017e-05, "loss": 0.3731, "step": 43770 }, { "epoch": 0.9283154121863799, "grad_norm": 0.33717361092567444, "learning_rate": 1.1141727312192367e-05, "loss": 0.3848, "step": 43771 }, { "epoch": 0.928336620644313, "grad_norm": 0.4083094894886017, "learning_rate": 1.1141395996987899e-05, "loss": 0.5299, "step": 43772 }, { "epoch": 0.928357829102246, "grad_norm": 0.34442976117134094, "learning_rate": 1.1141064680513982e-05, "loss": 0.5011, "step": 43773 }, { "epoch": 0.928379037560179, "grad_norm": 0.34804290533065796, "learning_rate": 1.1140733362770979e-05, "loss": 0.4734, "step": 43774 }, { "epoch": 0.928400246018112, "grad_norm": 0.38823285698890686, "learning_rate": 1.1140402043759262e-05, "loss": 0.513, "step": 43775 }, { "epoch": 0.9284214544760451, "grad_norm": 0.40367671847343445, "learning_rate": 1.1140070723479204e-05, "loss": 0.4411, "step": 43776 }, { "epoch": 0.928442662933978, "grad_norm": 0.42577093839645386, "learning_rate": 1.1139739401931168e-05, "loss": 0.5578, "step": 43777 }, { "epoch": 0.9284638713919111, "grad_norm": 0.36994433403015137, "learning_rate": 1.113940807911552e-05, "loss": 0.4917, "step": 43778 }, { "epoch": 0.9284850798498441, "grad_norm": 0.3292524814605713, "learning_rate": 1.1139076755032639e-05, "loss": 0.3535, "step": 43779 }, { "epoch": 0.9285062883077772, "grad_norm": 0.4725489616394043, "learning_rate": 1.113874542968288e-05, "loss": 0.5315, "step": 43780 }, { "epoch": 0.9285274967657101, "grad_norm": 0.3776215612888336, "learning_rate": 1.113841410306662e-05, "loss": 0.4407, "step": 43781 }, { "epoch": 0.9285487052236432, "grad_norm": 0.4107508659362793, "learning_rate": 1.113808277518423e-05, "loss": 0.4635, "step": 43782 }, { "epoch": 0.9285699136815763, "grad_norm": 0.3360616862773895, "learning_rate": 1.1137751446036069e-05, "loss": 0.5142, "step": 43783 }, { "epoch": 0.9285911221395092, "grad_norm": 0.3737391233444214, "learning_rate": 1.1137420115622512e-05, "loss": 0.5068, "step": 43784 }, { "epoch": 0.9286123305974423, "grad_norm": 0.5145851969718933, "learning_rate": 1.1137088783943929e-05, "loss": 0.3834, "step": 43785 }, { "epoch": 0.9286335390553753, "grad_norm": 0.38927167654037476, "learning_rate": 1.113675745100068e-05, "loss": 0.52, "step": 43786 }, { "epoch": 0.9286547475133083, "grad_norm": 0.3820091485977173, "learning_rate": 1.1136426116793145e-05, "loss": 0.5484, "step": 43787 }, { "epoch": 0.9286759559712413, "grad_norm": 0.37764212489128113, "learning_rate": 1.1136094781321685e-05, "loss": 0.4237, "step": 43788 }, { "epoch": 0.9286971644291744, "grad_norm": 0.39113008975982666, "learning_rate": 1.1135763444586669e-05, "loss": 0.4444, "step": 43789 }, { "epoch": 0.9287183728871073, "grad_norm": 0.4965435862541199, "learning_rate": 1.1135432106588469e-05, "loss": 0.4788, "step": 43790 }, { "epoch": 0.9287395813450404, "grad_norm": 0.3689136207103729, "learning_rate": 1.113510076732745e-05, "loss": 0.5308, "step": 43791 }, { "epoch": 0.9287607898029734, "grad_norm": 0.34777697920799255, "learning_rate": 1.1134769426803982e-05, "loss": 0.4321, "step": 43792 }, { "epoch": 0.9287819982609065, "grad_norm": 0.4976445138454437, "learning_rate": 1.1134438085018434e-05, "loss": 0.4439, "step": 43793 }, { "epoch": 0.9288032067188394, "grad_norm": 0.36699357628822327, "learning_rate": 1.1134106741971172e-05, "loss": 0.5248, "step": 43794 }, { "epoch": 0.9288244151767725, "grad_norm": 0.3577316403388977, "learning_rate": 1.1133775397662565e-05, "loss": 0.4913, "step": 43795 }, { "epoch": 0.9288456236347056, "grad_norm": 0.35677996277809143, "learning_rate": 1.1133444052092985e-05, "loss": 0.4188, "step": 43796 }, { "epoch": 0.9288668320926385, "grad_norm": 0.3843681514263153, "learning_rate": 1.1133112705262801e-05, "loss": 0.5866, "step": 43797 }, { "epoch": 0.9288880405505716, "grad_norm": 0.41422370076179504, "learning_rate": 1.1132781357172375e-05, "loss": 0.5416, "step": 43798 }, { "epoch": 0.9289092490085046, "grad_norm": 0.3412831127643585, "learning_rate": 1.1132450007822082e-05, "loss": 0.4697, "step": 43799 }, { "epoch": 0.9289304574664377, "grad_norm": 0.35015204548835754, "learning_rate": 1.1132118657212285e-05, "loss": 0.5869, "step": 43800 }, { "epoch": 0.9289516659243706, "grad_norm": 0.3782360851764679, "learning_rate": 1.1131787305343355e-05, "loss": 0.4459, "step": 43801 }, { "epoch": 0.9289728743823037, "grad_norm": 0.36145782470703125, "learning_rate": 1.1131455952215664e-05, "loss": 0.4836, "step": 43802 }, { "epoch": 0.9289940828402367, "grad_norm": 0.3637332618236542, "learning_rate": 1.1131124597829578e-05, "loss": 0.4372, "step": 43803 }, { "epoch": 0.9290152912981697, "grad_norm": 0.39984703063964844, "learning_rate": 1.1130793242185462e-05, "loss": 0.459, "step": 43804 }, { "epoch": 0.9290364997561027, "grad_norm": 0.36959609389305115, "learning_rate": 1.1130461885283693e-05, "loss": 0.4989, "step": 43805 }, { "epoch": 0.9290577082140358, "grad_norm": 0.3522411584854126, "learning_rate": 1.1130130527124628e-05, "loss": 0.4778, "step": 43806 }, { "epoch": 0.9290789166719687, "grad_norm": 0.34240850806236267, "learning_rate": 1.1129799167708644e-05, "loss": 0.498, "step": 43807 }, { "epoch": 0.9291001251299018, "grad_norm": 0.3816900849342346, "learning_rate": 1.1129467807036108e-05, "loss": 0.4308, "step": 43808 }, { "epoch": 0.9291213335878348, "grad_norm": 0.4818906784057617, "learning_rate": 1.1129136445107384e-05, "loss": 0.5595, "step": 43809 }, { "epoch": 0.9291425420457678, "grad_norm": 0.8290036916732788, "learning_rate": 1.112880508192285e-05, "loss": 0.4708, "step": 43810 }, { "epoch": 0.9291637505037009, "grad_norm": 0.3485895097255707, "learning_rate": 1.1128473717482866e-05, "loss": 0.5128, "step": 43811 }, { "epoch": 0.9291849589616339, "grad_norm": 0.4968464970588684, "learning_rate": 1.1128142351787804e-05, "loss": 0.526, "step": 43812 }, { "epoch": 0.929206167419567, "grad_norm": 0.4184599816799164, "learning_rate": 1.1127810984838028e-05, "loss": 0.5839, "step": 43813 }, { "epoch": 0.9292273758774999, "grad_norm": 0.507011890411377, "learning_rate": 1.1127479616633913e-05, "loss": 0.3591, "step": 43814 }, { "epoch": 0.929248584335433, "grad_norm": 0.3611646294593811, "learning_rate": 1.1127148247175825e-05, "loss": 0.4795, "step": 43815 }, { "epoch": 0.929269792793366, "grad_norm": 0.4072332978248596, "learning_rate": 1.1126816876464136e-05, "loss": 0.5081, "step": 43816 }, { "epoch": 0.929291001251299, "grad_norm": 0.36835089325904846, "learning_rate": 1.1126485504499207e-05, "loss": 0.4332, "step": 43817 }, { "epoch": 0.929312209709232, "grad_norm": 0.43642154335975647, "learning_rate": 1.1126154131281412e-05, "loss": 0.4833, "step": 43818 }, { "epoch": 0.9293334181671651, "grad_norm": 0.3618888854980469, "learning_rate": 1.1125822756811117e-05, "loss": 0.4956, "step": 43819 }, { "epoch": 0.929354626625098, "grad_norm": 0.5193372368812561, "learning_rate": 1.112549138108869e-05, "loss": 0.4964, "step": 43820 }, { "epoch": 0.9293758350830311, "grad_norm": 0.34559550881385803, "learning_rate": 1.1125160004114505e-05, "loss": 0.4724, "step": 43821 }, { "epoch": 0.9293970435409641, "grad_norm": 0.38785824179649353, "learning_rate": 1.1124828625888929e-05, "loss": 0.4913, "step": 43822 }, { "epoch": 0.9294182519988972, "grad_norm": 0.39081713557243347, "learning_rate": 1.1124497246412322e-05, "loss": 0.5216, "step": 43823 }, { "epoch": 0.9294394604568302, "grad_norm": 0.3368026912212372, "learning_rate": 1.1124165865685064e-05, "loss": 0.4604, "step": 43824 }, { "epoch": 0.9294606689147632, "grad_norm": 0.4126680791378021, "learning_rate": 1.1123834483707519e-05, "loss": 0.422, "step": 43825 }, { "epoch": 0.9294818773726963, "grad_norm": 0.522836446762085, "learning_rate": 1.1123503100480048e-05, "loss": 0.456, "step": 43826 }, { "epoch": 0.9295030858306292, "grad_norm": 0.4009617567062378, "learning_rate": 1.1123171716003031e-05, "loss": 0.4886, "step": 43827 }, { "epoch": 0.9295242942885623, "grad_norm": 0.35609665513038635, "learning_rate": 1.1122840330276834e-05, "loss": 0.4864, "step": 43828 }, { "epoch": 0.9295455027464953, "grad_norm": 0.4709700047969818, "learning_rate": 1.1122508943301822e-05, "loss": 0.4754, "step": 43829 }, { "epoch": 0.9295667112044284, "grad_norm": 0.3825921416282654, "learning_rate": 1.1122177555078367e-05, "loss": 0.4793, "step": 43830 }, { "epoch": 0.9295879196623613, "grad_norm": 0.39922085404396057, "learning_rate": 1.1121846165606832e-05, "loss": 0.5366, "step": 43831 }, { "epoch": 0.9296091281202944, "grad_norm": 0.43786126375198364, "learning_rate": 1.112151477488759e-05, "loss": 0.4799, "step": 43832 }, { "epoch": 0.9296303365782274, "grad_norm": 0.38825559616088867, "learning_rate": 1.1121183382921013e-05, "loss": 0.4891, "step": 43833 }, { "epoch": 0.9296515450361604, "grad_norm": 0.4037947356700897, "learning_rate": 1.1120851989707461e-05, "loss": 0.5564, "step": 43834 }, { "epoch": 0.9296727534940934, "grad_norm": 0.33597782254219055, "learning_rate": 1.112052059524731e-05, "loss": 0.5057, "step": 43835 }, { "epoch": 0.9296939619520265, "grad_norm": 0.3640339970588684, "learning_rate": 1.1120189199540927e-05, "loss": 0.4184, "step": 43836 }, { "epoch": 0.9297151704099595, "grad_norm": 0.5038257837295532, "learning_rate": 1.1119857802588675e-05, "loss": 0.4024, "step": 43837 }, { "epoch": 0.9297363788678925, "grad_norm": 0.4110148847103119, "learning_rate": 1.1119526404390932e-05, "loss": 0.3706, "step": 43838 }, { "epoch": 0.9297575873258256, "grad_norm": 0.3775188624858856, "learning_rate": 1.1119195004948058e-05, "loss": 0.569, "step": 43839 }, { "epoch": 0.9297787957837585, "grad_norm": 0.3793768882751465, "learning_rate": 1.1118863604260423e-05, "loss": 0.5108, "step": 43840 }, { "epoch": 0.9298000042416916, "grad_norm": 0.3338721692562103, "learning_rate": 1.1118532202328403e-05, "loss": 0.4312, "step": 43841 }, { "epoch": 0.9298212126996246, "grad_norm": 0.3999212384223938, "learning_rate": 1.1118200799152358e-05, "loss": 0.4592, "step": 43842 }, { "epoch": 0.9298424211575577, "grad_norm": 0.36977291107177734, "learning_rate": 1.111786939473266e-05, "loss": 0.436, "step": 43843 }, { "epoch": 0.9298636296154906, "grad_norm": 0.3999081552028656, "learning_rate": 1.111753798906968e-05, "loss": 0.5402, "step": 43844 }, { "epoch": 0.9298848380734237, "grad_norm": 0.3526937961578369, "learning_rate": 1.111720658216378e-05, "loss": 0.39, "step": 43845 }, { "epoch": 0.9299060465313567, "grad_norm": 0.33356285095214844, "learning_rate": 1.1116875174015333e-05, "loss": 0.4881, "step": 43846 }, { "epoch": 0.9299272549892897, "grad_norm": 0.3344627916812897, "learning_rate": 1.111654376462471e-05, "loss": 0.4837, "step": 43847 }, { "epoch": 0.9299484634472227, "grad_norm": 0.3907402753829956, "learning_rate": 1.1116212353992273e-05, "loss": 0.4565, "step": 43848 }, { "epoch": 0.9299696719051558, "grad_norm": 0.3824220597743988, "learning_rate": 1.1115880942118398e-05, "loss": 0.461, "step": 43849 }, { "epoch": 0.9299908803630887, "grad_norm": 0.46175286173820496, "learning_rate": 1.111554952900345e-05, "loss": 0.605, "step": 43850 }, { "epoch": 0.9300120888210218, "grad_norm": 0.3768237233161926, "learning_rate": 1.1115218114647794e-05, "loss": 0.4105, "step": 43851 }, { "epoch": 0.9300332972789549, "grad_norm": 0.40593600273132324, "learning_rate": 1.1114886699051803e-05, "loss": 0.4591, "step": 43852 }, { "epoch": 0.9300545057368879, "grad_norm": 0.36727073788642883, "learning_rate": 1.1114555282215846e-05, "loss": 0.5217, "step": 43853 }, { "epoch": 0.9300757141948209, "grad_norm": 0.3399892747402191, "learning_rate": 1.111422386414029e-05, "loss": 0.4235, "step": 43854 }, { "epoch": 0.9300969226527539, "grad_norm": 0.3743042051792145, "learning_rate": 1.1113892444825505e-05, "loss": 0.4315, "step": 43855 }, { "epoch": 0.930118131110687, "grad_norm": 0.41250428557395935, "learning_rate": 1.1113561024271858e-05, "loss": 0.4414, "step": 43856 }, { "epoch": 0.9301393395686199, "grad_norm": 1.0181828737258911, "learning_rate": 1.1113229602479716e-05, "loss": 0.5125, "step": 43857 }, { "epoch": 0.930160548026553, "grad_norm": 0.391047865152359, "learning_rate": 1.111289817944945e-05, "loss": 0.4995, "step": 43858 }, { "epoch": 0.930181756484486, "grad_norm": 0.3611837327480316, "learning_rate": 1.1112566755181431e-05, "loss": 0.5475, "step": 43859 }, { "epoch": 0.930202964942419, "grad_norm": 0.4057646691799164, "learning_rate": 1.1112235329676022e-05, "loss": 0.5282, "step": 43860 }, { "epoch": 0.930224173400352, "grad_norm": 0.49164673686027527, "learning_rate": 1.1111903902933596e-05, "loss": 0.5748, "step": 43861 }, { "epoch": 0.9302453818582851, "grad_norm": 0.39252516627311707, "learning_rate": 1.111157247495452e-05, "loss": 0.5331, "step": 43862 }, { "epoch": 0.9302665903162181, "grad_norm": 0.3775675892829895, "learning_rate": 1.1111241045739164e-05, "loss": 0.5815, "step": 43863 }, { "epoch": 0.9302877987741511, "grad_norm": 0.5155276656150818, "learning_rate": 1.1110909615287893e-05, "loss": 0.4719, "step": 43864 }, { "epoch": 0.9303090072320842, "grad_norm": 0.33343639969825745, "learning_rate": 1.111057818360108e-05, "loss": 0.4699, "step": 43865 }, { "epoch": 0.9303302156900172, "grad_norm": 0.36648595333099365, "learning_rate": 1.111024675067909e-05, "loss": 0.4637, "step": 43866 }, { "epoch": 0.9303514241479502, "grad_norm": 0.3425855338573456, "learning_rate": 1.1109915316522295e-05, "loss": 0.5018, "step": 43867 }, { "epoch": 0.9303726326058832, "grad_norm": 0.3743293285369873, "learning_rate": 1.1109583881131061e-05, "loss": 0.5727, "step": 43868 }, { "epoch": 0.9303938410638163, "grad_norm": 0.36876705288887024, "learning_rate": 1.1109252444505758e-05, "loss": 0.4846, "step": 43869 }, { "epoch": 0.9304150495217492, "grad_norm": 0.3864690959453583, "learning_rate": 1.1108921006646753e-05, "loss": 0.5456, "step": 43870 }, { "epoch": 0.9304362579796823, "grad_norm": 0.4127631187438965, "learning_rate": 1.1108589567554413e-05, "loss": 0.5703, "step": 43871 }, { "epoch": 0.9304574664376153, "grad_norm": 0.36109551787376404, "learning_rate": 1.1108258127229114e-05, "loss": 0.4548, "step": 43872 }, { "epoch": 0.9304786748955484, "grad_norm": 0.35403406620025635, "learning_rate": 1.110792668567122e-05, "loss": 0.4249, "step": 43873 }, { "epoch": 0.9304998833534813, "grad_norm": 0.47548967599868774, "learning_rate": 1.1107595242881095e-05, "loss": 0.5227, "step": 43874 }, { "epoch": 0.9305210918114144, "grad_norm": 0.44354718923568726, "learning_rate": 1.1107263798859116e-05, "loss": 0.4623, "step": 43875 }, { "epoch": 0.9305423002693474, "grad_norm": 0.36747899651527405, "learning_rate": 1.1106932353605647e-05, "loss": 0.4961, "step": 43876 }, { "epoch": 0.9305635087272804, "grad_norm": 0.3637920916080475, "learning_rate": 1.1106600907121056e-05, "loss": 0.4025, "step": 43877 }, { "epoch": 0.9305847171852135, "grad_norm": 0.33907589316368103, "learning_rate": 1.1106269459405715e-05, "loss": 0.4252, "step": 43878 }, { "epoch": 0.9306059256431465, "grad_norm": 0.3736598491668701, "learning_rate": 1.1105938010459992e-05, "loss": 0.5317, "step": 43879 }, { "epoch": 0.9306271341010796, "grad_norm": 0.3312814235687256, "learning_rate": 1.110560656028425e-05, "loss": 0.4153, "step": 43880 }, { "epoch": 0.9306483425590125, "grad_norm": 0.41215646266937256, "learning_rate": 1.1105275108878865e-05, "loss": 0.482, "step": 43881 }, { "epoch": 0.9306695510169456, "grad_norm": 0.3752021789550781, "learning_rate": 1.1104943656244202e-05, "loss": 0.504, "step": 43882 }, { "epoch": 0.9306907594748786, "grad_norm": 0.8179778456687927, "learning_rate": 1.110461220238063e-05, "loss": 0.3894, "step": 43883 }, { "epoch": 0.9307119679328116, "grad_norm": 0.3446383476257324, "learning_rate": 1.110428074728852e-05, "loss": 0.4835, "step": 43884 }, { "epoch": 0.9307331763907446, "grad_norm": 0.34762507677078247, "learning_rate": 1.1103949290968234e-05, "loss": 0.4685, "step": 43885 }, { "epoch": 0.9307543848486777, "grad_norm": 0.36159539222717285, "learning_rate": 1.1103617833420148e-05, "loss": 0.4954, "step": 43886 }, { "epoch": 0.9307755933066106, "grad_norm": 0.3393249809741974, "learning_rate": 1.110328637464463e-05, "loss": 0.4207, "step": 43887 }, { "epoch": 0.9307968017645437, "grad_norm": 0.3557412624359131, "learning_rate": 1.1102954914642044e-05, "loss": 0.4197, "step": 43888 }, { "epoch": 0.9308180102224767, "grad_norm": 0.3772748112678528, "learning_rate": 1.1102623453412761e-05, "loss": 0.5575, "step": 43889 }, { "epoch": 0.9308392186804098, "grad_norm": 0.36193013191223145, "learning_rate": 1.1102291990957151e-05, "loss": 0.4508, "step": 43890 }, { "epoch": 0.9308604271383427, "grad_norm": 0.6240697503089905, "learning_rate": 1.1101960527275577e-05, "loss": 0.4676, "step": 43891 }, { "epoch": 0.9308816355962758, "grad_norm": 0.4228825569152832, "learning_rate": 1.1101629062368418e-05, "loss": 0.4842, "step": 43892 }, { "epoch": 0.9309028440542089, "grad_norm": 0.3621237277984619, "learning_rate": 1.1101297596236036e-05, "loss": 0.4366, "step": 43893 }, { "epoch": 0.9309240525121418, "grad_norm": 0.4517851173877716, "learning_rate": 1.1100966128878797e-05, "loss": 0.4538, "step": 43894 }, { "epoch": 0.9309452609700749, "grad_norm": 0.3757646977901459, "learning_rate": 1.1100634660297076e-05, "loss": 0.4614, "step": 43895 }, { "epoch": 0.9309664694280079, "grad_norm": 0.3541210889816284, "learning_rate": 1.1100303190491238e-05, "loss": 0.4383, "step": 43896 }, { "epoch": 0.930987677885941, "grad_norm": 0.37547048926353455, "learning_rate": 1.1099971719461648e-05, "loss": 0.4862, "step": 43897 }, { "epoch": 0.9310088863438739, "grad_norm": 0.37260013818740845, "learning_rate": 1.1099640247208686e-05, "loss": 0.4775, "step": 43898 }, { "epoch": 0.931030094801807, "grad_norm": 0.34787049889564514, "learning_rate": 1.1099308773732709e-05, "loss": 0.5035, "step": 43899 }, { "epoch": 0.93105130325974, "grad_norm": 0.34838464856147766, "learning_rate": 1.1098977299034093e-05, "loss": 0.5004, "step": 43900 }, { "epoch": 0.931072511717673, "grad_norm": 0.4250820279121399, "learning_rate": 1.1098645823113203e-05, "loss": 0.5382, "step": 43901 }, { "epoch": 0.931093720175606, "grad_norm": 0.41709068417549133, "learning_rate": 1.1098314345970407e-05, "loss": 0.6027, "step": 43902 }, { "epoch": 0.9311149286335391, "grad_norm": 0.3480266332626343, "learning_rate": 1.1097982867606077e-05, "loss": 0.4154, "step": 43903 }, { "epoch": 0.931136137091472, "grad_norm": 0.35905614495277405, "learning_rate": 1.109765138802058e-05, "loss": 0.4451, "step": 43904 }, { "epoch": 0.9311573455494051, "grad_norm": 0.33276718854904175, "learning_rate": 1.1097319907214284e-05, "loss": 0.4872, "step": 43905 }, { "epoch": 0.9311785540073382, "grad_norm": 0.34340161085128784, "learning_rate": 1.109698842518756e-05, "loss": 0.4505, "step": 43906 }, { "epoch": 0.9311997624652711, "grad_norm": 0.31135889887809753, "learning_rate": 1.1096656941940773e-05, "loss": 0.4459, "step": 43907 }, { "epoch": 0.9312209709232042, "grad_norm": 0.40290042757987976, "learning_rate": 1.1096325457474295e-05, "loss": 0.4111, "step": 43908 }, { "epoch": 0.9312421793811372, "grad_norm": 0.3866508901119232, "learning_rate": 1.1095993971788492e-05, "loss": 0.4658, "step": 43909 }, { "epoch": 0.9312633878390703, "grad_norm": 0.3635076880455017, "learning_rate": 1.1095662484883735e-05, "loss": 0.5631, "step": 43910 }, { "epoch": 0.9312845962970032, "grad_norm": 0.35541296005249023, "learning_rate": 1.1095330996760392e-05, "loss": 0.4922, "step": 43911 }, { "epoch": 0.9313058047549363, "grad_norm": 0.38828083872795105, "learning_rate": 1.1094999507418831e-05, "loss": 0.4462, "step": 43912 }, { "epoch": 0.9313270132128693, "grad_norm": 0.3610222339630127, "learning_rate": 1.109466801685942e-05, "loss": 0.4784, "step": 43913 }, { "epoch": 0.9313482216708023, "grad_norm": 0.3773813545703888, "learning_rate": 1.1094336525082529e-05, "loss": 0.4844, "step": 43914 }, { "epoch": 0.9313694301287353, "grad_norm": 0.37533316016197205, "learning_rate": 1.109400503208853e-05, "loss": 0.5159, "step": 43915 }, { "epoch": 0.9313906385866684, "grad_norm": 0.4016782343387604, "learning_rate": 1.1093673537877783e-05, "loss": 0.447, "step": 43916 }, { "epoch": 0.9314118470446013, "grad_norm": 0.4223676323890686, "learning_rate": 1.1093342042450663e-05, "loss": 0.5133, "step": 43917 }, { "epoch": 0.9314330555025344, "grad_norm": 0.3552611470222473, "learning_rate": 1.109301054580754e-05, "loss": 0.4557, "step": 43918 }, { "epoch": 0.9314542639604675, "grad_norm": 0.3473406732082367, "learning_rate": 1.1092679047948777e-05, "loss": 0.5262, "step": 43919 }, { "epoch": 0.9314754724184005, "grad_norm": 0.391975075006485, "learning_rate": 1.1092347548874748e-05, "loss": 0.4845, "step": 43920 }, { "epoch": 0.9314966808763335, "grad_norm": 0.36952900886535645, "learning_rate": 1.1092016048585824e-05, "loss": 0.4872, "step": 43921 }, { "epoch": 0.9315178893342665, "grad_norm": 0.3277098536491394, "learning_rate": 1.109168454708236e-05, "loss": 0.4585, "step": 43922 }, { "epoch": 0.9315390977921996, "grad_norm": 0.44266819953918457, "learning_rate": 1.1091353044364738e-05, "loss": 0.5208, "step": 43923 }, { "epoch": 0.9315603062501325, "grad_norm": 0.35903000831604004, "learning_rate": 1.1091021540433324e-05, "loss": 0.4543, "step": 43924 }, { "epoch": 0.9315815147080656, "grad_norm": 0.3790699243545532, "learning_rate": 1.1090690035288483e-05, "loss": 0.4399, "step": 43925 }, { "epoch": 0.9316027231659986, "grad_norm": 0.4176214635372162, "learning_rate": 1.109035852893059e-05, "loss": 0.4649, "step": 43926 }, { "epoch": 0.9316239316239316, "grad_norm": 0.4536205530166626, "learning_rate": 1.1090027021360004e-05, "loss": 0.5381, "step": 43927 }, { "epoch": 0.9316451400818646, "grad_norm": 0.8678070902824402, "learning_rate": 1.1089695512577102e-05, "loss": 0.4895, "step": 43928 }, { "epoch": 0.9316663485397977, "grad_norm": 0.3597237169742584, "learning_rate": 1.1089364002582252e-05, "loss": 0.4334, "step": 43929 }, { "epoch": 0.9316875569977306, "grad_norm": 0.5505861639976501, "learning_rate": 1.1089032491375818e-05, "loss": 0.4276, "step": 43930 }, { "epoch": 0.9317087654556637, "grad_norm": 0.3892812132835388, "learning_rate": 1.1088700978958174e-05, "loss": 0.4612, "step": 43931 }, { "epoch": 0.9317299739135967, "grad_norm": 0.35817062854766846, "learning_rate": 1.1088369465329686e-05, "loss": 0.4981, "step": 43932 }, { "epoch": 0.9317511823715298, "grad_norm": 0.3693765103816986, "learning_rate": 1.108803795049072e-05, "loss": 0.5166, "step": 43933 }, { "epoch": 0.9317723908294628, "grad_norm": 0.38745465874671936, "learning_rate": 1.1087706434441652e-05, "loss": 0.4445, "step": 43934 }, { "epoch": 0.9317935992873958, "grad_norm": 0.3747890591621399, "learning_rate": 1.1087374917182843e-05, "loss": 0.5102, "step": 43935 }, { "epoch": 0.9318148077453289, "grad_norm": 0.507305920124054, "learning_rate": 1.1087043398714665e-05, "loss": 0.4953, "step": 43936 }, { "epoch": 0.9318360162032618, "grad_norm": 0.3714974522590637, "learning_rate": 1.1086711879037488e-05, "loss": 0.5314, "step": 43937 }, { "epoch": 0.9318572246611949, "grad_norm": 0.3795296847820282, "learning_rate": 1.1086380358151681e-05, "loss": 0.4744, "step": 43938 }, { "epoch": 0.9318784331191279, "grad_norm": 0.3196601867675781, "learning_rate": 1.1086048836057608e-05, "loss": 0.4161, "step": 43939 }, { "epoch": 0.931899641577061, "grad_norm": 0.4100381135940552, "learning_rate": 1.1085717312755645e-05, "loss": 0.5344, "step": 43940 }, { "epoch": 0.9319208500349939, "grad_norm": 0.35794201493263245, "learning_rate": 1.1085385788246153e-05, "loss": 0.4581, "step": 43941 }, { "epoch": 0.931942058492927, "grad_norm": 0.34892866015434265, "learning_rate": 1.1085054262529503e-05, "loss": 0.451, "step": 43942 }, { "epoch": 0.93196326695086, "grad_norm": 0.35225051641464233, "learning_rate": 1.1084722735606069e-05, "loss": 0.4197, "step": 43943 }, { "epoch": 0.931984475408793, "grad_norm": 0.3569386303424835, "learning_rate": 1.1084391207476213e-05, "loss": 0.4392, "step": 43944 }, { "epoch": 0.932005683866726, "grad_norm": 0.35612165927886963, "learning_rate": 1.108405967814031e-05, "loss": 0.47, "step": 43945 }, { "epoch": 0.9320268923246591, "grad_norm": 0.3424689769744873, "learning_rate": 1.1083728147598722e-05, "loss": 0.4515, "step": 43946 }, { "epoch": 0.9320481007825921, "grad_norm": 0.36605435609817505, "learning_rate": 1.1083396615851822e-05, "loss": 0.4404, "step": 43947 }, { "epoch": 0.9320693092405251, "grad_norm": 0.4111003279685974, "learning_rate": 1.1083065082899975e-05, "loss": 0.4832, "step": 43948 }, { "epoch": 0.9320905176984582, "grad_norm": 0.4305714964866638, "learning_rate": 1.1082733548743558e-05, "loss": 0.4978, "step": 43949 }, { "epoch": 0.9321117261563912, "grad_norm": 0.3203302323818207, "learning_rate": 1.1082402013382929e-05, "loss": 0.4617, "step": 43950 }, { "epoch": 0.9321329346143242, "grad_norm": 0.37497246265411377, "learning_rate": 1.1082070476818468e-05, "loss": 0.4252, "step": 43951 }, { "epoch": 0.9321541430722572, "grad_norm": 0.34883198142051697, "learning_rate": 1.1081738939050532e-05, "loss": 0.5233, "step": 43952 }, { "epoch": 0.9321753515301903, "grad_norm": 0.3398463726043701, "learning_rate": 1.1081407400079498e-05, "loss": 0.4227, "step": 43953 }, { "epoch": 0.9321965599881232, "grad_norm": 0.36542654037475586, "learning_rate": 1.108107585990573e-05, "loss": 0.5221, "step": 43954 }, { "epoch": 0.9322177684460563, "grad_norm": 0.38667887449264526, "learning_rate": 1.1080744318529599e-05, "loss": 0.5235, "step": 43955 }, { "epoch": 0.9322389769039893, "grad_norm": 0.4009474813938141, "learning_rate": 1.1080412775951475e-05, "loss": 0.4793, "step": 43956 }, { "epoch": 0.9322601853619223, "grad_norm": 0.3800771236419678, "learning_rate": 1.1080081232171726e-05, "loss": 0.476, "step": 43957 }, { "epoch": 0.9322813938198553, "grad_norm": 0.39079907536506653, "learning_rate": 1.1079749687190717e-05, "loss": 0.5074, "step": 43958 }, { "epoch": 0.9323026022777884, "grad_norm": 0.503621518611908, "learning_rate": 1.1079418141008824e-05, "loss": 0.4996, "step": 43959 }, { "epoch": 0.9323238107357215, "grad_norm": 0.3349061906337738, "learning_rate": 1.1079086593626407e-05, "loss": 0.5238, "step": 43960 }, { "epoch": 0.9323450191936544, "grad_norm": 0.3655036389827728, "learning_rate": 1.1078755045043843e-05, "loss": 0.4623, "step": 43961 }, { "epoch": 0.9323662276515875, "grad_norm": 0.3674885928630829, "learning_rate": 1.1078423495261494e-05, "loss": 0.4775, "step": 43962 }, { "epoch": 0.9323874361095205, "grad_norm": 0.5916300415992737, "learning_rate": 1.1078091944279735e-05, "loss": 0.4656, "step": 43963 }, { "epoch": 0.9324086445674535, "grad_norm": 0.3420584201812744, "learning_rate": 1.1077760392098929e-05, "loss": 0.4759, "step": 43964 }, { "epoch": 0.9324298530253865, "grad_norm": 0.4426768124103546, "learning_rate": 1.1077428838719448e-05, "loss": 0.5472, "step": 43965 }, { "epoch": 0.9324510614833196, "grad_norm": 0.389392614364624, "learning_rate": 1.1077097284141662e-05, "loss": 0.5022, "step": 43966 }, { "epoch": 0.9324722699412525, "grad_norm": 0.3727196156978607, "learning_rate": 1.1076765728365933e-05, "loss": 0.517, "step": 43967 }, { "epoch": 0.9324934783991856, "grad_norm": 0.3796755373477936, "learning_rate": 1.1076434171392639e-05, "loss": 0.4799, "step": 43968 }, { "epoch": 0.9325146868571186, "grad_norm": 0.35466036200523376, "learning_rate": 1.1076102613222141e-05, "loss": 0.4753, "step": 43969 }, { "epoch": 0.9325358953150517, "grad_norm": 0.36226287484169006, "learning_rate": 1.1075771053854813e-05, "loss": 0.4582, "step": 43970 }, { "epoch": 0.9325571037729846, "grad_norm": 0.39395302534103394, "learning_rate": 1.1075439493291022e-05, "loss": 0.4852, "step": 43971 }, { "epoch": 0.9325783122309177, "grad_norm": 0.39675143361091614, "learning_rate": 1.1075107931531136e-05, "loss": 0.407, "step": 43972 }, { "epoch": 0.9325995206888507, "grad_norm": 0.37607142329216003, "learning_rate": 1.1074776368575522e-05, "loss": 0.4329, "step": 43973 }, { "epoch": 0.9326207291467837, "grad_norm": 0.3823121190071106, "learning_rate": 1.1074444804424556e-05, "loss": 0.534, "step": 43974 }, { "epoch": 0.9326419376047168, "grad_norm": 0.35813573002815247, "learning_rate": 1.1074113239078599e-05, "loss": 0.5221, "step": 43975 }, { "epoch": 0.9326631460626498, "grad_norm": 0.35828617215156555, "learning_rate": 1.1073781672538023e-05, "loss": 0.4947, "step": 43976 }, { "epoch": 0.9326843545205828, "grad_norm": 0.38722819089889526, "learning_rate": 1.1073450104803197e-05, "loss": 0.5209, "step": 43977 }, { "epoch": 0.9327055629785158, "grad_norm": 0.3737426698207855, "learning_rate": 1.1073118535874486e-05, "loss": 0.4642, "step": 43978 }, { "epoch": 0.9327267714364489, "grad_norm": 0.335490345954895, "learning_rate": 1.1072786965752265e-05, "loss": 0.4297, "step": 43979 }, { "epoch": 0.9327479798943819, "grad_norm": 0.3691595494747162, "learning_rate": 1.1072455394436899e-05, "loss": 0.6194, "step": 43980 }, { "epoch": 0.9327691883523149, "grad_norm": 0.34558913111686707, "learning_rate": 1.1072123821928756e-05, "loss": 0.5311, "step": 43981 }, { "epoch": 0.9327903968102479, "grad_norm": 0.3841616213321686, "learning_rate": 1.1071792248228209e-05, "loss": 0.5412, "step": 43982 }, { "epoch": 0.932811605268181, "grad_norm": 0.37362125515937805, "learning_rate": 1.107146067333562e-05, "loss": 0.4532, "step": 43983 }, { "epoch": 0.9328328137261139, "grad_norm": 0.43647363781929016, "learning_rate": 1.1071129097251365e-05, "loss": 0.4282, "step": 43984 }, { "epoch": 0.932854022184047, "grad_norm": 0.397155225276947, "learning_rate": 1.1070797519975809e-05, "loss": 0.5774, "step": 43985 }, { "epoch": 0.93287523064198, "grad_norm": 0.44932982325553894, "learning_rate": 1.1070465941509321e-05, "loss": 0.5068, "step": 43986 }, { "epoch": 0.932896439099913, "grad_norm": 0.37045201659202576, "learning_rate": 1.1070134361852267e-05, "loss": 0.4784, "step": 43987 }, { "epoch": 0.9329176475578461, "grad_norm": 0.34605666995048523, "learning_rate": 1.1069802781005024e-05, "loss": 0.5519, "step": 43988 }, { "epoch": 0.9329388560157791, "grad_norm": 0.3579501509666443, "learning_rate": 1.1069471198967953e-05, "loss": 0.4585, "step": 43989 }, { "epoch": 0.9329600644737122, "grad_norm": 0.32497406005859375, "learning_rate": 1.1069139615741426e-05, "loss": 0.469, "step": 43990 }, { "epoch": 0.9329812729316451, "grad_norm": 0.3798292279243469, "learning_rate": 1.1068808031325813e-05, "loss": 0.4784, "step": 43991 }, { "epoch": 0.9330024813895782, "grad_norm": 0.3581126034259796, "learning_rate": 1.1068476445721476e-05, "loss": 0.4875, "step": 43992 }, { "epoch": 0.9330236898475112, "grad_norm": 0.41894593834877014, "learning_rate": 1.1068144858928792e-05, "loss": 0.4798, "step": 43993 }, { "epoch": 0.9330448983054442, "grad_norm": 0.3566456735134125, "learning_rate": 1.1067813270948127e-05, "loss": 0.518, "step": 43994 }, { "epoch": 0.9330661067633772, "grad_norm": 0.3550276458263397, "learning_rate": 1.1067481681779848e-05, "loss": 0.4633, "step": 43995 }, { "epoch": 0.9330873152213103, "grad_norm": 0.3705534338951111, "learning_rate": 1.1067150091424326e-05, "loss": 0.5209, "step": 43996 }, { "epoch": 0.9331085236792432, "grad_norm": 0.3692707419395447, "learning_rate": 1.106681849988193e-05, "loss": 0.4319, "step": 43997 }, { "epoch": 0.9331297321371763, "grad_norm": 0.39552268385887146, "learning_rate": 1.1066486907153027e-05, "loss": 0.4786, "step": 43998 }, { "epoch": 0.9331509405951093, "grad_norm": 0.40056735277175903, "learning_rate": 1.1066155313237986e-05, "loss": 0.4031, "step": 43999 }, { "epoch": 0.9331721490530424, "grad_norm": 0.3768966794013977, "learning_rate": 1.1065823718137177e-05, "loss": 0.4942, "step": 44000 }, { "epoch": 0.9331933575109754, "grad_norm": 0.37493735551834106, "learning_rate": 1.1065492121850966e-05, "loss": 0.4478, "step": 44001 }, { "epoch": 0.9332145659689084, "grad_norm": 0.38125160336494446, "learning_rate": 1.1065160524379725e-05, "loss": 0.4883, "step": 44002 }, { "epoch": 0.9332357744268415, "grad_norm": 0.3870464265346527, "learning_rate": 1.1064828925723825e-05, "loss": 0.4264, "step": 44003 }, { "epoch": 0.9332569828847744, "grad_norm": 0.3780118525028229, "learning_rate": 1.1064497325883628e-05, "loss": 0.5147, "step": 44004 }, { "epoch": 0.9332781913427075, "grad_norm": 0.3527214825153351, "learning_rate": 1.1064165724859506e-05, "loss": 0.4577, "step": 44005 }, { "epoch": 0.9332993998006405, "grad_norm": 0.35785773396492004, "learning_rate": 1.1063834122651828e-05, "loss": 0.4542, "step": 44006 }, { "epoch": 0.9333206082585735, "grad_norm": 0.36620089411735535, "learning_rate": 1.1063502519260965e-05, "loss": 0.5209, "step": 44007 }, { "epoch": 0.9333418167165065, "grad_norm": 0.3223850131034851, "learning_rate": 1.1063170914687282e-05, "loss": 0.4058, "step": 44008 }, { "epoch": 0.9333630251744396, "grad_norm": 0.3543857932090759, "learning_rate": 1.1062839308931151e-05, "loss": 0.4762, "step": 44009 }, { "epoch": 0.9333842336323726, "grad_norm": 0.34484297037124634, "learning_rate": 1.106250770199294e-05, "loss": 0.5264, "step": 44010 }, { "epoch": 0.9334054420903056, "grad_norm": 0.3508913516998291, "learning_rate": 1.1062176093873016e-05, "loss": 0.4436, "step": 44011 }, { "epoch": 0.9334266505482386, "grad_norm": 0.48724913597106934, "learning_rate": 1.1061844484571744e-05, "loss": 0.455, "step": 44012 }, { "epoch": 0.9334478590061717, "grad_norm": 0.4839368760585785, "learning_rate": 1.1061512874089506e-05, "loss": 0.5028, "step": 44013 }, { "epoch": 0.9334690674641047, "grad_norm": 0.5057681798934937, "learning_rate": 1.1061181262426658e-05, "loss": 0.5415, "step": 44014 }, { "epoch": 0.9334902759220377, "grad_norm": 0.37992197275161743, "learning_rate": 1.1060849649583575e-05, "loss": 0.5104, "step": 44015 }, { "epoch": 0.9335114843799708, "grad_norm": 0.3746304214000702, "learning_rate": 1.1060518035560626e-05, "loss": 0.5458, "step": 44016 }, { "epoch": 0.9335326928379037, "grad_norm": 1.3223158121109009, "learning_rate": 1.1060186420358177e-05, "loss": 0.439, "step": 44017 }, { "epoch": 0.9335539012958368, "grad_norm": 0.42690879106521606, "learning_rate": 1.1059854803976594e-05, "loss": 0.4945, "step": 44018 }, { "epoch": 0.9335751097537698, "grad_norm": 0.5496794581413269, "learning_rate": 1.1059523186416252e-05, "loss": 0.4897, "step": 44019 }, { "epoch": 0.9335963182117029, "grad_norm": 0.3795512020587921, "learning_rate": 1.1059191567677518e-05, "loss": 0.5428, "step": 44020 }, { "epoch": 0.9336175266696358, "grad_norm": 0.3688943088054657, "learning_rate": 1.105885994776076e-05, "loss": 0.4522, "step": 44021 }, { "epoch": 0.9336387351275689, "grad_norm": 0.4063608944416046, "learning_rate": 1.105852832666635e-05, "loss": 0.5145, "step": 44022 }, { "epoch": 0.9336599435855019, "grad_norm": 0.383044958114624, "learning_rate": 1.105819670439465e-05, "loss": 0.4293, "step": 44023 }, { "epoch": 0.9336811520434349, "grad_norm": 0.36055073142051697, "learning_rate": 1.1057865080946035e-05, "loss": 0.4618, "step": 44024 }, { "epoch": 0.9337023605013679, "grad_norm": 0.3840033710002899, "learning_rate": 1.105753345632087e-05, "loss": 0.394, "step": 44025 }, { "epoch": 0.933723568959301, "grad_norm": 0.3492094874382019, "learning_rate": 1.1057201830519526e-05, "loss": 0.4328, "step": 44026 }, { "epoch": 0.9337447774172339, "grad_norm": 0.6968204379081726, "learning_rate": 1.1056870203542372e-05, "loss": 0.4598, "step": 44027 }, { "epoch": 0.933765985875167, "grad_norm": 0.36133062839508057, "learning_rate": 1.1056538575389776e-05, "loss": 0.4062, "step": 44028 }, { "epoch": 0.9337871943331001, "grad_norm": 0.40189090371131897, "learning_rate": 1.1056206946062108e-05, "loss": 0.5096, "step": 44029 }, { "epoch": 0.933808402791033, "grad_norm": 0.3335229158401489, "learning_rate": 1.1055875315559734e-05, "loss": 0.4763, "step": 44030 }, { "epoch": 0.9338296112489661, "grad_norm": 0.3335706293582916, "learning_rate": 1.1055543683883025e-05, "loss": 0.4883, "step": 44031 }, { "epoch": 0.9338508197068991, "grad_norm": 0.3720611035823822, "learning_rate": 1.105521205103235e-05, "loss": 0.4912, "step": 44032 }, { "epoch": 0.9338720281648322, "grad_norm": 0.43560197949409485, "learning_rate": 1.1054880417008078e-05, "loss": 0.5364, "step": 44033 }, { "epoch": 0.9338932366227651, "grad_norm": 0.38846704363822937, "learning_rate": 1.1054548781810577e-05, "loss": 0.4975, "step": 44034 }, { "epoch": 0.9339144450806982, "grad_norm": 0.33956387639045715, "learning_rate": 1.1054217145440214e-05, "loss": 0.5455, "step": 44035 }, { "epoch": 0.9339356535386312, "grad_norm": 0.4312942624092102, "learning_rate": 1.1053885507897362e-05, "loss": 0.5413, "step": 44036 }, { "epoch": 0.9339568619965642, "grad_norm": 0.36897706985473633, "learning_rate": 1.1053553869182385e-05, "loss": 0.5431, "step": 44037 }, { "epoch": 0.9339780704544972, "grad_norm": 0.4046274721622467, "learning_rate": 1.1053222229295657e-05, "loss": 0.5003, "step": 44038 }, { "epoch": 0.9339992789124303, "grad_norm": 0.4482084810733795, "learning_rate": 1.1052890588237544e-05, "loss": 0.4602, "step": 44039 }, { "epoch": 0.9340204873703633, "grad_norm": 0.5159667134284973, "learning_rate": 1.1052558946008413e-05, "loss": 0.4308, "step": 44040 }, { "epoch": 0.9340416958282963, "grad_norm": 0.35366109013557434, "learning_rate": 1.105222730260864e-05, "loss": 0.5396, "step": 44041 }, { "epoch": 0.9340629042862294, "grad_norm": 0.3671916127204895, "learning_rate": 1.1051895658038587e-05, "loss": 0.4177, "step": 44042 }, { "epoch": 0.9340841127441624, "grad_norm": 0.39453408122062683, "learning_rate": 1.1051564012298623e-05, "loss": 0.5296, "step": 44043 }, { "epoch": 0.9341053212020954, "grad_norm": 0.3863551616668701, "learning_rate": 1.1051232365389118e-05, "loss": 0.5114, "step": 44044 }, { "epoch": 0.9341265296600284, "grad_norm": 0.3590107858181, "learning_rate": 1.1050900717310444e-05, "loss": 0.4394, "step": 44045 }, { "epoch": 0.9341477381179615, "grad_norm": 0.4179612398147583, "learning_rate": 1.1050569068062964e-05, "loss": 0.5687, "step": 44046 }, { "epoch": 0.9341689465758944, "grad_norm": 0.40446650981903076, "learning_rate": 1.1050237417647055e-05, "loss": 0.5141, "step": 44047 }, { "epoch": 0.9341901550338275, "grad_norm": 0.6753453016281128, "learning_rate": 1.104990576606308e-05, "loss": 0.5773, "step": 44048 }, { "epoch": 0.9342113634917605, "grad_norm": 0.428395539522171, "learning_rate": 1.1049574113311406e-05, "loss": 0.4753, "step": 44049 }, { "epoch": 0.9342325719496936, "grad_norm": 0.36352378129959106, "learning_rate": 1.1049242459392404e-05, "loss": 0.5942, "step": 44050 }, { "epoch": 0.9342537804076265, "grad_norm": 0.42697638273239136, "learning_rate": 1.1048910804306448e-05, "loss": 0.5592, "step": 44051 }, { "epoch": 0.9342749888655596, "grad_norm": 0.3730144202709198, "learning_rate": 1.1048579148053899e-05, "loss": 0.4838, "step": 44052 }, { "epoch": 0.9342961973234926, "grad_norm": 0.37363630533218384, "learning_rate": 1.1048247490635133e-05, "loss": 0.5132, "step": 44053 }, { "epoch": 0.9343174057814256, "grad_norm": 0.35454148054122925, "learning_rate": 1.104791583205051e-05, "loss": 0.4399, "step": 44054 }, { "epoch": 0.9343386142393587, "grad_norm": 0.3601546883583069, "learning_rate": 1.104758417230041e-05, "loss": 0.4948, "step": 44055 }, { "epoch": 0.9343598226972917, "grad_norm": 0.3740995526313782, "learning_rate": 1.1047252511385194e-05, "loss": 0.4835, "step": 44056 }, { "epoch": 0.9343810311552248, "grad_norm": 0.39973363280296326, "learning_rate": 1.1046920849305229e-05, "loss": 0.4247, "step": 44057 }, { "epoch": 0.9344022396131577, "grad_norm": 0.3689444363117218, "learning_rate": 1.1046589186060892e-05, "loss": 0.4677, "step": 44058 }, { "epoch": 0.9344234480710908, "grad_norm": 0.33899080753326416, "learning_rate": 1.1046257521652547e-05, "loss": 0.47, "step": 44059 }, { "epoch": 0.9344446565290238, "grad_norm": 0.36729830503463745, "learning_rate": 1.1045925856080562e-05, "loss": 0.4912, "step": 44060 }, { "epoch": 0.9344658649869568, "grad_norm": 0.34315258264541626, "learning_rate": 1.104559418934531e-05, "loss": 0.4006, "step": 44061 }, { "epoch": 0.9344870734448898, "grad_norm": 0.36985790729522705, "learning_rate": 1.1045262521447155e-05, "loss": 0.4723, "step": 44062 }, { "epoch": 0.9345082819028229, "grad_norm": 0.3395991325378418, "learning_rate": 1.1044930852386467e-05, "loss": 0.514, "step": 44063 }, { "epoch": 0.9345294903607558, "grad_norm": 0.45669668912887573, "learning_rate": 1.1044599182163617e-05, "loss": 0.6024, "step": 44064 }, { "epoch": 0.9345506988186889, "grad_norm": 0.36918869614601135, "learning_rate": 1.1044267510778975e-05, "loss": 0.5188, "step": 44065 }, { "epoch": 0.9345719072766219, "grad_norm": 0.38707372546195984, "learning_rate": 1.1043935838232904e-05, "loss": 0.5798, "step": 44066 }, { "epoch": 0.934593115734555, "grad_norm": 0.3478738069534302, "learning_rate": 1.104360416452578e-05, "loss": 0.4547, "step": 44067 }, { "epoch": 0.9346143241924879, "grad_norm": 0.35171040892601013, "learning_rate": 1.1043272489657968e-05, "loss": 0.4941, "step": 44068 }, { "epoch": 0.934635532650421, "grad_norm": 0.37859365344047546, "learning_rate": 1.1042940813629834e-05, "loss": 0.4652, "step": 44069 }, { "epoch": 0.9346567411083541, "grad_norm": 0.36453142762184143, "learning_rate": 1.1042609136441756e-05, "loss": 0.469, "step": 44070 }, { "epoch": 0.934677949566287, "grad_norm": 0.4017789661884308, "learning_rate": 1.1042277458094093e-05, "loss": 0.5163, "step": 44071 }, { "epoch": 0.9346991580242201, "grad_norm": 0.41284674406051636, "learning_rate": 1.104194577858722e-05, "loss": 0.5217, "step": 44072 }, { "epoch": 0.9347203664821531, "grad_norm": 0.47507810592651367, "learning_rate": 1.1041614097921504e-05, "loss": 0.4623, "step": 44073 }, { "epoch": 0.9347415749400861, "grad_norm": 0.37681299448013306, "learning_rate": 1.1041282416097312e-05, "loss": 0.4681, "step": 44074 }, { "epoch": 0.9347627833980191, "grad_norm": 0.3441205620765686, "learning_rate": 1.1040950733115017e-05, "loss": 0.4394, "step": 44075 }, { "epoch": 0.9347839918559522, "grad_norm": 0.39046069979667664, "learning_rate": 1.1040619048974983e-05, "loss": 0.4556, "step": 44076 }, { "epoch": 0.9348052003138851, "grad_norm": 0.36054760217666626, "learning_rate": 1.1040287363677581e-05, "loss": 0.5156, "step": 44077 }, { "epoch": 0.9348264087718182, "grad_norm": 0.3651728928089142, "learning_rate": 1.1039955677223185e-05, "loss": 0.4279, "step": 44078 }, { "epoch": 0.9348476172297512, "grad_norm": 0.4299030601978302, "learning_rate": 1.1039623989612156e-05, "loss": 0.5341, "step": 44079 }, { "epoch": 0.9348688256876843, "grad_norm": 0.3365481197834015, "learning_rate": 1.1039292300844865e-05, "loss": 0.5011, "step": 44080 }, { "epoch": 0.9348900341456172, "grad_norm": 0.3480805456638336, "learning_rate": 1.1038960610921688e-05, "loss": 0.4627, "step": 44081 }, { "epoch": 0.9349112426035503, "grad_norm": 0.40480348467826843, "learning_rate": 1.1038628919842982e-05, "loss": 0.4789, "step": 44082 }, { "epoch": 0.9349324510614834, "grad_norm": 0.34476345777511597, "learning_rate": 1.1038297227609123e-05, "loss": 0.4265, "step": 44083 }, { "epoch": 0.9349536595194163, "grad_norm": 0.40145033597946167, "learning_rate": 1.103796553422048e-05, "loss": 0.4577, "step": 44084 }, { "epoch": 0.9349748679773494, "grad_norm": 0.3872595429420471, "learning_rate": 1.103763383967742e-05, "loss": 0.4522, "step": 44085 }, { "epoch": 0.9349960764352824, "grad_norm": 0.3820008635520935, "learning_rate": 1.1037302143980314e-05, "loss": 0.5447, "step": 44086 }, { "epoch": 0.9350172848932155, "grad_norm": 0.3287896513938904, "learning_rate": 1.1036970447129532e-05, "loss": 0.4423, "step": 44087 }, { "epoch": 0.9350384933511484, "grad_norm": 0.3615745007991791, "learning_rate": 1.1036638749125435e-05, "loss": 0.5317, "step": 44088 }, { "epoch": 0.9350597018090815, "grad_norm": 0.37073829770088196, "learning_rate": 1.1036307049968399e-05, "loss": 0.4379, "step": 44089 }, { "epoch": 0.9350809102670145, "grad_norm": 0.4002189338207245, "learning_rate": 1.1035975349658791e-05, "loss": 0.4487, "step": 44090 }, { "epoch": 0.9351021187249475, "grad_norm": 0.3566279709339142, "learning_rate": 1.1035643648196982e-05, "loss": 0.4886, "step": 44091 }, { "epoch": 0.9351233271828805, "grad_norm": 0.36968865990638733, "learning_rate": 1.1035311945583338e-05, "loss": 0.4588, "step": 44092 }, { "epoch": 0.9351445356408136, "grad_norm": 0.3668060302734375, "learning_rate": 1.1034980241818232e-05, "loss": 0.4765, "step": 44093 }, { "epoch": 0.9351657440987465, "grad_norm": 0.3760988414287567, "learning_rate": 1.1034648536902026e-05, "loss": 0.4931, "step": 44094 }, { "epoch": 0.9351869525566796, "grad_norm": 0.39206627011299133, "learning_rate": 1.1034316830835093e-05, "loss": 0.5038, "step": 44095 }, { "epoch": 0.9352081610146127, "grad_norm": 0.4904610514640808, "learning_rate": 1.1033985123617803e-05, "loss": 0.5032, "step": 44096 }, { "epoch": 0.9352293694725456, "grad_norm": 0.3603048026561737, "learning_rate": 1.1033653415250525e-05, "loss": 0.4155, "step": 44097 }, { "epoch": 0.9352505779304787, "grad_norm": 0.3782685399055481, "learning_rate": 1.1033321705733626e-05, "loss": 0.4854, "step": 44098 }, { "epoch": 0.9352717863884117, "grad_norm": 0.36952337622642517, "learning_rate": 1.1032989995067473e-05, "loss": 0.5393, "step": 44099 }, { "epoch": 0.9352929948463448, "grad_norm": 0.39796942472457886, "learning_rate": 1.103265828325244e-05, "loss": 0.4742, "step": 44100 }, { "epoch": 0.9353142033042777, "grad_norm": 0.4208981990814209, "learning_rate": 1.1032326570288893e-05, "loss": 0.4186, "step": 44101 }, { "epoch": 0.9353354117622108, "grad_norm": 0.6087473034858704, "learning_rate": 1.1031994856177202e-05, "loss": 0.4779, "step": 44102 }, { "epoch": 0.9353566202201438, "grad_norm": 0.3758121132850647, "learning_rate": 1.1031663140917734e-05, "loss": 0.4633, "step": 44103 }, { "epoch": 0.9353778286780768, "grad_norm": 0.3537324368953705, "learning_rate": 1.103133142451086e-05, "loss": 0.5359, "step": 44104 }, { "epoch": 0.9353990371360098, "grad_norm": 0.36877334117889404, "learning_rate": 1.1030999706956946e-05, "loss": 0.4776, "step": 44105 }, { "epoch": 0.9354202455939429, "grad_norm": 0.45884668827056885, "learning_rate": 1.1030667988256368e-05, "loss": 0.4908, "step": 44106 }, { "epoch": 0.9354414540518758, "grad_norm": 0.36957892775535583, "learning_rate": 1.1030336268409488e-05, "loss": 0.451, "step": 44107 }, { "epoch": 0.9354626625098089, "grad_norm": 0.4688592255115509, "learning_rate": 1.1030004547416672e-05, "loss": 0.4749, "step": 44108 }, { "epoch": 0.9354838709677419, "grad_norm": 0.3360555171966553, "learning_rate": 1.1029672825278299e-05, "loss": 0.4349, "step": 44109 }, { "epoch": 0.935505079425675, "grad_norm": 0.41504013538360596, "learning_rate": 1.1029341101994732e-05, "loss": 0.511, "step": 44110 }, { "epoch": 0.935526287883608, "grad_norm": 0.33669736981391907, "learning_rate": 1.102900937756634e-05, "loss": 0.499, "step": 44111 }, { "epoch": 0.935547496341541, "grad_norm": 0.3458665907382965, "learning_rate": 1.1028677651993496e-05, "loss": 0.458, "step": 44112 }, { "epoch": 0.9355687047994741, "grad_norm": 0.359244167804718, "learning_rate": 1.1028345925276561e-05, "loss": 0.5003, "step": 44113 }, { "epoch": 0.935589913257407, "grad_norm": 0.3813435137271881, "learning_rate": 1.1028014197415909e-05, "loss": 0.387, "step": 44114 }, { "epoch": 0.9356111217153401, "grad_norm": 0.4162818193435669, "learning_rate": 1.102768246841191e-05, "loss": 0.4664, "step": 44115 }, { "epoch": 0.9356323301732731, "grad_norm": 0.3744265139102936, "learning_rate": 1.1027350738264932e-05, "loss": 0.5189, "step": 44116 }, { "epoch": 0.9356535386312061, "grad_norm": 0.7351588010787964, "learning_rate": 1.1027019006975343e-05, "loss": 0.5313, "step": 44117 }, { "epoch": 0.9356747470891391, "grad_norm": 0.3289180099964142, "learning_rate": 1.1026687274543512e-05, "loss": 0.3872, "step": 44118 }, { "epoch": 0.9356959555470722, "grad_norm": 0.3794999420642853, "learning_rate": 1.1026355540969808e-05, "loss": 0.5232, "step": 44119 }, { "epoch": 0.9357171640050052, "grad_norm": 0.37188905477523804, "learning_rate": 1.1026023806254601e-05, "loss": 0.4942, "step": 44120 }, { "epoch": 0.9357383724629382, "grad_norm": 0.36754080653190613, "learning_rate": 1.102569207039826e-05, "loss": 0.4835, "step": 44121 }, { "epoch": 0.9357595809208712, "grad_norm": 0.4564306139945984, "learning_rate": 1.1025360333401152e-05, "loss": 0.3944, "step": 44122 }, { "epoch": 0.9357807893788043, "grad_norm": 0.3624127507209778, "learning_rate": 1.1025028595263647e-05, "loss": 0.4834, "step": 44123 }, { "epoch": 0.9358019978367373, "grad_norm": 0.3423801362514496, "learning_rate": 1.1024696855986116e-05, "loss": 0.4888, "step": 44124 }, { "epoch": 0.9358232062946703, "grad_norm": 0.3569723963737488, "learning_rate": 1.1024365115568925e-05, "loss": 0.4616, "step": 44125 }, { "epoch": 0.9358444147526034, "grad_norm": 0.380493700504303, "learning_rate": 1.1024033374012445e-05, "loss": 0.4799, "step": 44126 }, { "epoch": 0.9358656232105363, "grad_norm": 0.449389785528183, "learning_rate": 1.102370163131704e-05, "loss": 0.5506, "step": 44127 }, { "epoch": 0.9358868316684694, "grad_norm": 0.3541412651538849, "learning_rate": 1.1023369887483084e-05, "loss": 0.4854, "step": 44128 }, { "epoch": 0.9359080401264024, "grad_norm": 0.39039042592048645, "learning_rate": 1.1023038142510949e-05, "loss": 0.4549, "step": 44129 }, { "epoch": 0.9359292485843355, "grad_norm": 0.3148614764213562, "learning_rate": 1.1022706396400998e-05, "loss": 0.4574, "step": 44130 }, { "epoch": 0.9359504570422684, "grad_norm": 0.36837902665138245, "learning_rate": 1.10223746491536e-05, "loss": 0.4964, "step": 44131 }, { "epoch": 0.9359716655002015, "grad_norm": 0.3518975079059601, "learning_rate": 1.1022042900769128e-05, "loss": 0.5079, "step": 44132 }, { "epoch": 0.9359928739581345, "grad_norm": 0.3331320285797119, "learning_rate": 1.1021711151247946e-05, "loss": 0.4604, "step": 44133 }, { "epoch": 0.9360140824160675, "grad_norm": 0.6373649835586548, "learning_rate": 1.1021379400590426e-05, "loss": 0.4964, "step": 44134 }, { "epoch": 0.9360352908740005, "grad_norm": 0.3478069305419922, "learning_rate": 1.1021047648796941e-05, "loss": 0.5092, "step": 44135 }, { "epoch": 0.9360564993319336, "grad_norm": 0.3635796308517456, "learning_rate": 1.1020715895867851e-05, "loss": 0.4442, "step": 44136 }, { "epoch": 0.9360777077898667, "grad_norm": 0.3339661955833435, "learning_rate": 1.1020384141803532e-05, "loss": 0.4903, "step": 44137 }, { "epoch": 0.9360989162477996, "grad_norm": 0.36234307289123535, "learning_rate": 1.1020052386604352e-05, "loss": 0.4703, "step": 44138 }, { "epoch": 0.9361201247057327, "grad_norm": 0.390566885471344, "learning_rate": 1.1019720630270676e-05, "loss": 0.4439, "step": 44139 }, { "epoch": 0.9361413331636657, "grad_norm": 0.34764569997787476, "learning_rate": 1.1019388872802875e-05, "loss": 0.4402, "step": 44140 }, { "epoch": 0.9361625416215987, "grad_norm": 0.4725866913795471, "learning_rate": 1.1019057114201322e-05, "loss": 0.3976, "step": 44141 }, { "epoch": 0.9361837500795317, "grad_norm": 0.3507402837276459, "learning_rate": 1.1018725354466377e-05, "loss": 0.4912, "step": 44142 }, { "epoch": 0.9362049585374648, "grad_norm": 0.432784765958786, "learning_rate": 1.101839359359842e-05, "loss": 0.5696, "step": 44143 }, { "epoch": 0.9362261669953977, "grad_norm": 0.4623684287071228, "learning_rate": 1.1018061831597814e-05, "loss": 0.455, "step": 44144 }, { "epoch": 0.9362473754533308, "grad_norm": 0.7199215888977051, "learning_rate": 1.1017730068464924e-05, "loss": 0.4155, "step": 44145 }, { "epoch": 0.9362685839112638, "grad_norm": 0.3621844947338104, "learning_rate": 1.1017398304200126e-05, "loss": 0.4913, "step": 44146 }, { "epoch": 0.9362897923691968, "grad_norm": 0.3823050856590271, "learning_rate": 1.1017066538803789e-05, "loss": 0.5021, "step": 44147 }, { "epoch": 0.9363110008271298, "grad_norm": 0.38140302896499634, "learning_rate": 1.1016734772276276e-05, "loss": 0.4678, "step": 44148 }, { "epoch": 0.9363322092850629, "grad_norm": 0.45737725496292114, "learning_rate": 1.1016403004617961e-05, "loss": 0.4462, "step": 44149 }, { "epoch": 0.9363534177429959, "grad_norm": 0.4557996392250061, "learning_rate": 1.1016071235829209e-05, "loss": 0.5101, "step": 44150 }, { "epoch": 0.9363746262009289, "grad_norm": 0.3857070207595825, "learning_rate": 1.1015739465910395e-05, "loss": 0.4935, "step": 44151 }, { "epoch": 0.936395834658862, "grad_norm": 0.458863228559494, "learning_rate": 1.1015407694861885e-05, "loss": 0.6125, "step": 44152 }, { "epoch": 0.936417043116795, "grad_norm": 0.43695583939552307, "learning_rate": 1.1015075922684042e-05, "loss": 0.5295, "step": 44153 }, { "epoch": 0.936438251574728, "grad_norm": 0.39444953203201294, "learning_rate": 1.1014744149377244e-05, "loss": 0.5056, "step": 44154 }, { "epoch": 0.936459460032661, "grad_norm": 0.3502855896949768, "learning_rate": 1.1014412374941857e-05, "loss": 0.4795, "step": 44155 }, { "epoch": 0.9364806684905941, "grad_norm": 1.2634979486465454, "learning_rate": 1.1014080599378247e-05, "loss": 0.46, "step": 44156 }, { "epoch": 0.936501876948527, "grad_norm": 0.40243399143218994, "learning_rate": 1.1013748822686787e-05, "loss": 0.5295, "step": 44157 }, { "epoch": 0.9365230854064601, "grad_norm": 0.37476569414138794, "learning_rate": 1.1013417044867847e-05, "loss": 0.5049, "step": 44158 }, { "epoch": 0.9365442938643931, "grad_norm": 0.46784159541130066, "learning_rate": 1.1013085265921786e-05, "loss": 0.4799, "step": 44159 }, { "epoch": 0.9365655023223262, "grad_norm": 0.3646899461746216, "learning_rate": 1.1012753485848987e-05, "loss": 0.5099, "step": 44160 }, { "epoch": 0.9365867107802591, "grad_norm": 0.37732967734336853, "learning_rate": 1.101242170464981e-05, "loss": 0.4939, "step": 44161 }, { "epoch": 0.9366079192381922, "grad_norm": 0.3921540379524231, "learning_rate": 1.1012089922324624e-05, "loss": 0.5237, "step": 44162 }, { "epoch": 0.9366291276961252, "grad_norm": 0.44961783289909363, "learning_rate": 1.1011758138873805e-05, "loss": 0.5835, "step": 44163 }, { "epoch": 0.9366503361540582, "grad_norm": 0.41576579213142395, "learning_rate": 1.1011426354297714e-05, "loss": 0.5404, "step": 44164 }, { "epoch": 0.9366715446119913, "grad_norm": 0.3463200032711029, "learning_rate": 1.1011094568596725e-05, "loss": 0.4232, "step": 44165 }, { "epoch": 0.9366927530699243, "grad_norm": 0.464933305978775, "learning_rate": 1.1010762781771204e-05, "loss": 0.5306, "step": 44166 }, { "epoch": 0.9367139615278574, "grad_norm": 0.36216607689857483, "learning_rate": 1.1010430993821522e-05, "loss": 0.5299, "step": 44167 }, { "epoch": 0.9367351699857903, "grad_norm": 0.3668772280216217, "learning_rate": 1.101009920474805e-05, "loss": 0.4154, "step": 44168 }, { "epoch": 0.9367563784437234, "grad_norm": 0.3471485376358032, "learning_rate": 1.1009767414551154e-05, "loss": 0.4355, "step": 44169 }, { "epoch": 0.9367775869016564, "grad_norm": 0.43930569291114807, "learning_rate": 1.1009435623231199e-05, "loss": 0.5097, "step": 44170 }, { "epoch": 0.9367987953595894, "grad_norm": 0.3434154689311981, "learning_rate": 1.1009103830788564e-05, "loss": 0.4594, "step": 44171 }, { "epoch": 0.9368200038175224, "grad_norm": 0.4271504878997803, "learning_rate": 1.1008772037223607e-05, "loss": 0.4921, "step": 44172 }, { "epoch": 0.9368412122754555, "grad_norm": 0.3831809163093567, "learning_rate": 1.1008440242536704e-05, "loss": 0.4956, "step": 44173 }, { "epoch": 0.9368624207333884, "grad_norm": 0.3634738624095917, "learning_rate": 1.1008108446728225e-05, "loss": 0.4929, "step": 44174 }, { "epoch": 0.9368836291913215, "grad_norm": 0.4781172275543213, "learning_rate": 1.1007776649798538e-05, "loss": 0.4244, "step": 44175 }, { "epoch": 0.9369048376492545, "grad_norm": 0.3851499855518341, "learning_rate": 1.1007444851748006e-05, "loss": 0.5873, "step": 44176 }, { "epoch": 0.9369260461071875, "grad_norm": 0.3256242275238037, "learning_rate": 1.1007113052577006e-05, "loss": 0.4375, "step": 44177 }, { "epoch": 0.9369472545651206, "grad_norm": 0.4347108006477356, "learning_rate": 1.1006781252285901e-05, "loss": 0.5101, "step": 44178 }, { "epoch": 0.9369684630230536, "grad_norm": 0.3420340418815613, "learning_rate": 1.1006449450875063e-05, "loss": 0.4648, "step": 44179 }, { "epoch": 0.9369896714809867, "grad_norm": 0.3915742337703705, "learning_rate": 1.100611764834486e-05, "loss": 0.4848, "step": 44180 }, { "epoch": 0.9370108799389196, "grad_norm": 0.3587561249732971, "learning_rate": 1.1005785844695664e-05, "loss": 0.4239, "step": 44181 }, { "epoch": 0.9370320883968527, "grad_norm": 0.4480483829975128, "learning_rate": 1.1005454039927841e-05, "loss": 0.5379, "step": 44182 }, { "epoch": 0.9370532968547857, "grad_norm": 0.4090994894504547, "learning_rate": 1.1005122234041764e-05, "loss": 0.4999, "step": 44183 }, { "epoch": 0.9370745053127187, "grad_norm": 0.49516549706459045, "learning_rate": 1.1004790427037792e-05, "loss": 0.5179, "step": 44184 }, { "epoch": 0.9370957137706517, "grad_norm": 0.35265040397644043, "learning_rate": 1.1004458618916303e-05, "loss": 0.4829, "step": 44185 }, { "epoch": 0.9371169222285848, "grad_norm": 0.41363704204559326, "learning_rate": 1.1004126809677666e-05, "loss": 0.4834, "step": 44186 }, { "epoch": 0.9371381306865177, "grad_norm": 0.38892728090286255, "learning_rate": 1.1003794999322246e-05, "loss": 0.4696, "step": 44187 }, { "epoch": 0.9371593391444508, "grad_norm": 0.37067171931266785, "learning_rate": 1.1003463187850416e-05, "loss": 0.5034, "step": 44188 }, { "epoch": 0.9371805476023838, "grad_norm": 0.39768552780151367, "learning_rate": 1.1003131375262543e-05, "loss": 0.554, "step": 44189 }, { "epoch": 0.9372017560603169, "grad_norm": 0.3789522349834442, "learning_rate": 1.1002799561558995e-05, "loss": 0.5877, "step": 44190 }, { "epoch": 0.9372229645182498, "grad_norm": 0.5750038027763367, "learning_rate": 1.100246774674014e-05, "loss": 0.4952, "step": 44191 }, { "epoch": 0.9372441729761829, "grad_norm": 0.47392675280570984, "learning_rate": 1.1002135930806352e-05, "loss": 0.4925, "step": 44192 }, { "epoch": 0.937265381434116, "grad_norm": 0.39980560541152954, "learning_rate": 1.1001804113757995e-05, "loss": 0.4993, "step": 44193 }, { "epoch": 0.9372865898920489, "grad_norm": 0.362211138010025, "learning_rate": 1.1001472295595442e-05, "loss": 0.5261, "step": 44194 }, { "epoch": 0.937307798349982, "grad_norm": 0.3808482885360718, "learning_rate": 1.1001140476319059e-05, "loss": 0.5257, "step": 44195 }, { "epoch": 0.937329006807915, "grad_norm": 0.35193052887916565, "learning_rate": 1.1000808655929216e-05, "loss": 0.4417, "step": 44196 }, { "epoch": 0.937350215265848, "grad_norm": 0.40108722448349, "learning_rate": 1.1000476834426283e-05, "loss": 0.4492, "step": 44197 }, { "epoch": 0.937371423723781, "grad_norm": 0.34622451663017273, "learning_rate": 1.1000145011810627e-05, "loss": 0.5055, "step": 44198 }, { "epoch": 0.9373926321817141, "grad_norm": 0.3634641170501709, "learning_rate": 1.099981318808262e-05, "loss": 0.541, "step": 44199 }, { "epoch": 0.9374138406396471, "grad_norm": 0.33435529470443726, "learning_rate": 1.099948136324263e-05, "loss": 0.4419, "step": 44200 }, { "epoch": 0.9374350490975801, "grad_norm": 0.6195215582847595, "learning_rate": 1.0999149537291025e-05, "loss": 0.4851, "step": 44201 }, { "epoch": 0.9374562575555131, "grad_norm": 0.3420272469520569, "learning_rate": 1.0998817710228175e-05, "loss": 0.4543, "step": 44202 }, { "epoch": 0.9374774660134462, "grad_norm": 0.3859095275402069, "learning_rate": 1.0998485882054448e-05, "loss": 0.5304, "step": 44203 }, { "epoch": 0.9374986744713791, "grad_norm": 0.34770873188972473, "learning_rate": 1.0998154052770212e-05, "loss": 0.4231, "step": 44204 }, { "epoch": 0.9375198829293122, "grad_norm": 0.4242163598537445, "learning_rate": 1.099782222237584e-05, "loss": 0.4513, "step": 44205 }, { "epoch": 0.9375410913872453, "grad_norm": 0.3570377826690674, "learning_rate": 1.0997490390871699e-05, "loss": 0.3737, "step": 44206 }, { "epoch": 0.9375622998451782, "grad_norm": 0.38317784667015076, "learning_rate": 1.0997158558258157e-05, "loss": 0.4956, "step": 44207 }, { "epoch": 0.9375835083031113, "grad_norm": 0.38307833671569824, "learning_rate": 1.0996826724535583e-05, "loss": 0.5024, "step": 44208 }, { "epoch": 0.9376047167610443, "grad_norm": 0.3522242605686188, "learning_rate": 1.0996494889704347e-05, "loss": 0.4867, "step": 44209 }, { "epoch": 0.9376259252189774, "grad_norm": 0.3832967281341553, "learning_rate": 1.0996163053764819e-05, "loss": 0.505, "step": 44210 }, { "epoch": 0.9376471336769103, "grad_norm": 0.6789393424987793, "learning_rate": 1.0995831216717368e-05, "loss": 0.4799, "step": 44211 }, { "epoch": 0.9376683421348434, "grad_norm": 0.3900819420814514, "learning_rate": 1.0995499378562362e-05, "loss": 0.4508, "step": 44212 }, { "epoch": 0.9376895505927764, "grad_norm": 0.402415007352829, "learning_rate": 1.0995167539300166e-05, "loss": 0.4489, "step": 44213 }, { "epoch": 0.9377107590507094, "grad_norm": 0.3852277100086212, "learning_rate": 1.0994835698931158e-05, "loss": 0.5248, "step": 44214 }, { "epoch": 0.9377319675086424, "grad_norm": 0.49688300490379333, "learning_rate": 1.09945038574557e-05, "loss": 0.4946, "step": 44215 }, { "epoch": 0.9377531759665755, "grad_norm": 0.3854629397392273, "learning_rate": 1.0994172014874167e-05, "loss": 0.4515, "step": 44216 }, { "epoch": 0.9377743844245084, "grad_norm": 0.3773745000362396, "learning_rate": 1.099384017118692e-05, "loss": 0.544, "step": 44217 }, { "epoch": 0.9377955928824415, "grad_norm": 0.40636324882507324, "learning_rate": 1.0993508326394333e-05, "loss": 0.4221, "step": 44218 }, { "epoch": 0.9378168013403746, "grad_norm": 0.35389190912246704, "learning_rate": 1.099317648049678e-05, "loss": 0.4575, "step": 44219 }, { "epoch": 0.9378380097983076, "grad_norm": 0.32227784395217896, "learning_rate": 1.0992844633494621e-05, "loss": 0.4667, "step": 44220 }, { "epoch": 0.9378592182562406, "grad_norm": 0.40318888425827026, "learning_rate": 1.0992512785388227e-05, "loss": 0.519, "step": 44221 }, { "epoch": 0.9378804267141736, "grad_norm": 0.3419841229915619, "learning_rate": 1.0992180936177972e-05, "loss": 0.4636, "step": 44222 }, { "epoch": 0.9379016351721067, "grad_norm": 0.34624963998794556, "learning_rate": 1.0991849085864219e-05, "loss": 0.5149, "step": 44223 }, { "epoch": 0.9379228436300396, "grad_norm": 0.31535452604293823, "learning_rate": 1.0991517234447342e-05, "loss": 0.4777, "step": 44224 }, { "epoch": 0.9379440520879727, "grad_norm": 0.37696829438209534, "learning_rate": 1.099118538192771e-05, "loss": 0.4694, "step": 44225 }, { "epoch": 0.9379652605459057, "grad_norm": 0.38191336393356323, "learning_rate": 1.0990853528305687e-05, "loss": 0.4742, "step": 44226 }, { "epoch": 0.9379864690038388, "grad_norm": 0.32349660992622375, "learning_rate": 1.0990521673581647e-05, "loss": 0.3733, "step": 44227 }, { "epoch": 0.9380076774617717, "grad_norm": 0.4273186922073364, "learning_rate": 1.0990189817755957e-05, "loss": 0.5562, "step": 44228 }, { "epoch": 0.9380288859197048, "grad_norm": 0.35916668176651, "learning_rate": 1.0989857960828985e-05, "loss": 0.3569, "step": 44229 }, { "epoch": 0.9380500943776378, "grad_norm": 0.38348403573036194, "learning_rate": 1.0989526102801102e-05, "loss": 0.5349, "step": 44230 }, { "epoch": 0.9380713028355708, "grad_norm": 0.32008039951324463, "learning_rate": 1.0989194243672678e-05, "loss": 0.4938, "step": 44231 }, { "epoch": 0.9380925112935038, "grad_norm": 0.37963220477104187, "learning_rate": 1.098886238344408e-05, "loss": 0.4521, "step": 44232 }, { "epoch": 0.9381137197514369, "grad_norm": 0.44598907232284546, "learning_rate": 1.098853052211568e-05, "loss": 0.538, "step": 44233 }, { "epoch": 0.93813492820937, "grad_norm": 0.3343598544597626, "learning_rate": 1.0988198659687844e-05, "loss": 0.4833, "step": 44234 }, { "epoch": 0.9381561366673029, "grad_norm": 0.44179627299308777, "learning_rate": 1.098786679616094e-05, "loss": 0.55, "step": 44235 }, { "epoch": 0.938177345125236, "grad_norm": 0.46528878808021545, "learning_rate": 1.098753493153534e-05, "loss": 0.4474, "step": 44236 }, { "epoch": 0.938198553583169, "grad_norm": 0.37760335206985474, "learning_rate": 1.0987203065811414e-05, "loss": 0.4934, "step": 44237 }, { "epoch": 0.938219762041102, "grad_norm": 0.4418346881866455, "learning_rate": 1.0986871198989526e-05, "loss": 0.5038, "step": 44238 }, { "epoch": 0.938240970499035, "grad_norm": 0.3783697783946991, "learning_rate": 1.0986539331070052e-05, "loss": 0.5118, "step": 44239 }, { "epoch": 0.9382621789569681, "grad_norm": 0.45446622371673584, "learning_rate": 1.0986207462053355e-05, "loss": 0.5004, "step": 44240 }, { "epoch": 0.938283387414901, "grad_norm": 0.363771915435791, "learning_rate": 1.0985875591939808e-05, "loss": 0.4989, "step": 44241 }, { "epoch": 0.9383045958728341, "grad_norm": 0.40897226333618164, "learning_rate": 1.0985543720729778e-05, "loss": 0.5, "step": 44242 }, { "epoch": 0.9383258043307671, "grad_norm": 0.349018394947052, "learning_rate": 1.0985211848423636e-05, "loss": 0.4187, "step": 44243 }, { "epoch": 0.9383470127887001, "grad_norm": 0.4660102427005768, "learning_rate": 1.0984879975021748e-05, "loss": 0.534, "step": 44244 }, { "epoch": 0.9383682212466331, "grad_norm": 0.9910506010055542, "learning_rate": 1.0984548100524485e-05, "loss": 0.4962, "step": 44245 }, { "epoch": 0.9383894297045662, "grad_norm": 0.34846973419189453, "learning_rate": 1.0984216224932216e-05, "loss": 0.4989, "step": 44246 }, { "epoch": 0.9384106381624993, "grad_norm": 0.36016371846199036, "learning_rate": 1.0983884348245312e-05, "loss": 0.4748, "step": 44247 }, { "epoch": 0.9384318466204322, "grad_norm": 0.41972434520721436, "learning_rate": 1.0983552470464143e-05, "loss": 0.4566, "step": 44248 }, { "epoch": 0.9384530550783653, "grad_norm": 0.389141321182251, "learning_rate": 1.0983220591589067e-05, "loss": 0.4531, "step": 44249 }, { "epoch": 0.9384742635362983, "grad_norm": 0.3666556477546692, "learning_rate": 1.098288871162047e-05, "loss": 0.465, "step": 44250 }, { "epoch": 0.9384954719942313, "grad_norm": 0.3142973482608795, "learning_rate": 1.098255683055871e-05, "loss": 0.4446, "step": 44251 }, { "epoch": 0.9385166804521643, "grad_norm": 0.37323838472366333, "learning_rate": 1.0982224948404157e-05, "loss": 0.5694, "step": 44252 }, { "epoch": 0.9385378889100974, "grad_norm": 0.3517921566963196, "learning_rate": 1.0981893065157186e-05, "loss": 0.4201, "step": 44253 }, { "epoch": 0.9385590973680303, "grad_norm": 0.3925534188747406, "learning_rate": 1.098156118081816e-05, "loss": 0.4552, "step": 44254 }, { "epoch": 0.9385803058259634, "grad_norm": 0.4470316469669342, "learning_rate": 1.0981229295387445e-05, "loss": 0.4485, "step": 44255 }, { "epoch": 0.9386015142838964, "grad_norm": 0.3829135298728943, "learning_rate": 1.0980897408865421e-05, "loss": 0.5045, "step": 44256 }, { "epoch": 0.9386227227418295, "grad_norm": 0.39470428228378296, "learning_rate": 1.0980565521252453e-05, "loss": 0.4822, "step": 44257 }, { "epoch": 0.9386439311997624, "grad_norm": 0.3916729986667633, "learning_rate": 1.0980233632548905e-05, "loss": 0.5148, "step": 44258 }, { "epoch": 0.9386651396576955, "grad_norm": 0.365837037563324, "learning_rate": 1.097990174275515e-05, "loss": 0.4927, "step": 44259 }, { "epoch": 0.9386863481156286, "grad_norm": 0.3862192928791046, "learning_rate": 1.0979569851871556e-05, "loss": 0.5038, "step": 44260 }, { "epoch": 0.9387075565735615, "grad_norm": 0.7890688180923462, "learning_rate": 1.0979237959898496e-05, "loss": 0.4942, "step": 44261 }, { "epoch": 0.9387287650314946, "grad_norm": 0.39777952432632446, "learning_rate": 1.0978906066836335e-05, "loss": 0.4607, "step": 44262 }, { "epoch": 0.9387499734894276, "grad_norm": 0.34858548641204834, "learning_rate": 1.097857417268544e-05, "loss": 0.4707, "step": 44263 }, { "epoch": 0.9387711819473606, "grad_norm": 0.3790169358253479, "learning_rate": 1.0978242277446187e-05, "loss": 0.4519, "step": 44264 }, { "epoch": 0.9387923904052936, "grad_norm": 0.36536169052124023, "learning_rate": 1.0977910381118942e-05, "loss": 0.4488, "step": 44265 }, { "epoch": 0.9388135988632267, "grad_norm": 0.4086502492427826, "learning_rate": 1.097757848370407e-05, "loss": 0.5578, "step": 44266 }, { "epoch": 0.9388348073211596, "grad_norm": 0.36440378427505493, "learning_rate": 1.0977246585201946e-05, "loss": 0.4617, "step": 44267 }, { "epoch": 0.9388560157790927, "grad_norm": 0.3432876765727997, "learning_rate": 1.0976914685612936e-05, "loss": 0.4946, "step": 44268 }, { "epoch": 0.9388772242370257, "grad_norm": 0.39281463623046875, "learning_rate": 1.0976582784937409e-05, "loss": 0.4654, "step": 44269 }, { "epoch": 0.9388984326949588, "grad_norm": 0.5077027678489685, "learning_rate": 1.0976250883175737e-05, "loss": 0.5245, "step": 44270 }, { "epoch": 0.9389196411528917, "grad_norm": 0.3712138235569, "learning_rate": 1.0975918980328288e-05, "loss": 0.507, "step": 44271 }, { "epoch": 0.9389408496108248, "grad_norm": 0.40553590655326843, "learning_rate": 1.097558707639543e-05, "loss": 0.5177, "step": 44272 }, { "epoch": 0.9389620580687578, "grad_norm": 0.3689838945865631, "learning_rate": 1.097525517137753e-05, "loss": 0.5438, "step": 44273 }, { "epoch": 0.9389832665266908, "grad_norm": 0.5048683881759644, "learning_rate": 1.0974923265274961e-05, "loss": 0.5592, "step": 44274 }, { "epoch": 0.9390044749846239, "grad_norm": 0.35361579060554504, "learning_rate": 1.097459135808809e-05, "loss": 0.4155, "step": 44275 }, { "epoch": 0.9390256834425569, "grad_norm": 0.36265432834625244, "learning_rate": 1.0974259449817288e-05, "loss": 0.4746, "step": 44276 }, { "epoch": 0.93904689190049, "grad_norm": 0.42616933584213257, "learning_rate": 1.0973927540462923e-05, "loss": 0.5095, "step": 44277 }, { "epoch": 0.9390681003584229, "grad_norm": 0.36008498072624207, "learning_rate": 1.0973595630025367e-05, "loss": 0.5026, "step": 44278 }, { "epoch": 0.939089308816356, "grad_norm": 0.3596614897251129, "learning_rate": 1.0973263718504984e-05, "loss": 0.4519, "step": 44279 }, { "epoch": 0.939110517274289, "grad_norm": 0.3583967387676239, "learning_rate": 1.0972931805902145e-05, "loss": 0.5111, "step": 44280 }, { "epoch": 0.939131725732222, "grad_norm": 0.32893678545951843, "learning_rate": 1.0972599892217218e-05, "loss": 0.4543, "step": 44281 }, { "epoch": 0.939152934190155, "grad_norm": 0.36579546332359314, "learning_rate": 1.0972267977450578e-05, "loss": 0.5254, "step": 44282 }, { "epoch": 0.9391741426480881, "grad_norm": 0.3503456711769104, "learning_rate": 1.0971936061602586e-05, "loss": 0.4316, "step": 44283 }, { "epoch": 0.939195351106021, "grad_norm": 0.35137832164764404, "learning_rate": 1.0971604144673618e-05, "loss": 0.4916, "step": 44284 }, { "epoch": 0.9392165595639541, "grad_norm": 0.6019046306610107, "learning_rate": 1.0971272226664041e-05, "loss": 0.4858, "step": 44285 }, { "epoch": 0.9392377680218871, "grad_norm": 0.4921285808086395, "learning_rate": 1.097094030757422e-05, "loss": 0.4544, "step": 44286 }, { "epoch": 0.9392589764798202, "grad_norm": 0.33873793482780457, "learning_rate": 1.0970608387404532e-05, "loss": 0.4733, "step": 44287 }, { "epoch": 0.9392801849377532, "grad_norm": 0.3355018198490143, "learning_rate": 1.0970276466155336e-05, "loss": 0.4022, "step": 44288 }, { "epoch": 0.9393013933956862, "grad_norm": 0.331278532743454, "learning_rate": 1.096994454382701e-05, "loss": 0.4994, "step": 44289 }, { "epoch": 0.9393226018536193, "grad_norm": 0.37901899218559265, "learning_rate": 1.0969612620419921e-05, "loss": 0.4721, "step": 44290 }, { "epoch": 0.9393438103115522, "grad_norm": 0.3523944616317749, "learning_rate": 1.0969280695934437e-05, "loss": 0.5049, "step": 44291 }, { "epoch": 0.9393650187694853, "grad_norm": 0.4286632239818573, "learning_rate": 1.0968948770370927e-05, "loss": 0.5446, "step": 44292 }, { "epoch": 0.9393862272274183, "grad_norm": 0.42475438117980957, "learning_rate": 1.0968616843729762e-05, "loss": 0.5096, "step": 44293 }, { "epoch": 0.9394074356853513, "grad_norm": 0.3438423275947571, "learning_rate": 1.0968284916011307e-05, "loss": 0.5707, "step": 44294 }, { "epoch": 0.9394286441432843, "grad_norm": 0.47823214530944824, "learning_rate": 1.0967952987215934e-05, "loss": 0.5344, "step": 44295 }, { "epoch": 0.9394498526012174, "grad_norm": 0.3341521918773651, "learning_rate": 1.0967621057344013e-05, "loss": 0.4947, "step": 44296 }, { "epoch": 0.9394710610591503, "grad_norm": 0.49484187364578247, "learning_rate": 1.0967289126395912e-05, "loss": 0.5424, "step": 44297 }, { "epoch": 0.9394922695170834, "grad_norm": 0.45396026968955994, "learning_rate": 1.0966957194372004e-05, "loss": 0.4242, "step": 44298 }, { "epoch": 0.9395134779750164, "grad_norm": 0.41821274161338806, "learning_rate": 1.0966625261272652e-05, "loss": 0.501, "step": 44299 }, { "epoch": 0.9395346864329495, "grad_norm": 0.4461943507194519, "learning_rate": 1.0966293327098223e-05, "loss": 0.4511, "step": 44300 }, { "epoch": 0.9395558948908825, "grad_norm": 0.3808967173099518, "learning_rate": 1.0965961391849098e-05, "loss": 0.5177, "step": 44301 }, { "epoch": 0.9395771033488155, "grad_norm": 0.3945603668689728, "learning_rate": 1.0965629455525637e-05, "loss": 0.5137, "step": 44302 }, { "epoch": 0.9395983118067486, "grad_norm": 0.3634774386882782, "learning_rate": 1.096529751812821e-05, "loss": 0.5277, "step": 44303 }, { "epoch": 0.9396195202646815, "grad_norm": 0.3838120698928833, "learning_rate": 1.0964965579657189e-05, "loss": 0.4719, "step": 44304 }, { "epoch": 0.9396407287226146, "grad_norm": 0.3660964071750641, "learning_rate": 1.0964633640112941e-05, "loss": 0.4477, "step": 44305 }, { "epoch": 0.9396619371805476, "grad_norm": 0.3617507815361023, "learning_rate": 1.0964301699495836e-05, "loss": 0.5239, "step": 44306 }, { "epoch": 0.9396831456384807, "grad_norm": 0.39881762862205505, "learning_rate": 1.0963969757806243e-05, "loss": 0.5133, "step": 44307 }, { "epoch": 0.9397043540964136, "grad_norm": 0.37347152829170227, "learning_rate": 1.0963637815044532e-05, "loss": 0.5048, "step": 44308 }, { "epoch": 0.9397255625543467, "grad_norm": 0.357729434967041, "learning_rate": 1.096330587121107e-05, "loss": 0.497, "step": 44309 }, { "epoch": 0.9397467710122797, "grad_norm": 0.3514150381088257, "learning_rate": 1.0962973926306228e-05, "loss": 0.4788, "step": 44310 }, { "epoch": 0.9397679794702127, "grad_norm": 0.32142847776412964, "learning_rate": 1.0962641980330373e-05, "loss": 0.4862, "step": 44311 }, { "epoch": 0.9397891879281457, "grad_norm": 0.35056912899017334, "learning_rate": 1.0962310033283878e-05, "loss": 0.4792, "step": 44312 }, { "epoch": 0.9398103963860788, "grad_norm": 0.3122069835662842, "learning_rate": 1.096197808516711e-05, "loss": 0.4353, "step": 44313 }, { "epoch": 0.9398316048440117, "grad_norm": 0.33597099781036377, "learning_rate": 1.0961646135980438e-05, "loss": 0.4964, "step": 44314 }, { "epoch": 0.9398528133019448, "grad_norm": 0.4168040156364441, "learning_rate": 1.0961314185724231e-05, "loss": 0.4871, "step": 44315 }, { "epoch": 0.9398740217598779, "grad_norm": 0.3703922927379608, "learning_rate": 1.0960982234398863e-05, "loss": 0.4653, "step": 44316 }, { "epoch": 0.9398952302178109, "grad_norm": 0.3528063893318176, "learning_rate": 1.0960650282004692e-05, "loss": 0.5612, "step": 44317 }, { "epoch": 0.9399164386757439, "grad_norm": 0.3406692147254944, "learning_rate": 1.09603183285421e-05, "loss": 0.4546, "step": 44318 }, { "epoch": 0.9399376471336769, "grad_norm": 0.4083200991153717, "learning_rate": 1.0959986374011447e-05, "loss": 0.4486, "step": 44319 }, { "epoch": 0.93995885559161, "grad_norm": 0.40020883083343506, "learning_rate": 1.0959654418413104e-05, "loss": 0.4714, "step": 44320 }, { "epoch": 0.9399800640495429, "grad_norm": 0.49934810400009155, "learning_rate": 1.0959322461747445e-05, "loss": 0.5119, "step": 44321 }, { "epoch": 0.940001272507476, "grad_norm": 0.3994489014148712, "learning_rate": 1.0958990504014837e-05, "loss": 0.5024, "step": 44322 }, { "epoch": 0.940022480965409, "grad_norm": 0.3780098259449005, "learning_rate": 1.0958658545215644e-05, "loss": 0.5106, "step": 44323 }, { "epoch": 0.940043689423342, "grad_norm": 0.38361796736717224, "learning_rate": 1.0958326585350242e-05, "loss": 0.4536, "step": 44324 }, { "epoch": 0.940064897881275, "grad_norm": 0.39967724680900574, "learning_rate": 1.0957994624418997e-05, "loss": 0.4111, "step": 44325 }, { "epoch": 0.9400861063392081, "grad_norm": 0.4301201105117798, "learning_rate": 1.0957662662422277e-05, "loss": 0.4319, "step": 44326 }, { "epoch": 0.940107314797141, "grad_norm": 0.6008175015449524, "learning_rate": 1.0957330699360455e-05, "loss": 0.5454, "step": 44327 }, { "epoch": 0.9401285232550741, "grad_norm": 0.37091657519340515, "learning_rate": 1.0956998735233897e-05, "loss": 0.4465, "step": 44328 }, { "epoch": 0.9401497317130072, "grad_norm": 0.3553735315799713, "learning_rate": 1.0956666770042973e-05, "loss": 0.5065, "step": 44329 }, { "epoch": 0.9401709401709402, "grad_norm": 0.5442826151847839, "learning_rate": 1.0956334803788056e-05, "loss": 0.5418, "step": 44330 }, { "epoch": 0.9401921486288732, "grad_norm": 0.33887651562690735, "learning_rate": 1.0956002836469508e-05, "loss": 0.5381, "step": 44331 }, { "epoch": 0.9402133570868062, "grad_norm": 0.3983829617500305, "learning_rate": 1.0955670868087702e-05, "loss": 0.4834, "step": 44332 }, { "epoch": 0.9402345655447393, "grad_norm": 0.3972316086292267, "learning_rate": 1.095533889864301e-05, "loss": 0.4317, "step": 44333 }, { "epoch": 0.9402557740026722, "grad_norm": 0.3407861888408661, "learning_rate": 1.0955006928135794e-05, "loss": 0.4764, "step": 44334 }, { "epoch": 0.9402769824606053, "grad_norm": 0.3415558636188507, "learning_rate": 1.0954674956566431e-05, "loss": 0.5226, "step": 44335 }, { "epoch": 0.9402981909185383, "grad_norm": 0.38100677728652954, "learning_rate": 1.0954342983935287e-05, "loss": 0.4585, "step": 44336 }, { "epoch": 0.9403193993764714, "grad_norm": 0.36837539076805115, "learning_rate": 1.095401101024273e-05, "loss": 0.4855, "step": 44337 }, { "epoch": 0.9403406078344043, "grad_norm": 0.3325255811214447, "learning_rate": 1.0953679035489131e-05, "loss": 0.4139, "step": 44338 }, { "epoch": 0.9403618162923374, "grad_norm": 0.41112327575683594, "learning_rate": 1.0953347059674858e-05, "loss": 0.3965, "step": 44339 }, { "epoch": 0.9403830247502704, "grad_norm": 0.3321554660797119, "learning_rate": 1.0953015082800279e-05, "loss": 0.4327, "step": 44340 }, { "epoch": 0.9404042332082034, "grad_norm": 0.42330238223075867, "learning_rate": 1.0952683104865767e-05, "loss": 0.4912, "step": 44341 }, { "epoch": 0.9404254416661365, "grad_norm": 0.4070887565612793, "learning_rate": 1.0952351125871686e-05, "loss": 0.4361, "step": 44342 }, { "epoch": 0.9404466501240695, "grad_norm": 0.37139907479286194, "learning_rate": 1.095201914581841e-05, "loss": 0.4942, "step": 44343 }, { "epoch": 0.9404678585820025, "grad_norm": 0.43893754482269287, "learning_rate": 1.095168716470631e-05, "loss": 0.4693, "step": 44344 }, { "epoch": 0.9404890670399355, "grad_norm": 0.34169527888298035, "learning_rate": 1.0951355182535746e-05, "loss": 0.4452, "step": 44345 }, { "epoch": 0.9405102754978686, "grad_norm": 0.4251066744327545, "learning_rate": 1.0951023199307098e-05, "loss": 0.5157, "step": 44346 }, { "epoch": 0.9405314839558016, "grad_norm": 0.37804561853408813, "learning_rate": 1.095069121502073e-05, "loss": 0.4694, "step": 44347 }, { "epoch": 0.9405526924137346, "grad_norm": 0.4091428518295288, "learning_rate": 1.0950359229677008e-05, "loss": 0.4271, "step": 44348 }, { "epoch": 0.9405739008716676, "grad_norm": 0.38116660714149475, "learning_rate": 1.0950027243276306e-05, "loss": 0.4442, "step": 44349 }, { "epoch": 0.9405951093296007, "grad_norm": 0.3909192383289337, "learning_rate": 1.0949695255818994e-05, "loss": 0.4987, "step": 44350 }, { "epoch": 0.9406163177875336, "grad_norm": 0.34027034044265747, "learning_rate": 1.0949363267305437e-05, "loss": 0.4893, "step": 44351 }, { "epoch": 0.9406375262454667, "grad_norm": 0.33815956115722656, "learning_rate": 1.0949031277736006e-05, "loss": 0.4692, "step": 44352 }, { "epoch": 0.9406587347033997, "grad_norm": 0.4329511225223541, "learning_rate": 1.0948699287111073e-05, "loss": 0.6016, "step": 44353 }, { "epoch": 0.9406799431613327, "grad_norm": 0.3850939869880676, "learning_rate": 1.0948367295431004e-05, "loss": 0.524, "step": 44354 }, { "epoch": 0.9407011516192658, "grad_norm": 0.38368478417396545, "learning_rate": 1.094803530269617e-05, "loss": 0.5541, "step": 44355 }, { "epoch": 0.9407223600771988, "grad_norm": 0.3767020106315613, "learning_rate": 1.0947703308906936e-05, "loss": 0.4979, "step": 44356 }, { "epoch": 0.9407435685351319, "grad_norm": 0.3364461064338684, "learning_rate": 1.094737131406368e-05, "loss": 0.437, "step": 44357 }, { "epoch": 0.9407647769930648, "grad_norm": 0.37976834177970886, "learning_rate": 1.0947039318166761e-05, "loss": 0.5114, "step": 44358 }, { "epoch": 0.9407859854509979, "grad_norm": 0.35298222303390503, "learning_rate": 1.0946707321216553e-05, "loss": 0.4401, "step": 44359 }, { "epoch": 0.9408071939089309, "grad_norm": 0.32851290702819824, "learning_rate": 1.0946375323213428e-05, "loss": 0.4374, "step": 44360 }, { "epoch": 0.9408284023668639, "grad_norm": 0.3762351870536804, "learning_rate": 1.0946043324157753e-05, "loss": 0.4901, "step": 44361 }, { "epoch": 0.9408496108247969, "grad_norm": 0.32975801825523376, "learning_rate": 1.0945711324049895e-05, "loss": 0.4072, "step": 44362 }, { "epoch": 0.94087081928273, "grad_norm": 0.3492712080478668, "learning_rate": 1.0945379322890226e-05, "loss": 0.4422, "step": 44363 }, { "epoch": 0.9408920277406629, "grad_norm": 0.3877853453159332, "learning_rate": 1.0945047320679115e-05, "loss": 0.4302, "step": 44364 }, { "epoch": 0.940913236198596, "grad_norm": 0.41639694571495056, "learning_rate": 1.0944715317416925e-05, "loss": 0.5566, "step": 44365 }, { "epoch": 0.940934444656529, "grad_norm": 0.39857885241508484, "learning_rate": 1.0944383313104038e-05, "loss": 0.4692, "step": 44366 }, { "epoch": 0.940955653114462, "grad_norm": 0.4551631808280945, "learning_rate": 1.0944051307740815e-05, "loss": 0.5363, "step": 44367 }, { "epoch": 0.940976861572395, "grad_norm": 0.36309435963630676, "learning_rate": 1.0943719301327623e-05, "loss": 0.5735, "step": 44368 }, { "epoch": 0.9409980700303281, "grad_norm": 0.33767759799957275, "learning_rate": 1.0943387293864837e-05, "loss": 0.4115, "step": 44369 }, { "epoch": 0.9410192784882612, "grad_norm": 0.3533930480480194, "learning_rate": 1.0943055285352822e-05, "loss": 0.451, "step": 44370 }, { "epoch": 0.9410404869461941, "grad_norm": 0.3823292851448059, "learning_rate": 1.0942723275791951e-05, "loss": 0.5047, "step": 44371 }, { "epoch": 0.9410616954041272, "grad_norm": 0.3655814826488495, "learning_rate": 1.0942391265182591e-05, "loss": 0.4755, "step": 44372 }, { "epoch": 0.9410829038620602, "grad_norm": 0.35151830315589905, "learning_rate": 1.094205925352511e-05, "loss": 0.5026, "step": 44373 }, { "epoch": 0.9411041123199932, "grad_norm": 0.40477657318115234, "learning_rate": 1.094172724081988e-05, "loss": 0.4917, "step": 44374 }, { "epoch": 0.9411253207779262, "grad_norm": 0.4176810085773468, "learning_rate": 1.0941395227067271e-05, "loss": 0.5236, "step": 44375 }, { "epoch": 0.9411465292358593, "grad_norm": 0.37638258934020996, "learning_rate": 1.0941063212267649e-05, "loss": 0.4921, "step": 44376 }, { "epoch": 0.9411677376937923, "grad_norm": 0.407638281583786, "learning_rate": 1.0940731196421384e-05, "loss": 0.4872, "step": 44377 }, { "epoch": 0.9411889461517253, "grad_norm": 0.5486547350883484, "learning_rate": 1.0940399179528847e-05, "loss": 0.5057, "step": 44378 }, { "epoch": 0.9412101546096583, "grad_norm": 0.3496498763561249, "learning_rate": 1.0940067161590403e-05, "loss": 0.5335, "step": 44379 }, { "epoch": 0.9412313630675914, "grad_norm": 0.3590954542160034, "learning_rate": 1.0939735142606428e-05, "loss": 0.5267, "step": 44380 }, { "epoch": 0.9412525715255243, "grad_norm": 0.3767118453979492, "learning_rate": 1.0939403122577288e-05, "loss": 0.5096, "step": 44381 }, { "epoch": 0.9412737799834574, "grad_norm": 0.3323628604412079, "learning_rate": 1.0939071101503349e-05, "loss": 0.3927, "step": 44382 }, { "epoch": 0.9412949884413905, "grad_norm": 0.3591305613517761, "learning_rate": 1.0938739079384987e-05, "loss": 0.3854, "step": 44383 }, { "epoch": 0.9413161968993234, "grad_norm": 0.3352447748184204, "learning_rate": 1.0938407056222563e-05, "loss": 0.4943, "step": 44384 }, { "epoch": 0.9413374053572565, "grad_norm": 0.34900420904159546, "learning_rate": 1.0938075032016452e-05, "loss": 0.4513, "step": 44385 }, { "epoch": 0.9413586138151895, "grad_norm": 0.359413743019104, "learning_rate": 1.0937743006767027e-05, "loss": 0.466, "step": 44386 }, { "epoch": 0.9413798222731226, "grad_norm": 0.49469664692878723, "learning_rate": 1.0937410980474647e-05, "loss": 0.504, "step": 44387 }, { "epoch": 0.9414010307310555, "grad_norm": 0.37289994955062866, "learning_rate": 1.0937078953139691e-05, "loss": 0.4577, "step": 44388 }, { "epoch": 0.9414222391889886, "grad_norm": 0.35811010003089905, "learning_rate": 1.0936746924762522e-05, "loss": 0.5183, "step": 44389 }, { "epoch": 0.9414434476469216, "grad_norm": 0.39578381180763245, "learning_rate": 1.093641489534351e-05, "loss": 0.5103, "step": 44390 }, { "epoch": 0.9414646561048546, "grad_norm": 0.4312814772129059, "learning_rate": 1.0936082864883024e-05, "loss": 0.4784, "step": 44391 }, { "epoch": 0.9414858645627876, "grad_norm": 0.3945213854312897, "learning_rate": 1.0935750833381437e-05, "loss": 0.5056, "step": 44392 }, { "epoch": 0.9415070730207207, "grad_norm": 0.38482043147087097, "learning_rate": 1.0935418800839117e-05, "loss": 0.4764, "step": 44393 }, { "epoch": 0.9415282814786536, "grad_norm": 0.39879125356674194, "learning_rate": 1.0935086767256431e-05, "loss": 0.552, "step": 44394 }, { "epoch": 0.9415494899365867, "grad_norm": 0.3892490863800049, "learning_rate": 1.0934754732633753e-05, "loss": 0.4584, "step": 44395 }, { "epoch": 0.9415706983945198, "grad_norm": 0.36220189929008484, "learning_rate": 1.0934422696971443e-05, "loss": 0.5118, "step": 44396 }, { "epoch": 0.9415919068524528, "grad_norm": 0.36786532402038574, "learning_rate": 1.0934090660269879e-05, "loss": 0.5413, "step": 44397 }, { "epoch": 0.9416131153103858, "grad_norm": 0.3999477028846741, "learning_rate": 1.093375862252943e-05, "loss": 0.4801, "step": 44398 }, { "epoch": 0.9416343237683188, "grad_norm": 0.37282848358154297, "learning_rate": 1.0933426583750457e-05, "loss": 0.4282, "step": 44399 }, { "epoch": 0.9416555322262519, "grad_norm": 0.4940269887447357, "learning_rate": 1.093309454393334e-05, "loss": 0.5006, "step": 44400 }, { "epoch": 0.9416767406841848, "grad_norm": 0.37134119868278503, "learning_rate": 1.093276250307844e-05, "loss": 0.4561, "step": 44401 }, { "epoch": 0.9416979491421179, "grad_norm": 0.4095318019390106, "learning_rate": 1.0932430461186132e-05, "loss": 0.4392, "step": 44402 }, { "epoch": 0.9417191576000509, "grad_norm": 0.36758220195770264, "learning_rate": 1.0932098418256783e-05, "loss": 0.4507, "step": 44403 }, { "epoch": 0.941740366057984, "grad_norm": 0.3353547751903534, "learning_rate": 1.0931766374290762e-05, "loss": 0.424, "step": 44404 }, { "epoch": 0.9417615745159169, "grad_norm": 0.3920659124851227, "learning_rate": 1.0931434329288437e-05, "loss": 0.4292, "step": 44405 }, { "epoch": 0.94178278297385, "grad_norm": 0.37384411692619324, "learning_rate": 1.093110228325018e-05, "loss": 0.5012, "step": 44406 }, { "epoch": 0.941803991431783, "grad_norm": 0.39110809564590454, "learning_rate": 1.0930770236176359e-05, "loss": 0.515, "step": 44407 }, { "epoch": 0.941825199889716, "grad_norm": 0.36396273970603943, "learning_rate": 1.0930438188067345e-05, "loss": 0.4651, "step": 44408 }, { "epoch": 0.941846408347649, "grad_norm": 0.4475693702697754, "learning_rate": 1.0930106138923502e-05, "loss": 0.4951, "step": 44409 }, { "epoch": 0.9418676168055821, "grad_norm": 0.3144921660423279, "learning_rate": 1.0929774088745204e-05, "loss": 0.43, "step": 44410 }, { "epoch": 0.9418888252635151, "grad_norm": 0.35277262330055237, "learning_rate": 1.0929442037532822e-05, "loss": 0.5386, "step": 44411 }, { "epoch": 0.9419100337214481, "grad_norm": 0.39335954189300537, "learning_rate": 1.0929109985286723e-05, "loss": 0.4606, "step": 44412 }, { "epoch": 0.9419312421793812, "grad_norm": 0.41429954767227173, "learning_rate": 1.0928777932007272e-05, "loss": 0.5145, "step": 44413 }, { "epoch": 0.9419524506373141, "grad_norm": 0.34363508224487305, "learning_rate": 1.0928445877694847e-05, "loss": 0.4202, "step": 44414 }, { "epoch": 0.9419736590952472, "grad_norm": 0.38588929176330566, "learning_rate": 1.0928113822349808e-05, "loss": 0.4385, "step": 44415 }, { "epoch": 0.9419948675531802, "grad_norm": 0.4112987220287323, "learning_rate": 1.092778176597253e-05, "loss": 0.5169, "step": 44416 }, { "epoch": 0.9420160760111133, "grad_norm": 0.37242478132247925, "learning_rate": 1.0927449708563383e-05, "loss": 0.5207, "step": 44417 }, { "epoch": 0.9420372844690462, "grad_norm": 0.41390565037727356, "learning_rate": 1.0927117650122734e-05, "loss": 0.5096, "step": 44418 }, { "epoch": 0.9420584929269793, "grad_norm": 0.35773515701293945, "learning_rate": 1.0926785590650952e-05, "loss": 0.5223, "step": 44419 }, { "epoch": 0.9420797013849123, "grad_norm": 0.3626667857170105, "learning_rate": 1.0926453530148409e-05, "loss": 0.4155, "step": 44420 }, { "epoch": 0.9421009098428453, "grad_norm": 0.3518649935722351, "learning_rate": 1.092612146861547e-05, "loss": 0.4685, "step": 44421 }, { "epoch": 0.9421221183007783, "grad_norm": 0.35552048683166504, "learning_rate": 1.0925789406052505e-05, "loss": 0.4425, "step": 44422 }, { "epoch": 0.9421433267587114, "grad_norm": 0.3587633967399597, "learning_rate": 1.092545734245989e-05, "loss": 0.479, "step": 44423 }, { "epoch": 0.9421645352166444, "grad_norm": 0.3560786545276642, "learning_rate": 1.0925125277837984e-05, "loss": 0.4785, "step": 44424 }, { "epoch": 0.9421857436745774, "grad_norm": 0.4055764675140381, "learning_rate": 1.0924793212187168e-05, "loss": 0.4936, "step": 44425 }, { "epoch": 0.9422069521325105, "grad_norm": 0.39226508140563965, "learning_rate": 1.0924461145507802e-05, "loss": 0.546, "step": 44426 }, { "epoch": 0.9422281605904435, "grad_norm": 0.3265202045440674, "learning_rate": 1.0924129077800256e-05, "loss": 0.4792, "step": 44427 }, { "epoch": 0.9422493690483765, "grad_norm": 0.36164093017578125, "learning_rate": 1.0923797009064905e-05, "loss": 0.4807, "step": 44428 }, { "epoch": 0.9422705775063095, "grad_norm": 0.33033090829849243, "learning_rate": 1.0923464939302113e-05, "loss": 0.4481, "step": 44429 }, { "epoch": 0.9422917859642426, "grad_norm": 0.42328542470932007, "learning_rate": 1.0923132868512252e-05, "loss": 0.5437, "step": 44430 }, { "epoch": 0.9423129944221755, "grad_norm": 0.37196990847587585, "learning_rate": 1.0922800796695692e-05, "loss": 0.4858, "step": 44431 }, { "epoch": 0.9423342028801086, "grad_norm": 0.35379230976104736, "learning_rate": 1.09224687238528e-05, "loss": 0.4969, "step": 44432 }, { "epoch": 0.9423554113380416, "grad_norm": 0.39647427201271057, "learning_rate": 1.0922136649983946e-05, "loss": 0.5193, "step": 44433 }, { "epoch": 0.9423766197959746, "grad_norm": 0.3550083339214325, "learning_rate": 1.0921804575089501e-05, "loss": 0.5177, "step": 44434 }, { "epoch": 0.9423978282539076, "grad_norm": 0.37799587845802307, "learning_rate": 1.092147249916983e-05, "loss": 0.4844, "step": 44435 }, { "epoch": 0.9424190367118407, "grad_norm": 0.3279828429222107, "learning_rate": 1.0921140422225305e-05, "loss": 0.5193, "step": 44436 }, { "epoch": 0.9424402451697738, "grad_norm": 0.4330008625984192, "learning_rate": 1.09208083442563e-05, "loss": 0.4744, "step": 44437 }, { "epoch": 0.9424614536277067, "grad_norm": 0.3603364825248718, "learning_rate": 1.0920476265263176e-05, "loss": 0.426, "step": 44438 }, { "epoch": 0.9424826620856398, "grad_norm": 0.34394851326942444, "learning_rate": 1.0920144185246308e-05, "loss": 0.472, "step": 44439 }, { "epoch": 0.9425038705435728, "grad_norm": 0.34687983989715576, "learning_rate": 1.0919812104206065e-05, "loss": 0.4755, "step": 44440 }, { "epoch": 0.9425250790015058, "grad_norm": 0.39036622643470764, "learning_rate": 1.091948002214281e-05, "loss": 0.5004, "step": 44441 }, { "epoch": 0.9425462874594388, "grad_norm": 0.4787546694278717, "learning_rate": 1.0919147939056923e-05, "loss": 0.4441, "step": 44442 }, { "epoch": 0.9425674959173719, "grad_norm": 0.35675978660583496, "learning_rate": 1.0918815854948766e-05, "loss": 0.4272, "step": 44443 }, { "epoch": 0.9425887043753048, "grad_norm": 0.4068961441516876, "learning_rate": 1.0918483769818707e-05, "loss": 0.4418, "step": 44444 }, { "epoch": 0.9426099128332379, "grad_norm": 0.3513564467430115, "learning_rate": 1.0918151683667122e-05, "loss": 0.5183, "step": 44445 }, { "epoch": 0.9426311212911709, "grad_norm": 0.4560149908065796, "learning_rate": 1.0917819596494377e-05, "loss": 0.4718, "step": 44446 }, { "epoch": 0.942652329749104, "grad_norm": 0.3960588872432709, "learning_rate": 1.0917487508300837e-05, "loss": 0.4924, "step": 44447 }, { "epoch": 0.9426735382070369, "grad_norm": 0.35908931493759155, "learning_rate": 1.0917155419086877e-05, "loss": 0.4339, "step": 44448 }, { "epoch": 0.94269474666497, "grad_norm": 0.35231509804725647, "learning_rate": 1.0916823328852866e-05, "loss": 0.5118, "step": 44449 }, { "epoch": 0.942715955122903, "grad_norm": 0.3454093039035797, "learning_rate": 1.0916491237599172e-05, "loss": 0.4974, "step": 44450 }, { "epoch": 0.942737163580836, "grad_norm": 0.36254990100860596, "learning_rate": 1.0916159145326165e-05, "loss": 0.4706, "step": 44451 }, { "epoch": 0.9427583720387691, "grad_norm": 0.40584439039230347, "learning_rate": 1.0915827052034212e-05, "loss": 0.4846, "step": 44452 }, { "epoch": 0.9427795804967021, "grad_norm": 0.36195340752601624, "learning_rate": 1.0915494957723685e-05, "loss": 0.5112, "step": 44453 }, { "epoch": 0.9428007889546351, "grad_norm": 0.3432657718658447, "learning_rate": 1.091516286239495e-05, "loss": 0.4622, "step": 44454 }, { "epoch": 0.9428219974125681, "grad_norm": 0.3893706500530243, "learning_rate": 1.0914830766048381e-05, "loss": 0.4461, "step": 44455 }, { "epoch": 0.9428432058705012, "grad_norm": 0.348990261554718, "learning_rate": 1.0914498668684346e-05, "loss": 0.4683, "step": 44456 }, { "epoch": 0.9428644143284342, "grad_norm": 0.3756827414035797, "learning_rate": 1.0914166570303212e-05, "loss": 0.4562, "step": 44457 }, { "epoch": 0.9428856227863672, "grad_norm": 0.350277841091156, "learning_rate": 1.0913834470905351e-05, "loss": 0.5213, "step": 44458 }, { "epoch": 0.9429068312443002, "grad_norm": 0.3665078282356262, "learning_rate": 1.091350237049113e-05, "loss": 0.4615, "step": 44459 }, { "epoch": 0.9429280397022333, "grad_norm": 0.3561812937259674, "learning_rate": 1.0913170269060924e-05, "loss": 0.4389, "step": 44460 }, { "epoch": 0.9429492481601662, "grad_norm": 0.3286858797073364, "learning_rate": 1.091283816661509e-05, "loss": 0.5319, "step": 44461 }, { "epoch": 0.9429704566180993, "grad_norm": 0.3630419969558716, "learning_rate": 1.0912506063154012e-05, "loss": 0.4665, "step": 44462 }, { "epoch": 0.9429916650760323, "grad_norm": 0.38759469985961914, "learning_rate": 1.0912173958678052e-05, "loss": 0.5884, "step": 44463 }, { "epoch": 0.9430128735339653, "grad_norm": 0.38230904936790466, "learning_rate": 1.0911841853187577e-05, "loss": 0.5095, "step": 44464 }, { "epoch": 0.9430340819918984, "grad_norm": 0.3821607530117035, "learning_rate": 1.091150974668296e-05, "loss": 0.4715, "step": 44465 }, { "epoch": 0.9430552904498314, "grad_norm": 0.3690445125102997, "learning_rate": 1.0911177639164569e-05, "loss": 0.4786, "step": 44466 }, { "epoch": 0.9430764989077645, "grad_norm": 0.33276766538619995, "learning_rate": 1.0910845530632777e-05, "loss": 0.4481, "step": 44467 }, { "epoch": 0.9430977073656974, "grad_norm": 0.4186968505382538, "learning_rate": 1.091051342108795e-05, "loss": 0.5087, "step": 44468 }, { "epoch": 0.9431189158236305, "grad_norm": 0.41500991582870483, "learning_rate": 1.0910181310530455e-05, "loss": 0.5443, "step": 44469 }, { "epoch": 0.9431401242815635, "grad_norm": 0.34623774886131287, "learning_rate": 1.0909849198960668e-05, "loss": 0.4905, "step": 44470 }, { "epoch": 0.9431613327394965, "grad_norm": 0.41536745429039, "learning_rate": 1.0909517086378954e-05, "loss": 0.489, "step": 44471 }, { "epoch": 0.9431825411974295, "grad_norm": 0.3747156262397766, "learning_rate": 1.090918497278568e-05, "loss": 0.4934, "step": 44472 }, { "epoch": 0.9432037496553626, "grad_norm": 0.33440709114074707, "learning_rate": 1.0908852858181222e-05, "loss": 0.4735, "step": 44473 }, { "epoch": 0.9432249581132955, "grad_norm": 0.3981612026691437, "learning_rate": 1.0908520742565945e-05, "loss": 0.4713, "step": 44474 }, { "epoch": 0.9432461665712286, "grad_norm": 0.35797226428985596, "learning_rate": 1.0908188625940216e-05, "loss": 0.4537, "step": 44475 }, { "epoch": 0.9432673750291616, "grad_norm": 0.4040062129497528, "learning_rate": 1.0907856508304411e-05, "loss": 0.5285, "step": 44476 }, { "epoch": 0.9432885834870947, "grad_norm": 0.34306928515434265, "learning_rate": 1.0907524389658897e-05, "loss": 0.4657, "step": 44477 }, { "epoch": 0.9433097919450277, "grad_norm": 0.37314319610595703, "learning_rate": 1.0907192270004037e-05, "loss": 0.5546, "step": 44478 }, { "epoch": 0.9433310004029607, "grad_norm": 0.4043158292770386, "learning_rate": 1.090686014934021e-05, "loss": 0.473, "step": 44479 }, { "epoch": 0.9433522088608938, "grad_norm": 0.36596477031707764, "learning_rate": 1.0906528027667777e-05, "loss": 0.5049, "step": 44480 }, { "epoch": 0.9433734173188267, "grad_norm": 0.3544118106365204, "learning_rate": 1.0906195904987113e-05, "loss": 0.4448, "step": 44481 }, { "epoch": 0.9433946257767598, "grad_norm": 0.4376501739025116, "learning_rate": 1.0905863781298588e-05, "loss": 0.4757, "step": 44482 }, { "epoch": 0.9434158342346928, "grad_norm": 0.4007188677787781, "learning_rate": 1.0905531656602569e-05, "loss": 0.5189, "step": 44483 }, { "epoch": 0.9434370426926258, "grad_norm": 0.3968833386898041, "learning_rate": 1.0905199530899424e-05, "loss": 0.477, "step": 44484 }, { "epoch": 0.9434582511505588, "grad_norm": 0.3514546751976013, "learning_rate": 1.0904867404189526e-05, "loss": 0.4797, "step": 44485 }, { "epoch": 0.9434794596084919, "grad_norm": 0.39940452575683594, "learning_rate": 1.090453527647324e-05, "loss": 0.5317, "step": 44486 }, { "epoch": 0.9435006680664249, "grad_norm": 0.3945859968662262, "learning_rate": 1.0904203147750938e-05, "loss": 0.5056, "step": 44487 }, { "epoch": 0.9435218765243579, "grad_norm": 0.3324695825576782, "learning_rate": 1.0903871018022989e-05, "loss": 0.486, "step": 44488 }, { "epoch": 0.9435430849822909, "grad_norm": 0.40742334723472595, "learning_rate": 1.0903538887289764e-05, "loss": 0.5535, "step": 44489 }, { "epoch": 0.943564293440224, "grad_norm": 0.3825298845767975, "learning_rate": 1.090320675555163e-05, "loss": 0.5218, "step": 44490 }, { "epoch": 0.9435855018981569, "grad_norm": 0.36561813950538635, "learning_rate": 1.090287462280896e-05, "loss": 0.5205, "step": 44491 }, { "epoch": 0.94360671035609, "grad_norm": 0.4113413393497467, "learning_rate": 1.0902542489062115e-05, "loss": 0.5296, "step": 44492 }, { "epoch": 0.9436279188140231, "grad_norm": 0.3713897466659546, "learning_rate": 1.0902210354311474e-05, "loss": 0.4859, "step": 44493 }, { "epoch": 0.943649127271956, "grad_norm": 0.45694684982299805, "learning_rate": 1.09018782185574e-05, "loss": 0.4802, "step": 44494 }, { "epoch": 0.9436703357298891, "grad_norm": 0.3606494963169098, "learning_rate": 1.0901546081800267e-05, "loss": 0.4589, "step": 44495 }, { "epoch": 0.9436915441878221, "grad_norm": 0.37168243527412415, "learning_rate": 1.0901213944040443e-05, "loss": 0.4909, "step": 44496 }, { "epoch": 0.9437127526457552, "grad_norm": 0.381579726934433, "learning_rate": 1.0900881805278294e-05, "loss": 0.4847, "step": 44497 }, { "epoch": 0.9437339611036881, "grad_norm": 0.4521721303462982, "learning_rate": 1.0900549665514196e-05, "loss": 0.5354, "step": 44498 }, { "epoch": 0.9437551695616212, "grad_norm": 0.38535377383232117, "learning_rate": 1.090021752474851e-05, "loss": 0.4261, "step": 44499 }, { "epoch": 0.9437763780195542, "grad_norm": 0.37278977036476135, "learning_rate": 1.0899885382981611e-05, "loss": 0.4914, "step": 44500 }, { "epoch": 0.9437975864774872, "grad_norm": 0.38909319043159485, "learning_rate": 1.089955324021387e-05, "loss": 0.5818, "step": 44501 }, { "epoch": 0.9438187949354202, "grad_norm": 0.38143810629844666, "learning_rate": 1.0899221096445651e-05, "loss": 0.5237, "step": 44502 }, { "epoch": 0.9438400033933533, "grad_norm": 0.36453449726104736, "learning_rate": 1.0898888951677326e-05, "loss": 0.4774, "step": 44503 }, { "epoch": 0.9438612118512862, "grad_norm": 0.3752620220184326, "learning_rate": 1.0898556805909266e-05, "loss": 0.4662, "step": 44504 }, { "epoch": 0.9438824203092193, "grad_norm": 0.3600127100944519, "learning_rate": 1.089822465914184e-05, "loss": 0.4857, "step": 44505 }, { "epoch": 0.9439036287671524, "grad_norm": 0.47559309005737305, "learning_rate": 1.0897892511375413e-05, "loss": 0.4748, "step": 44506 }, { "epoch": 0.9439248372250854, "grad_norm": 0.3496306836605072, "learning_rate": 1.089756036261036e-05, "loss": 0.4356, "step": 44507 }, { "epoch": 0.9439460456830184, "grad_norm": 0.5833747982978821, "learning_rate": 1.0897228212847048e-05, "loss": 0.5222, "step": 44508 }, { "epoch": 0.9439672541409514, "grad_norm": 0.38186031579971313, "learning_rate": 1.0896896062085844e-05, "loss": 0.4983, "step": 44509 }, { "epoch": 0.9439884625988845, "grad_norm": 0.34124311804771423, "learning_rate": 1.0896563910327126e-05, "loss": 0.4613, "step": 44510 }, { "epoch": 0.9440096710568174, "grad_norm": 0.3499387502670288, "learning_rate": 1.0896231757571252e-05, "loss": 0.4436, "step": 44511 }, { "epoch": 0.9440308795147505, "grad_norm": 0.3423505425453186, "learning_rate": 1.0895899603818596e-05, "loss": 0.4749, "step": 44512 }, { "epoch": 0.9440520879726835, "grad_norm": 0.37842270731925964, "learning_rate": 1.0895567449069532e-05, "loss": 0.4853, "step": 44513 }, { "epoch": 0.9440732964306165, "grad_norm": 0.40911197662353516, "learning_rate": 1.0895235293324426e-05, "loss": 0.5049, "step": 44514 }, { "epoch": 0.9440945048885495, "grad_norm": 0.358064740896225, "learning_rate": 1.0894903136583644e-05, "loss": 0.4827, "step": 44515 }, { "epoch": 0.9441157133464826, "grad_norm": 0.3466377854347229, "learning_rate": 1.0894570978847561e-05, "loss": 0.4619, "step": 44516 }, { "epoch": 0.9441369218044156, "grad_norm": 0.4752803146839142, "learning_rate": 1.0894238820116541e-05, "loss": 0.4725, "step": 44517 }, { "epoch": 0.9441581302623486, "grad_norm": 0.41933420300483704, "learning_rate": 1.0893906660390957e-05, "loss": 0.4895, "step": 44518 }, { "epoch": 0.9441793387202817, "grad_norm": 0.357011079788208, "learning_rate": 1.089357449967118e-05, "loss": 0.5239, "step": 44519 }, { "epoch": 0.9442005471782147, "grad_norm": 0.3618018329143524, "learning_rate": 1.0893242337957577e-05, "loss": 0.5275, "step": 44520 }, { "epoch": 0.9442217556361477, "grad_norm": 0.36194896697998047, "learning_rate": 1.0892910175250518e-05, "loss": 0.5026, "step": 44521 }, { "epoch": 0.9442429640940807, "grad_norm": 0.3143868148326874, "learning_rate": 1.089257801155037e-05, "loss": 0.4652, "step": 44522 }, { "epoch": 0.9442641725520138, "grad_norm": 0.37321460247039795, "learning_rate": 1.0892245846857505e-05, "loss": 0.4589, "step": 44523 }, { "epoch": 0.9442853810099467, "grad_norm": 0.373241126537323, "learning_rate": 1.0891913681172292e-05, "loss": 0.5062, "step": 44524 }, { "epoch": 0.9443065894678798, "grad_norm": 0.3683684766292572, "learning_rate": 1.0891581514495102e-05, "loss": 0.5145, "step": 44525 }, { "epoch": 0.9443277979258128, "grad_norm": 0.38224318623542786, "learning_rate": 1.0891249346826301e-05, "loss": 0.4644, "step": 44526 }, { "epoch": 0.9443490063837459, "grad_norm": 0.3664611279964447, "learning_rate": 1.0890917178166261e-05, "loss": 0.4895, "step": 44527 }, { "epoch": 0.9443702148416788, "grad_norm": 0.4078426957130432, "learning_rate": 1.089058500851535e-05, "loss": 0.4863, "step": 44528 }, { "epoch": 0.9443914232996119, "grad_norm": 0.3821065127849579, "learning_rate": 1.0890252837873938e-05, "loss": 0.5049, "step": 44529 }, { "epoch": 0.9444126317575449, "grad_norm": 0.41241565346717834, "learning_rate": 1.0889920666242394e-05, "loss": 0.4591, "step": 44530 }, { "epoch": 0.9444338402154779, "grad_norm": 0.7073000073432922, "learning_rate": 1.088958849362109e-05, "loss": 0.457, "step": 44531 }, { "epoch": 0.9444550486734109, "grad_norm": 0.4208364486694336, "learning_rate": 1.0889256320010391e-05, "loss": 0.4067, "step": 44532 }, { "epoch": 0.944476257131344, "grad_norm": 0.4890359342098236, "learning_rate": 1.088892414541067e-05, "loss": 0.4385, "step": 44533 }, { "epoch": 0.944497465589277, "grad_norm": 0.3844909965991974, "learning_rate": 1.0888591969822296e-05, "loss": 0.5838, "step": 44534 }, { "epoch": 0.94451867404721, "grad_norm": 0.38601019978523254, "learning_rate": 1.0888259793245637e-05, "loss": 0.4407, "step": 44535 }, { "epoch": 0.9445398825051431, "grad_norm": 0.3541756868362427, "learning_rate": 1.0887927615681066e-05, "loss": 0.4298, "step": 44536 }, { "epoch": 0.944561090963076, "grad_norm": 0.384030282497406, "learning_rate": 1.0887595437128946e-05, "loss": 0.488, "step": 44537 }, { "epoch": 0.9445822994210091, "grad_norm": 0.3385744094848633, "learning_rate": 1.0887263257589648e-05, "loss": 0.4789, "step": 44538 }, { "epoch": 0.9446035078789421, "grad_norm": 0.3741450905799866, "learning_rate": 1.088693107706355e-05, "loss": 0.5219, "step": 44539 }, { "epoch": 0.9446247163368752, "grad_norm": 0.3660169243812561, "learning_rate": 1.0886598895551011e-05, "loss": 0.521, "step": 44540 }, { "epoch": 0.9446459247948081, "grad_norm": 0.4190424680709839, "learning_rate": 1.0886266713052404e-05, "loss": 0.5516, "step": 44541 }, { "epoch": 0.9446671332527412, "grad_norm": 0.3871697187423706, "learning_rate": 1.08859345295681e-05, "loss": 0.4699, "step": 44542 }, { "epoch": 0.9446883417106742, "grad_norm": 0.41726329922676086, "learning_rate": 1.0885602345098467e-05, "loss": 0.5239, "step": 44543 }, { "epoch": 0.9447095501686072, "grad_norm": 0.42641764879226685, "learning_rate": 1.0885270159643874e-05, "loss": 0.5164, "step": 44544 }, { "epoch": 0.9447307586265402, "grad_norm": 0.41555407643318176, "learning_rate": 1.0884937973204692e-05, "loss": 0.4912, "step": 44545 }, { "epoch": 0.9447519670844733, "grad_norm": 0.3307000696659088, "learning_rate": 1.088460578578129e-05, "loss": 0.4756, "step": 44546 }, { "epoch": 0.9447731755424064, "grad_norm": 0.3773344159126282, "learning_rate": 1.0884273597374038e-05, "loss": 0.5291, "step": 44547 }, { "epoch": 0.9447943840003393, "grad_norm": 0.35482871532440186, "learning_rate": 1.0883941407983302e-05, "loss": 0.4072, "step": 44548 }, { "epoch": 0.9448155924582724, "grad_norm": 0.3772599399089813, "learning_rate": 1.0883609217609457e-05, "loss": 0.5883, "step": 44549 }, { "epoch": 0.9448368009162054, "grad_norm": 0.3601548969745636, "learning_rate": 1.088327702625287e-05, "loss": 0.5265, "step": 44550 }, { "epoch": 0.9448580093741384, "grad_norm": 0.33814477920532227, "learning_rate": 1.0882944833913904e-05, "loss": 0.534, "step": 44551 }, { "epoch": 0.9448792178320714, "grad_norm": 0.35920581221580505, "learning_rate": 1.088261264059294e-05, "loss": 0.4931, "step": 44552 }, { "epoch": 0.9449004262900045, "grad_norm": 0.3787134885787964, "learning_rate": 1.0882280446290341e-05, "loss": 0.4633, "step": 44553 }, { "epoch": 0.9449216347479374, "grad_norm": 0.3995645344257355, "learning_rate": 1.0881948251006475e-05, "loss": 0.4836, "step": 44554 }, { "epoch": 0.9449428432058705, "grad_norm": 0.4211841821670532, "learning_rate": 1.0881616054741716e-05, "loss": 0.5001, "step": 44555 }, { "epoch": 0.9449640516638035, "grad_norm": 0.32157567143440247, "learning_rate": 1.0881283857496432e-05, "loss": 0.4961, "step": 44556 }, { "epoch": 0.9449852601217366, "grad_norm": 0.36198264360427856, "learning_rate": 1.0880951659270988e-05, "loss": 0.4801, "step": 44557 }, { "epoch": 0.9450064685796695, "grad_norm": 0.3322004973888397, "learning_rate": 1.088061946006576e-05, "loss": 0.4173, "step": 44558 }, { "epoch": 0.9450276770376026, "grad_norm": 0.3431561589241028, "learning_rate": 1.0880287259881115e-05, "loss": 0.4795, "step": 44559 }, { "epoch": 0.9450488854955357, "grad_norm": 0.3590414822101593, "learning_rate": 1.0879955058717421e-05, "loss": 0.4941, "step": 44560 }, { "epoch": 0.9450700939534686, "grad_norm": 0.42182937264442444, "learning_rate": 1.0879622856575051e-05, "loss": 0.4775, "step": 44561 }, { "epoch": 0.9450913024114017, "grad_norm": 0.40857386589050293, "learning_rate": 1.0879290653454369e-05, "loss": 0.5674, "step": 44562 }, { "epoch": 0.9451125108693347, "grad_norm": 0.3815317451953888, "learning_rate": 1.0878958449355748e-05, "loss": 0.4177, "step": 44563 }, { "epoch": 0.9451337193272678, "grad_norm": 0.33586612343788147, "learning_rate": 1.0878626244279558e-05, "loss": 0.3893, "step": 44564 }, { "epoch": 0.9451549277852007, "grad_norm": 0.36535465717315674, "learning_rate": 1.0878294038226167e-05, "loss": 0.5311, "step": 44565 }, { "epoch": 0.9451761362431338, "grad_norm": 0.3548103868961334, "learning_rate": 1.0877961831195947e-05, "loss": 0.4627, "step": 44566 }, { "epoch": 0.9451973447010668, "grad_norm": 0.4318629503250122, "learning_rate": 1.0877629623189265e-05, "loss": 0.4967, "step": 44567 }, { "epoch": 0.9452185531589998, "grad_norm": 0.5697520971298218, "learning_rate": 1.0877297414206489e-05, "loss": 0.4651, "step": 44568 }, { "epoch": 0.9452397616169328, "grad_norm": 0.3734530806541443, "learning_rate": 1.087696520424799e-05, "loss": 0.5339, "step": 44569 }, { "epoch": 0.9452609700748659, "grad_norm": 0.4151430130004883, "learning_rate": 1.087663299331414e-05, "loss": 0.5339, "step": 44570 }, { "epoch": 0.9452821785327988, "grad_norm": 0.3571028709411621, "learning_rate": 1.0876300781405305e-05, "loss": 0.5001, "step": 44571 }, { "epoch": 0.9453033869907319, "grad_norm": 0.42731404304504395, "learning_rate": 1.0875968568521858e-05, "loss": 0.4022, "step": 44572 }, { "epoch": 0.9453245954486649, "grad_norm": 0.33112040162086487, "learning_rate": 1.0875636354664165e-05, "loss": 0.4172, "step": 44573 }, { "epoch": 0.945345803906598, "grad_norm": 0.4177887439727783, "learning_rate": 1.0875304139832597e-05, "loss": 0.5895, "step": 44574 }, { "epoch": 0.945367012364531, "grad_norm": 0.48880836367607117, "learning_rate": 1.0874971924027521e-05, "loss": 0.3715, "step": 44575 }, { "epoch": 0.945388220822464, "grad_norm": 0.32926473021507263, "learning_rate": 1.0874639707249313e-05, "loss": 0.4339, "step": 44576 }, { "epoch": 0.9454094292803971, "grad_norm": 0.35183948278427124, "learning_rate": 1.0874307489498332e-05, "loss": 0.4284, "step": 44577 }, { "epoch": 0.94543063773833, "grad_norm": 0.36826369166374207, "learning_rate": 1.087397527077496e-05, "loss": 0.484, "step": 44578 }, { "epoch": 0.9454518461962631, "grad_norm": 0.34165987372398376, "learning_rate": 1.087364305107956e-05, "loss": 0.4607, "step": 44579 }, { "epoch": 0.9454730546541961, "grad_norm": 0.37102699279785156, "learning_rate": 1.0873310830412498e-05, "loss": 0.4283, "step": 44580 }, { "epoch": 0.9454942631121291, "grad_norm": 0.38338953256607056, "learning_rate": 1.0872978608774152e-05, "loss": 0.5324, "step": 44581 }, { "epoch": 0.9455154715700621, "grad_norm": 0.4812811315059662, "learning_rate": 1.0872646386164883e-05, "loss": 0.4965, "step": 44582 }, { "epoch": 0.9455366800279952, "grad_norm": 0.37614551186561584, "learning_rate": 1.0872314162585065e-05, "loss": 0.4505, "step": 44583 }, { "epoch": 0.9455578884859281, "grad_norm": 0.36821457743644714, "learning_rate": 1.0871981938035067e-05, "loss": 0.4827, "step": 44584 }, { "epoch": 0.9455790969438612, "grad_norm": 0.49888384342193604, "learning_rate": 1.0871649712515257e-05, "loss": 0.3918, "step": 44585 }, { "epoch": 0.9456003054017942, "grad_norm": 0.321517676115036, "learning_rate": 1.087131748602601e-05, "loss": 0.4388, "step": 44586 }, { "epoch": 0.9456215138597273, "grad_norm": 0.3329625129699707, "learning_rate": 1.0870985258567689e-05, "loss": 0.4737, "step": 44587 }, { "epoch": 0.9456427223176603, "grad_norm": 0.35222506523132324, "learning_rate": 1.0870653030140663e-05, "loss": 0.5116, "step": 44588 }, { "epoch": 0.9456639307755933, "grad_norm": 0.35157540440559387, "learning_rate": 1.0870320800745307e-05, "loss": 0.4123, "step": 44589 }, { "epoch": 0.9456851392335264, "grad_norm": 0.3200364410877228, "learning_rate": 1.0869988570381986e-05, "loss": 0.414, "step": 44590 }, { "epoch": 0.9457063476914593, "grad_norm": 0.38470107316970825, "learning_rate": 1.0869656339051073e-05, "loss": 0.4529, "step": 44591 }, { "epoch": 0.9457275561493924, "grad_norm": 0.31914180517196655, "learning_rate": 1.0869324106752936e-05, "loss": 0.4157, "step": 44592 }, { "epoch": 0.9457487646073254, "grad_norm": 0.4108760952949524, "learning_rate": 1.0868991873487942e-05, "loss": 0.5057, "step": 44593 }, { "epoch": 0.9457699730652585, "grad_norm": 0.36263230443000793, "learning_rate": 1.0868659639256468e-05, "loss": 0.5201, "step": 44594 }, { "epoch": 0.9457911815231914, "grad_norm": 0.4188976585865021, "learning_rate": 1.0868327404058871e-05, "loss": 0.4605, "step": 44595 }, { "epoch": 0.9458123899811245, "grad_norm": 0.37447184324264526, "learning_rate": 1.0867995167895531e-05, "loss": 0.4217, "step": 44596 }, { "epoch": 0.9458335984390575, "grad_norm": 0.4432676434516907, "learning_rate": 1.0867662930766817e-05, "loss": 0.4949, "step": 44597 }, { "epoch": 0.9458548068969905, "grad_norm": 0.3792889714241028, "learning_rate": 1.0867330692673096e-05, "loss": 0.519, "step": 44598 }, { "epoch": 0.9458760153549235, "grad_norm": 0.3706890046596527, "learning_rate": 1.0866998453614733e-05, "loss": 0.4399, "step": 44599 }, { "epoch": 0.9458972238128566, "grad_norm": 0.3675747215747833, "learning_rate": 1.0866666213592103e-05, "loss": 0.5025, "step": 44600 }, { "epoch": 0.9459184322707896, "grad_norm": 0.36604031920433044, "learning_rate": 1.0866333972605577e-05, "loss": 0.4684, "step": 44601 }, { "epoch": 0.9459396407287226, "grad_norm": 0.43856728076934814, "learning_rate": 1.0866001730655516e-05, "loss": 0.4658, "step": 44602 }, { "epoch": 0.9459608491866557, "grad_norm": 0.5962067246437073, "learning_rate": 1.0865669487742302e-05, "loss": 0.5281, "step": 44603 }, { "epoch": 0.9459820576445886, "grad_norm": 0.695166826248169, "learning_rate": 1.0865337243866297e-05, "loss": 0.466, "step": 44604 }, { "epoch": 0.9460032661025217, "grad_norm": 0.36988675594329834, "learning_rate": 1.0865004999027868e-05, "loss": 0.5385, "step": 44605 }, { "epoch": 0.9460244745604547, "grad_norm": 0.31693050265312195, "learning_rate": 1.086467275322739e-05, "loss": 0.4266, "step": 44606 }, { "epoch": 0.9460456830183878, "grad_norm": 0.3846230208873749, "learning_rate": 1.0864340506465229e-05, "loss": 0.4949, "step": 44607 }, { "epoch": 0.9460668914763207, "grad_norm": 0.3524113595485687, "learning_rate": 1.0864008258741757e-05, "loss": 0.5099, "step": 44608 }, { "epoch": 0.9460880999342538, "grad_norm": 0.6345711350440979, "learning_rate": 1.0863676010057342e-05, "loss": 0.5133, "step": 44609 }, { "epoch": 0.9461093083921868, "grad_norm": 0.38088735938072205, "learning_rate": 1.0863343760412355e-05, "loss": 0.4886, "step": 44610 }, { "epoch": 0.9461305168501198, "grad_norm": 0.5151171088218689, "learning_rate": 1.0863011509807167e-05, "loss": 0.4868, "step": 44611 }, { "epoch": 0.9461517253080528, "grad_norm": 0.3625936210155487, "learning_rate": 1.086267925824214e-05, "loss": 0.5094, "step": 44612 }, { "epoch": 0.9461729337659859, "grad_norm": 0.3502698838710785, "learning_rate": 1.0862347005717653e-05, "loss": 0.4706, "step": 44613 }, { "epoch": 0.9461941422239188, "grad_norm": 0.7729199528694153, "learning_rate": 1.0862014752234069e-05, "loss": 0.449, "step": 44614 }, { "epoch": 0.9462153506818519, "grad_norm": 0.41103577613830566, "learning_rate": 1.0861682497791758e-05, "loss": 0.4925, "step": 44615 }, { "epoch": 0.946236559139785, "grad_norm": 0.3688814640045166, "learning_rate": 1.0861350242391092e-05, "loss": 0.5276, "step": 44616 }, { "epoch": 0.946257767597718, "grad_norm": 0.3457721173763275, "learning_rate": 1.0861017986032443e-05, "loss": 0.4687, "step": 44617 }, { "epoch": 0.946278976055651, "grad_norm": 0.3712186813354492, "learning_rate": 1.0860685728716176e-05, "loss": 0.5081, "step": 44618 }, { "epoch": 0.946300184513584, "grad_norm": 0.32597580552101135, "learning_rate": 1.086035347044266e-05, "loss": 0.5113, "step": 44619 }, { "epoch": 0.9463213929715171, "grad_norm": 0.3465210795402527, "learning_rate": 1.0860021211212268e-05, "loss": 0.472, "step": 44620 }, { "epoch": 0.94634260142945, "grad_norm": 0.3863767385482788, "learning_rate": 1.0859688951025367e-05, "loss": 0.444, "step": 44621 }, { "epoch": 0.9463638098873831, "grad_norm": 0.5750306248664856, "learning_rate": 1.0859356689882326e-05, "loss": 0.4984, "step": 44622 }, { "epoch": 0.9463850183453161, "grad_norm": 0.35907208919525146, "learning_rate": 1.0859024427783519e-05, "loss": 0.4508, "step": 44623 }, { "epoch": 0.9464062268032492, "grad_norm": 0.36094897985458374, "learning_rate": 1.085869216472931e-05, "loss": 0.4951, "step": 44624 }, { "epoch": 0.9464274352611821, "grad_norm": 0.3585823178291321, "learning_rate": 1.0858359900720073e-05, "loss": 0.5357, "step": 44625 }, { "epoch": 0.9464486437191152, "grad_norm": 0.39072737097740173, "learning_rate": 1.0858027635756176e-05, "loss": 0.5984, "step": 44626 }, { "epoch": 0.9464698521770482, "grad_norm": 0.3768126666545868, "learning_rate": 1.0857695369837986e-05, "loss": 0.3902, "step": 44627 }, { "epoch": 0.9464910606349812, "grad_norm": 0.36189383268356323, "learning_rate": 1.0857363102965875e-05, "loss": 0.5157, "step": 44628 }, { "epoch": 0.9465122690929143, "grad_norm": 0.365268737077713, "learning_rate": 1.0857030835140212e-05, "loss": 0.4581, "step": 44629 }, { "epoch": 0.9465334775508473, "grad_norm": 1.0280108451843262, "learning_rate": 1.0856698566361367e-05, "loss": 0.4962, "step": 44630 }, { "epoch": 0.9465546860087803, "grad_norm": 0.36055511236190796, "learning_rate": 1.085636629662971e-05, "loss": 0.5047, "step": 44631 }, { "epoch": 0.9465758944667133, "grad_norm": 0.3832593858242035, "learning_rate": 1.0856034025945612e-05, "loss": 0.5297, "step": 44632 }, { "epoch": 0.9465971029246464, "grad_norm": 0.38134488463401794, "learning_rate": 1.0855701754309436e-05, "loss": 0.5421, "step": 44633 }, { "epoch": 0.9466183113825793, "grad_norm": 0.3912307620048523, "learning_rate": 1.0855369481721555e-05, "loss": 0.5446, "step": 44634 }, { "epoch": 0.9466395198405124, "grad_norm": 0.3993009030818939, "learning_rate": 1.0855037208182346e-05, "loss": 0.4643, "step": 44635 }, { "epoch": 0.9466607282984454, "grad_norm": 0.35862764716148376, "learning_rate": 1.0854704933692167e-05, "loss": 0.4412, "step": 44636 }, { "epoch": 0.9466819367563785, "grad_norm": 0.32882317900657654, "learning_rate": 1.0854372658251395e-05, "loss": 0.4123, "step": 44637 }, { "epoch": 0.9467031452143114, "grad_norm": 0.497415691614151, "learning_rate": 1.0854040381860395e-05, "loss": 0.5511, "step": 44638 }, { "epoch": 0.9467243536722445, "grad_norm": 0.33437782526016235, "learning_rate": 1.085370810451954e-05, "loss": 0.4567, "step": 44639 }, { "epoch": 0.9467455621301775, "grad_norm": 0.35225746035575867, "learning_rate": 1.0853375826229198e-05, "loss": 0.4438, "step": 44640 }, { "epoch": 0.9467667705881105, "grad_norm": 0.34497353434562683, "learning_rate": 1.0853043546989739e-05, "loss": 0.5006, "step": 44641 }, { "epoch": 0.9467879790460436, "grad_norm": 0.3870896100997925, "learning_rate": 1.0852711266801531e-05, "loss": 0.5064, "step": 44642 }, { "epoch": 0.9468091875039766, "grad_norm": 0.3647719621658325, "learning_rate": 1.0852378985664949e-05, "loss": 0.4561, "step": 44643 }, { "epoch": 0.9468303959619097, "grad_norm": 0.4116244316101074, "learning_rate": 1.0852046703580355e-05, "loss": 0.5119, "step": 44644 }, { "epoch": 0.9468516044198426, "grad_norm": 0.35722795128822327, "learning_rate": 1.0851714420548123e-05, "loss": 0.399, "step": 44645 }, { "epoch": 0.9468728128777757, "grad_norm": 0.6662120819091797, "learning_rate": 1.0851382136568623e-05, "loss": 0.4402, "step": 44646 }, { "epoch": 0.9468940213357087, "grad_norm": 0.3538610637187958, "learning_rate": 1.0851049851642217e-05, "loss": 0.4733, "step": 44647 }, { "epoch": 0.9469152297936417, "grad_norm": 0.3854518234729767, "learning_rate": 1.0850717565769287e-05, "loss": 0.4756, "step": 44648 }, { "epoch": 0.9469364382515747, "grad_norm": 0.388956218957901, "learning_rate": 1.0850385278950195e-05, "loss": 0.4309, "step": 44649 }, { "epoch": 0.9469576467095078, "grad_norm": 0.4165431261062622, "learning_rate": 1.0850052991185312e-05, "loss": 0.5194, "step": 44650 }, { "epoch": 0.9469788551674407, "grad_norm": 0.38750991225242615, "learning_rate": 1.0849720702475008e-05, "loss": 0.5883, "step": 44651 }, { "epoch": 0.9470000636253738, "grad_norm": 0.5584138035774231, "learning_rate": 1.084938841281965e-05, "loss": 0.5083, "step": 44652 }, { "epoch": 0.9470212720833068, "grad_norm": 0.3931254744529724, "learning_rate": 1.0849056122219608e-05, "loss": 0.4593, "step": 44653 }, { "epoch": 0.9470424805412399, "grad_norm": 0.4398133456707001, "learning_rate": 1.0848723830675258e-05, "loss": 0.4418, "step": 44654 }, { "epoch": 0.9470636889991728, "grad_norm": 0.41563835740089417, "learning_rate": 1.0848391538186964e-05, "loss": 0.4711, "step": 44655 }, { "epoch": 0.9470848974571059, "grad_norm": 0.3708256483078003, "learning_rate": 1.0848059244755093e-05, "loss": 0.5197, "step": 44656 }, { "epoch": 0.947106105915039, "grad_norm": 0.32610753178596497, "learning_rate": 1.084772695038002e-05, "loss": 0.5575, "step": 44657 }, { "epoch": 0.9471273143729719, "grad_norm": 0.7239394783973694, "learning_rate": 1.084739465506211e-05, "loss": 0.503, "step": 44658 }, { "epoch": 0.947148522830905, "grad_norm": 0.3347054421901703, "learning_rate": 1.0847062358801741e-05, "loss": 0.5698, "step": 44659 }, { "epoch": 0.947169731288838, "grad_norm": 1.9294339418411255, "learning_rate": 1.0846730061599271e-05, "loss": 0.4007, "step": 44660 }, { "epoch": 0.947190939746771, "grad_norm": 0.3514460325241089, "learning_rate": 1.0846397763455077e-05, "loss": 0.4326, "step": 44661 }, { "epoch": 0.947212148204704, "grad_norm": 0.3236183226108551, "learning_rate": 1.0846065464369527e-05, "loss": 0.4534, "step": 44662 }, { "epoch": 0.9472333566626371, "grad_norm": 0.35438665747642517, "learning_rate": 1.0845733164342991e-05, "loss": 0.3826, "step": 44663 }, { "epoch": 0.94725456512057, "grad_norm": 0.35120299458503723, "learning_rate": 1.0845400863375834e-05, "loss": 0.4212, "step": 44664 }, { "epoch": 0.9472757735785031, "grad_norm": 0.38450467586517334, "learning_rate": 1.0845068561468435e-05, "loss": 0.5036, "step": 44665 }, { "epoch": 0.9472969820364361, "grad_norm": 0.35430803894996643, "learning_rate": 1.0844736258621153e-05, "loss": 0.493, "step": 44666 }, { "epoch": 0.9473181904943692, "grad_norm": 0.4359855353832245, "learning_rate": 1.0844403954834367e-05, "loss": 0.4112, "step": 44667 }, { "epoch": 0.9473393989523021, "grad_norm": 0.3209843337535858, "learning_rate": 1.084407165010844e-05, "loss": 0.3811, "step": 44668 }, { "epoch": 0.9473606074102352, "grad_norm": 0.3507407307624817, "learning_rate": 1.0843739344443745e-05, "loss": 0.4446, "step": 44669 }, { "epoch": 0.9473818158681683, "grad_norm": 0.3675759434700012, "learning_rate": 1.0843407037840649e-05, "loss": 0.4803, "step": 44670 }, { "epoch": 0.9474030243261012, "grad_norm": 0.32034265995025635, "learning_rate": 1.0843074730299525e-05, "loss": 0.429, "step": 44671 }, { "epoch": 0.9474242327840343, "grad_norm": 0.3700321614742279, "learning_rate": 1.0842742421820738e-05, "loss": 0.476, "step": 44672 }, { "epoch": 0.9474454412419673, "grad_norm": 0.3712208569049835, "learning_rate": 1.084241011240466e-05, "loss": 0.4518, "step": 44673 }, { "epoch": 0.9474666496999004, "grad_norm": 0.34939393401145935, "learning_rate": 1.0842077802051665e-05, "loss": 0.4156, "step": 44674 }, { "epoch": 0.9474878581578333, "grad_norm": 0.3458379805088043, "learning_rate": 1.0841745490762114e-05, "loss": 0.507, "step": 44675 }, { "epoch": 0.9475090666157664, "grad_norm": 0.36301684379577637, "learning_rate": 1.0841413178536385e-05, "loss": 0.4322, "step": 44676 }, { "epoch": 0.9475302750736994, "grad_norm": 0.37313202023506165, "learning_rate": 1.084108086537484e-05, "loss": 0.4569, "step": 44677 }, { "epoch": 0.9475514835316324, "grad_norm": 0.3665175437927246, "learning_rate": 1.0840748551277854e-05, "loss": 0.5476, "step": 44678 }, { "epoch": 0.9475726919895654, "grad_norm": 0.32596200704574585, "learning_rate": 1.0840416236245792e-05, "loss": 0.4478, "step": 44679 }, { "epoch": 0.9475939004474985, "grad_norm": 0.3647133409976959, "learning_rate": 1.084008392027903e-05, "loss": 0.5116, "step": 44680 }, { "epoch": 0.9476151089054314, "grad_norm": 0.5034822225570679, "learning_rate": 1.0839751603377933e-05, "loss": 0.5164, "step": 44681 }, { "epoch": 0.9476363173633645, "grad_norm": 0.42248523235321045, "learning_rate": 1.0839419285542872e-05, "loss": 0.4975, "step": 44682 }, { "epoch": 0.9476575258212976, "grad_norm": 0.35086438059806824, "learning_rate": 1.0839086966774215e-05, "loss": 0.4417, "step": 44683 }, { "epoch": 0.9476787342792306, "grad_norm": 0.39946117997169495, "learning_rate": 1.0838754647072332e-05, "loss": 0.4087, "step": 44684 }, { "epoch": 0.9476999427371636, "grad_norm": 0.3907325863838196, "learning_rate": 1.0838422326437593e-05, "loss": 0.4269, "step": 44685 }, { "epoch": 0.9477211511950966, "grad_norm": 0.3720652163028717, "learning_rate": 1.0838090004870371e-05, "loss": 0.5238, "step": 44686 }, { "epoch": 0.9477423596530297, "grad_norm": 0.38115444779396057, "learning_rate": 1.083775768237103e-05, "loss": 0.485, "step": 44687 }, { "epoch": 0.9477635681109626, "grad_norm": 0.5416383147239685, "learning_rate": 1.0837425358939943e-05, "loss": 0.4405, "step": 44688 }, { "epoch": 0.9477847765688957, "grad_norm": 0.3621894419193268, "learning_rate": 1.083709303457748e-05, "loss": 0.4373, "step": 44689 }, { "epoch": 0.9478059850268287, "grad_norm": 0.4462566077709198, "learning_rate": 1.0836760709284009e-05, "loss": 0.4702, "step": 44690 }, { "epoch": 0.9478271934847617, "grad_norm": 0.4357112646102905, "learning_rate": 1.08364283830599e-05, "loss": 0.5221, "step": 44691 }, { "epoch": 0.9478484019426947, "grad_norm": 0.4449625015258789, "learning_rate": 1.0836096055905518e-05, "loss": 0.5393, "step": 44692 }, { "epoch": 0.9478696104006278, "grad_norm": 0.43121400475502014, "learning_rate": 1.0835763727821243e-05, "loss": 0.567, "step": 44693 }, { "epoch": 0.9478908188585607, "grad_norm": 0.38786596059799194, "learning_rate": 1.0835431398807436e-05, "loss": 0.4837, "step": 44694 }, { "epoch": 0.9479120273164938, "grad_norm": 0.3487054109573364, "learning_rate": 1.0835099068864473e-05, "loss": 0.4812, "step": 44695 }, { "epoch": 0.9479332357744269, "grad_norm": 0.3357747495174408, "learning_rate": 1.0834766737992717e-05, "loss": 0.4776, "step": 44696 }, { "epoch": 0.9479544442323599, "grad_norm": 0.4256055951118469, "learning_rate": 1.0834434406192543e-05, "loss": 0.4986, "step": 44697 }, { "epoch": 0.9479756526902929, "grad_norm": 0.3438689708709717, "learning_rate": 1.0834102073464312e-05, "loss": 0.4809, "step": 44698 }, { "epoch": 0.9479968611482259, "grad_norm": 0.4180588722229004, "learning_rate": 1.0833769739808407e-05, "loss": 0.4819, "step": 44699 }, { "epoch": 0.948018069606159, "grad_norm": 0.3798505365848541, "learning_rate": 1.0833437405225188e-05, "loss": 0.4159, "step": 44700 }, { "epoch": 0.9480392780640919, "grad_norm": 0.4050961434841156, "learning_rate": 1.0833105069715026e-05, "loss": 0.4363, "step": 44701 }, { "epoch": 0.948060486522025, "grad_norm": 0.41984882950782776, "learning_rate": 1.0832772733278293e-05, "loss": 0.49, "step": 44702 }, { "epoch": 0.948081694979958, "grad_norm": 0.3788204491138458, "learning_rate": 1.0832440395915357e-05, "loss": 0.4691, "step": 44703 }, { "epoch": 0.948102903437891, "grad_norm": 0.6793611645698547, "learning_rate": 1.0832108057626587e-05, "loss": 0.4797, "step": 44704 }, { "epoch": 0.948124111895824, "grad_norm": 0.44781872630119324, "learning_rate": 1.0831775718412356e-05, "loss": 0.4591, "step": 44705 }, { "epoch": 0.9481453203537571, "grad_norm": 0.4011620283126831, "learning_rate": 1.083144337827303e-05, "loss": 0.505, "step": 44706 }, { "epoch": 0.9481665288116901, "grad_norm": 0.36835750937461853, "learning_rate": 1.083111103720898e-05, "loss": 0.5053, "step": 44707 }, { "epoch": 0.9481877372696231, "grad_norm": 0.37184539437294006, "learning_rate": 1.0830778695220578e-05, "loss": 0.4851, "step": 44708 }, { "epoch": 0.9482089457275561, "grad_norm": 0.46466711163520813, "learning_rate": 1.0830446352308189e-05, "loss": 0.4869, "step": 44709 }, { "epoch": 0.9482301541854892, "grad_norm": 0.3543361723423004, "learning_rate": 1.0830114008472186e-05, "loss": 0.5065, "step": 44710 }, { "epoch": 0.9482513626434222, "grad_norm": 0.3699973225593567, "learning_rate": 1.0829781663712933e-05, "loss": 0.4569, "step": 44711 }, { "epoch": 0.9482725711013552, "grad_norm": 0.35191166400909424, "learning_rate": 1.0829449318030806e-05, "loss": 0.4617, "step": 44712 }, { "epoch": 0.9482937795592883, "grad_norm": 0.36429527401924133, "learning_rate": 1.0829116971426175e-05, "loss": 0.4919, "step": 44713 }, { "epoch": 0.9483149880172212, "grad_norm": 0.4149460196495056, "learning_rate": 1.0828784623899407e-05, "loss": 0.4396, "step": 44714 }, { "epoch": 0.9483361964751543, "grad_norm": 0.3403342664241791, "learning_rate": 1.0828452275450871e-05, "loss": 0.4957, "step": 44715 }, { "epoch": 0.9483574049330873, "grad_norm": 0.35137712955474854, "learning_rate": 1.0828119926080938e-05, "loss": 0.4314, "step": 44716 }, { "epoch": 0.9483786133910204, "grad_norm": 0.3939303457736969, "learning_rate": 1.0827787575789976e-05, "loss": 0.4141, "step": 44717 }, { "epoch": 0.9483998218489533, "grad_norm": 0.34380897879600525, "learning_rate": 1.0827455224578356e-05, "loss": 0.4464, "step": 44718 }, { "epoch": 0.9484210303068864, "grad_norm": 0.5073527693748474, "learning_rate": 1.082712287244645e-05, "loss": 0.5235, "step": 44719 }, { "epoch": 0.9484422387648194, "grad_norm": 0.39008617401123047, "learning_rate": 1.082679051939462e-05, "loss": 0.5013, "step": 44720 }, { "epoch": 0.9484634472227524, "grad_norm": 0.38206127285957336, "learning_rate": 1.0826458165423246e-05, "loss": 0.4519, "step": 44721 }, { "epoch": 0.9484846556806854, "grad_norm": 0.6562498807907104, "learning_rate": 1.0826125810532691e-05, "loss": 0.5322, "step": 44722 }, { "epoch": 0.9485058641386185, "grad_norm": 0.36530497670173645, "learning_rate": 1.0825793454723325e-05, "loss": 0.4877, "step": 44723 }, { "epoch": 0.9485270725965516, "grad_norm": 0.42933252453804016, "learning_rate": 1.0825461097995518e-05, "loss": 0.5766, "step": 44724 }, { "epoch": 0.9485482810544845, "grad_norm": 0.3217933773994446, "learning_rate": 1.0825128740349641e-05, "loss": 0.4766, "step": 44725 }, { "epoch": 0.9485694895124176, "grad_norm": 0.3637695610523224, "learning_rate": 1.0824796381786065e-05, "loss": 0.4968, "step": 44726 }, { "epoch": 0.9485906979703506, "grad_norm": 0.3437201678752899, "learning_rate": 1.0824464022305157e-05, "loss": 0.4423, "step": 44727 }, { "epoch": 0.9486119064282836, "grad_norm": 0.370184063911438, "learning_rate": 1.0824131661907286e-05, "loss": 0.4187, "step": 44728 }, { "epoch": 0.9486331148862166, "grad_norm": 0.3623475134372711, "learning_rate": 1.0823799300592823e-05, "loss": 0.4809, "step": 44729 }, { "epoch": 0.9486543233441497, "grad_norm": 0.3733454942703247, "learning_rate": 1.0823466938362138e-05, "loss": 0.4803, "step": 44730 }, { "epoch": 0.9486755318020826, "grad_norm": 0.37954235076904297, "learning_rate": 1.08231345752156e-05, "loss": 0.5196, "step": 44731 }, { "epoch": 0.9486967402600157, "grad_norm": 0.37841296195983887, "learning_rate": 1.0822802211153579e-05, "loss": 0.529, "step": 44732 }, { "epoch": 0.9487179487179487, "grad_norm": 0.37642645835876465, "learning_rate": 1.0822469846176445e-05, "loss": 0.4013, "step": 44733 }, { "epoch": 0.9487391571758818, "grad_norm": 0.4012482166290283, "learning_rate": 1.0822137480284565e-05, "loss": 0.5725, "step": 44734 }, { "epoch": 0.9487603656338147, "grad_norm": 0.39485669136047363, "learning_rate": 1.0821805113478314e-05, "loss": 0.503, "step": 44735 }, { "epoch": 0.9487815740917478, "grad_norm": 0.3686752915382385, "learning_rate": 1.0821472745758054e-05, "loss": 0.5252, "step": 44736 }, { "epoch": 0.9488027825496809, "grad_norm": 0.39828893542289734, "learning_rate": 1.0821140377124164e-05, "loss": 0.5042, "step": 44737 }, { "epoch": 0.9488239910076138, "grad_norm": 0.3523799479007721, "learning_rate": 1.0820808007577006e-05, "loss": 0.3733, "step": 44738 }, { "epoch": 0.9488451994655469, "grad_norm": 0.3674847185611725, "learning_rate": 1.0820475637116955e-05, "loss": 0.4552, "step": 44739 }, { "epoch": 0.9488664079234799, "grad_norm": 0.3648068904876709, "learning_rate": 1.0820143265744376e-05, "loss": 0.5623, "step": 44740 }, { "epoch": 0.948887616381413, "grad_norm": 0.43147245049476624, "learning_rate": 1.0819810893459643e-05, "loss": 0.5122, "step": 44741 }, { "epoch": 0.9489088248393459, "grad_norm": 0.3847930431365967, "learning_rate": 1.0819478520263122e-05, "loss": 0.4804, "step": 44742 }, { "epoch": 0.948930033297279, "grad_norm": 0.331737756729126, "learning_rate": 1.0819146146155181e-05, "loss": 0.5008, "step": 44743 }, { "epoch": 0.948951241755212, "grad_norm": 0.36656421422958374, "learning_rate": 1.0818813771136196e-05, "loss": 0.5089, "step": 44744 }, { "epoch": 0.948972450213145, "grad_norm": 0.37013915181159973, "learning_rate": 1.0818481395206535e-05, "loss": 0.4389, "step": 44745 }, { "epoch": 0.948993658671078, "grad_norm": 0.3613177239894867, "learning_rate": 1.0818149018366564e-05, "loss": 0.5136, "step": 44746 }, { "epoch": 0.9490148671290111, "grad_norm": 0.32413434982299805, "learning_rate": 1.0817816640616656e-05, "loss": 0.4423, "step": 44747 }, { "epoch": 0.949036075586944, "grad_norm": 0.35716360807418823, "learning_rate": 1.0817484261957176e-05, "loss": 0.5093, "step": 44748 }, { "epoch": 0.9490572840448771, "grad_norm": 0.4167793095111847, "learning_rate": 1.0817151882388499e-05, "loss": 0.5118, "step": 44749 }, { "epoch": 0.9490784925028101, "grad_norm": 0.35619068145751953, "learning_rate": 1.0816819501910994e-05, "loss": 0.4944, "step": 44750 }, { "epoch": 0.9490997009607431, "grad_norm": 0.36922886967658997, "learning_rate": 1.081648712052503e-05, "loss": 0.5101, "step": 44751 }, { "epoch": 0.9491209094186762, "grad_norm": 0.34820830821990967, "learning_rate": 1.0816154738230975e-05, "loss": 0.394, "step": 44752 }, { "epoch": 0.9491421178766092, "grad_norm": 0.4601876139640808, "learning_rate": 1.08158223550292e-05, "loss": 0.502, "step": 44753 }, { "epoch": 0.9491633263345423, "grad_norm": 0.45784372091293335, "learning_rate": 1.0815489970920072e-05, "loss": 0.5112, "step": 44754 }, { "epoch": 0.9491845347924752, "grad_norm": 0.35531488060951233, "learning_rate": 1.0815157585903969e-05, "loss": 0.5233, "step": 44755 }, { "epoch": 0.9492057432504083, "grad_norm": 0.38557329773902893, "learning_rate": 1.0814825199981249e-05, "loss": 0.4922, "step": 44756 }, { "epoch": 0.9492269517083413, "grad_norm": 0.4172755479812622, "learning_rate": 1.081449281315229e-05, "loss": 0.524, "step": 44757 }, { "epoch": 0.9492481601662743, "grad_norm": 0.3656175136566162, "learning_rate": 1.0814160425417457e-05, "loss": 0.4602, "step": 44758 }, { "epoch": 0.9492693686242073, "grad_norm": 0.4408021569252014, "learning_rate": 1.0813828036777125e-05, "loss": 0.4412, "step": 44759 }, { "epoch": 0.9492905770821404, "grad_norm": 0.3579235076904297, "learning_rate": 1.0813495647231658e-05, "loss": 0.4856, "step": 44760 }, { "epoch": 0.9493117855400733, "grad_norm": 0.3483303189277649, "learning_rate": 1.0813163256781432e-05, "loss": 0.4514, "step": 44761 }, { "epoch": 0.9493329939980064, "grad_norm": 0.3958089351654053, "learning_rate": 1.081283086542681e-05, "loss": 0.4961, "step": 44762 }, { "epoch": 0.9493542024559394, "grad_norm": 0.37136879563331604, "learning_rate": 1.0812498473168162e-05, "loss": 0.4919, "step": 44763 }, { "epoch": 0.9493754109138725, "grad_norm": 0.32196712493896484, "learning_rate": 1.0812166080005863e-05, "loss": 0.3664, "step": 44764 }, { "epoch": 0.9493966193718055, "grad_norm": 0.38436025381088257, "learning_rate": 1.0811833685940283e-05, "loss": 0.5462, "step": 44765 }, { "epoch": 0.9494178278297385, "grad_norm": 0.3957116901874542, "learning_rate": 1.0811501290971785e-05, "loss": 0.5426, "step": 44766 }, { "epoch": 0.9494390362876716, "grad_norm": 0.36120668053627014, "learning_rate": 1.0811168895100744e-05, "loss": 0.4771, "step": 44767 }, { "epoch": 0.9494602447456045, "grad_norm": 0.382039874792099, "learning_rate": 1.0810836498327523e-05, "loss": 0.4909, "step": 44768 }, { "epoch": 0.9494814532035376, "grad_norm": 0.3623414933681488, "learning_rate": 1.0810504100652502e-05, "loss": 0.4853, "step": 44769 }, { "epoch": 0.9495026616614706, "grad_norm": 0.3677886426448822, "learning_rate": 1.0810171702076044e-05, "loss": 0.519, "step": 44770 }, { "epoch": 0.9495238701194036, "grad_norm": 0.37123772501945496, "learning_rate": 1.080983930259852e-05, "loss": 0.4284, "step": 44771 }, { "epoch": 0.9495450785773366, "grad_norm": 0.3994496166706085, "learning_rate": 1.08095069022203e-05, "loss": 0.5844, "step": 44772 }, { "epoch": 0.9495662870352697, "grad_norm": 0.38538557291030884, "learning_rate": 1.0809174500941755e-05, "loss": 0.553, "step": 44773 }, { "epoch": 0.9495874954932026, "grad_norm": 0.9713967442512512, "learning_rate": 1.0808842098763251e-05, "loss": 0.4644, "step": 44774 }, { "epoch": 0.9496087039511357, "grad_norm": 0.4585830271244049, "learning_rate": 1.0808509695685159e-05, "loss": 0.4609, "step": 44775 }, { "epoch": 0.9496299124090687, "grad_norm": 0.34838634729385376, "learning_rate": 1.080817729170785e-05, "loss": 0.4204, "step": 44776 }, { "epoch": 0.9496511208670018, "grad_norm": 0.4008100926876068, "learning_rate": 1.0807844886831694e-05, "loss": 0.4948, "step": 44777 }, { "epoch": 0.9496723293249348, "grad_norm": 0.37234199047088623, "learning_rate": 1.080751248105706e-05, "loss": 0.5352, "step": 44778 }, { "epoch": 0.9496935377828678, "grad_norm": 0.40359407663345337, "learning_rate": 1.080718007438432e-05, "loss": 0.5426, "step": 44779 }, { "epoch": 0.9497147462408009, "grad_norm": 0.36031702160835266, "learning_rate": 1.0806847666813838e-05, "loss": 0.4031, "step": 44780 }, { "epoch": 0.9497359546987338, "grad_norm": 0.3439410626888275, "learning_rate": 1.0806515258345986e-05, "loss": 0.3926, "step": 44781 }, { "epoch": 0.9497571631566669, "grad_norm": 0.3453321158885956, "learning_rate": 1.0806182848981137e-05, "loss": 0.4631, "step": 44782 }, { "epoch": 0.9497783716145999, "grad_norm": 0.35613805055618286, "learning_rate": 1.0805850438719659e-05, "loss": 0.54, "step": 44783 }, { "epoch": 0.949799580072533, "grad_norm": 0.359409362077713, "learning_rate": 1.0805518027561921e-05, "loss": 0.4896, "step": 44784 }, { "epoch": 0.9498207885304659, "grad_norm": 0.369017094373703, "learning_rate": 1.080518561550829e-05, "loss": 0.6182, "step": 44785 }, { "epoch": 0.949841996988399, "grad_norm": 0.36428704857826233, "learning_rate": 1.0804853202559142e-05, "loss": 0.4961, "step": 44786 }, { "epoch": 0.949863205446332, "grad_norm": 0.3615398406982422, "learning_rate": 1.0804520788714843e-05, "loss": 0.4928, "step": 44787 }, { "epoch": 0.949884413904265, "grad_norm": 0.38732901215553284, "learning_rate": 1.0804188373975759e-05, "loss": 0.5167, "step": 44788 }, { "epoch": 0.949905622362198, "grad_norm": 0.3796594738960266, "learning_rate": 1.0803855958342269e-05, "loss": 0.4642, "step": 44789 }, { "epoch": 0.9499268308201311, "grad_norm": 0.33410415053367615, "learning_rate": 1.0803523541814735e-05, "loss": 0.4424, "step": 44790 }, { "epoch": 0.949948039278064, "grad_norm": 0.3499087989330292, "learning_rate": 1.0803191124393528e-05, "loss": 0.5152, "step": 44791 }, { "epoch": 0.9499692477359971, "grad_norm": 0.3801628649234772, "learning_rate": 1.080285870607902e-05, "loss": 0.439, "step": 44792 }, { "epoch": 0.9499904561939302, "grad_norm": 0.4061741530895233, "learning_rate": 1.0802526286871582e-05, "loss": 0.5236, "step": 44793 }, { "epoch": 0.9500116646518632, "grad_norm": 0.39044052362442017, "learning_rate": 1.0802193866771575e-05, "loss": 0.4868, "step": 44794 }, { "epoch": 0.9500328731097962, "grad_norm": 0.3481464385986328, "learning_rate": 1.080186144577938e-05, "loss": 0.4661, "step": 44795 }, { "epoch": 0.9500540815677292, "grad_norm": 0.48338890075683594, "learning_rate": 1.0801529023895361e-05, "loss": 0.4825, "step": 44796 }, { "epoch": 0.9500752900256623, "grad_norm": 0.40826529264450073, "learning_rate": 1.0801196601119887e-05, "loss": 0.5286, "step": 44797 }, { "epoch": 0.9500964984835952, "grad_norm": 0.3386809825897217, "learning_rate": 1.0800864177453331e-05, "loss": 0.46, "step": 44798 }, { "epoch": 0.9501177069415283, "grad_norm": 0.3838498294353485, "learning_rate": 1.0800531752896058e-05, "loss": 0.516, "step": 44799 }, { "epoch": 0.9501389153994613, "grad_norm": 0.6203606724739075, "learning_rate": 1.0800199327448444e-05, "loss": 0.4784, "step": 44800 }, { "epoch": 0.9501601238573943, "grad_norm": 0.42674124240875244, "learning_rate": 1.0799866901110851e-05, "loss": 0.4957, "step": 44801 }, { "epoch": 0.9501813323153273, "grad_norm": 0.3585366904735565, "learning_rate": 1.0799534473883655e-05, "loss": 0.3883, "step": 44802 }, { "epoch": 0.9502025407732604, "grad_norm": 0.42051175236701965, "learning_rate": 1.0799202045767225e-05, "loss": 0.5202, "step": 44803 }, { "epoch": 0.9502237492311933, "grad_norm": 0.351544052362442, "learning_rate": 1.0798869616761929e-05, "loss": 0.4069, "step": 44804 }, { "epoch": 0.9502449576891264, "grad_norm": 0.3579654395580292, "learning_rate": 1.0798537186868136e-05, "loss": 0.5288, "step": 44805 }, { "epoch": 0.9502661661470595, "grad_norm": 0.3370572030544281, "learning_rate": 1.0798204756086219e-05, "loss": 0.5092, "step": 44806 }, { "epoch": 0.9502873746049925, "grad_norm": 1.8761777877807617, "learning_rate": 1.0797872324416543e-05, "loss": 0.4054, "step": 44807 }, { "epoch": 0.9503085830629255, "grad_norm": 0.35064414143562317, "learning_rate": 1.079753989185948e-05, "loss": 0.4426, "step": 44808 }, { "epoch": 0.9503297915208585, "grad_norm": 0.36688005924224854, "learning_rate": 1.0797207458415401e-05, "loss": 0.4565, "step": 44809 }, { "epoch": 0.9503509999787916, "grad_norm": 0.40315669775009155, "learning_rate": 1.0796875024084679e-05, "loss": 0.4738, "step": 44810 }, { "epoch": 0.9503722084367245, "grad_norm": 0.33697226643562317, "learning_rate": 1.0796542588867673e-05, "loss": 0.4556, "step": 44811 }, { "epoch": 0.9503934168946576, "grad_norm": 0.380611389875412, "learning_rate": 1.0796210152764763e-05, "loss": 0.5421, "step": 44812 }, { "epoch": 0.9504146253525906, "grad_norm": 0.41437798738479614, "learning_rate": 1.0795877715776315e-05, "loss": 0.5614, "step": 44813 }, { "epoch": 0.9504358338105237, "grad_norm": 0.37651965022087097, "learning_rate": 1.0795545277902697e-05, "loss": 0.5068, "step": 44814 }, { "epoch": 0.9504570422684566, "grad_norm": 0.3864666223526001, "learning_rate": 1.0795212839144283e-05, "loss": 0.5669, "step": 44815 }, { "epoch": 0.9504782507263897, "grad_norm": 0.3803204298019409, "learning_rate": 1.0794880399501435e-05, "loss": 0.5274, "step": 44816 }, { "epoch": 0.9504994591843227, "grad_norm": 0.6151195764541626, "learning_rate": 1.0794547958974533e-05, "loss": 0.4448, "step": 44817 }, { "epoch": 0.9505206676422557, "grad_norm": 0.49970781803131104, "learning_rate": 1.0794215517563941e-05, "loss": 0.4882, "step": 44818 }, { "epoch": 0.9505418761001888, "grad_norm": 0.3676673471927643, "learning_rate": 1.0793883075270028e-05, "loss": 0.4505, "step": 44819 }, { "epoch": 0.9505630845581218, "grad_norm": 0.35020607709884644, "learning_rate": 1.0793550632093164e-05, "loss": 0.4666, "step": 44820 }, { "epoch": 0.9505842930160548, "grad_norm": 0.343513548374176, "learning_rate": 1.0793218188033723e-05, "loss": 0.4127, "step": 44821 }, { "epoch": 0.9506055014739878, "grad_norm": 0.37449225783348083, "learning_rate": 1.079288574309207e-05, "loss": 0.5269, "step": 44822 }, { "epoch": 0.9506267099319209, "grad_norm": 0.3843914568424225, "learning_rate": 1.0792553297268575e-05, "loss": 0.4275, "step": 44823 }, { "epoch": 0.9506479183898539, "grad_norm": 0.36750733852386475, "learning_rate": 1.0792220850563612e-05, "loss": 0.5164, "step": 44824 }, { "epoch": 0.9506691268477869, "grad_norm": 0.4742826223373413, "learning_rate": 1.0791888402977546e-05, "loss": 0.4629, "step": 44825 }, { "epoch": 0.9506903353057199, "grad_norm": 0.3704776167869568, "learning_rate": 1.079155595451075e-05, "loss": 0.4799, "step": 44826 }, { "epoch": 0.950711543763653, "grad_norm": 0.3774271607398987, "learning_rate": 1.0791223505163591e-05, "loss": 0.4604, "step": 44827 }, { "epoch": 0.9507327522215859, "grad_norm": 0.3346777856349945, "learning_rate": 1.0790891054936438e-05, "loss": 0.4588, "step": 44828 }, { "epoch": 0.950753960679519, "grad_norm": 0.3763999342918396, "learning_rate": 1.0790558603829666e-05, "loss": 0.4767, "step": 44829 }, { "epoch": 0.950775169137452, "grad_norm": 0.4164743423461914, "learning_rate": 1.079022615184364e-05, "loss": 0.5315, "step": 44830 }, { "epoch": 0.950796377595385, "grad_norm": 0.35079383850097656, "learning_rate": 1.0789893698978734e-05, "loss": 0.4875, "step": 44831 }, { "epoch": 0.950817586053318, "grad_norm": 0.33963602781295776, "learning_rate": 1.0789561245235314e-05, "loss": 0.441, "step": 44832 }, { "epoch": 0.9508387945112511, "grad_norm": 0.33874645829200745, "learning_rate": 1.0789228790613751e-05, "loss": 0.5093, "step": 44833 }, { "epoch": 0.9508600029691842, "grad_norm": 0.35589396953582764, "learning_rate": 1.0788896335114411e-05, "loss": 0.4786, "step": 44834 }, { "epoch": 0.9508812114271171, "grad_norm": 0.3916599452495575, "learning_rate": 1.0788563878737672e-05, "loss": 0.4748, "step": 44835 }, { "epoch": 0.9509024198850502, "grad_norm": 0.3659854829311371, "learning_rate": 1.0788231421483895e-05, "loss": 0.4495, "step": 44836 }, { "epoch": 0.9509236283429832, "grad_norm": 0.39194366335868835, "learning_rate": 1.0787898963353458e-05, "loss": 0.5262, "step": 44837 }, { "epoch": 0.9509448368009162, "grad_norm": 0.33617112040519714, "learning_rate": 1.0787566504346726e-05, "loss": 0.53, "step": 44838 }, { "epoch": 0.9509660452588492, "grad_norm": 0.38609012961387634, "learning_rate": 1.0787234044464063e-05, "loss": 0.4792, "step": 44839 }, { "epoch": 0.9509872537167823, "grad_norm": 0.3747442960739136, "learning_rate": 1.0786901583705853e-05, "loss": 0.5159, "step": 44840 }, { "epoch": 0.9510084621747152, "grad_norm": 0.38178548216819763, "learning_rate": 1.0786569122072455e-05, "loss": 0.4426, "step": 44841 }, { "epoch": 0.9510296706326483, "grad_norm": 0.3402822017669678, "learning_rate": 1.0786236659564243e-05, "loss": 0.5133, "step": 44842 }, { "epoch": 0.9510508790905813, "grad_norm": 0.4263918101787567, "learning_rate": 1.0785904196181583e-05, "loss": 0.5133, "step": 44843 }, { "epoch": 0.9510720875485144, "grad_norm": 0.3847940266132355, "learning_rate": 1.0785571731924847e-05, "loss": 0.5049, "step": 44844 }, { "epoch": 0.9510932960064473, "grad_norm": 0.385437548160553, "learning_rate": 1.0785239266794407e-05, "loss": 0.4264, "step": 44845 }, { "epoch": 0.9511145044643804, "grad_norm": 0.6629654169082642, "learning_rate": 1.0784906800790629e-05, "loss": 0.4844, "step": 44846 }, { "epoch": 0.9511357129223135, "grad_norm": 0.3902798593044281, "learning_rate": 1.0784574333913887e-05, "loss": 0.422, "step": 44847 }, { "epoch": 0.9511569213802464, "grad_norm": 0.43614548444747925, "learning_rate": 1.0784241866164546e-05, "loss": 0.3857, "step": 44848 }, { "epoch": 0.9511781298381795, "grad_norm": 0.35050004720687866, "learning_rate": 1.078390939754298e-05, "loss": 0.5038, "step": 44849 }, { "epoch": 0.9511993382961125, "grad_norm": 0.7014691829681396, "learning_rate": 1.0783576928049552e-05, "loss": 0.5171, "step": 44850 }, { "epoch": 0.9512205467540455, "grad_norm": 0.36218202114105225, "learning_rate": 1.0783244457684643e-05, "loss": 0.4381, "step": 44851 }, { "epoch": 0.9512417552119785, "grad_norm": 0.5814506411552429, "learning_rate": 1.0782911986448612e-05, "loss": 0.5233, "step": 44852 }, { "epoch": 0.9512629636699116, "grad_norm": 0.35925212502479553, "learning_rate": 1.0782579514341833e-05, "loss": 0.4944, "step": 44853 }, { "epoch": 0.9512841721278446, "grad_norm": 0.38956859707832336, "learning_rate": 1.078224704136468e-05, "loss": 0.5453, "step": 44854 }, { "epoch": 0.9513053805857776, "grad_norm": 0.3821297287940979, "learning_rate": 1.0781914567517515e-05, "loss": 0.5022, "step": 44855 }, { "epoch": 0.9513265890437106, "grad_norm": 0.3885045349597931, "learning_rate": 1.0781582092800714e-05, "loss": 0.5123, "step": 44856 }, { "epoch": 0.9513477975016437, "grad_norm": 0.40858593583106995, "learning_rate": 1.0781249617214642e-05, "loss": 0.5346, "step": 44857 }, { "epoch": 0.9513690059595766, "grad_norm": 0.43355754017829895, "learning_rate": 1.078091714075967e-05, "loss": 0.5475, "step": 44858 }, { "epoch": 0.9513902144175097, "grad_norm": 0.44910308718681335, "learning_rate": 1.0780584663436168e-05, "loss": 0.5095, "step": 44859 }, { "epoch": 0.9514114228754428, "grad_norm": 0.4278948903083801, "learning_rate": 1.0780252185244511e-05, "loss": 0.4904, "step": 44860 }, { "epoch": 0.9514326313333757, "grad_norm": 0.4034396708011627, "learning_rate": 1.0779919706185063e-05, "loss": 0.4516, "step": 44861 }, { "epoch": 0.9514538397913088, "grad_norm": 0.4927457869052887, "learning_rate": 1.0779587226258193e-05, "loss": 0.5323, "step": 44862 }, { "epoch": 0.9514750482492418, "grad_norm": 0.4267357587814331, "learning_rate": 1.0779254745464276e-05, "loss": 0.4922, "step": 44863 }, { "epoch": 0.9514962567071749, "grad_norm": 0.4384576380252838, "learning_rate": 1.0778922263803674e-05, "loss": 0.5103, "step": 44864 }, { "epoch": 0.9515174651651078, "grad_norm": 0.37686172127723694, "learning_rate": 1.0778589781276765e-05, "loss": 0.4898, "step": 44865 }, { "epoch": 0.9515386736230409, "grad_norm": 0.4109312891960144, "learning_rate": 1.0778257297883914e-05, "loss": 0.4954, "step": 44866 }, { "epoch": 0.9515598820809739, "grad_norm": 0.3972317576408386, "learning_rate": 1.0777924813625491e-05, "loss": 0.4754, "step": 44867 }, { "epoch": 0.9515810905389069, "grad_norm": 0.37346503138542175, "learning_rate": 1.0777592328501869e-05, "loss": 0.4619, "step": 44868 }, { "epoch": 0.9516022989968399, "grad_norm": 0.39433038234710693, "learning_rate": 1.0777259842513417e-05, "loss": 0.4637, "step": 44869 }, { "epoch": 0.951623507454773, "grad_norm": 0.5382516980171204, "learning_rate": 1.0776927355660499e-05, "loss": 0.441, "step": 44870 }, { "epoch": 0.9516447159127059, "grad_norm": 0.5559909343719482, "learning_rate": 1.077659486794349e-05, "loss": 0.4397, "step": 44871 }, { "epoch": 0.951665924370639, "grad_norm": 0.3502647578716278, "learning_rate": 1.077626237936276e-05, "loss": 0.4746, "step": 44872 }, { "epoch": 0.951687132828572, "grad_norm": 0.37889423966407776, "learning_rate": 1.0775929889918677e-05, "loss": 0.5661, "step": 44873 }, { "epoch": 0.951708341286505, "grad_norm": 0.6532462239265442, "learning_rate": 1.0775597399611616e-05, "loss": 0.5524, "step": 44874 }, { "epoch": 0.9517295497444381, "grad_norm": 0.4118897318840027, "learning_rate": 1.0775264908441938e-05, "loss": 0.5206, "step": 44875 }, { "epoch": 0.9517507582023711, "grad_norm": 0.30492788553237915, "learning_rate": 1.0774932416410016e-05, "loss": 0.4492, "step": 44876 }, { "epoch": 0.9517719666603042, "grad_norm": 0.38775962591171265, "learning_rate": 1.0774599923516225e-05, "loss": 0.4822, "step": 44877 }, { "epoch": 0.9517931751182371, "grad_norm": 0.3854910433292389, "learning_rate": 1.0774267429760927e-05, "loss": 0.4832, "step": 44878 }, { "epoch": 0.9518143835761702, "grad_norm": 0.3549210727214813, "learning_rate": 1.0773934935144494e-05, "loss": 0.4605, "step": 44879 }, { "epoch": 0.9518355920341032, "grad_norm": 0.4012708067893982, "learning_rate": 1.0773602439667302e-05, "loss": 0.5333, "step": 44880 }, { "epoch": 0.9518568004920362, "grad_norm": 0.3573247194290161, "learning_rate": 1.0773269943329713e-05, "loss": 0.4734, "step": 44881 }, { "epoch": 0.9518780089499692, "grad_norm": 0.3576620817184448, "learning_rate": 1.0772937446132102e-05, "loss": 0.5019, "step": 44882 }, { "epoch": 0.9518992174079023, "grad_norm": 0.4280415177345276, "learning_rate": 1.0772604948074837e-05, "loss": 0.4534, "step": 44883 }, { "epoch": 0.9519204258658353, "grad_norm": 0.39342331886291504, "learning_rate": 1.0772272449158284e-05, "loss": 0.5469, "step": 44884 }, { "epoch": 0.9519416343237683, "grad_norm": 0.35018008947372437, "learning_rate": 1.077193994938282e-05, "loss": 0.4374, "step": 44885 }, { "epoch": 0.9519628427817013, "grad_norm": 0.375082403421402, "learning_rate": 1.0771607448748809e-05, "loss": 0.5013, "step": 44886 }, { "epoch": 0.9519840512396344, "grad_norm": 0.36400991678237915, "learning_rate": 1.0771274947256622e-05, "loss": 0.5025, "step": 44887 }, { "epoch": 0.9520052596975674, "grad_norm": 0.3722912669181824, "learning_rate": 1.0770942444906634e-05, "loss": 0.4856, "step": 44888 }, { "epoch": 0.9520264681555004, "grad_norm": 0.4006929397583008, "learning_rate": 1.0770609941699209e-05, "loss": 0.5636, "step": 44889 }, { "epoch": 0.9520476766134335, "grad_norm": 0.3477902114391327, "learning_rate": 1.0770277437634713e-05, "loss": 0.4811, "step": 44890 }, { "epoch": 0.9520688850713664, "grad_norm": 0.4305386245250702, "learning_rate": 1.0769944932713525e-05, "loss": 0.5532, "step": 44891 }, { "epoch": 0.9520900935292995, "grad_norm": 0.38320422172546387, "learning_rate": 1.0769612426936014e-05, "loss": 0.5264, "step": 44892 }, { "epoch": 0.9521113019872325, "grad_norm": 0.33332401514053345, "learning_rate": 1.0769279920302542e-05, "loss": 0.468, "step": 44893 }, { "epoch": 0.9521325104451656, "grad_norm": 0.3524326980113983, "learning_rate": 1.0768947412813487e-05, "loss": 0.4323, "step": 44894 }, { "epoch": 0.9521537189030985, "grad_norm": 0.3884018063545227, "learning_rate": 1.0768614904469212e-05, "loss": 0.4966, "step": 44895 }, { "epoch": 0.9521749273610316, "grad_norm": 0.3445611298084259, "learning_rate": 1.0768282395270094e-05, "loss": 0.5168, "step": 44896 }, { "epoch": 0.9521961358189646, "grad_norm": 0.4460280239582062, "learning_rate": 1.0767949885216496e-05, "loss": 0.5384, "step": 44897 }, { "epoch": 0.9522173442768976, "grad_norm": 0.4052952527999878, "learning_rate": 1.0767617374308789e-05, "loss": 0.4579, "step": 44898 }, { "epoch": 0.9522385527348306, "grad_norm": 0.3763757348060608, "learning_rate": 1.0767284862547349e-05, "loss": 0.4748, "step": 44899 }, { "epoch": 0.9522597611927637, "grad_norm": 0.37235593795776367, "learning_rate": 1.076695234993254e-05, "loss": 0.5524, "step": 44900 }, { "epoch": 0.9522809696506968, "grad_norm": 0.3190155625343323, "learning_rate": 1.0766619836464734e-05, "loss": 0.3794, "step": 44901 }, { "epoch": 0.9523021781086297, "grad_norm": 0.4043070673942566, "learning_rate": 1.0766287322144297e-05, "loss": 0.4378, "step": 44902 }, { "epoch": 0.9523233865665628, "grad_norm": 0.440065860748291, "learning_rate": 1.0765954806971603e-05, "loss": 0.4674, "step": 44903 }, { "epoch": 0.9523445950244958, "grad_norm": 0.38727572560310364, "learning_rate": 1.0765622290947023e-05, "loss": 0.5404, "step": 44904 }, { "epoch": 0.9523658034824288, "grad_norm": 0.34210604429244995, "learning_rate": 1.0765289774070923e-05, "loss": 0.468, "step": 44905 }, { "epoch": 0.9523870119403618, "grad_norm": 0.37412017583847046, "learning_rate": 1.0764957256343676e-05, "loss": 0.4761, "step": 44906 }, { "epoch": 0.9524082203982949, "grad_norm": 0.3600480556488037, "learning_rate": 1.0764624737765647e-05, "loss": 0.5194, "step": 44907 }, { "epoch": 0.9524294288562278, "grad_norm": 0.3714810609817505, "learning_rate": 1.0764292218337214e-05, "loss": 0.4558, "step": 44908 }, { "epoch": 0.9524506373141609, "grad_norm": 0.4335203766822815, "learning_rate": 1.0763959698058737e-05, "loss": 0.4498, "step": 44909 }, { "epoch": 0.9524718457720939, "grad_norm": 0.33088037371635437, "learning_rate": 1.076362717693059e-05, "loss": 0.4282, "step": 44910 }, { "epoch": 0.952493054230027, "grad_norm": 0.36122336983680725, "learning_rate": 1.0763294654953147e-05, "loss": 0.4902, "step": 44911 }, { "epoch": 0.9525142626879599, "grad_norm": 0.42172226309776306, "learning_rate": 1.0762962132126773e-05, "loss": 0.492, "step": 44912 }, { "epoch": 0.952535471145893, "grad_norm": 0.3295297920703888, "learning_rate": 1.076262960845184e-05, "loss": 0.4708, "step": 44913 }, { "epoch": 0.952556679603826, "grad_norm": 0.40268006920814514, "learning_rate": 1.0762297083928717e-05, "loss": 0.5877, "step": 44914 }, { "epoch": 0.952577888061759, "grad_norm": 0.36684781312942505, "learning_rate": 1.0761964558557772e-05, "loss": 0.499, "step": 44915 }, { "epoch": 0.9525990965196921, "grad_norm": 0.3620437681674957, "learning_rate": 1.0761632032339377e-05, "loss": 0.4491, "step": 44916 }, { "epoch": 0.9526203049776251, "grad_norm": 0.31874388456344604, "learning_rate": 1.0761299505273903e-05, "loss": 0.5474, "step": 44917 }, { "epoch": 0.9526415134355581, "grad_norm": 0.39501985907554626, "learning_rate": 1.0760966977361717e-05, "loss": 0.3732, "step": 44918 }, { "epoch": 0.9526627218934911, "grad_norm": 0.4023715853691101, "learning_rate": 1.076063444860319e-05, "loss": 0.4826, "step": 44919 }, { "epoch": 0.9526839303514242, "grad_norm": 0.36177772283554077, "learning_rate": 1.0760301918998695e-05, "loss": 0.4854, "step": 44920 }, { "epoch": 0.9527051388093571, "grad_norm": 0.38262516260147095, "learning_rate": 1.0759969388548594e-05, "loss": 0.4838, "step": 44921 }, { "epoch": 0.9527263472672902, "grad_norm": 0.36944207549095154, "learning_rate": 1.0759636857253263e-05, "loss": 0.5646, "step": 44922 }, { "epoch": 0.9527475557252232, "grad_norm": 0.3954606056213379, "learning_rate": 1.0759304325113073e-05, "loss": 0.5881, "step": 44923 }, { "epoch": 0.9527687641831563, "grad_norm": 0.37594863772392273, "learning_rate": 1.0758971792128389e-05, "loss": 0.4832, "step": 44924 }, { "epoch": 0.9527899726410892, "grad_norm": 0.34407928586006165, "learning_rate": 1.0758639258299586e-05, "loss": 0.4512, "step": 44925 }, { "epoch": 0.9528111810990223, "grad_norm": 0.3762209117412567, "learning_rate": 1.0758306723627028e-05, "loss": 0.4906, "step": 44926 }, { "epoch": 0.9528323895569553, "grad_norm": 0.3441620469093323, "learning_rate": 1.075797418811109e-05, "loss": 0.4655, "step": 44927 }, { "epoch": 0.9528535980148883, "grad_norm": 0.3582736551761627, "learning_rate": 1.075764165175214e-05, "loss": 0.4926, "step": 44928 }, { "epoch": 0.9528748064728214, "grad_norm": 0.36262238025665283, "learning_rate": 1.0757309114550544e-05, "loss": 0.5606, "step": 44929 }, { "epoch": 0.9528960149307544, "grad_norm": 0.3735678195953369, "learning_rate": 1.0756976576506677e-05, "loss": 0.4851, "step": 44930 }, { "epoch": 0.9529172233886875, "grad_norm": 0.35869041085243225, "learning_rate": 1.0756644037620909e-05, "loss": 0.4358, "step": 44931 }, { "epoch": 0.9529384318466204, "grad_norm": 0.35850682854652405, "learning_rate": 1.0756311497893606e-05, "loss": 0.51, "step": 44932 }, { "epoch": 0.9529596403045535, "grad_norm": 0.43780654668807983, "learning_rate": 1.0755978957325141e-05, "loss": 0.5248, "step": 44933 }, { "epoch": 0.9529808487624865, "grad_norm": 0.40824586153030396, "learning_rate": 1.0755646415915884e-05, "loss": 0.5447, "step": 44934 }, { "epoch": 0.9530020572204195, "grad_norm": 0.34682098031044006, "learning_rate": 1.0755313873666198e-05, "loss": 0.5309, "step": 44935 }, { "epoch": 0.9530232656783525, "grad_norm": 0.39946219325065613, "learning_rate": 1.0754981330576464e-05, "loss": 0.5578, "step": 44936 }, { "epoch": 0.9530444741362856, "grad_norm": 0.377863347530365, "learning_rate": 1.0754648786647047e-05, "loss": 0.5138, "step": 44937 }, { "epoch": 0.9530656825942185, "grad_norm": 0.36065322160720825, "learning_rate": 1.075431624187831e-05, "loss": 0.5213, "step": 44938 }, { "epoch": 0.9530868910521516, "grad_norm": 0.44688528776168823, "learning_rate": 1.0753983696270636e-05, "loss": 0.4546, "step": 44939 }, { "epoch": 0.9531080995100846, "grad_norm": 0.37252846360206604, "learning_rate": 1.0753651149824385e-05, "loss": 0.4642, "step": 44940 }, { "epoch": 0.9531293079680176, "grad_norm": 0.37267637252807617, "learning_rate": 1.0753318602539931e-05, "loss": 0.5131, "step": 44941 }, { "epoch": 0.9531505164259507, "grad_norm": 0.4059813320636749, "learning_rate": 1.075298605441764e-05, "loss": 0.4788, "step": 44942 }, { "epoch": 0.9531717248838837, "grad_norm": 0.40722838044166565, "learning_rate": 1.0752653505457886e-05, "loss": 0.6069, "step": 44943 }, { "epoch": 0.9531929333418168, "grad_norm": 0.3958154618740082, "learning_rate": 1.0752320955661035e-05, "loss": 0.4829, "step": 44944 }, { "epoch": 0.9532141417997497, "grad_norm": 0.3539692461490631, "learning_rate": 1.0751988405027462e-05, "loss": 0.4581, "step": 44945 }, { "epoch": 0.9532353502576828, "grad_norm": 0.3481834828853607, "learning_rate": 1.0751655853557535e-05, "loss": 0.4663, "step": 44946 }, { "epoch": 0.9532565587156158, "grad_norm": 0.4188326597213745, "learning_rate": 1.0751323301251621e-05, "loss": 0.4816, "step": 44947 }, { "epoch": 0.9532777671735488, "grad_norm": 0.3400372862815857, "learning_rate": 1.075099074811009e-05, "loss": 0.4769, "step": 44948 }, { "epoch": 0.9532989756314818, "grad_norm": 0.36414268612861633, "learning_rate": 1.0750658194133315e-05, "loss": 0.4335, "step": 44949 }, { "epoch": 0.9533201840894149, "grad_norm": 0.682883620262146, "learning_rate": 1.0750325639321666e-05, "loss": 0.4713, "step": 44950 }, { "epoch": 0.9533413925473478, "grad_norm": 0.4725545048713684, "learning_rate": 1.0749993083675511e-05, "loss": 0.519, "step": 44951 }, { "epoch": 0.9533626010052809, "grad_norm": 0.3569195568561554, "learning_rate": 1.0749660527195218e-05, "loss": 0.4222, "step": 44952 }, { "epoch": 0.9533838094632139, "grad_norm": 0.3556500971317291, "learning_rate": 1.0749327969881162e-05, "loss": 0.4263, "step": 44953 }, { "epoch": 0.953405017921147, "grad_norm": 0.3919984698295593, "learning_rate": 1.0748995411733706e-05, "loss": 0.4476, "step": 44954 }, { "epoch": 0.9534262263790799, "grad_norm": 0.3711238205432892, "learning_rate": 1.0748662852753227e-05, "loss": 0.4726, "step": 44955 }, { "epoch": 0.953447434837013, "grad_norm": 0.6630163788795471, "learning_rate": 1.0748330292940092e-05, "loss": 0.4524, "step": 44956 }, { "epoch": 0.9534686432949461, "grad_norm": 0.32394838333129883, "learning_rate": 1.074799773229467e-05, "loss": 0.4143, "step": 44957 }, { "epoch": 0.953489851752879, "grad_norm": 0.373825341463089, "learning_rate": 1.0747665170817333e-05, "loss": 0.4804, "step": 44958 }, { "epoch": 0.9535110602108121, "grad_norm": 0.3596963882446289, "learning_rate": 1.0747332608508448e-05, "loss": 0.481, "step": 44959 }, { "epoch": 0.9535322686687451, "grad_norm": 0.38242307305336, "learning_rate": 1.0747000045368383e-05, "loss": 0.4314, "step": 44960 }, { "epoch": 0.9535534771266782, "grad_norm": 0.333769828081131, "learning_rate": 1.0746667481397513e-05, "loss": 0.5277, "step": 44961 }, { "epoch": 0.9535746855846111, "grad_norm": 0.40216878056526184, "learning_rate": 1.0746334916596206e-05, "loss": 0.5755, "step": 44962 }, { "epoch": 0.9535958940425442, "grad_norm": 0.3527050018310547, "learning_rate": 1.0746002350964833e-05, "loss": 0.4451, "step": 44963 }, { "epoch": 0.9536171025004772, "grad_norm": 0.45010703802108765, "learning_rate": 1.0745669784503763e-05, "loss": 0.494, "step": 44964 }, { "epoch": 0.9536383109584102, "grad_norm": 0.36639586091041565, "learning_rate": 1.0745337217213366e-05, "loss": 0.4584, "step": 44965 }, { "epoch": 0.9536595194163432, "grad_norm": 0.4163844883441925, "learning_rate": 1.0745004649094006e-05, "loss": 0.5253, "step": 44966 }, { "epoch": 0.9536807278742763, "grad_norm": 0.38208499550819397, "learning_rate": 1.0744672080146062e-05, "loss": 0.4853, "step": 44967 }, { "epoch": 0.9537019363322092, "grad_norm": 0.4184098541736603, "learning_rate": 1.07443395103699e-05, "loss": 0.5127, "step": 44968 }, { "epoch": 0.9537231447901423, "grad_norm": 0.341991662979126, "learning_rate": 1.0744006939765891e-05, "loss": 0.4175, "step": 44969 }, { "epoch": 0.9537443532480754, "grad_norm": 0.3401527404785156, "learning_rate": 1.0743674368334404e-05, "loss": 0.4885, "step": 44970 }, { "epoch": 0.9537655617060083, "grad_norm": 0.37114188075065613, "learning_rate": 1.074334179607581e-05, "loss": 0.4711, "step": 44971 }, { "epoch": 0.9537867701639414, "grad_norm": 0.3868647813796997, "learning_rate": 1.0743009222990475e-05, "loss": 0.4943, "step": 44972 }, { "epoch": 0.9538079786218744, "grad_norm": 0.32795530557632446, "learning_rate": 1.0742676649078773e-05, "loss": 0.4263, "step": 44973 }, { "epoch": 0.9538291870798075, "grad_norm": 0.3967508375644684, "learning_rate": 1.074234407434107e-05, "loss": 0.5905, "step": 44974 }, { "epoch": 0.9538503955377404, "grad_norm": 0.42995455861091614, "learning_rate": 1.074201149877774e-05, "loss": 0.4175, "step": 44975 }, { "epoch": 0.9538716039956735, "grad_norm": 0.40568187832832336, "learning_rate": 1.0741678922389154e-05, "loss": 0.5129, "step": 44976 }, { "epoch": 0.9538928124536065, "grad_norm": 0.40699344873428345, "learning_rate": 1.0741346345175674e-05, "loss": 0.5265, "step": 44977 }, { "epoch": 0.9539140209115395, "grad_norm": 0.3801216185092926, "learning_rate": 1.074101376713768e-05, "loss": 0.5159, "step": 44978 }, { "epoch": 0.9539352293694725, "grad_norm": 0.3644379675388336, "learning_rate": 1.0740681188275534e-05, "loss": 0.484, "step": 44979 }, { "epoch": 0.9539564378274056, "grad_norm": 0.39894551038742065, "learning_rate": 1.0740348608589607e-05, "loss": 0.5402, "step": 44980 }, { "epoch": 0.9539776462853385, "grad_norm": 0.32835161685943604, "learning_rate": 1.0740016028080276e-05, "loss": 0.4899, "step": 44981 }, { "epoch": 0.9539988547432716, "grad_norm": 0.3648713231086731, "learning_rate": 1.0739683446747902e-05, "loss": 0.4942, "step": 44982 }, { "epoch": 0.9540200632012047, "grad_norm": 0.36875295639038086, "learning_rate": 1.073935086459286e-05, "loss": 0.474, "step": 44983 }, { "epoch": 0.9540412716591377, "grad_norm": 0.3543532192707062, "learning_rate": 1.0739018281615518e-05, "loss": 0.4643, "step": 44984 }, { "epoch": 0.9540624801170707, "grad_norm": 0.4094843566417694, "learning_rate": 1.0738685697816247e-05, "loss": 0.4579, "step": 44985 }, { "epoch": 0.9540836885750037, "grad_norm": 0.5579107999801636, "learning_rate": 1.0738353113195415e-05, "loss": 0.5065, "step": 44986 }, { "epoch": 0.9541048970329368, "grad_norm": 0.396697461605072, "learning_rate": 1.073802052775339e-05, "loss": 0.5325, "step": 44987 }, { "epoch": 0.9541261054908697, "grad_norm": 0.3624145984649658, "learning_rate": 1.0737687941490551e-05, "loss": 0.4442, "step": 44988 }, { "epoch": 0.9541473139488028, "grad_norm": 0.3845595121383667, "learning_rate": 1.0737355354407257e-05, "loss": 0.4983, "step": 44989 }, { "epoch": 0.9541685224067358, "grad_norm": 2.0918514728546143, "learning_rate": 1.0737022766503886e-05, "loss": 0.4061, "step": 44990 }, { "epoch": 0.9541897308646689, "grad_norm": 0.3838101923465729, "learning_rate": 1.0736690177780802e-05, "loss": 0.5424, "step": 44991 }, { "epoch": 0.9542109393226018, "grad_norm": 0.3658423125743866, "learning_rate": 1.0736357588238381e-05, "loss": 0.4173, "step": 44992 }, { "epoch": 0.9542321477805349, "grad_norm": 0.38668516278266907, "learning_rate": 1.0736024997876987e-05, "loss": 0.4568, "step": 44993 }, { "epoch": 0.9542533562384679, "grad_norm": 0.39011111855506897, "learning_rate": 1.0735692406696992e-05, "loss": 0.4915, "step": 44994 }, { "epoch": 0.9542745646964009, "grad_norm": 0.48635706305503845, "learning_rate": 1.0735359814698767e-05, "loss": 0.4904, "step": 44995 }, { "epoch": 0.954295773154334, "grad_norm": 0.3331945538520813, "learning_rate": 1.0735027221882683e-05, "loss": 0.5053, "step": 44996 }, { "epoch": 0.954316981612267, "grad_norm": 0.3689078986644745, "learning_rate": 1.0734694628249107e-05, "loss": 0.4712, "step": 44997 }, { "epoch": 0.9543381900702, "grad_norm": 0.36566588282585144, "learning_rate": 1.0734362033798411e-05, "loss": 0.5119, "step": 44998 }, { "epoch": 0.954359398528133, "grad_norm": 0.39699363708496094, "learning_rate": 1.073402943853096e-05, "loss": 0.4259, "step": 44999 }, { "epoch": 0.9543806069860661, "grad_norm": 0.34098678827285767, "learning_rate": 1.073369684244713e-05, "loss": 0.5094, "step": 45000 }, { "epoch": 0.954401815443999, "grad_norm": 0.43126940727233887, "learning_rate": 1.073336424554729e-05, "loss": 0.517, "step": 45001 }, { "epoch": 0.9544230239019321, "grad_norm": 0.3863014280796051, "learning_rate": 1.0733031647831808e-05, "loss": 0.5715, "step": 45002 }, { "epoch": 0.9544442323598651, "grad_norm": 0.3701601028442383, "learning_rate": 1.0732699049301053e-05, "loss": 0.4617, "step": 45003 }, { "epoch": 0.9544654408177982, "grad_norm": 0.32738515734672546, "learning_rate": 1.07323664499554e-05, "loss": 0.4323, "step": 45004 }, { "epoch": 0.9544866492757311, "grad_norm": 0.46404850482940674, "learning_rate": 1.0732033849795212e-05, "loss": 0.4955, "step": 45005 }, { "epoch": 0.9545078577336642, "grad_norm": 0.34016546607017517, "learning_rate": 1.0731701248820863e-05, "loss": 0.4465, "step": 45006 }, { "epoch": 0.9545290661915972, "grad_norm": 0.40852898359298706, "learning_rate": 1.0731368647032725e-05, "loss": 0.4813, "step": 45007 }, { "epoch": 0.9545502746495302, "grad_norm": 0.4178232252597809, "learning_rate": 1.073103604443116e-05, "loss": 0.4507, "step": 45008 }, { "epoch": 0.9545714831074632, "grad_norm": 0.37019047141075134, "learning_rate": 1.0730703441016547e-05, "loss": 0.5014, "step": 45009 }, { "epoch": 0.9545926915653963, "grad_norm": 0.42391300201416016, "learning_rate": 1.0730370836789252e-05, "loss": 0.4974, "step": 45010 }, { "epoch": 0.9546139000233294, "grad_norm": 0.3898616433143616, "learning_rate": 1.0730038231749644e-05, "loss": 0.5215, "step": 45011 }, { "epoch": 0.9546351084812623, "grad_norm": 0.39029985666275024, "learning_rate": 1.0729705625898093e-05, "loss": 0.3902, "step": 45012 }, { "epoch": 0.9546563169391954, "grad_norm": 0.43793785572052, "learning_rate": 1.0729373019234972e-05, "loss": 0.5437, "step": 45013 }, { "epoch": 0.9546775253971284, "grad_norm": 0.35840609669685364, "learning_rate": 1.0729040411760645e-05, "loss": 0.5107, "step": 45014 }, { "epoch": 0.9546987338550614, "grad_norm": 0.34290915727615356, "learning_rate": 1.072870780347549e-05, "loss": 0.4388, "step": 45015 }, { "epoch": 0.9547199423129944, "grad_norm": 0.35895442962646484, "learning_rate": 1.0728375194379869e-05, "loss": 0.4126, "step": 45016 }, { "epoch": 0.9547411507709275, "grad_norm": 0.37972816824913025, "learning_rate": 1.0728042584474156e-05, "loss": 0.4736, "step": 45017 }, { "epoch": 0.9547623592288604, "grad_norm": 0.4005788266658783, "learning_rate": 1.0727709973758722e-05, "loss": 0.4429, "step": 45018 }, { "epoch": 0.9547835676867935, "grad_norm": 0.3618331551551819, "learning_rate": 1.0727377362233935e-05, "loss": 0.4793, "step": 45019 }, { "epoch": 0.9548047761447265, "grad_norm": 0.36921143531799316, "learning_rate": 1.0727044749900162e-05, "loss": 0.4488, "step": 45020 }, { "epoch": 0.9548259846026595, "grad_norm": 0.37440016865730286, "learning_rate": 1.072671213675778e-05, "loss": 0.5487, "step": 45021 }, { "epoch": 0.9548471930605925, "grad_norm": 0.4808378517627716, "learning_rate": 1.0726379522807153e-05, "loss": 0.483, "step": 45022 }, { "epoch": 0.9548684015185256, "grad_norm": 0.3530375361442566, "learning_rate": 1.0726046908048655e-05, "loss": 0.4701, "step": 45023 }, { "epoch": 0.9548896099764587, "grad_norm": 0.45758211612701416, "learning_rate": 1.0725714292482655e-05, "loss": 0.5445, "step": 45024 }, { "epoch": 0.9549108184343916, "grad_norm": 0.4458177983760834, "learning_rate": 1.0725381676109517e-05, "loss": 0.4176, "step": 45025 }, { "epoch": 0.9549320268923247, "grad_norm": 0.32271039485931396, "learning_rate": 1.0725049058929617e-05, "loss": 0.4379, "step": 45026 }, { "epoch": 0.9549532353502577, "grad_norm": 0.4672643542289734, "learning_rate": 1.0724716440943326e-05, "loss": 0.4274, "step": 45027 }, { "epoch": 0.9549744438081907, "grad_norm": 0.36482661962509155, "learning_rate": 1.072438382215101e-05, "loss": 0.498, "step": 45028 }, { "epoch": 0.9549956522661237, "grad_norm": 0.7156510353088379, "learning_rate": 1.0724051202553042e-05, "loss": 0.497, "step": 45029 }, { "epoch": 0.9550168607240568, "grad_norm": 0.5295955538749695, "learning_rate": 1.0723718582149791e-05, "loss": 0.5375, "step": 45030 }, { "epoch": 0.9550380691819897, "grad_norm": 0.4930022358894348, "learning_rate": 1.0723385960941625e-05, "loss": 0.512, "step": 45031 }, { "epoch": 0.9550592776399228, "grad_norm": 0.3637861907482147, "learning_rate": 1.0723053338928915e-05, "loss": 0.4745, "step": 45032 }, { "epoch": 0.9550804860978558, "grad_norm": 0.3613506555557251, "learning_rate": 1.0722720716112035e-05, "loss": 0.5103, "step": 45033 }, { "epoch": 0.9551016945557889, "grad_norm": 0.3806118667125702, "learning_rate": 1.0722388092491346e-05, "loss": 0.5528, "step": 45034 }, { "epoch": 0.9551229030137218, "grad_norm": 0.3480115234851837, "learning_rate": 1.0722055468067227e-05, "loss": 0.4682, "step": 45035 }, { "epoch": 0.9551441114716549, "grad_norm": 0.3780777156352997, "learning_rate": 1.072172284284004e-05, "loss": 0.3875, "step": 45036 }, { "epoch": 0.955165319929588, "grad_norm": 0.483773410320282, "learning_rate": 1.0721390216810163e-05, "loss": 0.4537, "step": 45037 }, { "epoch": 0.9551865283875209, "grad_norm": 0.38717371225357056, "learning_rate": 1.072105758997796e-05, "loss": 0.5583, "step": 45038 }, { "epoch": 0.955207736845454, "grad_norm": 0.3611557185649872, "learning_rate": 1.0720724962343808e-05, "loss": 0.4175, "step": 45039 }, { "epoch": 0.955228945303387, "grad_norm": 0.37472566962242126, "learning_rate": 1.0720392333908065e-05, "loss": 0.4815, "step": 45040 }, { "epoch": 0.95525015376132, "grad_norm": 0.34946751594543457, "learning_rate": 1.0720059704671112e-05, "loss": 0.4677, "step": 45041 }, { "epoch": 0.955271362219253, "grad_norm": 0.3392272889614105, "learning_rate": 1.0719727074633312e-05, "loss": 0.5375, "step": 45042 }, { "epoch": 0.9552925706771861, "grad_norm": 0.37504562735557556, "learning_rate": 1.0719394443795042e-05, "loss": 0.4709, "step": 45043 }, { "epoch": 0.9553137791351191, "grad_norm": 0.3640146255493164, "learning_rate": 1.0719061812156664e-05, "loss": 0.4974, "step": 45044 }, { "epoch": 0.9553349875930521, "grad_norm": 0.42145177721977234, "learning_rate": 1.0718729179718552e-05, "loss": 0.575, "step": 45045 }, { "epoch": 0.9553561960509851, "grad_norm": 0.42368417978286743, "learning_rate": 1.0718396546481078e-05, "loss": 0.5501, "step": 45046 }, { "epoch": 0.9553774045089182, "grad_norm": 0.5218924880027771, "learning_rate": 1.071806391244461e-05, "loss": 0.4936, "step": 45047 }, { "epoch": 0.9553986129668511, "grad_norm": 0.3943612575531006, "learning_rate": 1.0717731277609513e-05, "loss": 0.4759, "step": 45048 }, { "epoch": 0.9554198214247842, "grad_norm": 0.376449853181839, "learning_rate": 1.0717398641976167e-05, "loss": 0.5046, "step": 45049 }, { "epoch": 0.9554410298827172, "grad_norm": 0.3881551921367645, "learning_rate": 1.0717066005544932e-05, "loss": 0.4225, "step": 45050 }, { "epoch": 0.9554622383406502, "grad_norm": 0.34721189737319946, "learning_rate": 1.0716733368316184e-05, "loss": 0.482, "step": 45051 }, { "epoch": 0.9554834467985833, "grad_norm": 0.3574138879776001, "learning_rate": 1.0716400730290292e-05, "loss": 0.4966, "step": 45052 }, { "epoch": 0.9555046552565163, "grad_norm": 0.34778815507888794, "learning_rate": 1.0716068091467627e-05, "loss": 0.4904, "step": 45053 }, { "epoch": 0.9555258637144494, "grad_norm": 0.35152676701545715, "learning_rate": 1.0715735451848554e-05, "loss": 0.5121, "step": 45054 }, { "epoch": 0.9555470721723823, "grad_norm": 0.40218159556388855, "learning_rate": 1.0715402811433448e-05, "loss": 0.4953, "step": 45055 }, { "epoch": 0.9555682806303154, "grad_norm": 0.42217355966567993, "learning_rate": 1.0715070170222677e-05, "loss": 0.598, "step": 45056 }, { "epoch": 0.9555894890882484, "grad_norm": 0.37578797340393066, "learning_rate": 1.0714737528216607e-05, "loss": 0.4615, "step": 45057 }, { "epoch": 0.9556106975461814, "grad_norm": 0.3306187689304352, "learning_rate": 1.071440488541562e-05, "loss": 0.4801, "step": 45058 }, { "epoch": 0.9556319060041144, "grad_norm": 0.3360227942466736, "learning_rate": 1.0714072241820071e-05, "loss": 0.4236, "step": 45059 }, { "epoch": 0.9556531144620475, "grad_norm": 0.40689411759376526, "learning_rate": 1.0713739597430341e-05, "loss": 0.4753, "step": 45060 }, { "epoch": 0.9556743229199804, "grad_norm": 0.3865756392478943, "learning_rate": 1.0713406952246797e-05, "loss": 0.4771, "step": 45061 }, { "epoch": 0.9556955313779135, "grad_norm": 0.36292147636413574, "learning_rate": 1.0713074306269803e-05, "loss": 0.4428, "step": 45062 }, { "epoch": 0.9557167398358465, "grad_norm": 0.38434386253356934, "learning_rate": 1.0712741659499741e-05, "loss": 0.473, "step": 45063 }, { "epoch": 0.9557379482937796, "grad_norm": 0.33803942799568176, "learning_rate": 1.0712409011936968e-05, "loss": 0.535, "step": 45064 }, { "epoch": 0.9557591567517126, "grad_norm": 0.44426387548446655, "learning_rate": 1.071207636358186e-05, "loss": 0.5912, "step": 45065 }, { "epoch": 0.9557803652096456, "grad_norm": 0.4667436182498932, "learning_rate": 1.071174371443479e-05, "loss": 0.4148, "step": 45066 }, { "epoch": 0.9558015736675787, "grad_norm": 0.37123048305511475, "learning_rate": 1.0711411064496127e-05, "loss": 0.4319, "step": 45067 }, { "epoch": 0.9558227821255116, "grad_norm": 0.4168824553489685, "learning_rate": 1.0711078413766233e-05, "loss": 0.5124, "step": 45068 }, { "epoch": 0.9558439905834447, "grad_norm": 0.3432413339614868, "learning_rate": 1.0710745762245487e-05, "loss": 0.4498, "step": 45069 }, { "epoch": 0.9558651990413777, "grad_norm": 0.39272594451904297, "learning_rate": 1.0710413109934255e-05, "loss": 0.526, "step": 45070 }, { "epoch": 0.9558864074993108, "grad_norm": 0.3788781464099884, "learning_rate": 1.0710080456832905e-05, "loss": 0.5025, "step": 45071 }, { "epoch": 0.9559076159572437, "grad_norm": 0.4033164083957672, "learning_rate": 1.0709747802941815e-05, "loss": 0.4778, "step": 45072 }, { "epoch": 0.9559288244151768, "grad_norm": 0.3338741958141327, "learning_rate": 1.0709415148261344e-05, "loss": 0.4515, "step": 45073 }, { "epoch": 0.9559500328731098, "grad_norm": 0.3529450297355652, "learning_rate": 1.0709082492791874e-05, "loss": 0.4842, "step": 45074 }, { "epoch": 0.9559712413310428, "grad_norm": 0.37622955441474915, "learning_rate": 1.0708749836533764e-05, "loss": 0.4814, "step": 45075 }, { "epoch": 0.9559924497889758, "grad_norm": 0.36100655794143677, "learning_rate": 1.0708417179487387e-05, "loss": 0.5124, "step": 45076 }, { "epoch": 0.9560136582469089, "grad_norm": 0.34962114691734314, "learning_rate": 1.070808452165312e-05, "loss": 0.4517, "step": 45077 }, { "epoch": 0.956034866704842, "grad_norm": 0.37774115800857544, "learning_rate": 1.0707751863031323e-05, "loss": 0.4843, "step": 45078 }, { "epoch": 0.9560560751627749, "grad_norm": 0.33420300483703613, "learning_rate": 1.0707419203622374e-05, "loss": 0.4093, "step": 45079 }, { "epoch": 0.956077283620708, "grad_norm": 0.3483978807926178, "learning_rate": 1.0707086543426636e-05, "loss": 0.5709, "step": 45080 }, { "epoch": 0.956098492078641, "grad_norm": 0.3282729387283325, "learning_rate": 1.0706753882444486e-05, "loss": 0.4449, "step": 45081 }, { "epoch": 0.956119700536574, "grad_norm": 0.4393867254257202, "learning_rate": 1.0706421220676288e-05, "loss": 0.5344, "step": 45082 }, { "epoch": 0.956140908994507, "grad_norm": 0.3441108167171478, "learning_rate": 1.0706088558122416e-05, "loss": 0.4481, "step": 45083 }, { "epoch": 0.9561621174524401, "grad_norm": 0.3342330753803253, "learning_rate": 1.0705755894783239e-05, "loss": 0.502, "step": 45084 }, { "epoch": 0.956183325910373, "grad_norm": 0.35814109444618225, "learning_rate": 1.0705423230659124e-05, "loss": 0.4667, "step": 45085 }, { "epoch": 0.9562045343683061, "grad_norm": 0.7647420763969421, "learning_rate": 1.0705090565750444e-05, "loss": 0.4804, "step": 45086 }, { "epoch": 0.9562257428262391, "grad_norm": 0.3319389522075653, "learning_rate": 1.070475790005757e-05, "loss": 0.4764, "step": 45087 }, { "epoch": 0.9562469512841721, "grad_norm": 0.4648232161998749, "learning_rate": 1.0704425233580869e-05, "loss": 0.446, "step": 45088 }, { "epoch": 0.9562681597421051, "grad_norm": 0.7386680841445923, "learning_rate": 1.0704092566320712e-05, "loss": 0.4621, "step": 45089 }, { "epoch": 0.9562893682000382, "grad_norm": 0.3952517509460449, "learning_rate": 1.0703759898277468e-05, "loss": 0.4221, "step": 45090 }, { "epoch": 0.9563105766579711, "grad_norm": 0.3936549425125122, "learning_rate": 1.0703427229451514e-05, "loss": 0.5319, "step": 45091 }, { "epoch": 0.9563317851159042, "grad_norm": 0.3568364679813385, "learning_rate": 1.070309455984321e-05, "loss": 0.3921, "step": 45092 }, { "epoch": 0.9563529935738373, "grad_norm": 0.3471672832965851, "learning_rate": 1.070276188945293e-05, "loss": 0.4819, "step": 45093 }, { "epoch": 0.9563742020317703, "grad_norm": 0.4247983694076538, "learning_rate": 1.0702429218281048e-05, "loss": 0.4768, "step": 45094 }, { "epoch": 0.9563954104897033, "grad_norm": 0.3798494040966034, "learning_rate": 1.070209654632793e-05, "loss": 0.3797, "step": 45095 }, { "epoch": 0.9564166189476363, "grad_norm": 0.36113396286964417, "learning_rate": 1.070176387359394e-05, "loss": 0.4668, "step": 45096 }, { "epoch": 0.9564378274055694, "grad_norm": 0.4737963080406189, "learning_rate": 1.070143120007946e-05, "loss": 0.5168, "step": 45097 }, { "epoch": 0.9564590358635023, "grad_norm": 0.36412283778190613, "learning_rate": 1.0701098525784853e-05, "loss": 0.4638, "step": 45098 }, { "epoch": 0.9564802443214354, "grad_norm": 0.3727336525917053, "learning_rate": 1.0700765850710488e-05, "loss": 0.4098, "step": 45099 }, { "epoch": 0.9565014527793684, "grad_norm": 0.357124000787735, "learning_rate": 1.0700433174856742e-05, "loss": 0.4702, "step": 45100 }, { "epoch": 0.9565226612373015, "grad_norm": 0.39551177620887756, "learning_rate": 1.0700100498223976e-05, "loss": 0.5126, "step": 45101 }, { "epoch": 0.9565438696952344, "grad_norm": 0.42407524585723877, "learning_rate": 1.0699767820812565e-05, "loss": 0.4871, "step": 45102 }, { "epoch": 0.9565650781531675, "grad_norm": 0.33003515005111694, "learning_rate": 1.0699435142622881e-05, "loss": 0.5388, "step": 45103 }, { "epoch": 0.9565862866111005, "grad_norm": 0.41758811473846436, "learning_rate": 1.0699102463655286e-05, "loss": 0.5233, "step": 45104 }, { "epoch": 0.9566074950690335, "grad_norm": 0.34846797585487366, "learning_rate": 1.0698769783910162e-05, "loss": 0.4644, "step": 45105 }, { "epoch": 0.9566287035269666, "grad_norm": 0.3706424832344055, "learning_rate": 1.0698437103387871e-05, "loss": 0.4823, "step": 45106 }, { "epoch": 0.9566499119848996, "grad_norm": 0.3594224452972412, "learning_rate": 1.069810442208878e-05, "loss": 0.4887, "step": 45107 }, { "epoch": 0.9566711204428326, "grad_norm": 0.335149884223938, "learning_rate": 1.0697771740013265e-05, "loss": 0.4904, "step": 45108 }, { "epoch": 0.9566923289007656, "grad_norm": 0.34814539551734924, "learning_rate": 1.0697439057161695e-05, "loss": 0.5592, "step": 45109 }, { "epoch": 0.9567135373586987, "grad_norm": 0.36706066131591797, "learning_rate": 1.0697106373534441e-05, "loss": 0.446, "step": 45110 }, { "epoch": 0.9567347458166316, "grad_norm": 0.36321449279785156, "learning_rate": 1.0696773689131869e-05, "loss": 0.4665, "step": 45111 }, { "epoch": 0.9567559542745647, "grad_norm": 0.3556000888347626, "learning_rate": 1.0696441003954355e-05, "loss": 0.4322, "step": 45112 }, { "epoch": 0.9567771627324977, "grad_norm": 0.388456255197525, "learning_rate": 1.0696108318002261e-05, "loss": 0.5521, "step": 45113 }, { "epoch": 0.9567983711904308, "grad_norm": 0.33867448568344116, "learning_rate": 1.0695775631275965e-05, "loss": 0.4031, "step": 45114 }, { "epoch": 0.9568195796483637, "grad_norm": 0.4109361469745636, "learning_rate": 1.0695442943775829e-05, "loss": 0.4763, "step": 45115 }, { "epoch": 0.9568407881062968, "grad_norm": 0.3413124680519104, "learning_rate": 1.069511025550223e-05, "loss": 0.4664, "step": 45116 }, { "epoch": 0.9568619965642298, "grad_norm": 0.38038870692253113, "learning_rate": 1.0694777566455537e-05, "loss": 0.5143, "step": 45117 }, { "epoch": 0.9568832050221628, "grad_norm": 0.3515768051147461, "learning_rate": 1.0694444876636114e-05, "loss": 0.567, "step": 45118 }, { "epoch": 0.9569044134800959, "grad_norm": 0.43780237436294556, "learning_rate": 1.0694112186044339e-05, "loss": 0.456, "step": 45119 }, { "epoch": 0.9569256219380289, "grad_norm": 0.36023855209350586, "learning_rate": 1.0693779494680579e-05, "loss": 0.5121, "step": 45120 }, { "epoch": 0.956946830395962, "grad_norm": 0.34746551513671875, "learning_rate": 1.06934468025452e-05, "loss": 0.4524, "step": 45121 }, { "epoch": 0.9569680388538949, "grad_norm": 0.3700450360774994, "learning_rate": 1.0693114109638579e-05, "loss": 0.4851, "step": 45122 }, { "epoch": 0.956989247311828, "grad_norm": 0.3591357469558716, "learning_rate": 1.069278141596108e-05, "loss": 0.465, "step": 45123 }, { "epoch": 0.957010455769761, "grad_norm": 0.3888109624385834, "learning_rate": 1.0692448721513075e-05, "loss": 0.5183, "step": 45124 }, { "epoch": 0.957031664227694, "grad_norm": 0.35694554448127747, "learning_rate": 1.069211602629494e-05, "loss": 0.5532, "step": 45125 }, { "epoch": 0.957052872685627, "grad_norm": 0.46625185012817383, "learning_rate": 1.0691783330307035e-05, "loss": 0.5, "step": 45126 }, { "epoch": 0.9570740811435601, "grad_norm": 0.35425636172294617, "learning_rate": 1.0691450633549734e-05, "loss": 0.4598, "step": 45127 }, { "epoch": 0.957095289601493, "grad_norm": 0.4738832712173462, "learning_rate": 1.0691117936023408e-05, "loss": 0.5222, "step": 45128 }, { "epoch": 0.9571164980594261, "grad_norm": 0.3559460937976837, "learning_rate": 1.069078523772843e-05, "loss": 0.5059, "step": 45129 }, { "epoch": 0.9571377065173591, "grad_norm": 0.3228131830692291, "learning_rate": 1.0690452538665162e-05, "loss": 0.4483, "step": 45130 }, { "epoch": 0.9571589149752922, "grad_norm": 0.3479562997817993, "learning_rate": 1.0690119838833982e-05, "loss": 0.4036, "step": 45131 }, { "epoch": 0.9571801234332251, "grad_norm": 0.4082367420196533, "learning_rate": 1.0689787138235255e-05, "loss": 0.5247, "step": 45132 }, { "epoch": 0.9572013318911582, "grad_norm": 0.3928304612636566, "learning_rate": 1.0689454436869352e-05, "loss": 0.6035, "step": 45133 }, { "epoch": 0.9572225403490913, "grad_norm": 0.33804944157600403, "learning_rate": 1.0689121734736644e-05, "loss": 0.446, "step": 45134 }, { "epoch": 0.9572437488070242, "grad_norm": 0.3617221713066101, "learning_rate": 1.0688789031837504e-05, "loss": 0.4813, "step": 45135 }, { "epoch": 0.9572649572649573, "grad_norm": 0.36192771792411804, "learning_rate": 1.0688456328172296e-05, "loss": 0.4662, "step": 45136 }, { "epoch": 0.9572861657228903, "grad_norm": 0.5370221138000488, "learning_rate": 1.0688123623741396e-05, "loss": 0.4921, "step": 45137 }, { "epoch": 0.9573073741808233, "grad_norm": 0.37313002347946167, "learning_rate": 1.0687790918545166e-05, "loss": 0.4784, "step": 45138 }, { "epoch": 0.9573285826387563, "grad_norm": 0.3410499691963196, "learning_rate": 1.0687458212583983e-05, "loss": 0.5038, "step": 45139 }, { "epoch": 0.9573497910966894, "grad_norm": 0.365359902381897, "learning_rate": 1.0687125505858218e-05, "loss": 0.4207, "step": 45140 }, { "epoch": 0.9573709995546223, "grad_norm": 0.3756767511367798, "learning_rate": 1.068679279836823e-05, "loss": 0.5986, "step": 45141 }, { "epoch": 0.9573922080125554, "grad_norm": 0.4075767993927002, "learning_rate": 1.0686460090114406e-05, "loss": 0.4348, "step": 45142 }, { "epoch": 0.9574134164704884, "grad_norm": 0.3619093894958496, "learning_rate": 1.0686127381097102e-05, "loss": 0.4262, "step": 45143 }, { "epoch": 0.9574346249284215, "grad_norm": 0.36725613474845886, "learning_rate": 1.0685794671316693e-05, "loss": 0.4895, "step": 45144 }, { "epoch": 0.9574558333863544, "grad_norm": 0.33359208703041077, "learning_rate": 1.0685461960773553e-05, "loss": 0.4626, "step": 45145 }, { "epoch": 0.9574770418442875, "grad_norm": 0.3541448712348938, "learning_rate": 1.0685129249468045e-05, "loss": 0.4802, "step": 45146 }, { "epoch": 0.9574982503022206, "grad_norm": 0.37165337800979614, "learning_rate": 1.0684796537400542e-05, "loss": 0.4415, "step": 45147 }, { "epoch": 0.9575194587601535, "grad_norm": 0.35778963565826416, "learning_rate": 1.0684463824571417e-05, "loss": 0.4917, "step": 45148 }, { "epoch": 0.9575406672180866, "grad_norm": 0.45163974165916443, "learning_rate": 1.0684131110981033e-05, "loss": 0.3983, "step": 45149 }, { "epoch": 0.9575618756760196, "grad_norm": 0.3685609698295593, "learning_rate": 1.0683798396629768e-05, "loss": 0.473, "step": 45150 }, { "epoch": 0.9575830841339527, "grad_norm": 0.38799986243247986, "learning_rate": 1.0683465681517988e-05, "loss": 0.4678, "step": 45151 }, { "epoch": 0.9576042925918856, "grad_norm": 0.3551572263240814, "learning_rate": 1.0683132965646061e-05, "loss": 0.4516, "step": 45152 }, { "epoch": 0.9576255010498187, "grad_norm": 0.43257108330726624, "learning_rate": 1.068280024901436e-05, "loss": 0.413, "step": 45153 }, { "epoch": 0.9576467095077517, "grad_norm": 0.39466631412506104, "learning_rate": 1.0682467531623258e-05, "loss": 0.5089, "step": 45154 }, { "epoch": 0.9576679179656847, "grad_norm": 0.5201970338821411, "learning_rate": 1.0682134813473118e-05, "loss": 0.5915, "step": 45155 }, { "epoch": 0.9576891264236177, "grad_norm": 0.38572701811790466, "learning_rate": 1.0681802094564316e-05, "loss": 0.4726, "step": 45156 }, { "epoch": 0.9577103348815508, "grad_norm": 3.951552152633667, "learning_rate": 1.0681469374897217e-05, "loss": 0.3736, "step": 45157 }, { "epoch": 0.9577315433394837, "grad_norm": 0.376887708902359, "learning_rate": 1.0681136654472196e-05, "loss": 0.4645, "step": 45158 }, { "epoch": 0.9577527517974168, "grad_norm": 0.3590467572212219, "learning_rate": 1.0680803933289618e-05, "loss": 0.5082, "step": 45159 }, { "epoch": 0.9577739602553499, "grad_norm": 0.363975465297699, "learning_rate": 1.0680471211349856e-05, "loss": 0.5058, "step": 45160 }, { "epoch": 0.9577951687132829, "grad_norm": 0.3497966229915619, "learning_rate": 1.0680138488653282e-05, "loss": 0.4842, "step": 45161 }, { "epoch": 0.9578163771712159, "grad_norm": 0.3928857147693634, "learning_rate": 1.0679805765200263e-05, "loss": 0.5318, "step": 45162 }, { "epoch": 0.9578375856291489, "grad_norm": 0.35323798656463623, "learning_rate": 1.0679473040991171e-05, "loss": 0.5508, "step": 45163 }, { "epoch": 0.957858794087082, "grad_norm": 0.3992484509944916, "learning_rate": 1.0679140316026375e-05, "loss": 0.5436, "step": 45164 }, { "epoch": 0.9578800025450149, "grad_norm": 0.37958237528800964, "learning_rate": 1.0678807590306244e-05, "loss": 0.4962, "step": 45165 }, { "epoch": 0.957901211002948, "grad_norm": 0.3574567139148712, "learning_rate": 1.0678474863831147e-05, "loss": 0.48, "step": 45166 }, { "epoch": 0.957922419460881, "grad_norm": 0.38901373744010925, "learning_rate": 1.067814213660146e-05, "loss": 0.4797, "step": 45167 }, { "epoch": 0.957943627918814, "grad_norm": 0.35889989137649536, "learning_rate": 1.0677809408617548e-05, "loss": 0.4818, "step": 45168 }, { "epoch": 0.957964836376747, "grad_norm": 0.38475310802459717, "learning_rate": 1.0677476679879779e-05, "loss": 0.5387, "step": 45169 }, { "epoch": 0.9579860448346801, "grad_norm": 0.3997019827365875, "learning_rate": 1.0677143950388532e-05, "loss": 0.4939, "step": 45170 }, { "epoch": 0.958007253292613, "grad_norm": 0.34006088972091675, "learning_rate": 1.0676811220144167e-05, "loss": 0.4076, "step": 45171 }, { "epoch": 0.9580284617505461, "grad_norm": 0.3885398507118225, "learning_rate": 1.067647848914706e-05, "loss": 0.5156, "step": 45172 }, { "epoch": 0.9580496702084791, "grad_norm": 0.34221184253692627, "learning_rate": 1.0676145757397579e-05, "loss": 0.441, "step": 45173 }, { "epoch": 0.9580708786664122, "grad_norm": 0.4374772608280182, "learning_rate": 1.0675813024896095e-05, "loss": 0.5297, "step": 45174 }, { "epoch": 0.9580920871243452, "grad_norm": 0.5792527198791504, "learning_rate": 1.0675480291642976e-05, "loss": 0.6108, "step": 45175 }, { "epoch": 0.9581132955822782, "grad_norm": 0.36714786291122437, "learning_rate": 1.0675147557638599e-05, "loss": 0.4263, "step": 45176 }, { "epoch": 0.9581345040402113, "grad_norm": 0.36769819259643555, "learning_rate": 1.0674814822883325e-05, "loss": 0.4268, "step": 45177 }, { "epoch": 0.9581557124981442, "grad_norm": 0.36657604575157166, "learning_rate": 1.0674482087377527e-05, "loss": 0.4894, "step": 45178 }, { "epoch": 0.9581769209560773, "grad_norm": 0.35726198554039, "learning_rate": 1.0674149351121575e-05, "loss": 0.4135, "step": 45179 }, { "epoch": 0.9581981294140103, "grad_norm": 0.40185487270355225, "learning_rate": 1.0673816614115844e-05, "loss": 0.4317, "step": 45180 }, { "epoch": 0.9582193378719434, "grad_norm": 0.3519922196865082, "learning_rate": 1.0673483876360694e-05, "loss": 0.4094, "step": 45181 }, { "epoch": 0.9582405463298763, "grad_norm": 0.39040809869766235, "learning_rate": 1.0673151137856509e-05, "loss": 0.5152, "step": 45182 }, { "epoch": 0.9582617547878094, "grad_norm": 0.9784442186355591, "learning_rate": 1.0672818398603644e-05, "loss": 0.4761, "step": 45183 }, { "epoch": 0.9582829632457424, "grad_norm": 0.980685293674469, "learning_rate": 1.0672485658602481e-05, "loss": 0.4857, "step": 45184 }, { "epoch": 0.9583041717036754, "grad_norm": 0.38734859228134155, "learning_rate": 1.0672152917853382e-05, "loss": 0.4726, "step": 45185 }, { "epoch": 0.9583253801616084, "grad_norm": 0.33189016580581665, "learning_rate": 1.0671820176356722e-05, "loss": 0.4727, "step": 45186 }, { "epoch": 0.9583465886195415, "grad_norm": 0.38789623975753784, "learning_rate": 1.0671487434112868e-05, "loss": 0.4556, "step": 45187 }, { "epoch": 0.9583677970774745, "grad_norm": 0.3871883749961853, "learning_rate": 1.0671154691122196e-05, "loss": 0.5237, "step": 45188 }, { "epoch": 0.9583890055354075, "grad_norm": 0.3959243595600128, "learning_rate": 1.0670821947385066e-05, "loss": 0.5622, "step": 45189 }, { "epoch": 0.9584102139933406, "grad_norm": 0.3504325747489929, "learning_rate": 1.0670489202901857e-05, "loss": 0.4478, "step": 45190 }, { "epoch": 0.9584314224512736, "grad_norm": 0.34877246618270874, "learning_rate": 1.0670156457672933e-05, "loss": 0.4146, "step": 45191 }, { "epoch": 0.9584526309092066, "grad_norm": 0.406067430973053, "learning_rate": 1.0669823711698668e-05, "loss": 0.4973, "step": 45192 }, { "epoch": 0.9584738393671396, "grad_norm": 0.44143909215927124, "learning_rate": 1.0669490964979433e-05, "loss": 0.4408, "step": 45193 }, { "epoch": 0.9584950478250727, "grad_norm": 0.40521249175071716, "learning_rate": 1.0669158217515596e-05, "loss": 0.4292, "step": 45194 }, { "epoch": 0.9585162562830056, "grad_norm": 0.35663458704948425, "learning_rate": 1.0668825469307524e-05, "loss": 0.5251, "step": 45195 }, { "epoch": 0.9585374647409387, "grad_norm": 0.38552799820899963, "learning_rate": 1.0668492720355591e-05, "loss": 0.4746, "step": 45196 }, { "epoch": 0.9585586731988717, "grad_norm": 0.4036768674850464, "learning_rate": 1.0668159970660167e-05, "loss": 0.5247, "step": 45197 }, { "epoch": 0.9585798816568047, "grad_norm": 0.35602235794067383, "learning_rate": 1.0667827220221621e-05, "loss": 0.4429, "step": 45198 }, { "epoch": 0.9586010901147377, "grad_norm": 0.35800763964653015, "learning_rate": 1.0667494469040325e-05, "loss": 0.4264, "step": 45199 }, { "epoch": 0.9586222985726708, "grad_norm": 0.3340987265110016, "learning_rate": 1.0667161717116643e-05, "loss": 0.4606, "step": 45200 }, { "epoch": 0.9586435070306039, "grad_norm": 0.3249002695083618, "learning_rate": 1.0666828964450954e-05, "loss": 0.4591, "step": 45201 }, { "epoch": 0.9586647154885368, "grad_norm": 0.887298583984375, "learning_rate": 1.0666496211043625e-05, "loss": 0.4392, "step": 45202 }, { "epoch": 0.9586859239464699, "grad_norm": 0.395521342754364, "learning_rate": 1.066616345689502e-05, "loss": 0.5013, "step": 45203 }, { "epoch": 0.9587071324044029, "grad_norm": 0.34412631392478943, "learning_rate": 1.0665830702005515e-05, "loss": 0.5208, "step": 45204 }, { "epoch": 0.9587283408623359, "grad_norm": 0.4422113597393036, "learning_rate": 1.0665497946375478e-05, "loss": 0.593, "step": 45205 }, { "epoch": 0.9587495493202689, "grad_norm": 0.4066859185695648, "learning_rate": 1.0665165190005282e-05, "loss": 0.498, "step": 45206 }, { "epoch": 0.958770757778202, "grad_norm": 0.4004465639591217, "learning_rate": 1.0664832432895296e-05, "loss": 0.5555, "step": 45207 }, { "epoch": 0.9587919662361349, "grad_norm": 0.3358369469642639, "learning_rate": 1.0664499675045887e-05, "loss": 0.5396, "step": 45208 }, { "epoch": 0.958813174694068, "grad_norm": 0.3907948434352875, "learning_rate": 1.0664166916457428e-05, "loss": 0.468, "step": 45209 }, { "epoch": 0.958834383152001, "grad_norm": 0.36232057213783264, "learning_rate": 1.0663834157130288e-05, "loss": 0.4634, "step": 45210 }, { "epoch": 0.958855591609934, "grad_norm": 0.3851923942565918, "learning_rate": 1.0663501397064837e-05, "loss": 0.4335, "step": 45211 }, { "epoch": 0.958876800067867, "grad_norm": 0.3691125810146332, "learning_rate": 1.0663168636261445e-05, "loss": 0.5532, "step": 45212 }, { "epoch": 0.9588980085258001, "grad_norm": 0.34661614894866943, "learning_rate": 1.0662835874720485e-05, "loss": 0.4488, "step": 45213 }, { "epoch": 0.9589192169837331, "grad_norm": 0.3942435383796692, "learning_rate": 1.0662503112442323e-05, "loss": 0.4947, "step": 45214 }, { "epoch": 0.9589404254416661, "grad_norm": 0.3806784152984619, "learning_rate": 1.0662170349427332e-05, "loss": 0.4741, "step": 45215 }, { "epoch": 0.9589616338995992, "grad_norm": 0.37686434388160706, "learning_rate": 1.0661837585675881e-05, "loss": 0.54, "step": 45216 }, { "epoch": 0.9589828423575322, "grad_norm": 0.36987394094467163, "learning_rate": 1.0661504821188338e-05, "loss": 0.4603, "step": 45217 }, { "epoch": 0.9590040508154652, "grad_norm": 0.32738760113716125, "learning_rate": 1.0661172055965075e-05, "loss": 0.4993, "step": 45218 }, { "epoch": 0.9590252592733982, "grad_norm": 0.36577174067497253, "learning_rate": 1.0660839290006464e-05, "loss": 0.4296, "step": 45219 }, { "epoch": 0.9590464677313313, "grad_norm": 0.3896263837814331, "learning_rate": 1.0660506523312871e-05, "loss": 0.4853, "step": 45220 }, { "epoch": 0.9590676761892643, "grad_norm": 0.35101643204689026, "learning_rate": 1.0660173755884671e-05, "loss": 0.4656, "step": 45221 }, { "epoch": 0.9590888846471973, "grad_norm": 0.39145076274871826, "learning_rate": 1.0659840987722232e-05, "loss": 0.4329, "step": 45222 }, { "epoch": 0.9591100931051303, "grad_norm": 0.39910563826560974, "learning_rate": 1.065950821882592e-05, "loss": 0.4718, "step": 45223 }, { "epoch": 0.9591313015630634, "grad_norm": 0.3969970941543579, "learning_rate": 1.0659175449196112e-05, "loss": 0.4737, "step": 45224 }, { "epoch": 0.9591525100209963, "grad_norm": 0.5714348554611206, "learning_rate": 1.0658842678833172e-05, "loss": 0.5072, "step": 45225 }, { "epoch": 0.9591737184789294, "grad_norm": 0.399512380361557, "learning_rate": 1.0658509907737474e-05, "loss": 0.5157, "step": 45226 }, { "epoch": 0.9591949269368624, "grad_norm": 0.39378753304481506, "learning_rate": 1.0658177135909389e-05, "loss": 0.5615, "step": 45227 }, { "epoch": 0.9592161353947954, "grad_norm": 0.36559411883354187, "learning_rate": 1.065784436334928e-05, "loss": 0.4509, "step": 45228 }, { "epoch": 0.9592373438527285, "grad_norm": 0.3956252336502075, "learning_rate": 1.065751159005753e-05, "loss": 0.4635, "step": 45229 }, { "epoch": 0.9592585523106615, "grad_norm": 0.3990461230278015, "learning_rate": 1.0657178816034494e-05, "loss": 0.4533, "step": 45230 }, { "epoch": 0.9592797607685946, "grad_norm": 0.933242678642273, "learning_rate": 1.0656846041280552e-05, "loss": 0.4277, "step": 45231 }, { "epoch": 0.9593009692265275, "grad_norm": 0.5108481645584106, "learning_rate": 1.0656513265796073e-05, "loss": 0.5119, "step": 45232 }, { "epoch": 0.9593221776844606, "grad_norm": 0.35074755549430847, "learning_rate": 1.0656180489581427e-05, "loss": 0.4997, "step": 45233 }, { "epoch": 0.9593433861423936, "grad_norm": 0.39291831851005554, "learning_rate": 1.0655847712636978e-05, "loss": 0.4981, "step": 45234 }, { "epoch": 0.9593645946003266, "grad_norm": 0.35973700881004333, "learning_rate": 1.0655514934963105e-05, "loss": 0.5197, "step": 45235 }, { "epoch": 0.9593858030582596, "grad_norm": 0.5283923745155334, "learning_rate": 1.0655182156560173e-05, "loss": 0.4148, "step": 45236 }, { "epoch": 0.9594070115161927, "grad_norm": 0.34780120849609375, "learning_rate": 1.065484937742855e-05, "loss": 0.4754, "step": 45237 }, { "epoch": 0.9594282199741256, "grad_norm": 0.4258882701396942, "learning_rate": 1.0654516597568612e-05, "loss": 0.3487, "step": 45238 }, { "epoch": 0.9594494284320587, "grad_norm": 0.3854921758174896, "learning_rate": 1.0654183816980727e-05, "loss": 0.476, "step": 45239 }, { "epoch": 0.9594706368899917, "grad_norm": 0.404198557138443, "learning_rate": 1.0653851035665261e-05, "loss": 0.4803, "step": 45240 }, { "epoch": 0.9594918453479248, "grad_norm": 0.409773588180542, "learning_rate": 1.0653518253622593e-05, "loss": 0.4874, "step": 45241 }, { "epoch": 0.9595130538058578, "grad_norm": 0.41625654697418213, "learning_rate": 1.0653185470853083e-05, "loss": 0.501, "step": 45242 }, { "epoch": 0.9595342622637908, "grad_norm": 0.4588194787502289, "learning_rate": 1.0652852687357108e-05, "loss": 0.4572, "step": 45243 }, { "epoch": 0.9595554707217239, "grad_norm": 0.3720437288284302, "learning_rate": 1.0652519903135035e-05, "loss": 0.482, "step": 45244 }, { "epoch": 0.9595766791796568, "grad_norm": 0.3763594925403595, "learning_rate": 1.0652187118187234e-05, "loss": 0.5033, "step": 45245 }, { "epoch": 0.9595978876375899, "grad_norm": 0.3647013008594513, "learning_rate": 1.0651854332514077e-05, "loss": 0.4534, "step": 45246 }, { "epoch": 0.9596190960955229, "grad_norm": 0.35118529200553894, "learning_rate": 1.0651521546115936e-05, "loss": 0.5173, "step": 45247 }, { "epoch": 0.959640304553456, "grad_norm": 0.35751086473464966, "learning_rate": 1.0651188758993176e-05, "loss": 0.5524, "step": 45248 }, { "epoch": 0.9596615130113889, "grad_norm": 0.39149653911590576, "learning_rate": 1.065085597114617e-05, "loss": 0.4693, "step": 45249 }, { "epoch": 0.959682721469322, "grad_norm": 0.34073585271835327, "learning_rate": 1.0650523182575286e-05, "loss": 0.5512, "step": 45250 }, { "epoch": 0.959703929927255, "grad_norm": 0.3420904874801636, "learning_rate": 1.0650190393280897e-05, "loss": 0.5172, "step": 45251 }, { "epoch": 0.959725138385188, "grad_norm": 0.3383861184120178, "learning_rate": 1.0649857603263372e-05, "loss": 0.5102, "step": 45252 }, { "epoch": 0.959746346843121, "grad_norm": 0.5461581945419312, "learning_rate": 1.0649524812523083e-05, "loss": 0.5396, "step": 45253 }, { "epoch": 0.9597675553010541, "grad_norm": 0.37842822074890137, "learning_rate": 1.0649192021060395e-05, "loss": 0.5734, "step": 45254 }, { "epoch": 0.959788763758987, "grad_norm": 0.34449419379234314, "learning_rate": 1.0648859228875684e-05, "loss": 0.4599, "step": 45255 }, { "epoch": 0.9598099722169201, "grad_norm": 0.5006318092346191, "learning_rate": 1.0648526435969312e-05, "loss": 0.4992, "step": 45256 }, { "epoch": 0.9598311806748532, "grad_norm": 0.38689467310905457, "learning_rate": 1.064819364234166e-05, "loss": 0.528, "step": 45257 }, { "epoch": 0.9598523891327861, "grad_norm": 0.4165842831134796, "learning_rate": 1.0647860847993092e-05, "loss": 0.4732, "step": 45258 }, { "epoch": 0.9598735975907192, "grad_norm": 0.42170679569244385, "learning_rate": 1.0647528052923976e-05, "loss": 0.5021, "step": 45259 }, { "epoch": 0.9598948060486522, "grad_norm": 0.3768247365951538, "learning_rate": 1.0647195257134688e-05, "loss": 0.473, "step": 45260 }, { "epoch": 0.9599160145065853, "grad_norm": 0.3289809226989746, "learning_rate": 1.0646862460625596e-05, "loss": 0.4215, "step": 45261 }, { "epoch": 0.9599372229645182, "grad_norm": 0.4069810211658478, "learning_rate": 1.0646529663397065e-05, "loss": 0.5306, "step": 45262 }, { "epoch": 0.9599584314224513, "grad_norm": 0.3835122287273407, "learning_rate": 1.064619686544947e-05, "loss": 0.5112, "step": 45263 }, { "epoch": 0.9599796398803843, "grad_norm": 0.3915386497974396, "learning_rate": 1.0645864066783182e-05, "loss": 0.5036, "step": 45264 }, { "epoch": 0.9600008483383173, "grad_norm": 0.38901203870773315, "learning_rate": 1.0645531267398567e-05, "loss": 0.4441, "step": 45265 }, { "epoch": 0.9600220567962503, "grad_norm": 0.3873775005340576, "learning_rate": 1.0645198467296002e-05, "loss": 0.5001, "step": 45266 }, { "epoch": 0.9600432652541834, "grad_norm": 0.34550318121910095, "learning_rate": 1.0644865666475851e-05, "loss": 0.4496, "step": 45267 }, { "epoch": 0.9600644737121163, "grad_norm": 0.35051167011260986, "learning_rate": 1.0644532864938486e-05, "loss": 0.5245, "step": 45268 }, { "epoch": 0.9600856821700494, "grad_norm": 0.35502517223358154, "learning_rate": 1.0644200062684277e-05, "loss": 0.5303, "step": 45269 }, { "epoch": 0.9601068906279825, "grad_norm": 0.3285178244113922, "learning_rate": 1.0643867259713596e-05, "loss": 0.511, "step": 45270 }, { "epoch": 0.9601280990859155, "grad_norm": 0.40755748748779297, "learning_rate": 1.0643534456026808e-05, "loss": 0.5384, "step": 45271 }, { "epoch": 0.9601493075438485, "grad_norm": 0.3663468658924103, "learning_rate": 1.0643201651624289e-05, "loss": 0.4632, "step": 45272 }, { "epoch": 0.9601705160017815, "grad_norm": 0.3105388879776001, "learning_rate": 1.0642868846506405e-05, "loss": 0.4432, "step": 45273 }, { "epoch": 0.9601917244597146, "grad_norm": 0.36532971262931824, "learning_rate": 1.064253604067353e-05, "loss": 0.4231, "step": 45274 }, { "epoch": 0.9602129329176475, "grad_norm": 0.3865630626678467, "learning_rate": 1.064220323412603e-05, "loss": 0.5671, "step": 45275 }, { "epoch": 0.9602341413755806, "grad_norm": 0.3806833028793335, "learning_rate": 1.064187042686428e-05, "loss": 0.488, "step": 45276 }, { "epoch": 0.9602553498335136, "grad_norm": 0.3583131730556488, "learning_rate": 1.0641537618888644e-05, "loss": 0.4899, "step": 45277 }, { "epoch": 0.9602765582914466, "grad_norm": 0.34937557578086853, "learning_rate": 1.0641204810199498e-05, "loss": 0.488, "step": 45278 }, { "epoch": 0.9602977667493796, "grad_norm": 0.39178580045700073, "learning_rate": 1.0640872000797207e-05, "loss": 0.4966, "step": 45279 }, { "epoch": 0.9603189752073127, "grad_norm": 0.36303332448005676, "learning_rate": 1.0640539190682147e-05, "loss": 0.4955, "step": 45280 }, { "epoch": 0.9603401836652457, "grad_norm": 0.3811127543449402, "learning_rate": 1.0640206379854683e-05, "loss": 0.4872, "step": 45281 }, { "epoch": 0.9603613921231787, "grad_norm": 0.33578523993492126, "learning_rate": 1.0639873568315185e-05, "loss": 0.4767, "step": 45282 }, { "epoch": 0.9603826005811118, "grad_norm": 0.37582504749298096, "learning_rate": 1.063954075606403e-05, "loss": 0.478, "step": 45283 }, { "epoch": 0.9604038090390448, "grad_norm": 0.4223870635032654, "learning_rate": 1.063920794310158e-05, "loss": 0.4418, "step": 45284 }, { "epoch": 0.9604250174969778, "grad_norm": 0.34726792573928833, "learning_rate": 1.0638875129428207e-05, "loss": 0.495, "step": 45285 }, { "epoch": 0.9604462259549108, "grad_norm": 0.3736583888530731, "learning_rate": 1.0638542315044287e-05, "loss": 0.5146, "step": 45286 }, { "epoch": 0.9604674344128439, "grad_norm": 0.406996488571167, "learning_rate": 1.0638209499950181e-05, "loss": 0.4731, "step": 45287 }, { "epoch": 0.9604886428707768, "grad_norm": 0.41484570503234863, "learning_rate": 1.0637876684146265e-05, "loss": 0.4873, "step": 45288 }, { "epoch": 0.9605098513287099, "grad_norm": 0.37349826097488403, "learning_rate": 1.0637543867632911e-05, "loss": 0.5431, "step": 45289 }, { "epoch": 0.9605310597866429, "grad_norm": 0.39504507184028625, "learning_rate": 1.0637211050410485e-05, "loss": 0.4634, "step": 45290 }, { "epoch": 0.960552268244576, "grad_norm": 0.481199711561203, "learning_rate": 1.0636878232479358e-05, "loss": 0.4639, "step": 45291 }, { "epoch": 0.9605734767025089, "grad_norm": 0.3727620542049408, "learning_rate": 1.0636545413839903e-05, "loss": 0.5079, "step": 45292 }, { "epoch": 0.960594685160442, "grad_norm": 0.37516605854034424, "learning_rate": 1.0636212594492482e-05, "loss": 0.4989, "step": 45293 }, { "epoch": 0.960615893618375, "grad_norm": 0.4103442132472992, "learning_rate": 1.0635879774437474e-05, "loss": 0.5517, "step": 45294 }, { "epoch": 0.960637102076308, "grad_norm": 0.3539293706417084, "learning_rate": 1.0635546953675248e-05, "loss": 0.4021, "step": 45295 }, { "epoch": 0.960658310534241, "grad_norm": 0.36634621024131775, "learning_rate": 1.0635214132206169e-05, "loss": 0.4904, "step": 45296 }, { "epoch": 0.9606795189921741, "grad_norm": 0.37032055854797363, "learning_rate": 1.0634881310030613e-05, "loss": 0.4429, "step": 45297 }, { "epoch": 0.9607007274501072, "grad_norm": 0.39410603046417236, "learning_rate": 1.0634548487148947e-05, "loss": 0.5846, "step": 45298 }, { "epoch": 0.9607219359080401, "grad_norm": 0.3685745298862457, "learning_rate": 1.063421566356154e-05, "loss": 0.5148, "step": 45299 }, { "epoch": 0.9607431443659732, "grad_norm": 0.41428402066230774, "learning_rate": 1.0633882839268766e-05, "loss": 0.3719, "step": 45300 }, { "epoch": 0.9607643528239062, "grad_norm": 0.3483361601829529, "learning_rate": 1.063355001427099e-05, "loss": 0.432, "step": 45301 }, { "epoch": 0.9607855612818392, "grad_norm": 0.46461549401283264, "learning_rate": 1.0633217188568587e-05, "loss": 0.4731, "step": 45302 }, { "epoch": 0.9608067697397722, "grad_norm": 0.36430996656417847, "learning_rate": 1.0632884362161926e-05, "loss": 0.5576, "step": 45303 }, { "epoch": 0.9608279781977053, "grad_norm": 0.4040926992893219, "learning_rate": 1.0632551535051378e-05, "loss": 0.598, "step": 45304 }, { "epoch": 0.9608491866556382, "grad_norm": 0.41082334518432617, "learning_rate": 1.0632218707237309e-05, "loss": 0.5268, "step": 45305 }, { "epoch": 0.9608703951135713, "grad_norm": 0.3757254481315613, "learning_rate": 1.0631885878720094e-05, "loss": 0.4887, "step": 45306 }, { "epoch": 0.9608916035715043, "grad_norm": 0.38022157549858093, "learning_rate": 1.0631553049500099e-05, "loss": 0.4972, "step": 45307 }, { "epoch": 0.9609128120294373, "grad_norm": 0.32865336537361145, "learning_rate": 1.0631220219577693e-05, "loss": 0.4389, "step": 45308 }, { "epoch": 0.9609340204873703, "grad_norm": 0.35586977005004883, "learning_rate": 1.0630887388953255e-05, "loss": 0.5004, "step": 45309 }, { "epoch": 0.9609552289453034, "grad_norm": 0.38688725233078003, "learning_rate": 1.0630554557627146e-05, "loss": 0.5281, "step": 45310 }, { "epoch": 0.9609764374032365, "grad_norm": 0.4337218105792999, "learning_rate": 1.0630221725599744e-05, "loss": 0.4188, "step": 45311 }, { "epoch": 0.9609976458611694, "grad_norm": 0.35534411668777466, "learning_rate": 1.0629888892871411e-05, "loss": 0.4302, "step": 45312 }, { "epoch": 0.9610188543191025, "grad_norm": 0.4238894581794739, "learning_rate": 1.0629556059442523e-05, "loss": 0.4823, "step": 45313 }, { "epoch": 0.9610400627770355, "grad_norm": 0.33992069959640503, "learning_rate": 1.0629223225313445e-05, "loss": 0.4054, "step": 45314 }, { "epoch": 0.9610612712349685, "grad_norm": 0.41529548168182373, "learning_rate": 1.0628890390484554e-05, "loss": 0.459, "step": 45315 }, { "epoch": 0.9610824796929015, "grad_norm": 0.4187355935573578, "learning_rate": 1.0628557554956217e-05, "loss": 0.432, "step": 45316 }, { "epoch": 0.9611036881508346, "grad_norm": 0.4107897877693176, "learning_rate": 1.0628224718728801e-05, "loss": 0.4717, "step": 45317 }, { "epoch": 0.9611248966087675, "grad_norm": 0.4696687161922455, "learning_rate": 1.0627891881802681e-05, "loss": 0.504, "step": 45318 }, { "epoch": 0.9611461050667006, "grad_norm": 0.3931235074996948, "learning_rate": 1.0627559044178224e-05, "loss": 0.5, "step": 45319 }, { "epoch": 0.9611673135246336, "grad_norm": 0.33064505457878113, "learning_rate": 1.0627226205855801e-05, "loss": 0.4841, "step": 45320 }, { "epoch": 0.9611885219825667, "grad_norm": 0.3276296555995941, "learning_rate": 1.0626893366835782e-05, "loss": 0.4669, "step": 45321 }, { "epoch": 0.9612097304404996, "grad_norm": 0.39096376299858093, "learning_rate": 1.0626560527118539e-05, "loss": 0.5352, "step": 45322 }, { "epoch": 0.9612309388984327, "grad_norm": 0.3350191116333008, "learning_rate": 1.062622768670444e-05, "loss": 0.4937, "step": 45323 }, { "epoch": 0.9612521473563658, "grad_norm": 0.35049670934677124, "learning_rate": 1.0625894845593857e-05, "loss": 0.383, "step": 45324 }, { "epoch": 0.9612733558142987, "grad_norm": 0.5099779963493347, "learning_rate": 1.0625562003787157e-05, "loss": 0.4937, "step": 45325 }, { "epoch": 0.9612945642722318, "grad_norm": 0.33511626720428467, "learning_rate": 1.0625229161284716e-05, "loss": 0.3958, "step": 45326 }, { "epoch": 0.9613157727301648, "grad_norm": 0.35021451115608215, "learning_rate": 1.0624896318086897e-05, "loss": 0.471, "step": 45327 }, { "epoch": 0.9613369811880978, "grad_norm": 0.36656835675239563, "learning_rate": 1.0624563474194076e-05, "loss": 0.564, "step": 45328 }, { "epoch": 0.9613581896460308, "grad_norm": 0.38074347376823425, "learning_rate": 1.062423062960662e-05, "loss": 0.5143, "step": 45329 }, { "epoch": 0.9613793981039639, "grad_norm": 0.34201478958129883, "learning_rate": 1.06238977843249e-05, "loss": 0.4333, "step": 45330 }, { "epoch": 0.9614006065618969, "grad_norm": 0.3771436810493469, "learning_rate": 1.0623564938349287e-05, "loss": 0.5146, "step": 45331 }, { "epoch": 0.9614218150198299, "grad_norm": 0.35248544812202454, "learning_rate": 1.0623232091680152e-05, "loss": 0.4548, "step": 45332 }, { "epoch": 0.9614430234777629, "grad_norm": 0.342922568321228, "learning_rate": 1.0622899244317857e-05, "loss": 0.4581, "step": 45333 }, { "epoch": 0.961464231935696, "grad_norm": 0.34590578079223633, "learning_rate": 1.0622566396262786e-05, "loss": 0.4837, "step": 45334 }, { "epoch": 0.9614854403936289, "grad_norm": 0.4057926535606384, "learning_rate": 1.0622233547515302e-05, "loss": 0.5098, "step": 45335 }, { "epoch": 0.961506648851562, "grad_norm": 0.45577436685562134, "learning_rate": 1.0621900698075772e-05, "loss": 0.5066, "step": 45336 }, { "epoch": 0.9615278573094951, "grad_norm": 0.4097426235675812, "learning_rate": 1.062156784794457e-05, "loss": 0.5293, "step": 45337 }, { "epoch": 0.961549065767428, "grad_norm": 0.3413892090320587, "learning_rate": 1.0621234997122065e-05, "loss": 0.4508, "step": 45338 }, { "epoch": 0.9615702742253611, "grad_norm": 0.35987335443496704, "learning_rate": 1.0620902145608627e-05, "loss": 0.5302, "step": 45339 }, { "epoch": 0.9615914826832941, "grad_norm": 0.32848814129829407, "learning_rate": 1.062056929340463e-05, "loss": 0.4016, "step": 45340 }, { "epoch": 0.9616126911412272, "grad_norm": 0.3352026343345642, "learning_rate": 1.0620236440510439e-05, "loss": 0.5703, "step": 45341 }, { "epoch": 0.9616338995991601, "grad_norm": 0.3554236590862274, "learning_rate": 1.061990358692643e-05, "loss": 0.5078, "step": 45342 }, { "epoch": 0.9616551080570932, "grad_norm": 0.3347329795360565, "learning_rate": 1.061957073265297e-05, "loss": 0.4497, "step": 45343 }, { "epoch": 0.9616763165150262, "grad_norm": 0.3250071108341217, "learning_rate": 1.0619237877690423e-05, "loss": 0.4641, "step": 45344 }, { "epoch": 0.9616975249729592, "grad_norm": 0.3831616938114166, "learning_rate": 1.0618905022039168e-05, "loss": 0.4775, "step": 45345 }, { "epoch": 0.9617187334308922, "grad_norm": 0.3797062039375305, "learning_rate": 1.0618572165699572e-05, "loss": 0.5341, "step": 45346 }, { "epoch": 0.9617399418888253, "grad_norm": 0.3723115026950836, "learning_rate": 1.0618239308672006e-05, "loss": 0.4527, "step": 45347 }, { "epoch": 0.9617611503467582, "grad_norm": 0.454862117767334, "learning_rate": 1.0617906450956838e-05, "loss": 0.5237, "step": 45348 }, { "epoch": 0.9617823588046913, "grad_norm": 0.44929268956184387, "learning_rate": 1.0617573592554442e-05, "loss": 0.5472, "step": 45349 }, { "epoch": 0.9618035672626243, "grad_norm": 0.3523889183998108, "learning_rate": 1.0617240733465184e-05, "loss": 0.3996, "step": 45350 }, { "epoch": 0.9618247757205574, "grad_norm": 0.3888099193572998, "learning_rate": 1.0616907873689439e-05, "loss": 0.5021, "step": 45351 }, { "epoch": 0.9618459841784904, "grad_norm": 0.37577033042907715, "learning_rate": 1.0616575013227572e-05, "loss": 0.5147, "step": 45352 }, { "epoch": 0.9618671926364234, "grad_norm": 0.46132349967956543, "learning_rate": 1.0616242152079955e-05, "loss": 0.5179, "step": 45353 }, { "epoch": 0.9618884010943565, "grad_norm": 0.3549811542034149, "learning_rate": 1.0615909290246962e-05, "loss": 0.477, "step": 45354 }, { "epoch": 0.9619096095522894, "grad_norm": 0.4054751992225647, "learning_rate": 1.0615576427728955e-05, "loss": 0.572, "step": 45355 }, { "epoch": 0.9619308180102225, "grad_norm": 0.3555232286453247, "learning_rate": 1.0615243564526315e-05, "loss": 0.4197, "step": 45356 }, { "epoch": 0.9619520264681555, "grad_norm": 0.4762158691883087, "learning_rate": 1.0614910700639404e-05, "loss": 0.5248, "step": 45357 }, { "epoch": 0.9619732349260885, "grad_norm": 0.3795219361782074, "learning_rate": 1.0614577836068594e-05, "loss": 0.5095, "step": 45358 }, { "epoch": 0.9619944433840215, "grad_norm": 0.3753310739994049, "learning_rate": 1.0614244970814256e-05, "loss": 0.6288, "step": 45359 }, { "epoch": 0.9620156518419546, "grad_norm": 0.35874879360198975, "learning_rate": 1.061391210487676e-05, "loss": 0.4529, "step": 45360 }, { "epoch": 0.9620368602998876, "grad_norm": 0.4027820825576782, "learning_rate": 1.0613579238256477e-05, "loss": 0.4328, "step": 45361 }, { "epoch": 0.9620580687578206, "grad_norm": 0.3852711021900177, "learning_rate": 1.0613246370953776e-05, "loss": 0.384, "step": 45362 }, { "epoch": 0.9620792772157536, "grad_norm": 0.4192962646484375, "learning_rate": 1.0612913502969028e-05, "loss": 0.4726, "step": 45363 }, { "epoch": 0.9621004856736867, "grad_norm": 0.34756338596343994, "learning_rate": 1.0612580634302602e-05, "loss": 0.443, "step": 45364 }, { "epoch": 0.9621216941316197, "grad_norm": 0.4239753186702728, "learning_rate": 1.061224776495487e-05, "loss": 0.4816, "step": 45365 }, { "epoch": 0.9621429025895527, "grad_norm": 0.3395455777645111, "learning_rate": 1.0611914894926203e-05, "loss": 0.4899, "step": 45366 }, { "epoch": 0.9621641110474858, "grad_norm": 0.8211416006088257, "learning_rate": 1.0611582024216966e-05, "loss": 0.5132, "step": 45367 }, { "epoch": 0.9621853195054187, "grad_norm": 0.3535970449447632, "learning_rate": 1.0611249152827536e-05, "loss": 0.4596, "step": 45368 }, { "epoch": 0.9622065279633518, "grad_norm": 0.4263712763786316, "learning_rate": 1.0610916280758278e-05, "loss": 0.4458, "step": 45369 }, { "epoch": 0.9622277364212848, "grad_norm": 0.43759265542030334, "learning_rate": 1.0610583408009567e-05, "loss": 0.5197, "step": 45370 }, { "epoch": 0.9622489448792179, "grad_norm": 0.33663037419319153, "learning_rate": 1.0610250534581767e-05, "loss": 0.5407, "step": 45371 }, { "epoch": 0.9622701533371508, "grad_norm": 0.39934438467025757, "learning_rate": 1.0609917660475255e-05, "loss": 0.5249, "step": 45372 }, { "epoch": 0.9622913617950839, "grad_norm": 0.36903512477874756, "learning_rate": 1.0609584785690395e-05, "loss": 0.5048, "step": 45373 }, { "epoch": 0.9623125702530169, "grad_norm": 0.4187398850917816, "learning_rate": 1.0609251910227561e-05, "loss": 0.5806, "step": 45374 }, { "epoch": 0.9623337787109499, "grad_norm": 0.4014153480529785, "learning_rate": 1.0608919034087121e-05, "loss": 0.5958, "step": 45375 }, { "epoch": 0.9623549871688829, "grad_norm": 0.4828852117061615, "learning_rate": 1.060858615726945e-05, "loss": 0.5586, "step": 45376 }, { "epoch": 0.962376195626816, "grad_norm": 0.37474799156188965, "learning_rate": 1.0608253279774914e-05, "loss": 0.4649, "step": 45377 }, { "epoch": 0.962397404084749, "grad_norm": 0.4042239487171173, "learning_rate": 1.0607920401603879e-05, "loss": 0.4492, "step": 45378 }, { "epoch": 0.962418612542682, "grad_norm": 0.4104747176170349, "learning_rate": 1.0607587522756723e-05, "loss": 0.4524, "step": 45379 }, { "epoch": 0.9624398210006151, "grad_norm": 0.613176703453064, "learning_rate": 1.0607254643233818e-05, "loss": 0.4197, "step": 45380 }, { "epoch": 0.9624610294585481, "grad_norm": 0.4512374699115753, "learning_rate": 1.0606921763035525e-05, "loss": 0.435, "step": 45381 }, { "epoch": 0.9624822379164811, "grad_norm": 0.4162527620792389, "learning_rate": 1.060658888216222e-05, "loss": 0.4564, "step": 45382 }, { "epoch": 0.9625034463744141, "grad_norm": 0.9021549224853516, "learning_rate": 1.060625600061427e-05, "loss": 0.5295, "step": 45383 }, { "epoch": 0.9625246548323472, "grad_norm": 0.3946116864681244, "learning_rate": 1.0605923118392049e-05, "loss": 0.4929, "step": 45384 }, { "epoch": 0.9625458632902801, "grad_norm": 0.38007256388664246, "learning_rate": 1.0605590235495927e-05, "loss": 0.5574, "step": 45385 }, { "epoch": 0.9625670717482132, "grad_norm": 0.3835904598236084, "learning_rate": 1.0605257351926273e-05, "loss": 0.5445, "step": 45386 }, { "epoch": 0.9625882802061462, "grad_norm": 0.43231701850891113, "learning_rate": 1.0604924467683455e-05, "loss": 0.4874, "step": 45387 }, { "epoch": 0.9626094886640792, "grad_norm": 0.44011253118515015, "learning_rate": 1.0604591582767847e-05, "loss": 0.4393, "step": 45388 }, { "epoch": 0.9626306971220122, "grad_norm": 0.38100466132164, "learning_rate": 1.0604258697179815e-05, "loss": 0.3934, "step": 45389 }, { "epoch": 0.9626519055799453, "grad_norm": 0.35832250118255615, "learning_rate": 1.0603925810919735e-05, "loss": 0.4217, "step": 45390 }, { "epoch": 0.9626731140378783, "grad_norm": 0.3714209496974945, "learning_rate": 1.060359292398797e-05, "loss": 0.5025, "step": 45391 }, { "epoch": 0.9626943224958113, "grad_norm": 0.4291262626647949, "learning_rate": 1.0603260036384894e-05, "loss": 0.5147, "step": 45392 }, { "epoch": 0.9627155309537444, "grad_norm": 0.40980061888694763, "learning_rate": 1.0602927148110882e-05, "loss": 0.5293, "step": 45393 }, { "epoch": 0.9627367394116774, "grad_norm": 0.38451698422431946, "learning_rate": 1.06025942591663e-05, "loss": 0.4973, "step": 45394 }, { "epoch": 0.9627579478696104, "grad_norm": 0.3462420701980591, "learning_rate": 1.0602261369551513e-05, "loss": 0.5, "step": 45395 }, { "epoch": 0.9627791563275434, "grad_norm": 0.3894917368888855, "learning_rate": 1.06019284792669e-05, "loss": 0.4772, "step": 45396 }, { "epoch": 0.9628003647854765, "grad_norm": 0.4443570077419281, "learning_rate": 1.0601595588312825e-05, "loss": 0.5127, "step": 45397 }, { "epoch": 0.9628215732434094, "grad_norm": 0.34914734959602356, "learning_rate": 1.060126269668966e-05, "loss": 0.4912, "step": 45398 }, { "epoch": 0.9628427817013425, "grad_norm": 0.3549555838108063, "learning_rate": 1.0600929804397779e-05, "loss": 0.4742, "step": 45399 }, { "epoch": 0.9628639901592755, "grad_norm": 0.3955058753490448, "learning_rate": 1.0600596911437549e-05, "loss": 0.4129, "step": 45400 }, { "epoch": 0.9628851986172086, "grad_norm": 0.4762174189090729, "learning_rate": 1.0600264017809336e-05, "loss": 0.5583, "step": 45401 }, { "epoch": 0.9629064070751415, "grad_norm": 0.3490539491176605, "learning_rate": 1.0599931123513519e-05, "loss": 0.4685, "step": 45402 }, { "epoch": 0.9629276155330746, "grad_norm": 0.3907833397388458, "learning_rate": 1.059959822855046e-05, "loss": 0.5236, "step": 45403 }, { "epoch": 0.9629488239910076, "grad_norm": 0.4348825514316559, "learning_rate": 1.0599265332920535e-05, "loss": 0.4486, "step": 45404 }, { "epoch": 0.9629700324489406, "grad_norm": 0.3356137275695801, "learning_rate": 1.0598932436624114e-05, "loss": 0.4764, "step": 45405 }, { "epoch": 0.9629912409068737, "grad_norm": 0.35479873418807983, "learning_rate": 1.0598599539661563e-05, "loss": 0.5051, "step": 45406 }, { "epoch": 0.9630124493648067, "grad_norm": 0.5507614612579346, "learning_rate": 1.0598266642033258e-05, "loss": 0.4712, "step": 45407 }, { "epoch": 0.9630336578227398, "grad_norm": 0.42168059945106506, "learning_rate": 1.0597933743739564e-05, "loss": 0.4852, "step": 45408 }, { "epoch": 0.9630548662806727, "grad_norm": 0.37032806873321533, "learning_rate": 1.0597600844780851e-05, "loss": 0.3958, "step": 45409 }, { "epoch": 0.9630760747386058, "grad_norm": 0.33568015694618225, "learning_rate": 1.0597267945157492e-05, "loss": 0.5156, "step": 45410 }, { "epoch": 0.9630972831965388, "grad_norm": 0.37998858094215393, "learning_rate": 1.059693504486986e-05, "loss": 0.4776, "step": 45411 }, { "epoch": 0.9631184916544718, "grad_norm": 0.35324031114578247, "learning_rate": 1.059660214391832e-05, "loss": 0.4749, "step": 45412 }, { "epoch": 0.9631397001124048, "grad_norm": 0.43125787377357483, "learning_rate": 1.0596269242303245e-05, "loss": 0.4995, "step": 45413 }, { "epoch": 0.9631609085703379, "grad_norm": 0.36570748686790466, "learning_rate": 1.0595936340025007e-05, "loss": 0.4728, "step": 45414 }, { "epoch": 0.9631821170282708, "grad_norm": 0.3557736873626709, "learning_rate": 1.059560343708397e-05, "loss": 0.5145, "step": 45415 }, { "epoch": 0.9632033254862039, "grad_norm": 0.3891345262527466, "learning_rate": 1.0595270533480508e-05, "loss": 0.5345, "step": 45416 }, { "epoch": 0.9632245339441369, "grad_norm": 0.39986851811408997, "learning_rate": 1.0594937629214991e-05, "loss": 0.4995, "step": 45417 }, { "epoch": 0.96324574240207, "grad_norm": 0.3559489846229553, "learning_rate": 1.0594604724287791e-05, "loss": 0.5135, "step": 45418 }, { "epoch": 0.963266950860003, "grad_norm": 0.367583304643631, "learning_rate": 1.0594271818699277e-05, "loss": 0.4405, "step": 45419 }, { "epoch": 0.963288159317936, "grad_norm": 0.3479449152946472, "learning_rate": 1.059393891244982e-05, "loss": 0.42, "step": 45420 }, { "epoch": 0.9633093677758691, "grad_norm": 0.3967055678367615, "learning_rate": 1.059360600553979e-05, "loss": 0.5098, "step": 45421 }, { "epoch": 0.963330576233802, "grad_norm": 0.3736172318458557, "learning_rate": 1.0593273097969554e-05, "loss": 0.5166, "step": 45422 }, { "epoch": 0.9633517846917351, "grad_norm": 0.4926474988460541, "learning_rate": 1.0592940189739482e-05, "loss": 0.458, "step": 45423 }, { "epoch": 0.9633729931496681, "grad_norm": 0.3389037847518921, "learning_rate": 1.0592607280849952e-05, "loss": 0.4278, "step": 45424 }, { "epoch": 0.9633942016076011, "grad_norm": 0.3560357689857483, "learning_rate": 1.059227437130133e-05, "loss": 0.5178, "step": 45425 }, { "epoch": 0.9634154100655341, "grad_norm": 0.3549637198448181, "learning_rate": 1.0591941461093982e-05, "loss": 0.4225, "step": 45426 }, { "epoch": 0.9634366185234672, "grad_norm": 0.32968392968177795, "learning_rate": 1.0591608550228283e-05, "loss": 0.4659, "step": 45427 }, { "epoch": 0.9634578269814001, "grad_norm": 0.36713504791259766, "learning_rate": 1.0591275638704604e-05, "loss": 0.4673, "step": 45428 }, { "epoch": 0.9634790354393332, "grad_norm": 0.4240332543849945, "learning_rate": 1.0590942726523307e-05, "loss": 0.5323, "step": 45429 }, { "epoch": 0.9635002438972662, "grad_norm": 0.4024929106235504, "learning_rate": 1.0590609813684775e-05, "loss": 0.5392, "step": 45430 }, { "epoch": 0.9635214523551993, "grad_norm": 0.3847790062427521, "learning_rate": 1.0590276900189374e-05, "loss": 0.48, "step": 45431 }, { "epoch": 0.9635426608131322, "grad_norm": 0.4172859489917755, "learning_rate": 1.0589943986037465e-05, "loss": 0.4705, "step": 45432 }, { "epoch": 0.9635638692710653, "grad_norm": 0.40510499477386475, "learning_rate": 1.0589611071229431e-05, "loss": 0.4028, "step": 45433 }, { "epoch": 0.9635850777289984, "grad_norm": 0.5191063284873962, "learning_rate": 1.0589278155765634e-05, "loss": 0.4591, "step": 45434 }, { "epoch": 0.9636062861869313, "grad_norm": 0.3453548848628998, "learning_rate": 1.0588945239646449e-05, "loss": 0.4853, "step": 45435 }, { "epoch": 0.9636274946448644, "grad_norm": 0.353877991437912, "learning_rate": 1.058861232287224e-05, "loss": 0.4749, "step": 45436 }, { "epoch": 0.9636487031027974, "grad_norm": 0.35995855927467346, "learning_rate": 1.0588279405443385e-05, "loss": 0.5302, "step": 45437 }, { "epoch": 0.9636699115607305, "grad_norm": 0.4281200170516968, "learning_rate": 1.058794648736025e-05, "loss": 0.5023, "step": 45438 }, { "epoch": 0.9636911200186634, "grad_norm": 0.35175853967666626, "learning_rate": 1.0587613568623208e-05, "loss": 0.5285, "step": 45439 }, { "epoch": 0.9637123284765965, "grad_norm": 0.36783742904663086, "learning_rate": 1.0587280649232622e-05, "loss": 0.4614, "step": 45440 }, { "epoch": 0.9637335369345295, "grad_norm": 0.39212673902511597, "learning_rate": 1.0586947729188872e-05, "loss": 0.4162, "step": 45441 }, { "epoch": 0.9637547453924625, "grad_norm": 0.3806287348270416, "learning_rate": 1.0586614808492324e-05, "loss": 0.4823, "step": 45442 }, { "epoch": 0.9637759538503955, "grad_norm": 0.35951846837997437, "learning_rate": 1.0586281887143344e-05, "loss": 0.4602, "step": 45443 }, { "epoch": 0.9637971623083286, "grad_norm": 0.3862578272819519, "learning_rate": 1.058594896514231e-05, "loss": 0.5017, "step": 45444 }, { "epoch": 0.9638183707662615, "grad_norm": 0.37099260091781616, "learning_rate": 1.058561604248959e-05, "loss": 0.4535, "step": 45445 }, { "epoch": 0.9638395792241946, "grad_norm": 0.3637528419494629, "learning_rate": 1.058528311918555e-05, "loss": 0.4778, "step": 45446 }, { "epoch": 0.9638607876821277, "grad_norm": 0.4049035608768463, "learning_rate": 1.0584950195230563e-05, "loss": 0.5909, "step": 45447 }, { "epoch": 0.9638819961400606, "grad_norm": 0.4050726592540741, "learning_rate": 1.0584617270625002e-05, "loss": 0.5277, "step": 45448 }, { "epoch": 0.9639032045979937, "grad_norm": 0.41552263498306274, "learning_rate": 1.0584284345369232e-05, "loss": 0.4821, "step": 45449 }, { "epoch": 0.9639244130559267, "grad_norm": 0.3871099352836609, "learning_rate": 1.0583951419463628e-05, "loss": 0.5055, "step": 45450 }, { "epoch": 0.9639456215138598, "grad_norm": 0.4068908989429474, "learning_rate": 1.0583618492908556e-05, "loss": 0.4998, "step": 45451 }, { "epoch": 0.9639668299717927, "grad_norm": 0.35633641481399536, "learning_rate": 1.0583285565704391e-05, "loss": 0.5102, "step": 45452 }, { "epoch": 0.9639880384297258, "grad_norm": 0.41787517070770264, "learning_rate": 1.05829526378515e-05, "loss": 0.5318, "step": 45453 }, { "epoch": 0.9640092468876588, "grad_norm": 0.3483433425426483, "learning_rate": 1.0582619709350255e-05, "loss": 0.5579, "step": 45454 }, { "epoch": 0.9640304553455918, "grad_norm": 0.4195197522640228, "learning_rate": 1.0582286780201023e-05, "loss": 0.5977, "step": 45455 }, { "epoch": 0.9640516638035248, "grad_norm": 0.3226889669895172, "learning_rate": 1.0581953850404178e-05, "loss": 0.4897, "step": 45456 }, { "epoch": 0.9640728722614579, "grad_norm": 0.39722925424575806, "learning_rate": 1.0581620919960089e-05, "loss": 0.4766, "step": 45457 }, { "epoch": 0.9640940807193908, "grad_norm": 0.3388851284980774, "learning_rate": 1.0581287988869127e-05, "loss": 0.4399, "step": 45458 }, { "epoch": 0.9641152891773239, "grad_norm": 0.324103444814682, "learning_rate": 1.0580955057131664e-05, "loss": 0.4776, "step": 45459 }, { "epoch": 0.964136497635257, "grad_norm": 0.40437328815460205, "learning_rate": 1.0580622124748062e-05, "loss": 0.5546, "step": 45460 }, { "epoch": 0.96415770609319, "grad_norm": 1.2044777870178223, "learning_rate": 1.0580289191718699e-05, "loss": 0.4353, "step": 45461 }, { "epoch": 0.964178914551123, "grad_norm": 0.33769872784614563, "learning_rate": 1.0579956258043946e-05, "loss": 0.4577, "step": 45462 }, { "epoch": 0.964200123009056, "grad_norm": 0.3483717739582062, "learning_rate": 1.0579623323724167e-05, "loss": 0.4349, "step": 45463 }, { "epoch": 0.9642213314669891, "grad_norm": 0.41064873337745667, "learning_rate": 1.057929038875974e-05, "loss": 0.5208, "step": 45464 }, { "epoch": 0.964242539924922, "grad_norm": 0.38718369603157043, "learning_rate": 1.0578957453151028e-05, "loss": 0.4801, "step": 45465 }, { "epoch": 0.9642637483828551, "grad_norm": 0.36499789357185364, "learning_rate": 1.0578624516898407e-05, "loss": 0.5406, "step": 45466 }, { "epoch": 0.9642849568407881, "grad_norm": 0.4164572060108185, "learning_rate": 1.0578291580002245e-05, "loss": 0.5168, "step": 45467 }, { "epoch": 0.9643061652987212, "grad_norm": 0.3613090217113495, "learning_rate": 1.057795864246291e-05, "loss": 0.4817, "step": 45468 }, { "epoch": 0.9643273737566541, "grad_norm": 0.383338987827301, "learning_rate": 1.0577625704280774e-05, "loss": 0.4835, "step": 45469 }, { "epoch": 0.9643485822145872, "grad_norm": 0.3741223216056824, "learning_rate": 1.0577292765456208e-05, "loss": 0.533, "step": 45470 }, { "epoch": 0.9643697906725202, "grad_norm": 0.3511117100715637, "learning_rate": 1.0576959825989583e-05, "loss": 0.4639, "step": 45471 }, { "epoch": 0.9643909991304532, "grad_norm": 0.40252041816711426, "learning_rate": 1.0576626885881269e-05, "loss": 0.514, "step": 45472 }, { "epoch": 0.9644122075883862, "grad_norm": 0.36141398549079895, "learning_rate": 1.0576293945131638e-05, "loss": 0.4744, "step": 45473 }, { "epoch": 0.9644334160463193, "grad_norm": 0.41142305731773376, "learning_rate": 1.0575961003741048e-05, "loss": 0.5494, "step": 45474 }, { "epoch": 0.9644546245042523, "grad_norm": 0.4301993250846863, "learning_rate": 1.0575628061709888e-05, "loss": 0.4444, "step": 45475 }, { "epoch": 0.9644758329621853, "grad_norm": 0.3385717570781708, "learning_rate": 1.057529511903852e-05, "loss": 0.4261, "step": 45476 }, { "epoch": 0.9644970414201184, "grad_norm": 0.36407822370529175, "learning_rate": 1.0574962175727311e-05, "loss": 0.3969, "step": 45477 }, { "epoch": 0.9645182498780513, "grad_norm": 0.39579883217811584, "learning_rate": 1.0574629231776634e-05, "loss": 0.4836, "step": 45478 }, { "epoch": 0.9645394583359844, "grad_norm": 0.3835009038448334, "learning_rate": 1.0574296287186858e-05, "loss": 0.5238, "step": 45479 }, { "epoch": 0.9645606667939174, "grad_norm": 0.3755198121070862, "learning_rate": 1.0573963341958356e-05, "loss": 0.4779, "step": 45480 }, { "epoch": 0.9645818752518505, "grad_norm": 0.34994786977767944, "learning_rate": 1.0573630396091497e-05, "loss": 0.4386, "step": 45481 }, { "epoch": 0.9646030837097834, "grad_norm": 0.37360653281211853, "learning_rate": 1.0573297449586653e-05, "loss": 0.4301, "step": 45482 }, { "epoch": 0.9646242921677165, "grad_norm": 0.33829784393310547, "learning_rate": 1.057296450244419e-05, "loss": 0.4335, "step": 45483 }, { "epoch": 0.9646455006256495, "grad_norm": 0.3802824020385742, "learning_rate": 1.0572631554664483e-05, "loss": 0.4304, "step": 45484 }, { "epoch": 0.9646667090835825, "grad_norm": 0.3997367024421692, "learning_rate": 1.0572298606247898e-05, "loss": 0.5423, "step": 45485 }, { "epoch": 0.9646879175415155, "grad_norm": 0.3428870439529419, "learning_rate": 1.0571965657194809e-05, "loss": 0.4201, "step": 45486 }, { "epoch": 0.9647091259994486, "grad_norm": 0.39105597138404846, "learning_rate": 1.0571632707505584e-05, "loss": 0.4775, "step": 45487 }, { "epoch": 0.9647303344573817, "grad_norm": 0.4239129424095154, "learning_rate": 1.0571299757180592e-05, "loss": 0.4576, "step": 45488 }, { "epoch": 0.9647515429153146, "grad_norm": 0.3925064504146576, "learning_rate": 1.057096680622021e-05, "loss": 0.5453, "step": 45489 }, { "epoch": 0.9647727513732477, "grad_norm": 0.3673352599143982, "learning_rate": 1.05706338546248e-05, "loss": 0.5004, "step": 45490 }, { "epoch": 0.9647939598311807, "grad_norm": 0.3836401104927063, "learning_rate": 1.0570300902394737e-05, "loss": 0.4896, "step": 45491 }, { "epoch": 0.9648151682891137, "grad_norm": 0.38136497139930725, "learning_rate": 1.0569967949530394e-05, "loss": 0.5215, "step": 45492 }, { "epoch": 0.9648363767470467, "grad_norm": 0.3917742073535919, "learning_rate": 1.0569634996032132e-05, "loss": 0.5145, "step": 45493 }, { "epoch": 0.9648575852049798, "grad_norm": 0.3426001965999603, "learning_rate": 1.0569302041900327e-05, "loss": 0.4219, "step": 45494 }, { "epoch": 0.9648787936629127, "grad_norm": 0.42812612652778625, "learning_rate": 1.0568969087135352e-05, "loss": 0.5554, "step": 45495 }, { "epoch": 0.9649000021208458, "grad_norm": 0.3650076687335968, "learning_rate": 1.0568636131737575e-05, "loss": 0.4431, "step": 45496 }, { "epoch": 0.9649212105787788, "grad_norm": 0.31704869866371155, "learning_rate": 1.0568303175707364e-05, "loss": 0.4534, "step": 45497 }, { "epoch": 0.9649424190367119, "grad_norm": 0.3810308277606964, "learning_rate": 1.0567970219045094e-05, "loss": 0.4936, "step": 45498 }, { "epoch": 0.9649636274946448, "grad_norm": 0.38800284266471863, "learning_rate": 1.0567637261751129e-05, "loss": 0.5069, "step": 45499 }, { "epoch": 0.9649848359525779, "grad_norm": 0.3373672068119049, "learning_rate": 1.0567304303825843e-05, "loss": 0.4933, "step": 45500 }, { "epoch": 0.965006044410511, "grad_norm": 0.5125625133514404, "learning_rate": 1.0566971345269608e-05, "loss": 0.4184, "step": 45501 }, { "epoch": 0.9650272528684439, "grad_norm": 0.347700834274292, "learning_rate": 1.056663838608279e-05, "loss": 0.4602, "step": 45502 }, { "epoch": 0.965048461326377, "grad_norm": 0.3770483732223511, "learning_rate": 1.0566305426265764e-05, "loss": 0.4695, "step": 45503 }, { "epoch": 0.96506966978431, "grad_norm": 0.37207192182540894, "learning_rate": 1.0565972465818898e-05, "loss": 0.4939, "step": 45504 }, { "epoch": 0.965090878242243, "grad_norm": 0.36387842893600464, "learning_rate": 1.0565639504742562e-05, "loss": 0.4344, "step": 45505 }, { "epoch": 0.965112086700176, "grad_norm": 0.39376765489578247, "learning_rate": 1.0565306543037124e-05, "loss": 0.507, "step": 45506 }, { "epoch": 0.9651332951581091, "grad_norm": 0.3739680051803589, "learning_rate": 1.0564973580702962e-05, "loss": 0.4623, "step": 45507 }, { "epoch": 0.965154503616042, "grad_norm": 0.7406128644943237, "learning_rate": 1.0564640617740435e-05, "loss": 0.4429, "step": 45508 }, { "epoch": 0.9651757120739751, "grad_norm": 0.34898272156715393, "learning_rate": 1.0564307654149926e-05, "loss": 0.4886, "step": 45509 }, { "epoch": 0.9651969205319081, "grad_norm": 0.35404935479164124, "learning_rate": 1.0563974689931798e-05, "loss": 0.4586, "step": 45510 }, { "epoch": 0.9652181289898412, "grad_norm": 0.3648732602596283, "learning_rate": 1.0563641725086417e-05, "loss": 0.4897, "step": 45511 }, { "epoch": 0.9652393374477741, "grad_norm": 0.3775700330734253, "learning_rate": 1.0563308759614164e-05, "loss": 0.5279, "step": 45512 }, { "epoch": 0.9652605459057072, "grad_norm": 0.34542495012283325, "learning_rate": 1.05629757935154e-05, "loss": 0.4524, "step": 45513 }, { "epoch": 0.9652817543636402, "grad_norm": 0.5214968323707581, "learning_rate": 1.05626428267905e-05, "loss": 0.4322, "step": 45514 }, { "epoch": 0.9653029628215732, "grad_norm": 0.36736738681793213, "learning_rate": 1.0562309859439837e-05, "loss": 0.4838, "step": 45515 }, { "epoch": 0.9653241712795063, "grad_norm": 0.341098815202713, "learning_rate": 1.0561976891463775e-05, "loss": 0.4751, "step": 45516 }, { "epoch": 0.9653453797374393, "grad_norm": 0.35633546113967896, "learning_rate": 1.0561643922862687e-05, "loss": 0.4607, "step": 45517 }, { "epoch": 0.9653665881953724, "grad_norm": 0.36141446232795715, "learning_rate": 1.0561310953636945e-05, "loss": 0.5054, "step": 45518 }, { "epoch": 0.9653877966533053, "grad_norm": 0.35812994837760925, "learning_rate": 1.0560977983786913e-05, "loss": 0.4725, "step": 45519 }, { "epoch": 0.9654090051112384, "grad_norm": 0.34275251626968384, "learning_rate": 1.0560645013312971e-05, "loss": 0.5145, "step": 45520 }, { "epoch": 0.9654302135691714, "grad_norm": 0.43101176619529724, "learning_rate": 1.0560312042215486e-05, "loss": 0.5226, "step": 45521 }, { "epoch": 0.9654514220271044, "grad_norm": 0.3581036627292633, "learning_rate": 1.0559979070494823e-05, "loss": 0.4271, "step": 45522 }, { "epoch": 0.9654726304850374, "grad_norm": 0.39891496300697327, "learning_rate": 1.055964609815136e-05, "loss": 0.5554, "step": 45523 }, { "epoch": 0.9654938389429705, "grad_norm": 0.3925722539424896, "learning_rate": 1.055931312518546e-05, "loss": 0.4637, "step": 45524 }, { "epoch": 0.9655150474009034, "grad_norm": 0.3910709619522095, "learning_rate": 1.0558980151597495e-05, "loss": 0.4536, "step": 45525 }, { "epoch": 0.9655362558588365, "grad_norm": 0.45348697900772095, "learning_rate": 1.0558647177387842e-05, "loss": 0.4564, "step": 45526 }, { "epoch": 0.9655574643167695, "grad_norm": 0.37939879298210144, "learning_rate": 1.0558314202556866e-05, "loss": 0.5297, "step": 45527 }, { "epoch": 0.9655786727747026, "grad_norm": 0.4630197286605835, "learning_rate": 1.0557981227104935e-05, "loss": 0.4986, "step": 45528 }, { "epoch": 0.9655998812326356, "grad_norm": 0.3573450446128845, "learning_rate": 1.0557648251032424e-05, "loss": 0.4562, "step": 45529 }, { "epoch": 0.9656210896905686, "grad_norm": 0.409185528755188, "learning_rate": 1.0557315274339698e-05, "loss": 0.5793, "step": 45530 }, { "epoch": 0.9656422981485017, "grad_norm": 0.3902926445007324, "learning_rate": 1.0556982297027136e-05, "loss": 0.5353, "step": 45531 }, { "epoch": 0.9656635066064346, "grad_norm": 0.756614625453949, "learning_rate": 1.0556649319095098e-05, "loss": 0.5786, "step": 45532 }, { "epoch": 0.9656847150643677, "grad_norm": 0.3680424690246582, "learning_rate": 1.0556316340543962e-05, "loss": 0.4873, "step": 45533 }, { "epoch": 0.9657059235223007, "grad_norm": 0.43769583106040955, "learning_rate": 1.0555983361374098e-05, "loss": 0.5137, "step": 45534 }, { "epoch": 0.9657271319802337, "grad_norm": 0.326021283864975, "learning_rate": 1.055565038158587e-05, "loss": 0.4339, "step": 45535 }, { "epoch": 0.9657483404381667, "grad_norm": 0.3894062638282776, "learning_rate": 1.0555317401179656e-05, "loss": 0.4905, "step": 45536 }, { "epoch": 0.9657695488960998, "grad_norm": 1.0037866830825806, "learning_rate": 1.0554984420155822e-05, "loss": 0.5117, "step": 45537 }, { "epoch": 0.9657907573540327, "grad_norm": 0.36785224080085754, "learning_rate": 1.0554651438514739e-05, "loss": 0.5466, "step": 45538 }, { "epoch": 0.9658119658119658, "grad_norm": 0.34311607480049133, "learning_rate": 1.0554318456256773e-05, "loss": 0.4903, "step": 45539 }, { "epoch": 0.9658331742698988, "grad_norm": 0.3634747564792633, "learning_rate": 1.0553985473382304e-05, "loss": 0.5103, "step": 45540 }, { "epoch": 0.9658543827278319, "grad_norm": 0.35345974564552307, "learning_rate": 1.0553652489891699e-05, "loss": 0.495, "step": 45541 }, { "epoch": 0.9658755911857649, "grad_norm": 0.3494979441165924, "learning_rate": 1.0553319505785323e-05, "loss": 0.5138, "step": 45542 }, { "epoch": 0.9658967996436979, "grad_norm": 0.3865531086921692, "learning_rate": 1.0552986521063551e-05, "loss": 0.4728, "step": 45543 }, { "epoch": 0.965918008101631, "grad_norm": 0.3697926998138428, "learning_rate": 1.055265353572675e-05, "loss": 0.5136, "step": 45544 }, { "epoch": 0.9659392165595639, "grad_norm": 0.3572513461112976, "learning_rate": 1.0552320549775293e-05, "loss": 0.4852, "step": 45545 }, { "epoch": 0.965960425017497, "grad_norm": 0.36916467547416687, "learning_rate": 1.0551987563209552e-05, "loss": 0.4448, "step": 45546 }, { "epoch": 0.96598163347543, "grad_norm": 0.6226433515548706, "learning_rate": 1.0551654576029892e-05, "loss": 0.5407, "step": 45547 }, { "epoch": 0.966002841933363, "grad_norm": 0.3959551155567169, "learning_rate": 1.0551321588236692e-05, "loss": 0.4974, "step": 45548 }, { "epoch": 0.966024050391296, "grad_norm": 0.3852112293243408, "learning_rate": 1.0550988599830313e-05, "loss": 0.4816, "step": 45549 }, { "epoch": 0.9660452588492291, "grad_norm": 0.3932783007621765, "learning_rate": 1.0550655610811129e-05, "loss": 0.5177, "step": 45550 }, { "epoch": 0.9660664673071621, "grad_norm": 0.3251875042915344, "learning_rate": 1.0550322621179511e-05, "loss": 0.4617, "step": 45551 }, { "epoch": 0.9660876757650951, "grad_norm": 0.37424254417419434, "learning_rate": 1.054998963093583e-05, "loss": 0.4171, "step": 45552 }, { "epoch": 0.9661088842230281, "grad_norm": 0.32084497809410095, "learning_rate": 1.0549656640080455e-05, "loss": 0.4062, "step": 45553 }, { "epoch": 0.9661300926809612, "grad_norm": 0.4023998975753784, "learning_rate": 1.0549323648613757e-05, "loss": 0.4252, "step": 45554 }, { "epoch": 0.9661513011388941, "grad_norm": 0.3212634027004242, "learning_rate": 1.0548990656536106e-05, "loss": 0.53, "step": 45555 }, { "epoch": 0.9661725095968272, "grad_norm": 0.39716577529907227, "learning_rate": 1.0548657663847873e-05, "loss": 0.4992, "step": 45556 }, { "epoch": 0.9661937180547603, "grad_norm": 0.4084762632846832, "learning_rate": 1.0548324670549424e-05, "loss": 0.53, "step": 45557 }, { "epoch": 0.9662149265126933, "grad_norm": 0.3364500105381012, "learning_rate": 1.0547991676641137e-05, "loss": 0.4204, "step": 45558 }, { "epoch": 0.9662361349706263, "grad_norm": 0.3609371781349182, "learning_rate": 1.0547658682123377e-05, "loss": 0.4923, "step": 45559 }, { "epoch": 0.9662573434285593, "grad_norm": 0.36851775646209717, "learning_rate": 1.0547325686996514e-05, "loss": 0.51, "step": 45560 }, { "epoch": 0.9662785518864924, "grad_norm": 0.3452568054199219, "learning_rate": 1.0546992691260922e-05, "loss": 0.4814, "step": 45561 }, { "epoch": 0.9662997603444253, "grad_norm": 0.40974247455596924, "learning_rate": 1.0546659694916971e-05, "loss": 0.5912, "step": 45562 }, { "epoch": 0.9663209688023584, "grad_norm": 0.9040464758872986, "learning_rate": 1.054632669796503e-05, "loss": 0.5225, "step": 45563 }, { "epoch": 0.9663421772602914, "grad_norm": 0.3653562366962433, "learning_rate": 1.0545993700405465e-05, "loss": 0.4407, "step": 45564 }, { "epoch": 0.9663633857182244, "grad_norm": 0.35850587487220764, "learning_rate": 1.0545660702238652e-05, "loss": 0.4232, "step": 45565 }, { "epoch": 0.9663845941761574, "grad_norm": 0.37607622146606445, "learning_rate": 1.0545327703464963e-05, "loss": 0.537, "step": 45566 }, { "epoch": 0.9664058026340905, "grad_norm": 0.34883663058280945, "learning_rate": 1.0544994704084762e-05, "loss": 0.5013, "step": 45567 }, { "epoch": 0.9664270110920234, "grad_norm": 0.3277026116847992, "learning_rate": 1.0544661704098425e-05, "loss": 0.4496, "step": 45568 }, { "epoch": 0.9664482195499565, "grad_norm": 0.37352702021598816, "learning_rate": 1.054432870350632e-05, "loss": 0.547, "step": 45569 }, { "epoch": 0.9664694280078896, "grad_norm": 0.3523080050945282, "learning_rate": 1.054399570230881e-05, "loss": 0.4656, "step": 45570 }, { "epoch": 0.9664906364658226, "grad_norm": 0.518012285232544, "learning_rate": 1.0543662700506283e-05, "loss": 0.4359, "step": 45571 }, { "epoch": 0.9665118449237556, "grad_norm": 0.37794843316078186, "learning_rate": 1.0543329698099096e-05, "loss": 0.4723, "step": 45572 }, { "epoch": 0.9665330533816886, "grad_norm": 0.38719603419303894, "learning_rate": 1.054299669508762e-05, "loss": 0.4545, "step": 45573 }, { "epoch": 0.9665542618396217, "grad_norm": 0.49939805269241333, "learning_rate": 1.054266369147223e-05, "loss": 0.4451, "step": 45574 }, { "epoch": 0.9665754702975546, "grad_norm": 0.36871635913848877, "learning_rate": 1.0542330687253293e-05, "loss": 0.4595, "step": 45575 }, { "epoch": 0.9665966787554877, "grad_norm": 0.390210896730423, "learning_rate": 1.0541997682431183e-05, "loss": 0.5506, "step": 45576 }, { "epoch": 0.9666178872134207, "grad_norm": 0.34258970618247986, "learning_rate": 1.0541664677006263e-05, "loss": 0.5027, "step": 45577 }, { "epoch": 0.9666390956713538, "grad_norm": 0.39518511295318604, "learning_rate": 1.0541331670978911e-05, "loss": 0.4531, "step": 45578 }, { "epoch": 0.9666603041292867, "grad_norm": 0.33651602268218994, "learning_rate": 1.0540998664349493e-05, "loss": 0.4289, "step": 45579 }, { "epoch": 0.9666815125872198, "grad_norm": 0.4770011305809021, "learning_rate": 1.0540665657118386e-05, "loss": 0.568, "step": 45580 }, { "epoch": 0.9667027210451528, "grad_norm": 0.44838500022888184, "learning_rate": 1.0540332649285949e-05, "loss": 0.4443, "step": 45581 }, { "epoch": 0.9667239295030858, "grad_norm": 0.3819064795970917, "learning_rate": 1.0539999640852563e-05, "loss": 0.4946, "step": 45582 }, { "epoch": 0.9667451379610189, "grad_norm": 0.35663124918937683, "learning_rate": 1.0539666631818593e-05, "loss": 0.4737, "step": 45583 }, { "epoch": 0.9667663464189519, "grad_norm": 0.35259461402893066, "learning_rate": 1.0539333622184409e-05, "loss": 0.4595, "step": 45584 }, { "epoch": 0.966787554876885, "grad_norm": 0.3465222418308258, "learning_rate": 1.0539000611950385e-05, "loss": 0.416, "step": 45585 }, { "epoch": 0.9668087633348179, "grad_norm": 0.46327489614486694, "learning_rate": 1.0538667601116888e-05, "loss": 0.4818, "step": 45586 }, { "epoch": 0.966829971792751, "grad_norm": 0.43533408641815186, "learning_rate": 1.0538334589684289e-05, "loss": 0.4506, "step": 45587 }, { "epoch": 0.966851180250684, "grad_norm": 0.3144274055957794, "learning_rate": 1.053800157765296e-05, "loss": 0.4847, "step": 45588 }, { "epoch": 0.966872388708617, "grad_norm": 0.38744592666625977, "learning_rate": 1.053766856502327e-05, "loss": 0.4774, "step": 45589 }, { "epoch": 0.96689359716655, "grad_norm": 0.6016002893447876, "learning_rate": 1.0537335551795589e-05, "loss": 0.4849, "step": 45590 }, { "epoch": 0.9669148056244831, "grad_norm": 1.0485520362854004, "learning_rate": 1.053700253797029e-05, "loss": 0.4647, "step": 45591 }, { "epoch": 0.966936014082416, "grad_norm": 0.4052964150905609, "learning_rate": 1.053666952354774e-05, "loss": 0.5093, "step": 45592 }, { "epoch": 0.9669572225403491, "grad_norm": 0.3691859245300293, "learning_rate": 1.0536336508528312e-05, "loss": 0.4698, "step": 45593 }, { "epoch": 0.9669784309982821, "grad_norm": 0.3601536750793457, "learning_rate": 1.0536003492912374e-05, "loss": 0.5044, "step": 45594 }, { "epoch": 0.9669996394562151, "grad_norm": 0.3846772015094757, "learning_rate": 1.0535670476700298e-05, "loss": 0.4396, "step": 45595 }, { "epoch": 0.9670208479141481, "grad_norm": 0.36779099702835083, "learning_rate": 1.0535337459892451e-05, "loss": 0.4334, "step": 45596 }, { "epoch": 0.9670420563720812, "grad_norm": 0.41789770126342773, "learning_rate": 1.0535004442489211e-05, "loss": 0.6139, "step": 45597 }, { "epoch": 0.9670632648300143, "grad_norm": 0.3266197741031647, "learning_rate": 1.0534671424490941e-05, "loss": 0.3855, "step": 45598 }, { "epoch": 0.9670844732879472, "grad_norm": 0.33721333742141724, "learning_rate": 1.0534338405898015e-05, "loss": 0.4881, "step": 45599 }, { "epoch": 0.9671056817458803, "grad_norm": 0.381102979183197, "learning_rate": 1.0534005386710803e-05, "loss": 0.4536, "step": 45600 }, { "epoch": 0.9671268902038133, "grad_norm": 0.37355026602745056, "learning_rate": 1.0533672366929674e-05, "loss": 0.4556, "step": 45601 }, { "epoch": 0.9671480986617463, "grad_norm": 0.3487328290939331, "learning_rate": 1.0533339346554999e-05, "loss": 0.4907, "step": 45602 }, { "epoch": 0.9671693071196793, "grad_norm": 0.34230518341064453, "learning_rate": 1.053300632558715e-05, "loss": 0.4658, "step": 45603 }, { "epoch": 0.9671905155776124, "grad_norm": 0.35564693808555603, "learning_rate": 1.0532673304026492e-05, "loss": 0.5141, "step": 45604 }, { "epoch": 0.9672117240355453, "grad_norm": 0.3579171299934387, "learning_rate": 1.0532340281873405e-05, "loss": 0.4755, "step": 45605 }, { "epoch": 0.9672329324934784, "grad_norm": 0.3435042202472687, "learning_rate": 1.0532007259128252e-05, "loss": 0.4957, "step": 45606 }, { "epoch": 0.9672541409514114, "grad_norm": 0.3523634970188141, "learning_rate": 1.0531674235791404e-05, "loss": 0.468, "step": 45607 }, { "epoch": 0.9672753494093445, "grad_norm": 0.45293518900871277, "learning_rate": 1.0531341211863232e-05, "loss": 0.5538, "step": 45608 }, { "epoch": 0.9672965578672774, "grad_norm": 0.3963190019130707, "learning_rate": 1.053100818734411e-05, "loss": 0.4888, "step": 45609 }, { "epoch": 0.9673177663252105, "grad_norm": 0.3414066433906555, "learning_rate": 1.0530675162234401e-05, "loss": 0.5257, "step": 45610 }, { "epoch": 0.9673389747831436, "grad_norm": 0.5217711329460144, "learning_rate": 1.0530342136534482e-05, "loss": 0.5563, "step": 45611 }, { "epoch": 0.9673601832410765, "grad_norm": 0.3458532691001892, "learning_rate": 1.0530009110244722e-05, "loss": 0.4609, "step": 45612 }, { "epoch": 0.9673813916990096, "grad_norm": 0.32577234506607056, "learning_rate": 1.0529676083365487e-05, "loss": 0.4276, "step": 45613 }, { "epoch": 0.9674026001569426, "grad_norm": 0.6056104898452759, "learning_rate": 1.0529343055897156e-05, "loss": 0.4581, "step": 45614 }, { "epoch": 0.9674238086148756, "grad_norm": 0.32254093885421753, "learning_rate": 1.0529010027840087e-05, "loss": 0.4417, "step": 45615 }, { "epoch": 0.9674450170728086, "grad_norm": 0.39583083987236023, "learning_rate": 1.0528676999194665e-05, "loss": 0.4978, "step": 45616 }, { "epoch": 0.9674662255307417, "grad_norm": 0.3692883551120758, "learning_rate": 1.052834396996125e-05, "loss": 0.4559, "step": 45617 }, { "epoch": 0.9674874339886746, "grad_norm": 0.39129406213760376, "learning_rate": 1.0528010940140214e-05, "loss": 0.4936, "step": 45618 }, { "epoch": 0.9675086424466077, "grad_norm": 0.381429523229599, "learning_rate": 1.0527677909731933e-05, "loss": 0.4254, "step": 45619 }, { "epoch": 0.9675298509045407, "grad_norm": 0.3372041583061218, "learning_rate": 1.052734487873677e-05, "loss": 0.4709, "step": 45620 }, { "epoch": 0.9675510593624738, "grad_norm": 0.41318172216415405, "learning_rate": 1.0527011847155099e-05, "loss": 0.4552, "step": 45621 }, { "epoch": 0.9675722678204067, "grad_norm": 0.36534377932548523, "learning_rate": 1.052667881498729e-05, "loss": 0.4849, "step": 45622 }, { "epoch": 0.9675934762783398, "grad_norm": 0.3634743094444275, "learning_rate": 1.0526345782233713e-05, "loss": 0.5037, "step": 45623 }, { "epoch": 0.9676146847362729, "grad_norm": 0.36843323707580566, "learning_rate": 1.0526012748894741e-05, "loss": 0.445, "step": 45624 }, { "epoch": 0.9676358931942058, "grad_norm": 0.3256564736366272, "learning_rate": 1.052567971497074e-05, "loss": 0.4585, "step": 45625 }, { "epoch": 0.9676571016521389, "grad_norm": 0.4171292781829834, "learning_rate": 1.0525346680462083e-05, "loss": 0.5217, "step": 45626 }, { "epoch": 0.9676783101100719, "grad_norm": 0.3792767822742462, "learning_rate": 1.052501364536914e-05, "loss": 0.4916, "step": 45627 }, { "epoch": 0.967699518568005, "grad_norm": 0.4180530607700348, "learning_rate": 1.0524680609692282e-05, "loss": 0.5701, "step": 45628 }, { "epoch": 0.9677207270259379, "grad_norm": 0.37369123101234436, "learning_rate": 1.0524347573431877e-05, "loss": 0.5205, "step": 45629 }, { "epoch": 0.967741935483871, "grad_norm": 0.39674270153045654, "learning_rate": 1.0524014536588299e-05, "loss": 0.4651, "step": 45630 }, { "epoch": 0.967763143941804, "grad_norm": 0.3965931236743927, "learning_rate": 1.0523681499161917e-05, "loss": 0.5054, "step": 45631 }, { "epoch": 0.967784352399737, "grad_norm": 0.39278796315193176, "learning_rate": 1.05233484611531e-05, "loss": 0.4772, "step": 45632 }, { "epoch": 0.96780556085767, "grad_norm": 0.35414519906044006, "learning_rate": 1.052301542256222e-05, "loss": 0.5439, "step": 45633 }, { "epoch": 0.9678267693156031, "grad_norm": 0.40935322642326355, "learning_rate": 1.0522682383389644e-05, "loss": 0.4834, "step": 45634 }, { "epoch": 0.967847977773536, "grad_norm": 0.34736964106559753, "learning_rate": 1.0522349343635747e-05, "loss": 0.4806, "step": 45635 }, { "epoch": 0.9678691862314691, "grad_norm": 0.3555474579334259, "learning_rate": 1.05220163033009e-05, "loss": 0.4865, "step": 45636 }, { "epoch": 0.9678903946894021, "grad_norm": 0.3731232285499573, "learning_rate": 1.0521683262385469e-05, "loss": 0.4921, "step": 45637 }, { "epoch": 0.9679116031473352, "grad_norm": 0.38206198811531067, "learning_rate": 1.0521350220889825e-05, "loss": 0.5138, "step": 45638 }, { "epoch": 0.9679328116052682, "grad_norm": 0.5461351275444031, "learning_rate": 1.0521017178814343e-05, "loss": 0.4741, "step": 45639 }, { "epoch": 0.9679540200632012, "grad_norm": 0.34970027208328247, "learning_rate": 1.0520684136159386e-05, "loss": 0.5211, "step": 45640 }, { "epoch": 0.9679752285211343, "grad_norm": 0.5712822675704956, "learning_rate": 1.0520351092925331e-05, "loss": 0.4428, "step": 45641 }, { "epoch": 0.9679964369790672, "grad_norm": 0.3558635115623474, "learning_rate": 1.0520018049112548e-05, "loss": 0.5095, "step": 45642 }, { "epoch": 0.9680176454370003, "grad_norm": 0.44432446360588074, "learning_rate": 1.0519685004721403e-05, "loss": 0.4946, "step": 45643 }, { "epoch": 0.9680388538949333, "grad_norm": 0.34060290455818176, "learning_rate": 1.051935195975227e-05, "loss": 0.4258, "step": 45644 }, { "epoch": 0.9680600623528663, "grad_norm": 0.37908822298049927, "learning_rate": 1.051901891420552e-05, "loss": 0.5301, "step": 45645 }, { "epoch": 0.9680812708107993, "grad_norm": 0.3268999457359314, "learning_rate": 1.0518685868081518e-05, "loss": 0.4289, "step": 45646 }, { "epoch": 0.9681024792687324, "grad_norm": 0.37827053666114807, "learning_rate": 1.0518352821380637e-05, "loss": 0.5101, "step": 45647 }, { "epoch": 0.9681236877266653, "grad_norm": 0.7389293313026428, "learning_rate": 1.0518019774103251e-05, "loss": 0.5123, "step": 45648 }, { "epoch": 0.9681448961845984, "grad_norm": 0.415296733379364, "learning_rate": 1.051768672624973e-05, "loss": 0.5212, "step": 45649 }, { "epoch": 0.9681661046425314, "grad_norm": 0.42371854186058044, "learning_rate": 1.0517353677820439e-05, "loss": 0.4432, "step": 45650 }, { "epoch": 0.9681873131004645, "grad_norm": 0.3816770613193512, "learning_rate": 1.0517020628815755e-05, "loss": 0.5391, "step": 45651 }, { "epoch": 0.9682085215583975, "grad_norm": 0.38710713386535645, "learning_rate": 1.0516687579236042e-05, "loss": 0.5428, "step": 45652 }, { "epoch": 0.9682297300163305, "grad_norm": 0.36662545800209045, "learning_rate": 1.0516354529081675e-05, "loss": 0.4209, "step": 45653 }, { "epoch": 0.9682509384742636, "grad_norm": 0.6622318029403687, "learning_rate": 1.0516021478353022e-05, "loss": 0.5138, "step": 45654 }, { "epoch": 0.9682721469321965, "grad_norm": 0.3731904625892639, "learning_rate": 1.0515688427050455e-05, "loss": 0.5539, "step": 45655 }, { "epoch": 0.9682933553901296, "grad_norm": 0.39261019229888916, "learning_rate": 1.0515355375174343e-05, "loss": 0.4949, "step": 45656 }, { "epoch": 0.9683145638480626, "grad_norm": 0.43125617504119873, "learning_rate": 1.0515022322725059e-05, "loss": 0.51, "step": 45657 }, { "epoch": 0.9683357723059957, "grad_norm": 0.3569934666156769, "learning_rate": 1.051468926970297e-05, "loss": 0.4215, "step": 45658 }, { "epoch": 0.9683569807639286, "grad_norm": 0.37897157669067383, "learning_rate": 1.051435621610845e-05, "loss": 0.4623, "step": 45659 }, { "epoch": 0.9683781892218617, "grad_norm": 0.37258321046829224, "learning_rate": 1.0514023161941863e-05, "loss": 0.5047, "step": 45660 }, { "epoch": 0.9683993976797947, "grad_norm": 0.3590218722820282, "learning_rate": 1.051369010720359e-05, "loss": 0.4984, "step": 45661 }, { "epoch": 0.9684206061377277, "grad_norm": 0.3725093603134155, "learning_rate": 1.0513357051893993e-05, "loss": 0.514, "step": 45662 }, { "epoch": 0.9684418145956607, "grad_norm": 0.3426489233970642, "learning_rate": 1.0513023996013443e-05, "loss": 0.5319, "step": 45663 }, { "epoch": 0.9684630230535938, "grad_norm": 0.36706262826919556, "learning_rate": 1.0512690939562313e-05, "loss": 0.5284, "step": 45664 }, { "epoch": 0.9684842315115268, "grad_norm": 0.3881368637084961, "learning_rate": 1.0512357882540975e-05, "loss": 0.426, "step": 45665 }, { "epoch": 0.9685054399694598, "grad_norm": 0.34785428643226624, "learning_rate": 1.0512024824949791e-05, "loss": 0.4487, "step": 45666 }, { "epoch": 0.9685266484273929, "grad_norm": 0.3748268187046051, "learning_rate": 1.0511691766789142e-05, "loss": 0.5502, "step": 45667 }, { "epoch": 0.9685478568853259, "grad_norm": 0.4298781752586365, "learning_rate": 1.0511358708059395e-05, "loss": 0.5151, "step": 45668 }, { "epoch": 0.9685690653432589, "grad_norm": 0.4139798581600189, "learning_rate": 1.0511025648760918e-05, "loss": 0.509, "step": 45669 }, { "epoch": 0.9685902738011919, "grad_norm": 0.38474276661872864, "learning_rate": 1.0510692588894082e-05, "loss": 0.4965, "step": 45670 }, { "epoch": 0.968611482259125, "grad_norm": 0.37785759568214417, "learning_rate": 1.0510359528459258e-05, "loss": 0.3502, "step": 45671 }, { "epoch": 0.9686326907170579, "grad_norm": 0.3714796006679535, "learning_rate": 1.0510026467456818e-05, "loss": 0.4879, "step": 45672 }, { "epoch": 0.968653899174991, "grad_norm": 0.3834417462348938, "learning_rate": 1.0509693405887129e-05, "loss": 0.4878, "step": 45673 }, { "epoch": 0.968675107632924, "grad_norm": 0.35260701179504395, "learning_rate": 1.0509360343750563e-05, "loss": 0.5132, "step": 45674 }, { "epoch": 0.968696316090857, "grad_norm": 0.36317178606987, "learning_rate": 1.0509027281047495e-05, "loss": 0.5395, "step": 45675 }, { "epoch": 0.96871752454879, "grad_norm": 0.3711792528629303, "learning_rate": 1.0508694217778288e-05, "loss": 0.4263, "step": 45676 }, { "epoch": 0.9687387330067231, "grad_norm": 0.3612854480743408, "learning_rate": 1.0508361153943316e-05, "loss": 0.4186, "step": 45677 }, { "epoch": 0.9687599414646562, "grad_norm": 0.41545072197914124, "learning_rate": 1.050802808954295e-05, "loss": 0.4981, "step": 45678 }, { "epoch": 0.9687811499225891, "grad_norm": 0.31977981328964233, "learning_rate": 1.0507695024577558e-05, "loss": 0.3751, "step": 45679 }, { "epoch": 0.9688023583805222, "grad_norm": 0.36308082938194275, "learning_rate": 1.0507361959047514e-05, "loss": 0.4342, "step": 45680 }, { "epoch": 0.9688235668384552, "grad_norm": 0.43406662344932556, "learning_rate": 1.0507028892953185e-05, "loss": 0.495, "step": 45681 }, { "epoch": 0.9688447752963882, "grad_norm": 0.3260347247123718, "learning_rate": 1.0506695826294943e-05, "loss": 0.4154, "step": 45682 }, { "epoch": 0.9688659837543212, "grad_norm": 0.39206862449645996, "learning_rate": 1.0506362759073159e-05, "loss": 0.5028, "step": 45683 }, { "epoch": 0.9688871922122543, "grad_norm": 0.3661453127861023, "learning_rate": 1.0506029691288201e-05, "loss": 0.4025, "step": 45684 }, { "epoch": 0.9689084006701872, "grad_norm": 0.375616192817688, "learning_rate": 1.0505696622940444e-05, "loss": 0.4122, "step": 45685 }, { "epoch": 0.9689296091281203, "grad_norm": 0.41917145252227783, "learning_rate": 1.0505363554030253e-05, "loss": 0.4937, "step": 45686 }, { "epoch": 0.9689508175860533, "grad_norm": 0.3309529423713684, "learning_rate": 1.0505030484558004e-05, "loss": 0.4894, "step": 45687 }, { "epoch": 0.9689720260439864, "grad_norm": 0.3847028613090515, "learning_rate": 1.0504697414524062e-05, "loss": 0.4955, "step": 45688 }, { "epoch": 0.9689932345019193, "grad_norm": 0.3884204924106598, "learning_rate": 1.05043643439288e-05, "loss": 0.4208, "step": 45689 }, { "epoch": 0.9690144429598524, "grad_norm": 0.3568781614303589, "learning_rate": 1.050403127277259e-05, "loss": 0.4823, "step": 45690 }, { "epoch": 0.9690356514177854, "grad_norm": 0.3508071005344391, "learning_rate": 1.0503698201055798e-05, "loss": 0.4789, "step": 45691 }, { "epoch": 0.9690568598757184, "grad_norm": 0.4123779833316803, "learning_rate": 1.0503365128778797e-05, "loss": 0.4712, "step": 45692 }, { "epoch": 0.9690780683336515, "grad_norm": 0.3679484724998474, "learning_rate": 1.0503032055941962e-05, "loss": 0.4784, "step": 45693 }, { "epoch": 0.9690992767915845, "grad_norm": 0.3814432919025421, "learning_rate": 1.0502698982545655e-05, "loss": 0.4886, "step": 45694 }, { "epoch": 0.9691204852495175, "grad_norm": 0.39907586574554443, "learning_rate": 1.0502365908590256e-05, "loss": 0.4732, "step": 45695 }, { "epoch": 0.9691416937074505, "grad_norm": 0.35804668068885803, "learning_rate": 1.0502032834076126e-05, "loss": 0.4444, "step": 45696 }, { "epoch": 0.9691629021653836, "grad_norm": 0.34920287132263184, "learning_rate": 1.0501699759003639e-05, "loss": 0.4666, "step": 45697 }, { "epoch": 0.9691841106233166, "grad_norm": 0.3755108714103699, "learning_rate": 1.0501366683373166e-05, "loss": 0.4962, "step": 45698 }, { "epoch": 0.9692053190812496, "grad_norm": 0.45864927768707275, "learning_rate": 1.0501033607185079e-05, "loss": 0.5073, "step": 45699 }, { "epoch": 0.9692265275391826, "grad_norm": 0.3340415358543396, "learning_rate": 1.0500700530439744e-05, "loss": 0.4629, "step": 45700 }, { "epoch": 0.9692477359971157, "grad_norm": 0.3593035042285919, "learning_rate": 1.0500367453137536e-05, "loss": 0.5078, "step": 45701 }, { "epoch": 0.9692689444550486, "grad_norm": 0.33403638005256653, "learning_rate": 1.0500034375278822e-05, "loss": 0.4616, "step": 45702 }, { "epoch": 0.9692901529129817, "grad_norm": 0.3914467394351959, "learning_rate": 1.0499701296863977e-05, "loss": 0.463, "step": 45703 }, { "epoch": 0.9693113613709147, "grad_norm": 0.4404570162296295, "learning_rate": 1.0499368217893368e-05, "loss": 0.5554, "step": 45704 }, { "epoch": 0.9693325698288477, "grad_norm": 0.3801911473274231, "learning_rate": 1.0499035138367367e-05, "loss": 0.4359, "step": 45705 }, { "epoch": 0.9693537782867808, "grad_norm": 0.3723212778568268, "learning_rate": 1.0498702058286338e-05, "loss": 0.493, "step": 45706 }, { "epoch": 0.9693749867447138, "grad_norm": 0.38694050908088684, "learning_rate": 1.0498368977650664e-05, "loss": 0.4583, "step": 45707 }, { "epoch": 0.9693961952026469, "grad_norm": 0.3498857915401459, "learning_rate": 1.0498035896460704e-05, "loss": 0.5195, "step": 45708 }, { "epoch": 0.9694174036605798, "grad_norm": 0.37596213817596436, "learning_rate": 1.0497702814716834e-05, "loss": 0.4283, "step": 45709 }, { "epoch": 0.9694386121185129, "grad_norm": 0.3248905837535858, "learning_rate": 1.0497369732419424e-05, "loss": 0.4494, "step": 45710 }, { "epoch": 0.9694598205764459, "grad_norm": 0.4069567322731018, "learning_rate": 1.0497036649568838e-05, "loss": 0.6143, "step": 45711 }, { "epoch": 0.9694810290343789, "grad_norm": 0.3608889877796173, "learning_rate": 1.049670356616546e-05, "loss": 0.484, "step": 45712 }, { "epoch": 0.9695022374923119, "grad_norm": 0.4029812514781952, "learning_rate": 1.049637048220965e-05, "loss": 0.4982, "step": 45713 }, { "epoch": 0.969523445950245, "grad_norm": 0.35363954305648804, "learning_rate": 1.049603739770178e-05, "loss": 0.4609, "step": 45714 }, { "epoch": 0.9695446544081779, "grad_norm": 0.5530892610549927, "learning_rate": 1.0495704312642223e-05, "loss": 0.4878, "step": 45715 }, { "epoch": 0.969565862866111, "grad_norm": 0.39104920625686646, "learning_rate": 1.0495371227031346e-05, "loss": 0.4557, "step": 45716 }, { "epoch": 0.969587071324044, "grad_norm": 0.4672723710536957, "learning_rate": 1.0495038140869525e-05, "loss": 0.5205, "step": 45717 }, { "epoch": 0.9696082797819771, "grad_norm": 0.4115249812602997, "learning_rate": 1.0494705054157122e-05, "loss": 0.4893, "step": 45718 }, { "epoch": 0.9696294882399101, "grad_norm": 0.38178524374961853, "learning_rate": 1.0494371966894516e-05, "loss": 0.4784, "step": 45719 }, { "epoch": 0.9696506966978431, "grad_norm": 0.3756224811077118, "learning_rate": 1.049403887908207e-05, "loss": 0.5142, "step": 45720 }, { "epoch": 0.9696719051557762, "grad_norm": 0.36177676916122437, "learning_rate": 1.0493705790720164e-05, "loss": 0.4885, "step": 45721 }, { "epoch": 0.9696931136137091, "grad_norm": 0.34166646003723145, "learning_rate": 1.0493372701809157e-05, "loss": 0.4452, "step": 45722 }, { "epoch": 0.9697143220716422, "grad_norm": 0.35995569825172424, "learning_rate": 1.049303961234943e-05, "loss": 0.4402, "step": 45723 }, { "epoch": 0.9697355305295752, "grad_norm": 0.41724759340286255, "learning_rate": 1.0492706522341344e-05, "loss": 0.4576, "step": 45724 }, { "epoch": 0.9697567389875082, "grad_norm": 0.355000376701355, "learning_rate": 1.0492373431785276e-05, "loss": 0.493, "step": 45725 }, { "epoch": 0.9697779474454412, "grad_norm": 0.3525576591491699, "learning_rate": 1.0492040340681595e-05, "loss": 0.4281, "step": 45726 }, { "epoch": 0.9697991559033743, "grad_norm": 0.41606634855270386, "learning_rate": 1.0491707249030672e-05, "loss": 0.5849, "step": 45727 }, { "epoch": 0.9698203643613073, "grad_norm": 0.3553216755390167, "learning_rate": 1.0491374156832875e-05, "loss": 0.5314, "step": 45728 }, { "epoch": 0.9698415728192403, "grad_norm": 0.423404723405838, "learning_rate": 1.0491041064088577e-05, "loss": 0.5235, "step": 45729 }, { "epoch": 0.9698627812771733, "grad_norm": 0.38263219594955444, "learning_rate": 1.0490707970798146e-05, "loss": 0.5205, "step": 45730 }, { "epoch": 0.9698839897351064, "grad_norm": 1.1808112859725952, "learning_rate": 1.0490374876961953e-05, "loss": 0.4785, "step": 45731 }, { "epoch": 0.9699051981930393, "grad_norm": 0.4841735363006592, "learning_rate": 1.049004178258037e-05, "loss": 0.4868, "step": 45732 }, { "epoch": 0.9699264066509724, "grad_norm": 0.40351784229278564, "learning_rate": 1.048970868765377e-05, "loss": 0.5014, "step": 45733 }, { "epoch": 0.9699476151089055, "grad_norm": 0.3384082615375519, "learning_rate": 1.0489375592182517e-05, "loss": 0.4779, "step": 45734 }, { "epoch": 0.9699688235668384, "grad_norm": 0.3525158762931824, "learning_rate": 1.0489042496166988e-05, "loss": 0.5309, "step": 45735 }, { "epoch": 0.9699900320247715, "grad_norm": 0.4330393671989441, "learning_rate": 1.0488709399607547e-05, "loss": 0.5013, "step": 45736 }, { "epoch": 0.9700112404827045, "grad_norm": 0.3973539471626282, "learning_rate": 1.0488376302504567e-05, "loss": 0.4313, "step": 45737 }, { "epoch": 0.9700324489406376, "grad_norm": 0.38070306181907654, "learning_rate": 1.0488043204858423e-05, "loss": 0.4894, "step": 45738 }, { "epoch": 0.9700536573985705, "grad_norm": 0.40786170959472656, "learning_rate": 1.0487710106669478e-05, "loss": 0.4505, "step": 45739 }, { "epoch": 0.9700748658565036, "grad_norm": 0.3761061131954193, "learning_rate": 1.0487377007938106e-05, "loss": 0.4429, "step": 45740 }, { "epoch": 0.9700960743144366, "grad_norm": 0.3404053747653961, "learning_rate": 1.048704390866468e-05, "loss": 0.4725, "step": 45741 }, { "epoch": 0.9701172827723696, "grad_norm": 0.3749524652957916, "learning_rate": 1.0486710808849567e-05, "loss": 0.4411, "step": 45742 }, { "epoch": 0.9701384912303026, "grad_norm": 0.3614336848258972, "learning_rate": 1.0486377708493137e-05, "loss": 0.4913, "step": 45743 }, { "epoch": 0.9701596996882357, "grad_norm": 0.39124196767807007, "learning_rate": 1.0486044607595764e-05, "loss": 0.5604, "step": 45744 }, { "epoch": 0.9701809081461686, "grad_norm": 0.39103764295578003, "learning_rate": 1.0485711506157814e-05, "loss": 0.5454, "step": 45745 }, { "epoch": 0.9702021166041017, "grad_norm": 0.3469890356063843, "learning_rate": 1.0485378404179663e-05, "loss": 0.4457, "step": 45746 }, { "epoch": 0.9702233250620348, "grad_norm": 0.38523727655410767, "learning_rate": 1.0485045301661679e-05, "loss": 0.5316, "step": 45747 }, { "epoch": 0.9702445335199678, "grad_norm": 0.35070860385894775, "learning_rate": 1.0484712198604228e-05, "loss": 0.5097, "step": 45748 }, { "epoch": 0.9702657419779008, "grad_norm": 0.3260883688926697, "learning_rate": 1.0484379095007686e-05, "loss": 0.4339, "step": 45749 }, { "epoch": 0.9702869504358338, "grad_norm": 0.3064251244068146, "learning_rate": 1.048404599087242e-05, "loss": 0.4191, "step": 45750 }, { "epoch": 0.9703081588937669, "grad_norm": 0.36819642782211304, "learning_rate": 1.0483712886198804e-05, "loss": 0.3726, "step": 45751 }, { "epoch": 0.9703293673516998, "grad_norm": 0.35823506116867065, "learning_rate": 1.0483379780987208e-05, "loss": 0.4251, "step": 45752 }, { "epoch": 0.9703505758096329, "grad_norm": 0.5623184442520142, "learning_rate": 1.0483046675237998e-05, "loss": 0.4374, "step": 45753 }, { "epoch": 0.9703717842675659, "grad_norm": 0.3755562901496887, "learning_rate": 1.048271356895155e-05, "loss": 0.412, "step": 45754 }, { "epoch": 0.970392992725499, "grad_norm": 0.37848979234695435, "learning_rate": 1.0482380462128235e-05, "loss": 0.566, "step": 45755 }, { "epoch": 0.9704142011834319, "grad_norm": 0.41659125685691833, "learning_rate": 1.0482047354768414e-05, "loss": 0.4515, "step": 45756 }, { "epoch": 0.970435409641365, "grad_norm": 0.8185825347900391, "learning_rate": 1.0481714246872467e-05, "loss": 0.4259, "step": 45757 }, { "epoch": 0.970456618099298, "grad_norm": 0.48331978917121887, "learning_rate": 1.0481381138440765e-05, "loss": 0.5387, "step": 45758 }, { "epoch": 0.970477826557231, "grad_norm": 0.34797438979148865, "learning_rate": 1.048104802947367e-05, "loss": 0.4343, "step": 45759 }, { "epoch": 0.9704990350151641, "grad_norm": 0.36768829822540283, "learning_rate": 1.0480714919971562e-05, "loss": 0.434, "step": 45760 }, { "epoch": 0.9705202434730971, "grad_norm": 0.348871111869812, "learning_rate": 1.0480381809934805e-05, "loss": 0.4875, "step": 45761 }, { "epoch": 0.9705414519310301, "grad_norm": 0.4249565601348877, "learning_rate": 1.048004869936377e-05, "loss": 0.4433, "step": 45762 }, { "epoch": 0.9705626603889631, "grad_norm": 0.37340623140335083, "learning_rate": 1.047971558825883e-05, "loss": 0.4934, "step": 45763 }, { "epoch": 0.9705838688468962, "grad_norm": 0.3584463596343994, "learning_rate": 1.0479382476620355e-05, "loss": 0.5261, "step": 45764 }, { "epoch": 0.9706050773048291, "grad_norm": 0.39150309562683105, "learning_rate": 1.0479049364448714e-05, "loss": 0.5468, "step": 45765 }, { "epoch": 0.9706262857627622, "grad_norm": 0.35785675048828125, "learning_rate": 1.0478716251744281e-05, "loss": 0.4827, "step": 45766 }, { "epoch": 0.9706474942206952, "grad_norm": 0.3839433193206787, "learning_rate": 1.047838313850742e-05, "loss": 0.4792, "step": 45767 }, { "epoch": 0.9706687026786283, "grad_norm": 0.3573494851589203, "learning_rate": 1.0478050024738509e-05, "loss": 0.4416, "step": 45768 }, { "epoch": 0.9706899111365612, "grad_norm": 0.35243526101112366, "learning_rate": 1.0477716910437912e-05, "loss": 0.4707, "step": 45769 }, { "epoch": 0.9707111195944943, "grad_norm": 0.3626123070716858, "learning_rate": 1.0477383795606005e-05, "loss": 0.4833, "step": 45770 }, { "epoch": 0.9707323280524273, "grad_norm": 0.416197270154953, "learning_rate": 1.0477050680243154e-05, "loss": 0.5251, "step": 45771 }, { "epoch": 0.9707535365103603, "grad_norm": 0.4162498712539673, "learning_rate": 1.0476717564349734e-05, "loss": 0.4752, "step": 45772 }, { "epoch": 0.9707747449682933, "grad_norm": 0.36531031131744385, "learning_rate": 1.047638444792611e-05, "loss": 0.4377, "step": 45773 }, { "epoch": 0.9707959534262264, "grad_norm": 0.5020070672035217, "learning_rate": 1.0476051330972656e-05, "loss": 0.5176, "step": 45774 }, { "epoch": 0.9708171618841595, "grad_norm": 0.3679957389831543, "learning_rate": 1.0475718213489743e-05, "loss": 0.447, "step": 45775 }, { "epoch": 0.9708383703420924, "grad_norm": 0.37750542163848877, "learning_rate": 1.0475385095477737e-05, "loss": 0.4965, "step": 45776 }, { "epoch": 0.9708595788000255, "grad_norm": 0.36785754561424255, "learning_rate": 1.0475051976937017e-05, "loss": 0.4214, "step": 45777 }, { "epoch": 0.9708807872579585, "grad_norm": 0.39167487621307373, "learning_rate": 1.0474718857867948e-05, "loss": 0.4379, "step": 45778 }, { "epoch": 0.9709019957158915, "grad_norm": 0.3401401937007904, "learning_rate": 1.0474385738270896e-05, "loss": 0.5164, "step": 45779 }, { "epoch": 0.9709232041738245, "grad_norm": 0.35181549191474915, "learning_rate": 1.047405261814624e-05, "loss": 0.468, "step": 45780 }, { "epoch": 0.9709444126317576, "grad_norm": 0.3717750906944275, "learning_rate": 1.0473719497494344e-05, "loss": 0.4949, "step": 45781 }, { "epoch": 0.9709656210896905, "grad_norm": 0.3960241973400116, "learning_rate": 1.0473386376315582e-05, "loss": 0.5203, "step": 45782 }, { "epoch": 0.9709868295476236, "grad_norm": 0.452793687582016, "learning_rate": 1.0473053254610326e-05, "loss": 0.4609, "step": 45783 }, { "epoch": 0.9710080380055566, "grad_norm": 0.3733394145965576, "learning_rate": 1.0472720132378943e-05, "loss": 0.5352, "step": 45784 }, { "epoch": 0.9710292464634896, "grad_norm": 0.36739200353622437, "learning_rate": 1.0472387009621806e-05, "loss": 0.4865, "step": 45785 }, { "epoch": 0.9710504549214226, "grad_norm": 0.34157636761665344, "learning_rate": 1.0472053886339283e-05, "loss": 0.4465, "step": 45786 }, { "epoch": 0.9710716633793557, "grad_norm": 0.3433738648891449, "learning_rate": 1.0471720762531746e-05, "loss": 0.4087, "step": 45787 }, { "epoch": 0.9710928718372888, "grad_norm": 0.3464702367782593, "learning_rate": 1.0471387638199563e-05, "loss": 0.4954, "step": 45788 }, { "epoch": 0.9711140802952217, "grad_norm": 0.37846967577934265, "learning_rate": 1.0471054513343111e-05, "loss": 0.4823, "step": 45789 }, { "epoch": 0.9711352887531548, "grad_norm": 0.3962709605693817, "learning_rate": 1.0470721387962752e-05, "loss": 0.6012, "step": 45790 }, { "epoch": 0.9711564972110878, "grad_norm": 0.37207406759262085, "learning_rate": 1.0470388262058864e-05, "loss": 0.4868, "step": 45791 }, { "epoch": 0.9711777056690208, "grad_norm": 0.36973264813423157, "learning_rate": 1.0470055135631813e-05, "loss": 0.4797, "step": 45792 }, { "epoch": 0.9711989141269538, "grad_norm": 0.3515470623970032, "learning_rate": 1.046972200868197e-05, "loss": 0.5048, "step": 45793 }, { "epoch": 0.9712201225848869, "grad_norm": 0.4484362006187439, "learning_rate": 1.0469388881209709e-05, "loss": 0.4898, "step": 45794 }, { "epoch": 0.9712413310428198, "grad_norm": 0.35374751687049866, "learning_rate": 1.0469055753215396e-05, "loss": 0.4102, "step": 45795 }, { "epoch": 0.9712625395007529, "grad_norm": 0.3374967873096466, "learning_rate": 1.0468722624699401e-05, "loss": 0.4193, "step": 45796 }, { "epoch": 0.9712837479586859, "grad_norm": 0.4602363109588623, "learning_rate": 1.0468389495662101e-05, "loss": 0.5339, "step": 45797 }, { "epoch": 0.971304956416619, "grad_norm": 0.33771586418151855, "learning_rate": 1.0468056366103857e-05, "loss": 0.5085, "step": 45798 }, { "epoch": 0.9713261648745519, "grad_norm": 0.3460601568222046, "learning_rate": 1.046772323602505e-05, "loss": 0.4201, "step": 45799 }, { "epoch": 0.971347373332485, "grad_norm": 0.42291316390037537, "learning_rate": 1.0467390105426046e-05, "loss": 0.4438, "step": 45800 }, { "epoch": 0.9713685817904181, "grad_norm": 0.5159562230110168, "learning_rate": 1.046705697430721e-05, "loss": 0.4676, "step": 45801 }, { "epoch": 0.971389790248351, "grad_norm": 0.7116568684577942, "learning_rate": 1.0466723842668917e-05, "loss": 0.5322, "step": 45802 }, { "epoch": 0.9714109987062841, "grad_norm": 0.3757476806640625, "learning_rate": 1.0466390710511543e-05, "loss": 0.4862, "step": 45803 }, { "epoch": 0.9714322071642171, "grad_norm": 0.3584668040275574, "learning_rate": 1.0466057577835448e-05, "loss": 0.4833, "step": 45804 }, { "epoch": 0.9714534156221502, "grad_norm": 0.36008188128471375, "learning_rate": 1.0465724444641012e-05, "loss": 0.4254, "step": 45805 }, { "epoch": 0.9714746240800831, "grad_norm": 0.42558029294013977, "learning_rate": 1.04653913109286e-05, "loss": 0.5125, "step": 45806 }, { "epoch": 0.9714958325380162, "grad_norm": 0.3434491455554962, "learning_rate": 1.0465058176698584e-05, "loss": 0.4918, "step": 45807 }, { "epoch": 0.9715170409959492, "grad_norm": 1.4368456602096558, "learning_rate": 1.0464725041951331e-05, "loss": 0.5445, "step": 45808 }, { "epoch": 0.9715382494538822, "grad_norm": 0.43897682428359985, "learning_rate": 1.046439190668722e-05, "loss": 0.4776, "step": 45809 }, { "epoch": 0.9715594579118152, "grad_norm": 0.3820974826812744, "learning_rate": 1.0464058770906614e-05, "loss": 0.5068, "step": 45810 }, { "epoch": 0.9715806663697483, "grad_norm": 0.36591801047325134, "learning_rate": 1.0463725634609884e-05, "loss": 0.4755, "step": 45811 }, { "epoch": 0.9716018748276812, "grad_norm": 0.34582772850990295, "learning_rate": 1.0463392497797404e-05, "loss": 0.4816, "step": 45812 }, { "epoch": 0.9716230832856143, "grad_norm": 0.4720878303050995, "learning_rate": 1.0463059360469546e-05, "loss": 0.537, "step": 45813 }, { "epoch": 0.9716442917435473, "grad_norm": 0.46789371967315674, "learning_rate": 1.046272622262667e-05, "loss": 0.4448, "step": 45814 }, { "epoch": 0.9716655002014803, "grad_norm": 0.4642983675003052, "learning_rate": 1.0462393084269162e-05, "loss": 0.5258, "step": 45815 }, { "epoch": 0.9716867086594134, "grad_norm": 0.39587390422821045, "learning_rate": 1.046205994539738e-05, "loss": 0.4896, "step": 45816 }, { "epoch": 0.9717079171173464, "grad_norm": 0.35909682512283325, "learning_rate": 1.04617268060117e-05, "loss": 0.3896, "step": 45817 }, { "epoch": 0.9717291255752795, "grad_norm": 0.4052402377128601, "learning_rate": 1.0461393666112492e-05, "loss": 0.4927, "step": 45818 }, { "epoch": 0.9717503340332124, "grad_norm": 0.3682137727737427, "learning_rate": 1.0461060525700124e-05, "loss": 0.4914, "step": 45819 }, { "epoch": 0.9717715424911455, "grad_norm": 0.3513031601905823, "learning_rate": 1.0460727384774969e-05, "loss": 0.4241, "step": 45820 }, { "epoch": 0.9717927509490785, "grad_norm": 0.4150722622871399, "learning_rate": 1.0460394243337398e-05, "loss": 0.5263, "step": 45821 }, { "epoch": 0.9718139594070115, "grad_norm": 0.38380780816078186, "learning_rate": 1.0460061101387779e-05, "loss": 0.5246, "step": 45822 }, { "epoch": 0.9718351678649445, "grad_norm": 0.4080735445022583, "learning_rate": 1.0459727958926489e-05, "loss": 0.5397, "step": 45823 }, { "epoch": 0.9718563763228776, "grad_norm": 0.44592708349227905, "learning_rate": 1.0459394815953888e-05, "loss": 0.5034, "step": 45824 }, { "epoch": 0.9718775847808105, "grad_norm": 0.32162338495254517, "learning_rate": 1.0459061672470354e-05, "loss": 0.4487, "step": 45825 }, { "epoch": 0.9718987932387436, "grad_norm": 2.0021684169769287, "learning_rate": 1.0458728528476255e-05, "loss": 0.5037, "step": 45826 }, { "epoch": 0.9719200016966766, "grad_norm": 0.42457249760627747, "learning_rate": 1.0458395383971962e-05, "loss": 0.4354, "step": 45827 }, { "epoch": 0.9719412101546097, "grad_norm": 0.3633280098438263, "learning_rate": 1.0458062238957847e-05, "loss": 0.5236, "step": 45828 }, { "epoch": 0.9719624186125427, "grad_norm": 0.38113805651664734, "learning_rate": 1.0457729093434279e-05, "loss": 0.5005, "step": 45829 }, { "epoch": 0.9719836270704757, "grad_norm": 0.4041026532649994, "learning_rate": 1.0457395947401627e-05, "loss": 0.4207, "step": 45830 }, { "epoch": 0.9720048355284088, "grad_norm": 0.38150838017463684, "learning_rate": 1.0457062800860264e-05, "loss": 0.5724, "step": 45831 }, { "epoch": 0.9720260439863417, "grad_norm": 0.5180511474609375, "learning_rate": 1.045672965381056e-05, "loss": 0.481, "step": 45832 }, { "epoch": 0.9720472524442748, "grad_norm": 0.3726457953453064, "learning_rate": 1.0456396506252884e-05, "loss": 0.479, "step": 45833 }, { "epoch": 0.9720684609022078, "grad_norm": 0.35608989000320435, "learning_rate": 1.0456063358187611e-05, "loss": 0.4564, "step": 45834 }, { "epoch": 0.9720896693601409, "grad_norm": 0.34676870703697205, "learning_rate": 1.0455730209615105e-05, "loss": 0.5112, "step": 45835 }, { "epoch": 0.9721108778180738, "grad_norm": 0.4121244251728058, "learning_rate": 1.0455397060535744e-05, "loss": 0.4285, "step": 45836 }, { "epoch": 0.9721320862760069, "grad_norm": 0.36633381247520447, "learning_rate": 1.0455063910949892e-05, "loss": 0.459, "step": 45837 }, { "epoch": 0.9721532947339399, "grad_norm": 0.5332531332969666, "learning_rate": 1.0454730760857918e-05, "loss": 0.4288, "step": 45838 }, { "epoch": 0.9721745031918729, "grad_norm": 0.4783017039299011, "learning_rate": 1.0454397610260202e-05, "loss": 0.5975, "step": 45839 }, { "epoch": 0.9721957116498059, "grad_norm": 0.4282611906528473, "learning_rate": 1.0454064459157105e-05, "loss": 0.4474, "step": 45840 }, { "epoch": 0.972216920107739, "grad_norm": 0.4088385999202728, "learning_rate": 1.0453731307549002e-05, "loss": 0.5015, "step": 45841 }, { "epoch": 0.972238128565672, "grad_norm": 0.3437865972518921, "learning_rate": 1.0453398155436265e-05, "loss": 0.535, "step": 45842 }, { "epoch": 0.972259337023605, "grad_norm": 0.3861529529094696, "learning_rate": 1.0453065002819265e-05, "loss": 0.5614, "step": 45843 }, { "epoch": 0.9722805454815381, "grad_norm": 0.3398568630218506, "learning_rate": 1.0452731849698366e-05, "loss": 0.429, "step": 45844 }, { "epoch": 0.972301753939471, "grad_norm": 0.44561904668807983, "learning_rate": 1.0452398696073942e-05, "loss": 0.4697, "step": 45845 }, { "epoch": 0.9723229623974041, "grad_norm": 0.3546513319015503, "learning_rate": 1.0452065541946366e-05, "loss": 0.4737, "step": 45846 }, { "epoch": 0.9723441708553371, "grad_norm": 1.0706559419631958, "learning_rate": 1.0451732387316004e-05, "loss": 0.5038, "step": 45847 }, { "epoch": 0.9723653793132702, "grad_norm": 0.4039093255996704, "learning_rate": 1.0451399232183232e-05, "loss": 0.4776, "step": 45848 }, { "epoch": 0.9723865877712031, "grad_norm": 0.8020541071891785, "learning_rate": 1.0451066076548415e-05, "loss": 0.4499, "step": 45849 }, { "epoch": 0.9724077962291362, "grad_norm": 0.3978447914123535, "learning_rate": 1.045073292041193e-05, "loss": 0.527, "step": 45850 }, { "epoch": 0.9724290046870692, "grad_norm": 0.39747583866119385, "learning_rate": 1.0450399763774143e-05, "loss": 0.5441, "step": 45851 }, { "epoch": 0.9724502131450022, "grad_norm": 0.3731781244277954, "learning_rate": 1.045006660663542e-05, "loss": 0.4422, "step": 45852 }, { "epoch": 0.9724714216029352, "grad_norm": 0.3818051218986511, "learning_rate": 1.0449733448996142e-05, "loss": 0.4378, "step": 45853 }, { "epoch": 0.9724926300608683, "grad_norm": 0.34261831641197205, "learning_rate": 1.0449400290856674e-05, "loss": 0.4619, "step": 45854 }, { "epoch": 0.9725138385188012, "grad_norm": 0.347412109375, "learning_rate": 1.0449067132217385e-05, "loss": 0.4722, "step": 45855 }, { "epoch": 0.9725350469767343, "grad_norm": 0.3908010423183441, "learning_rate": 1.044873397307865e-05, "loss": 0.4569, "step": 45856 }, { "epoch": 0.9725562554346674, "grad_norm": 0.4188074469566345, "learning_rate": 1.0448400813440838e-05, "loss": 0.565, "step": 45857 }, { "epoch": 0.9725774638926004, "grad_norm": 0.3679106533527374, "learning_rate": 1.0448067653304314e-05, "loss": 0.4995, "step": 45858 }, { "epoch": 0.9725986723505334, "grad_norm": 0.46883445978164673, "learning_rate": 1.0447734492669454e-05, "loss": 0.442, "step": 45859 }, { "epoch": 0.9726198808084664, "grad_norm": 0.34759294986724854, "learning_rate": 1.0447401331536631e-05, "loss": 0.3985, "step": 45860 }, { "epoch": 0.9726410892663995, "grad_norm": 0.3476756513118744, "learning_rate": 1.044706816990621e-05, "loss": 0.4517, "step": 45861 }, { "epoch": 0.9726622977243324, "grad_norm": 0.36040836572647095, "learning_rate": 1.0446735007778563e-05, "loss": 0.5182, "step": 45862 }, { "epoch": 0.9726835061822655, "grad_norm": 0.3638763725757599, "learning_rate": 1.0446401845154062e-05, "loss": 0.5178, "step": 45863 }, { "epoch": 0.9727047146401985, "grad_norm": 0.3954837918281555, "learning_rate": 1.0446068682033078e-05, "loss": 0.5097, "step": 45864 }, { "epoch": 0.9727259230981316, "grad_norm": 0.3455691337585449, "learning_rate": 1.0445735518415976e-05, "loss": 0.4716, "step": 45865 }, { "epoch": 0.9727471315560645, "grad_norm": 0.44246456027030945, "learning_rate": 1.0445402354303133e-05, "loss": 0.4924, "step": 45866 }, { "epoch": 0.9727683400139976, "grad_norm": 0.4275944232940674, "learning_rate": 1.0445069189694919e-05, "loss": 0.4993, "step": 45867 }, { "epoch": 0.9727895484719306, "grad_norm": 0.38627317547798157, "learning_rate": 1.0444736024591705e-05, "loss": 0.4845, "step": 45868 }, { "epoch": 0.9728107569298636, "grad_norm": 0.36201027035713196, "learning_rate": 1.0444402858993854e-05, "loss": 0.4635, "step": 45869 }, { "epoch": 0.9728319653877967, "grad_norm": 0.3550328314304352, "learning_rate": 1.0444069692901744e-05, "loss": 0.4664, "step": 45870 }, { "epoch": 0.9728531738457297, "grad_norm": 0.34720802307128906, "learning_rate": 1.0443736526315746e-05, "loss": 0.451, "step": 45871 }, { "epoch": 0.9728743823036627, "grad_norm": 0.3753051161766052, "learning_rate": 1.0443403359236221e-05, "loss": 0.567, "step": 45872 }, { "epoch": 0.9728955907615957, "grad_norm": 0.42571720480918884, "learning_rate": 1.0443070191663554e-05, "loss": 0.5191, "step": 45873 }, { "epoch": 0.9729167992195288, "grad_norm": 0.34616804122924805, "learning_rate": 1.0442737023598106e-05, "loss": 0.4589, "step": 45874 }, { "epoch": 0.9729380076774617, "grad_norm": 0.36781996488571167, "learning_rate": 1.0442403855040251e-05, "loss": 0.58, "step": 45875 }, { "epoch": 0.9729592161353948, "grad_norm": 0.36766207218170166, "learning_rate": 1.0442070685990358e-05, "loss": 0.4608, "step": 45876 }, { "epoch": 0.9729804245933278, "grad_norm": 0.3674679696559906, "learning_rate": 1.0441737516448793e-05, "loss": 0.454, "step": 45877 }, { "epoch": 0.9730016330512609, "grad_norm": 0.4402516484260559, "learning_rate": 1.0441404346415936e-05, "loss": 0.4593, "step": 45878 }, { "epoch": 0.9730228415091938, "grad_norm": 0.4910079836845398, "learning_rate": 1.0441071175892153e-05, "loss": 0.5885, "step": 45879 }, { "epoch": 0.9730440499671269, "grad_norm": 0.3571072220802307, "learning_rate": 1.044073800487781e-05, "loss": 0.4693, "step": 45880 }, { "epoch": 0.9730652584250599, "grad_norm": 0.33632996678352356, "learning_rate": 1.0440404833373288e-05, "loss": 0.4239, "step": 45881 }, { "epoch": 0.9730864668829929, "grad_norm": 0.3372480869293213, "learning_rate": 1.044007166137895e-05, "loss": 0.436, "step": 45882 }, { "epoch": 0.973107675340926, "grad_norm": 0.3922613263130188, "learning_rate": 1.0439738488895166e-05, "loss": 0.4528, "step": 45883 }, { "epoch": 0.973128883798859, "grad_norm": 0.33583584427833557, "learning_rate": 1.043940531592231e-05, "loss": 0.4353, "step": 45884 }, { "epoch": 0.973150092256792, "grad_norm": 0.42394155263900757, "learning_rate": 1.0439072142460752e-05, "loss": 0.5482, "step": 45885 }, { "epoch": 0.973171300714725, "grad_norm": 0.35037049651145935, "learning_rate": 1.0438738968510858e-05, "loss": 0.441, "step": 45886 }, { "epoch": 0.9731925091726581, "grad_norm": 0.42629384994506836, "learning_rate": 1.0438405794073007e-05, "loss": 0.5086, "step": 45887 }, { "epoch": 0.9732137176305911, "grad_norm": 0.38135194778442383, "learning_rate": 1.0438072619147563e-05, "loss": 0.5083, "step": 45888 }, { "epoch": 0.9732349260885241, "grad_norm": 0.3723706603050232, "learning_rate": 1.0437739443734898e-05, "loss": 0.5357, "step": 45889 }, { "epoch": 0.9732561345464571, "grad_norm": 0.3832758367061615, "learning_rate": 1.0437406267835383e-05, "loss": 0.5085, "step": 45890 }, { "epoch": 0.9732773430043902, "grad_norm": 0.3742693066596985, "learning_rate": 1.043707309144939e-05, "loss": 0.55, "step": 45891 }, { "epoch": 0.9732985514623231, "grad_norm": 0.3873387277126312, "learning_rate": 1.0436739914577285e-05, "loss": 0.4502, "step": 45892 }, { "epoch": 0.9733197599202562, "grad_norm": 0.4032060503959656, "learning_rate": 1.0436406737219446e-05, "loss": 0.5369, "step": 45893 }, { "epoch": 0.9733409683781892, "grad_norm": 0.41850754618644714, "learning_rate": 1.0436073559376236e-05, "loss": 0.4881, "step": 45894 }, { "epoch": 0.9733621768361223, "grad_norm": 0.38821840286254883, "learning_rate": 1.0435740381048029e-05, "loss": 0.4598, "step": 45895 }, { "epoch": 0.9733833852940552, "grad_norm": 0.39590609073638916, "learning_rate": 1.0435407202235199e-05, "loss": 0.4757, "step": 45896 }, { "epoch": 0.9734045937519883, "grad_norm": 0.38049447536468506, "learning_rate": 1.0435074022938108e-05, "loss": 0.4725, "step": 45897 }, { "epoch": 0.9734258022099214, "grad_norm": 0.38545840978622437, "learning_rate": 1.0434740843157132e-05, "loss": 0.5522, "step": 45898 }, { "epoch": 0.9734470106678543, "grad_norm": 0.44545361399650574, "learning_rate": 1.0434407662892645e-05, "loss": 0.4237, "step": 45899 }, { "epoch": 0.9734682191257874, "grad_norm": 0.3268217444419861, "learning_rate": 1.0434074482145009e-05, "loss": 0.4409, "step": 45900 }, { "epoch": 0.9734894275837204, "grad_norm": 0.3519262671470642, "learning_rate": 1.04337413009146e-05, "loss": 0.5054, "step": 45901 }, { "epoch": 0.9735106360416534, "grad_norm": 0.3921103775501251, "learning_rate": 1.0433408119201794e-05, "loss": 0.4507, "step": 45902 }, { "epoch": 0.9735318444995864, "grad_norm": 0.42293572425842285, "learning_rate": 1.043307493700695e-05, "loss": 0.4301, "step": 45903 }, { "epoch": 0.9735530529575195, "grad_norm": 0.3935592472553253, "learning_rate": 1.0432741754330439e-05, "loss": 0.5176, "step": 45904 }, { "epoch": 0.9735742614154524, "grad_norm": 0.4996633231639862, "learning_rate": 1.0432408571172643e-05, "loss": 0.4795, "step": 45905 }, { "epoch": 0.9735954698733855, "grad_norm": 0.3852505683898926, "learning_rate": 1.0432075387533924e-05, "loss": 0.4956, "step": 45906 }, { "epoch": 0.9736166783313185, "grad_norm": 0.4646380543708801, "learning_rate": 1.0431742203414656e-05, "loss": 0.4737, "step": 45907 }, { "epoch": 0.9736378867892516, "grad_norm": 0.373683363199234, "learning_rate": 1.0431409018815207e-05, "loss": 0.4744, "step": 45908 }, { "epoch": 0.9736590952471845, "grad_norm": 0.4496060609817505, "learning_rate": 1.0431075833735949e-05, "loss": 0.509, "step": 45909 }, { "epoch": 0.9736803037051176, "grad_norm": 0.3632834255695343, "learning_rate": 1.043074264817725e-05, "loss": 0.4408, "step": 45910 }, { "epoch": 0.9737015121630507, "grad_norm": 0.36906927824020386, "learning_rate": 1.0430409462139486e-05, "loss": 0.4793, "step": 45911 }, { "epoch": 0.9737227206209836, "grad_norm": 0.3932577967643738, "learning_rate": 1.043007627562302e-05, "loss": 0.5584, "step": 45912 }, { "epoch": 0.9737439290789167, "grad_norm": 0.3786773085594177, "learning_rate": 1.042974308862823e-05, "loss": 0.4678, "step": 45913 }, { "epoch": 0.9737651375368497, "grad_norm": 0.3726625442504883, "learning_rate": 1.0429409901155483e-05, "loss": 0.4993, "step": 45914 }, { "epoch": 0.9737863459947828, "grad_norm": 0.39805832505226135, "learning_rate": 1.0429076713205151e-05, "loss": 0.4682, "step": 45915 }, { "epoch": 0.9738075544527157, "grad_norm": 0.3538513779640198, "learning_rate": 1.0428743524777606e-05, "loss": 0.4248, "step": 45916 }, { "epoch": 0.9738287629106488, "grad_norm": 0.35664036870002747, "learning_rate": 1.042841033587321e-05, "loss": 0.4891, "step": 45917 }, { "epoch": 0.9738499713685818, "grad_norm": 0.3880619406700134, "learning_rate": 1.0428077146492342e-05, "loss": 0.5642, "step": 45918 }, { "epoch": 0.9738711798265148, "grad_norm": 0.3586818277835846, "learning_rate": 1.0427743956635373e-05, "loss": 0.4884, "step": 45919 }, { "epoch": 0.9738923882844478, "grad_norm": 0.37914401292800903, "learning_rate": 1.0427410766302666e-05, "loss": 0.4703, "step": 45920 }, { "epoch": 0.9739135967423809, "grad_norm": 0.3529137670993805, "learning_rate": 1.0427077575494602e-05, "loss": 0.5116, "step": 45921 }, { "epoch": 0.9739348052003138, "grad_norm": 0.40199315547943115, "learning_rate": 1.0426744384211543e-05, "loss": 0.5787, "step": 45922 }, { "epoch": 0.9739560136582469, "grad_norm": 0.36276260018348694, "learning_rate": 1.042641119245386e-05, "loss": 0.4189, "step": 45923 }, { "epoch": 0.97397722211618, "grad_norm": 0.5318586826324463, "learning_rate": 1.0426078000221931e-05, "loss": 0.5684, "step": 45924 }, { "epoch": 0.973998430574113, "grad_norm": 0.5982578992843628, "learning_rate": 1.042574480751612e-05, "loss": 0.5721, "step": 45925 }, { "epoch": 0.974019639032046, "grad_norm": 0.3538973927497864, "learning_rate": 1.0425411614336798e-05, "loss": 0.4909, "step": 45926 }, { "epoch": 0.974040847489979, "grad_norm": 0.35205861926078796, "learning_rate": 1.042507842068434e-05, "loss": 0.4791, "step": 45927 }, { "epoch": 0.9740620559479121, "grad_norm": 0.45343342423439026, "learning_rate": 1.042474522655911e-05, "loss": 0.489, "step": 45928 }, { "epoch": 0.974083264405845, "grad_norm": 0.3632582724094391, "learning_rate": 1.0424412031961485e-05, "loss": 0.454, "step": 45929 }, { "epoch": 0.9741044728637781, "grad_norm": 0.3796432614326477, "learning_rate": 1.042407883689183e-05, "loss": 0.4809, "step": 45930 }, { "epoch": 0.9741256813217111, "grad_norm": 0.3750098645687103, "learning_rate": 1.042374564135052e-05, "loss": 0.5234, "step": 45931 }, { "epoch": 0.9741468897796441, "grad_norm": 0.4238656163215637, "learning_rate": 1.0423412445337923e-05, "loss": 0.4768, "step": 45932 }, { "epoch": 0.9741680982375771, "grad_norm": 0.4579344093799591, "learning_rate": 1.0423079248854413e-05, "loss": 0.5043, "step": 45933 }, { "epoch": 0.9741893066955102, "grad_norm": 0.3543025255203247, "learning_rate": 1.0422746051900355e-05, "loss": 0.4916, "step": 45934 }, { "epoch": 0.9742105151534431, "grad_norm": 0.3482438325881958, "learning_rate": 1.0422412854476122e-05, "loss": 0.4949, "step": 45935 }, { "epoch": 0.9742317236113762, "grad_norm": 0.3808051645755768, "learning_rate": 1.0422079656582084e-05, "loss": 0.4074, "step": 45936 }, { "epoch": 0.9742529320693092, "grad_norm": 0.3821912705898285, "learning_rate": 1.0421746458218615e-05, "loss": 0.4813, "step": 45937 }, { "epoch": 0.9742741405272423, "grad_norm": 0.35027098655700684, "learning_rate": 1.0421413259386083e-05, "loss": 0.4697, "step": 45938 }, { "epoch": 0.9742953489851753, "grad_norm": 0.3822469711303711, "learning_rate": 1.0421080060084863e-05, "loss": 0.5625, "step": 45939 }, { "epoch": 0.9743165574431083, "grad_norm": 0.3662155270576477, "learning_rate": 1.0420746860315316e-05, "loss": 0.4572, "step": 45940 }, { "epoch": 0.9743377659010414, "grad_norm": 0.38376784324645996, "learning_rate": 1.0420413660077819e-05, "loss": 0.4228, "step": 45941 }, { "epoch": 0.9743589743589743, "grad_norm": 0.36407092213630676, "learning_rate": 1.0420080459372741e-05, "loss": 0.5039, "step": 45942 }, { "epoch": 0.9743801828169074, "grad_norm": 0.36537879705429077, "learning_rate": 1.0419747258200452e-05, "loss": 0.4885, "step": 45943 }, { "epoch": 0.9744013912748404, "grad_norm": 0.41178813576698303, "learning_rate": 1.0419414056561327e-05, "loss": 0.4094, "step": 45944 }, { "epoch": 0.9744225997327735, "grad_norm": 0.3913494050502777, "learning_rate": 1.0419080854455733e-05, "loss": 0.535, "step": 45945 }, { "epoch": 0.9744438081907064, "grad_norm": 0.42701390385627747, "learning_rate": 1.041874765188404e-05, "loss": 0.6339, "step": 45946 }, { "epoch": 0.9744650166486395, "grad_norm": 0.36225301027297974, "learning_rate": 1.0418414448846621e-05, "loss": 0.4824, "step": 45947 }, { "epoch": 0.9744862251065725, "grad_norm": 0.3434726297855377, "learning_rate": 1.0418081245343843e-05, "loss": 0.525, "step": 45948 }, { "epoch": 0.9745074335645055, "grad_norm": 0.4207950234413147, "learning_rate": 1.0417748041376079e-05, "loss": 0.4762, "step": 45949 }, { "epoch": 0.9745286420224385, "grad_norm": 0.35212308168411255, "learning_rate": 1.04174148369437e-05, "loss": 0.4793, "step": 45950 }, { "epoch": 0.9745498504803716, "grad_norm": 0.36790814995765686, "learning_rate": 1.0417081632047075e-05, "loss": 0.4385, "step": 45951 }, { "epoch": 0.9745710589383046, "grad_norm": 0.43847930431365967, "learning_rate": 1.0416748426686575e-05, "loss": 0.5341, "step": 45952 }, { "epoch": 0.9745922673962376, "grad_norm": 0.41697752475738525, "learning_rate": 1.0416415220862574e-05, "loss": 0.4475, "step": 45953 }, { "epoch": 0.9746134758541707, "grad_norm": 0.3630679249763489, "learning_rate": 1.0416082014575437e-05, "loss": 0.4425, "step": 45954 }, { "epoch": 0.9746346843121036, "grad_norm": 0.3517070412635803, "learning_rate": 1.0415748807825536e-05, "loss": 0.466, "step": 45955 }, { "epoch": 0.9746558927700367, "grad_norm": 0.3726568818092346, "learning_rate": 1.0415415600613246e-05, "loss": 0.5119, "step": 45956 }, { "epoch": 0.9746771012279697, "grad_norm": 0.3758007884025574, "learning_rate": 1.0415082392938932e-05, "loss": 0.483, "step": 45957 }, { "epoch": 0.9746983096859028, "grad_norm": 0.3642160892486572, "learning_rate": 1.0414749184802968e-05, "loss": 0.4212, "step": 45958 }, { "epoch": 0.9747195181438357, "grad_norm": 0.39722681045532227, "learning_rate": 1.0414415976205723e-05, "loss": 0.5451, "step": 45959 }, { "epoch": 0.9747407266017688, "grad_norm": 0.43195003271102905, "learning_rate": 1.0414082767147569e-05, "loss": 0.4582, "step": 45960 }, { "epoch": 0.9747619350597018, "grad_norm": 0.4418485760688782, "learning_rate": 1.0413749557628875e-05, "loss": 0.5149, "step": 45961 }, { "epoch": 0.9747831435176348, "grad_norm": 0.3969203531742096, "learning_rate": 1.0413416347650012e-05, "loss": 0.5639, "step": 45962 }, { "epoch": 0.9748043519755678, "grad_norm": 0.36110320687294006, "learning_rate": 1.041308313721135e-05, "loss": 0.4985, "step": 45963 }, { "epoch": 0.9748255604335009, "grad_norm": 0.4625149369239807, "learning_rate": 1.0412749926313264e-05, "loss": 0.5049, "step": 45964 }, { "epoch": 0.974846768891434, "grad_norm": 0.3627333641052246, "learning_rate": 1.0412416714956119e-05, "loss": 0.4254, "step": 45965 }, { "epoch": 0.9748679773493669, "grad_norm": 0.3606131076812744, "learning_rate": 1.041208350314029e-05, "loss": 0.4135, "step": 45966 }, { "epoch": 0.9748891858073, "grad_norm": 0.34430843591690063, "learning_rate": 1.0411750290866143e-05, "loss": 0.5102, "step": 45967 }, { "epoch": 0.974910394265233, "grad_norm": 0.3397577106952667, "learning_rate": 1.0411417078134047e-05, "loss": 0.4492, "step": 45968 }, { "epoch": 0.974931602723166, "grad_norm": 0.433366060256958, "learning_rate": 1.0411083864944382e-05, "loss": 0.4962, "step": 45969 }, { "epoch": 0.974952811181099, "grad_norm": 0.33506155014038086, "learning_rate": 1.0410750651297513e-05, "loss": 0.4494, "step": 45970 }, { "epoch": 0.9749740196390321, "grad_norm": 0.3819054663181305, "learning_rate": 1.0410417437193809e-05, "loss": 0.5658, "step": 45971 }, { "epoch": 0.974995228096965, "grad_norm": 0.3982357680797577, "learning_rate": 1.0410084222633644e-05, "loss": 0.4723, "step": 45972 }, { "epoch": 0.9750164365548981, "grad_norm": 0.3946477174758911, "learning_rate": 1.0409751007617383e-05, "loss": 0.4392, "step": 45973 }, { "epoch": 0.9750376450128311, "grad_norm": 0.3608381152153015, "learning_rate": 1.0409417792145403e-05, "loss": 0.458, "step": 45974 }, { "epoch": 0.9750588534707642, "grad_norm": 0.4731305241584778, "learning_rate": 1.0409084576218074e-05, "loss": 0.4612, "step": 45975 }, { "epoch": 0.9750800619286971, "grad_norm": 0.38518378138542175, "learning_rate": 1.0408751359835761e-05, "loss": 0.5522, "step": 45976 }, { "epoch": 0.9751012703866302, "grad_norm": 0.4393118619918823, "learning_rate": 1.0408418142998843e-05, "loss": 0.5049, "step": 45977 }, { "epoch": 0.9751224788445632, "grad_norm": 0.42219170928001404, "learning_rate": 1.0408084925707682e-05, "loss": 0.469, "step": 45978 }, { "epoch": 0.9751436873024962, "grad_norm": 0.37326475977897644, "learning_rate": 1.0407751707962653e-05, "loss": 0.4181, "step": 45979 }, { "epoch": 0.9751648957604293, "grad_norm": 0.3842713236808777, "learning_rate": 1.0407418489764127e-05, "loss": 0.532, "step": 45980 }, { "epoch": 0.9751861042183623, "grad_norm": 0.4521366357803345, "learning_rate": 1.0407085271112471e-05, "loss": 0.4786, "step": 45981 }, { "epoch": 0.9752073126762953, "grad_norm": 0.37505629658699036, "learning_rate": 1.040675205200806e-05, "loss": 0.5366, "step": 45982 }, { "epoch": 0.9752285211342283, "grad_norm": 0.3832874894142151, "learning_rate": 1.0406418832451267e-05, "loss": 0.4783, "step": 45983 }, { "epoch": 0.9752497295921614, "grad_norm": 0.35804426670074463, "learning_rate": 1.0406085612442454e-05, "loss": 0.5234, "step": 45984 }, { "epoch": 0.9752709380500943, "grad_norm": 0.39970359206199646, "learning_rate": 1.0405752391981996e-05, "loss": 0.5472, "step": 45985 }, { "epoch": 0.9752921465080274, "grad_norm": 0.3985251784324646, "learning_rate": 1.0405419171070266e-05, "loss": 0.4209, "step": 45986 }, { "epoch": 0.9753133549659604, "grad_norm": 0.3532962203025818, "learning_rate": 1.040508594970763e-05, "loss": 0.4133, "step": 45987 }, { "epoch": 0.9753345634238935, "grad_norm": 0.42300376296043396, "learning_rate": 1.0404752727894461e-05, "loss": 0.489, "step": 45988 }, { "epoch": 0.9753557718818264, "grad_norm": 0.987738847732544, "learning_rate": 1.040441950563113e-05, "loss": 0.4351, "step": 45989 }, { "epoch": 0.9753769803397595, "grad_norm": 0.39374932646751404, "learning_rate": 1.040408628291801e-05, "loss": 0.532, "step": 45990 }, { "epoch": 0.9753981887976925, "grad_norm": 0.37009990215301514, "learning_rate": 1.0403753059755466e-05, "loss": 0.4315, "step": 45991 }, { "epoch": 0.9754193972556255, "grad_norm": 0.42380616068840027, "learning_rate": 1.0403419836143873e-05, "loss": 0.472, "step": 45992 }, { "epoch": 0.9754406057135586, "grad_norm": 0.37065815925598145, "learning_rate": 1.0403086612083598e-05, "loss": 0.4812, "step": 45993 }, { "epoch": 0.9754618141714916, "grad_norm": 0.3159732222557068, "learning_rate": 1.0402753387575014e-05, "loss": 0.4552, "step": 45994 }, { "epoch": 0.9754830226294247, "grad_norm": 0.4188413619995117, "learning_rate": 1.0402420162618491e-05, "loss": 0.4971, "step": 45995 }, { "epoch": 0.9755042310873576, "grad_norm": 0.4073585271835327, "learning_rate": 1.0402086937214399e-05, "loss": 0.5277, "step": 45996 }, { "epoch": 0.9755254395452907, "grad_norm": 0.3871273994445801, "learning_rate": 1.0401753711363113e-05, "loss": 0.5018, "step": 45997 }, { "epoch": 0.9755466480032237, "grad_norm": 0.38174867630004883, "learning_rate": 1.0401420485065e-05, "loss": 0.5339, "step": 45998 }, { "epoch": 0.9755678564611567, "grad_norm": 0.3584062159061432, "learning_rate": 1.0401087258320426e-05, "loss": 0.5081, "step": 45999 }, { "epoch": 0.9755890649190897, "grad_norm": 0.4399269223213196, "learning_rate": 1.0400754031129767e-05, "loss": 0.4691, "step": 46000 }, { "epoch": 0.9756102733770228, "grad_norm": 0.4155004024505615, "learning_rate": 1.0400420803493396e-05, "loss": 0.5282, "step": 46001 }, { "epoch": 0.9756314818349557, "grad_norm": 0.4029502272605896, "learning_rate": 1.0400087575411677e-05, "loss": 0.4691, "step": 46002 }, { "epoch": 0.9756526902928888, "grad_norm": 0.4432862102985382, "learning_rate": 1.0399754346884987e-05, "loss": 0.5385, "step": 46003 }, { "epoch": 0.9756738987508218, "grad_norm": 0.38862520456314087, "learning_rate": 1.0399421117913693e-05, "loss": 0.6235, "step": 46004 }, { "epoch": 0.9756951072087549, "grad_norm": 0.3823074400424957, "learning_rate": 1.0399087888498167e-05, "loss": 0.5079, "step": 46005 }, { "epoch": 0.9757163156666879, "grad_norm": 0.38285180926322937, "learning_rate": 1.0398754658638776e-05, "loss": 0.4567, "step": 46006 }, { "epoch": 0.9757375241246209, "grad_norm": 0.6446599364280701, "learning_rate": 1.0398421428335896e-05, "loss": 0.419, "step": 46007 }, { "epoch": 0.975758732582554, "grad_norm": 0.45291033387184143, "learning_rate": 1.0398088197589895e-05, "loss": 0.5239, "step": 46008 }, { "epoch": 0.9757799410404869, "grad_norm": 0.372464656829834, "learning_rate": 1.0397754966401143e-05, "loss": 0.5156, "step": 46009 }, { "epoch": 0.97580114949842, "grad_norm": 0.3774551451206207, "learning_rate": 1.0397421734770013e-05, "loss": 0.3873, "step": 46010 }, { "epoch": 0.975822357956353, "grad_norm": 0.38976675271987915, "learning_rate": 1.0397088502696873e-05, "loss": 0.5007, "step": 46011 }, { "epoch": 0.975843566414286, "grad_norm": 0.3405761122703552, "learning_rate": 1.0396755270182096e-05, "loss": 0.4183, "step": 46012 }, { "epoch": 0.975864774872219, "grad_norm": 0.40067583322525024, "learning_rate": 1.0396422037226045e-05, "loss": 0.5292, "step": 46013 }, { "epoch": 0.9758859833301521, "grad_norm": 0.3834141194820404, "learning_rate": 1.0396088803829103e-05, "loss": 0.4393, "step": 46014 }, { "epoch": 0.975907191788085, "grad_norm": 0.39545074105262756, "learning_rate": 1.0395755569991635e-05, "loss": 0.5035, "step": 46015 }, { "epoch": 0.9759284002460181, "grad_norm": 0.3915836811065674, "learning_rate": 1.0395422335714008e-05, "loss": 0.581, "step": 46016 }, { "epoch": 0.9759496087039511, "grad_norm": 0.363668292760849, "learning_rate": 1.0395089100996597e-05, "loss": 0.4778, "step": 46017 }, { "epoch": 0.9759708171618842, "grad_norm": 0.38005536794662476, "learning_rate": 1.039475586583977e-05, "loss": 0.4932, "step": 46018 }, { "epoch": 0.9759920256198172, "grad_norm": 0.3681444823741913, "learning_rate": 1.0394422630243899e-05, "loss": 0.5007, "step": 46019 }, { "epoch": 0.9760132340777502, "grad_norm": 0.35656777024269104, "learning_rate": 1.0394089394209356e-05, "loss": 0.5354, "step": 46020 }, { "epoch": 0.9760344425356833, "grad_norm": 0.41424453258514404, "learning_rate": 1.0393756157736512e-05, "loss": 0.5002, "step": 46021 }, { "epoch": 0.9760556509936162, "grad_norm": 0.39957207441329956, "learning_rate": 1.039342292082573e-05, "loss": 0.5013, "step": 46022 }, { "epoch": 0.9760768594515493, "grad_norm": 0.3557434380054474, "learning_rate": 1.0393089683477392e-05, "loss": 0.4154, "step": 46023 }, { "epoch": 0.9760980679094823, "grad_norm": 0.34022456407546997, "learning_rate": 1.039275644569186e-05, "loss": 0.4435, "step": 46024 }, { "epoch": 0.9761192763674154, "grad_norm": 0.31603914499282837, "learning_rate": 1.0392423207469511e-05, "loss": 0.4248, "step": 46025 }, { "epoch": 0.9761404848253483, "grad_norm": 0.34711599349975586, "learning_rate": 1.0392089968810708e-05, "loss": 0.5319, "step": 46026 }, { "epoch": 0.9761616932832814, "grad_norm": 0.34102606773376465, "learning_rate": 1.0391756729715827e-05, "loss": 0.4246, "step": 46027 }, { "epoch": 0.9761829017412144, "grad_norm": 0.3343140482902527, "learning_rate": 1.0391423490185239e-05, "loss": 0.5162, "step": 46028 }, { "epoch": 0.9762041101991474, "grad_norm": 0.3745219111442566, "learning_rate": 1.0391090250219315e-05, "loss": 0.3918, "step": 46029 }, { "epoch": 0.9762253186570804, "grad_norm": 0.3929535150527954, "learning_rate": 1.0390757009818423e-05, "loss": 0.5064, "step": 46030 }, { "epoch": 0.9762465271150135, "grad_norm": 0.4379047751426697, "learning_rate": 1.0390423768982931e-05, "loss": 0.4686, "step": 46031 }, { "epoch": 0.9762677355729464, "grad_norm": 0.34952017664909363, "learning_rate": 1.0390090527713215e-05, "loss": 0.472, "step": 46032 }, { "epoch": 0.9762889440308795, "grad_norm": 0.4239184856414795, "learning_rate": 1.0389757286009644e-05, "loss": 0.5051, "step": 46033 }, { "epoch": 0.9763101524888126, "grad_norm": 0.3864634335041046, "learning_rate": 1.038942404387259e-05, "loss": 0.6227, "step": 46034 }, { "epoch": 0.9763313609467456, "grad_norm": 0.4778922200202942, "learning_rate": 1.0389090801302419e-05, "loss": 0.5529, "step": 46035 }, { "epoch": 0.9763525694046786, "grad_norm": 0.36683252453804016, "learning_rate": 1.0388757558299504e-05, "loss": 0.4945, "step": 46036 }, { "epoch": 0.9763737778626116, "grad_norm": 0.35294610261917114, "learning_rate": 1.0388424314864221e-05, "loss": 0.4755, "step": 46037 }, { "epoch": 0.9763949863205447, "grad_norm": 0.3963731825351715, "learning_rate": 1.0388091070996931e-05, "loss": 0.4799, "step": 46038 }, { "epoch": 0.9764161947784776, "grad_norm": 0.33741065859794617, "learning_rate": 1.038775782669801e-05, "loss": 0.4615, "step": 46039 }, { "epoch": 0.9764374032364107, "grad_norm": 0.6489496231079102, "learning_rate": 1.038742458196783e-05, "loss": 0.484, "step": 46040 }, { "epoch": 0.9764586116943437, "grad_norm": 0.4011225998401642, "learning_rate": 1.0387091336806759e-05, "loss": 0.501, "step": 46041 }, { "epoch": 0.9764798201522767, "grad_norm": 0.43368902802467346, "learning_rate": 1.038675809121517e-05, "loss": 0.4506, "step": 46042 }, { "epoch": 0.9765010286102097, "grad_norm": 0.35815688967704773, "learning_rate": 1.0386424845193432e-05, "loss": 0.466, "step": 46043 }, { "epoch": 0.9765222370681428, "grad_norm": 0.39472389221191406, "learning_rate": 1.0386091598741913e-05, "loss": 0.548, "step": 46044 }, { "epoch": 0.9765434455260757, "grad_norm": 0.34836992621421814, "learning_rate": 1.0385758351860986e-05, "loss": 0.4699, "step": 46045 }, { "epoch": 0.9765646539840088, "grad_norm": 0.35432925820350647, "learning_rate": 1.0385425104551024e-05, "loss": 0.4736, "step": 46046 }, { "epoch": 0.9765858624419419, "grad_norm": 0.39811423420906067, "learning_rate": 1.0385091856812395e-05, "loss": 0.4749, "step": 46047 }, { "epoch": 0.9766070708998749, "grad_norm": 0.35970568656921387, "learning_rate": 1.0384758608645471e-05, "loss": 0.472, "step": 46048 }, { "epoch": 0.9766282793578079, "grad_norm": 0.3081236779689789, "learning_rate": 1.0384425360050621e-05, "loss": 0.4221, "step": 46049 }, { "epoch": 0.9766494878157409, "grad_norm": 0.4178607165813446, "learning_rate": 1.0384092111028217e-05, "loss": 0.5066, "step": 46050 }, { "epoch": 0.976670696273674, "grad_norm": 0.3727557957172394, "learning_rate": 1.0383758861578626e-05, "loss": 0.481, "step": 46051 }, { "epoch": 0.9766919047316069, "grad_norm": 0.3495163321495056, "learning_rate": 1.0383425611702225e-05, "loss": 0.4856, "step": 46052 }, { "epoch": 0.97671311318954, "grad_norm": 0.4475172460079193, "learning_rate": 1.038309236139938e-05, "loss": 0.5589, "step": 46053 }, { "epoch": 0.976734321647473, "grad_norm": 0.3869887590408325, "learning_rate": 1.0382759110670465e-05, "loss": 0.5021, "step": 46054 }, { "epoch": 0.9767555301054061, "grad_norm": 0.33317801356315613, "learning_rate": 1.0382425859515845e-05, "loss": 0.4992, "step": 46055 }, { "epoch": 0.976776738563339, "grad_norm": 0.37147244811058044, "learning_rate": 1.0382092607935899e-05, "loss": 0.4897, "step": 46056 }, { "epoch": 0.9767979470212721, "grad_norm": 0.35306376218795776, "learning_rate": 1.0381759355930992e-05, "loss": 0.4971, "step": 46057 }, { "epoch": 0.9768191554792051, "grad_norm": 0.3956790268421173, "learning_rate": 1.0381426103501492e-05, "loss": 0.4861, "step": 46058 }, { "epoch": 0.9768403639371381, "grad_norm": 0.3404742181301117, "learning_rate": 1.0381092850647773e-05, "loss": 0.4888, "step": 46059 }, { "epoch": 0.9768615723950712, "grad_norm": 0.4033665060997009, "learning_rate": 1.0380759597370211e-05, "loss": 0.4753, "step": 46060 }, { "epoch": 0.9768827808530042, "grad_norm": 0.38152340054512024, "learning_rate": 1.0380426343669167e-05, "loss": 0.4863, "step": 46061 }, { "epoch": 0.9769039893109372, "grad_norm": 0.3723144829273224, "learning_rate": 1.0380093089545018e-05, "loss": 0.5047, "step": 46062 }, { "epoch": 0.9769251977688702, "grad_norm": 0.37922534346580505, "learning_rate": 1.0379759834998133e-05, "loss": 0.4229, "step": 46063 }, { "epoch": 0.9769464062268033, "grad_norm": 0.6268740892410278, "learning_rate": 1.0379426580028879e-05, "loss": 0.5926, "step": 46064 }, { "epoch": 0.9769676146847363, "grad_norm": 0.5101587772369385, "learning_rate": 1.0379093324637635e-05, "loss": 0.5297, "step": 46065 }, { "epoch": 0.9769888231426693, "grad_norm": 0.35163792967796326, "learning_rate": 1.0378760068824764e-05, "loss": 0.5078, "step": 46066 }, { "epoch": 0.9770100316006023, "grad_norm": 0.3638628423213959, "learning_rate": 1.0378426812590641e-05, "loss": 0.4631, "step": 46067 }, { "epoch": 0.9770312400585354, "grad_norm": 0.364485502243042, "learning_rate": 1.0378093555935634e-05, "loss": 0.4492, "step": 46068 }, { "epoch": 0.9770524485164683, "grad_norm": 0.36928582191467285, "learning_rate": 1.0377760298860113e-05, "loss": 0.4323, "step": 46069 }, { "epoch": 0.9770736569744014, "grad_norm": 0.3483916223049164, "learning_rate": 1.0377427041364455e-05, "loss": 0.4718, "step": 46070 }, { "epoch": 0.9770948654323344, "grad_norm": 0.3752644658088684, "learning_rate": 1.037709378344902e-05, "loss": 0.4778, "step": 46071 }, { "epoch": 0.9771160738902674, "grad_norm": 0.3403854966163635, "learning_rate": 1.0376760525114187e-05, "loss": 0.4507, "step": 46072 }, { "epoch": 0.9771372823482004, "grad_norm": 0.3973846733570099, "learning_rate": 1.0376427266360325e-05, "loss": 0.4504, "step": 46073 }, { "epoch": 0.9771584908061335, "grad_norm": 0.41655203700065613, "learning_rate": 1.0376094007187805e-05, "loss": 0.5667, "step": 46074 }, { "epoch": 0.9771796992640666, "grad_norm": 0.37889760732650757, "learning_rate": 1.0375760747596992e-05, "loss": 0.4629, "step": 46075 }, { "epoch": 0.9772009077219995, "grad_norm": 0.3982289135456085, "learning_rate": 1.0375427487588266e-05, "loss": 0.4333, "step": 46076 }, { "epoch": 0.9772221161799326, "grad_norm": 0.42661336064338684, "learning_rate": 1.037509422716199e-05, "loss": 0.4115, "step": 46077 }, { "epoch": 0.9772433246378656, "grad_norm": 0.3749542534351349, "learning_rate": 1.0374760966318537e-05, "loss": 0.5316, "step": 46078 }, { "epoch": 0.9772645330957986, "grad_norm": 0.3371943235397339, "learning_rate": 1.0374427705058281e-05, "loss": 0.5272, "step": 46079 }, { "epoch": 0.9772857415537316, "grad_norm": 0.37449708580970764, "learning_rate": 1.037409444338159e-05, "loss": 0.5143, "step": 46080 }, { "epoch": 0.9773069500116647, "grad_norm": 0.3853972256183624, "learning_rate": 1.037376118128883e-05, "loss": 0.4587, "step": 46081 }, { "epoch": 0.9773281584695976, "grad_norm": 0.33473101258277893, "learning_rate": 1.037342791878038e-05, "loss": 0.4294, "step": 46082 }, { "epoch": 0.9773493669275307, "grad_norm": 0.3352241814136505, "learning_rate": 1.0373094655856606e-05, "loss": 0.3828, "step": 46083 }, { "epoch": 0.9773705753854637, "grad_norm": 0.35226014256477356, "learning_rate": 1.0372761392517877e-05, "loss": 0.4571, "step": 46084 }, { "epoch": 0.9773917838433968, "grad_norm": 0.34600019454956055, "learning_rate": 1.0372428128764569e-05, "loss": 0.4531, "step": 46085 }, { "epoch": 0.9774129923013297, "grad_norm": 0.39551547169685364, "learning_rate": 1.0372094864597047e-05, "loss": 0.5136, "step": 46086 }, { "epoch": 0.9774342007592628, "grad_norm": 0.3980952203273773, "learning_rate": 1.0371761600015686e-05, "loss": 0.5205, "step": 46087 }, { "epoch": 0.9774554092171959, "grad_norm": 0.36272263526916504, "learning_rate": 1.0371428335020857e-05, "loss": 0.4613, "step": 46088 }, { "epoch": 0.9774766176751288, "grad_norm": 0.4331236481666565, "learning_rate": 1.0371095069612924e-05, "loss": 0.5173, "step": 46089 }, { "epoch": 0.9774978261330619, "grad_norm": 0.35897547006607056, "learning_rate": 1.0370761803792265e-05, "loss": 0.4787, "step": 46090 }, { "epoch": 0.9775190345909949, "grad_norm": 0.5256219506263733, "learning_rate": 1.0370428537559248e-05, "loss": 0.4908, "step": 46091 }, { "epoch": 0.977540243048928, "grad_norm": 0.3576202988624573, "learning_rate": 1.0370095270914243e-05, "loss": 0.4822, "step": 46092 }, { "epoch": 0.9775614515068609, "grad_norm": 0.36335596442222595, "learning_rate": 1.0369762003857621e-05, "loss": 0.4648, "step": 46093 }, { "epoch": 0.977582659964794, "grad_norm": 0.35236233472824097, "learning_rate": 1.0369428736389756e-05, "loss": 0.4417, "step": 46094 }, { "epoch": 0.977603868422727, "grad_norm": 0.36148956418037415, "learning_rate": 1.036909546851101e-05, "loss": 0.5338, "step": 46095 }, { "epoch": 0.97762507688066, "grad_norm": 0.42011505365371704, "learning_rate": 1.0368762200221762e-05, "loss": 0.5488, "step": 46096 }, { "epoch": 0.977646285338593, "grad_norm": 0.36924105882644653, "learning_rate": 1.036842893152238e-05, "loss": 0.4146, "step": 46097 }, { "epoch": 0.9776674937965261, "grad_norm": 0.35656455159187317, "learning_rate": 1.0368095662413236e-05, "loss": 0.4645, "step": 46098 }, { "epoch": 0.977688702254459, "grad_norm": 0.3351677656173706, "learning_rate": 1.0367762392894698e-05, "loss": 0.463, "step": 46099 }, { "epoch": 0.9777099107123921, "grad_norm": 0.3557191789150238, "learning_rate": 1.0367429122967137e-05, "loss": 0.4555, "step": 46100 }, { "epoch": 0.9777311191703252, "grad_norm": 0.36366334557533264, "learning_rate": 1.0367095852630926e-05, "loss": 0.5085, "step": 46101 }, { "epoch": 0.9777523276282581, "grad_norm": 0.38494741916656494, "learning_rate": 1.0366762581886434e-05, "loss": 0.507, "step": 46102 }, { "epoch": 0.9777735360861912, "grad_norm": 0.3348453640937805, "learning_rate": 1.036642931073403e-05, "loss": 0.4617, "step": 46103 }, { "epoch": 0.9777947445441242, "grad_norm": 0.4068642556667328, "learning_rate": 1.0366096039174088e-05, "loss": 0.4621, "step": 46104 }, { "epoch": 0.9778159530020573, "grad_norm": 0.3556106686592102, "learning_rate": 1.0365762767206978e-05, "loss": 0.4908, "step": 46105 }, { "epoch": 0.9778371614599902, "grad_norm": 0.3984578251838684, "learning_rate": 1.0365429494833067e-05, "loss": 0.4999, "step": 46106 }, { "epoch": 0.9778583699179233, "grad_norm": 0.40867072343826294, "learning_rate": 1.036509622205273e-05, "loss": 0.4539, "step": 46107 }, { "epoch": 0.9778795783758563, "grad_norm": 0.39397573471069336, "learning_rate": 1.0364762948866339e-05, "loss": 0.476, "step": 46108 }, { "epoch": 0.9779007868337893, "grad_norm": 0.5072268843650818, "learning_rate": 1.0364429675274255e-05, "loss": 0.4154, "step": 46109 }, { "epoch": 0.9779219952917223, "grad_norm": 0.3544858694076538, "learning_rate": 1.036409640127686e-05, "loss": 0.4579, "step": 46110 }, { "epoch": 0.9779432037496554, "grad_norm": 0.35967493057250977, "learning_rate": 1.0363763126874522e-05, "loss": 0.475, "step": 46111 }, { "epoch": 0.9779644122075883, "grad_norm": 0.35470709204673767, "learning_rate": 1.0363429852067605e-05, "loss": 0.463, "step": 46112 }, { "epoch": 0.9779856206655214, "grad_norm": 0.36668846011161804, "learning_rate": 1.0363096576856488e-05, "loss": 0.5122, "step": 46113 }, { "epoch": 0.9780068291234544, "grad_norm": 0.36664658784866333, "learning_rate": 1.0362763301241539e-05, "loss": 0.4573, "step": 46114 }, { "epoch": 0.9780280375813875, "grad_norm": 0.3556097149848938, "learning_rate": 1.0362430025223123e-05, "loss": 0.4916, "step": 46115 }, { "epoch": 0.9780492460393205, "grad_norm": 0.39678987860679626, "learning_rate": 1.036209674880162e-05, "loss": 0.522, "step": 46116 }, { "epoch": 0.9780704544972535, "grad_norm": 0.39399805665016174, "learning_rate": 1.0361763471977395e-05, "loss": 0.4234, "step": 46117 }, { "epoch": 0.9780916629551866, "grad_norm": 0.38866904377937317, "learning_rate": 1.0361430194750819e-05, "loss": 0.4813, "step": 46118 }, { "epoch": 0.9781128714131195, "grad_norm": 0.4356306791305542, "learning_rate": 1.0361096917122264e-05, "loss": 0.4268, "step": 46119 }, { "epoch": 0.9781340798710526, "grad_norm": 0.35579296946525574, "learning_rate": 1.03607636390921e-05, "loss": 0.452, "step": 46120 }, { "epoch": 0.9781552883289856, "grad_norm": 0.3586382269859314, "learning_rate": 1.0360430360660698e-05, "loss": 0.4884, "step": 46121 }, { "epoch": 0.9781764967869186, "grad_norm": 0.4450278878211975, "learning_rate": 1.0360097081828429e-05, "loss": 0.5442, "step": 46122 }, { "epoch": 0.9781977052448516, "grad_norm": 0.4275636374950409, "learning_rate": 1.0359763802595662e-05, "loss": 0.4869, "step": 46123 }, { "epoch": 0.9782189137027847, "grad_norm": 0.3133595585823059, "learning_rate": 1.035943052296277e-05, "loss": 0.4136, "step": 46124 }, { "epoch": 0.9782401221607177, "grad_norm": 0.3527851700782776, "learning_rate": 1.0359097242930122e-05, "loss": 0.3964, "step": 46125 }, { "epoch": 0.9782613306186507, "grad_norm": 0.3639349341392517, "learning_rate": 1.0358763962498089e-05, "loss": 0.5264, "step": 46126 }, { "epoch": 0.9782825390765837, "grad_norm": 0.34559693932533264, "learning_rate": 1.0358430681667041e-05, "loss": 0.4663, "step": 46127 }, { "epoch": 0.9783037475345168, "grad_norm": 0.3582374155521393, "learning_rate": 1.035809740043735e-05, "loss": 0.5331, "step": 46128 }, { "epoch": 0.9783249559924498, "grad_norm": 0.3748811185359955, "learning_rate": 1.0357764118809386e-05, "loss": 0.4628, "step": 46129 }, { "epoch": 0.9783461644503828, "grad_norm": 0.3749735653400421, "learning_rate": 1.0357430836783522e-05, "loss": 0.4901, "step": 46130 }, { "epoch": 0.9783673729083159, "grad_norm": 0.4019637703895569, "learning_rate": 1.0357097554360124e-05, "loss": 0.4789, "step": 46131 }, { "epoch": 0.9783885813662488, "grad_norm": 0.40476328134536743, "learning_rate": 1.0356764271539566e-05, "loss": 0.4457, "step": 46132 }, { "epoch": 0.9784097898241819, "grad_norm": 0.38894370198249817, "learning_rate": 1.0356430988322219e-05, "loss": 0.5475, "step": 46133 }, { "epoch": 0.9784309982821149, "grad_norm": 0.35562625527381897, "learning_rate": 1.035609770470845e-05, "loss": 0.4585, "step": 46134 }, { "epoch": 0.978452206740048, "grad_norm": 0.35086700320243835, "learning_rate": 1.0355764420698634e-05, "loss": 0.4881, "step": 46135 }, { "epoch": 0.9784734151979809, "grad_norm": 0.3541291654109955, "learning_rate": 1.035543113629314e-05, "loss": 0.5465, "step": 46136 }, { "epoch": 0.978494623655914, "grad_norm": 0.45486173033714294, "learning_rate": 1.0355097851492337e-05, "loss": 0.4602, "step": 46137 }, { "epoch": 0.978515832113847, "grad_norm": 0.3631988763809204, "learning_rate": 1.03547645662966e-05, "loss": 0.4936, "step": 46138 }, { "epoch": 0.97853704057178, "grad_norm": 0.3605898916721344, "learning_rate": 1.0354431280706297e-05, "loss": 0.4821, "step": 46139 }, { "epoch": 0.978558249029713, "grad_norm": 0.3814682364463806, "learning_rate": 1.0354097994721795e-05, "loss": 0.4971, "step": 46140 }, { "epoch": 0.9785794574876461, "grad_norm": 0.3492223024368286, "learning_rate": 1.035376470834347e-05, "loss": 0.493, "step": 46141 }, { "epoch": 0.9786006659455792, "grad_norm": 0.34372812509536743, "learning_rate": 1.035343142157169e-05, "loss": 0.49, "step": 46142 }, { "epoch": 0.9786218744035121, "grad_norm": 0.3666321933269501, "learning_rate": 1.035309813440683e-05, "loss": 0.5096, "step": 46143 }, { "epoch": 0.9786430828614452, "grad_norm": 0.40085744857788086, "learning_rate": 1.0352764846849255e-05, "loss": 0.4467, "step": 46144 }, { "epoch": 0.9786642913193782, "grad_norm": 0.33000752329826355, "learning_rate": 1.0352431558899341e-05, "loss": 0.4297, "step": 46145 }, { "epoch": 0.9786854997773112, "grad_norm": 0.3771061301231384, "learning_rate": 1.0352098270557451e-05, "loss": 0.5444, "step": 46146 }, { "epoch": 0.9787067082352442, "grad_norm": 0.3886314332485199, "learning_rate": 1.0351764981823961e-05, "loss": 0.4731, "step": 46147 }, { "epoch": 0.9787279166931773, "grad_norm": 0.36070263385772705, "learning_rate": 1.0351431692699243e-05, "loss": 0.5115, "step": 46148 }, { "epoch": 0.9787491251511102, "grad_norm": 0.4923773407936096, "learning_rate": 1.0351098403183664e-05, "loss": 0.4858, "step": 46149 }, { "epoch": 0.9787703336090433, "grad_norm": 0.4625309407711029, "learning_rate": 1.0350765113277599e-05, "loss": 0.5433, "step": 46150 }, { "epoch": 0.9787915420669763, "grad_norm": 0.35519546270370483, "learning_rate": 1.0350431822981413e-05, "loss": 0.5025, "step": 46151 }, { "epoch": 0.9788127505249093, "grad_norm": 0.4226395785808563, "learning_rate": 1.0350098532295483e-05, "loss": 0.4826, "step": 46152 }, { "epoch": 0.9788339589828423, "grad_norm": 0.39582234621047974, "learning_rate": 1.0349765241220175e-05, "loss": 0.6002, "step": 46153 }, { "epoch": 0.9788551674407754, "grad_norm": 0.3955865502357483, "learning_rate": 1.0349431949755858e-05, "loss": 0.5242, "step": 46154 }, { "epoch": 0.9788763758987084, "grad_norm": 0.39079758524894714, "learning_rate": 1.034909865790291e-05, "loss": 0.3946, "step": 46155 }, { "epoch": 0.9788975843566414, "grad_norm": 0.36454570293426514, "learning_rate": 1.0348765365661697e-05, "loss": 0.4187, "step": 46156 }, { "epoch": 0.9789187928145745, "grad_norm": 0.35131463408470154, "learning_rate": 1.0348432073032588e-05, "loss": 0.5147, "step": 46157 }, { "epoch": 0.9789400012725075, "grad_norm": 0.4125080704689026, "learning_rate": 1.0348098780015958e-05, "loss": 0.4069, "step": 46158 }, { "epoch": 0.9789612097304405, "grad_norm": 0.41055989265441895, "learning_rate": 1.0347765486612178e-05, "loss": 0.3989, "step": 46159 }, { "epoch": 0.9789824181883735, "grad_norm": 0.3815957307815552, "learning_rate": 1.0347432192821609e-05, "loss": 0.4831, "step": 46160 }, { "epoch": 0.9790036266463066, "grad_norm": 0.38506487011909485, "learning_rate": 1.0347098898644634e-05, "loss": 0.5422, "step": 46161 }, { "epoch": 0.9790248351042395, "grad_norm": 0.4151388108730316, "learning_rate": 1.034676560408162e-05, "loss": 0.4933, "step": 46162 }, { "epoch": 0.9790460435621726, "grad_norm": 0.6011890769004822, "learning_rate": 1.0346432309132932e-05, "loss": 0.4704, "step": 46163 }, { "epoch": 0.9790672520201056, "grad_norm": 0.38102224469184875, "learning_rate": 1.0346099013798947e-05, "loss": 0.4869, "step": 46164 }, { "epoch": 0.9790884604780387, "grad_norm": 0.341309130191803, "learning_rate": 1.0345765718080034e-05, "loss": 0.4039, "step": 46165 }, { "epoch": 0.9791096689359716, "grad_norm": 0.4208819270133972, "learning_rate": 1.0345432421976564e-05, "loss": 0.5041, "step": 46166 }, { "epoch": 0.9791308773939047, "grad_norm": 0.3427956998348236, "learning_rate": 1.0345099125488904e-05, "loss": 0.4819, "step": 46167 }, { "epoch": 0.9791520858518377, "grad_norm": 0.43234601616859436, "learning_rate": 1.0344765828617428e-05, "loss": 0.4703, "step": 46168 }, { "epoch": 0.9791732943097707, "grad_norm": 0.3772837519645691, "learning_rate": 1.0344432531362512e-05, "loss": 0.5534, "step": 46169 }, { "epoch": 0.9791945027677038, "grad_norm": 0.3701937794685364, "learning_rate": 1.0344099233724519e-05, "loss": 0.4117, "step": 46170 }, { "epoch": 0.9792157112256368, "grad_norm": 0.38180306553840637, "learning_rate": 1.0343765935703817e-05, "loss": 0.5076, "step": 46171 }, { "epoch": 0.9792369196835699, "grad_norm": 0.4405786395072937, "learning_rate": 1.0343432637300785e-05, "loss": 0.4885, "step": 46172 }, { "epoch": 0.9792581281415028, "grad_norm": 0.3661297857761383, "learning_rate": 1.0343099338515789e-05, "loss": 0.5392, "step": 46173 }, { "epoch": 0.9792793365994359, "grad_norm": 0.39633864164352417, "learning_rate": 1.03427660393492e-05, "loss": 0.4793, "step": 46174 }, { "epoch": 0.9793005450573689, "grad_norm": 0.3522703945636749, "learning_rate": 1.0342432739801395e-05, "loss": 0.4457, "step": 46175 }, { "epoch": 0.9793217535153019, "grad_norm": 0.536468505859375, "learning_rate": 1.0342099439872737e-05, "loss": 0.471, "step": 46176 }, { "epoch": 0.9793429619732349, "grad_norm": 0.42168310284614563, "learning_rate": 1.0341766139563596e-05, "loss": 0.4586, "step": 46177 }, { "epoch": 0.979364170431168, "grad_norm": 0.35011526942253113, "learning_rate": 1.0341432838874348e-05, "loss": 0.4615, "step": 46178 }, { "epoch": 0.9793853788891009, "grad_norm": 0.536981999874115, "learning_rate": 1.034109953780536e-05, "loss": 0.5227, "step": 46179 }, { "epoch": 0.979406587347034, "grad_norm": 0.3832903802394867, "learning_rate": 1.0340766236357005e-05, "loss": 0.5206, "step": 46180 }, { "epoch": 0.979427795804967, "grad_norm": 0.37198376655578613, "learning_rate": 1.0340432934529651e-05, "loss": 0.445, "step": 46181 }, { "epoch": 0.9794490042629, "grad_norm": 0.3590499758720398, "learning_rate": 1.0340099632323673e-05, "loss": 0.4582, "step": 46182 }, { "epoch": 0.9794702127208331, "grad_norm": 0.3374832570552826, "learning_rate": 1.0339766329739438e-05, "loss": 0.4156, "step": 46183 }, { "epoch": 0.9794914211787661, "grad_norm": 0.4114445447921753, "learning_rate": 1.0339433026777319e-05, "loss": 0.5724, "step": 46184 }, { "epoch": 0.9795126296366992, "grad_norm": 0.4002883732318878, "learning_rate": 1.0339099723437684e-05, "loss": 0.4716, "step": 46185 }, { "epoch": 0.9795338380946321, "grad_norm": 0.4205411374568939, "learning_rate": 1.0338766419720904e-05, "loss": 0.4783, "step": 46186 }, { "epoch": 0.9795550465525652, "grad_norm": 0.33330419659614563, "learning_rate": 1.0338433115627356e-05, "loss": 0.4778, "step": 46187 }, { "epoch": 0.9795762550104982, "grad_norm": 0.3494359850883484, "learning_rate": 1.0338099811157401e-05, "loss": 0.4483, "step": 46188 }, { "epoch": 0.9795974634684312, "grad_norm": 0.4084821045398712, "learning_rate": 1.0337766506311415e-05, "loss": 0.4757, "step": 46189 }, { "epoch": 0.9796186719263642, "grad_norm": 0.3633921444416046, "learning_rate": 1.0337433201089771e-05, "loss": 0.4652, "step": 46190 }, { "epoch": 0.9796398803842973, "grad_norm": 0.44362080097198486, "learning_rate": 1.0337099895492832e-05, "loss": 0.5155, "step": 46191 }, { "epoch": 0.9796610888422302, "grad_norm": 0.38732096552848816, "learning_rate": 1.0336766589520975e-05, "loss": 0.4801, "step": 46192 }, { "epoch": 0.9796822973001633, "grad_norm": 0.33675456047058105, "learning_rate": 1.0336433283174571e-05, "loss": 0.4217, "step": 46193 }, { "epoch": 0.9797035057580963, "grad_norm": 0.3392557203769684, "learning_rate": 1.0336099976453986e-05, "loss": 0.3805, "step": 46194 }, { "epoch": 0.9797247142160294, "grad_norm": 0.3814685344696045, "learning_rate": 1.0335766669359596e-05, "loss": 0.4679, "step": 46195 }, { "epoch": 0.9797459226739623, "grad_norm": 0.4503001868724823, "learning_rate": 1.0335433361891769e-05, "loss": 0.5218, "step": 46196 }, { "epoch": 0.9797671311318954, "grad_norm": 0.36698994040489197, "learning_rate": 1.0335100054050877e-05, "loss": 0.4709, "step": 46197 }, { "epoch": 0.9797883395898285, "grad_norm": 0.38241106271743774, "learning_rate": 1.0334766745837288e-05, "loss": 0.5085, "step": 46198 }, { "epoch": 0.9798095480477614, "grad_norm": 0.3956921100616455, "learning_rate": 1.0334433437251373e-05, "loss": 0.4698, "step": 46199 }, { "epoch": 0.9798307565056945, "grad_norm": 0.3830665647983551, "learning_rate": 1.0334100128293506e-05, "loss": 0.4911, "step": 46200 }, { "epoch": 0.9798519649636275, "grad_norm": 0.36057889461517334, "learning_rate": 1.0333766818964055e-05, "loss": 0.4485, "step": 46201 }, { "epoch": 0.9798731734215606, "grad_norm": 0.3712051212787628, "learning_rate": 1.0333433509263391e-05, "loss": 0.5115, "step": 46202 }, { "epoch": 0.9798943818794935, "grad_norm": 0.39793193340301514, "learning_rate": 1.0333100199191887e-05, "loss": 0.5401, "step": 46203 }, { "epoch": 0.9799155903374266, "grad_norm": 0.3836231529712677, "learning_rate": 1.0332766888749912e-05, "loss": 0.3982, "step": 46204 }, { "epoch": 0.9799367987953596, "grad_norm": 0.35166022181510925, "learning_rate": 1.0332433577937832e-05, "loss": 0.4684, "step": 46205 }, { "epoch": 0.9799580072532926, "grad_norm": 0.40491557121276855, "learning_rate": 1.0332100266756027e-05, "loss": 0.5021, "step": 46206 }, { "epoch": 0.9799792157112256, "grad_norm": 0.37036383152008057, "learning_rate": 1.0331766955204862e-05, "loss": 0.5333, "step": 46207 }, { "epoch": 0.9800004241691587, "grad_norm": 0.3819015324115753, "learning_rate": 1.0331433643284707e-05, "loss": 0.4835, "step": 46208 }, { "epoch": 0.9800216326270916, "grad_norm": 0.3718489408493042, "learning_rate": 1.0331100330995936e-05, "loss": 0.4433, "step": 46209 }, { "epoch": 0.9800428410850247, "grad_norm": 0.5331295132637024, "learning_rate": 1.0330767018338916e-05, "loss": 0.504, "step": 46210 }, { "epoch": 0.9800640495429578, "grad_norm": 0.3743153512477875, "learning_rate": 1.0330433705314022e-05, "loss": 0.5051, "step": 46211 }, { "epoch": 0.9800852580008907, "grad_norm": 0.37056371569633484, "learning_rate": 1.0330100391921622e-05, "loss": 0.5145, "step": 46212 }, { "epoch": 0.9801064664588238, "grad_norm": 0.3946717083454132, "learning_rate": 1.0329767078162085e-05, "loss": 0.4848, "step": 46213 }, { "epoch": 0.9801276749167568, "grad_norm": 0.4069919288158417, "learning_rate": 1.0329433764035787e-05, "loss": 0.5239, "step": 46214 }, { "epoch": 0.9801488833746899, "grad_norm": 0.3886406719684601, "learning_rate": 1.0329100449543097e-05, "loss": 0.5171, "step": 46215 }, { "epoch": 0.9801700918326228, "grad_norm": 0.38485807180404663, "learning_rate": 1.0328767134684381e-05, "loss": 0.5085, "step": 46216 }, { "epoch": 0.9801913002905559, "grad_norm": 0.36867693066596985, "learning_rate": 1.0328433819460015e-05, "loss": 0.4826, "step": 46217 }, { "epoch": 0.9802125087484889, "grad_norm": 0.4186994731426239, "learning_rate": 1.0328100503870366e-05, "loss": 0.5243, "step": 46218 }, { "epoch": 0.9802337172064219, "grad_norm": 0.37223130464553833, "learning_rate": 1.0327767187915805e-05, "loss": 0.4849, "step": 46219 }, { "epoch": 0.9802549256643549, "grad_norm": 0.4449302554130554, "learning_rate": 1.0327433871596709e-05, "loss": 0.4327, "step": 46220 }, { "epoch": 0.980276134122288, "grad_norm": 0.3668445348739624, "learning_rate": 1.032710055491344e-05, "loss": 0.4164, "step": 46221 }, { "epoch": 0.9802973425802209, "grad_norm": 0.36705151200294495, "learning_rate": 1.0326767237866375e-05, "loss": 0.4527, "step": 46222 }, { "epoch": 0.980318551038154, "grad_norm": 0.384768545627594, "learning_rate": 1.0326433920455881e-05, "loss": 0.4882, "step": 46223 }, { "epoch": 0.9803397594960871, "grad_norm": 0.3579159379005432, "learning_rate": 1.032610060268233e-05, "loss": 0.4871, "step": 46224 }, { "epoch": 0.9803609679540201, "grad_norm": 0.31526458263397217, "learning_rate": 1.032576728454609e-05, "loss": 0.4137, "step": 46225 }, { "epoch": 0.9803821764119531, "grad_norm": 1.2439887523651123, "learning_rate": 1.032543396604754e-05, "loss": 0.5227, "step": 46226 }, { "epoch": 0.9804033848698861, "grad_norm": 0.3531719446182251, "learning_rate": 1.032510064718704e-05, "loss": 0.5108, "step": 46227 }, { "epoch": 0.9804245933278192, "grad_norm": 0.3685021996498108, "learning_rate": 1.032476732796497e-05, "loss": 0.5357, "step": 46228 }, { "epoch": 0.9804458017857521, "grad_norm": 0.396637886762619, "learning_rate": 1.0324434008381698e-05, "loss": 0.5233, "step": 46229 }, { "epoch": 0.9804670102436852, "grad_norm": 0.33082082867622375, "learning_rate": 1.0324100688437588e-05, "loss": 0.4719, "step": 46230 }, { "epoch": 0.9804882187016182, "grad_norm": 0.3300088346004486, "learning_rate": 1.0323767368133017e-05, "loss": 0.4541, "step": 46231 }, { "epoch": 0.9805094271595513, "grad_norm": 0.3744507133960724, "learning_rate": 1.032343404746836e-05, "loss": 0.455, "step": 46232 }, { "epoch": 0.9805306356174842, "grad_norm": 0.34143713116645813, "learning_rate": 1.0323100726443975e-05, "loss": 0.5136, "step": 46233 }, { "epoch": 0.9805518440754173, "grad_norm": 0.36092206835746765, "learning_rate": 1.0322767405060246e-05, "loss": 0.4902, "step": 46234 }, { "epoch": 0.9805730525333503, "grad_norm": 0.3814673125743866, "learning_rate": 1.0322434083317536e-05, "loss": 0.4718, "step": 46235 }, { "epoch": 0.9805942609912833, "grad_norm": 0.38237136602401733, "learning_rate": 1.0322100761216217e-05, "loss": 0.5432, "step": 46236 }, { "epoch": 0.9806154694492163, "grad_norm": 0.4566880166530609, "learning_rate": 1.032176743875666e-05, "loss": 0.5046, "step": 46237 }, { "epoch": 0.9806366779071494, "grad_norm": 0.3459200859069824, "learning_rate": 1.0321434115939238e-05, "loss": 0.5159, "step": 46238 }, { "epoch": 0.9806578863650824, "grad_norm": 0.3661191761493683, "learning_rate": 1.0321100792764316e-05, "loss": 0.5582, "step": 46239 }, { "epoch": 0.9806790948230154, "grad_norm": 0.3487602770328522, "learning_rate": 1.0320767469232274e-05, "loss": 0.4837, "step": 46240 }, { "epoch": 0.9807003032809485, "grad_norm": 0.35408520698547363, "learning_rate": 1.0320434145343473e-05, "loss": 0.4169, "step": 46241 }, { "epoch": 0.9807215117388814, "grad_norm": 0.3622856140136719, "learning_rate": 1.0320100821098289e-05, "loss": 0.5328, "step": 46242 }, { "epoch": 0.9807427201968145, "grad_norm": 0.3981734812259674, "learning_rate": 1.0319767496497094e-05, "loss": 0.4846, "step": 46243 }, { "epoch": 0.9807639286547475, "grad_norm": 0.3337497413158417, "learning_rate": 1.0319434171540253e-05, "loss": 0.4716, "step": 46244 }, { "epoch": 0.9807851371126806, "grad_norm": 0.35162612795829773, "learning_rate": 1.0319100846228138e-05, "loss": 0.4465, "step": 46245 }, { "epoch": 0.9808063455706135, "grad_norm": 0.3706214427947998, "learning_rate": 1.0318767520561128e-05, "loss": 0.4441, "step": 46246 }, { "epoch": 0.9808275540285466, "grad_norm": 0.32382774353027344, "learning_rate": 1.0318434194539584e-05, "loss": 0.4113, "step": 46247 }, { "epoch": 0.9808487624864796, "grad_norm": 0.3876940608024597, "learning_rate": 1.031810086816388e-05, "loss": 0.3951, "step": 46248 }, { "epoch": 0.9808699709444126, "grad_norm": 0.36043721437454224, "learning_rate": 1.031776754143439e-05, "loss": 0.4071, "step": 46249 }, { "epoch": 0.9808911794023456, "grad_norm": 0.5270776152610779, "learning_rate": 1.0317434214351476e-05, "loss": 0.4724, "step": 46250 }, { "epoch": 0.9809123878602787, "grad_norm": 0.4467264413833618, "learning_rate": 1.0317100886915519e-05, "loss": 0.4465, "step": 46251 }, { "epoch": 0.9809335963182118, "grad_norm": 0.39157989621162415, "learning_rate": 1.0316767559126886e-05, "loss": 0.4606, "step": 46252 }, { "epoch": 0.9809548047761447, "grad_norm": 0.42126181721687317, "learning_rate": 1.0316434230985944e-05, "loss": 0.5414, "step": 46253 }, { "epoch": 0.9809760132340778, "grad_norm": 0.5115448832511902, "learning_rate": 1.0316100902493068e-05, "loss": 0.537, "step": 46254 }, { "epoch": 0.9809972216920108, "grad_norm": 0.3921564519405365, "learning_rate": 1.0315767573648625e-05, "loss": 0.4735, "step": 46255 }, { "epoch": 0.9810184301499438, "grad_norm": 0.3881996273994446, "learning_rate": 1.0315434244452991e-05, "loss": 0.528, "step": 46256 }, { "epoch": 0.9810396386078768, "grad_norm": 0.41444915533065796, "learning_rate": 1.0315100914906533e-05, "loss": 0.4522, "step": 46257 }, { "epoch": 0.9810608470658099, "grad_norm": 0.3990359604358673, "learning_rate": 1.0314767585009622e-05, "loss": 0.4694, "step": 46258 }, { "epoch": 0.9810820555237428, "grad_norm": 0.3740050196647644, "learning_rate": 1.0314434254762628e-05, "loss": 0.4426, "step": 46259 }, { "epoch": 0.9811032639816759, "grad_norm": 0.40549877285957336, "learning_rate": 1.0314100924165926e-05, "loss": 0.5408, "step": 46260 }, { "epoch": 0.9811244724396089, "grad_norm": 0.36589229106903076, "learning_rate": 1.0313767593219881e-05, "loss": 0.5074, "step": 46261 }, { "epoch": 0.981145680897542, "grad_norm": 0.4756612777709961, "learning_rate": 1.031343426192487e-05, "loss": 0.4803, "step": 46262 }, { "epoch": 0.9811668893554749, "grad_norm": 0.409544974565506, "learning_rate": 1.0313100930281254e-05, "loss": 0.5547, "step": 46263 }, { "epoch": 0.981188097813408, "grad_norm": 0.3699615001678467, "learning_rate": 1.0312767598289412e-05, "loss": 0.5598, "step": 46264 }, { "epoch": 0.9812093062713411, "grad_norm": 0.4050522446632385, "learning_rate": 1.0312434265949719e-05, "loss": 0.4763, "step": 46265 }, { "epoch": 0.981230514729274, "grad_norm": 0.34878167510032654, "learning_rate": 1.0312100933262535e-05, "loss": 0.4172, "step": 46266 }, { "epoch": 0.9812517231872071, "grad_norm": 0.3867983818054199, "learning_rate": 1.031176760022823e-05, "loss": 0.5126, "step": 46267 }, { "epoch": 0.9812729316451401, "grad_norm": 0.3792319595813751, "learning_rate": 1.0311434266847187e-05, "loss": 0.4958, "step": 46268 }, { "epoch": 0.9812941401030731, "grad_norm": 0.36201679706573486, "learning_rate": 1.0311100933119767e-05, "loss": 0.4618, "step": 46269 }, { "epoch": 0.9813153485610061, "grad_norm": 0.3785441517829895, "learning_rate": 1.0310767599046342e-05, "loss": 0.5073, "step": 46270 }, { "epoch": 0.9813365570189392, "grad_norm": 0.3650539517402649, "learning_rate": 1.0310434264627284e-05, "loss": 0.4976, "step": 46271 }, { "epoch": 0.9813577654768721, "grad_norm": 0.36823025345802307, "learning_rate": 1.0310100929862967e-05, "loss": 0.4962, "step": 46272 }, { "epoch": 0.9813789739348052, "grad_norm": 0.36697766184806824, "learning_rate": 1.0309767594753754e-05, "loss": 0.5445, "step": 46273 }, { "epoch": 0.9814001823927382, "grad_norm": 0.38172677159309387, "learning_rate": 1.0309434259300024e-05, "loss": 0.4669, "step": 46274 }, { "epoch": 0.9814213908506713, "grad_norm": 0.38177159428596497, "learning_rate": 1.030910092350214e-05, "loss": 0.4692, "step": 46275 }, { "epoch": 0.9814425993086042, "grad_norm": 0.34413862228393555, "learning_rate": 1.0308767587360477e-05, "loss": 0.4513, "step": 46276 }, { "epoch": 0.9814638077665373, "grad_norm": 0.4579344093799591, "learning_rate": 1.030843425087541e-05, "loss": 0.665, "step": 46277 }, { "epoch": 0.9814850162244703, "grad_norm": 0.38937070965766907, "learning_rate": 1.0308100914047301e-05, "loss": 0.4316, "step": 46278 }, { "epoch": 0.9815062246824033, "grad_norm": 0.38962388038635254, "learning_rate": 1.0307767576876527e-05, "loss": 0.4549, "step": 46279 }, { "epoch": 0.9815274331403364, "grad_norm": 0.3509170114994049, "learning_rate": 1.0307434239363456e-05, "loss": 0.4193, "step": 46280 }, { "epoch": 0.9815486415982694, "grad_norm": 0.3560301661491394, "learning_rate": 1.030710090150846e-05, "loss": 0.4507, "step": 46281 }, { "epoch": 0.9815698500562025, "grad_norm": 0.39116790890693665, "learning_rate": 1.0306767563311905e-05, "loss": 0.511, "step": 46282 }, { "epoch": 0.9815910585141354, "grad_norm": 0.398072212934494, "learning_rate": 1.0306434224774172e-05, "loss": 0.4934, "step": 46283 }, { "epoch": 0.9816122669720685, "grad_norm": 0.3443906605243683, "learning_rate": 1.030610088589562e-05, "loss": 0.4986, "step": 46284 }, { "epoch": 0.9816334754300015, "grad_norm": 0.3513968288898468, "learning_rate": 1.0305767546676628e-05, "loss": 0.4763, "step": 46285 }, { "epoch": 0.9816546838879345, "grad_norm": 0.4514511227607727, "learning_rate": 1.0305434207117566e-05, "loss": 0.465, "step": 46286 }, { "epoch": 0.9816758923458675, "grad_norm": 0.4071752727031708, "learning_rate": 1.0305100867218798e-05, "loss": 0.4625, "step": 46287 }, { "epoch": 0.9816971008038006, "grad_norm": 0.38898277282714844, "learning_rate": 1.0304767526980704e-05, "loss": 0.4933, "step": 46288 }, { "epoch": 0.9817183092617335, "grad_norm": 0.3885554373264313, "learning_rate": 1.0304434186403646e-05, "loss": 0.5341, "step": 46289 }, { "epoch": 0.9817395177196666, "grad_norm": 0.38725465536117554, "learning_rate": 1.0304100845488002e-05, "loss": 0.4853, "step": 46290 }, { "epoch": 0.9817607261775996, "grad_norm": 0.33491915464401245, "learning_rate": 1.0303767504234139e-05, "loss": 0.4724, "step": 46291 }, { "epoch": 0.9817819346355326, "grad_norm": 0.3970906436443329, "learning_rate": 1.0303434162642425e-05, "loss": 0.5524, "step": 46292 }, { "epoch": 0.9818031430934657, "grad_norm": 0.3718641996383667, "learning_rate": 1.030310082071324e-05, "loss": 0.4878, "step": 46293 }, { "epoch": 0.9818243515513987, "grad_norm": 0.46693968772888184, "learning_rate": 1.0302767478446947e-05, "loss": 0.5782, "step": 46294 }, { "epoch": 0.9818455600093318, "grad_norm": 0.360080748796463, "learning_rate": 1.0302434135843915e-05, "loss": 0.4147, "step": 46295 }, { "epoch": 0.9818667684672647, "grad_norm": 0.3517371416091919, "learning_rate": 1.030210079290452e-05, "loss": 0.487, "step": 46296 }, { "epoch": 0.9818879769251978, "grad_norm": 0.3407039940357208, "learning_rate": 1.0301767449629134e-05, "loss": 0.4595, "step": 46297 }, { "epoch": 0.9819091853831308, "grad_norm": 0.34954479336738586, "learning_rate": 1.0301434106018122e-05, "loss": 0.4653, "step": 46298 }, { "epoch": 0.9819303938410638, "grad_norm": 0.3888651728630066, "learning_rate": 1.030110076207186e-05, "loss": 0.4553, "step": 46299 }, { "epoch": 0.9819516022989968, "grad_norm": 0.3503429591655731, "learning_rate": 1.0300767417790715e-05, "loss": 0.4221, "step": 46300 }, { "epoch": 0.9819728107569299, "grad_norm": 0.44738420844078064, "learning_rate": 1.0300434073175058e-05, "loss": 0.4321, "step": 46301 }, { "epoch": 0.9819940192148628, "grad_norm": 0.35617202520370483, "learning_rate": 1.0300100728225262e-05, "loss": 0.4687, "step": 46302 }, { "epoch": 0.9820152276727959, "grad_norm": 0.3798880875110626, "learning_rate": 1.0299767382941699e-05, "loss": 0.5358, "step": 46303 }, { "epoch": 0.9820364361307289, "grad_norm": 0.47207826375961304, "learning_rate": 1.0299434037324734e-05, "loss": 0.4043, "step": 46304 }, { "epoch": 0.982057644588662, "grad_norm": 0.34633103013038635, "learning_rate": 1.0299100691374742e-05, "loss": 0.4814, "step": 46305 }, { "epoch": 0.982078853046595, "grad_norm": 0.36451342701911926, "learning_rate": 1.029876734509209e-05, "loss": 0.4273, "step": 46306 }, { "epoch": 0.982100061504528, "grad_norm": 0.34736424684524536, "learning_rate": 1.0298433998477158e-05, "loss": 0.4403, "step": 46307 }, { "epoch": 0.9821212699624611, "grad_norm": 0.5095524787902832, "learning_rate": 1.0298100651530305e-05, "loss": 0.4856, "step": 46308 }, { "epoch": 0.982142478420394, "grad_norm": 0.3770323097705841, "learning_rate": 1.0297767304251907e-05, "loss": 0.4671, "step": 46309 }, { "epoch": 0.9821636868783271, "grad_norm": 0.368669718503952, "learning_rate": 1.0297433956642337e-05, "loss": 0.5322, "step": 46310 }, { "epoch": 0.9821848953362601, "grad_norm": 0.3776961863040924, "learning_rate": 1.0297100608701964e-05, "loss": 0.4682, "step": 46311 }, { "epoch": 0.9822061037941932, "grad_norm": 0.3659617006778717, "learning_rate": 1.0296767260431158e-05, "loss": 0.4986, "step": 46312 }, { "epoch": 0.9822273122521261, "grad_norm": 0.32509008049964905, "learning_rate": 1.029643391183029e-05, "loss": 0.4578, "step": 46313 }, { "epoch": 0.9822485207100592, "grad_norm": 0.3546719551086426, "learning_rate": 1.0296100562899729e-05, "loss": 0.4508, "step": 46314 }, { "epoch": 0.9822697291679922, "grad_norm": 0.32604581117630005, "learning_rate": 1.0295767213639846e-05, "loss": 0.4625, "step": 46315 }, { "epoch": 0.9822909376259252, "grad_norm": 0.37124472856521606, "learning_rate": 1.0295433864051018e-05, "loss": 0.4864, "step": 46316 }, { "epoch": 0.9823121460838582, "grad_norm": 0.3972321152687073, "learning_rate": 1.0295100514133609e-05, "loss": 0.5576, "step": 46317 }, { "epoch": 0.9823333545417913, "grad_norm": 0.3979281783103943, "learning_rate": 1.0294767163887992e-05, "loss": 0.4883, "step": 46318 }, { "epoch": 0.9823545629997242, "grad_norm": 0.3646392822265625, "learning_rate": 1.0294433813314537e-05, "loss": 0.5053, "step": 46319 }, { "epoch": 0.9823757714576573, "grad_norm": 0.46995389461517334, "learning_rate": 1.0294100462413614e-05, "loss": 0.4876, "step": 46320 }, { "epoch": 0.9823969799155904, "grad_norm": 0.486372709274292, "learning_rate": 1.0293767111185598e-05, "loss": 0.5574, "step": 46321 }, { "epoch": 0.9824181883735233, "grad_norm": 0.37746697664260864, "learning_rate": 1.0293433759630855e-05, "loss": 0.5233, "step": 46322 }, { "epoch": 0.9824393968314564, "grad_norm": 0.37192675471305847, "learning_rate": 1.0293100407749756e-05, "loss": 0.4786, "step": 46323 }, { "epoch": 0.9824606052893894, "grad_norm": 0.3710688054561615, "learning_rate": 1.0292767055542677e-05, "loss": 0.4758, "step": 46324 }, { "epoch": 0.9824818137473225, "grad_norm": 0.39738890528678894, "learning_rate": 1.0292433703009986e-05, "loss": 0.4775, "step": 46325 }, { "epoch": 0.9825030222052554, "grad_norm": 0.3463321626186371, "learning_rate": 1.0292100350152047e-05, "loss": 0.536, "step": 46326 }, { "epoch": 0.9825242306631885, "grad_norm": 0.35385662317276, "learning_rate": 1.029176699696924e-05, "loss": 0.5144, "step": 46327 }, { "epoch": 0.9825454391211215, "grad_norm": 0.3678164780139923, "learning_rate": 1.029143364346193e-05, "loss": 0.4786, "step": 46328 }, { "epoch": 0.9825666475790545, "grad_norm": 0.6425961256027222, "learning_rate": 1.029110028963049e-05, "loss": 0.506, "step": 46329 }, { "epoch": 0.9825878560369875, "grad_norm": 2.2733583450317383, "learning_rate": 1.0290766935475296e-05, "loss": 0.501, "step": 46330 }, { "epoch": 0.9826090644949206, "grad_norm": 0.36941322684288025, "learning_rate": 1.029043358099671e-05, "loss": 0.4947, "step": 46331 }, { "epoch": 0.9826302729528535, "grad_norm": 0.3686204254627228, "learning_rate": 1.0290100226195105e-05, "loss": 0.5182, "step": 46332 }, { "epoch": 0.9826514814107866, "grad_norm": 0.4066789746284485, "learning_rate": 1.0289766871070854e-05, "loss": 0.4989, "step": 46333 }, { "epoch": 0.9826726898687197, "grad_norm": 0.3549284040927887, "learning_rate": 1.0289433515624327e-05, "loss": 0.4515, "step": 46334 }, { "epoch": 0.9826938983266527, "grad_norm": 0.34236887097358704, "learning_rate": 1.0289100159855894e-05, "loss": 0.5068, "step": 46335 }, { "epoch": 0.9827151067845857, "grad_norm": 0.38716498017311096, "learning_rate": 1.0288766803765927e-05, "loss": 0.4783, "step": 46336 }, { "epoch": 0.9827363152425187, "grad_norm": 0.3441446125507355, "learning_rate": 1.0288433447354798e-05, "loss": 0.4593, "step": 46337 }, { "epoch": 0.9827575237004518, "grad_norm": 0.39790886640548706, "learning_rate": 1.0288100090622875e-05, "loss": 0.4576, "step": 46338 }, { "epoch": 0.9827787321583847, "grad_norm": 0.36951303482055664, "learning_rate": 1.0287766733570528e-05, "loss": 0.5017, "step": 46339 }, { "epoch": 0.9827999406163178, "grad_norm": 0.3446215093135834, "learning_rate": 1.0287433376198128e-05, "loss": 0.4305, "step": 46340 }, { "epoch": 0.9828211490742508, "grad_norm": 0.4612136781215668, "learning_rate": 1.0287100018506049e-05, "loss": 0.5017, "step": 46341 }, { "epoch": 0.9828423575321839, "grad_norm": 0.5881301760673523, "learning_rate": 1.028676666049466e-05, "loss": 0.5047, "step": 46342 }, { "epoch": 0.9828635659901168, "grad_norm": 0.39782121777534485, "learning_rate": 1.0286433302164332e-05, "loss": 0.4487, "step": 46343 }, { "epoch": 0.9828847744480499, "grad_norm": 0.4160447418689728, "learning_rate": 1.0286099943515435e-05, "loss": 0.4339, "step": 46344 }, { "epoch": 0.9829059829059829, "grad_norm": 0.4105081558227539, "learning_rate": 1.0285766584548343e-05, "loss": 0.4529, "step": 46345 }, { "epoch": 0.9829271913639159, "grad_norm": 0.3367505669593811, "learning_rate": 1.0285433225263415e-05, "loss": 0.5085, "step": 46346 }, { "epoch": 0.982948399821849, "grad_norm": 0.36398568749427795, "learning_rate": 1.028509986566104e-05, "loss": 0.4701, "step": 46347 }, { "epoch": 0.982969608279782, "grad_norm": 0.40493130683898926, "learning_rate": 1.0284766505741574e-05, "loss": 0.5449, "step": 46348 }, { "epoch": 0.982990816737715, "grad_norm": 0.8848413228988647, "learning_rate": 1.0284433145505395e-05, "loss": 0.4939, "step": 46349 }, { "epoch": 0.983012025195648, "grad_norm": 0.3386765122413635, "learning_rate": 1.0284099784952872e-05, "loss": 0.4298, "step": 46350 }, { "epoch": 0.9830332336535811, "grad_norm": 0.3509632647037506, "learning_rate": 1.0283766424084375e-05, "loss": 0.4877, "step": 46351 }, { "epoch": 0.983054442111514, "grad_norm": 0.385195255279541, "learning_rate": 1.028343306290028e-05, "loss": 0.5226, "step": 46352 }, { "epoch": 0.9830756505694471, "grad_norm": 1.5118110179901123, "learning_rate": 1.0283099701400947e-05, "loss": 0.3733, "step": 46353 }, { "epoch": 0.9830968590273801, "grad_norm": 0.35002970695495605, "learning_rate": 1.0282766339586757e-05, "loss": 0.3991, "step": 46354 }, { "epoch": 0.9831180674853132, "grad_norm": 0.3679468035697937, "learning_rate": 1.0282432977458072e-05, "loss": 0.4882, "step": 46355 }, { "epoch": 0.9831392759432461, "grad_norm": 0.3758733570575714, "learning_rate": 1.0282099615015273e-05, "loss": 0.4104, "step": 46356 }, { "epoch": 0.9831604844011792, "grad_norm": 0.4211978316307068, "learning_rate": 1.028176625225872e-05, "loss": 0.4322, "step": 46357 }, { "epoch": 0.9831816928591122, "grad_norm": 0.569162130355835, "learning_rate": 1.0281432889188793e-05, "loss": 0.5167, "step": 46358 }, { "epoch": 0.9832029013170452, "grad_norm": 0.3795722723007202, "learning_rate": 1.0281099525805857e-05, "loss": 0.357, "step": 46359 }, { "epoch": 0.9832241097749783, "grad_norm": 0.3654661178588867, "learning_rate": 1.0280766162110285e-05, "loss": 0.4934, "step": 46360 }, { "epoch": 0.9832453182329113, "grad_norm": 0.40496376156806946, "learning_rate": 1.0280432798102446e-05, "loss": 0.5026, "step": 46361 }, { "epoch": 0.9832665266908444, "grad_norm": 0.3894360065460205, "learning_rate": 1.0280099433782715e-05, "loss": 0.4819, "step": 46362 }, { "epoch": 0.9832877351487773, "grad_norm": 0.41778501868247986, "learning_rate": 1.0279766069151458e-05, "loss": 0.5193, "step": 46363 }, { "epoch": 0.9833089436067104, "grad_norm": 0.4209687411785126, "learning_rate": 1.027943270420905e-05, "loss": 0.4892, "step": 46364 }, { "epoch": 0.9833301520646434, "grad_norm": 0.3517720401287079, "learning_rate": 1.0279099338955855e-05, "loss": 0.3882, "step": 46365 }, { "epoch": 0.9833513605225764, "grad_norm": 0.3618910312652588, "learning_rate": 1.0278765973392252e-05, "loss": 0.5266, "step": 46366 }, { "epoch": 0.9833725689805094, "grad_norm": 0.35267430543899536, "learning_rate": 1.0278432607518605e-05, "loss": 0.4767, "step": 46367 }, { "epoch": 0.9833937774384425, "grad_norm": 0.3892848789691925, "learning_rate": 1.027809924133529e-05, "loss": 0.4846, "step": 46368 }, { "epoch": 0.9834149858963754, "grad_norm": 0.38801509141921997, "learning_rate": 1.0277765874842673e-05, "loss": 0.5388, "step": 46369 }, { "epoch": 0.9834361943543085, "grad_norm": 0.37474966049194336, "learning_rate": 1.0277432508041133e-05, "loss": 0.4856, "step": 46370 }, { "epoch": 0.9834574028122415, "grad_norm": 0.3316952884197235, "learning_rate": 1.0277099140931028e-05, "loss": 0.526, "step": 46371 }, { "epoch": 0.9834786112701746, "grad_norm": 0.35137760639190674, "learning_rate": 1.0276765773512738e-05, "loss": 0.4126, "step": 46372 }, { "epoch": 0.9834998197281075, "grad_norm": 0.3385190963745117, "learning_rate": 1.0276432405786632e-05, "loss": 0.4436, "step": 46373 }, { "epoch": 0.9835210281860406, "grad_norm": 0.3597436547279358, "learning_rate": 1.027609903775308e-05, "loss": 0.4747, "step": 46374 }, { "epoch": 0.9835422366439737, "grad_norm": 0.3297731876373291, "learning_rate": 1.0275765669412454e-05, "loss": 0.4466, "step": 46375 }, { "epoch": 0.9835634451019066, "grad_norm": 0.38211789727211, "learning_rate": 1.0275432300765122e-05, "loss": 0.3957, "step": 46376 }, { "epoch": 0.9835846535598397, "grad_norm": 0.3819868862628937, "learning_rate": 1.0275098931811457e-05, "loss": 0.4742, "step": 46377 }, { "epoch": 0.9836058620177727, "grad_norm": 0.33493632078170776, "learning_rate": 1.0274765562551829e-05, "loss": 0.3665, "step": 46378 }, { "epoch": 0.9836270704757057, "grad_norm": 0.3716889023780823, "learning_rate": 1.0274432192986611e-05, "loss": 0.4747, "step": 46379 }, { "epoch": 0.9836482789336387, "grad_norm": 0.3495456576347351, "learning_rate": 1.0274098823116172e-05, "loss": 0.4091, "step": 46380 }, { "epoch": 0.9836694873915718, "grad_norm": 0.3645274341106415, "learning_rate": 1.0273765452940883e-05, "loss": 0.5636, "step": 46381 }, { "epoch": 0.9836906958495047, "grad_norm": 0.40832599997520447, "learning_rate": 1.0273432082461112e-05, "loss": 0.5399, "step": 46382 }, { "epoch": 0.9837119043074378, "grad_norm": 0.6773219704627991, "learning_rate": 1.0273098711677234e-05, "loss": 0.4813, "step": 46383 }, { "epoch": 0.9837331127653708, "grad_norm": 0.390777587890625, "learning_rate": 1.0272765340589619e-05, "loss": 0.5328, "step": 46384 }, { "epoch": 0.9837543212233039, "grad_norm": 0.3216250240802765, "learning_rate": 1.0272431969198632e-05, "loss": 0.4371, "step": 46385 }, { "epoch": 0.9837755296812368, "grad_norm": 0.8659808039665222, "learning_rate": 1.0272098597504651e-05, "loss": 0.4375, "step": 46386 }, { "epoch": 0.9837967381391699, "grad_norm": 0.46905517578125, "learning_rate": 1.0271765225508046e-05, "loss": 0.543, "step": 46387 }, { "epoch": 0.983817946597103, "grad_norm": 0.3348511755466461, "learning_rate": 1.0271431853209184e-05, "loss": 0.4881, "step": 46388 }, { "epoch": 0.9838391550550359, "grad_norm": 0.4099617302417755, "learning_rate": 1.0271098480608441e-05, "loss": 0.4883, "step": 46389 }, { "epoch": 0.983860363512969, "grad_norm": 0.3989810049533844, "learning_rate": 1.0270765107706181e-05, "loss": 0.4975, "step": 46390 }, { "epoch": 0.983881571970902, "grad_norm": 0.3145827651023865, "learning_rate": 1.027043173450278e-05, "loss": 0.3901, "step": 46391 }, { "epoch": 0.983902780428835, "grad_norm": 0.40405020117759705, "learning_rate": 1.0270098360998606e-05, "loss": 0.5112, "step": 46392 }, { "epoch": 0.983923988886768, "grad_norm": 0.33563685417175293, "learning_rate": 1.0269764987194033e-05, "loss": 0.4964, "step": 46393 }, { "epoch": 0.9839451973447011, "grad_norm": 0.5035576820373535, "learning_rate": 1.0269431613089429e-05, "loss": 0.6196, "step": 46394 }, { "epoch": 0.9839664058026341, "grad_norm": 1.0224148035049438, "learning_rate": 1.0269098238685166e-05, "loss": 0.5034, "step": 46395 }, { "epoch": 0.9839876142605671, "grad_norm": 0.3599260449409485, "learning_rate": 1.0268764863981615e-05, "loss": 0.4514, "step": 46396 }, { "epoch": 0.9840088227185001, "grad_norm": 0.37291890382766724, "learning_rate": 1.0268431488979142e-05, "loss": 0.4816, "step": 46397 }, { "epoch": 0.9840300311764332, "grad_norm": 0.44010788202285767, "learning_rate": 1.0268098113678124e-05, "loss": 0.498, "step": 46398 }, { "epoch": 0.9840512396343661, "grad_norm": 0.6770631670951843, "learning_rate": 1.0267764738078932e-05, "loss": 0.5312, "step": 46399 }, { "epoch": 0.9840724480922992, "grad_norm": 0.3902340531349182, "learning_rate": 1.026743136218193e-05, "loss": 0.4321, "step": 46400 }, { "epoch": 0.9840936565502323, "grad_norm": 0.33985191583633423, "learning_rate": 1.0267097985987498e-05, "loss": 0.4313, "step": 46401 }, { "epoch": 0.9841148650081653, "grad_norm": 0.39848440885543823, "learning_rate": 1.0266764609495997e-05, "loss": 0.4272, "step": 46402 }, { "epoch": 0.9841360734660983, "grad_norm": 0.4878135323524475, "learning_rate": 1.0266431232707808e-05, "loss": 0.4808, "step": 46403 }, { "epoch": 0.9841572819240313, "grad_norm": 0.3534213900566101, "learning_rate": 1.0266097855623291e-05, "loss": 0.4499, "step": 46404 }, { "epoch": 0.9841784903819644, "grad_norm": 0.42493224143981934, "learning_rate": 1.0265764478242826e-05, "loss": 0.5012, "step": 46405 }, { "epoch": 0.9841996988398973, "grad_norm": 0.3547998368740082, "learning_rate": 1.0265431100566778e-05, "loss": 0.4644, "step": 46406 }, { "epoch": 0.9842209072978304, "grad_norm": 0.37243732810020447, "learning_rate": 1.0265097722595523e-05, "loss": 0.4557, "step": 46407 }, { "epoch": 0.9842421157557634, "grad_norm": 0.36528095602989197, "learning_rate": 1.0264764344329425e-05, "loss": 0.4916, "step": 46408 }, { "epoch": 0.9842633242136964, "grad_norm": 0.4252508878707886, "learning_rate": 1.0264430965768862e-05, "loss": 0.5009, "step": 46409 }, { "epoch": 0.9842845326716294, "grad_norm": 0.4628152847290039, "learning_rate": 1.0264097586914197e-05, "loss": 0.4718, "step": 46410 }, { "epoch": 0.9843057411295625, "grad_norm": 0.35217246413230896, "learning_rate": 1.0263764207765808e-05, "loss": 0.4769, "step": 46411 }, { "epoch": 0.9843269495874954, "grad_norm": 0.3964494466781616, "learning_rate": 1.026343082832406e-05, "loss": 0.4682, "step": 46412 }, { "epoch": 0.9843481580454285, "grad_norm": 0.38146230578422546, "learning_rate": 1.026309744858933e-05, "loss": 0.4568, "step": 46413 }, { "epoch": 0.9843693665033615, "grad_norm": 0.38001549243927, "learning_rate": 1.0262764068561983e-05, "loss": 0.4067, "step": 46414 }, { "epoch": 0.9843905749612946, "grad_norm": 0.4897864758968353, "learning_rate": 1.0262430688242393e-05, "loss": 0.5058, "step": 46415 }, { "epoch": 0.9844117834192276, "grad_norm": 0.3574743866920471, "learning_rate": 1.026209730763093e-05, "loss": 0.4718, "step": 46416 }, { "epoch": 0.9844329918771606, "grad_norm": 0.384445458650589, "learning_rate": 1.026176392672796e-05, "loss": 0.5355, "step": 46417 }, { "epoch": 0.9844542003350937, "grad_norm": 0.3613661527633667, "learning_rate": 1.0261430545533865e-05, "loss": 0.4691, "step": 46418 }, { "epoch": 0.9844754087930266, "grad_norm": 0.3849361836910248, "learning_rate": 1.0261097164049007e-05, "loss": 0.4448, "step": 46419 }, { "epoch": 0.9844966172509597, "grad_norm": 0.37697088718414307, "learning_rate": 1.0260763782273759e-05, "loss": 0.5321, "step": 46420 }, { "epoch": 0.9845178257088927, "grad_norm": 0.4523312747478485, "learning_rate": 1.0260430400208495e-05, "loss": 0.4485, "step": 46421 }, { "epoch": 0.9845390341668258, "grad_norm": 0.39141085743904114, "learning_rate": 1.0260097017853576e-05, "loss": 0.5245, "step": 46422 }, { "epoch": 0.9845602426247587, "grad_norm": 0.3585975468158722, "learning_rate": 1.025976363520938e-05, "loss": 0.4876, "step": 46423 }, { "epoch": 0.9845814510826918, "grad_norm": 3.995434284210205, "learning_rate": 1.0259430252276284e-05, "loss": 0.5362, "step": 46424 }, { "epoch": 0.9846026595406248, "grad_norm": 0.3826918601989746, "learning_rate": 1.0259096869054646e-05, "loss": 0.5252, "step": 46425 }, { "epoch": 0.9846238679985578, "grad_norm": 0.4207002520561218, "learning_rate": 1.0258763485544845e-05, "loss": 0.5134, "step": 46426 }, { "epoch": 0.9846450764564908, "grad_norm": 0.36955806612968445, "learning_rate": 1.025843010174725e-05, "loss": 0.4618, "step": 46427 }, { "epoch": 0.9846662849144239, "grad_norm": 0.3690989911556244, "learning_rate": 1.0258096717662229e-05, "loss": 0.5073, "step": 46428 }, { "epoch": 0.984687493372357, "grad_norm": 0.3508380651473999, "learning_rate": 1.0257763333290159e-05, "loss": 0.496, "step": 46429 }, { "epoch": 0.9847087018302899, "grad_norm": 0.7968631982803345, "learning_rate": 1.02574299486314e-05, "loss": 0.4963, "step": 46430 }, { "epoch": 0.984729910288223, "grad_norm": 0.38670608401298523, "learning_rate": 1.0257096563686333e-05, "loss": 0.5071, "step": 46431 }, { "epoch": 0.984751118746156, "grad_norm": 0.3802203834056854, "learning_rate": 1.025676317845533e-05, "loss": 0.4515, "step": 46432 }, { "epoch": 0.984772327204089, "grad_norm": 0.42448654770851135, "learning_rate": 1.0256429792938752e-05, "loss": 0.4584, "step": 46433 }, { "epoch": 0.984793535662022, "grad_norm": 0.6271160840988159, "learning_rate": 1.0256096407136975e-05, "loss": 0.5444, "step": 46434 }, { "epoch": 0.9848147441199551, "grad_norm": 0.3389182984828949, "learning_rate": 1.0255763021050373e-05, "loss": 0.4776, "step": 46435 }, { "epoch": 0.984835952577888, "grad_norm": 0.3695664703845978, "learning_rate": 1.025542963467931e-05, "loss": 0.4114, "step": 46436 }, { "epoch": 0.9848571610358211, "grad_norm": 0.33916884660720825, "learning_rate": 1.0255096248024161e-05, "loss": 0.4795, "step": 46437 }, { "epoch": 0.9848783694937541, "grad_norm": 0.565365195274353, "learning_rate": 1.0254762861085298e-05, "loss": 0.5743, "step": 46438 }, { "epoch": 0.9848995779516871, "grad_norm": 0.3935423195362091, "learning_rate": 1.025442947386309e-05, "loss": 0.5077, "step": 46439 }, { "epoch": 0.9849207864096201, "grad_norm": 0.5883468985557556, "learning_rate": 1.0254096086357907e-05, "loss": 0.542, "step": 46440 }, { "epoch": 0.9849419948675532, "grad_norm": 0.3646804094314575, "learning_rate": 1.0253762698570123e-05, "loss": 0.5316, "step": 46441 }, { "epoch": 0.9849632033254863, "grad_norm": 0.3548484742641449, "learning_rate": 1.02534293105001e-05, "loss": 0.488, "step": 46442 }, { "epoch": 0.9849844117834192, "grad_norm": 0.37251749634742737, "learning_rate": 1.025309592214822e-05, "loss": 0.4607, "step": 46443 }, { "epoch": 0.9850056202413523, "grad_norm": 0.37253907322883606, "learning_rate": 1.0252762533514848e-05, "loss": 0.4637, "step": 46444 }, { "epoch": 0.9850268286992853, "grad_norm": 0.3803419768810272, "learning_rate": 1.0252429144600355e-05, "loss": 0.5412, "step": 46445 }, { "epoch": 0.9850480371572183, "grad_norm": 0.44521141052246094, "learning_rate": 1.0252095755405115e-05, "loss": 0.5405, "step": 46446 }, { "epoch": 0.9850692456151513, "grad_norm": 0.5994967818260193, "learning_rate": 1.0251762365929494e-05, "loss": 0.4744, "step": 46447 }, { "epoch": 0.9850904540730844, "grad_norm": 0.33327335119247437, "learning_rate": 1.0251428976173865e-05, "loss": 0.4389, "step": 46448 }, { "epoch": 0.9851116625310173, "grad_norm": 0.35850632190704346, "learning_rate": 1.0251095586138599e-05, "loss": 0.4957, "step": 46449 }, { "epoch": 0.9851328709889504, "grad_norm": 0.38114359974861145, "learning_rate": 1.025076219582407e-05, "loss": 0.5149, "step": 46450 }, { "epoch": 0.9851540794468834, "grad_norm": 0.3810422718524933, "learning_rate": 1.0250428805230639e-05, "loss": 0.4467, "step": 46451 }, { "epoch": 0.9851752879048165, "grad_norm": 0.4141128361225128, "learning_rate": 1.0250095414358689e-05, "loss": 0.5112, "step": 46452 }, { "epoch": 0.9851964963627494, "grad_norm": 0.42989233136177063, "learning_rate": 1.024976202320858e-05, "loss": 0.4497, "step": 46453 }, { "epoch": 0.9852177048206825, "grad_norm": 0.340005487203598, "learning_rate": 1.0249428631780693e-05, "loss": 0.4214, "step": 46454 }, { "epoch": 0.9852389132786155, "grad_norm": 0.3429439067840576, "learning_rate": 1.0249095240075391e-05, "loss": 0.5399, "step": 46455 }, { "epoch": 0.9852601217365485, "grad_norm": 0.3705737292766571, "learning_rate": 1.0248761848093045e-05, "loss": 0.4716, "step": 46456 }, { "epoch": 0.9852813301944816, "grad_norm": 0.38567307591438293, "learning_rate": 1.0248428455834035e-05, "loss": 0.5064, "step": 46457 }, { "epoch": 0.9853025386524146, "grad_norm": 0.35155779123306274, "learning_rate": 1.024809506329872e-05, "loss": 0.4998, "step": 46458 }, { "epoch": 0.9853237471103476, "grad_norm": 0.3806142210960388, "learning_rate": 1.0247761670487475e-05, "loss": 0.4665, "step": 46459 }, { "epoch": 0.9853449555682806, "grad_norm": 0.3226242661476135, "learning_rate": 1.0247428277400676e-05, "loss": 0.4232, "step": 46460 }, { "epoch": 0.9853661640262137, "grad_norm": 0.3660803735256195, "learning_rate": 1.0247094884038686e-05, "loss": 0.4947, "step": 46461 }, { "epoch": 0.9853873724841467, "grad_norm": 0.4249741733074188, "learning_rate": 1.0246761490401878e-05, "loss": 0.5508, "step": 46462 }, { "epoch": 0.9854085809420797, "grad_norm": 0.34646153450012207, "learning_rate": 1.0246428096490628e-05, "loss": 0.4145, "step": 46463 }, { "epoch": 0.9854297894000127, "grad_norm": 0.3548484742641449, "learning_rate": 1.0246094702305303e-05, "loss": 0.4507, "step": 46464 }, { "epoch": 0.9854509978579458, "grad_norm": 0.3563927114009857, "learning_rate": 1.0245761307846269e-05, "loss": 0.4813, "step": 46465 }, { "epoch": 0.9854722063158787, "grad_norm": 0.35913601517677307, "learning_rate": 1.0245427913113905e-05, "loss": 0.5084, "step": 46466 }, { "epoch": 0.9854934147738118, "grad_norm": 0.5949937105178833, "learning_rate": 1.0245094518108576e-05, "loss": 0.4701, "step": 46467 }, { "epoch": 0.9855146232317448, "grad_norm": 0.3932785391807556, "learning_rate": 1.0244761122830656e-05, "loss": 0.4777, "step": 46468 }, { "epoch": 0.9855358316896778, "grad_norm": 0.550372838973999, "learning_rate": 1.0244427727280515e-05, "loss": 0.4733, "step": 46469 }, { "epoch": 0.9855570401476109, "grad_norm": 0.38026362657546997, "learning_rate": 1.0244094331458523e-05, "loss": 0.5352, "step": 46470 }, { "epoch": 0.9855782486055439, "grad_norm": 0.39498403668403625, "learning_rate": 1.0243760935365052e-05, "loss": 0.4696, "step": 46471 }, { "epoch": 0.985599457063477, "grad_norm": 0.3491477072238922, "learning_rate": 1.0243427539000474e-05, "loss": 0.4575, "step": 46472 }, { "epoch": 0.9856206655214099, "grad_norm": 0.36489996314048767, "learning_rate": 1.0243094142365155e-05, "loss": 0.4685, "step": 46473 }, { "epoch": 0.985641873979343, "grad_norm": 0.4262600541114807, "learning_rate": 1.0242760745459472e-05, "loss": 0.4457, "step": 46474 }, { "epoch": 0.985663082437276, "grad_norm": 0.3508651852607727, "learning_rate": 1.0242427348283789e-05, "loss": 0.4847, "step": 46475 }, { "epoch": 0.985684290895209, "grad_norm": 0.34635379910469055, "learning_rate": 1.024209395083848e-05, "loss": 0.5165, "step": 46476 }, { "epoch": 0.985705499353142, "grad_norm": 0.35670754313468933, "learning_rate": 1.024176055312392e-05, "loss": 0.4492, "step": 46477 }, { "epoch": 0.9857267078110751, "grad_norm": 0.5481594204902649, "learning_rate": 1.0241427155140475e-05, "loss": 0.4831, "step": 46478 }, { "epoch": 0.985747916269008, "grad_norm": 0.3961162567138672, "learning_rate": 1.0241093756888516e-05, "loss": 0.4784, "step": 46479 }, { "epoch": 0.9857691247269411, "grad_norm": 0.3779946565628052, "learning_rate": 1.0240760358368416e-05, "loss": 0.4364, "step": 46480 }, { "epoch": 0.9857903331848741, "grad_norm": 0.35272303223609924, "learning_rate": 1.024042695958054e-05, "loss": 0.5025, "step": 46481 }, { "epoch": 0.9858115416428072, "grad_norm": 0.36843180656433105, "learning_rate": 1.0240093560525265e-05, "loss": 0.5198, "step": 46482 }, { "epoch": 0.9858327501007402, "grad_norm": 0.37562280893325806, "learning_rate": 1.0239760161202966e-05, "loss": 0.4753, "step": 46483 }, { "epoch": 0.9858539585586732, "grad_norm": 0.36999577283859253, "learning_rate": 1.0239426761614e-05, "loss": 0.4541, "step": 46484 }, { "epoch": 0.9858751670166063, "grad_norm": 0.35167422890663147, "learning_rate": 1.023909336175875e-05, "loss": 0.4253, "step": 46485 }, { "epoch": 0.9858963754745392, "grad_norm": 0.387641966342926, "learning_rate": 1.0238759961637583e-05, "loss": 0.5243, "step": 46486 }, { "epoch": 0.9859175839324723, "grad_norm": 0.39254316687583923, "learning_rate": 1.0238426561250864e-05, "loss": 0.5095, "step": 46487 }, { "epoch": 0.9859387923904053, "grad_norm": 0.38715794682502747, "learning_rate": 1.0238093160598973e-05, "loss": 0.5291, "step": 46488 }, { "epoch": 0.9859600008483383, "grad_norm": 0.4001341164112091, "learning_rate": 1.0237759759682278e-05, "loss": 0.3882, "step": 46489 }, { "epoch": 0.9859812093062713, "grad_norm": 0.4403351843357086, "learning_rate": 1.0237426358501146e-05, "loss": 0.5605, "step": 46490 }, { "epoch": 0.9860024177642044, "grad_norm": 0.34488871693611145, "learning_rate": 1.0237092957055951e-05, "loss": 0.535, "step": 46491 }, { "epoch": 0.9860236262221374, "grad_norm": 0.4660305380821228, "learning_rate": 1.0236759555347067e-05, "loss": 0.4679, "step": 46492 }, { "epoch": 0.9860448346800704, "grad_norm": 0.44439616799354553, "learning_rate": 1.0236426153374856e-05, "loss": 0.4771, "step": 46493 }, { "epoch": 0.9860660431380034, "grad_norm": 0.3480563163757324, "learning_rate": 1.0236092751139695e-05, "loss": 0.5134, "step": 46494 }, { "epoch": 0.9860872515959365, "grad_norm": 0.3966868817806244, "learning_rate": 1.0235759348641957e-05, "loss": 0.5196, "step": 46495 }, { "epoch": 0.9861084600538694, "grad_norm": 0.6532123684883118, "learning_rate": 1.0235425945882004e-05, "loss": 0.4509, "step": 46496 }, { "epoch": 0.9861296685118025, "grad_norm": 0.4117264151573181, "learning_rate": 1.0235092542860217e-05, "loss": 0.4757, "step": 46497 }, { "epoch": 0.9861508769697356, "grad_norm": 0.4103512167930603, "learning_rate": 1.0234759139576959e-05, "loss": 0.4643, "step": 46498 }, { "epoch": 0.9861720854276685, "grad_norm": 0.39074283838272095, "learning_rate": 1.0234425736032607e-05, "loss": 0.4863, "step": 46499 }, { "epoch": 0.9861932938856016, "grad_norm": 0.4103831946849823, "learning_rate": 1.0234092332227525e-05, "loss": 0.4805, "step": 46500 }, { "epoch": 0.9862145023435346, "grad_norm": 0.3868239223957062, "learning_rate": 1.023375892816209e-05, "loss": 0.4959, "step": 46501 }, { "epoch": 0.9862357108014677, "grad_norm": 0.43649524450302124, "learning_rate": 1.0233425523836672e-05, "loss": 0.4717, "step": 46502 }, { "epoch": 0.9862569192594006, "grad_norm": 0.4218659996986389, "learning_rate": 1.0233092119251638e-05, "loss": 0.4659, "step": 46503 }, { "epoch": 0.9862781277173337, "grad_norm": 0.3755658268928528, "learning_rate": 1.0232758714407359e-05, "loss": 0.5375, "step": 46504 }, { "epoch": 0.9862993361752667, "grad_norm": 0.3302396237850189, "learning_rate": 1.023242530930421e-05, "loss": 0.4151, "step": 46505 }, { "epoch": 0.9863205446331997, "grad_norm": 0.34245434403419495, "learning_rate": 1.0232091903942562e-05, "loss": 0.4319, "step": 46506 }, { "epoch": 0.9863417530911327, "grad_norm": 0.37041807174682617, "learning_rate": 1.0231758498322777e-05, "loss": 0.5005, "step": 46507 }, { "epoch": 0.9863629615490658, "grad_norm": 0.36837270855903625, "learning_rate": 1.0231425092445237e-05, "loss": 0.4564, "step": 46508 }, { "epoch": 0.9863841700069987, "grad_norm": 0.35097604990005493, "learning_rate": 1.0231091686310309e-05, "loss": 0.4176, "step": 46509 }, { "epoch": 0.9864053784649318, "grad_norm": 0.32787999510765076, "learning_rate": 1.023075827991836e-05, "loss": 0.4517, "step": 46510 }, { "epoch": 0.9864265869228649, "grad_norm": 0.3794119656085968, "learning_rate": 1.0230424873269764e-05, "loss": 0.4978, "step": 46511 }, { "epoch": 0.9864477953807979, "grad_norm": 0.3916248381137848, "learning_rate": 1.0230091466364891e-05, "loss": 0.5052, "step": 46512 }, { "epoch": 0.9864690038387309, "grad_norm": 0.3971509039402008, "learning_rate": 1.0229758059204113e-05, "loss": 0.5662, "step": 46513 }, { "epoch": 0.9864902122966639, "grad_norm": 0.35685938596725464, "learning_rate": 1.0229424651787802e-05, "loss": 0.5049, "step": 46514 }, { "epoch": 0.986511420754597, "grad_norm": 0.36816537380218506, "learning_rate": 1.0229091244116326e-05, "loss": 0.5231, "step": 46515 }, { "epoch": 0.9865326292125299, "grad_norm": 0.3858712613582611, "learning_rate": 1.0228757836190055e-05, "loss": 0.4986, "step": 46516 }, { "epoch": 0.986553837670463, "grad_norm": 0.3515241742134094, "learning_rate": 1.0228424428009363e-05, "loss": 0.4778, "step": 46517 }, { "epoch": 0.986575046128396, "grad_norm": 0.4434709846973419, "learning_rate": 1.0228091019574618e-05, "loss": 0.486, "step": 46518 }, { "epoch": 0.986596254586329, "grad_norm": 0.5033316016197205, "learning_rate": 1.0227757610886194e-05, "loss": 0.4486, "step": 46519 }, { "epoch": 0.986617463044262, "grad_norm": 0.3819441497325897, "learning_rate": 1.0227424201944458e-05, "loss": 0.5232, "step": 46520 }, { "epoch": 0.9866386715021951, "grad_norm": 0.4550146758556366, "learning_rate": 1.0227090792749784e-05, "loss": 0.4546, "step": 46521 }, { "epoch": 0.986659879960128, "grad_norm": 0.37043651938438416, "learning_rate": 1.0226757383302542e-05, "loss": 0.4553, "step": 46522 }, { "epoch": 0.9866810884180611, "grad_norm": 0.3550224304199219, "learning_rate": 1.02264239736031e-05, "loss": 0.4942, "step": 46523 }, { "epoch": 0.9867022968759942, "grad_norm": 0.3242519795894623, "learning_rate": 1.0226090563651833e-05, "loss": 0.421, "step": 46524 }, { "epoch": 0.9867235053339272, "grad_norm": 0.3426879346370697, "learning_rate": 1.0225757153449112e-05, "loss": 0.5, "step": 46525 }, { "epoch": 0.9867447137918602, "grad_norm": 0.3747674822807312, "learning_rate": 1.0225423742995302e-05, "loss": 0.4805, "step": 46526 }, { "epoch": 0.9867659222497932, "grad_norm": 0.43576106429100037, "learning_rate": 1.0225090332290779e-05, "loss": 0.4551, "step": 46527 }, { "epoch": 0.9867871307077263, "grad_norm": 0.428970068693161, "learning_rate": 1.0224756921335914e-05, "loss": 0.5396, "step": 46528 }, { "epoch": 0.9868083391656592, "grad_norm": 0.32340577244758606, "learning_rate": 1.0224423510131074e-05, "loss": 0.5129, "step": 46529 }, { "epoch": 0.9868295476235923, "grad_norm": 0.3471938371658325, "learning_rate": 1.0224090098676635e-05, "loss": 0.4645, "step": 46530 }, { "epoch": 0.9868507560815253, "grad_norm": 0.3815741539001465, "learning_rate": 1.0223756686972965e-05, "loss": 0.4952, "step": 46531 }, { "epoch": 0.9868719645394584, "grad_norm": 0.4981159567832947, "learning_rate": 1.0223423275020431e-05, "loss": 0.4842, "step": 46532 }, { "epoch": 0.9868931729973913, "grad_norm": 0.3655059337615967, "learning_rate": 1.0223089862819409e-05, "loss": 0.4628, "step": 46533 }, { "epoch": 0.9869143814553244, "grad_norm": 0.4340031147003174, "learning_rate": 1.022275645037027e-05, "loss": 0.4988, "step": 46534 }, { "epoch": 0.9869355899132574, "grad_norm": 0.3244713544845581, "learning_rate": 1.0222423037673381e-05, "loss": 0.4574, "step": 46535 }, { "epoch": 0.9869567983711904, "grad_norm": 0.3380720615386963, "learning_rate": 1.0222089624729116e-05, "loss": 0.4441, "step": 46536 }, { "epoch": 0.9869780068291234, "grad_norm": 0.36190730333328247, "learning_rate": 1.0221756211537848e-05, "loss": 0.4803, "step": 46537 }, { "epoch": 0.9869992152870565, "grad_norm": 0.39084431529045105, "learning_rate": 1.022142279809994e-05, "loss": 0.4917, "step": 46538 }, { "epoch": 0.9870204237449896, "grad_norm": 0.44809433817863464, "learning_rate": 1.022108938441577e-05, "loss": 0.5041, "step": 46539 }, { "epoch": 0.9870416322029225, "grad_norm": 0.3673303723335266, "learning_rate": 1.0220755970485705e-05, "loss": 0.5618, "step": 46540 }, { "epoch": 0.9870628406608556, "grad_norm": 0.40276843309402466, "learning_rate": 1.0220422556310116e-05, "loss": 0.496, "step": 46541 }, { "epoch": 0.9870840491187886, "grad_norm": 0.33405566215515137, "learning_rate": 1.022008914188938e-05, "loss": 0.4867, "step": 46542 }, { "epoch": 0.9871052575767216, "grad_norm": 0.3604360818862915, "learning_rate": 1.0219755727223857e-05, "loss": 0.4935, "step": 46543 }, { "epoch": 0.9871264660346546, "grad_norm": 0.3602112829685211, "learning_rate": 1.0219422312313927e-05, "loss": 0.4673, "step": 46544 }, { "epoch": 0.9871476744925877, "grad_norm": 0.3453657627105713, "learning_rate": 1.0219088897159956e-05, "loss": 0.4366, "step": 46545 }, { "epoch": 0.9871688829505206, "grad_norm": 0.3925917446613312, "learning_rate": 1.021875548176232e-05, "loss": 0.5149, "step": 46546 }, { "epoch": 0.9871900914084537, "grad_norm": 0.3557405471801758, "learning_rate": 1.0218422066121381e-05, "loss": 0.4604, "step": 46547 }, { "epoch": 0.9872112998663867, "grad_norm": 0.3775058388710022, "learning_rate": 1.0218088650237517e-05, "loss": 0.4541, "step": 46548 }, { "epoch": 0.9872325083243197, "grad_norm": 0.3894793391227722, "learning_rate": 1.0217755234111095e-05, "loss": 0.4232, "step": 46549 }, { "epoch": 0.9872537167822527, "grad_norm": 0.36235901713371277, "learning_rate": 1.021742181774249e-05, "loss": 0.4458, "step": 46550 }, { "epoch": 0.9872749252401858, "grad_norm": 0.38200828433036804, "learning_rate": 1.0217088401132069e-05, "loss": 0.5143, "step": 46551 }, { "epoch": 0.9872961336981189, "grad_norm": 0.3821703791618347, "learning_rate": 1.0216754984280202e-05, "loss": 0.4947, "step": 46552 }, { "epoch": 0.9873173421560518, "grad_norm": 0.3488976061344147, "learning_rate": 1.0216421567187265e-05, "loss": 0.4909, "step": 46553 }, { "epoch": 0.9873385506139849, "grad_norm": 0.34112223982810974, "learning_rate": 1.0216088149853628e-05, "loss": 0.4717, "step": 46554 }, { "epoch": 0.9873597590719179, "grad_norm": 0.43118512630462646, "learning_rate": 1.0215754732279655e-05, "loss": 0.6276, "step": 46555 }, { "epoch": 0.9873809675298509, "grad_norm": 0.3698559105396271, "learning_rate": 1.0215421314465723e-05, "loss": 0.4906, "step": 46556 }, { "epoch": 0.9874021759877839, "grad_norm": 0.4002150595188141, "learning_rate": 1.0215087896412201e-05, "loss": 0.435, "step": 46557 }, { "epoch": 0.987423384445717, "grad_norm": 0.3565833568572998, "learning_rate": 1.021475447811946e-05, "loss": 0.5008, "step": 46558 }, { "epoch": 0.9874445929036499, "grad_norm": 0.4050808548927307, "learning_rate": 1.0214421059587874e-05, "loss": 0.4223, "step": 46559 }, { "epoch": 0.987465801361583, "grad_norm": 0.41296446323394775, "learning_rate": 1.021408764081781e-05, "loss": 0.4408, "step": 46560 }, { "epoch": 0.987487009819516, "grad_norm": 0.3745043873786926, "learning_rate": 1.0213754221809636e-05, "loss": 0.4641, "step": 46561 }, { "epoch": 0.9875082182774491, "grad_norm": 0.3746050000190735, "learning_rate": 1.0213420802563729e-05, "loss": 0.5079, "step": 46562 }, { "epoch": 0.987529426735382, "grad_norm": 0.3933752775192261, "learning_rate": 1.0213087383080456e-05, "loss": 0.5409, "step": 46563 }, { "epoch": 0.9875506351933151, "grad_norm": 0.4167308807373047, "learning_rate": 1.0212753963360186e-05, "loss": 0.3826, "step": 46564 }, { "epoch": 0.9875718436512482, "grad_norm": 0.33693331480026245, "learning_rate": 1.0212420543403298e-05, "loss": 0.4166, "step": 46565 }, { "epoch": 0.9875930521091811, "grad_norm": 0.3850928246974945, "learning_rate": 1.0212087123210157e-05, "loss": 0.4466, "step": 46566 }, { "epoch": 0.9876142605671142, "grad_norm": 0.4789258539676666, "learning_rate": 1.0211753702781135e-05, "loss": 0.4615, "step": 46567 }, { "epoch": 0.9876354690250472, "grad_norm": 0.3545338809490204, "learning_rate": 1.0211420282116602e-05, "loss": 0.5102, "step": 46568 }, { "epoch": 0.9876566774829802, "grad_norm": 0.35592252016067505, "learning_rate": 1.0211086861216929e-05, "loss": 0.4998, "step": 46569 }, { "epoch": 0.9876778859409132, "grad_norm": 0.3329712748527527, "learning_rate": 1.0210753440082485e-05, "loss": 0.4339, "step": 46570 }, { "epoch": 0.9876990943988463, "grad_norm": 0.42982178926467896, "learning_rate": 1.0210420018713642e-05, "loss": 0.5116, "step": 46571 }, { "epoch": 0.9877203028567793, "grad_norm": 0.3480493128299713, "learning_rate": 1.0210086597110776e-05, "loss": 0.4834, "step": 46572 }, { "epoch": 0.9877415113147123, "grad_norm": 0.371462345123291, "learning_rate": 1.020975317527425e-05, "loss": 0.465, "step": 46573 }, { "epoch": 0.9877627197726453, "grad_norm": 0.4005764126777649, "learning_rate": 1.020941975320444e-05, "loss": 0.5009, "step": 46574 }, { "epoch": 0.9877839282305784, "grad_norm": 0.41144034266471863, "learning_rate": 1.0209086330901717e-05, "loss": 0.5235, "step": 46575 }, { "epoch": 0.9878051366885113, "grad_norm": 0.5905182957649231, "learning_rate": 1.0208752908366449e-05, "loss": 0.5068, "step": 46576 }, { "epoch": 0.9878263451464444, "grad_norm": 0.38884201645851135, "learning_rate": 1.0208419485599004e-05, "loss": 0.5237, "step": 46577 }, { "epoch": 0.9878475536043774, "grad_norm": 0.36286526918411255, "learning_rate": 1.0208086062599758e-05, "loss": 0.4603, "step": 46578 }, { "epoch": 0.9878687620623104, "grad_norm": 0.35515958070755005, "learning_rate": 1.0207752639369084e-05, "loss": 0.4283, "step": 46579 }, { "epoch": 0.9878899705202435, "grad_norm": 0.3862553536891937, "learning_rate": 1.0207419215907347e-05, "loss": 0.3656, "step": 46580 }, { "epoch": 0.9879111789781765, "grad_norm": 0.3988145887851715, "learning_rate": 1.0207085792214921e-05, "loss": 0.588, "step": 46581 }, { "epoch": 0.9879323874361096, "grad_norm": 0.43314027786254883, "learning_rate": 1.0206752368292176e-05, "loss": 0.4863, "step": 46582 }, { "epoch": 0.9879535958940425, "grad_norm": 0.4343523681163788, "learning_rate": 1.0206418944139483e-05, "loss": 0.4715, "step": 46583 }, { "epoch": 0.9879748043519756, "grad_norm": 0.396296888589859, "learning_rate": 1.020608551975721e-05, "loss": 0.4719, "step": 46584 }, { "epoch": 0.9879960128099086, "grad_norm": 0.34109947085380554, "learning_rate": 1.0205752095145734e-05, "loss": 0.521, "step": 46585 }, { "epoch": 0.9880172212678416, "grad_norm": 0.34457042813301086, "learning_rate": 1.0205418670305418e-05, "loss": 0.4755, "step": 46586 }, { "epoch": 0.9880384297257746, "grad_norm": 0.38416218757629395, "learning_rate": 1.020508524523664e-05, "loss": 0.4735, "step": 46587 }, { "epoch": 0.9880596381837077, "grad_norm": 0.3894674479961395, "learning_rate": 1.0204751819939769e-05, "loss": 0.4705, "step": 46588 }, { "epoch": 0.9880808466416406, "grad_norm": 0.33062753081321716, "learning_rate": 1.0204418394415172e-05, "loss": 0.4426, "step": 46589 }, { "epoch": 0.9881020550995737, "grad_norm": 0.3541640341281891, "learning_rate": 1.0204084968663223e-05, "loss": 0.475, "step": 46590 }, { "epoch": 0.9881232635575067, "grad_norm": 0.3905559182167053, "learning_rate": 1.0203751542684297e-05, "loss": 0.441, "step": 46591 }, { "epoch": 0.9881444720154398, "grad_norm": 0.35267308354377747, "learning_rate": 1.0203418116478754e-05, "loss": 0.4483, "step": 46592 }, { "epoch": 0.9881656804733728, "grad_norm": 0.42044830322265625, "learning_rate": 1.0203084690046976e-05, "loss": 0.4716, "step": 46593 }, { "epoch": 0.9881868889313058, "grad_norm": 0.4223223328590393, "learning_rate": 1.0202751263389326e-05, "loss": 0.5217, "step": 46594 }, { "epoch": 0.9882080973892389, "grad_norm": 0.3603121042251587, "learning_rate": 1.020241783650618e-05, "loss": 0.4826, "step": 46595 }, { "epoch": 0.9882293058471718, "grad_norm": 0.38315171003341675, "learning_rate": 1.0202084409397906e-05, "loss": 0.4686, "step": 46596 }, { "epoch": 0.9882505143051049, "grad_norm": 0.3662187159061432, "learning_rate": 1.0201750982064872e-05, "loss": 0.3713, "step": 46597 }, { "epoch": 0.9882717227630379, "grad_norm": 0.36313843727111816, "learning_rate": 1.0201417554507456e-05, "loss": 0.4553, "step": 46598 }, { "epoch": 0.988292931220971, "grad_norm": 0.375812292098999, "learning_rate": 1.0201084126726027e-05, "loss": 0.4775, "step": 46599 }, { "epoch": 0.9883141396789039, "grad_norm": 0.40103381872177124, "learning_rate": 1.020075069872095e-05, "loss": 0.5204, "step": 46600 }, { "epoch": 0.988335348136837, "grad_norm": 0.4213293194770813, "learning_rate": 1.0200417270492602e-05, "loss": 0.5097, "step": 46601 }, { "epoch": 0.98835655659477, "grad_norm": 0.3723284900188446, "learning_rate": 1.0200083842041353e-05, "loss": 0.4848, "step": 46602 }, { "epoch": 0.988377765052703, "grad_norm": 0.38400253653526306, "learning_rate": 1.0199750413367565e-05, "loss": 0.5088, "step": 46603 }, { "epoch": 0.988398973510636, "grad_norm": 0.3739987909793854, "learning_rate": 1.0199416984471624e-05, "loss": 0.5084, "step": 46604 }, { "epoch": 0.9884201819685691, "grad_norm": 0.38643836975097656, "learning_rate": 1.0199083555353893e-05, "loss": 0.4811, "step": 46605 }, { "epoch": 0.9884413904265021, "grad_norm": 0.32921266555786133, "learning_rate": 1.0198750126014738e-05, "loss": 0.5308, "step": 46606 }, { "epoch": 0.9884625988844351, "grad_norm": 0.37441566586494446, "learning_rate": 1.0198416696454539e-05, "loss": 0.487, "step": 46607 }, { "epoch": 0.9884838073423682, "grad_norm": 0.3624984323978424, "learning_rate": 1.0198083266673661e-05, "loss": 0.4754, "step": 46608 }, { "epoch": 0.9885050158003011, "grad_norm": 0.4548795521259308, "learning_rate": 1.0197749836672475e-05, "loss": 0.532, "step": 46609 }, { "epoch": 0.9885262242582342, "grad_norm": 0.4195195138454437, "learning_rate": 1.0197416406451357e-05, "loss": 0.5087, "step": 46610 }, { "epoch": 0.9885474327161672, "grad_norm": 0.39546218514442444, "learning_rate": 1.0197082976010671e-05, "loss": 0.4743, "step": 46611 }, { "epoch": 0.9885686411741003, "grad_norm": 0.32593509554862976, "learning_rate": 1.0196749545350795e-05, "loss": 0.4583, "step": 46612 }, { "epoch": 0.9885898496320332, "grad_norm": 0.3726576566696167, "learning_rate": 1.0196416114472095e-05, "loss": 0.4676, "step": 46613 }, { "epoch": 0.9886110580899663, "grad_norm": 0.693983256816864, "learning_rate": 1.0196082683374938e-05, "loss": 0.5123, "step": 46614 }, { "epoch": 0.9886322665478993, "grad_norm": 0.32915905117988586, "learning_rate": 1.0195749252059704e-05, "loss": 0.3871, "step": 46615 }, { "epoch": 0.9886534750058323, "grad_norm": 0.37322530150413513, "learning_rate": 1.0195415820526757e-05, "loss": 0.4616, "step": 46616 }, { "epoch": 0.9886746834637653, "grad_norm": 0.40560004115104675, "learning_rate": 1.019508238877647e-05, "loss": 0.47, "step": 46617 }, { "epoch": 0.9886958919216984, "grad_norm": 0.407025545835495, "learning_rate": 1.0194748956809216e-05, "loss": 0.512, "step": 46618 }, { "epoch": 0.9887171003796313, "grad_norm": 0.3716109097003937, "learning_rate": 1.0194415524625365e-05, "loss": 0.4283, "step": 46619 }, { "epoch": 0.9887383088375644, "grad_norm": 0.3467860519886017, "learning_rate": 1.0194082092225283e-05, "loss": 0.4658, "step": 46620 }, { "epoch": 0.9887595172954975, "grad_norm": 0.3298971652984619, "learning_rate": 1.0193748659609349e-05, "loss": 0.4046, "step": 46621 }, { "epoch": 0.9887807257534305, "grad_norm": 0.3439231514930725, "learning_rate": 1.0193415226777926e-05, "loss": 0.424, "step": 46622 }, { "epoch": 0.9888019342113635, "grad_norm": 0.7371388673782349, "learning_rate": 1.0193081793731387e-05, "loss": 0.5744, "step": 46623 }, { "epoch": 0.9888231426692965, "grad_norm": 0.389903724193573, "learning_rate": 1.0192748360470107e-05, "loss": 0.5545, "step": 46624 }, { "epoch": 0.9888443511272296, "grad_norm": 0.404307097196579, "learning_rate": 1.0192414926994452e-05, "loss": 0.4726, "step": 46625 }, { "epoch": 0.9888655595851625, "grad_norm": 0.37344637513160706, "learning_rate": 1.0192081493304797e-05, "loss": 0.4625, "step": 46626 }, { "epoch": 0.9888867680430956, "grad_norm": 0.38221511244773865, "learning_rate": 1.0191748059401512e-05, "loss": 0.4039, "step": 46627 }, { "epoch": 0.9889079765010286, "grad_norm": 0.399754136800766, "learning_rate": 1.0191414625284962e-05, "loss": 0.5272, "step": 46628 }, { "epoch": 0.9889291849589616, "grad_norm": 0.44560202956199646, "learning_rate": 1.0191081190955524e-05, "loss": 0.5359, "step": 46629 }, { "epoch": 0.9889503934168946, "grad_norm": 0.41909360885620117, "learning_rate": 1.0190747756413569e-05, "loss": 0.5618, "step": 46630 }, { "epoch": 0.9889716018748277, "grad_norm": 0.3648984730243683, "learning_rate": 1.0190414321659464e-05, "loss": 0.4057, "step": 46631 }, { "epoch": 0.9889928103327607, "grad_norm": 0.3969745337963104, "learning_rate": 1.0190080886693582e-05, "loss": 0.5345, "step": 46632 }, { "epoch": 0.9890140187906937, "grad_norm": 0.34040236473083496, "learning_rate": 1.0189747451516295e-05, "loss": 0.4524, "step": 46633 }, { "epoch": 0.9890352272486268, "grad_norm": 0.3507387340068817, "learning_rate": 1.0189414016127969e-05, "loss": 0.4628, "step": 46634 }, { "epoch": 0.9890564357065598, "grad_norm": 0.33039289712905884, "learning_rate": 1.0189080580528981e-05, "loss": 0.4528, "step": 46635 }, { "epoch": 0.9890776441644928, "grad_norm": 0.3267524540424347, "learning_rate": 1.0188747144719702e-05, "loss": 0.4629, "step": 46636 }, { "epoch": 0.9890988526224258, "grad_norm": 0.4776540696620941, "learning_rate": 1.0188413708700494e-05, "loss": 0.4431, "step": 46637 }, { "epoch": 0.9891200610803589, "grad_norm": 0.3758563995361328, "learning_rate": 1.018808027247174e-05, "loss": 0.4813, "step": 46638 }, { "epoch": 0.9891412695382918, "grad_norm": 0.6040403246879578, "learning_rate": 1.01877468360338e-05, "loss": 0.5225, "step": 46639 }, { "epoch": 0.9891624779962249, "grad_norm": 0.37471258640289307, "learning_rate": 1.0187413399387053e-05, "loss": 0.5059, "step": 46640 }, { "epoch": 0.9891836864541579, "grad_norm": 0.3435855209827423, "learning_rate": 1.0187079962531865e-05, "loss": 0.3923, "step": 46641 }, { "epoch": 0.989204894912091, "grad_norm": 0.41211360692977905, "learning_rate": 1.018674652546861e-05, "loss": 0.457, "step": 46642 }, { "epoch": 0.9892261033700239, "grad_norm": 0.3794236481189728, "learning_rate": 1.0186413088197653e-05, "loss": 0.4585, "step": 46643 }, { "epoch": 0.989247311827957, "grad_norm": 0.33172523975372314, "learning_rate": 1.0186079650719372e-05, "loss": 0.4415, "step": 46644 }, { "epoch": 0.98926852028589, "grad_norm": 0.4192436635494232, "learning_rate": 1.0185746213034133e-05, "loss": 0.5199, "step": 46645 }, { "epoch": 0.989289728743823, "grad_norm": 0.3769291043281555, "learning_rate": 1.0185412775142312e-05, "loss": 0.5468, "step": 46646 }, { "epoch": 0.9893109372017561, "grad_norm": 0.35849273204803467, "learning_rate": 1.0185079337044275e-05, "loss": 0.4128, "step": 46647 }, { "epoch": 0.9893321456596891, "grad_norm": 0.41841861605644226, "learning_rate": 1.0184745898740391e-05, "loss": 0.4537, "step": 46648 }, { "epoch": 0.9893533541176222, "grad_norm": 0.38477933406829834, "learning_rate": 1.0184412460231037e-05, "loss": 0.5111, "step": 46649 }, { "epoch": 0.9893745625755551, "grad_norm": 0.4575141668319702, "learning_rate": 1.0184079021516582e-05, "loss": 0.5005, "step": 46650 }, { "epoch": 0.9893957710334882, "grad_norm": 0.34610530734062195, "learning_rate": 1.0183745582597395e-05, "loss": 0.4288, "step": 46651 }, { "epoch": 0.9894169794914212, "grad_norm": 0.41158556938171387, "learning_rate": 1.018341214347385e-05, "loss": 0.4286, "step": 46652 }, { "epoch": 0.9894381879493542, "grad_norm": 0.37153753638267517, "learning_rate": 1.0183078704146311e-05, "loss": 0.447, "step": 46653 }, { "epoch": 0.9894593964072872, "grad_norm": 0.3410930931568146, "learning_rate": 1.0182745264615154e-05, "loss": 0.4704, "step": 46654 }, { "epoch": 0.9894806048652203, "grad_norm": 0.40925684571266174, "learning_rate": 1.0182411824880753e-05, "loss": 0.5467, "step": 46655 }, { "epoch": 0.9895018133231532, "grad_norm": 0.3894762396812439, "learning_rate": 1.0182078384943474e-05, "loss": 0.5322, "step": 46656 }, { "epoch": 0.9895230217810863, "grad_norm": 0.38193702697753906, "learning_rate": 1.0181744944803688e-05, "loss": 0.4947, "step": 46657 }, { "epoch": 0.9895442302390193, "grad_norm": 0.3793792426586151, "learning_rate": 1.0181411504461768e-05, "loss": 0.4889, "step": 46658 }, { "epoch": 0.9895654386969523, "grad_norm": 0.37585434317588806, "learning_rate": 1.0181078063918082e-05, "loss": 0.5385, "step": 46659 }, { "epoch": 0.9895866471548854, "grad_norm": 0.46460628509521484, "learning_rate": 1.0180744623173004e-05, "loss": 0.4349, "step": 46660 }, { "epoch": 0.9896078556128184, "grad_norm": 0.3908744156360626, "learning_rate": 1.01804111822269e-05, "loss": 0.5627, "step": 46661 }, { "epoch": 0.9896290640707515, "grad_norm": 0.3856683671474457, "learning_rate": 1.0180077741080147e-05, "loss": 0.4479, "step": 46662 }, { "epoch": 0.9896502725286844, "grad_norm": 0.3701001703739166, "learning_rate": 1.0179744299733114e-05, "loss": 0.4347, "step": 46663 }, { "epoch": 0.9896714809866175, "grad_norm": 0.3506430685520172, "learning_rate": 1.0179410858186172e-05, "loss": 0.4344, "step": 46664 }, { "epoch": 0.9896926894445505, "grad_norm": 0.38974514603614807, "learning_rate": 1.0179077416439689e-05, "loss": 0.4912, "step": 46665 }, { "epoch": 0.9897138979024835, "grad_norm": 0.3598511219024658, "learning_rate": 1.0178743974494039e-05, "loss": 0.5054, "step": 46666 }, { "epoch": 0.9897351063604165, "grad_norm": 0.35231927037239075, "learning_rate": 1.0178410532349587e-05, "loss": 0.4954, "step": 46667 }, { "epoch": 0.9897563148183496, "grad_norm": 0.36257195472717285, "learning_rate": 1.017807709000671e-05, "loss": 0.459, "step": 46668 }, { "epoch": 0.9897775232762825, "grad_norm": 0.40535423159599304, "learning_rate": 1.0177743647465781e-05, "loss": 0.5157, "step": 46669 }, { "epoch": 0.9897987317342156, "grad_norm": 0.3791404962539673, "learning_rate": 1.0177410204727165e-05, "loss": 0.5326, "step": 46670 }, { "epoch": 0.9898199401921486, "grad_norm": 0.40406396985054016, "learning_rate": 1.0177076761791233e-05, "loss": 0.4928, "step": 46671 }, { "epoch": 0.9898411486500817, "grad_norm": 0.3493850529193878, "learning_rate": 1.0176743318658362e-05, "loss": 0.4679, "step": 46672 }, { "epoch": 0.9898623571080146, "grad_norm": 0.31468746066093445, "learning_rate": 1.0176409875328914e-05, "loss": 0.4343, "step": 46673 }, { "epoch": 0.9898835655659477, "grad_norm": 0.4047793745994568, "learning_rate": 1.0176076431803266e-05, "loss": 0.4802, "step": 46674 }, { "epoch": 0.9899047740238808, "grad_norm": 0.3425981104373932, "learning_rate": 1.017574298808179e-05, "loss": 0.4628, "step": 46675 }, { "epoch": 0.9899259824818137, "grad_norm": 0.3718653917312622, "learning_rate": 1.0175409544164849e-05, "loss": 0.4911, "step": 46676 }, { "epoch": 0.9899471909397468, "grad_norm": 0.3603573739528656, "learning_rate": 1.0175076100052824e-05, "loss": 0.4818, "step": 46677 }, { "epoch": 0.9899683993976798, "grad_norm": 0.37840011715888977, "learning_rate": 1.017474265574608e-05, "loss": 0.4789, "step": 46678 }, { "epoch": 0.9899896078556129, "grad_norm": 0.35070621967315674, "learning_rate": 1.0174409211244987e-05, "loss": 0.422, "step": 46679 }, { "epoch": 0.9900108163135458, "grad_norm": 0.34971603751182556, "learning_rate": 1.0174075766549916e-05, "loss": 0.4827, "step": 46680 }, { "epoch": 0.9900320247714789, "grad_norm": 0.43484267592430115, "learning_rate": 1.0173742321661242e-05, "loss": 0.4866, "step": 46681 }, { "epoch": 0.9900532332294119, "grad_norm": 0.3300504684448242, "learning_rate": 1.0173408876579334e-05, "loss": 0.4083, "step": 46682 }, { "epoch": 0.9900744416873449, "grad_norm": 0.33946317434310913, "learning_rate": 1.017307543130456e-05, "loss": 0.5459, "step": 46683 }, { "epoch": 0.9900956501452779, "grad_norm": 0.37965863943099976, "learning_rate": 1.0172741985837296e-05, "loss": 0.3939, "step": 46684 }, { "epoch": 0.990116858603211, "grad_norm": 0.36196252703666687, "learning_rate": 1.0172408540177908e-05, "loss": 0.5056, "step": 46685 }, { "epoch": 0.9901380670611439, "grad_norm": 0.33463138341903687, "learning_rate": 1.0172075094326765e-05, "loss": 0.4621, "step": 46686 }, { "epoch": 0.990159275519077, "grad_norm": 0.35197076201438904, "learning_rate": 1.0171741648284247e-05, "loss": 0.4611, "step": 46687 }, { "epoch": 0.9901804839770101, "grad_norm": 0.3600252568721771, "learning_rate": 1.0171408202050717e-05, "loss": 0.4507, "step": 46688 }, { "epoch": 0.990201692434943, "grad_norm": 0.3412518799304962, "learning_rate": 1.0171074755626551e-05, "loss": 0.4549, "step": 46689 }, { "epoch": 0.9902229008928761, "grad_norm": 0.3794976472854614, "learning_rate": 1.0170741309012113e-05, "loss": 0.492, "step": 46690 }, { "epoch": 0.9902441093508091, "grad_norm": 0.36549749970436096, "learning_rate": 1.0170407862207782e-05, "loss": 0.5556, "step": 46691 }, { "epoch": 0.9902653178087422, "grad_norm": 0.32160016894340515, "learning_rate": 1.0170074415213924e-05, "loss": 0.4635, "step": 46692 }, { "epoch": 0.9902865262666751, "grad_norm": 0.3466342091560364, "learning_rate": 1.0169740968030905e-05, "loss": 0.4567, "step": 46693 }, { "epoch": 0.9903077347246082, "grad_norm": 0.3956034183502197, "learning_rate": 1.016940752065911e-05, "loss": 0.4469, "step": 46694 }, { "epoch": 0.9903289431825412, "grad_norm": 0.49539947509765625, "learning_rate": 1.0169074073098897e-05, "loss": 0.551, "step": 46695 }, { "epoch": 0.9903501516404742, "grad_norm": 0.3700789511203766, "learning_rate": 1.0168740625350641e-05, "loss": 0.4716, "step": 46696 }, { "epoch": 0.9903713600984072, "grad_norm": 0.35547709465026855, "learning_rate": 1.0168407177414717e-05, "loss": 0.5051, "step": 46697 }, { "epoch": 0.9903925685563403, "grad_norm": 0.40596312284469604, "learning_rate": 1.0168073729291488e-05, "loss": 0.4153, "step": 46698 }, { "epoch": 0.9904137770142732, "grad_norm": 0.3525952994823456, "learning_rate": 1.0167740280981327e-05, "loss": 0.53, "step": 46699 }, { "epoch": 0.9904349854722063, "grad_norm": 0.4021029770374298, "learning_rate": 1.0167406832484613e-05, "loss": 0.5659, "step": 46700 }, { "epoch": 0.9904561939301394, "grad_norm": 0.4023286700248718, "learning_rate": 1.0167073383801707e-05, "loss": 0.5403, "step": 46701 }, { "epoch": 0.9904774023880724, "grad_norm": 0.3903310000896454, "learning_rate": 1.016673993493298e-05, "loss": 0.5325, "step": 46702 }, { "epoch": 0.9904986108460054, "grad_norm": 0.4111437499523163, "learning_rate": 1.0166406485878813e-05, "loss": 0.5552, "step": 46703 }, { "epoch": 0.9905198193039384, "grad_norm": 0.5025666952133179, "learning_rate": 1.0166073036639566e-05, "loss": 0.4263, "step": 46704 }, { "epoch": 0.9905410277618715, "grad_norm": 0.6246111392974854, "learning_rate": 1.0165739587215614e-05, "loss": 0.468, "step": 46705 }, { "epoch": 0.9905622362198044, "grad_norm": 0.3359985649585724, "learning_rate": 1.016540613760733e-05, "loss": 0.4944, "step": 46706 }, { "epoch": 0.9905834446777375, "grad_norm": 0.4203883111476898, "learning_rate": 1.016507268781508e-05, "loss": 0.4853, "step": 46707 }, { "epoch": 0.9906046531356705, "grad_norm": 0.38376083970069885, "learning_rate": 1.0164739237839241e-05, "loss": 0.4623, "step": 46708 }, { "epoch": 0.9906258615936036, "grad_norm": 0.4569467008113861, "learning_rate": 1.016440578768018e-05, "loss": 0.5128, "step": 46709 }, { "epoch": 0.9906470700515365, "grad_norm": 0.3724333345890045, "learning_rate": 1.0164072337338267e-05, "loss": 0.5033, "step": 46710 }, { "epoch": 0.9906682785094696, "grad_norm": 0.3489445745944977, "learning_rate": 1.0163738886813875e-05, "loss": 0.4599, "step": 46711 }, { "epoch": 0.9906894869674026, "grad_norm": 0.35801199078559875, "learning_rate": 1.0163405436107371e-05, "loss": 0.513, "step": 46712 }, { "epoch": 0.9907106954253356, "grad_norm": 0.34849920868873596, "learning_rate": 1.016307198521913e-05, "loss": 0.5458, "step": 46713 }, { "epoch": 0.9907319038832686, "grad_norm": 0.3894955813884735, "learning_rate": 1.0162738534149526e-05, "loss": 0.5617, "step": 46714 }, { "epoch": 0.9907531123412017, "grad_norm": 0.4244770407676697, "learning_rate": 1.0162405082898923e-05, "loss": 0.4808, "step": 46715 }, { "epoch": 0.9907743207991347, "grad_norm": 0.437664270401001, "learning_rate": 1.0162071631467693e-05, "loss": 0.5235, "step": 46716 }, { "epoch": 0.9907955292570677, "grad_norm": 0.33479002118110657, "learning_rate": 1.0161738179856212e-05, "loss": 0.5116, "step": 46717 }, { "epoch": 0.9908167377150008, "grad_norm": 0.3396078646183014, "learning_rate": 1.016140472806484e-05, "loss": 0.4347, "step": 46718 }, { "epoch": 0.9908379461729337, "grad_norm": 0.3557048439979553, "learning_rate": 1.0161071276093961e-05, "loss": 0.4997, "step": 46719 }, { "epoch": 0.9908591546308668, "grad_norm": 0.3456835448741913, "learning_rate": 1.0160737823943937e-05, "loss": 0.4905, "step": 46720 }, { "epoch": 0.9908803630887998, "grad_norm": 0.39886119961738586, "learning_rate": 1.0160404371615144e-05, "loss": 0.4844, "step": 46721 }, { "epoch": 0.9909015715467329, "grad_norm": 0.42963477969169617, "learning_rate": 1.0160070919107952e-05, "loss": 0.4756, "step": 46722 }, { "epoch": 0.9909227800046658, "grad_norm": 0.33352112770080566, "learning_rate": 1.0159737466422728e-05, "loss": 0.4938, "step": 46723 }, { "epoch": 0.9909439884625989, "grad_norm": 0.3277597725391388, "learning_rate": 1.0159404013559846e-05, "loss": 0.4736, "step": 46724 }, { "epoch": 0.9909651969205319, "grad_norm": 0.33655864000320435, "learning_rate": 1.0159070560519676e-05, "loss": 0.4513, "step": 46725 }, { "epoch": 0.9909864053784649, "grad_norm": 0.3463858366012573, "learning_rate": 1.015873710730259e-05, "loss": 0.4066, "step": 46726 }, { "epoch": 0.9910076138363979, "grad_norm": 0.35347065329551697, "learning_rate": 1.0158403653908955e-05, "loss": 0.4185, "step": 46727 }, { "epoch": 0.991028822294331, "grad_norm": 0.34027332067489624, "learning_rate": 1.0158070200339147e-05, "loss": 0.4223, "step": 46728 }, { "epoch": 0.991050030752264, "grad_norm": 0.3951321244239807, "learning_rate": 1.0157736746593537e-05, "loss": 0.4413, "step": 46729 }, { "epoch": 0.991071239210197, "grad_norm": 0.3510449230670929, "learning_rate": 1.0157403292672489e-05, "loss": 0.5047, "step": 46730 }, { "epoch": 0.9910924476681301, "grad_norm": 0.3772714138031006, "learning_rate": 1.015706983857638e-05, "loss": 0.4931, "step": 46731 }, { "epoch": 0.9911136561260631, "grad_norm": 0.3326406478881836, "learning_rate": 1.0156736384305583e-05, "loss": 0.4487, "step": 46732 }, { "epoch": 0.9911348645839961, "grad_norm": 0.31981977820396423, "learning_rate": 1.015640292986046e-05, "loss": 0.3791, "step": 46733 }, { "epoch": 0.9911560730419291, "grad_norm": 0.40590205788612366, "learning_rate": 1.015606947524139e-05, "loss": 0.5247, "step": 46734 }, { "epoch": 0.9911772814998622, "grad_norm": 0.3869342803955078, "learning_rate": 1.015573602044874e-05, "loss": 0.3707, "step": 46735 }, { "epoch": 0.9911984899577951, "grad_norm": 0.35529252886772156, "learning_rate": 1.0155402565482882e-05, "loss": 0.443, "step": 46736 }, { "epoch": 0.9912196984157282, "grad_norm": 0.40676096081733704, "learning_rate": 1.0155069110344189e-05, "loss": 0.479, "step": 46737 }, { "epoch": 0.9912409068736612, "grad_norm": 0.36688047647476196, "learning_rate": 1.0154735655033023e-05, "loss": 0.5254, "step": 46738 }, { "epoch": 0.9912621153315943, "grad_norm": 0.372772216796875, "learning_rate": 1.0154402199549767e-05, "loss": 0.4738, "step": 46739 }, { "epoch": 0.9912833237895272, "grad_norm": 0.3856103718280792, "learning_rate": 1.0154068743894784e-05, "loss": 0.434, "step": 46740 }, { "epoch": 0.9913045322474603, "grad_norm": 0.3546678423881531, "learning_rate": 1.0153735288068446e-05, "loss": 0.4064, "step": 46741 }, { "epoch": 0.9913257407053934, "grad_norm": 0.5079969167709351, "learning_rate": 1.0153401832071128e-05, "loss": 0.4903, "step": 46742 }, { "epoch": 0.9913469491633263, "grad_norm": 0.3746584951877594, "learning_rate": 1.0153068375903198e-05, "loss": 0.4733, "step": 46743 }, { "epoch": 0.9913681576212594, "grad_norm": 0.42283469438552856, "learning_rate": 1.0152734919565022e-05, "loss": 0.4999, "step": 46744 }, { "epoch": 0.9913893660791924, "grad_norm": 0.3553677797317505, "learning_rate": 1.015240146305698e-05, "loss": 0.4287, "step": 46745 }, { "epoch": 0.9914105745371254, "grad_norm": 0.3546563684940338, "learning_rate": 1.0152068006379439e-05, "loss": 0.4419, "step": 46746 }, { "epoch": 0.9914317829950584, "grad_norm": 0.44677555561065674, "learning_rate": 1.0151734549532766e-05, "loss": 0.4383, "step": 46747 }, { "epoch": 0.9914529914529915, "grad_norm": 0.4175337255001068, "learning_rate": 1.0151401092517336e-05, "loss": 0.5164, "step": 46748 }, { "epoch": 0.9914741999109244, "grad_norm": 0.3474309742450714, "learning_rate": 1.0151067635333518e-05, "loss": 0.4342, "step": 46749 }, { "epoch": 0.9914954083688575, "grad_norm": 0.3595593571662903, "learning_rate": 1.0150734177981686e-05, "loss": 0.5148, "step": 46750 }, { "epoch": 0.9915166168267905, "grad_norm": 0.3618510663509369, "learning_rate": 1.0150400720462208e-05, "loss": 0.4157, "step": 46751 }, { "epoch": 0.9915378252847236, "grad_norm": 0.3634692430496216, "learning_rate": 1.0150067262775454e-05, "loss": 0.4517, "step": 46752 }, { "epoch": 0.9915590337426565, "grad_norm": 0.45748162269592285, "learning_rate": 1.01497338049218e-05, "loss": 0.5497, "step": 46753 }, { "epoch": 0.9915802422005896, "grad_norm": 0.33447349071502686, "learning_rate": 1.0149400346901611e-05, "loss": 0.4038, "step": 46754 }, { "epoch": 0.9916014506585226, "grad_norm": 0.31995299458503723, "learning_rate": 1.0149066888715262e-05, "loss": 0.4614, "step": 46755 }, { "epoch": 0.9916226591164556, "grad_norm": 0.41985857486724854, "learning_rate": 1.0148733430363122e-05, "loss": 0.4521, "step": 46756 }, { "epoch": 0.9916438675743887, "grad_norm": 0.43160152435302734, "learning_rate": 1.014839997184556e-05, "loss": 0.4368, "step": 46757 }, { "epoch": 0.9916650760323217, "grad_norm": 0.37442219257354736, "learning_rate": 1.0148066513162948e-05, "loss": 0.497, "step": 46758 }, { "epoch": 0.9916862844902548, "grad_norm": 0.42819905281066895, "learning_rate": 1.014773305431566e-05, "loss": 0.4935, "step": 46759 }, { "epoch": 0.9917074929481877, "grad_norm": 0.3568339943885803, "learning_rate": 1.0147399595304065e-05, "loss": 0.4472, "step": 46760 }, { "epoch": 0.9917287014061208, "grad_norm": 0.3452840745449066, "learning_rate": 1.0147066136128532e-05, "loss": 0.3853, "step": 46761 }, { "epoch": 0.9917499098640538, "grad_norm": 0.3818318545818329, "learning_rate": 1.0146732676789435e-05, "loss": 0.485, "step": 46762 }, { "epoch": 0.9917711183219868, "grad_norm": 0.4076845049858093, "learning_rate": 1.0146399217287142e-05, "loss": 0.4969, "step": 46763 }, { "epoch": 0.9917923267799198, "grad_norm": 0.45559680461883545, "learning_rate": 1.0146065757622023e-05, "loss": 0.3818, "step": 46764 }, { "epoch": 0.9918135352378529, "grad_norm": 0.35161080956459045, "learning_rate": 1.0145732297794452e-05, "loss": 0.4087, "step": 46765 }, { "epoch": 0.9918347436957858, "grad_norm": 0.5791000723838806, "learning_rate": 1.01453988378048e-05, "loss": 0.5478, "step": 46766 }, { "epoch": 0.9918559521537189, "grad_norm": 0.3493883013725281, "learning_rate": 1.0145065377653437e-05, "loss": 0.482, "step": 46767 }, { "epoch": 0.9918771606116519, "grad_norm": 0.3571503162384033, "learning_rate": 1.0144731917340735e-05, "loss": 0.4231, "step": 46768 }, { "epoch": 0.991898369069585, "grad_norm": 0.37933847308158875, "learning_rate": 1.014439845686706e-05, "loss": 0.5531, "step": 46769 }, { "epoch": 0.991919577527518, "grad_norm": 0.36701998114585876, "learning_rate": 1.0144064996232788e-05, "loss": 0.4475, "step": 46770 }, { "epoch": 0.991940785985451, "grad_norm": 0.34960728883743286, "learning_rate": 1.0143731535438288e-05, "loss": 0.4494, "step": 46771 }, { "epoch": 0.9919619944433841, "grad_norm": 0.3751654624938965, "learning_rate": 1.014339807448393e-05, "loss": 0.5075, "step": 46772 }, { "epoch": 0.991983202901317, "grad_norm": 0.3667517304420471, "learning_rate": 1.0143064613370089e-05, "loss": 0.5281, "step": 46773 }, { "epoch": 0.9920044113592501, "grad_norm": 0.33797821402549744, "learning_rate": 1.0142731152097129e-05, "loss": 0.5371, "step": 46774 }, { "epoch": 0.9920256198171831, "grad_norm": 0.3389582633972168, "learning_rate": 1.0142397690665427e-05, "loss": 0.5027, "step": 46775 }, { "epoch": 0.9920468282751161, "grad_norm": 0.3728416860103607, "learning_rate": 1.014206422907535e-05, "loss": 0.4881, "step": 46776 }, { "epoch": 0.9920680367330491, "grad_norm": 0.4214276969432831, "learning_rate": 1.014173076732727e-05, "loss": 0.537, "step": 46777 }, { "epoch": 0.9920892451909822, "grad_norm": 0.37830010056495667, "learning_rate": 1.0141397305421559e-05, "loss": 0.4656, "step": 46778 }, { "epoch": 0.9921104536489151, "grad_norm": 0.346821129322052, "learning_rate": 1.014106384335859e-05, "loss": 0.4347, "step": 46779 }, { "epoch": 0.9921316621068482, "grad_norm": 0.3394504189491272, "learning_rate": 1.0140730381138726e-05, "loss": 0.5008, "step": 46780 }, { "epoch": 0.9921528705647812, "grad_norm": 0.6059387922286987, "learning_rate": 1.0140396918762348e-05, "loss": 0.5141, "step": 46781 }, { "epoch": 0.9921740790227143, "grad_norm": 0.4048190712928772, "learning_rate": 1.0140063456229818e-05, "loss": 0.4584, "step": 46782 }, { "epoch": 0.9921952874806473, "grad_norm": 0.39175310730934143, "learning_rate": 1.0139729993541512e-05, "loss": 0.5596, "step": 46783 }, { "epoch": 0.9922164959385803, "grad_norm": 0.3495480716228485, "learning_rate": 1.0139396530697797e-05, "loss": 0.5089, "step": 46784 }, { "epoch": 0.9922377043965134, "grad_norm": 0.3447870910167694, "learning_rate": 1.013906306769905e-05, "loss": 0.4424, "step": 46785 }, { "epoch": 0.9922589128544463, "grad_norm": 0.36998894810676575, "learning_rate": 1.0138729604545639e-05, "loss": 0.5738, "step": 46786 }, { "epoch": 0.9922801213123794, "grad_norm": 0.3630779981613159, "learning_rate": 1.0138396141237931e-05, "loss": 0.4988, "step": 46787 }, { "epoch": 0.9923013297703124, "grad_norm": 0.35640957951545715, "learning_rate": 1.0138062677776304e-05, "loss": 0.5074, "step": 46788 }, { "epoch": 0.9923225382282455, "grad_norm": 0.40559324622154236, "learning_rate": 1.0137729214161119e-05, "loss": 0.512, "step": 46789 }, { "epoch": 0.9923437466861784, "grad_norm": 0.346118688583374, "learning_rate": 1.0137395750392756e-05, "loss": 0.4625, "step": 46790 }, { "epoch": 0.9923649551441115, "grad_norm": 0.392535924911499, "learning_rate": 1.0137062286471584e-05, "loss": 0.5635, "step": 46791 }, { "epoch": 0.9923861636020445, "grad_norm": 0.42453533411026, "learning_rate": 1.013672882239797e-05, "loss": 0.5214, "step": 46792 }, { "epoch": 0.9924073720599775, "grad_norm": 0.3652265965938568, "learning_rate": 1.0136395358172288e-05, "loss": 0.4335, "step": 46793 }, { "epoch": 0.9924285805179105, "grad_norm": 0.3333425521850586, "learning_rate": 1.0136061893794908e-05, "loss": 0.4274, "step": 46794 }, { "epoch": 0.9924497889758436, "grad_norm": 0.3909343183040619, "learning_rate": 1.0135728429266203e-05, "loss": 0.4509, "step": 46795 }, { "epoch": 0.9924709974337765, "grad_norm": 0.37097102403640747, "learning_rate": 1.013539496458654e-05, "loss": 0.5264, "step": 46796 }, { "epoch": 0.9924922058917096, "grad_norm": 0.32604730129241943, "learning_rate": 1.0135061499756294e-05, "loss": 0.4347, "step": 46797 }, { "epoch": 0.9925134143496427, "grad_norm": 0.38885498046875, "learning_rate": 1.0134728034775832e-05, "loss": 0.5178, "step": 46798 }, { "epoch": 0.9925346228075757, "grad_norm": 0.3673190474510193, "learning_rate": 1.013439456964553e-05, "loss": 0.5181, "step": 46799 }, { "epoch": 0.9925558312655087, "grad_norm": 0.3500903248786926, "learning_rate": 1.0134061104365748e-05, "loss": 0.4298, "step": 46800 }, { "epoch": 0.9925770397234417, "grad_norm": 0.3483071029186249, "learning_rate": 1.0133727638936872e-05, "loss": 0.5611, "step": 46801 }, { "epoch": 0.9925982481813748, "grad_norm": 0.4295450747013092, "learning_rate": 1.013339417335926e-05, "loss": 0.4628, "step": 46802 }, { "epoch": 0.9926194566393077, "grad_norm": 1.5178637504577637, "learning_rate": 1.0133060707633291e-05, "loss": 0.6151, "step": 46803 }, { "epoch": 0.9926406650972408, "grad_norm": 0.34372714161872864, "learning_rate": 1.0132727241759333e-05, "loss": 0.5058, "step": 46804 }, { "epoch": 0.9926618735551738, "grad_norm": 0.33372658491134644, "learning_rate": 1.013239377573776e-05, "loss": 0.454, "step": 46805 }, { "epoch": 0.9926830820131068, "grad_norm": 0.37192508578300476, "learning_rate": 1.0132060309568934e-05, "loss": 0.5636, "step": 46806 }, { "epoch": 0.9927042904710398, "grad_norm": 0.37915176153182983, "learning_rate": 1.0131726843253233e-05, "loss": 0.5291, "step": 46807 }, { "epoch": 0.9927254989289729, "grad_norm": 0.3893294930458069, "learning_rate": 1.0131393376791028e-05, "loss": 0.6105, "step": 46808 }, { "epoch": 0.9927467073869058, "grad_norm": 0.3710313141345978, "learning_rate": 1.0131059910182685e-05, "loss": 0.3884, "step": 46809 }, { "epoch": 0.9927679158448389, "grad_norm": 0.3339478075504303, "learning_rate": 1.0130726443428584e-05, "loss": 0.4211, "step": 46810 }, { "epoch": 0.992789124302772, "grad_norm": 0.36299312114715576, "learning_rate": 1.013039297652909e-05, "loss": 0.4844, "step": 46811 }, { "epoch": 0.992810332760705, "grad_norm": 0.5494765639305115, "learning_rate": 1.0130059509484569e-05, "loss": 0.492, "step": 46812 }, { "epoch": 0.992831541218638, "grad_norm": 0.3902415633201599, "learning_rate": 1.0129726042295401e-05, "loss": 0.5, "step": 46813 }, { "epoch": 0.992852749676571, "grad_norm": 0.33541399240493774, "learning_rate": 1.012939257496195e-05, "loss": 0.4711, "step": 46814 }, { "epoch": 0.9928739581345041, "grad_norm": 0.3711632490158081, "learning_rate": 1.0129059107484589e-05, "loss": 0.4854, "step": 46815 }, { "epoch": 0.992895166592437, "grad_norm": 0.3883652687072754, "learning_rate": 1.0128725639863693e-05, "loss": 0.4423, "step": 46816 }, { "epoch": 0.9929163750503701, "grad_norm": 0.36160582304000854, "learning_rate": 1.0128392172099628e-05, "loss": 0.4922, "step": 46817 }, { "epoch": 0.9929375835083031, "grad_norm": 0.40763354301452637, "learning_rate": 1.0128058704192768e-05, "loss": 0.5341, "step": 46818 }, { "epoch": 0.9929587919662362, "grad_norm": 0.4497128427028656, "learning_rate": 1.012772523614348e-05, "loss": 0.5033, "step": 46819 }, { "epoch": 0.9929800004241691, "grad_norm": 0.3844597041606903, "learning_rate": 1.0127391767952138e-05, "loss": 0.4917, "step": 46820 }, { "epoch": 0.9930012088821022, "grad_norm": 0.3485877513885498, "learning_rate": 1.012705829961911e-05, "loss": 0.4057, "step": 46821 }, { "epoch": 0.9930224173400352, "grad_norm": 0.41083788871765137, "learning_rate": 1.0126724831144772e-05, "loss": 0.42, "step": 46822 }, { "epoch": 0.9930436257979682, "grad_norm": 0.34056028723716736, "learning_rate": 1.0126391362529489e-05, "loss": 0.4343, "step": 46823 }, { "epoch": 0.9930648342559013, "grad_norm": 0.3913327157497406, "learning_rate": 1.0126057893773635e-05, "loss": 0.559, "step": 46824 }, { "epoch": 0.9930860427138343, "grad_norm": 0.3108149766921997, "learning_rate": 1.0125724424877584e-05, "loss": 0.3954, "step": 46825 }, { "epoch": 0.9931072511717673, "grad_norm": 0.35754820704460144, "learning_rate": 1.01253909558417e-05, "loss": 0.4683, "step": 46826 }, { "epoch": 0.9931284596297003, "grad_norm": 0.3781992197036743, "learning_rate": 1.0125057486666356e-05, "loss": 0.4701, "step": 46827 }, { "epoch": 0.9931496680876334, "grad_norm": 0.37428680062294006, "learning_rate": 1.0124724017351928e-05, "loss": 0.4587, "step": 46828 }, { "epoch": 0.9931708765455664, "grad_norm": 0.33171436190605164, "learning_rate": 1.012439054789878e-05, "loss": 0.4397, "step": 46829 }, { "epoch": 0.9931920850034994, "grad_norm": 0.3306691646575928, "learning_rate": 1.0124057078307287e-05, "loss": 0.4895, "step": 46830 }, { "epoch": 0.9932132934614324, "grad_norm": 0.3490298390388489, "learning_rate": 1.0123723608577818e-05, "loss": 0.4929, "step": 46831 }, { "epoch": 0.9932345019193655, "grad_norm": 0.3596283197402954, "learning_rate": 1.0123390138710747e-05, "loss": 0.5568, "step": 46832 }, { "epoch": 0.9932557103772984, "grad_norm": 0.46018528938293457, "learning_rate": 1.0123056668706443e-05, "loss": 0.5147, "step": 46833 }, { "epoch": 0.9932769188352315, "grad_norm": 0.4139348566532135, "learning_rate": 1.0122723198565272e-05, "loss": 0.4737, "step": 46834 }, { "epoch": 0.9932981272931645, "grad_norm": 0.36183080077171326, "learning_rate": 1.0122389728287612e-05, "loss": 0.5202, "step": 46835 }, { "epoch": 0.9933193357510975, "grad_norm": 0.3320234417915344, "learning_rate": 1.0122056257873831e-05, "loss": 0.4281, "step": 46836 }, { "epoch": 0.9933405442090305, "grad_norm": 0.36561885476112366, "learning_rate": 1.0121722787324299e-05, "loss": 0.4963, "step": 46837 }, { "epoch": 0.9933617526669636, "grad_norm": 0.3527902364730835, "learning_rate": 1.0121389316639392e-05, "loss": 0.5081, "step": 46838 }, { "epoch": 0.9933829611248967, "grad_norm": 0.36331936717033386, "learning_rate": 1.0121055845819475e-05, "loss": 0.5075, "step": 46839 }, { "epoch": 0.9934041695828296, "grad_norm": 0.41294458508491516, "learning_rate": 1.0120722374864917e-05, "loss": 0.4968, "step": 46840 }, { "epoch": 0.9934253780407627, "grad_norm": 0.35356056690216064, "learning_rate": 1.0120388903776095e-05, "loss": 0.4861, "step": 46841 }, { "epoch": 0.9934465864986957, "grad_norm": 0.37625205516815186, "learning_rate": 1.012005543255338e-05, "loss": 0.5198, "step": 46842 }, { "epoch": 0.9934677949566287, "grad_norm": 0.4079091548919678, "learning_rate": 1.0119721961197137e-05, "loss": 0.5063, "step": 46843 }, { "epoch": 0.9934890034145617, "grad_norm": 0.3524484634399414, "learning_rate": 1.0119388489707742e-05, "loss": 0.5108, "step": 46844 }, { "epoch": 0.9935102118724948, "grad_norm": 0.3762242794036865, "learning_rate": 1.0119055018085564e-05, "loss": 0.4959, "step": 46845 }, { "epoch": 0.9935314203304277, "grad_norm": 0.5341875553131104, "learning_rate": 1.0118721546330973e-05, "loss": 0.5395, "step": 46846 }, { "epoch": 0.9935526287883608, "grad_norm": 0.32766488194465637, "learning_rate": 1.0118388074444343e-05, "loss": 0.3945, "step": 46847 }, { "epoch": 0.9935738372462938, "grad_norm": 0.4076003134250641, "learning_rate": 1.0118054602426039e-05, "loss": 0.4634, "step": 46848 }, { "epoch": 0.9935950457042269, "grad_norm": 0.36351871490478516, "learning_rate": 1.0117721130276441e-05, "loss": 0.5046, "step": 46849 }, { "epoch": 0.9936162541621598, "grad_norm": 0.45678576827049255, "learning_rate": 1.0117387657995912e-05, "loss": 0.4148, "step": 46850 }, { "epoch": 0.9936374626200929, "grad_norm": 0.3598592281341553, "learning_rate": 1.0117054185584825e-05, "loss": 0.4783, "step": 46851 }, { "epoch": 0.993658671078026, "grad_norm": 0.3583469092845917, "learning_rate": 1.0116720713043554e-05, "loss": 0.5228, "step": 46852 }, { "epoch": 0.9936798795359589, "grad_norm": 0.33406272530555725, "learning_rate": 1.0116387240372463e-05, "loss": 0.4374, "step": 46853 }, { "epoch": 0.993701087993892, "grad_norm": 0.33294418454170227, "learning_rate": 1.0116053767571927e-05, "loss": 0.4521, "step": 46854 }, { "epoch": 0.993722296451825, "grad_norm": 0.3947029411792755, "learning_rate": 1.011572029464232e-05, "loss": 0.4827, "step": 46855 }, { "epoch": 0.993743504909758, "grad_norm": 0.3824346661567688, "learning_rate": 1.011538682158401e-05, "loss": 0.4586, "step": 46856 }, { "epoch": 0.993764713367691, "grad_norm": 0.38948264718055725, "learning_rate": 1.0115053348397366e-05, "loss": 0.4734, "step": 46857 }, { "epoch": 0.9937859218256241, "grad_norm": 0.34997183084487915, "learning_rate": 1.0114719875082762e-05, "loss": 0.3816, "step": 46858 }, { "epoch": 0.993807130283557, "grad_norm": 0.39497697353363037, "learning_rate": 1.0114386401640566e-05, "loss": 0.4588, "step": 46859 }, { "epoch": 0.9938283387414901, "grad_norm": 0.4073961079120636, "learning_rate": 1.011405292807115e-05, "loss": 0.4684, "step": 46860 }, { "epoch": 0.9938495471994231, "grad_norm": 0.3501133918762207, "learning_rate": 1.0113719454374888e-05, "loss": 0.4817, "step": 46861 }, { "epoch": 0.9938707556573562, "grad_norm": 0.3757602870464325, "learning_rate": 1.0113385980552146e-05, "loss": 0.4584, "step": 46862 }, { "epoch": 0.9938919641152891, "grad_norm": 0.3940128684043884, "learning_rate": 1.0113052506603298e-05, "loss": 0.5011, "step": 46863 }, { "epoch": 0.9939131725732222, "grad_norm": 0.3866882622241974, "learning_rate": 1.0112719032528716e-05, "loss": 0.4372, "step": 46864 }, { "epoch": 0.9939343810311553, "grad_norm": 0.4038703143596649, "learning_rate": 1.0112385558328766e-05, "loss": 0.5442, "step": 46865 }, { "epoch": 0.9939555894890882, "grad_norm": 0.40141621232032776, "learning_rate": 1.0112052084003821e-05, "loss": 0.4711, "step": 46866 }, { "epoch": 0.9939767979470213, "grad_norm": 0.36425861716270447, "learning_rate": 1.0111718609554254e-05, "loss": 0.5, "step": 46867 }, { "epoch": 0.9939980064049543, "grad_norm": 0.42357292771339417, "learning_rate": 1.0111385134980433e-05, "loss": 0.5466, "step": 46868 }, { "epoch": 0.9940192148628874, "grad_norm": 0.421233594417572, "learning_rate": 1.0111051660282736e-05, "loss": 0.502, "step": 46869 }, { "epoch": 0.9940404233208203, "grad_norm": 0.35979366302490234, "learning_rate": 1.0110718185461525e-05, "loss": 0.4457, "step": 46870 }, { "epoch": 0.9940616317787534, "grad_norm": 0.3668549954891205, "learning_rate": 1.011038471051717e-05, "loss": 0.4743, "step": 46871 }, { "epoch": 0.9940828402366864, "grad_norm": 0.3641377389431, "learning_rate": 1.0110051235450051e-05, "loss": 0.4883, "step": 46872 }, { "epoch": 0.9941040486946194, "grad_norm": 0.4988512694835663, "learning_rate": 1.0109717760260533e-05, "loss": 0.3802, "step": 46873 }, { "epoch": 0.9941252571525524, "grad_norm": 2.419269561767578, "learning_rate": 1.0109384284948986e-05, "loss": 0.4778, "step": 46874 }, { "epoch": 0.9941464656104855, "grad_norm": 0.40692514181137085, "learning_rate": 1.0109050809515786e-05, "loss": 0.5012, "step": 46875 }, { "epoch": 0.9941676740684184, "grad_norm": 0.49766451120376587, "learning_rate": 1.0108717333961298e-05, "loss": 0.4457, "step": 46876 }, { "epoch": 0.9941888825263515, "grad_norm": 0.3782385587692261, "learning_rate": 1.0108383858285897e-05, "loss": 0.4751, "step": 46877 }, { "epoch": 0.9942100909842845, "grad_norm": 0.3943488597869873, "learning_rate": 1.0108050382489954e-05, "loss": 0.5376, "step": 46878 }, { "epoch": 0.9942312994422176, "grad_norm": 0.3717726469039917, "learning_rate": 1.0107716906573834e-05, "loss": 0.4928, "step": 46879 }, { "epoch": 0.9942525079001506, "grad_norm": 0.45493441820144653, "learning_rate": 1.0107383430537914e-05, "loss": 0.4931, "step": 46880 }, { "epoch": 0.9942737163580836, "grad_norm": 0.3683464825153351, "learning_rate": 1.0107049954382566e-05, "loss": 0.4849, "step": 46881 }, { "epoch": 0.9942949248160167, "grad_norm": 0.37202394008636475, "learning_rate": 1.0106716478108153e-05, "loss": 0.4834, "step": 46882 }, { "epoch": 0.9943161332739496, "grad_norm": 0.30851617455482483, "learning_rate": 1.0106383001715054e-05, "loss": 0.4854, "step": 46883 }, { "epoch": 0.9943373417318827, "grad_norm": 0.3865194022655487, "learning_rate": 1.0106049525203638e-05, "loss": 0.4789, "step": 46884 }, { "epoch": 0.9943585501898157, "grad_norm": 0.36285868287086487, "learning_rate": 1.010571604857427e-05, "loss": 0.4474, "step": 46885 }, { "epoch": 0.9943797586477487, "grad_norm": 0.4093337953090668, "learning_rate": 1.0105382571827329e-05, "loss": 0.5314, "step": 46886 }, { "epoch": 0.9944009671056817, "grad_norm": 0.36207717657089233, "learning_rate": 1.0105049094963183e-05, "loss": 0.5819, "step": 46887 }, { "epoch": 0.9944221755636148, "grad_norm": 0.38483887910842896, "learning_rate": 1.0104715617982198e-05, "loss": 0.4755, "step": 46888 }, { "epoch": 0.9944433840215477, "grad_norm": 0.3892086148262024, "learning_rate": 1.0104382140884754e-05, "loss": 0.5388, "step": 46889 }, { "epoch": 0.9944645924794808, "grad_norm": 0.3965894877910614, "learning_rate": 1.0104048663671215e-05, "loss": 0.4369, "step": 46890 }, { "epoch": 0.9944858009374138, "grad_norm": 0.4175271987915039, "learning_rate": 1.0103715186341953e-05, "loss": 0.527, "step": 46891 }, { "epoch": 0.9945070093953469, "grad_norm": 0.4449436068534851, "learning_rate": 1.0103381708897342e-05, "loss": 0.5114, "step": 46892 }, { "epoch": 0.9945282178532799, "grad_norm": 0.3853906989097595, "learning_rate": 1.0103048231337752e-05, "loss": 0.4749, "step": 46893 }, { "epoch": 0.9945494263112129, "grad_norm": 0.4333952069282532, "learning_rate": 1.0102714753663549e-05, "loss": 0.4726, "step": 46894 }, { "epoch": 0.994570634769146, "grad_norm": 0.3430466949939728, "learning_rate": 1.010238127587511e-05, "loss": 0.4103, "step": 46895 }, { "epoch": 0.9945918432270789, "grad_norm": 0.44803479313850403, "learning_rate": 1.01020477979728e-05, "loss": 0.4484, "step": 46896 }, { "epoch": 0.994613051685012, "grad_norm": 0.386465460062027, "learning_rate": 1.0101714319957e-05, "loss": 0.4702, "step": 46897 }, { "epoch": 0.994634260142945, "grad_norm": 0.45643550157546997, "learning_rate": 1.0101380841828069e-05, "loss": 0.521, "step": 46898 }, { "epoch": 0.9946554686008781, "grad_norm": 0.3577996492385864, "learning_rate": 1.0101047363586381e-05, "loss": 0.513, "step": 46899 }, { "epoch": 0.994676677058811, "grad_norm": 0.3794175386428833, "learning_rate": 1.0100713885232314e-05, "loss": 0.4679, "step": 46900 }, { "epoch": 0.9946978855167441, "grad_norm": 0.3293837606906891, "learning_rate": 1.0100380406766232e-05, "loss": 0.4182, "step": 46901 }, { "epoch": 0.9947190939746771, "grad_norm": 0.3527229428291321, "learning_rate": 1.0100046928188508e-05, "loss": 0.4493, "step": 46902 }, { "epoch": 0.9947403024326101, "grad_norm": 0.3747078776359558, "learning_rate": 1.0099713449499512e-05, "loss": 0.4968, "step": 46903 }, { "epoch": 0.9947615108905431, "grad_norm": 0.36028262972831726, "learning_rate": 1.0099379970699616e-05, "loss": 0.4454, "step": 46904 }, { "epoch": 0.9947827193484762, "grad_norm": 0.38297203183174133, "learning_rate": 1.0099046491789191e-05, "loss": 0.5002, "step": 46905 }, { "epoch": 0.9948039278064092, "grad_norm": 0.3740450441837311, "learning_rate": 1.0098713012768606e-05, "loss": 0.4095, "step": 46906 }, { "epoch": 0.9948251362643422, "grad_norm": 0.3510846495628357, "learning_rate": 1.0098379533638237e-05, "loss": 0.4271, "step": 46907 }, { "epoch": 0.9948463447222753, "grad_norm": 0.4486851394176483, "learning_rate": 1.0098046054398448e-05, "loss": 0.5128, "step": 46908 }, { "epoch": 0.9948675531802083, "grad_norm": 0.44454753398895264, "learning_rate": 1.0097712575049612e-05, "loss": 0.4814, "step": 46909 }, { "epoch": 0.9948887616381413, "grad_norm": 0.4120039641857147, "learning_rate": 1.0097379095592102e-05, "loss": 0.478, "step": 46910 }, { "epoch": 0.9949099700960743, "grad_norm": 0.3625938892364502, "learning_rate": 1.0097045616026286e-05, "loss": 0.494, "step": 46911 }, { "epoch": 0.9949311785540074, "grad_norm": 0.33785438537597656, "learning_rate": 1.009671213635254e-05, "loss": 0.4118, "step": 46912 }, { "epoch": 0.9949523870119403, "grad_norm": 0.48621562123298645, "learning_rate": 1.0096378656571231e-05, "loss": 0.5082, "step": 46913 }, { "epoch": 0.9949735954698734, "grad_norm": 0.35725903511047363, "learning_rate": 1.0096045176682729e-05, "loss": 0.4796, "step": 46914 }, { "epoch": 0.9949948039278064, "grad_norm": 0.41669395565986633, "learning_rate": 1.0095711696687409e-05, "loss": 0.4469, "step": 46915 }, { "epoch": 0.9950160123857394, "grad_norm": 0.3547818958759308, "learning_rate": 1.0095378216585637e-05, "loss": 0.474, "step": 46916 }, { "epoch": 0.9950372208436724, "grad_norm": 0.37849050760269165, "learning_rate": 1.0095044736377786e-05, "loss": 0.5058, "step": 46917 }, { "epoch": 0.9950584293016055, "grad_norm": 0.3649267554283142, "learning_rate": 1.0094711256064227e-05, "loss": 0.4164, "step": 46918 }, { "epoch": 0.9950796377595384, "grad_norm": 0.4402881860733032, "learning_rate": 1.0094377775645332e-05, "loss": 0.5348, "step": 46919 }, { "epoch": 0.9951008462174715, "grad_norm": 0.39870592951774597, "learning_rate": 1.0094044295121472e-05, "loss": 0.4954, "step": 46920 }, { "epoch": 0.9951220546754046, "grad_norm": 0.32773205637931824, "learning_rate": 1.0093710814493016e-05, "loss": 0.4346, "step": 46921 }, { "epoch": 0.9951432631333376, "grad_norm": 0.35373151302337646, "learning_rate": 1.0093377333760332e-05, "loss": 0.5059, "step": 46922 }, { "epoch": 0.9951644715912706, "grad_norm": 0.33044397830963135, "learning_rate": 1.0093043852923801e-05, "loss": 0.4244, "step": 46923 }, { "epoch": 0.9951856800492036, "grad_norm": 0.3841073513031006, "learning_rate": 1.0092710371983783e-05, "loss": 0.5094, "step": 46924 }, { "epoch": 0.9952068885071367, "grad_norm": 0.3658670485019684, "learning_rate": 1.0092376890940651e-05, "loss": 0.4734, "step": 46925 }, { "epoch": 0.9952280969650696, "grad_norm": 0.36719733476638794, "learning_rate": 1.0092043409794782e-05, "loss": 0.5128, "step": 46926 }, { "epoch": 0.9952493054230027, "grad_norm": 0.32634449005126953, "learning_rate": 1.0091709928546543e-05, "loss": 0.4854, "step": 46927 }, { "epoch": 0.9952705138809357, "grad_norm": 0.34137579798698425, "learning_rate": 1.0091376447196305e-05, "loss": 0.5234, "step": 46928 }, { "epoch": 0.9952917223388688, "grad_norm": 0.3285246193408966, "learning_rate": 1.0091042965744439e-05, "loss": 0.4399, "step": 46929 }, { "epoch": 0.9953129307968017, "grad_norm": 0.39494144916534424, "learning_rate": 1.0090709484191315e-05, "loss": 0.5553, "step": 46930 }, { "epoch": 0.9953341392547348, "grad_norm": 0.40125060081481934, "learning_rate": 1.0090376002537306e-05, "loss": 0.4881, "step": 46931 }, { "epoch": 0.9953553477126678, "grad_norm": 0.3552858531475067, "learning_rate": 1.0090042520782781e-05, "loss": 0.3802, "step": 46932 }, { "epoch": 0.9953765561706008, "grad_norm": 0.4260834753513336, "learning_rate": 1.008970903892811e-05, "loss": 0.4574, "step": 46933 }, { "epoch": 0.9953977646285339, "grad_norm": 0.4147837162017822, "learning_rate": 1.0089375556973669e-05, "loss": 0.5155, "step": 46934 }, { "epoch": 0.9954189730864669, "grad_norm": 0.37651970982551575, "learning_rate": 1.0089042074919821e-05, "loss": 0.4955, "step": 46935 }, { "epoch": 0.9954401815444, "grad_norm": 0.39744889736175537, "learning_rate": 1.008870859276694e-05, "loss": 0.4556, "step": 46936 }, { "epoch": 0.9954613900023329, "grad_norm": 0.38287270069122314, "learning_rate": 1.0088375110515402e-05, "loss": 0.513, "step": 46937 }, { "epoch": 0.995482598460266, "grad_norm": 0.3859223425388336, "learning_rate": 1.0088041628165574e-05, "loss": 0.4878, "step": 46938 }, { "epoch": 0.995503806918199, "grad_norm": 0.34837865829467773, "learning_rate": 1.0087708145717828e-05, "loss": 0.4668, "step": 46939 }, { "epoch": 0.995525015376132, "grad_norm": 0.3415467143058777, "learning_rate": 1.0087374663172532e-05, "loss": 0.5166, "step": 46940 }, { "epoch": 0.995546223834065, "grad_norm": 0.3820561170578003, "learning_rate": 1.0087041180530057e-05, "loss": 0.4548, "step": 46941 }, { "epoch": 0.9955674322919981, "grad_norm": 0.352364718914032, "learning_rate": 1.0086707697790777e-05, "loss": 0.4511, "step": 46942 }, { "epoch": 0.995588640749931, "grad_norm": 0.320260614156723, "learning_rate": 1.0086374214955062e-05, "loss": 0.4729, "step": 46943 }, { "epoch": 0.9956098492078641, "grad_norm": 0.45086273550987244, "learning_rate": 1.0086040732023279e-05, "loss": 0.5299, "step": 46944 }, { "epoch": 0.9956310576657971, "grad_norm": 0.3821215033531189, "learning_rate": 1.0085707248995806e-05, "loss": 0.4769, "step": 46945 }, { "epoch": 0.9956522661237301, "grad_norm": 0.388857901096344, "learning_rate": 1.008537376587301e-05, "loss": 0.5209, "step": 46946 }, { "epoch": 0.9956734745816632, "grad_norm": 0.3417122960090637, "learning_rate": 1.008504028265526e-05, "loss": 0.437, "step": 46947 }, { "epoch": 0.9956946830395962, "grad_norm": 0.3427072763442993, "learning_rate": 1.0084706799342932e-05, "loss": 0.4226, "step": 46948 }, { "epoch": 0.9957158914975293, "grad_norm": 0.37187454104423523, "learning_rate": 1.0084373315936392e-05, "loss": 0.3849, "step": 46949 }, { "epoch": 0.9957370999554622, "grad_norm": 0.36519038677215576, "learning_rate": 1.008403983243601e-05, "loss": 0.4823, "step": 46950 }, { "epoch": 0.9957583084133953, "grad_norm": 0.4044424295425415, "learning_rate": 1.0083706348842164e-05, "loss": 0.5102, "step": 46951 }, { "epoch": 0.9957795168713283, "grad_norm": 0.36428603529930115, "learning_rate": 1.0083372865155218e-05, "loss": 0.5166, "step": 46952 }, { "epoch": 0.9958007253292613, "grad_norm": 0.36975201964378357, "learning_rate": 1.0083039381375546e-05, "loss": 0.4792, "step": 46953 }, { "epoch": 0.9958219337871943, "grad_norm": 0.3297519385814667, "learning_rate": 1.008270589750352e-05, "loss": 0.3988, "step": 46954 }, { "epoch": 0.9958431422451274, "grad_norm": 0.3871605694293976, "learning_rate": 1.0082372413539507e-05, "loss": 0.4572, "step": 46955 }, { "epoch": 0.9958643507030603, "grad_norm": 0.40249860286712646, "learning_rate": 1.008203892948388e-05, "loss": 0.5583, "step": 46956 }, { "epoch": 0.9958855591609934, "grad_norm": 0.3553207516670227, "learning_rate": 1.008170544533701e-05, "loss": 0.4865, "step": 46957 }, { "epoch": 0.9959067676189264, "grad_norm": 0.3777478337287903, "learning_rate": 1.0081371961099268e-05, "loss": 0.4843, "step": 46958 }, { "epoch": 0.9959279760768595, "grad_norm": 0.36791864037513733, "learning_rate": 1.0081038476771029e-05, "loss": 0.52, "step": 46959 }, { "epoch": 0.9959491845347924, "grad_norm": 0.37511271238327026, "learning_rate": 1.0080704992352656e-05, "loss": 0.4888, "step": 46960 }, { "epoch": 0.9959703929927255, "grad_norm": 0.33836039900779724, "learning_rate": 1.0080371507844523e-05, "loss": 0.4564, "step": 46961 }, { "epoch": 0.9959916014506586, "grad_norm": 0.38362234830856323, "learning_rate": 1.0080038023247001e-05, "loss": 0.4891, "step": 46962 }, { "epoch": 0.9960128099085915, "grad_norm": 0.4078079164028168, "learning_rate": 1.0079704538560463e-05, "loss": 0.5741, "step": 46963 }, { "epoch": 0.9960340183665246, "grad_norm": 0.4953366219997406, "learning_rate": 1.0079371053785279e-05, "loss": 0.4914, "step": 46964 }, { "epoch": 0.9960552268244576, "grad_norm": 0.3799251616001129, "learning_rate": 1.007903756892182e-05, "loss": 0.5289, "step": 46965 }, { "epoch": 0.9960764352823906, "grad_norm": 0.3801892101764679, "learning_rate": 1.0078704083970454e-05, "loss": 0.4672, "step": 46966 }, { "epoch": 0.9960976437403236, "grad_norm": 0.3899405896663666, "learning_rate": 1.0078370598931553e-05, "loss": 0.4514, "step": 46967 }, { "epoch": 0.9961188521982567, "grad_norm": 0.39165881276130676, "learning_rate": 1.007803711380549e-05, "loss": 0.4272, "step": 46968 }, { "epoch": 0.9961400606561897, "grad_norm": 0.35550037026405334, "learning_rate": 1.0077703628592637e-05, "loss": 0.4859, "step": 46969 }, { "epoch": 0.9961612691141227, "grad_norm": 0.37458914518356323, "learning_rate": 1.0077370143293359e-05, "loss": 0.4621, "step": 46970 }, { "epoch": 0.9961824775720557, "grad_norm": 0.4915088713169098, "learning_rate": 1.0077036657908033e-05, "loss": 0.4589, "step": 46971 }, { "epoch": 0.9962036860299888, "grad_norm": 0.3959026634693146, "learning_rate": 1.0076703172437025e-05, "loss": 0.5194, "step": 46972 }, { "epoch": 0.9962248944879217, "grad_norm": 0.3617609441280365, "learning_rate": 1.0076369686880712e-05, "loss": 0.4536, "step": 46973 }, { "epoch": 0.9962461029458548, "grad_norm": 0.3423686623573303, "learning_rate": 1.0076036201239461e-05, "loss": 0.4515, "step": 46974 }, { "epoch": 0.9962673114037879, "grad_norm": 0.3728427290916443, "learning_rate": 1.007570271551364e-05, "loss": 0.4679, "step": 46975 }, { "epoch": 0.9962885198617208, "grad_norm": 0.3639150857925415, "learning_rate": 1.0075369229703625e-05, "loss": 0.5089, "step": 46976 }, { "epoch": 0.9963097283196539, "grad_norm": 0.39256933331489563, "learning_rate": 1.0075035743809785e-05, "loss": 0.465, "step": 46977 }, { "epoch": 0.9963309367775869, "grad_norm": 0.3855968713760376, "learning_rate": 1.007470225783249e-05, "loss": 0.4918, "step": 46978 }, { "epoch": 0.99635214523552, "grad_norm": 0.33942797780036926, "learning_rate": 1.0074368771772114e-05, "loss": 0.417, "step": 46979 }, { "epoch": 0.9963733536934529, "grad_norm": 1.2074319124221802, "learning_rate": 1.0074035285629025e-05, "loss": 0.5204, "step": 46980 }, { "epoch": 0.996394562151386, "grad_norm": 0.3573528230190277, "learning_rate": 1.0073701799403591e-05, "loss": 0.4525, "step": 46981 }, { "epoch": 0.996415770609319, "grad_norm": 0.34422269463539124, "learning_rate": 1.007336831309619e-05, "loss": 0.453, "step": 46982 }, { "epoch": 0.996436979067252, "grad_norm": 0.374090313911438, "learning_rate": 1.007303482670719e-05, "loss": 0.4661, "step": 46983 }, { "epoch": 0.996458187525185, "grad_norm": 0.36000698804855347, "learning_rate": 1.0072701340236959e-05, "loss": 0.4336, "step": 46984 }, { "epoch": 0.9964793959831181, "grad_norm": 0.42296984791755676, "learning_rate": 1.0072367853685874e-05, "loss": 0.4794, "step": 46985 }, { "epoch": 0.996500604441051, "grad_norm": 0.43896374106407166, "learning_rate": 1.0072034367054298e-05, "loss": 0.4779, "step": 46986 }, { "epoch": 0.9965218128989841, "grad_norm": 0.35758596658706665, "learning_rate": 1.0071700880342608e-05, "loss": 0.4511, "step": 46987 }, { "epoch": 0.9965430213569172, "grad_norm": 0.3662113845348358, "learning_rate": 1.007136739355117e-05, "loss": 0.4676, "step": 46988 }, { "epoch": 0.9965642298148502, "grad_norm": 0.39491933584213257, "learning_rate": 1.0071033906680361e-05, "loss": 0.5216, "step": 46989 }, { "epoch": 0.9965854382727832, "grad_norm": 0.3622260093688965, "learning_rate": 1.0070700419730548e-05, "loss": 0.5083, "step": 46990 }, { "epoch": 0.9966066467307162, "grad_norm": 0.5467561483383179, "learning_rate": 1.0070366932702103e-05, "loss": 0.4783, "step": 46991 }, { "epoch": 0.9966278551886493, "grad_norm": 0.44700583815574646, "learning_rate": 1.0070033445595396e-05, "loss": 0.4946, "step": 46992 }, { "epoch": 0.9966490636465822, "grad_norm": 0.35517391562461853, "learning_rate": 1.0069699958410799e-05, "loss": 0.5476, "step": 46993 }, { "epoch": 0.9966702721045153, "grad_norm": 0.3580245077610016, "learning_rate": 1.0069366471148681e-05, "loss": 0.453, "step": 46994 }, { "epoch": 0.9966914805624483, "grad_norm": 0.4881007671356201, "learning_rate": 1.0069032983809412e-05, "loss": 0.4157, "step": 46995 }, { "epoch": 0.9967126890203813, "grad_norm": 0.3505218029022217, "learning_rate": 1.006869949639337e-05, "loss": 0.4505, "step": 46996 }, { "epoch": 0.9967338974783143, "grad_norm": 0.3452695608139038, "learning_rate": 1.006836600890092e-05, "loss": 0.5085, "step": 46997 }, { "epoch": 0.9967551059362474, "grad_norm": 0.3371152877807617, "learning_rate": 1.0068032521332431e-05, "loss": 0.4183, "step": 46998 }, { "epoch": 0.9967763143941804, "grad_norm": 0.35455310344696045, "learning_rate": 1.006769903368828e-05, "loss": 0.4689, "step": 46999 }, { "epoch": 0.9967975228521134, "grad_norm": 0.3702090382575989, "learning_rate": 1.0067365545968832e-05, "loss": 0.4866, "step": 47000 }, { "epoch": 0.9968187313100465, "grad_norm": 0.5007563233375549, "learning_rate": 1.0067032058174461e-05, "loss": 0.5137, "step": 47001 }, { "epoch": 0.9968399397679795, "grad_norm": 0.38491493463516235, "learning_rate": 1.006669857030554e-05, "loss": 0.4519, "step": 47002 }, { "epoch": 0.9968611482259125, "grad_norm": 0.4034494459629059, "learning_rate": 1.0066365082362438e-05, "loss": 0.5131, "step": 47003 }, { "epoch": 0.9968823566838455, "grad_norm": 0.4362962245941162, "learning_rate": 1.0066031594345522e-05, "loss": 0.4952, "step": 47004 }, { "epoch": 0.9969035651417786, "grad_norm": 0.4070970118045807, "learning_rate": 1.006569810625517e-05, "loss": 0.472, "step": 47005 }, { "epoch": 0.9969247735997115, "grad_norm": 0.34752872586250305, "learning_rate": 1.0065364618091744e-05, "loss": 0.5329, "step": 47006 }, { "epoch": 0.9969459820576446, "grad_norm": 0.3801282048225403, "learning_rate": 1.0065031129855622e-05, "loss": 0.4505, "step": 47007 }, { "epoch": 0.9969671905155776, "grad_norm": 0.39855828881263733, "learning_rate": 1.0064697641547175e-05, "loss": 0.4955, "step": 47008 }, { "epoch": 0.9969883989735107, "grad_norm": 0.3916283845901489, "learning_rate": 1.006436415316677e-05, "loss": 0.4679, "step": 47009 }, { "epoch": 0.9970096074314436, "grad_norm": 0.40021124482154846, "learning_rate": 1.0064030664714781e-05, "loss": 0.5472, "step": 47010 }, { "epoch": 0.9970308158893767, "grad_norm": 0.37427201867103577, "learning_rate": 1.0063697176191577e-05, "loss": 0.4858, "step": 47011 }, { "epoch": 0.9970520243473097, "grad_norm": 0.4040047526359558, "learning_rate": 1.006336368759753e-05, "loss": 0.5337, "step": 47012 }, { "epoch": 0.9970732328052427, "grad_norm": 0.3855384290218353, "learning_rate": 1.0063030198933008e-05, "loss": 0.5019, "step": 47013 }, { "epoch": 0.9970944412631757, "grad_norm": 0.37353456020355225, "learning_rate": 1.0062696710198388e-05, "loss": 0.5603, "step": 47014 }, { "epoch": 0.9971156497211088, "grad_norm": 0.5344344973564148, "learning_rate": 1.0062363221394034e-05, "loss": 0.455, "step": 47015 }, { "epoch": 0.9971368581790419, "grad_norm": 0.32591477036476135, "learning_rate": 1.0062029732520324e-05, "loss": 0.426, "step": 47016 }, { "epoch": 0.9971580666369748, "grad_norm": 0.43321576714515686, "learning_rate": 1.0061696243577624e-05, "loss": 0.4504, "step": 47017 }, { "epoch": 0.9971792750949079, "grad_norm": 0.41241025924682617, "learning_rate": 1.0061362754566303e-05, "loss": 0.4325, "step": 47018 }, { "epoch": 0.9972004835528409, "grad_norm": 0.34514379501342773, "learning_rate": 1.0061029265486738e-05, "loss": 0.4704, "step": 47019 }, { "epoch": 0.9972216920107739, "grad_norm": 0.3426448702812195, "learning_rate": 1.0060695776339296e-05, "loss": 0.5275, "step": 47020 }, { "epoch": 0.9972429004687069, "grad_norm": 0.3840571939945221, "learning_rate": 1.0060362287124346e-05, "loss": 0.5752, "step": 47021 }, { "epoch": 0.99726410892664, "grad_norm": 0.3933526277542114, "learning_rate": 1.0060028797842265e-05, "loss": 0.4998, "step": 47022 }, { "epoch": 0.9972853173845729, "grad_norm": 0.34766754508018494, "learning_rate": 1.0059695308493419e-05, "loss": 0.4897, "step": 47023 }, { "epoch": 0.997306525842506, "grad_norm": 0.36900538206100464, "learning_rate": 1.0059361819078179e-05, "loss": 0.5527, "step": 47024 }, { "epoch": 0.997327734300439, "grad_norm": 0.3419787585735321, "learning_rate": 1.005902832959692e-05, "loss": 0.4301, "step": 47025 }, { "epoch": 0.997348942758372, "grad_norm": 0.3756166696548462, "learning_rate": 1.0058694840050007e-05, "loss": 0.5545, "step": 47026 }, { "epoch": 0.997370151216305, "grad_norm": 0.3607144057750702, "learning_rate": 1.0058361350437817e-05, "loss": 0.4879, "step": 47027 }, { "epoch": 0.9973913596742381, "grad_norm": 0.35828542709350586, "learning_rate": 1.005802786076072e-05, "loss": 0.4587, "step": 47028 }, { "epoch": 0.9974125681321712, "grad_norm": 0.34664085507392883, "learning_rate": 1.005769437101908e-05, "loss": 0.5315, "step": 47029 }, { "epoch": 0.9974337765901041, "grad_norm": 0.39928343892097473, "learning_rate": 1.0057360881213276e-05, "loss": 0.5459, "step": 47030 }, { "epoch": 0.9974549850480372, "grad_norm": 0.33837220072746277, "learning_rate": 1.0057027391343676e-05, "loss": 0.4389, "step": 47031 }, { "epoch": 0.9974761935059702, "grad_norm": 0.3694221079349518, "learning_rate": 1.0056693901410649e-05, "loss": 0.4997, "step": 47032 }, { "epoch": 0.9974974019639032, "grad_norm": 0.37105923891067505, "learning_rate": 1.0056360411414565e-05, "loss": 0.4761, "step": 47033 }, { "epoch": 0.9975186104218362, "grad_norm": 0.3889099359512329, "learning_rate": 1.0056026921355802e-05, "loss": 0.4728, "step": 47034 }, { "epoch": 0.9975398188797693, "grad_norm": 0.4118591248989105, "learning_rate": 1.0055693431234723e-05, "loss": 0.4258, "step": 47035 }, { "epoch": 0.9975610273377022, "grad_norm": 0.38973671197891235, "learning_rate": 1.0055359941051705e-05, "loss": 0.5609, "step": 47036 }, { "epoch": 0.9975822357956353, "grad_norm": 0.3714054822921753, "learning_rate": 1.0055026450807115e-05, "loss": 0.4307, "step": 47037 }, { "epoch": 0.9976034442535683, "grad_norm": 0.45568183064460754, "learning_rate": 1.0054692960501327e-05, "loss": 0.477, "step": 47038 }, { "epoch": 0.9976246527115014, "grad_norm": 0.3400478959083557, "learning_rate": 1.0054359470134707e-05, "loss": 0.4701, "step": 47039 }, { "epoch": 0.9976458611694343, "grad_norm": 0.38319143652915955, "learning_rate": 1.0054025979707628e-05, "loss": 0.5593, "step": 47040 }, { "epoch": 0.9976670696273674, "grad_norm": 0.7205501794815063, "learning_rate": 1.0053692489220465e-05, "loss": 0.4666, "step": 47041 }, { "epoch": 0.9976882780853005, "grad_norm": 0.34687793254852295, "learning_rate": 1.0053358998673586e-05, "loss": 0.4655, "step": 47042 }, { "epoch": 0.9977094865432334, "grad_norm": 0.3737810254096985, "learning_rate": 1.0053025508067359e-05, "loss": 0.4948, "step": 47043 }, { "epoch": 0.9977306950011665, "grad_norm": 0.3917964696884155, "learning_rate": 1.0052692017402159e-05, "loss": 0.5436, "step": 47044 }, { "epoch": 0.9977519034590995, "grad_norm": 0.3655679225921631, "learning_rate": 1.0052358526678353e-05, "loss": 0.4773, "step": 47045 }, { "epoch": 0.9977731119170326, "grad_norm": 0.5707777738571167, "learning_rate": 1.0052025035896316e-05, "loss": 0.4747, "step": 47046 }, { "epoch": 0.9977943203749655, "grad_norm": 0.3515501320362091, "learning_rate": 1.0051691545056419e-05, "loss": 0.472, "step": 47047 }, { "epoch": 0.9978155288328986, "grad_norm": 0.40602925419807434, "learning_rate": 1.0051358054159031e-05, "loss": 0.5062, "step": 47048 }, { "epoch": 0.9978367372908316, "grad_norm": 0.33710435032844543, "learning_rate": 1.0051024563204519e-05, "loss": 0.4345, "step": 47049 }, { "epoch": 0.9978579457487646, "grad_norm": 0.43092355132102966, "learning_rate": 1.0050691072193262e-05, "loss": 0.6161, "step": 47050 }, { "epoch": 0.9978791542066976, "grad_norm": 0.34472087025642395, "learning_rate": 1.0050357581125626e-05, "loss": 0.4852, "step": 47051 }, { "epoch": 0.9979003626646307, "grad_norm": 0.4143409729003906, "learning_rate": 1.005002409000198e-05, "loss": 0.5461, "step": 47052 }, { "epoch": 0.9979215711225636, "grad_norm": 0.36585158109664917, "learning_rate": 1.00496905988227e-05, "loss": 0.4408, "step": 47053 }, { "epoch": 0.9979427795804967, "grad_norm": 0.3752371072769165, "learning_rate": 1.0049357107588153e-05, "loss": 0.524, "step": 47054 }, { "epoch": 0.9979639880384297, "grad_norm": 0.34879985451698303, "learning_rate": 1.0049023616298714e-05, "loss": 0.4856, "step": 47055 }, { "epoch": 0.9979851964963627, "grad_norm": 0.3386971652507782, "learning_rate": 1.0048690124954751e-05, "loss": 0.5016, "step": 47056 }, { "epoch": 0.9980064049542958, "grad_norm": 0.5140697360038757, "learning_rate": 1.0048356633556633e-05, "loss": 0.4524, "step": 47057 }, { "epoch": 0.9980276134122288, "grad_norm": 0.35466691851615906, "learning_rate": 1.0048023142104734e-05, "loss": 0.4654, "step": 47058 }, { "epoch": 0.9980488218701619, "grad_norm": 0.36283913254737854, "learning_rate": 1.0047689650599426e-05, "loss": 0.4532, "step": 47059 }, { "epoch": 0.9980700303280948, "grad_norm": 0.43358665704727173, "learning_rate": 1.0047356159041076e-05, "loss": 0.4675, "step": 47060 }, { "epoch": 0.9980912387860279, "grad_norm": 0.37730517983436584, "learning_rate": 1.0047022667430057e-05, "loss": 0.4949, "step": 47061 }, { "epoch": 0.9981124472439609, "grad_norm": 0.37605777382850647, "learning_rate": 1.0046689175766741e-05, "loss": 0.5669, "step": 47062 }, { "epoch": 0.9981336557018939, "grad_norm": 0.3761512339115143, "learning_rate": 1.0046355684051497e-05, "loss": 0.3502, "step": 47063 }, { "epoch": 0.9981548641598269, "grad_norm": 0.37085050344467163, "learning_rate": 1.0046022192284697e-05, "loss": 0.4685, "step": 47064 }, { "epoch": 0.99817607261776, "grad_norm": 0.4312538802623749, "learning_rate": 1.0045688700466711e-05, "loss": 0.5231, "step": 47065 }, { "epoch": 0.9981972810756929, "grad_norm": 0.3463793694972992, "learning_rate": 1.0045355208597908e-05, "loss": 0.4278, "step": 47066 }, { "epoch": 0.998218489533626, "grad_norm": 0.3564566373825073, "learning_rate": 1.0045021716678666e-05, "loss": 0.52, "step": 47067 }, { "epoch": 0.998239697991559, "grad_norm": 0.3755810260772705, "learning_rate": 1.0044688224709346e-05, "loss": 0.4696, "step": 47068 }, { "epoch": 0.9982609064494921, "grad_norm": 0.34962198138237, "learning_rate": 1.0044354732690329e-05, "loss": 0.4137, "step": 47069 }, { "epoch": 0.9982821149074251, "grad_norm": 0.35074397921562195, "learning_rate": 1.0044021240621982e-05, "loss": 0.5134, "step": 47070 }, { "epoch": 0.9983033233653581, "grad_norm": 0.3939667344093323, "learning_rate": 1.0043687748504672e-05, "loss": 0.594, "step": 47071 }, { "epoch": 0.9983245318232912, "grad_norm": 0.3484449088573456, "learning_rate": 1.004335425633877e-05, "loss": 0.4577, "step": 47072 }, { "epoch": 0.9983457402812241, "grad_norm": 0.3455810546875, "learning_rate": 1.0043020764124654e-05, "loss": 0.5205, "step": 47073 }, { "epoch": 0.9983669487391572, "grad_norm": 0.37485623359680176, "learning_rate": 1.004268727186269e-05, "loss": 0.4893, "step": 47074 }, { "epoch": 0.9983881571970902, "grad_norm": 0.3128628134727478, "learning_rate": 1.004235377955325e-05, "loss": 0.3961, "step": 47075 }, { "epoch": 0.9984093656550233, "grad_norm": 0.40185558795928955, "learning_rate": 1.0042020287196703e-05, "loss": 0.4857, "step": 47076 }, { "epoch": 0.9984305741129562, "grad_norm": 0.35959967970848083, "learning_rate": 1.004168679479342e-05, "loss": 0.5086, "step": 47077 }, { "epoch": 0.9984517825708893, "grad_norm": 0.42224833369255066, "learning_rate": 1.0041353302343777e-05, "loss": 0.4858, "step": 47078 }, { "epoch": 0.9984729910288223, "grad_norm": 0.38186630606651306, "learning_rate": 1.0041019809848141e-05, "loss": 0.5064, "step": 47079 }, { "epoch": 0.9984941994867553, "grad_norm": 0.4437284767627716, "learning_rate": 1.004068631730688e-05, "loss": 0.4767, "step": 47080 }, { "epoch": 0.9985154079446883, "grad_norm": 0.3662774860858917, "learning_rate": 1.004035282472037e-05, "loss": 0.4439, "step": 47081 }, { "epoch": 0.9985366164026214, "grad_norm": 0.3910999596118927, "learning_rate": 1.004001933208898e-05, "loss": 0.5227, "step": 47082 }, { "epoch": 0.9985578248605544, "grad_norm": 0.37658482789993286, "learning_rate": 1.003968583941308e-05, "loss": 0.5026, "step": 47083 }, { "epoch": 0.9985790333184874, "grad_norm": 0.3852289319038391, "learning_rate": 1.0039352346693041e-05, "loss": 0.5265, "step": 47084 }, { "epoch": 0.9986002417764205, "grad_norm": 0.3712456524372101, "learning_rate": 1.0039018853929236e-05, "loss": 0.5051, "step": 47085 }, { "epoch": 0.9986214502343534, "grad_norm": 0.3621373176574707, "learning_rate": 1.0038685361122036e-05, "loss": 0.4945, "step": 47086 }, { "epoch": 0.9986426586922865, "grad_norm": 0.36029252409935, "learning_rate": 1.003835186827181e-05, "loss": 0.4642, "step": 47087 }, { "epoch": 0.9986638671502195, "grad_norm": 0.39243295788764954, "learning_rate": 1.0038018375378926e-05, "loss": 0.4922, "step": 47088 }, { "epoch": 0.9986850756081526, "grad_norm": 0.43241986632347107, "learning_rate": 1.0037684882443761e-05, "loss": 0.5297, "step": 47089 }, { "epoch": 0.9987062840660855, "grad_norm": 0.34109067916870117, "learning_rate": 1.0037351389466683e-05, "loss": 0.3929, "step": 47090 }, { "epoch": 0.9987274925240186, "grad_norm": 0.36055514216423035, "learning_rate": 1.0037017896448062e-05, "loss": 0.4725, "step": 47091 }, { "epoch": 0.9987487009819516, "grad_norm": 0.39360812306404114, "learning_rate": 1.003668440338827e-05, "loss": 0.4383, "step": 47092 }, { "epoch": 0.9987699094398846, "grad_norm": 0.3711393475532532, "learning_rate": 1.0036350910287681e-05, "loss": 0.5015, "step": 47093 }, { "epoch": 0.9987911178978176, "grad_norm": 0.4057929217815399, "learning_rate": 1.0036017417146659e-05, "loss": 0.4803, "step": 47094 }, { "epoch": 0.9988123263557507, "grad_norm": 0.34341320395469666, "learning_rate": 1.0035683923965582e-05, "loss": 0.4364, "step": 47095 }, { "epoch": 0.9988335348136836, "grad_norm": 0.4006690979003906, "learning_rate": 1.0035350430744814e-05, "loss": 0.5027, "step": 47096 }, { "epoch": 0.9988547432716167, "grad_norm": 0.3440590500831604, "learning_rate": 1.0035016937484731e-05, "loss": 0.4972, "step": 47097 }, { "epoch": 0.9988759517295498, "grad_norm": 0.4305303394794464, "learning_rate": 1.0034683444185705e-05, "loss": 0.5066, "step": 47098 }, { "epoch": 0.9988971601874828, "grad_norm": 0.360276460647583, "learning_rate": 1.0034349950848103e-05, "loss": 0.3997, "step": 47099 }, { "epoch": 0.9989183686454158, "grad_norm": 0.5298224091529846, "learning_rate": 1.0034016457472298e-05, "loss": 0.5614, "step": 47100 }, { "epoch": 0.9989395771033488, "grad_norm": 0.3550459146499634, "learning_rate": 1.003368296405866e-05, "loss": 0.4637, "step": 47101 }, { "epoch": 0.9989607855612819, "grad_norm": 0.4060583710670471, "learning_rate": 1.0033349470607557e-05, "loss": 0.4934, "step": 47102 }, { "epoch": 0.9989819940192148, "grad_norm": 0.37055066227912903, "learning_rate": 1.0033015977119364e-05, "loss": 0.5437, "step": 47103 }, { "epoch": 0.9990032024771479, "grad_norm": 0.3924363851547241, "learning_rate": 1.0032682483594454e-05, "loss": 0.497, "step": 47104 }, { "epoch": 0.9990244109350809, "grad_norm": 0.34528496861457825, "learning_rate": 1.0032348990033192e-05, "loss": 0.4859, "step": 47105 }, { "epoch": 0.999045619393014, "grad_norm": 0.36877307295799255, "learning_rate": 1.0032015496435952e-05, "loss": 0.5169, "step": 47106 }, { "epoch": 0.9990668278509469, "grad_norm": 0.3312600553035736, "learning_rate": 1.0031682002803106e-05, "loss": 0.4701, "step": 47107 }, { "epoch": 0.99908803630888, "grad_norm": 0.3577600419521332, "learning_rate": 1.0031348509135021e-05, "loss": 0.5358, "step": 47108 }, { "epoch": 0.999109244766813, "grad_norm": 0.3424564599990845, "learning_rate": 1.0031015015432075e-05, "loss": 0.4703, "step": 47109 }, { "epoch": 0.999130453224746, "grad_norm": 0.36115142703056335, "learning_rate": 1.003068152169463e-05, "loss": 0.5176, "step": 47110 }, { "epoch": 0.9991516616826791, "grad_norm": 0.3751416802406311, "learning_rate": 1.0030348027923062e-05, "loss": 0.4149, "step": 47111 }, { "epoch": 0.9991728701406121, "grad_norm": 0.35841986536979675, "learning_rate": 1.0030014534117742e-05, "loss": 0.4639, "step": 47112 }, { "epoch": 0.9991940785985451, "grad_norm": 0.3520173728466034, "learning_rate": 1.0029681040279042e-05, "loss": 0.426, "step": 47113 }, { "epoch": 0.9992152870564781, "grad_norm": 0.42404642701148987, "learning_rate": 1.0029347546407328e-05, "loss": 0.444, "step": 47114 }, { "epoch": 0.9992364955144112, "grad_norm": 0.3302263915538788, "learning_rate": 1.0029014052502976e-05, "loss": 0.449, "step": 47115 }, { "epoch": 0.9992577039723441, "grad_norm": 0.3801281154155731, "learning_rate": 1.0028680558566353e-05, "loss": 0.4799, "step": 47116 }, { "epoch": 0.9992789124302772, "grad_norm": 0.6643409132957458, "learning_rate": 1.0028347064597831e-05, "loss": 0.5065, "step": 47117 }, { "epoch": 0.9993001208882102, "grad_norm": 0.3669392466545105, "learning_rate": 1.0028013570597785e-05, "loss": 0.5153, "step": 47118 }, { "epoch": 0.9993213293461433, "grad_norm": 0.773834764957428, "learning_rate": 1.002768007656658e-05, "loss": 0.6287, "step": 47119 }, { "epoch": 0.9993425378040762, "grad_norm": 0.4234229326248169, "learning_rate": 1.002734658250459e-05, "loss": 0.5469, "step": 47120 }, { "epoch": 0.9993637462620093, "grad_norm": 0.3866916298866272, "learning_rate": 1.0027013088412187e-05, "loss": 0.49, "step": 47121 }, { "epoch": 0.9993849547199423, "grad_norm": 0.31969353556632996, "learning_rate": 1.0026679594289736e-05, "loss": 0.4363, "step": 47122 }, { "epoch": 0.9994061631778753, "grad_norm": 0.37141454219818115, "learning_rate": 1.0026346100137616e-05, "loss": 0.4187, "step": 47123 }, { "epoch": 0.9994273716358084, "grad_norm": 0.4286839962005615, "learning_rate": 1.0026012605956194e-05, "loss": 0.457, "step": 47124 }, { "epoch": 0.9994485800937414, "grad_norm": 0.45627811551094055, "learning_rate": 1.002567911174584e-05, "loss": 0.5317, "step": 47125 }, { "epoch": 0.9994697885516745, "grad_norm": 0.3516696095466614, "learning_rate": 1.0025345617506927e-05, "loss": 0.5155, "step": 47126 }, { "epoch": 0.9994909970096074, "grad_norm": 0.35375964641571045, "learning_rate": 1.0025012123239825e-05, "loss": 0.4463, "step": 47127 }, { "epoch": 0.9995122054675405, "grad_norm": 0.3613772988319397, "learning_rate": 1.0024678628944903e-05, "loss": 0.4985, "step": 47128 }, { "epoch": 0.9995334139254735, "grad_norm": 0.3380490243434906, "learning_rate": 1.0024345134622532e-05, "loss": 0.3805, "step": 47129 }, { "epoch": 0.9995546223834065, "grad_norm": 0.3554517924785614, "learning_rate": 1.002401164027309e-05, "loss": 0.4574, "step": 47130 }, { "epoch": 0.9995758308413395, "grad_norm": 0.36377137899398804, "learning_rate": 1.0023678145896938e-05, "loss": 0.5501, "step": 47131 }, { "epoch": 0.9995970392992726, "grad_norm": 0.34961211681365967, "learning_rate": 1.0023344651494453e-05, "loss": 0.4719, "step": 47132 }, { "epoch": 0.9996182477572055, "grad_norm": 0.4264480769634247, "learning_rate": 1.0023011157066003e-05, "loss": 0.5417, "step": 47133 }, { "epoch": 0.9996394562151386, "grad_norm": 0.3404742479324341, "learning_rate": 1.0022677662611962e-05, "loss": 0.5361, "step": 47134 }, { "epoch": 0.9996606646730716, "grad_norm": 0.3643343150615692, "learning_rate": 1.0022344168132699e-05, "loss": 0.5261, "step": 47135 }, { "epoch": 0.9996818731310047, "grad_norm": 0.4117244780063629, "learning_rate": 1.0022010673628581e-05, "loss": 0.5744, "step": 47136 }, { "epoch": 0.9997030815889376, "grad_norm": 0.38745057582855225, "learning_rate": 1.0021677179099988e-05, "loss": 0.4783, "step": 47137 }, { "epoch": 0.9997242900468707, "grad_norm": 0.3847028911113739, "learning_rate": 1.0021343684547284e-05, "loss": 0.4639, "step": 47138 }, { "epoch": 0.9997454985048038, "grad_norm": 0.35543084144592285, "learning_rate": 1.0021010189970842e-05, "loss": 0.4483, "step": 47139 }, { "epoch": 0.9997667069627367, "grad_norm": 0.4568321108818054, "learning_rate": 1.0020676695371034e-05, "loss": 0.5204, "step": 47140 }, { "epoch": 0.9997879154206698, "grad_norm": 0.33825787901878357, "learning_rate": 1.0020343200748229e-05, "loss": 0.4446, "step": 47141 }, { "epoch": 0.9998091238786028, "grad_norm": 0.3539775311946869, "learning_rate": 1.0020009706102794e-05, "loss": 0.4871, "step": 47142 }, { "epoch": 0.9998303323365358, "grad_norm": 0.4315244257450104, "learning_rate": 1.001967621143511e-05, "loss": 0.4973, "step": 47143 }, { "epoch": 0.9998515407944688, "grad_norm": 0.32262638211250305, "learning_rate": 1.001934271674554e-05, "loss": 0.4893, "step": 47144 }, { "epoch": 0.9998727492524019, "grad_norm": 0.5130905508995056, "learning_rate": 1.0019009222034457e-05, "loss": 0.5505, "step": 47145 }, { "epoch": 0.9998939577103348, "grad_norm": 0.35601285099983215, "learning_rate": 1.0018675727302234e-05, "loss": 0.4858, "step": 47146 }, { "epoch": 0.9999151661682679, "grad_norm": 0.33371737599372864, "learning_rate": 1.0018342232549237e-05, "loss": 0.4669, "step": 47147 }, { "epoch": 0.9999363746262009, "grad_norm": 0.3467673063278198, "learning_rate": 1.001800873777584e-05, "loss": 0.4804, "step": 47148 }, { "epoch": 0.999957583084134, "grad_norm": 0.32791680097579956, "learning_rate": 1.0017675242982416e-05, "loss": 0.4272, "step": 47149 }, { "epoch": 0.9999787915420669, "grad_norm": 0.3737533390522003, "learning_rate": 1.0017341748169332e-05, "loss": 0.4668, "step": 47150 }, { "epoch": 1.0, "grad_norm": 0.398488849401474, "learning_rate": 1.0017008253336961e-05, "loss": 0.5608, "step": 47151 }, { "epoch": 1.0, "eval_loss": 0.2345746010541916, "eval_runtime": 176.8914, "eval_samples_per_second": 135.671, "eval_steps_per_second": 16.96, "step": 47151 } ], "logging_steps": 1, "max_steps": 94302, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4.520144860934025e+20, "train_batch_size": 1, "trial_name": null, "trial_params": null }